自己写的c++实现LSTM算法,有解释;使用了ADAM优化,大大加快了收敛速度,可以与我之前未使用优化的算法进行比较,大约有350倍的提升。有问题请发邮箱:fan1974815@126.com,也可以在下面提出问题,我会尽量解答。
直接上代码,3部分:头文件.h、源文件.cpp、调用方法,使用了vs2015平台
1、头文件
#pragma once
#include “iostream”
#include “math.h”
#include “stdlib.h”
#include “time.h”
#include “vector”
#include “assert.h”
#include"DataPreProc.h"
#include"string"
using namespace std;
class AdamLSTM
{
public:
AdamLSTM();
~AdamLSTM();
#define innode 1 //输入结点数,存储特征向量
#define hidenode 128 //隐藏结点数,
#define outnode 10 //输出结点数,每个时间点用一个向量代表结果
#define alpha 0.0005 //学习速率
#define timesteps 3 //time steps 时间点数量
#define epochs 25000
#define trainErrThreshold 0.9
#define beta1 0.8
#define beta2 0.999
#define eps 1e-8
#define random(x) rand()%(x)
#define randval(high) ( (double)rand() / RAND_MAX * high )
#define uniform_plus_minus_one ( (double)( 2.0 * rand() ) / ((double)RAND_MAX + 1.0) - 1.0 ) //均匀随机分布
#define CHINESE
};
2、源文件
#include “stdafx.h”
#include “AdamLSTM.h”
AdamLSTM::AdamLSTM()
{
winit((double*)W_I, hidenode * innode);
winit((double*)U_I, hidenode * hidenode);
winit((double*)B_I, hidenode * 1);
winit((double*)W_F, hidenode * innode);
winit((double*)U_F, hidenode * hidenode);
winit((double*)B_F, hidenode * 1);
winit((double*)W_O, hidenode * innode);
winit((double*)U_O, hidenode * hidenode);
winit((double*)B_O, hidenode * 1);
winit((double*)W_G, hidenode * innode);
winit((double*)U_G, hidenode * hidenode);
winit((double*)B_G, hidenode * 1);
winit((double*)W_out, hidenode * outnode);
winit((double*)B_out, outnode * 1);
}
AdamLSTM::~AdamLSTM()
{
}
double AdamLSTM::dsigmoid(double y)
{
return y * (1.0 - y);
}
double AdamLSTM::sigmoid(double x)
{
return 1.0 / (1.0 + exp(-x));
}
double AdamLSTM::dtanh(double y)
{
y = tanh(y);
return 1.0 - y * y;
}
void AdamLSTM::winit(double w[], int n)
{
for (int i = 0; i<n; i++)
{
w[i] = uniform_plus_minus_one; //均匀随机分布
}
}
void AdamLSTM::winit_withiZero(double w[], int n)
{
for (int i = 0; i<n; i++)
{
w[i] =0.0;
}
}
void AdamLSTM::train()
{
//char s[] = “十 四 五 规 划 和 二 零 三 五 年 远 景 目 标 纲 要 明 确 实 施 积 极 应 对 人 口 老 龄 化 国 家 战 略 制 定 人 口 长 期 发 展 战 略 优 化 生 育 政 策”;
//string ss = “十四五规划和二零三五年远景目标纲要明确实施积极应对人口老龄化国家战略制定人口长期发展战略优化生育政策”;
//char s[] = “锄 禾 日 当 午 汗 滴 和 下 土 谁 知 盘 中 餐 粒 粒 皆 辛 苦”;
//string ss = “锄禾日当午汗滴和下土谁知盘中餐粒粒皆辛苦”;
#ifdef CHINESE
int end_offset = (timesteps + 0) * 2;//如果用随机产生样本的方法,此处的偏移应该加0而不是1,因为随机数界限不包含最大值
#else
int end_offset = (timesteps + 1);
#endif // CHINESE
int randomMax = ss.size() - end_offset;//随机产生样本索引时,随机值的范围最大值
cout << “randomMax” << randomMax << endl;
#ifdef CHINESE
for (size_t i = 0; i < timesteps * 2; i = i + 2)//获取本样本的timesteps个时间点数据,因为是汉字占2个位置,因此2
{
curSample.push_back(dsMap[ss.substr(offset + i, 2)]);
curTimestep_truth_10.push_back(dsMap[ss.substr(offset + i + 2, 2)]);
//cout << curSample.back() << endl;
//cout << curTimestep_truth_10.back() << endl;
}
#else
for (size_t i = 0; i < timesteps; i = i + 1)//获取本样本的timesteps个时间点数据,因为是汉字占2个位置,因此2
{
curSample.push_back(dsMap[ss.substr(offset + i, 1)]);
curTimestep_truth_10.push_back(dsMap[ss.substr(offset + i + 1, 1)]);
//cout << curSample.back() << endl;
//cout << curTimestep_truth_10.back() << endl;
}
#endif // CHINESE
//对真值进行one_hot编码
truth_steps.clear();
for (size_t i = 0; i < timesteps; i++)
{
double LableD = new double[outnode] {0};//打印测试?
for (size_t j = 0; j < dsMap.size(); j++)
{
if ((j + 1) == curTimestep_truth_10[i])
{
LableD[j] = 1.0f;
/cout << “LableD:” << LableD[j] << endl;/
}
else
{
LableD[j] = 0.0f;
/ cout << “LableD:” << LableD[j] << endl;*/
}
}
/cout << “当前时间点真值:” << endl << curTimestep_truth_10[i] << endl;/
truth_steps.push_back(LableD);//矩阵必须要复制,否则每次都引用了相同的数据
/cout << “truth_steps=”<<i<<endl << truth_steps.back() << endl;/
}
//正向传播:一直计算完所有的time step
for (p = 0; p < timesteps; p++)//循环遍历二进制数组,从最低位开始,p相当于time steps
{
x[0] = (float)curSample[p];
/cout << “x[0]” << p << “=” << x[0] << endl;/
double *in_gate = new double[hidenode] {0}; //输入门取sigmoid函数后的值
double *out_gate = new double[hidenode] {0}; //输出门取sigmoid函数后的值
double *forget_gate = new double[hidenode] {0}; //遗忘门取sigmoid函数后的值
double *g_gate = new double[hidenode] {0}; //新记忆取tanh函数后的值
double *state = new double[hidenode] {0}; //状态值
double *h = new double[hidenode] {0}; //隐层输出值
double *output = new double[outnode] {0};//当前时间点的预测数组
double *y_pre = new double[outnode] {0};//当前时间点的预测数组
double * truthLabel = new double[outnode] {0};//当前时间点的真值数组
double * y_delta = new double[outnode] {0};//当前时间点的真值数组
if (p == 0)
{
//在0时刻是没有之前的隐含层的,所以初始化一个全为0的
double *S = new double[hidenode] {0}; //状态值
double *h = new double[hidenode] {0}; //输出值
for (size_t i = 0; i < hidenode; i++)
{
S[i] = 0.0;
h[i] = 0.0;
}
S_vector.push_back(S);
h_vector.push_back(h);
}
for (size_t j = 0; j < hidenode; j++)
{
double inGateValue = 0;
double forgetGateValue = 0;
double outGateValue = 0;
double gGateValue = 0;
//当前时间点输入
for (size_t k = 0; k < innode; k++)
{
forgetGateValue += x[k] * W_F[k][j];
#ifdef CHINESE
while (offset % 2 != 0)//必须是2的倍数,否则获取汉字时进入死循环
{
offset = random(randomMax);
}
#endif // CHINESE
}
vector AdamLSTM::encode2Truth(map<string, int>& dsMap, vector<double*>& predictedM)
{
vector sRes;
sRes.clear();
int maxInd = 0;
for (size_t i = 0; i < predictedM.size(); i++)
{
double * pre = predictedM[i];
for (size_t j = 0; j < outnode; j++)
{
pre[j] = abs(pre[j]);
/cout << “当前节点 " << j << " 预测的值:” << pre[j] << endl;/
}
int ind[outnode] = { 0 };
BubbleSort(pre, outnode, ind);
maxInd = ind[outnode - 1];
//cout <<“当前时间点 “<<i<<” 最大值索引:”<< maxInd << endl;
for (map<string, int>::iterator it = dsMap.begin(); it != dsMap.end(); it++)
{
if ((*it).second == (maxInd + 1))
{
sRes.push_back((*it).first);
}
}
}
return sRes;
}
vector AdamLSTM::encode2Sample(map<string, int>& dsMap, vector& sample)
{
vector sRes;
sRes.clear();
for (size_t i = 0; i < sample.size(); i++)
{
for (map<string, int>::iterator it = dsMap.begin(); it != dsMap.end(); it++)
{
if (it->second == sample[i])
{
sRes.push_back(it->first);
}
}
}
return sRes;
}
vector AdamLSTM::sample2Encode(map<string, int>& dsMap, vector sample)
{
vector res;
for (size_t i = 0; i < sample.size(); i++)
{
for (map<string, int>::iterator it = dsMap.begin(); it != dsMap.end(); it++)
{
if (it->first == sample[i])
{
res.push_back(it->second);
}
}
}
return res;
}
void AdamLSTM::BubbleSort(double * p, int length, int * ind_diff)
{
for (int m = 0; m < length; m++)
{
ind_diff[m] = m;
}
}
void AdamLSTM::predict()
{
DataPreProc ddp;
}
3、调用方法
int main()
{
}