Materials available at: https://forejune.co/cuda/
Reviews
Multi-Layer Perceptron (MLP)
L + 1 layers, e.g. L = 4
Operations of a Neuron
like a biological neuron
|
|
z_i ~ logit (the neuron's weighted input sum, before the activation function)
// Slide pseudocode: the state an MLP object carries. N here stands for the
// per-layer capacity (the real header bounds it with maxNodes = 30).
int n1; //number of inputs including the bias input at index 0
int m1; //number of hidden nodes per hidden layer, including the bias node
int K; //number of outputs
int L; //the network has L+1 layers, numbered 0 (input) .. L (output)
int sizes[L+1]; //sizes of the layers: sizes[0] = n1, ..., sizes[L] = K
double w[L][N][N]; //w[l][i][j]: weight from node i of layer l to node j of layer l+1
double z[L+1][N]; //linear sum of products of weights and 'inputs'
double a[L+1][N]; //activation of each neuron: a[l][j] = g(z[l][j])
double y[K]; //predicted output
//Forward propagation (slide version): push input x through all L weight
//layers and return a pointer to the K network outputs in y.
double* MLP::forward(double x[])
{
//layer 0 holds the raw input; x[0] is the constant bias input
for(int i = 0; i < n1; i++)
a[0][i] = z[0][i] = x[i];
//w[l] connects layer l (size `left`) to layer l+1 (size `right`)
for(int l = 0; l < L; l++) {
int left = sizes[l]; //size of left side layer
int right = sizes[l+1]; //size of right side layer
for(int j = 0; j < right; j++) {
z[l+1][j] = 0;
//weighted sum of the previous layer's activations
for(int i = 0; i < left; i++)
z[l+1][j] += w[l][i][j] * a[l][i];
if ( l < L-1 && j == 0 ) //hidden layer
a[l+1][j] = 1; //node 0 of a hidden layer is the bias node, clamped to 1
else
a[l+1][j] = f( z[l+1][j] ); //sigmoid activation
}
}
//the output layer's activations are the prediction
for (int k = 0; k < K; k++)
y[k] = a[L][k];
return y; //return output
}
|
Backward Propagation
|
//Backward propagation (slide version), yd is the desired output.
//x[] is not referenced here: forward() must run first and has already
//stored the input as the layer-0 activations.
void MLP::backward(double yd[], double x[])
{
//delta[idx][j] is the delta of node j in layer idx+1 (no delta is needed
//for the input layer), so delta[L-1] belongs to the output layer
double delta[L][N]; //hidden layers
//output delta: error times the sigmoid slope at the output
for (int k = 0; k < K; k++) {
double e = yd[k] - y[k]; //output layer error
delta[L-1][k] = e * fd(z[L][k]);
}
//propagate deltas backwards, layer l+1 -> layer l
for(int l = L - 1; l >= 1; l--){
int left = sizes[l]; //size of left layer
int right = sizes[l+1]; //size of right layer
for (int j = 0; j < left; j++) {
delta[l-1][j] = 0;
for (int k = 0; k < right; k++)
delta[l-1][j] += w[l][j][k] * delta[l][k];
delta[l-1][j] *= fd( z[l][j] );
}
}
//Update weights: step of size eta along the (negative squared-error) gradient
for (int l = 0; l < L; l++) {
int left = sizes[l];
int right = sizes[l+1];
for(int i = 0; i < left; i++)
for(int j = 0; j < right; j++)
w[l][i][j] += eta * delta[l][j] * a[l][i];
}
}
|
|
|
// mlp2.h -- a small fixed-topology multi-layer perceptron (MLP).
// Layer 0 is the input (n1 nodes, node 0 = bias), layers 1..H are hidden
// (m1 nodes each, node 0 = bias), layer L = H+1 is the output (K nodes).
#ifndef MLP2_H // renamed from __MLP2_H__: double-underscore names are reserved
#define MLP2_H
#include <vector>
#include <array>
//maximum number of nodes allowed in a layer (constexpr instead of a macro)
constexpr int maxNodes = 30;
const int n1 = 9; //number of inputs and bias
const int m1 = 15; //number of hidden nodes and bias per layer
const int H = 3; //number of hidden layers
const int K = 7; //number of outputs
const int L = H + 1; //totally L + 1 layers: 0 --> L
// One training set: parallel vectors of input patterns and target patterns.
// std:: is spelled out: `using namespace std;` does not belong in a header
// (both .cpp files carry their own using-directive, so nothing breaks).
struct database {
    std::vector<std::array<double, n1>> inputs;
    std::vector<std::array<double, K>> labels;
};
class MLP
{
private:
    double w[L][maxNodes][maxNodes]; //w[l][i][j]: layer l node i -> layer l+1 node j
    double z[L+1][maxNodes]; //linear sum of products of weights and 'inputs'
    double a[L+1][maxNodes]; //value of neuron f(z[j])
    double y[K]; //predicted output
    int sizes[L+1]; //size of each layer
    double eta; //learning rate
public:
    explicit MLP(double learning_rate); //sets sizes, random weights in (-1, 1)
    double f(double x); //sigmoid activation function
    double fd(double x); //derivative of sigmoid function
    double* forward(double x[]); //forward propagation; returns the K outputs
    void backward(double yd[], double x[]); //backward prop, yd is desired output
    void train(const database &db, int epochs); //train the Perceptron
    ~MLP();
    int saveWeights(char fname[]); //returns 1 on success, -1 on failure
    int readWeights(char fname[]); //returns 1 on success, -1 on failure
};
#endif
|
// mlp2.cpp
// https://forejune.co/cuda/
#include <cmath>
#include <stdlib.h>
#include <stdio.h>
#include <iostream>
#include "mlp2.h"
using namespace std;
// Constructor: record the learning rate, fix the layer sizes, and fill the
// weight matrices with uniform random values in (-1, 1).
MLP::MLP(double learning_rate)
{
    // Refuse to run if any layer would overflow the fixed-size member arrays.
    if (n1 > maxNodes || m1 > maxNodes || K > maxNodes) {
        cout << "A layer has exceeded max number of nodes allowed!" << endl;
        exit(1);
    }
    eta = learning_rate;
    // Layer sizes: input, then H hidden layers of m1 nodes, then the output.
    sizes[0] = n1;
    for (int h = 1; h <= H; h++)
        sizes[h] = m1;
    sizes[L] = K;
    // Each weight: magnitude from the first rand() draw, sign from the second.
    for (int l = 0; l < L; l++)
        for (int i = 0; i < maxNodes; i++)
            for (int j = 0; j < maxNodes; j++) {
                double mag = rand() % 1000 / 1000.0;
                w[l][i][j] = (rand() % 2 == 0) ? -mag : mag;
            }
}
//Sigmoid activation function: maps any real x into the open interval (0, 1).
double MLP::f(double x)
{
    const double e = exp(-x);
    return 1.0 / (1.0 + e);
}
//Derivative of the sigmoid: f'(x) = f(x) * (1 - f(x)).
double MLP::fd(double x)
{
    const double sig = f(x);
    return sig * (1 - sig);
}
//Forward propagation: push input x through all L weight layers.
//x must have n1 entries with x[0] = 1 (the bias input); returns a pointer
//to the K outputs stored in member y.
double* MLP::forward(double x[])
{
    // Layer 0 holds the raw input pattern.
    for (int i = 0; i < n1; i++)
        a[0][i] = z[0][i] = x[i];
    // w[l] connects layer l (nIn nodes) to layer l+1 (nOut nodes).
    for (int l = 0; l < L; l++) {
        const int nIn = sizes[l];
        const int nOut = sizes[l+1];
        for (int j = 0; j < nOut; j++) {
            double sum = 0;
            for (int i = 0; i < nIn; i++)
                sum += w[l][i][j] * a[l][i];
            z[l+1][j] = sum;
            // Node 0 of every hidden layer is the bias node, clamped to 1;
            // every other node gets the sigmoid of its weighted sum.
            a[l+1][j] = (l < L-1 && j == 0) ? 1 : f(z[l+1][j]);
        }
    }
    // The activations of the final layer are the network's prediction.
    for (int k = 0; k < K; k++)
        y[k] = a[L][k];
    return y;
}
//Backward propagation (one online gradient-descent step), yd is the desired
//output. x[] is not referenced here: forward() must run first and has
//already stored the input as the layer-0 activations a[0].
void MLP::backward(double yd[], double x[])
{
//delta[idx][j] holds the delta of node j in layer idx+1 (the input layer
//needs no delta), so delta[L-1] belongs to the output layer
double delta[L][maxNodes]; //hidden layers
//output-layer deltas: error times the sigmoid slope at the output
for (int k = 0; k < K; k++) {
double e = yd[k] - y[k]; //output layer error
delta[L-1][k] = e * fd(z[L][k]); //equivalently e * a[L][k] * (1 - a[L][k])
}
//propagate deltas backwards: layer l's deltas from layer l+1's
for(int l = L - 1; l >= 1; l--){
int left = sizes[l]; //size of left layer
int right = sizes[l+1]; //size of right layer
for (int j = 0; j < left; j++) {
delta[l-1][j] = 0;
for (int k = 0; k < right; k++)
delta[l-1][j] += w[l][j][k] * delta[l][k];
delta[l-1][j] *= fd( z[l][j] );
}
}
//NOTE(review): hidden bias nodes (j == 0) are given a delta as if they were
//ordinary sigmoid units, although forward() clamps their activation to 1;
//their deltas still feed the upstream sums. Presumably intentional in this
//textbook variant -- confirm before changing.
//Update weights: step of size eta toward reducing the squared output error
for (int l = 0; l < L; l++) {
int left = sizes[l];
int right = sizes[l+1];
for(int i = 0; i < left; i++)
for(int j = 0; j < right; j++)
w[l][i][j] += eta * delta[l][j] * a[l][i];
}
}
// Train the perceptron with online (per-sample) updates.
// db     = training set (input patterns and their target outputs)
// epochs = number of full passes over the training set
void MLP::train(const database &db, int epochs)
{
    double x[n1];  //current input pattern
    double yd[K];  //current desired output
    const int numSamples = db.inputs.size();
    cout << "numSamples=" << numSamples << endl;
    for (int epoch = 0; epoch < epochs; epoch++) {
        for (int s = 0; s < numSamples; s++) {
            // Copy the sample into plain arrays, then do one forward/backward pass.
            for (int i = 0; i < n1; i++)
                x[i] = db.inputs[s][i];
            for (int i = 0; i < K; i++)
                yd[i] = db.labels[s][i];
            forward(x);
            backward(yd, x);
        }
    }
}
// Write all weight matrices to a text file, one row per left-layer node.
// Returns 1 on success, -1 if the file cannot be opened.
int MLP::saveWeights(char fname[])
{
    FILE *fp = fopen(fname, "wt");
    if (fp == NULL) {
        printf("\nError opening file %s\n", fname);
        return -1;
    }
    for (int l = 0; l < L; l++) {
        const int rows = sizes[l];    //nodes in layer l
        const int cols = sizes[l+1];  //nodes in layer l+1
        for (int i = 0; i < rows; i++) {
            for (int j = 0; j < cols; j++)
                fprintf(fp, "%6.3f ", w[l][i][j]);
            fprintf(fp, "\n");
        }
    }
    fclose(fp);
    return 1;
}
// Load the weight matrices written by saveWeights (same layer order).
// Returns 1 on success, -1 if the file cannot be opened or is truncated.
int MLP::readWeights(char fname[])
{
    FILE *fp;
    if ( (fp = fopen(fname, "rt")) == NULL ) {
        printf("\nError opening file %s\n", fname); //fixed typo: was "Eroor"
        return -1;
    }
    for (int l = 0; l < L; l++) {
        int left = sizes[l];
        int right = sizes[l+1];
        for(int i = 0; i < left; i++) {
            for(int j = 0; j < right; j++) {
                // Check each read: previously a short or corrupt file left
                // the remaining weights silently uninitialized.
                if (fscanf(fp, "%lf ", &w[l][i][j]) != 1) {
                    printf("\nError reading weights from file %s\n", fname);
                    fclose( fp );
                    return -1;
                }
            }
        }
    }
    fclose( fp );
    return 1;
}
// Destructor: nothing to release -- all state lives in fixed-size member arrays.
MLP::~MLP()
{
}
|
// nimmlp.cpp -- testing an MLP with nim
// https://forejune.co/cuda/
#include "mlp2.h"
#include <iostream>
using namespace std;
#define NMAX 255
//Nim game solved by dynamic programming (memoized recursion).
//From n blocks a player may take 1..floor(n/2) blocks (1 when n == 1), and
//taking the last block loses. S[] is the memo table, zero-initialized by the
//caller: S[n] stays 0 until computed, then holds +1 (the player to move at n
//can force a win) or -1 (the player to move loses).
int getScore(int n, int S[])
{
    if (n == 1)
        return S[n] = -1;  //forced to take the last block: a loss
    if (S[n] != 0)
        return S[n];       //already computed
    const int maxTake = n / 2;
    for (int take = 1; take <= maxTake; take++) {
        //a move that leaves the opponent in a losing position wins
        if (getScore(n - take, S) == -1)
            return S[n] = 1;
    }
    return S[n] = -1;      //every legal move hands the opponent a win
}
//Choose a winning move (number of blocks to take) from position n if one
//exists; otherwise pick a random legal move. score[] is getScore's memo table.
int bestMove(int n, int score[])
{
    int maxTake = n / 2;
    if (maxTake < 1)
        maxTake = 1;  //n == 1: the only (forced) move is taking the last block
    for (int take = 1; take <= maxTake; take++)
        if (getScore(n - take, score) == -1)
            return take;             //leaves the opponent in a losing position
    return rand() % maxTake + 1;     //no winning move; any legal move will do
}
// Append one training sample to the database: in[] holds the n1 input values
// (bias + binary-encoded position), targets[] the K target output bits.
void addDatabase(database &db, double in[], double targets[])
{
    // (Removed an unused `numSamples` local; the old comment also misstated
    // the sizes -- they are n1 = 9 and K = 7.)
    array<double, n1> tempi;
    array<double, K> tempt;
    for (int i = 0; i < n1; i++)
        tempi[i] = in[i];
    db.inputs.push_back( tempi );
    for (int i = 0; i < K; i++)
        tempt[i] = targets[i];
    db.labels.push_back( tempt );
}
//Convert an integer value to a binary array a of length `size`,
//least-significant bit first: a[i] = bit i of value, as 0.0 or 1.0.
void int2binArray(int size, int value, double a[])
{
    for (int bit = 0; bit < size; bit++) {
        a[bit] = (double) (value & 1);  //current lowest bit
        value >>= 1;                    //expose the next bit
    }
}
//maximum number of blocks n
void buildDatabase(int n, database &db)
{
double inputs[n1], labels[K] = {0};
int score[n] = {0};
int move;
inputs[0] = 1.0; //bias
for (int i = 2; i <= n; i++) {
move = bestMove(i, score);
int2binArray(n1-1, i, &inputs[1]); //obtain input pattern
int2binArray(K, move, labels); //convert to output pattern
addDatabase(db, inputs, labels);
}
}
// Convert the K output activations to an integer move. out[0] is the least
// significant bit; an activation above the threshold counts as a 1-bit.
int out2move(double out[K])
{
    const double threshold = 0.2;
    int move = 0;
    for (int i = 0; i < K; i++)
        if (out[i] > threshold)
            move |= 1 << i;
    if (move == 0)
        move = 1;  //a move of 0 is illegal in nim; take one block instead
    return move;
}
int main()
{
int n = NMAX;
int score[NMAX+1] = {0};
vector <int> diff; //diffence between dp and mlp
database db;
buildDatabase(NMAX, db);
MLP mlp(0.15);
cout << "Training the MLP ..." << endl;
int iterations = 1000; //number of epochs for training
mlp.train(db, iterations);
mlp.saveWeights( (char *) "nimWeights.txt" );
// mlp.readWeights( (char *) "nimWeights.txt" );
double *outs;
double ins[n1];
double labels[K] = {0};
cout << "After training!" << endl;
cout << "Checking: " << endl;
int move, neuralMove;
ins[0] = 1.0; //bias
for (int i = 2; i <= NMAX; i++){
move = bestMove(i, score); //move from DP
int2binArray(n1-1, i, &ins[1]); //obtain input pattern
outs = mlp.forward( ins );
neuralMove = out2move( outs ); //move from MLP
if ( move != neuralMove )
diff.push_back ( i );
}
cout << "\ndiff size = " << diff.size() << ":" << endl;
for (int i = 0; i < diff.size(); i++)
cout << diff[i] << ", ";
cout << endl;
return 0;
}
|
|
|