Materials available at: https://forejune.co/cuda/
Reviews
Multi-Layer Perceptron (MLP)
L + 1 layers, e.g. L = 4
C/C++ Implementation
fixed-size arrays — used here for clarity of presentation
vectors — used where flexibility is needed
// mlp3.h
// https://forejune.co/cuda/
#ifndef __MLP3_H__
#define __MLP3_H__
#include <vector>
using namespace std;
//maximum number of nodes allowed in a layer
#define maxNodes 30
//Training set for the MLP: one row per training sample.
struct database {
vector< vector< double>> inputs; //per-sample input values (n1 each; element 0 is the bias input by this project's convention)
vector< vector< double>> labels; //per-sample target output values (K each)
};
class MLP
{
private:
//default hyper-parameters; overridden by the non-default constructors
double eta = 0.15; //learning rate
int n1 = 9; //number of inputs, including the bias input
int m1 = 15; //number of hidden nodes per layer, including the bias node
int H = 3; //number of hidden layers
int K = 7; //number of outputs
int L = H + 1; //index of the output layer; totally L + 1 layers: 0 --> L
vector< vector< vector< double>>> w; //weights: w[l][i][j] connects node i of layer l to node j of layer l+1; w[L][][]
vector< vector< double>>z; //z[l][j] = weighted sum of layer l-1 activations feeding node j; z[L+1][]
vector< vector< double>>a; //a[l][j] = f(z[l][j]), activation of node j of layer l; a[L+1][]
vector< double> y; //predicted output y[K], copied from a[L]
vector< int>sizes; //number of nodes in each layer; sizes[L+1]
public:
MLP(); //default constructor; intended for use with readWeightsAndParam()
MLP(double learning_rate); //default topology, custom learning rate
MLP(double learning_rate, int n10, int K0, vector< int>hidden); //custom topology: n10 inputs, K0 outputs, one hidden layer per entry of 'hidden'
double f(double x); //sigmoid activation function
double fd(double x); //Derivative of sigmoid function
double* forward(double x[]); //Forward propagation; returns pointer to the K outputs
void backward(double yd[], double x[]); //Backward prop (one SGD step), yd is desired output
void train(const database &db, int epochs); //Train the Perceptron over the whole database
~MLP();
int inputSize(); //input size of MLP (n1)
int outputSize(); //output size of MLP (K)
int saveWeightsAndParam(char fname[]); //write eta, sizes and weights to a text file; 1 on success, -1 on error
int readWeightsAndParam(char fname[]); //load eta, sizes and weights from a text file; 1 on success, -1 on error
};
#endif
// mlp3.cpp
// https://forejune.co/cuda/
#include <cmath>
#include <stdlib.h>
#include <stdio.h>
#include <iostream>
#include "mlp3.h"
using namespace std;
//Allocate output, pre-activation and activation storage for all layers.
//  y: predicted output vector, y[K] with K = sizes[L]
//  z: z[l][j] = weighted sum feeding node j of layer l, l = 0..L
//  a: a[l][j] = activation f(z[l][j]) of node j of layer l
//BUG FIX: forward() indexes these elements directly (e.g. z[0][i] = x[i]),
//so the vectors need the proper *size*, not just capacity. The previous
//reserve()-based version left size() == 0, making that indexing undefined
//behaviour. resize() value-initializes and sets the size correctly.
void set_yzaCapacity( vector< double> &y, vector< vector< double>> &z, vector< vector< double>> &a,
const vector< int> &sizes, const int L)
{
int K = sizes[L]; //output size
y.resize( K ); //y[K]
z.resize( L + 1 ); //one row per layer 0 .. L
a.resize( L + 1 );
for (int l = 0; l < L + 1; l++){
z[l].resize( sizes[l] ); //row l holds sizes[l] nodes
a[l].resize( sizes[l] );
}
}
//Allocate the weight tensor: w[l][i][j] connects node i of layer l to
//node j of layer l+1, for l = 0 .. L-1.
//BUG FIX: initializeWeights() and the training code assign w[l][i][j]
//directly, so the vectors need the proper *size*. The previous
//reserve()-based version left every vector with size() == 0, making those
//assignments undefined behaviour. resize() fixes this.
void setWeightCapacity( vector< vector< vector< double>>> &w, const vector< int> &sizes, const int L)
{
w.resize( L ); //one weight matrix per layer transition
for ( int l = 0; l < L; l++) {
w[l].resize( sizes[l] ); //one row per source node
for(int i = 0; i < sizes[l]; i++)
w[l][i].resize( sizes[l+1] ); //one column per destination node
}
}
//Fill every weight with a pseudo-random value in (-1, 1).
//For each weight the magnitude comes from one rand() draw (0.000 .. 0.999)
//and the sign from a second draw, in that order.
void initializeWeights( vector< vector< vector< double>>> &w, const vector< int> &sizes, const int L)
{
    for (int layer = 0; layer < L; layer++) {
        const int rows = sizes[layer];     //nodes in the source layer
        const int cols = sizes[layer + 1]; //nodes in the destination layer
        for (int r = 0; r < rows; r++) {
            for (int c = 0; c < cols; c++) {
                double magnitude = rand() % 1000 / 1000.0;
                double sign = (rand() % 2 == 0) ? -1.0 : 1.0;
                w[layer][r][c] = sign * magnitude;
            }
        }
    }
}
// constructors
// Default constructor: all members keep their in-class default values;
// no storage is allocated. Intended for use with readWeightsAndParam(),
// which fills in the real parameters and weights from a file.
MLP::MLP()
{
}
//Build an MLP with the default topology (n1 inputs, H hidden layers of
//m1 nodes each, K outputs) and the given learning rate.
MLP::MLP(double learning_rate)
{
    //reject topologies wider than the fixed per-layer limit
    if ( n1 > maxNodes || m1 > maxNodes || K > maxNodes){
        cout << "A layer has exceeded max number of nodes allowed!" << endl;
        exit( 1 );
    }
    eta = learning_rate;
    sizes.push_back( n1 );       //layer 0: inputs (incl. bias)
    for (int h = 0; h < H; h++)
        sizes.push_back( m1 );   //layers 1 .. H: hidden layers
    sizes.push_back( K );        //layer L: outputs
    setWeightCapacity(w, sizes, L);
    set_yzaCapacity(y, z, a, sizes, L);
    initializeWeights(w, sizes, L);
}
//Build an MLP with n10 inputs, K0 outputs and one hidden layer per entry
//of 'hidden' (each entry = number of nodes in that layer).
//BUG FIX: the size check previously tested the default members n1 and K
//instead of the requested sizes n10 and K0, so oversized custom layers
//slipped past the maxNodes guard.
MLP::MLP(double learning_rate, int n10, int K0, vector< int>hidden)
{
    H = hidden.size();
    if ( n10 > maxNodes || K0 > maxNodes){  //validate the *requested* sizes
        cout << "Input or Output layer has exceeded max number of nodes allowed!" << endl;
        exit( 1 );
    }
    for (int i = 0; i < H; i++){
        if ( hidden[i] > maxNodes ) {
            cout << "A hidden layer has exceeded max number of nodes allowed!" << endl;
            exit( 1 );
        }
    }
    eta = learning_rate;
    n1 = n10;
    K = K0;
    sizes.push_back( n1 );            //layer 0: inputs (incl. bias)
    for (int i = 0; i < H; i++)
        sizes.push_back( hidden[i] ); //layers 1 .. H: hidden layers
    sizes.push_back( K );             //layer L: outputs
    L = H + 1;                        //totally L + 1 layers: 0 --> L
    setWeightCapacity(w, sizes, L);
    set_yzaCapacity(y, z, a, sizes, L);
    initializeWeights(w, sizes, L);
}
//Sigmoid activation: f(x) = 1 / (1 + e^-x), with range (0, 1).
double MLP::f(double x)
{
    const double ex = exp(-x);
    return 1.0 / (1.0 + ex);
}
//Derivative of the sigmoid, expressed via the sigmoid itself:
//f'(x) = f(x) * (1 - f(x)).
double MLP::fd(double x)
{
    const double sig = f(x);
    return sig * (1.0 - sig);
}
//Forward propagation: feed input x (n1 values, x[0] = bias) through the
//network layer by layer; returns a pointer to the K predicted outputs.
double* MLP::forward(double x[])
{
    //layer 0 simply holds the input
    for (int i = 0; i < n1; i++)
        a[0][i] = z[0][i] = x[i];
    for (int l = 0; l < L; l++) {
        const int nLeft = sizes[l];    //nodes feeding this transition
        const int nRight = sizes[l+1]; //nodes being computed
        for (int j = 0; j < nRight; j++) {
            double sum = 0;
            for (int i = 0; i < nLeft; i++)
                sum += w[l][i][j] * a[l][i];
            z[l+1][j] = sum;
            //node 0 of every hidden layer is the bias node, pinned to 1
            a[l+1][j] = (l < L-1 && j == 0) ? 1.0 : f( sum );
        }
    }
    //expose the output layer activations through y
    for (int k = 0; k < K; k++)
        y[k] = a[L][k];
    return &y[0];
}
//Backward propagation, yd is the desired output
void MLP::backward(double yd[], double x[])
{
double delta[L][maxNodes]; //hidden layers
for (int k = 0; k < K; k++) {
double e = yd[k] - y[k]; //output layer error
delta[L-1][k] = e * fd(z[L][k]); // can use fd(a[L][k]) also
}
for(int l = L - 1; l >= 1; l--){
int left = sizes[l]; //size of left layer
int right = sizes[l+1]; //size of right layer
for (int j = 0; j < left; j++) {
delta[l-1][j] = 0;
for (int k = 0; k < right; k++)
delta[l-1][j] += w[l][j][k] * delta[l][k];
delta[l-1][j] *= fd( z[l][j] );
}
}
//Update weights
for (int l = 0; l < L; l++) {
int left = sizes[l];
int right = sizes[l+1];
for(int i = 0; i < left; i++)
for(int j = 0; j < right; j++)
w[l][i][j] += eta * delta[l][j] * a[l][i];
}
}
// Train the perceptron
// eta = learning rate
// epochs = number of times of training
// numSamples = number of different input sets (x_data)
// yd is target output
void MLP::train(const database &db, int epochs)
{
double x[n1]; //inputs
double yd[K]; //desired output
int numSamples = db.inputs.size();
cout << "numSamples=" << numSamples << endl;
for (int epoch = 0; epoch < epochs; epoch++){
for (int k = 0; k < numSamples; k++) {
for (int i = 0; i < n1; i++)
x[i] = db.inputs[k][i];
for (int i = 0; i < K; i++)
yd[i] = db.labels[k][i];
forward( x );
backward(yd, x);
}
}
}
//Return the number of input nodes n1 (this count includes the bias input).
int MLP::inputSize()
{
return n1;
}
//Return the number of output nodes K.
int MLP::outputSize()
{
return K;
}
// Write the learning rate, L, the layer sizes and all weights to a text
// file in a format readWeightsAndParam() can parse.
// Returns 1 on success, -1 if the file cannot be opened.
int MLP::saveWeightsAndParam(char fname[])
{
    FILE *file = fopen(fname, "wt");
    if ( file == NULL ) {
        printf("\nError opening file %s\n", fname);
        return -1;
    }
    fprintf(file, "%6.3f \n", eta);   //learning rate
    fprintf(file, "%d\n", L);         //number of weight layers
    for (int l = 0; l < L+1; l++)     //layer sizes, space separated
        fprintf(file, "%d ", sizes[l]);
    fprintf(file, "\n");
    //weights: one line per source node of each layer transition
    for (int l = 0; l < L; l++) {
        for (int i = 0; i < sizes[l]; i++) {
            for (int j = 0; j < sizes[l+1]; j++)
                fprintf(file, "%7.4f ", w[l][i][j]);
            fprintf(file, "\n");
        }
    }
    fclose( file );
    return 1;
}
int MLP::readWeightsAndParam(char fname[])
{
FILE *fp;
if ( (fp = fopen(fname, "rt")) == NULL ) {
printf("\nEroor opening file %s\n", fname);
return -1;
}
fscanf(fp, "%lf \n", &eta);
fscanf(fp, "%d", &L);
for (int l = 0; l < L+1; l++){
int sz;
fscanf(fp, "%d ", &sz);
sizes.push_back( sz );
}
n1 = sizes[0];
K = sizes[L];
setWeightCapacity(w, sizes, L);
for (int l = 0; l < L; l++) {
int left = sizes[l];
int right = sizes[l+1];
for(int i = 0; i < left; i++) {
for(int j = 0; j < right; j++)
fscanf(fp, "%lf ", &w[l][i][j]);
}
}
fclose( fp );
set_yzaCapacity(y, z, a, sizes, L);
return 1;
}
//Destructor: nothing to release explicitly -- all members are vectors
//that clean up after themselves.
MLP::~MLP() = default;
Testing Routine
// nimmlp.cpp -- testing an MLP with nim
// https://forejune.co/cuda/
#include "mlp3.h"
#include <iostream>
#include <cmath>
using namespace std;
//Solve nim by memoized dynamic programming.
//S[n] caches the result for n blocks: 1 if the player to move wins,
//-1 if he loses, 0 if not yet computed. A move takes 1 .. floor(n/2) blocks;
//whoever takes the last block loses.
int getScore(int n, int S[])
{
    if (n == 1)
        S[n] = -1;            //forced to take the last block: a loss
    if (S[n] != 0)            //already solved
        return S[n];
    int maxTake = n / 2;      //legal moves remove 1 .. floor(n/2) blocks
    for (int take = 1; take <= maxTake; take++) {
        if (getScore(n - take, S) == -1) {
            //this move leaves the opponent in a losing position
            return S[n] = 1;
        }
    }
    return S[n] = -1;         //every move hands the opponent a win
}
//Return a winning move (number of blocks to take from position n) if one
//exists; otherwise fall back to the minimal legal move, 1.
int bestMove(int n, int score[])
{
    int maxTake = n / 2;
    if ( maxTake < 1 )
        maxTake = 1;          //always at least one block may be taken
    for (int take = 1; take <= maxTake; take++)
        if (getScore(n - take, score) == -1)
            return take;      //leaves the opponent in a losing position
    return 1;                 //no winning move exists
}
// Append one training sample to the database.
// in      : n1 input values (copied)
// targets : K desired output values (copied)
// FIXES: removed the unused local 'numSamples'; the element-by-element
// copies are replaced with iterator-range construction directly into the
// database (no temporary vectors).
void addDatabase(database &db, double in[], double targets[], int n1, int K)
{
    db.inputs.emplace_back(in, in + n1);          //copy the n1 inputs
    db.labels.emplace_back(targets, targets + K); //copy the K targets
}
//Write the 'size' lowest bits of 'value' into a[], least-significant
//bit first, each as 0.0 or 1.0.
void int2binArray(int size, int value, double a[])
{
    for (int bit = 0; bit < size; bit++) {
        a[bit] = (double) (value & 1); //extract the current low bit
        value >>= 1;                   //move the next bit into position
    }
}
//Fill db with one sample per block count 2 .. n: the input is the binary
//encoding of the position (plus a bias), the label the binary encoding of
//the dynamic-programming best move.
//BUG FIX: 'double inputs[n1], labels[K] = {0}' and 'int score[n] = {0}'
//were variable-length arrays with initializers -- VLAs are non-standard in
//C++ and may not be initialized at all; replaced by vectors.
void buildDatabase(int n, database &db, int n1, int K)
{
    vector< double> inputs(n1, 0.0);
    vector< double> labels(K, 0.0);
    vector< int> score(n + 1, 0);       //DP memo table, indexed 0 .. n
    inputs[0] = 1.0; //bias
    for (int i = 2; i <= n; i++) {
        int move = bestMove(i, &score[0]);
        int2binArray(n1-1, i, &inputs[1]);  //input pattern: position i
        int2binArray(K, move, &labels[0]);  //output pattern: best move
        addDatabase(db, &inputs[0], &labels[0], n1, K);
    }
}
// Decode K sigmoid outputs into a move: out[0] is the least significant
// bit, and any output above the 0.2 threshold counts as a 1 bit.
// A decoded 0 is replaced by the minimal legal move, 1.
int out2move(double out[], int K)
{
    const double threshold = 0.2;
    int move = 0;
    for (int bit = K - 1; bit >= 0; bit--)
        move = (move << 1) | (out[bit] > threshold ? 1 : 0);
    return move == 0 ? 1 : move;
}
// Train (or load) an MLP to play nim and compare its moves against the
// dynamic-programming solution.
// Usage: prog learning_rate [n_inputs n_outputs hidden1 hidden2 ...]
// A learning_rate <= 0 loads previously saved weights instead of training.
// FIXES: n_inputs/n_outputs were only parsed in the argc < 5 branch, so
// they were silently ignored whenever hidden sizes were also supplied;
// VLAs with initializers (non-standard, cannot be initialized) replaced by
// vectors; unused 'labels' array removed; the mis-sized reserve() calls on
// db (row count is the number of samples, not n1/K) removed.
int main(int argc, char *argv[])
{
    double learning_rate;
    int n1 = 9, K = 7;       //defaults: 9 inputs (incl. bias), 7 outputs
    int m1 = 15;             //default hidden-layer width
    vector< int> hsizes;     //hidden layers sizes
    vector< int> diff;       //positions where DP and MLP moves differ
    database db;
    MLP *mlp;
    if ( argc < 2 ) {
        cout << "Usage: " << argv[0] << " learning_rate [n_inputs n_outputs n_hidden] " << endl;
        exit( 1 );
    }
    learning_rate = atof ( argv[1] );
    if (learning_rate > 0) {
        if ( argc < 3 )
            mlp = new MLP ( learning_rate );
        else {
            //parse the optional sizes regardless of how many hidden
            //layers follow (previously skipped when argc >= 5)
            if (argc > 2)
                n1 = atoi( argv[2] );
            if (argc > 3)
                K = atoi( argv[3] );
            if (argc < 5)
                hsizes.push_back ( m1 ); //no hidden sizes given: use default
            for ( int i = 4; i < argc; i++)
                hsizes.push_back(atoi(argv[i]));
            mlp = new MLP (learning_rate, n1, K, hsizes);
        }
    } else {
        mlp = new MLP();
        mlp->readWeightsAndParam( (char *) "nimWeights.txt" );
    }
    n1 = mlp->inputSize();
    K = mlp->outputSize();
    int NMAX = (int) pow(2, n1-1) - 1;  //largest position encodable in n1-1 bits
    vector< int> score(NMAX + 1, 0);    //DP memo table
    buildDatabase(NMAX, db, n1, K);
    if ( learning_rate > 0 ) {
        cout << "Training the MLP ..." << endl;
        int iterations = 1000; //number of epochs for training
        mlp->train(db, iterations);
        mlp->saveWeightsAndParam( (char *) "nimWeights.txt" );
    }
    double *outs;
    vector< double> ins(n1, 0.0);
    cout << "Checking: " << endl;
    ins[0] = 1.0; //bias
    for (int i = 2; i <= NMAX; i++){
        int move = bestMove(i, &score[0]);   //move from DP
        int2binArray(n1-1, i, &ins[1]);      //obtain input pattern
        outs = mlp->forward( &ins[0] );
        int neuralMove = out2move( outs, K ); //move from MLP
        if ( move != neuralMove )
            diff.push_back ( i );
    }
    cout << "\ndiff size = " << diff.size() << ":" << endl;
    for (size_t i = 0; i < diff.size(); i++)
        cout << diff[i] << ", ";
    cout << endl;
    delete mlp;
    return 0;
}