Implementing a Neural Network with Many Hidden Layers in C/C++

Materials available at: https://forejune.co/cuda/

Reviews

  • https://forejune.co/cuda/ai/mlp/mlp.html (Perceptron with one hidden layer)
  • Multi-Layer Perceptron (MLP)

    	L + 1 layers, e.g. L = 4
    

    Operations of a Neuron

    a simplified model of a biological neuron

    	
    	
    
    	z_i ~ the logit (the pre-activation weighted sum of the neuron's inputs)
    	
    Forward Propagation
    
    	
    
    Implementation

    We use fixed-size arrays for clarity of presentation.
    
    	int n1;  	//number of inputs and bias
    	int m1; 	//number of hidden nodes and bias per layer
    	int K;   	//number of outputs
    	int L;		//L+1 layers
    	int sizes[L+1];	//sizes of the layers: sizes[0] = n1, ..., sizes[L] = K 
    	double w[L][N][N];
    	double z[L+1][N];   //linear sum of products of weights and 'inputs'
        	double a[L+1][N];   //value of neuron g(z[j])
        	double y[K];   //predicted output
    	
    	//Forward propagation
    	double* MLP::forward(double x[])
    	{
      	  for(int i = 0; i < n1; i++)
        	    a[0][i] = z[0][i] = x[i];
    
    	  for(int l = 0; l < L; l++) {
      	     int left = sizes[l];        //size of left side layer
        	     int right = sizes[l+1];     //size of right side layer
        	     for(int j = 0; j < right; j++) {
          		  z[l+1][j] = 0;
          	     for(int i = 0; i < left; i++)
            	z[l+1][j] += w[l][i][j] * a[l][i];
    
          	      if ( l < L-1 && j == 0 )  //hidden layer
            	a[l+1][j] = 1;          //bias
                 else
            	a[l+1][j] = f( z[l+1][j] );
        	    }
      	  }
     	  for (int k = 0; k < K; k++)
        	     y[k] = a[L][k];
    
      	  return y;  //return output
          }
    	
    	

    Backward Propagation

    
    //Backward propagation, yd is the desired output.
    //Computes per-layer error deltas with the chain rule, then takes one
    //gradient-descent step on every weight.  Must follow a forward() call.
    void MLP::backward(double yd[], double x[])
    {
        double delta[L][N]; //delta[l] holds the deltas of layer l+1

       //output delta: delta = (yd - y) * f'(z)
       for (int k = 0; k < K; k++) {
          double e = yd[k] - y[k];  //output layer error
          delta[L-1][k] = e * fd(z[L][k]);
       }
       //hidden deltas, from layer L-1 back to layer 1
       for(int l = L - 1; l >= 1; l--){
         int left = sizes[l];    //size of left layer
         int right = sizes[l+1]; //size of right layer
         for (int j = 0; j < left; j++) {
            //BUG FIX: node 0 of every hidden layer is the bias unit whose
            //output is clamped to 1 in forward(), so no gradient flows
            //through it; the original propagated a spurious delta here.
            if (j == 0) {
              delta[l-1][j] = 0;
              continue;
            }
            delta[l-1][j] = 0;
            for (int k = 0; k < right; k++)
              delta[l-1][j] += w[l][j][k] * delta[l][k];
            delta[l-1][j] *= fd( z[l][j] );
         }
       }

       //Update weights (stochastic gradient descent, learning rate eta)
       for (int l = 0; l < L; l++) {
         int left = sizes[l];
         int right = sizes[l+1];

         for(int i = 0; i < left; i++)
           for(int j = 0; j < right; j++)
              w[l][i][j] += eta * delta[l][j] * a[l][i];
       }
    }
    	
    	

    	
    C/C++ Implementation

    
    // mlp2.h
    #ifndef __MLP2_H__
    #define __MLP2_H__
    #include <vector>
    #include <array>
    
    using namespace std;
    
    //maximum number of nodes allowed in a layer
    #define maxNodes 30
    const int n1 =  9;  //number of inputs and bias (node 0 is the bias)
    const int m1 = 15;  //number of hidden nodes and bias per layer
    const int H =  3;   //number of hidden layers
    const int K =  7;   //number of outputs
    const int L = H + 1; //totally L + 1 layers: 0 --> L (L weight matrices)

    //One training set: parallel vectors, one entry per sample.
    struct database {
      vector<array<double, n1>> inputs;  //each sample has n1 input values
      vector<array<double, K>> labels;   //each sample has K target outputs
    };
    
    //Multi-layer perceptron with H hidden layers (L = H + 1 weight layers).
    //Node 0 of the input layer and of every hidden layer acts as a bias unit.
    class MLP
    {
    private:
        double w[L][maxNodes][maxNodes];  //L weights: 0, ..., L-1; w[l][i][j] links node i of layer l to node j of layer l+1
        double z[L+1][maxNodes];   //linear sum of products of weights and 'inputs' (pre-activations)
        double a[L+1][maxNodes];   //value of neuron f(z[j]) (activations)
        double y[K];   	//predicted output (copy of a[L])
        int sizes[L+1];     //size of each layer: sizes[0]=n1, sizes[1..H]=m1, sizes[L]=K
        double eta; 	//learning rate
    public:
        MLP(double learning_rate);    //randomly initializes all weights in (-1, 1)
        double f(double x);           //Sigmoid activation function
        double fd(double x);  	  //Derivative of sigmoid function
        double* forward(double x[]);  //Forward propagation; returns pointer to K outputs
        void backward(double yd[], double x[]);  //Backward prop, yd is desired output
        void train(const database &db, int epochs);  //Train the Perceptron
        ~MLP();
        int  saveWeights(char fname[]);  //write weights to text file; 1 ok, -1 error
        int  readWeights(char fname[]);  //read weights from text file; 1 ok, -1 error
    };
    #endif
    	
    
    // mlp2.cpp
    // https://forejune.co/cuda/
    
    #include <cmath>
    #include <stdlib.h>
    #include <stdio.h>
    #include <iostream>
    #include "mlp2.h"
    
    using namespace std;
    
    // constructor -- record the learning rate and layer sizes, then seed
    // every weight with a random value in (-1, 1).
    MLP::MLP(double learning_rate)
    {
      if ( n1 > maxNodes || m1 > maxNodes || K > maxNodes){
          cout << "A layer has exceeded max number of nodes allowed!" << endl;
          exit( 1 );
      }
      eta = learning_rate;

      sizes[0] = n1;         //size of input layer
      for (int h = 1; h <= H; h++)
        sizes[h] = m1;       //all hidden layers share one size
      sizes[L] = K;          //size of output layer

      //initialize the full maxNodes x maxNodes matrices; unused slots are harmless
      for (int layer = 0; layer < L; layer++)
        for (int from = 0; from < maxNodes; from++)
          for (int to = 0; to < maxNodes; to++) {
            double mag = rand() % 1000 / 1000.0;                  //magnitude in [0, 0.999]
            w[layer][from][to] = (rand() % 2 == 0) ? -mag : mag;  //random sign
          }
    }
    
    //Sigmoid activation function: maps any real x into (0, 1).
    double MLP::f(double x)
    {
      const double denom = 1.0 + exp(-x);
      return 1.0 / denom;
    }
    
    //Derivative of the sigmoid: f'(x) = f(x) * (1 - f(x)).
    double MLP::fd(double x)
    {
      const double sig = f(x);
      return sig * (1 - sig);
    }
    
    //Forward propagation: feed the input vector x through all layers.
    //Returns a pointer to the K predicted outputs (member array y).
    //x[0] is expected to be the bias input, set to 1 by the caller.
    double* MLP::forward(double x[])
    {
      //layer 0 simply holds the inputs
      for (int i = 0; i < n1; i++)
        a[0][i] = z[0][i] = x[i];

      //propagate layer by layer through the L weight matrices
      for (int l = 0; l < L; l++) {
        const int left = sizes[l];     //nodes feeding this weight matrix
        const int right = sizes[l+1];  //nodes it produces
        for (int j = 0; j < right; j++) {
          z[l+1][j] = 0;
          for (int i = 0; i < left; i++)
            z[l+1][j] += w[l][i][j] * a[l][i];

          //node 0 of every hidden layer is the bias unit, clamped to 1;
          //every other node gets the sigmoid of its weighted sum
          const bool isHiddenBias = (l < L-1 && j == 0);
          a[l+1][j] = isHiddenBias ? 1 : f( z[l+1][j] );
        }
      }

      //expose the output layer through y
      for (int k = 0; k < K; k++)
        y[k] = a[L][k];

      return y;  //return output
    }
    
    //Backward propagation, yd is the desired output.
    //Computes the error delta of every layer with the chain rule, then
    //applies one stochastic-gradient-descent step to all weights.
    //Parameters:
    //  yd[] -- target output vector (K values)
    //  x[]  -- the input sample (unused here; a[0] already holds it)
    //Must be called right after forward() so z[], a[] and y[] are current.
    void MLP::backward(double yd[], double x[])
    {
        double delta[L][maxNodes]; //delta[l] holds the deltas of layer l+1

       //output-layer deltas: delta = (yd - y) * f'(z)
       for (int k = 0; k < K; k++) {
          double e = yd[k] - y[k];  //output layer error
          delta[L-1][k] = e * fd(z[L][k]);	// can use fd(a[L][k]) also
       }

       //hidden-layer deltas, from layer L-1 back to layer 1
       for(int l = L - 1; l >= 1; l--){
         int left = sizes[l];	//size of left layer
         int right = sizes[l+1];	//size of right layer
         for (int j = 0; j < left; j++) {
           //BUG FIX: node 0 of every hidden layer is the bias unit whose
           //output is clamped to 1 in forward(), so no gradient flows
           //through it; the original code computed a spurious delta with
           //fd(z) here, which polluted the deltas of earlier layers.
           if (j == 0) {
             delta[l-1][j] = 0;
             continue;
           }
           delta[l-1][j] = 0;
           for (int k = 0; k < right; k++)
             delta[l-1][j] += w[l][j][k] * delta[l][k];
           delta[l-1][j] *= fd( z[l][j] );
         }
       }

       //Update weights: w += eta * delta * activation
       for (int l = 0; l < L; l++) {
         int left = sizes[l];
         int right = sizes[l+1];

         for(int i = 0; i < left; i++)
           for(int j = 0; j < right; j++)
                w[l][i][j] += eta * delta[l][j] * a[l][i];
       }
    }
    
    // Train the perceptron with stochastic gradient descent.
    // db     = training database: inputs and the desired (target) outputs
    // epochs = number of complete passes over the database
    void MLP::train(const database &db, int epochs)
    {
        double x[n1];	//current input sample
        double yd[K];	//current desired output
        const int numSamples = db.inputs.size();
        cout << "numSamples=" << numSamples << endl;

        for (int epoch = 0; epoch < epochs; epoch++) {
          for (int s = 0; s < numSamples; s++) {
            //copy the sample into plain arrays for forward()/backward()
            for (int i = 0; i < n1; i++)
              x[i] = db.inputs[s][i];
            for (int i = 0; i < K; i++)
              yd[i] = db.labels[s][i];

            forward( x );      //compute activations for this sample
            backward(yd, x);   //then adjust the weights
          }
        }
    }
    
    //Write all weight matrices to a text file, one row per source node.
    //Returns 1 on success, -1 if the file cannot be opened.
    int  MLP::saveWeights(char fname[])
    {
      FILE *fp = fopen(fname, "wt");
      if ( fp == NULL ) {
        printf("\nError opening file %s\n", fname);
        return -1;
      }

      for (int l = 0; l < L; l++) {
        const int left = sizes[l];
        const int right = sizes[l+1];

        for (int i = 0; i < left; i++) {
          for (int j = 0; j < right; j++)
            fprintf(fp, "%6.3f ", w[l][i][j]);
          fprintf(fp, "\n");   //newline after each source node's row
        }
      }

      fclose( fp );
      return 1;
    }
    
    //Load the weight matrices from a text file previously written by
    //saveWeights().  Returns 1 on success, -1 on failure (file missing,
    //truncated, or malformed).
    int  MLP::readWeights(char fname[])
    {
      FILE *fp;

      if ( (fp = fopen(fname, "rt")) == NULL ) {
        printf("\nError opening file %s\n", fname);   //BUG FIX: was "Eroor"
        return -1;
      }

      for (int l = 0; l < L; l++) {
         int left = sizes[l];
         int right = sizes[l+1];

         for(int i = 0; i < left; i++) {
           for(int j = 0; j < right; j++)
             //BUG FIX: check fscanf so a short or corrupt file is reported
             //instead of silently leaving weights uninitialized
             if ( fscanf(fp, "%lf ", &w[l][i][j]) != 1 ) {
               printf("\nBad weight file %s\n", fname);
               fclose( fp );
               return -1;
             }
         }
       }

      fclose( fp );

      return 1;
    }
    
    //Destructor: nothing to release -- all storage is fixed-size member arrays.
    MLP::~MLP()
    {
    }

    Testing Routine

    
    // nimmlp.cpp -- testing an MLP with nim
    // https://forejune.co/cuda/
    
    #include "mlp2.h"
    #include <iostream>
    
    using namespace std;
    
    #define NMAX 255
    
    //nim game solved with dynamic programming (memoized recursion).
    //A position of n blocks scores 1 if the player to move can force a win
    //and -1 if the opponent wins; S[] caches the score of every position
    //(0 = not yet computed).  Taking the last block loses.
    int getScore(int n, int S[])
    {
      if (n == 1)
         S[n] = -1;          //forced to take the last block: a loss
      if (S[n] != 0)         //already memoized
        return S[n];

      const int maxTake = n / 2;   //a move removes 1 .. floor(n/2) blocks
      for (int take = 1; take <= maxTake; take++) {
        if (getScore(n - take, S) == -1) {  //a move that leaves the opponent losing
           S[n] = 1;
           return S[n];
        }
      }

      S[n] = -1;             //every move leaves the opponent winning

      return S[n];
    }

    //choose a winning move i if one exists, otherwise pick a random legal move
    int bestMove(int n, int score[])
    {
      int maxTake = n / 2;
      if ( maxTake < 1 )
        maxTake = 1;         //always allow taking at least one block

      for (int take = 1; take <= maxTake; take++)
         if (getScore(n - take, score) == -1)
            return take;     //this move puts the other player in a losing spot

      return (rand() % maxTake + 1); //no winning move; pick any random legal move
    }
    
    // Append one training sample to the database: n1 input values and
    // K target output values, copied into fixed-size arrays.
    void addDatabase(database &db, double in[], double targets[])
    {
      //FIX: removed unused local numSamples and a stale comment that
      //claimed n1 = 19, K = 9 (the header defines n1 = 9, K = 7).
      array<double, n1> tempi;   //copy of the inputs
      array<double, K> tempt;    //copy of the targets

      for (int i = 0; i < n1; i++)
        tempi[i] = in[i];

      db.inputs.push_back( tempi );

      for (int i = 0; i < K; i++)
        tempt[i] = targets[i];

      db.labels.push_back( tempt );
    }
    
    //convert an integer value to a binary array a of size bits;
    //a[0] receives the least significant bit
    void int2binArray(int size, int value,  double a[])
    {
      for (int bit = 0; bit < size; bit++) {
        a[bit] = (double) (value & 1);  //extract the low bit
        value >>= 1;                    //bring the next bit into place
      }
    }
    
    //maximum number of blocks n
    void buildDatabase(int n, database &db)
    {
      double inputs[n1], labels[K] = {0};
      int score[n] = {0};
      int move;
    
      inputs[0] = 1.0; 	//bias
      for (int i = 2; i <= n; i++) {
        move = bestMove(i, score);
        int2binArray(n1-1, i,  &inputs[1]); //obtain input pattern
        int2binArray(K, move, labels);	//convert to output pattern
        addDatabase(db, inputs, labels);
      }
    }
    
    // Convert the K output activations to a move: each output above the
    // 0.2 threshold contributes one bit, out[0] being the least significant.
    int out2move(double out[K])
    {
      const double threshold = 0.2;
      int move = 0;

      for (int bit = K-1; bit >= 0; bit--) {
        move <<= 1;
        if (out[bit] > threshold)
          move |= 1;
      }

      if ( move == 0 ) move = 1;   //never return an illegal "take nothing" move

      return move;
    }
    
    int main()
    {
       int n = NMAX;
       int score[NMAX+1] = {0};
       vector <int> diff;		//diffence between dp and mlp
       database db;
       
       buildDatabase(NMAX, db);
    
       MLP mlp(0.15);		
       cout << "Training the MLP ..." << endl;
    
       int iterations = 1000;	//number of epochs for training
       
       mlp.train(db, iterations);
    
       mlp.saveWeights( (char *) "nimWeights.txt" );
    //   mlp.readWeights( (char *) "nimWeights.txt" );
       double *outs;
       double ins[n1];
       double labels[K] = {0};
    
       cout << "After training!" << endl;
       cout << "Checking: " << endl;
    
       int move, neuralMove;
       ins[0] = 1.0;	//bias
       for (int i = 2; i <= NMAX; i++){
          move = bestMove(i, score);	//move from DP
          int2binArray(n1-1, i,  &ins[1]); //obtain input pattern
          outs = mlp.forward( ins );
          neuralMove = out2move( outs );	//move from MLP
          if ( move != neuralMove )
    	 diff.push_back ( i );
       }
       cout << "\ndiff size = " << diff.size() << ":" << endl;
       for (int i = 0; i < diff.size(); i++) 
         cout << diff[i] << ", ";
       cout << endl;
    
       return 0;
    }