Implementing a Neural Network with Many Hidden Layers in C/C++

Materials available at: https://forejune.co/cuda/

Reviews

  • https://forejune.co/cuda/ai/mlp/mlp.html (Perceptron with one hidden layer)
  • Multi-Layer Perceptron (MLP)

    	L + 1 layers, e.g. L = 4
    

    Operations of a Neuron

    a simplified model of a biological neuron

    	
    	
    
    	z_i ~ the logit (the pre-activation weighted sum of the neuron's inputs)
    	
    Forward Propagation
    
    	
    
    Implementation

    We use fixed-size arrays for clarity of presentation.
    
    	int n1;  	//number of inputs and bias
    	int m1; 	//number of hidden nodes and bias per layer
    	int K;   	//number of outputs
    	int L;		//L+1 layers
    	int sizes[L+1];	//sizes of the layers: sizes[0] = n1, ..., sizes[L] = K 
    	double w[L][N][N];
    	double z[L+1][N];   //linear sum of products of weights and 'inputs'
        	double a[L+1][N];   //value of neuron g(z[j])
        	double y[K];   //predicted output
    	
    	//Forward propagation
    	double* MLP::forward(double x[])
    	{
      	  for(int i = 0; i < n1; i++)
        	    a[0][i] = z[0][i] = x[i];
    
    	  for(int l = 0; l < L; l++) {
      	     int left = sizes[l];        //size of left side layer
        	     int right = sizes[l+1];     //size of right side layer
        	     for(int j = 0; j < right; j++) {
          		  z[l+1][j] = 0;
          	     for(int i = 0; i < left; i++)
            	z[l+1][j] += w[l][i][j] * a[l][i];
    
          	      if ( l < L-1 && j == 0 )  //hidden layer
            	a[l+1][j] = 1;          //bias
                 else
            	a[l+1][j] = f( z[l+1][j] );
        	    }
      	  }
     	  for (int k = 0; k < K; k++)
        	     y[k] = a[L][k];
    
      	  return y;  //return output
          }
    	
    	

    Backward Propagation

    
    //Backward propagation, yd is the desired output.
    //Computes per-layer error deltas with the chain rule, then takes one
    //gradient-descent step on every weight.  Must follow a forward() call.
    void MLP::backward(double yd[], double x[])
    {
        double delta[L][N]; //delta[l] holds the deltas of layer l+1

       //output delta: delta = (yd - y) * f'(z)
       for (int k = 0; k < K; k++) {
          double e = yd[k] - y[k];  //output layer error
          delta[L-1][k] = e * fd(z[L][k]);
       }
       //hidden deltas, from layer L-1 back to layer 1
       for(int l = L - 1; l >= 1; l--){
         int left = sizes[l];    //size of left layer
         int right = sizes[l+1]; //size of right layer
         for (int j = 0; j < left; j++) {
            //BUG FIX: node 0 of every hidden layer is the bias unit whose
            //output is clamped to 1 in forward(), so no gradient flows
            //through it; the original propagated a spurious delta here.
            if (j == 0) {
              delta[l-1][j] = 0;
              continue;
            }
            delta[l-1][j] = 0;
            for (int k = 0; k < right; k++)
              delta[l-1][j] += w[l][j][k] * delta[l][k];
            delta[l-1][j] *= fd( z[l][j] );
         }
       }

       //Update weights (stochastic gradient descent, learning rate eta)
       for (int l = 0; l < L; l++) {
         int left = sizes[l];
         int right = sizes[l+1];

         for(int i = 0; i < left; i++)
           for(int j = 0; j < right; j++)
              w[l][i][j] += eta * delta[l][j] * a[l][i];
       }
    }
    	
    	

    	
    C/C++ Implementation

    
    // mlp2.h
    #ifndef __MLP2_H__
    #define __MLP2_H__
    #include <vector>
    #include <array>
    
    using namespace std;
    
    //maximum number of nodes allowed in a layer
    #define maxNodes 30
    const int n1 =  9;  //number of inputs and bias (node 0 is the bias)
    const int m1 = 15;  //number of hidden nodes and bias per layer
    const int H =  3;   //number of hidden layers
    const int K =  7;   //number of outputs
    const int L = H + 1; //totally L + 1 layers: 0 --> L (L weight matrices)

    //One training set: parallel vectors, one entry per sample.
    struct database {
      vector<array<double, n1>> inputs;  //each sample has n1 input values
      vector<array<double, K>> labels;   //each sample has K target outputs
    };
    
    //Multi-layer perceptron with H hidden layers (L = H + 1 weight layers).
    //Node 0 of the input layer and of every hidden layer acts as a bias unit.
    class MLP
    {
    private:
        double w[L][maxNodes][maxNodes];  //L weights: 0, ..., L-1; w[l][i][j] links node i of layer l to node j of layer l+1
        double z[L+1][maxNodes];   //linear sum of products of weights and 'inputs' (pre-activations)
        double a[L+1][maxNodes];   //value of neuron f(z[j]) (activations)
        double y[K];   	//predicted output (copy of a[L])
        int sizes[L+1];     //size of each layer: sizes[0]=n1, sizes[1..H]=m1, sizes[L]=K
        double eta; 	//learning rate
    public:
        MLP(double learning_rate);    //randomly initializes all weights in (-1, 1)
        double f(double x);           //Sigmoid activation function
        double fd(double x);  	  //Derivative of sigmoid function
        double* forward(double x[]);  //Forward propagation; returns pointer to K outputs
        void backward(double yd[], double x[]);  //Backward prop, yd is desired output
        void train(const database &db, int epochs);  //Train the Perceptron
        ~MLP();
        int  saveWeights(char fname[]);  //write weights to text file; 1 ok, -1 error
        int  readWeights(char fname[]);  //read weights from text file; 1 ok, -1 error
    };
    #endif
    	
    
    // mlp2.cpp
    // https://forejune.co/cuda/
    
    #include <cmath>
    #include <stdlib.h>
    #include <stdio.h>
    #include <iostream>
    #include "mlp2.h"
    
    using namespace std;
    
    // constructor -- record the learning rate and layer sizes, then seed
    // every weight with a random value in (-1, 1).
    MLP::MLP(double learning_rate)
    {
      if ( n1 > maxNodes || m1 > maxNodes || K > maxNodes){
          cout << "A layer has exceeded max number of nodes allowed!" << endl;
          exit( 1 );
      }
      eta = learning_rate;

      sizes[0] = n1;         //size of input layer
      for (int h = 1; h <= H; h++)
        sizes[h] = m1;       //all hidden layers share one size
      sizes[L] = K;          //size of output layer

      //initialize the full maxNodes x maxNodes matrices; unused slots are harmless
      for (int layer = 0; layer < L; layer++)
        for (int from = 0; from < maxNodes; from++)
          for (int to = 0; to < maxNodes; to++) {
            double mag = rand() % 1000 / 1000.0;                  //magnitude in [0, 0.999]
            w[layer][from][to] = (rand() % 2 == 0) ? -mag : mag;  //random sign
          }
    }
    
    //Sigmoid activation function: maps any real x into (0, 1).
    double MLP::f(double x)
    {
      const double denom = 1.0 + exp(-x);
      return 1.0 / denom;
    }
    
    //Derivative of the sigmoid: f'(x) = f(x) * (1 - f(x)).
    double MLP::fd(double x)
    {
      const double sig = f(x);
      return sig * (1 - sig);
    }
    
    //Forward propagation: feed the input vector x through all layers.
    //Returns a pointer to the K predicted outputs (member array y).
    //x[0] is expected to be the bias input, set to 1 by the caller.
    double* MLP::forward(double x[])
    {
      //layer 0 simply holds the inputs
      for (int i = 0; i < n1; i++)
        a[0][i] = z[0][i] = x[i];

      //propagate layer by layer through the L weight matrices
      for (int l = 0; l < L; l++) {
        const int left = sizes[l];     //nodes feeding this weight matrix
        const int right = sizes[l+1];  //nodes it produces
        for (int j = 0; j < right; j++) {
          z[l+1][j] = 0;
          for (int i = 0; i < left; i++)
            z[l+1][j] += w[l][i][j] * a[l][i];

          //node 0 of every hidden layer is the bias unit, clamped to 1;
          //every other node gets the sigmoid of its weighted sum
          const bool isHiddenBias = (l < L-1 && j == 0);
          a[l+1][j] = isHiddenBias ? 1 : f( z[l+1][j] );
        }
      }

      //expose the output layer through y
      for (int k = 0; k < K; k++)
        y[k] = a[L][k];

      return y;  //return output
    }
    
    //Backward propagation, yd is the desired output.
    //Computes the error delta of every layer with the chain rule, then
    //applies one stochastic-gradient-descent step to all weights.
    //Parameters:
    //  yd[] -- target output vector (K values)
    //  x[]  -- the input sample (unused here; a[0] already holds it)
    //Must be called right after forward() so z[], a[] and y[] are current.
    void MLP::backward(double yd[], double x[])
    {
        double delta[L][maxNodes]; //delta[l] holds the deltas of layer l+1

       //output-layer deltas: delta = (yd - y) * f'(z)
       for (int k = 0; k < K; k++) {
          double e = yd[k] - y[k];  //output layer error
          delta[L-1][k] = e * fd(z[L][k]);	// can use fd(a[L][k]) also
       }

       //hidden-layer deltas, from layer L-1 back to layer 1
       for(int l = L - 1; l >= 1; l--){
         int left = sizes[l];	//size of left layer
         int right = sizes[l+1];	//size of right layer
         for (int j = 0; j < left; j++) {
           //BUG FIX: node 0 of every hidden layer is the bias unit whose
           //output is clamped to 1 in forward(), so no gradient flows
           //through it; the original code computed a spurious delta with
           //fd(z) here, which polluted the deltas of earlier layers.
           if (j == 0) {
             delta[l-1][j] = 0;
             continue;
           }
           delta[l-1][j] = 0;
           for (int k = 0; k < right; k++)
             delta[l-1][j] += w[l][j][k] * delta[l][k];
           delta[l-1][j] *= fd( z[l][j] );
         }
       }

       //Update weights: w += eta * delta * activation
       for (int l = 0; l < L; l++) {
         int left = sizes[l];
         int right = sizes[l+1];

         for(int i = 0; i < left; i++)
           for(int j = 0; j < right; j++)
                w[l][i][j] += eta * delta[l][j] * a[l][i];
       }
    }
    
    // Train the perceptron with stochastic gradient descent.
    // db     = training database: inputs and the desired (target) outputs
    // epochs = number of complete passes over the database
    void MLP::train(const database &db, int epochs)
    {
        double x[n1];	//current input sample
        double yd[K];	//current desired output
        const int numSamples = db.inputs.size();
        cout << "numSamples=" << numSamples << endl;

        for (int epoch = 0; epoch < epochs; epoch++) {
          for (int s = 0; s < numSamples; s++) {
            //copy the sample into plain arrays for forward()/backward()
            for (int i = 0; i < n1; i++)
              x[i] = db.inputs[s][i];
            for (int i = 0; i < K; i++)
              yd[i] = db.labels[s][i];

            forward( x );      //compute activations for this sample
            backward(yd, x);   //then adjust the weights
          }
        }
    }
    
    //Write all weight matrices to a text file, one row per source node.
    //Returns 1 on success, -1 if the file cannot be opened.
    int  MLP::saveWeights(char fname[])
    {
      FILE *fp = fopen(fname, "wt");
      if ( fp == NULL ) {
        printf("\nError opening file %s\n", fname);
        return -1;
      }

      for (int l = 0; l < L; l++) {
        const int left = sizes[l];
        const int right = sizes[l+1];

        for (int i = 0; i < left; i++) {
          for (int j = 0; j < right; j++)
            fprintf(fp, "%6.3f ", w[l][i][j]);
          fprintf(fp, "\n");   //newline after each source node's row
        }
      }

      fclose( fp );
      return 1;
    }
    
    //Load the weight matrices from a text file previously written by
    //saveWeights().  Returns 1 on success, -1 on failure (file missing,
    //truncated, or malformed).
    int  MLP::readWeights(char fname[])
    {
      FILE *fp;

      if ( (fp = fopen(fname, "rt")) == NULL ) {
        printf("\nError opening file %s\n", fname);   //BUG FIX: was "Eroor"
        return -1;
      }

      for (int l = 0; l < L; l++) {
         int left = sizes[l];
         int right = sizes[l+1];

         for(int i = 0; i < left; i++) {
           for(int j = 0; j < right; j++)
             //BUG FIX: check fscanf so a short or corrupt file is reported
             //instead of silently leaving weights uninitialized
             if ( fscanf(fp, "%lf ", &w[l][i][j]) != 1 ) {
               printf("\nBad weight file %s\n", fname);
               fclose( fp );
               return -1;
             }
         }
       }

      fclose( fp );

      return 1;
    }
    
    //Destructor: nothing to release -- all storage is fixed-size member arrays.
    MLP::~MLP()
    {
    }

    Testing Routine

    
    // nimmlp.cpp -- testing an MLP with nim
    // https://forejune.co/cuda/
    
    #include "mlp2.h"
    #include <iostream>
    
    using namespace std;
    
    #define NMAX 255
    
    //nim game solved with dynamic programming (memoized recursion).
    //A position of n blocks scores 1 if the player to move can force a win
    //and -1 if the opponent wins; S[] caches the score of every position
    //(0 = not yet computed).  Taking the last block loses.
    int getScore(int n, int S[])
    {
      if (n == 1)
         S[n] = -1;          //forced to take the last block: a loss
      if (S[n] != 0)         //already memoized
        return S[n];

      const int maxTake = n / 2;   //a move removes 1 .. floor(n/2) blocks
      for (int take = 1; take <= maxTake; take++) {
        if (getScore(n - take, S) == -1) {  //a move that leaves the opponent losing
           S[n] = 1;
           return S[n];
        }
      }

      S[n] = -1;             //every move leaves the opponent winning

      return S[n];
    }

    //choose a winning move i if one exists, otherwise pick a random legal move
    int bestMove(int n, int score[])
    {
      int maxTake = n / 2;
      if ( maxTake < 1 )
        maxTake = 1;         //always allow taking at least one block

      for (int take = 1; take <= maxTake; take++)
         if (getScore(n - take, score) == -1)
            return take;     //this move puts the other player in a losing spot

      return (rand() % maxTake + 1); //no winning move; pick any random legal move
    }
    
    // Append one training sample to the database: n1 input values and
    // K target output values, copied into fixed-size arrays.
    void addDatabase(database &db, double in[], double targets[])
    {
      //FIX: removed unused local numSamples and a stale comment that
      //claimed n1 = 19, K = 9 (the header defines n1 = 9, K = 7).
      array<double, n1> tempi;   //copy of the inputs
      array<double, K> tempt;    //copy of the targets

      for (int i = 0; i < n1; i++)
        tempi[i] = in[i];

      db.inputs.push_back( tempi );

      for (int i = 0; i < K; i++)
        tempt[i] = targets[i];

      db.labels.push_back( tempt );
    }
    
    //convert an integer value to a binary array a of size bits;
    //a[0] receives the least significant bit
    void int2binArray(int size, int value,  double a[])
    {
      for (int bit = 0; bit < size; bit++) {
        a[bit] = (double) (value & 1);  //extract the low bit
        value >>= 1;                    //bring the next bit into place
      }
    }
    
    //maximum number of blocks n
    void buildDatabase(int n, database &db)
    {
      double inputs[n1], labels[K] = {0};
      int score[n] = {0};
      int move;
    
      inputs[0] = 1.0; 	//bias
      for (int i = 2; i <= n; i++) {
        move = bestMove(i, score);
        int2binArray(n1-1, i,  &inputs[1]); //obtain input pattern
        int2binArray(K, move, labels);	//convert to output pattern
        addDatabase(db, inputs, labels);
      }
    }
    
    // Convert the K output activations to a move: each output above the
    // 0.2 threshold contributes one bit, out[0] being the least significant.
    int out2move(double out[K])
    {
      const double threshold = 0.2;
      int move = 0;

      for (int bit = K-1; bit >= 0; bit--) {
        move <<= 1;
        if (out[bit] > threshold)
          move |= 1;
      }

      if ( move == 0 ) move = 1;   //never return an illegal "take nothing" move

      return move;
    }
    
    int main()
    {
       int n = NMAX;
       int score[NMAX+1] = {0};
       vector <int> diff;		//diffence between dp and mlp
       database db;
       
       buildDatabase(NMAX, db);
    
       MLP mlp(0.15);		
       cout << "Training the MLP ..." << endl;
    
       int iterations = 1000;	//number of epochs for training
       
       mlp.train(db, iterations);
    
       mlp.saveWeights( (char *) "nimWeights.txt" );
    //   mlp.readWeights( (char *) "nimWeights.txt" );
       double *outs;
       double ins[n1];
       double labels[K] = {0};
    
       cout << "After training!" << endl;
       cout << "Checking: " << endl;
    
       int move, neuralMove;
       ins[0] = 1.0;	//bias
       for (int i = 2; i <= NMAX; i++){
          move = bestMove(i, score);	//move from DP
          int2binArray(n1-1, i,  &ins[1]); //obtain input pattern
          outs = mlp.forward( ins );
          neuralMove = out2move( outs );	//move from MLP
          if ( move != neuralMove )
    	 diff.push_back ( i );
       }
       cout << "\ndiff size = " << diff.size() << ":" << endl;
       for (int i = 0; i < diff.size(); i++) 
         cout << diff[i] << ", ";
       cout << endl;
    
       return 0;
    }