Implementing a Multi-Layer Perceptron Using Vectors in C/C++

Materials available at: https://forejune.co/cuda/

Reviews

  • https://forejune.co/cuda/ai/mlp2/mlp2.html (Neural Network in C/C++)
  • Multi-Layer Perceptron (MLP)

    	L + 1 layers, e.g. L = 4
    

    C/C++ Implementation

      • fixed-size arrays ~ for clarity of presentation
      • vectors ~ flexibility
          
    
    // mlp3.h
    // https://forejune.co/cuda/
    
    #ifndef __MLP3_H__
    #define __MLP3_H__
    #include <vector>
    
    using namespace std;
    
    //maximum number of nodes allowed in a layer
    #define maxNodes 30
    
    // Training set: one row per sample.  inputs[k] holds the n1 input values
    // of sample k (index 0 is the bias, set to 1.0 by the caller) and
    // labels[k] holds the K desired outputs for that sample.
    struct database {
      vector< vector< double>> inputs;	//per-sample input vectors, n1 values each
      vector< vector< double>> labels;	//per-sample desired outputs, K values each
    };
    	
    // Fully-connected multi-layer perceptron with sigmoid activations,
    // trained by plain stochastic gradient descent (backpropagation).
    // The network has L + 1 layers (layer 0 = inputs, layer L = outputs).
    class MLP
    {
    private:
        //default values
        double eta = 0.15; 	//learning rate
        int n1 =  9;  //number of inputs and bias
        int m1 = 15;  //number of hidden nodes and bias per layer
        int H =  3;   //number of hidden layers
        int K =  7;   //number of outputs
        int L = H + 1; //totally L + 1 layers: 0 --> L
        vector< vector< vector< double>>> w; //weights w[l][i][j]: layer l, node i (left) to node j (right)
        vector< vector< double>>z;	//weighted input sums per layer, z[L+1][]
        vector< vector< double>>a;	//neuron activations f(z[l][j]), a[L+1][]
        vector< double> y;		//predicted output y[K]
        vector< int>sizes;		//sizes of each layer sizes[L+1]

    public:
        MLP();			//default constructor (no storage allocated; pair with readWeightsAndParam)
        MLP(double learning_rate);	//default topology (n1/m1/H/K members above)
        MLP(double learning_rate, int n10, int K0, vector< int>hidden);	//custom topology
        double f(double x);	  //Sigmoid activation function
        double fd(double x);  	  //Derivative of sigmoid function
        double* forward(double x[]);  //Forward propagation
        void backward(double yd[], double x[]);  //Backward prop, yd is desired output
        void train(const database &db, int epochs);  //Train the Perceptron
        ~MLP();
        int inputSize();	//input size of MLP (n1)
        int outputSize();	//output size of MLP (K)
        int saveWeightsAndParam(char fname[]);	//write eta, topology and weights to a text file
        int readWeightsAndParam(char fname[]);	//restore eta, topology and weights from a text file
    };
    #endif	
    
    // mlp3.cpp
    // https://forejune.co/cuda/
    
    #include <cmath>
    #include <stdlib.h>
    #include <stdio.h>
    #include <iostream>
    #include "mlp3.h"
    
    using namespace std;
    
    // Allocate (not merely reserve) the network's work vectors so that the
    // element indexing done later (forward/backward read and write y[k],
    // z[l][i], a[l][i]) is well-defined.
    //
    // BUG FIX: the original only called reserve(), which raises capacity but
    // leaves size() == 0 -- indexing such vectors with operator[] is
    // undefined behavior.  assign() both allocates and zero-initializes.
    // (The function name is kept for compatibility with existing callers.)
    //
    //   y     - network output, sized to sizes[L] (= K)
    //   z     - weighted input sums, one row per layer: z[l] has sizes[l] entries
    //   a     - activations, same shape as z
    //   sizes - node count of each layer, sizes[0..L]
    //   L     - index of the last layer (L + 1 layers in total)
    void set_yzaCapacity( std::vector< double> &y,  std::vector< std::vector< double>> &z, std::vector< std::vector< double>> &a,
    		const std::vector< int> &sizes, const int L)
    {
      y.assign( sizes[L], 0.0 );	//output vector y[K]

      z.assign( L + 1, std::vector< double>() );
      a.assign( L + 1, std::vector< double>() );
      for (int l = 0; l < L + 1; l++){
        z[l].assign( sizes[l], 0.0 );	//row l really holds sizes[l] elements now
        a[l].assign( sizes[l], 0.0 );
      }
    }
    
    // Allocate the weight tensor w[l][i][j]: for each of the L layer gaps, a
    // sizes[l] x sizes[l+1] matrix, zero-initialized.
    //
    // BUG FIX: the original reserve()d the innermost vectors but never gave
    // them a size, so initializeWeights / readWeightsAndParam wrote through
    // operator[] into size-0 vectors -- undefined behavior.  assign() sizes
    // and zero-fills the storage.  (Name kept for compatibility.)
    void setWeightCapacity( std::vector< std::vector< std::vector< double>>> &w, const std::vector< int> &sizes, const int L)
    {
      w.assign( L, std::vector< std::vector< double>>() );
      for ( int l = 0; l < L; l++) {
        //weight matrix for layer gap l: sizes[l] rows of sizes[l+1] zeros
        w[l].assign( sizes[l], std::vector< double>( sizes[l+1], 0.0 ) );
      }
    }
    
    //Fill every weight with a pseudo-random value in (-1.0, 1.0): the
    //magnitude comes from rand() % 1000 / 1000.0, the sign from a second
    //rand() call.  w must already be sized (see setWeightCapacity).
    void initializeWeights( vector< vector< vector< double>>> &w, const vector< int> &sizes, const int L)
    {
      for (int l = 0; l < L; l++) {
        const int rows = sizes[l];
        const int cols = sizes[l+1];
        for (int i = 0; i < rows; i++) {
          for (int j = 0; j < cols; j++) {
            double magnitude = rand() % 1000 / 1000.0;
            if ( rand() % 2 == 0 )
              magnitude = -magnitude;
            w[l][i][j] = magnitude;
          }
        }
      }
    }
    
    // constructors
    // Default constructor: keeps the in-class member defaults and allocates
    // no storage.  Intended to be paired with readWeightsAndParam(), which
    // builds the topology from a saved weights file.
    MLP::MLP() 
    {
    }
    
    // Build an MLP with the default topology (n1 inputs, H hidden layers of
    // m1 nodes each, K outputs) and the caller-supplied learning rate.
    // Exits the program if any default layer exceeds maxNodes.
    MLP::MLP(double learning_rate)
    {
      const bool tooWide = (n1 > maxNodes) || (m1 > maxNodes) || (K > maxNodes);
      if ( tooWide ){
    	  cout << "A layer has exceeded max number of nodes allowed!" << endl;
    	  exit( 1 );
      }
      eta = learning_rate;

      //layer sizes: [ n1, m1, ..., m1, K ]
      sizes.push_back( n1 );
      sizes.insert( sizes.end(), H, m1 );	//H hidden layers of m1 nodes
      sizes.push_back( K );			//sizes[L] = K, output layer

      setWeightCapacity(w, sizes, L);
      set_yzaCapacity(y, z, a, sizes, L);
      initializeWeights(w, sizes, L);
    }
    
    // Build an MLP with a custom topology.
    //   learning_rate - step size eta used by backward()
    //   n10           - number of input nodes (including the bias node)
    //   K0            - number of output nodes
    //   hidden        - node count of each hidden layer
    // Exits the program if any layer exceeds maxNodes.
    //
    // BUG FIX: the original validated the default members n1 and K instead of
    // the caller-supplied n10 and K0, so oversized custom input/output layers
    // slipped past the maxNodes check.
    MLP::MLP(double learning_rate, int n10, int K0, vector< int>hidden) 
    {
      H = hidden.size();
      if ( n10 > maxNodes || K0 > maxNodes){
    	  cout << "Input or Output layer has exceeded max number of nodes allowed!" << endl;
    	  exit( 1 );
      }
      for (int i = 0; i < H; i++){
         if ( hidden[i] > maxNodes ) {
    	  cout << "A hidden layer has exceeded max number of nodes allowed!" << endl;
    	  exit( 1 );
         }
      }
      eta = learning_rate;
      n1 = n10;
      K = K0;
      sizes.push_back( n1 );
      for (int i = 0; i < H; i++)
        sizes.push_back( hidden[i] );	//hidden layer sizes
      sizes.push_back( K );		//sizes[L] = K, size of output layer

      L = H + 1;	//totally L + 1 layers: 0 --> L
      setWeightCapacity(w, sizes, L);
      set_yzaCapacity(y, z, a, sizes, L);
      initializeWeights(w, sizes, L);
    }
    
    //Sigmoid activation function
    //Sigmoid (logistic) activation: maps any real x into (0, 1).
    double MLP::f(double x)
    {
      const double e = exp(-x);
      return 1.0 / (1.0 + e);
    }
    
    //Derivative of sigmoid function
    //Derivative of the sigmoid, written through f itself:
    //f'(x) = f(x) * (1 - f(x)).
    double MLP::fd(double x)
    {
      const double s = f(x);
      return s * (1.0 - s);
    }
    
    
    //Forward propagation
    //Forward propagation: feed the n1 input values x through all layers and
    //return a pointer to the K predicted outputs stored in y.
    //Node 0 of every hidden layer is a bias node whose activation is pinned
    //to 1 (only the output layer has no bias node).
    double* MLP::forward(double x[]) 
    {
      //layer 0 simply copies the input
      for(int i = 0; i < n1; i++) 
        a[0][i] = z[0][i] = x[i];

      //propagate one layer gap at a time
      for(int l = 0; l < L; l++) {
        const int left = sizes[l];	//nodes feeding this step
        const int right = sizes[l+1];	//nodes being computed
        for(int j = 0; j < right; j++) {
          double sum = 0.0;
          for(int i = 0; i < left; i++)
            sum += w[l][i][j] * a[l][i];
          z[l+1][j] = sum;
          //hidden-layer bias node stays at 1; every other node (and the
          //whole output layer) passes through the sigmoid
          a[l+1][j] = ( l < L-1 && j == 0 ) ? 1 : f( sum );
        }
      }

      //expose the last layer's activations as the prediction
      for (int k = 0; k < K; k++)
        y[k] = a[L][k];

      return &y[0];  //return output
    }
    
    
    //Backward propagation, yd is the desired output
    void MLP::backward(double yd[], double x[])
    {
       double delta[L][maxNodes]; //hidden layers
    			      
       for (int k = 0; k < K; k++) {
          double e = yd[k] - y[k];  //output layer error
          delta[L-1][k] = e * fd(z[L][k]);	// can use fd(a[L][k]) also
       }
    
       for(int l = L - 1; l >= 1; l--){
         int left = sizes[l];	//size of left layer
         int right = sizes[l+1];	//size of right layer
         for (int j = 0; j < left; j++) {
           	delta[l-1][j] = 0;
           	for (int k = 0; k < right; k++)
    	  delta[l-1][j] += w[l][j][k] * delta[l][k];
    	delta[l-1][j] *= fd( z[l][j] );
         }
       }
    
       //Update weights 
       for (int l = 0; l < L; l++) {
         int left = sizes[l];
         int right = sizes[l+1];
    
         for(int i = 0; i < left; i++)
           for(int j = 0; j < right; j++) 
                w[l][i][j] += eta * delta[l][j] * a[l][i];
         
       }
    } 
    
    // Train the perceptron
    // eta = learning rate
    // epochs = number of times of training 
    // numSamples = number of different input sets (x_data)
    // yd is target output
    void MLP::train(const database &db, int epochs)
    {
        double x[n1];	//inputs
        double yd[K];	//desired output
       int numSamples = db.inputs.size();
       cout << "numSamples=" << numSamples << endl;
        for (int epoch = 0; epoch < epochs; epoch++){
          for (int k = 0; k < numSamples; k++) {
    	 for (int i = 0; i < n1; i++)
    	   x[i] = db.inputs[k][i];
    	 for (int i = 0; i < K; i++)
    	   yd[i] = db.labels[k][i]; 
    	 forward( x );
             backward(yd, x);
          }
        }
    }
    
    //Number of input nodes n1 (bias included) -- callers size their input
    //arrays with this.
    int  MLP::inputSize()
    {
      return n1;
    }
    
    //Number of output nodes K -- the length of the array forward() returns.
    int  MLP::outputSize()
    {
      return K;
    }
    
    // save weights and parameters
    // Save weights and parameters to a text file.
    // Layout: eta, then L, then the L + 1 layer sizes, then each weight
    // matrix row by row (readWeightsAndParam reads the same layout back).
    // Returns 1 on success, -1 if the file cannot be opened.
    int  MLP::saveWeightsAndParam(char fname[])
    {
      FILE *fp = fopen(fname, "wt");
      if ( fp == NULL ) {
        printf("\nError opening file %s\n", fname);
        return -1;
      }

      fprintf(fp, "%6.3f \n", eta);	//learning rate
      fprintf(fp, "%d\n", L);		//index of the last layer
      for (int l = 0; l < L+1; l++)	//layer sizes
        fprintf(fp, "%d ", sizes[l]);
      fprintf(fp, "\n");

      //one line per source node: its weights to every node on the right
      for (int l = 0; l < L; l++) {
        const int left = sizes[l];
        const int right = sizes[l+1];
        for(int i = 0; i < left; i++) {
          for(int j = 0; j < right; j++)
            fprintf(fp, "%7.4f ", w[l][i][j]);
          fprintf(fp, "\n");
        }
      }

      fclose( fp );
      return 1;
    }
    
    // Restore eta, the topology and all weights from a file written by
    // saveWeightsAndParam().  Returns 1 on success, -1 on open or parse error.
    //
    // Fixes: "Eroor" typo in the error message; fscanf() results are now
    // checked so a truncated or corrupt file is reported instead of leaving
    // the network partially initialized; sizes is cleared first so the method
    // also works on an object that already holds a topology.
    int  MLP::readWeightsAndParam(char fname[])
    {
      FILE *fp = fopen(fname, "rt");
      if ( fp == NULL ) {
        printf("\nError opening file %s\n", fname);
        return -1;
      }

      if ( fscanf(fp, "%lf \n", &eta) != 1 || fscanf(fp, "%d", &L) != 1 || L < 1 ) {
        printf("\nError reading parameters from %s\n", fname);
        fclose( fp );
        return -1;
      }

      sizes.clear();	//discard any previous topology
      for (int l = 0; l < L+1; l++){
        int sz;
        if ( fscanf(fp, "%d ", &sz) != 1 ) {
          printf("\nError reading layer sizes from %s\n", fname);
          fclose( fp );
          return -1;
        }
        sizes.push_back( sz );
      }
      n1 = sizes[0];	//input layer size
      K = sizes[L];	//output layer size

      setWeightCapacity(w, sizes, L);
      for (int l = 0; l < L; l++) {
        int left = sizes[l];
        int right = sizes[l+1];
        for(int i = 0; i < left; i++) {
          for(int j = 0; j < right; j++) {
            if ( fscanf(fp, "%lf ", &w[l][i][j]) != 1 ) {
              printf("\nError reading weights from %s\n", fname);
              fclose( fp );
              return -1;
            }
          }
        }
      }

      fclose( fp );
      set_yzaCapacity(y, z, a, sizes, L);

      return 1;
    }
    
    // Destructor: nothing to release -- all members are std::vectors that
    // clean up after themselves.
    MLP::~MLP()
    {
    }

    Testing Routine

    
    // nimmlp.cpp -- testing an MLP with nim
    // https://forejune.co/cuda/
    
    #include "mlp3.h"
    #include <iostream>
    #include <cmath>
    
    using namespace std;
    
    //nim game using dynamic programming
    //Score the nim position with n blocks left (misère rule: whoever takes
    //the last block loses).  A legal move removes 1..floor(n/2) blocks.
    //Returns +1 if the player to move can force a win, -1 otherwise.
    //S[] memoizes positions: 0 = not yet solved, otherwise the cached score.
    int getScore(int n, int S[])
    {
      if (n == 1)
        S[n] = -1;	//forced to take the last block: a loss
      if (S[n] != 0)	//already solved
        return S[n];

      //a position wins iff some move leaves the opponent in a losing one
      const int maxTake = n / 2;
      int result = -1;
      for (int take = 1; take <= maxTake; take++) {
        if (getScore(n - take, S) == -1) {
          result = 1;
          break;
        }
      }

      S[n] = result;
      return S[n];
    }
    
    //choose a winning move i if one exists, otherwise pick a random legal move
    //Return a block count to remove from a pile of n: a winning move when
    //one exists (i.e. it leaves the opponent in a losing position),
    //otherwise the minimum legal move of 1.
    int bestMove(int n, int score[]) 
    {
      int maxTake = n / 2;
      if ( maxTake < 1 )
        maxTake = 1;

      for (int take = 1; take <= maxTake; take++)
        if (getScore(n - take, score) == -1)
          return take;	//opponent is left losing

      return 1;	//no winning move exists
    }
    
    // Add to the database a set of inputs and targeted outputs
    // Append one training sample to the database: n1 input values from in[]
    // and K target values from targets[].
    //
    // FIX: removed the unused local numSamples; the element-by-element copy
    // loops are replaced by vector's iterator-range constructor.
    void addDatabase(database &db, double in[], double targets[], int n1, int K)
    {
      db.inputs.push_back( vector< double>( in, in + n1 ) );
      db.labels.push_back( vector< double>( targets, targets + K ) );
    }
    
    //convert an integer value to a binary array a of size bits
    //Write the `size` low-order bits of value into a[], least significant
    //bit first, each stored as 0.0 or 1.0.
    void int2binArray(int size, int value,  double a[])
    {
      for (int bit = 0; bit < size; bit++) {
        a[bit] = (double) (value & 1);	//current low bit
        value >>= 1;			//advance to the next bit
      }
    }
    
    //maximum number of blocks n
    //Fill the database with one sample per pile size i = 2..n: the input is
    //a bias plus the binary encoding of i, the label is the binary encoding
    //of the best move found by dynamic programming.
    //
    //Fixes: the memo array was declared int score[n], but getScore(i, S)
    //reads/writes S[i] for i up to n -- an out-of-bounds access; it now has
    //n + 1 elements.  The buffers were also variable-length arrays (a
    //non-standard extension; a VLA cannot even take an initializer), so
    //vectors are used instead.
    void buildDatabase(int n, database &db, int n1, int K)
    {
      vector< double> inputs(n1, 0.0), labels(K, 0.0);
      vector< int> score(n + 1, 0);	//getScore memo, valid indices 0..n

      inputs[0] = 1.0; 	//bias
      for (int i = 2; i <= n; i++) {
        int move = bestMove(i, &score[0]);
        int2binArray(n1-1, i, &inputs[1]);	//obtain input pattern
        int2binArray(K, move, &labels[0]);	//convert to output pattern
        addDatabase(db, &inputs[0], &labels[0], n1, K);
      }
    }
    
    // convert K output bits to an integer
    //Convert the K network outputs back into a move: each output above the
    //0.2 threshold contributes a 1 bit, out[0] being the least significant.
    //A decoded move of 0 is bumped to 1 so the move is always legal.
    int out2move(double out[], int K)
    {
      const double threshold = 0.2;
      int move = 0;
      for (int bit = K - 1; bit >= 0; bit--) {
        move <<= 1;
        if (out[bit] > threshold)
          move |= 1;
      }

      return ( move == 0 ) ? 1 : move;
    }
    
    
    // Usage: prog learning_rate [n_inputs n_outputs hidden_size...]
    // learning_rate > 0: build, train and save the network.
    // learning_rate <= 0: reload a previously saved network and only check it.
    // Finally, compare the MLP's move against the dynamic-programming move
    // for every pile size and report the disagreements.
    //
    // Fixes: n_inputs/n_outputs were only parsed when argc < 5, so they were
    // silently ignored whenever hidden sizes were also supplied; score[] and
    // ins[] were variable-length arrays (non-standard, and a VLA cannot take
    // an initializer); the labels[] buffer was never used; the diff loop
    // index was a signed/unsigned mismatch.
    int main(int argc, char *argv[])
    {
       double learning_rate;
       int n1 = 9, K = 7;	//defaults: 9 inputs (incl. bias), 7 outputs
       int m1 = 15;		//default hidden-layer width
       vector< int> hsizes;	//hidden layer sizes
       vector< int> diff;	//pile sizes where DP and MLP moves differ
       database db;
       MLP *mlp;

       if ( argc < 2 ) {
         cout << "Usage: " << argv[0] << " learning_rate [n_inputs n_outputs n_hidden] " << endl;
         exit( 1 );
       }
       learning_rate = atof ( argv[1] );
       if (learning_rate > 0) {
         if ( argc < 3 )
           mlp = new MLP ( learning_rate );	//all-default topology
         else {
           //parse the optional sizes regardless of how many were given
           if (argc > 2)
             n1 = atoi( argv[2] );
           if (argc > 3)
             K = atoi( argv[3] );
           if (argc < 5)
             hsizes.push_back ( m1 );	//no hidden sizes given: use default
           for ( int i = 4; i < argc; i++)
             hsizes.push_back(atoi(argv[i]));
           mlp = new MLP (learning_rate, n1, K, hsizes);
         }
       } else {
         //non-positive learning rate: skip training, reuse saved weights
         mlp = new MLP();
         mlp->readWeightsAndParam( (char *) "nimWeights.txt" );
       }
       n1 = mlp->inputSize();
       K = mlp->outputSize();

       int NMAX = (int) pow(2, n1-1) - 1;	//largest pile encodable in n1-1 bits
       vector< int> score(NMAX + 1, 0);		//DP memo, indices 0..NMAX
       buildDatabase(NMAX, db, n1, K);

       if ( learning_rate > 0 ) {
         cout << "Training the MLP ..." << endl;
         int iterations = 1000;	//number of epochs for training
         mlp->train(db, iterations);
         mlp->saveWeightsAndParam( (char *) "nimWeights.txt" );
       }

       cout << "Checking: " << endl;

       vector< double> ins(n1, 0.0);
       ins[0] = 1.0;	//bias
       for (int i = 2; i <= NMAX; i++){
          int move = bestMove(i, &score[0]);	//move from DP
          int2binArray(n1-1, i, &ins[1]);	//obtain input pattern
          double *outs = mlp->forward( &ins[0] );
          int neuralMove = out2move( outs, K );	//move from MLP
          if ( move != neuralMove )
            diff.push_back ( i );
       }
       cout << "\ndiff size = " << diff.size() << ":" << endl;
       for (size_t i = 0; i < diff.size(); i++)
         cout << diff[i] << ", ";
       cout << endl;

       delete mlp;

       return 0;
    }