Training a Neural Network to Play a NIM Game in C/C++

Materials available at: https://forejune.co/cuda/

Reviews

  • https://forejune.co/cuda/ai/nim/nim.html (Nim)
  • https://forejune.co/cuda/ai/mlp/mlp.html (Perceptron)
  • Multi-Layer Perceptron (MLP) for NIM

    Binary Inputs and Outputs
    	
    	
    
    
    We consider initial n < 128 
      n = current number of blocks
      k = number of blocks to be removed
    
    A Simple way:
       use 128 binary inputs with n-th bit set to 1, others set to 0
    	63 binary outputs with k-th bit set to 1, others set to 0
       e.g. n = 5, k = 2
                                        x0 = 1 	(bias)
        x127 .... x7  x6  x5  x4  x3  x2  x1  	(number of current remaining blocks n)	
        0   0 ... 0   0   1   0   0   0   0
    Outputs:
        y63  ...   y5  y4  y3  y2  y1  y0   	(number of blocks to be removed k ≤ n/2)	
        0   ... 0  0   0   0   1   0   0
    
    Too many inputs and outputs, may take long time to train
    
    Better use binary representation of n and k
    Need 7 input bits
    and 6 output bits (because next move is ≤ n / 2)
    

    Inputs: x0 = 1 (bias) x7 x6 x5 x4 x3 x2 x1 (number of current remaining blocks n) Outputs: y5 y4 y3 y2 y1 y0 (number of blocks to be removed k ≤ n/2) Example:
     
    Example: n = 5, k = 2
    Inputs:
    	                1
    	    0 0 0 0 1 0 1	n
    Outputs:
    	      0 0 0 0 1 0	k

      
      // mlp.h
      #ifndef __MLP_H__
      #define __MLP_H__
      #include <vector>
      #include <array>
      
      using namespace std;
      
      //maximum number of blocks allowed
      #define NMAX 127
      const int n1 = 8;  //number of inputs and bias
      const int m1 = 15; //number of hidden nodes and bias
      const int K =  6;  //number of outputs 
      struct database {
        vector<array<double, n1>> inputs;	//double inputs[][n1];
        vector<array<double, K>> labels;	//double labels[][K];
      };
      
      class MLP
      {
      private:
          double w[n1][m1];
          double wo[m1][K];
          double a[m1];  //linear sum of products of inputs and weights
          double h[m1];  //hidden nodes h[j] = g(a[j])
          double y[K];   //predicted output
          double z[K];   //linear sum of products of weights and hidden nodes
          double eta; //learning rate
      public:
          MLP(double learning_rate);
          double g(double x);
          double gd(double x);  //Derivative of sigmoid function
          double* forward(double x[]);  //Forward propagation
          void backward(double yd[], double x[]);  //Backward prop, yd is desired output
          void train(const database &db, int epochs);  //Train the Perceptron
          ~MLP();
          int  saveWeights(char fname[]);
          int  readWeights(char fname[]);
      };
      #endif
      	
    Building a Database for Training
    // Add to the database a set of inputs and targeted outputs
    void addDatabase(database &db, double in[], double targets[])
    {
      array<double, n1>tempi;
      array<double, K>tempt;
    
      for (int i = 0; i < n1; i++)  //n1 = 19, K = 9
        tempi[i] = in[i];
    
      db.inputs.push_back( tempi );
    
      for (int i = 0; i < K; i++)
        tempt[i] = targets[i];
    
      db.labels.push_back( tempt );
    }
    
    
    //convert an integer value to a binary array a of size bits
    void int2binArray(int size, int value,  double a[])
    {
      int i = 0;
      while ( i < size ) {
        a[i] = (double) (value & 1);        //bitwise and
        value >>= 1;                //shift right by 1 bit
        i++;
      }
    }
    
    void buildDatabase(int n, database &db)
    {
    
      double inputs[n1], labels[K] = {0};
      int score[n] = {0};
      int move;
    
      inputs[0] = 1.0;      //bias
      for (int i = 2; i <= n; i++) {
        move = bestMove(i, score);
        int2binArray(n1-1, i,  &inputs[1]); //obtain input pattern
        int2binArray(K, move, labels);      //convert to output pattern
        addDatabase(db, inputs, labels);
      }
    }
    	
    C/C++ Implementation

    
    // nimmlp.cpp -- training an MLP to play nim
    // https://forejune.co/cuda/
    
    #include "mlp.h"
    #include <iostream>
    
    using namespace std;
    
    //nim game using dynamic programming
    int getScore(int n, int S[])
    {
      if (n == 1)
         S[n] = -1;         //got last block
      if (S[n] != 0)
        return S[n];
      int m = n / 2;	//floor of n/2.0
      for (int i = 1; i <= m; i++) {
        int nextN = n - i;
        if (getScore(nextN, S)  == -1) {	//found a path the other player loses
           S[n] = 1;
           return S[n];
        }
      }
    
      S[n] = -1;
    
      return S[n];
    }
    
    //choose a winning move i if one exists, otherwise pick a random legal move
    int bestMove(int n, int score[]) 
    {
      int m = n / 2;
      if ( m < 1 )
        m = 1;
      for (int i = 1; i <= m; i++) {
         if (getScore(n - i, score) == -1) 
    	return i;  //choose move so that the other player loses
      }
    
      return (rand() % m + 1); //no winning move; pick any random legal move
    }
    
    // Add to the database a set of inputs and targeted outputs
    void addDatabase(database &db, double in[], double targets[])
    {
      int numSamples = db.inputs.size();
      array< double, n1>tempi;
      array< double, K>tempt;
    
      for (int i = 0; i < n1; i++)	//n1 = 19, K = 9
        tempi[i] = in[i];
    
      db.inputs.push_back( tempi );
    
      for (int i = 0; i < K; i++)
        tempt[i] = targets[i];
    
      db.labels.push_back( tempt );
    }
    
    //convert an integer value to a binary array a of size bits
    void int2binArray(int size, int value,  double a[])
    {
      int i = 0;
      while ( i < size ) {
        a[i] = (double) (value & 1);	//bitwise and
        value >>= 1;		//shift right by 1 bit
        i++;
      }
    }
    
    //maximum number of blocks n
    void buildDatabase(int n, database &db)
    {
    
      double inputs[n1], labels[K] = {0};
      int score[n] = {0};
      int move;
    
      inputs[0] = 1.0; 	//bias
      for (int i = 2; i <= n; i++) {
        move = bestMove(i, score);
        int2binArray(n1-1, i,  &inputs[1]); //obtain input pattern
        int2binArray(K, move, labels);	//convert to output pattern
        addDatabase(db, inputs, labels);
      }
    }
    
    // convert K output bits to an integer
    int out2move(double out[K])
    {
      double min = 0.2;
      int move = 0;
      for (int i = K-1; i >= 0; i--) {
        move <<= 1;
        if (out[i] > min) {
          move = move | 1;
        }
      }
    
      if ( move == 0 ) move = 1;
    
      return move;
    }
    
    int main()
    {
       int n = NMAX;
       int score[NMAX+1] = {0};
       vector < int> diff;		//diffence between dp and mlp
       database db;
       
       buildDatabase(NMAX, db);
    
       cout << "Training the MLP ..." << endl;
       MLP mlp(0.12);		
    
       int iterations =  4000;		//number of epochs for training
       
       mlp.train(db, iterations);
    
       mlp.saveWeights( (char *) "nimWeights.txt" );
         
       double *outs;
       double ins[n1];
       double labels[K] = {0};
    
       cout << "After training" << endl;
       cout << "Checking: " << endl;
    
       int move, neuralMove;
       ins[0] = 1.0;	//bias
       for (int i = 2; i <= NMAX; i++){
          move = bestMove(i, score);	//move from DP
          int2binArray(n1-1, i,  &ins[1]); //obtain input pattern
          outs = mlp.forward( ins );
          neuralMove = out2move( outs );		//move from MLP
          cout << i << ": " << move << ", (" << neuralMove << ")\t";
          if ( (i+1)%4 == 0 ) cout << endl;
          if ( move != neuralMove )
    	 diff.push_back ( i );
       }
       cout << "\ndiff size = " << diff.size() << ":" << endl;
       for (int i = 0; i < diff.size(); i++) 
         cout << diff[i] << ", ";
       cout << endl;
    
       do {
          cout << "Enter number of blocks you want to start (2 to " << NMAX <<"): ";
          cin >> n;
        } while (n < 2 || n > NMAX);
    
        int n0 = n;
        cout << "AI moves first. Starting with n = " << n << endl;
    
       while ( true ) {
        // --- AI turn ---
        if (n == 1) {
           cout << "AI lost; you won!" << endl;
           break;
        }
    
          move = bestMove(n, score);	//move from DP
          int2binArray(n1-1, n,  &ins[1]); //obtain input pattern
          outs = mlp.forward( ins );
          neuralMove = out2move( outs );		//move from MLP
          n -= neuralMove;
          cout << "AI takes " << neuralMove << "(move: " << move << ")" << n << " remain" << endl;
        
          // Your turn
          if (n == 1) {
                cout << "You lost!" << endl;
                break;
           }
    
          int m = n / 2;
    
          for ( ; ; ){
    	cout << "Your turn. Enter number (1-" << m << "): ";
            cin >> move;
            if ( move < 1 || move > m ) {
              cout << "Invalid move; try again: \n";
              continue;
            } else
              break;
          }
          n -= move;
          cout << "You take " << move << ", " << n << " remain" << endl;
    
      }  //while true
    
      return 0;
    }
      

    	
    Using a GUI

    // testNimmlpg.cpp
    // https://forejune.co/cuda
    #include <GL/glut.h>
    #include <stdlib.h>
    #include <iostream>
    #include <math.h>
    #include "mlp.h"
    
    using namespace std;
    
    int window;
    int nBlocks = 0;	//current number of blocks in pile
    int takeN = 0;		//number of blocks to be removed
    int state = 0;		//state of the game
    char digits[4]; 	//holds digits the user enters
    int nDigits = 0;	//number of digits entered
    bool gameOver = false;	//tells if game is finished
    
    MLP mlp (0.12);
    
    /*  Initialize material property, light source, lighting model,
     *  and depth buffer.
     */
    void init(void) 
    {
       srand(time(0));
       GLfloat mat_specular[] = { 1.0, 1.0, 1.0, 1.0 };
       GLfloat mat_diffuse[] = { 1.0, 0.5, 0.5, 1.0 };
       GLfloat mat_shininess[] = { 50.0 };
       GLfloat light_position[] = { 1.0, 1.0, 1.0, 0.0 };
       GLfloat light[] = { 1.0, 0.8, 0.8 };
       GLfloat lmodel_ambient[] = { 0.8, 0.8, 0.8, 1.0 };
       glClearColor (1.0, 1.0, 1.0, 0.0);
       glShadeModel (GL_SMOOTH);
    
       glMaterialfv(GL_FRONT, GL_SPECULAR, mat_specular);
       glMaterialfv(GL_FRONT, GL_DIFFUSE, mat_diffuse);
       glMaterialfv(GL_FRONT, GL_SHININESS, mat_shininess);
       glLightfv(GL_LIGHT0, GL_POSITION, light_position);
    
       glLightfv(GL_LIGHT0, GL_DIFFUSE, light );
       glLightfv(GL_LIGHT0, GL_SPECULAR, light );
       glLightModelfv(GL_LIGHT_MODEL_AMBIENT, lmodel_ambient);
    
       glEnable(GL_LIGHTING);
       glEnable(GL_LIGHT0);
       glEnable(GL_DEPTH_TEST);
       glMatrixMode (GL_PROJECTION);
       glLoadIdentity();
       glOrtho (-10, 10, -10, 10, -10.0, 10.0);
       glMatrixMode(GL_MODELVIEW);
       mlp.readWeights((char*) "nimWeights.txt");
    }
    
    void message ( char *s )
    {
      char *p;
    
      for ( p = s; *p; p++ )
         glutBitmapCharacter (GLUT_BITMAP_TIMES_ROMAN_24, (int) *p);
    }
    
    void resetDigits()
    {
      nDigits = 0;
      for (int i = 0; i < 4; i++)
        digits[i] = 0;
    }
    
    void display(void)
    {
       glClear (GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT);
       char s[100];
    
       // display the blocks
       if ( nBlocks > 0 && nBlocks <= NMAX ) {
         float dx = 1.2;	//displacement in x-direction
         float dy = 1.2;	//displacement in y-direction
         int nRows = ceil(nBlocks/10.0);
         int k = 0;
         for (int i = 0; i < nRows; i++) {
             glLoadIdentity();
             glTranslatef ( -8, 8-i*dy, 0 );
           for (int j = 0; j < 10; j++ ){
             if ( j == 5 ) glTranslatef(dx, 0, 0);
             glTranslatef ( dx, 0, 0 );
             glutSolidSphere (0.5, 32, 48);
    	 k++;
    	 if ( k >= nBlocks ) break;
           }
         } 
       }
    
       //display messages
       glLoadIdentity();
       glRasterPos3f(-8, -7, 0);
      
       if ( state == 0 ) {	//initial state of game
            char *s = (char *) "Enter initial blocks (2-127):";
            message( s );
            if ( nDigits > 0 )
    	    message( digits );
       } else if ( state == 1 ) {	//game ready for play
    	sprintf(s, "%s %d", "Remaining number of blocks: ", nBlocks); 
            message( s );
            glRasterPos3f(-8, -8, 0);
    	sprintf(s, "%s", "Press 'p' to play; AI moves first!");
            message( s );
        } else if (state == 2){	//AI
            sprintf(s, "%s %d", "You took ", takeN);
            message( s );
            glRasterPos3f(-8, -8, 0);
    	sprintf(s, "%s %d", "Remaining number of blocks: ", nBlocks); 
            message( s );
            glRasterPos3f(-8, -9, 0);
    	if (nBlocks > 1) {
              sprintf(s, "%s", "Press 'a' for AI move!");
              message( s );
    	  resetDigits();
    	} else {
              sprintf(s, "%s", "You have won! Press 'r' to restart!");
              message( s );
    	  gameOver = true;
    	}
        } else if ( state == 3 ){		//Your move
            sprintf(s, "%s %d", "AI took ", takeN);
            message( s );
            glRasterPos3f(-8, -8, 0);
    	sprintf(s, "%s %d", "Remaining number of blocks: ", nBlocks); 
            message( s );
            glRasterPos3f(-8, -9, 0);
    	if (nBlocks > 1) {
              sprintf(s, "%s %d %s", "Your move (1-", nBlocks/2, "):");
              message( s );
    	  message( digits );
    	} else {
    	  sprintf(s, "%s", "AI has won! Press 'r' to restart!");
    	  message ( s );
    	  gameOver = true;
    	}
       } 
       glFlush ();
    }
    
    void int2binArray(int size, int value,  double a[])
    {
      int i = 0;
      while ( i < size ) {
        a[i] = (double) (value & 1);        //bitwise and
        value >>= 1;                //shift right by 1 bit
        i++;
      }
    }
    
    int out2move(double out[K])
    {
      double min = 0.2;
      int move = 0;
      for (int i = K-1; i >= 0; i--) {
        move <<= 1;
        if (out[i] > min) 
          move = move | 1;
      }
    
      if ( move == 0 ) move = 1;
    
      return move;
    }
    
    
    int aiMove()
    {
       double *outs, ins[n1];
       int move;
    
       ins[0] = 1;		//bias
       int2binArray(n1-1, nBlocks, &ins[1]);
       outs = mlp.forward( ins );
       move = out2move( outs );
    
       return move;
    }
    
    void play()
    {
      takeN = aiMove();
       nBlocks -= takeN;
       state = 3;	// now your turn (AI moves first)
       resetDigits();
       glutPostRedisplay();
    }
    
    void reset()	//reset the game
    {
      nBlocks = 0;
      state = 0;
      resetDigits();
      gameOver = false;
      glutPostRedisplay();
    }
    
    int digitsValue()
    {
      int d = 0;
      for (int i = 0; i < nDigits; i++) {
        d *= 10;
        d +=  (digits[i] - '0');
      }
    
      return d;
    }
    
    void keyboard(unsigned char key, int x, int y)
    {
      if (key == 27) {	//escape key
            glutDestroyWindow(window);
            exit(0);
      }
      if ( key == 'r' ) {
        reset();
        return;
      }
      if ( gameOver )
        return;
      if ( key >= '0' && key <= '9' ) {
        if ( state == 0 || state == 3 ) {
          int maxDigits;
          if ( state == 0 ) maxDigits = 3;
          else maxDigits = 2;
          if ( nDigits < maxDigits) {
            digits[nDigits++] = key;
            digits[nDigits] = 0;	//form null-terminated string
            glutPostRedisplay();
          }
        }
        return;
      }
      switch(key) {
        case 'p':   //play game
    	if (state < 2 )	
              play();
            break;
        case '\r':
    	if ( nDigits == 0 ) break;
    	if (state == 0 ) {
    	  nBlocks = digitsValue();
    	  if (nBlocks < 2) break;	//initial states
              glutPostRedisplay();
    	  state = 1;
    	} else if (state == 3) {	//Your turn	
    	  takeN = digitsValue();
    	  if ( takeN < 1 || takeN > nBlocks/2){
    	      resetDigits();
                  glutPostRedisplay();
    	      break;
    	  }
    	  nBlocks -= takeN;
    	  digits[nDigits+1] = key;
              glutPostRedisplay();
    	  resetDigits();
    	  state = 2;		//next turn is AI
    	} 
    	break;
         case 'a':  	
    	if ( state == 2 ) {	//AI turn
    	  takeN = aiMove();
    	  nBlocks -= takeN;
    	  state = 3;	//next is your move
    	
              glutPostRedisplay();
    	}
    	break;
      }
    }
    
    int main(int argc, char** argv)
    {
       glutInit(&argc, argv);
       glutInitDisplayMode (GLUT_SINGLE | GLUT_RGB | GLUT_DEPTH);
       glutInitWindowSize (600, 600); 
       glutInitWindowPosition (100, 100);
       window = glutCreateWindow (argv[0]);
       init ();
       glutDisplayFunc(display); 
       glutKeyboardFunc(keyboard);
    
       glutMainLoop();
       return 0;
    }