Training a Neural Network to Play a NIM Game in C/C++

Materials available at: https://forejune.co/cuda/

Reviews

https://forejune.co/cuda/ai/nim/nim.html (Nim)

https://forejune.co/cuda/ai/mlp/mlp.html (Perceptron)

Multi-Layer Perceptron (MLP) for NIM

Binary Inputs and Outputs
	
	

We consider initial n < 128 
  n = current number of blocks
  k = number of blocks to be removed

A Simple way:
   use 128 binary inputs with n-th bit set to 1, others set to 0
	63 binary outputs with k-th bit set to 1, others set to 0
   e.g. n = 5, k = 2
                                    x₀ = 1 	(bias)
    x₁₂₇ .... x₇  x₆  x₅  x₄  x₃  x₂  x₁  	(number of current remaining blocks n)	
    0   0 ... 0   0   1   0   0   0   0
Outputs:
    y₆₃  ...   y₅  y₄  y₃  y₂  y₁  y₀   	(number of blocks to be removed k ≤ n/2)	
    0   ... 0  0   0   0   1   0   0

Too many inputs and outputs, may take long time to train

Better use binary representation of n and k
Need 7 input bits
and 6 output bits (because next move is ≤ n / 2)

Inputs:
                           x₀ = 1 	(bias)
    x₇  x₆  x₅  x₄  x₃  x₂  x₁  	        (number of current remaining blocks n)	
Outputs:
        y₅  y₄  y₃  y₂  y₁  y₀   	(number of blocks to be removed k ≤ n/2)	

Example:




    

Example: n = 5, k = 2
Inputs:
	                1
	    0 0 0 0 1 0 1	n
Outputs:
	      0 0 0 0 1 0	k

  
        
// mlp.h
#ifndef __MLP_H__
#define __MLP_H__
#include <vector>
#include <array>

using namespace std;

//maximum number of blocks allowed
#define NMAX 127
const int n1 = 8;  //number of inputs and bias
const int m1 = 15; //number of hidden nodes and bias
const int K =  6;  //number of outputs 
struct database {
  vector<array<double, n1>> inputs;	//double inputs[][n1];
  vector<array<double, K>> labels;	//double labels[][K];
};

class MLP
{
private:
    double w[n1][m1];
    double wo[m1][K];
    double a[m1];  //linear sum of products of inputs and weights
    double h[m1];  //hidden nodes h[j] = g(a[j])
    double y[K];   //predicted output
    double z[K];   //linear sum of products of weights and hidden nodes
    double eta; //learning rate
public:
    MLP(double learning_rate);
    double g(double x);
    double gd(double x);  //Derivative of sigmoid function
    double* forward(double x[]);  //Forward propagation
    void backward(double yd[], double x[]);  //Backward prop, yd is desired output
    void train(const database &db, int epochs);  //Train the Perceptron
    ~MLP();
    int  saveWeights(char fname[]);
    int  readWeights(char fname[]);
};
#endif

Building a Database for Training

// Add to the database a set of inputs and targeted outputs
void addDatabase(database &db, double in[], double targets[])
{
  array<double, n1>tempi;
  array<double, K>tempt;

  for (int i = 0; i < n1; i++)  //n1 = 19, K = 9
    tempi[i] = in[i];

  db.inputs.push_back( tempi );

  for (int i = 0; i < K; i++)
    tempt[i] = targets[i];

  db.labels.push_back( tempt );
}


//convert an integer value to a binary array a of size bits
void int2binArray(int size, int value,  double a[])
{
  int i = 0;
  while ( i < size ) {
    a[i] = (double) (value & 1);        //bitwise and
    value >>= 1;                //shift right by 1 bit
    i++;
  }
}

void buildDatabase(int n, database &db)
{

  double inputs[n1], labels[K] = {0};
  int score[n] = {0};
  int move;

  inputs[0] = 1.0;      //bias
  for (int i = 2; i <= n; i++) {
    move = bestMove(i, score);
    int2binArray(n1-1, i,  &inputs[1]); //obtain input pattern
    int2binArray(K, move, labels);      //convert to output pattern
    addDatabase(db, inputs, labels);
  }
}

C/C++ Implementation


// nimmlp.cpp -- training an MLP to play nim
// https://forejune.co/cuda/

#include "mlp.h"
#include <iostream>

using namespace std;

//nim game using dynamic programming
int getScore(int n, int S[])
{
  if (n == 1)
     S[n] = -1;         //got last block
  if (S[n] != 0)
    return S[n];
  int m = n / 2;	//floor of n/2.0
  for (int i = 1; i <= m; i++) {
    int nextN = n - i;
    if (getScore(nextN, S)  == -1) {	//found a path the other player loses
       S[n] = 1;
       return S[n];
    }
  }

  S[n] = -1;

  return S[n];
}

//choose a winning move i if one exists, otherwise pick a random legal move
int bestMove(int n, int score[]) 
{
  int m = n / 2;
  if ( m < 1 )
    m = 1;
  for (int i = 1; i <= m; i++) {
     if (getScore(n - i, score) == -1) 
	return i;  //choose move so that the other player loses
  }

  return (rand() % m + 1); //no winning move; pick any random legal move
}

// Add to the database a set of inputs and targeted outputs
void addDatabase(database &db, double in[], double targets[])
{
  int numSamples = db.inputs.size();
  array< double, n1>tempi;
  array< double, K>tempt;

  for (int i = 0; i < n1; i++)	//n1 = 19, K = 9
    tempi[i] = in[i];

  db.inputs.push_back( tempi );

  for (int i = 0; i < K; i++)
    tempt[i] = targets[i];

  db.labels.push_back( tempt );
}

//convert an integer value to a binary array a of size bits
void int2binArray(int size, int value,  double a[])
{
  int i = 0;
  while ( i < size ) {
    a[i] = (double) (value & 1);	//bitwise and
    value >>= 1;		//shift right by 1 bit
    i++;
  }
}

//maximum number of blocks n
void buildDatabase(int n, database &db)
{

  double inputs[n1], labels[K] = {0};
  int score[n] = {0};
  int move;

  inputs[0] = 1.0; 	//bias
  for (int i = 2; i <= n; i++) {
    move = bestMove(i, score);
    int2binArray(n1-1, i,  &inputs[1]); //obtain input pattern
    int2binArray(K, move, labels);	//convert to output pattern
    addDatabase(db, inputs, labels);
  }
}

// convert K output bits to an integer
int out2move(double out[K])
{
  double min = 0.2;
  int move = 0;
  for (int i = K-1; i >= 0; i--) {
    move <<= 1;
    if (out[i] > min) {
      move = move | 1;
    }
  }

  if ( move == 0 ) move = 1;

  return move;
}

int main()
{
   int n = NMAX;
   int score[NMAX+1] = {0};
   vector < int> diff;		//diffence between dp and mlp
   database db;
   
   buildDatabase(NMAX, db);

   cout << "Training the MLP ..." << endl;
   MLP mlp(0.12);		

   int iterations =  4000;		//number of epochs for training
   
   mlp.train(db, iterations);

   mlp.saveWeights( (char *) "nimWeights.txt" );
     
   double *outs;
   double ins[n1];
   double labels[K] = {0};

   cout << "After training" << endl;
   cout << "Checking: " << endl;

   int move, neuralMove;
   ins[0] = 1.0;	//bias
   for (int i = 2; i <= NMAX; i++){
      move = bestMove(i, score);	//move from DP
      int2binArray(n1-1, i,  &ins[1]); //obtain input pattern
      outs = mlp.forward( ins );
      neuralMove = out2move( outs );		//move from MLP
      cout << i << ": " << move << ", (" << neuralMove << ")\t";
      if ( (i+1)%4 == 0 ) cout << endl;
      if ( move != neuralMove )
	 diff.push_back ( i );
   }
   cout << "\ndiff size = " << diff.size() << ":" << endl;
   for (int i = 0; i < diff.size(); i++) 
     cout << diff[i] << ", ";
   cout << endl;

   do {
      cout << "Enter number of blocks you want to start (2 to " << NMAX <<"): ";
      cin >> n;
    } while (n < 2 || n > NMAX);

    int n0 = n;
    cout << "AI moves first. Starting with n = " << n << endl;

   while ( true ) {
    // --- AI turn ---
    if (n == 1) {
       cout << "AI lost; you won!" << endl;
       break;
    }

      move = bestMove(n, score);	//move from DP
      int2binArray(n1-1, n,  &ins[1]); //obtain input pattern
      outs = mlp.forward( ins );
      neuralMove = out2move( outs );		//move from MLP
      n -= neuralMove;
      cout << "AI takes " << neuralMove << "(move: " << move << ")" << n << " remain" << endl;
    
      // Your turn
      if (n == 1) {
            cout << "You lost!" << endl;
            break;
       }

      int m = n / 2;

      for ( ; ; ){
	cout << "Your turn. Enter number (1-" << m << "): ";
        cin >> move;
        if ( move < 1 || move > m ) {
          cout << "Invalid move; try again: \n";
          continue;
        } else
          break;
      }
      n -= move;
      cout << "You take " << move << ", " << n << " remain" << endl;

  }  //while true

  return 0;
}

Using a GUI

// testNimmlpg.cpp // https://forejune.co/cuda #include <GL/glut.h> #include <stdlib.h> #include <iostream> #include <math.h> #include "mlp.h" using namespace std; int window; int nBlocks = 0; //current number of blocks in pile int takeN = 0; //number of blocks to be removed int state = 0; //state of the game char digits[4]; //holds digits the user enters int nDigits = 0; //number of digits entered bool gameOver = false; //tells if game is finished MLP mlp (0.12); /* Initialize material property, light source, lighting model, * and depth buffer. */ void init(void) { srand(time(0)); GLfloat mat_specular[] = { 1.0, 1.0, 1.0, 1.0 }; GLfloat mat_diffuse[] = { 1.0, 0.5, 0.5, 1.0 }; GLfloat mat_shininess[] = { 50.0 }; GLfloat light_position[] = { 1.0, 1.0, 1.0, 0.0 }; GLfloat light[] = { 1.0, 0.8, 0.8 }; GLfloat lmodel_ambient[] = { 0.8, 0.8, 0.8, 1.0 }; glClearColor (1.0, 1.0, 1.0, 0.0); glShadeModel (GL_SMOOTH); glMaterialfv(GL_FRONT, GL_SPECULAR, mat_specular); glMaterialfv(GL_FRONT, GL_DIFFUSE, mat_diffuse); glMaterialfv(GL_FRONT, GL_SHININESS, mat_shininess); glLightfv(GL_LIGHT0, GL_POSITION, light_position); glLightfv(GL_LIGHT0, GL_DIFFUSE, light ); glLightfv(GL_LIGHT0, GL_SPECULAR, light ); glLightModelfv(GL_LIGHT_MODEL_AMBIENT, lmodel_ambient); glEnable(GL_LIGHTING); glEnable(GL_LIGHT0); glEnable(GL_DEPTH_TEST); glMatrixMode (GL_PROJECTION); glLoadIdentity(); glOrtho (-10, 10, -10, 10, -10.0, 10.0); glMatrixMode(GL_MODELVIEW); mlp.readWeights((char*) "nimWeights.txt"); } void message ( char *s ) { char *p; for ( p = s; *p; p++ ) glutBitmapCharacter (GLUT_BITMAP_TIMES_ROMAN_24, (int) *p); } void resetDigits() { nDigits = 0; for (int i = 0; i < 4; i++) digits[i] = 0; } void display(void) { glClear (GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT); char s[100]; // display the blocks if ( nBlocks > 0 && nBlocks <= NMAX ) { float dx = 1.2; //displacement in x-direction float dy = 1.2; //displacement in y-direction int nRows = ceil(nBlocks/10.0); int k = 0; for (int i = 0; i < nRows; i++) { glLoadIdentity(); glTranslatef ( -8, 8-i*dy, 0 ); for (int j = 0; j < 10; j++ ){ if ( j == 5 ) glTranslatef(dx, 0, 0); glTranslatef ( dx, 0, 0 ); glutSolidSphere (0.5, 32, 48); k++; if ( k >= nBlocks ) break; } } } //display messages glLoadIdentity(); glRasterPos3f(-8, -7, 0); if ( state == 0 ) { //initial state of game char *s = (char *) "Enter initial blocks (2-127):"; message( s ); if ( nDigits > 0 ) message( digits ); } else if ( state == 1 ) { //game ready for play sprintf(s, "%s %d", "Remaining number of blocks: ", nBlocks); message( s ); glRasterPos3f(-8, -8, 0); sprintf(s, "%s", "Press 'p' to play; AI moves first!"); message( s ); } else if (state == 2){ //AI sprintf(s, "%s %d", "You took ", takeN); message( s ); glRasterPos3f(-8, -8, 0); sprintf(s, "%s %d", "Remaining number of blocks: ", nBlocks); message( s ); glRasterPos3f(-8, -9, 0); if (nBlocks > 1) { sprintf(s, "%s", "Press 'a' for AI move!"); message( s ); resetDigits(); } else { sprintf(s, "%s", "You have won! Press 'r' to restart!"); message( s ); gameOver = true; } } else if ( state == 3 ){ //Your move sprintf(s, "%s %d", "AI took ", takeN); message( s ); glRasterPos3f(-8, -8, 0); sprintf(s, "%s %d", "Remaining number of blocks: ", nBlocks); message( s ); glRasterPos3f(-8, -9, 0); if (nBlocks > 1) { sprintf(s, "%s %d %s", "Your move (1-", nBlocks/2, "):"); message( s ); message( digits ); } else { sprintf(s, "%s", "AI has won! Press 'r' to restart!"); message ( s ); gameOver = true; } } glFlush (); } void int2binArray(int size, int value, double a[]) { int i = 0; while ( i < size ) { a[i] = (double) (value & 1); //bitwise and value >>= 1; //shift right by 1 bit i++; } } int out2move(double out[K]) { double min = 0.2; int move = 0; for (int i = K-1; i >= 0; i--) { move <<= 1; if (out[i] > min) move = move | 1; } if ( move == 0 ) move = 1; return move; } int aiMove() { double *outs, ins[n1]; int move; ins[0] = 1; //bias int2binArray(n1-1, nBlocks, &ins[1]); outs = mlp.forward( ins ); move = out2move( outs ); return move; } void play() { takeN = aiMove(); nBlocks -= takeN; state = 3; // now your turn (AI moves first) resetDigits(); glutPostRedisplay(); } void reset() //reset the game { nBlocks = 0; state = 0; resetDigits(); gameOver = false; glutPostRedisplay(); } int digitsValue() { int d = 0; for (int i = 0; i < nDigits; i++) { d *= 10; d += (digits[i] - '0'); } return d; } void keyboard(unsigned char key, int x, int y) { if (key == 27) { //escape key glutDestroyWindow(window); exit(0); } if ( key == 'r' ) { reset(); return; } if ( gameOver ) return; if ( key >= '0' && key <= '9' ) { if ( state == 0 || state == 3 ) { int maxDigits; if ( state == 0 ) maxDigits = 3; else maxDigits = 2; if ( nDigits < maxDigits) { digits[nDigits++] = key; digits[nDigits] = 0; //form null-terminated string glutPostRedisplay(); } } return; } switch(key) { case 'p': //play game if (state < 2 ) play(); break; case '\r': if ( nDigits == 0 ) break; if (state == 0 ) { nBlocks = digitsValue(); if (nBlocks < 2) break; //initial states glutPostRedisplay(); state = 1; } else if (state == 3) { //Your turn takeN = digitsValue(); if ( takeN < 1 || takeN > nBlocks/2){ resetDigits(); glutPostRedisplay(); break; } nBlocks -= takeN; digits[nDigits+1] = key; glutPostRedisplay(); resetDigits(); state = 2; //next turn is AI } break; case 'a': if ( state == 2 ) { //AI turn takeN = aiMove(); nBlocks -= takeN; state = 3; //next is your move glutPostRedisplay(); } break; } } int main(int argc, char** argv) { glutInit(&argc, argv); glutInitDisplayMode (GLUT_SINGLE | GLUT_RGB | GLUT_DEPTH); glutInitWindowSize (600, 600); glutInitWindowPosition (100, 100); window = glutCreateWindow (argv[0]); init (); glutDisplayFunc(display); glutKeyboardFunc(keyboard); glutMainLoop(); return 0; }