Materials available at: https://forejune.co/cuda/
Reviews
Multi-Layer Perceptron (MLP) for NIM
Binary Inputs and Outputs![]()
We consider initial n < 128 n = current number of blocks k = number of blocks to be removed A Simple way: use 128 binary inputs with n-th bit set to 1, others set to 0 63 binary outputs with k-th bit set to 1, others set to 0 e.g. n = 5, k = 2 x0 = 1 (bias) x127 .... x7 x6 x5 x4 x3 x2 x1 (number of current remaining blocks n) 0 0 ... 0 0 1 0 0 0 0 Outputs: y63 ... y5 y4 y3 y2 y1 y0 (number of blocks to be removed k ≤ n/2) 0 ... 0 0 0 0 1 0 0 Too many inputs and outputs, may take long time to train Better use binary representation of n and k Need 7 input bits and 6 output bits (because next move is ≤ n / 2)Inputs: x0 = 1 (bias) x7 x6 x5 x4 x3 x2 x1 (number of current remaining blocks n) Outputs: y5 y4 y3 y2 y1 y0 (number of blocks to be removed k ≤ n/2) Example:
Example: n = 5, k = 2 Inputs: 1 0 0 0 0 1 0 1 n Outputs: 0 0 0 0 1 0 k
// mlp.h #ifndef __MLP_H__ #define __MLP_H__ #include <vector> #include <array> using namespace std; //maximum number of blocks allowed #define NMAX 127 const int n1 = 8; //number of inputs and bias const int m1 = 15; //number of hidden nodes and bias const int K = 6; //number of outputs struct database { vector<array<double, n1>> inputs; //double inputs[][n1]; vector<array<double, K>> labels; //double labels[][K]; }; class MLP { private: double w[n1][m1]; double wo[m1][K]; double a[m1]; //linear sum of products of inputs and weights double h[m1]; //hidden nodes h[j] = g(a[j]) double y[K]; //predicted output double z[K]; //linear sum of products of weights and hidden nodes double eta; //learning rate public: MLP(double learning_rate); double g(double x); double gd(double x); //Derivative of sigmoid function double* forward(double x[]); //Forward propagation void backward(double yd[], double x[]); //Backward prop, yd is desired output void train(const database &db, int epochs); //Train the Perceptron ~MLP(); int saveWeights(char fname[]); int readWeights(char fname[]); }; #endif
// Add to the database a set of inputs and targeted outputs
void addDatabase(database &db, double in[], double targets[])
{
array<double, n1>tempi;
array<double, K>tempt;
for (int i = 0; i < n1; i++) //n1 = 19, K = 9
tempi[i] = in[i];
db.inputs.push_back( tempi );
for (int i = 0; i < K; i++)
tempt[i] = targets[i];
db.labels.push_back( tempt );
}
//convert an integer value to a binary array a of size bits
void int2binArray(int size, int value, double a[])
{
int i = 0;
while ( i < size ) {
a[i] = (double) (value & 1); //bitwise and
value >>= 1; //shift right by 1 bit
i++;
}
}
void buildDatabase(int n, database &db)
{
double inputs[n1], labels[K] = {0};
int score[n] = {0};
int move;
inputs[0] = 1.0; //bias
for (int i = 2; i <= n; i++) {
move = bestMove(i, score);
int2binArray(n1-1, i, &inputs[1]); //obtain input pattern
int2binArray(K, move, labels); //convert to output pattern
addDatabase(db, inputs, labels);
}
}
|
// nimmlp.cpp -- training an MLP to play nim
// https://forejune.co/cuda/
#include "mlp.h"
#include <iostream>
using namespace std;
//nim game using dynamic programming
int getScore(int n, int S[])
{
if (n == 1)
S[n] = -1; //got last block
if (S[n] != 0)
return S[n];
int m = n / 2; //floor of n/2.0
for (int i = 1; i <= m; i++) {
int nextN = n - i;
if (getScore(nextN, S) == -1) { //found a path the other player loses
S[n] = 1;
return S[n];
}
}
S[n] = -1;
return S[n];
}
//choose a winning move i if one exists, otherwise pick a random legal move
int bestMove(int n, int score[])
{
int m = n / 2;
if ( m < 1 )
m = 1;
for (int i = 1; i <= m; i++) {
if (getScore(n - i, score) == -1)
return i; //choose move so that the other player loses
}
return (rand() % m + 1); //no winning move; pick any random legal move
}
// Add to the database a set of inputs and targeted outputs
void addDatabase(database &db, double in[], double targets[])
{
int numSamples = db.inputs.size();
array< double, n1>tempi;
array< double, K>tempt;
for (int i = 0; i < n1; i++) //n1 = 19, K = 9
tempi[i] = in[i];
db.inputs.push_back( tempi );
for (int i = 0; i < K; i++)
tempt[i] = targets[i];
db.labels.push_back( tempt );
}
//convert an integer value to a binary array a of size bits
void int2binArray(int size, int value, double a[])
{
int i = 0;
while ( i < size ) {
a[i] = (double) (value & 1); //bitwise and
value >>= 1; //shift right by 1 bit
i++;
}
}
//maximum number of blocks n
void buildDatabase(int n, database &db)
{
double inputs[n1], labels[K] = {0};
int score[n] = {0};
int move;
inputs[0] = 1.0; //bias
for (int i = 2; i <= n; i++) {
move = bestMove(i, score);
int2binArray(n1-1, i, &inputs[1]); //obtain input pattern
int2binArray(K, move, labels); //convert to output pattern
addDatabase(db, inputs, labels);
}
}
// convert K output bits to an integer
int out2move(double out[K])
{
double min = 0.2;
int move = 0;
for (int i = K-1; i >= 0; i--) {
move <<= 1;
if (out[i] > min) {
move = move | 1;
}
}
if ( move == 0 ) move = 1;
return move;
}
int main()
{
int n = NMAX;
int score[NMAX+1] = {0};
vector < int> diff; //diffence between dp and mlp
database db;
buildDatabase(NMAX, db);
cout << "Training the MLP ..." << endl;
MLP mlp(0.12);
int iterations = 4000; //number of epochs for training
mlp.train(db, iterations);
mlp.saveWeights( (char *) "nimWeights.txt" );
double *outs;
double ins[n1];
double labels[K] = {0};
cout << "After training" << endl;
cout << "Checking: " << endl;
int move, neuralMove;
ins[0] = 1.0; //bias
for (int i = 2; i <= NMAX; i++){
move = bestMove(i, score); //move from DP
int2binArray(n1-1, i, &ins[1]); //obtain input pattern
outs = mlp.forward( ins );
neuralMove = out2move( outs ); //move from MLP
cout << i << ": " << move << ", (" << neuralMove << ")\t";
if ( (i+1)%4 == 0 ) cout << endl;
if ( move != neuralMove )
diff.push_back ( i );
}
cout << "\ndiff size = " << diff.size() << ":" << endl;
for (int i = 0; i < diff.size(); i++)
cout << diff[i] << ", ";
cout << endl;
do {
cout << "Enter number of blocks you want to start (2 to " << NMAX <<"): ";
cin >> n;
} while (n < 2 || n > NMAX);
int n0 = n;
cout << "AI moves first. Starting with n = " << n << endl;
while ( true ) {
// --- AI turn ---
if (n == 1) {
cout << "AI lost; you won!" << endl;
break;
}
move = bestMove(n, score); //move from DP
int2binArray(n1-1, n, &ins[1]); //obtain input pattern
outs = mlp.forward( ins );
neuralMove = out2move( outs ); //move from MLP
n -= neuralMove;
cout << "AI takes " << neuralMove << "(move: " << move << ")" << n << " remain" << endl;
// Your turn
if (n == 1) {
cout << "You lost!" << endl;
break;
}
int m = n / 2;
for ( ; ; ){
cout << "Your turn. Enter number (1-" << m << "): ";
cin >> move;
if ( move < 1 || move > m ) {
cout << "Invalid move; try again: \n";
continue;
} else
break;
}
n -= move;
cout << "You take " << move << ", " << n << " remain" << endl;
} //while true
return 0;
}
|
Using a GUI
// testNimmlpg.cpp
// https://forejune.co/cuda
#include <GL/glut.h>
#include <stdlib.h>
#include <iostream>
#include <math.h>
#include "mlp.h"
using namespace std;
int window;
int nBlocks = 0; //current number of blocks in pile
int takeN = 0; //number of blocks to be removed
int state = 0; //state of the game
char digits[4]; //holds digits the user enters
int nDigits = 0; //number of digits entered
bool gameOver = false; //tells if game is finished
MLP mlp (0.12);
/* Initialize material property, light source, lighting model,
* and depth buffer.
*/
void init(void)
{
srand(time(0));
GLfloat mat_specular[] = { 1.0, 1.0, 1.0, 1.0 };
GLfloat mat_diffuse[] = { 1.0, 0.5, 0.5, 1.0 };
GLfloat mat_shininess[] = { 50.0 };
GLfloat light_position[] = { 1.0, 1.0, 1.0, 0.0 };
GLfloat light[] = { 1.0, 0.8, 0.8 };
GLfloat lmodel_ambient[] = { 0.8, 0.8, 0.8, 1.0 };
glClearColor (1.0, 1.0, 1.0, 0.0);
glShadeModel (GL_SMOOTH);
glMaterialfv(GL_FRONT, GL_SPECULAR, mat_specular);
glMaterialfv(GL_FRONT, GL_DIFFUSE, mat_diffuse);
glMaterialfv(GL_FRONT, GL_SHININESS, mat_shininess);
glLightfv(GL_LIGHT0, GL_POSITION, light_position);
glLightfv(GL_LIGHT0, GL_DIFFUSE, light );
glLightfv(GL_LIGHT0, GL_SPECULAR, light );
glLightModelfv(GL_LIGHT_MODEL_AMBIENT, lmodel_ambient);
glEnable(GL_LIGHTING);
glEnable(GL_LIGHT0);
glEnable(GL_DEPTH_TEST);
glMatrixMode (GL_PROJECTION);
glLoadIdentity();
glOrtho (-10, 10, -10, 10, -10.0, 10.0);
glMatrixMode(GL_MODELVIEW);
mlp.readWeights((char*) "nimWeights.txt");
}
void message ( char *s )
{
char *p;
for ( p = s; *p; p++ )
glutBitmapCharacter (GLUT_BITMAP_TIMES_ROMAN_24, (int) *p);
}
void resetDigits()
{
nDigits = 0;
for (int i = 0; i < 4; i++)
digits[i] = 0;
}
void display(void)
{
glClear (GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT);
char s[100];
// display the blocks
if ( nBlocks > 0 && nBlocks <= NMAX ) {
float dx = 1.2; //displacement in x-direction
float dy = 1.2; //displacement in y-direction
int nRows = ceil(nBlocks/10.0);
int k = 0;
for (int i = 0; i < nRows; i++) {
glLoadIdentity();
glTranslatef ( -8, 8-i*dy, 0 );
for (int j = 0; j < 10; j++ ){
if ( j == 5 ) glTranslatef(dx, 0, 0);
glTranslatef ( dx, 0, 0 );
glutSolidSphere (0.5, 32, 48);
k++;
if ( k >= nBlocks ) break;
}
}
}
//display messages
glLoadIdentity();
glRasterPos3f(-8, -7, 0);
if ( state == 0 ) { //initial state of game
char *s = (char *) "Enter initial blocks (2-127):";
message( s );
if ( nDigits > 0 )
message( digits );
} else if ( state == 1 ) { //game ready for play
sprintf(s, "%s %d", "Remaining number of blocks: ", nBlocks);
message( s );
glRasterPos3f(-8, -8, 0);
sprintf(s, "%s", "Press 'p' to play; AI moves first!");
message( s );
} else if (state == 2){ //AI
sprintf(s, "%s %d", "You took ", takeN);
message( s );
glRasterPos3f(-8, -8, 0);
sprintf(s, "%s %d", "Remaining number of blocks: ", nBlocks);
message( s );
glRasterPos3f(-8, -9, 0);
if (nBlocks > 1) {
sprintf(s, "%s", "Press 'a' for AI move!");
message( s );
resetDigits();
} else {
sprintf(s, "%s", "You have won! Press 'r' to restart!");
message( s );
gameOver = true;
}
} else if ( state == 3 ){ //Your move
sprintf(s, "%s %d", "AI took ", takeN);
message( s );
glRasterPos3f(-8, -8, 0);
sprintf(s, "%s %d", "Remaining number of blocks: ", nBlocks);
message( s );
glRasterPos3f(-8, -9, 0);
if (nBlocks > 1) {
sprintf(s, "%s %d %s", "Your move (1-", nBlocks/2, "):");
message( s );
message( digits );
} else {
sprintf(s, "%s", "AI has won! Press 'r' to restart!");
message ( s );
gameOver = true;
}
}
glFlush ();
}
void int2binArray(int size, int value, double a[])
{
int i = 0;
while ( i < size ) {
a[i] = (double) (value & 1); //bitwise and
value >>= 1; //shift right by 1 bit
i++;
}
}
int out2move(double out[K])
{
double min = 0.2;
int move = 0;
for (int i = K-1; i >= 0; i--) {
move <<= 1;
if (out[i] > min)
move = move | 1;
}
if ( move == 0 ) move = 1;
return move;
}
int aiMove()
{
double *outs, ins[n1];
int move;
ins[0] = 1; //bias
int2binArray(n1-1, nBlocks, &ins[1]);
outs = mlp.forward( ins );
move = out2move( outs );
return move;
}
void play()
{
takeN = aiMove();
nBlocks -= takeN;
state = 3; // now your turn (AI moves first)
resetDigits();
glutPostRedisplay();
}
void reset() //reset the game
{
nBlocks = 0;
state = 0;
resetDigits();
gameOver = false;
glutPostRedisplay();
}
int digitsValue()
{
int d = 0;
for (int i = 0; i < nDigits; i++) {
d *= 10;
d += (digits[i] - '0');
}
return d;
}
void keyboard(unsigned char key, int x, int y)
{
if (key == 27) { //escape key
glutDestroyWindow(window);
exit(0);
}
if ( key == 'r' ) {
reset();
return;
}
if ( gameOver )
return;
if ( key >= '0' && key <= '9' ) {
if ( state == 0 || state == 3 ) {
int maxDigits;
if ( state == 0 ) maxDigits = 3;
else maxDigits = 2;
if ( nDigits < maxDigits) {
digits[nDigits++] = key;
digits[nDigits] = 0; //form null-terminated string
glutPostRedisplay();
}
}
return;
}
switch(key) {
case 'p': //play game
if (state < 2 )
play();
break;
case '\r':
if ( nDigits == 0 ) break;
if (state == 0 ) {
nBlocks = digitsValue();
if (nBlocks < 2) break; //initial states
glutPostRedisplay();
state = 1;
} else if (state == 3) { //Your turn
takeN = digitsValue();
if ( takeN < 1 || takeN > nBlocks/2){
resetDigits();
glutPostRedisplay();
break;
}
nBlocks -= takeN;
digits[nDigits+1] = key;
glutPostRedisplay();
resetDigits();
state = 2; //next turn is AI
}
break;
case 'a':
if ( state == 2 ) { //AI turn
takeN = aiMove();
nBlocks -= takeN;
state = 3; //next is your move
glutPostRedisplay();
}
break;
}
}
int main(int argc, char** argv)
{
glutInit(&argc, argv);
glutInitDisplayMode (GLUT_SINGLE | GLUT_RGB | GLUT_DEPTH);
glutInitWindowSize (600, 600);
glutInitWindowPosition (100, 100);
window = glutCreateWindow (argv[0]);
init ();
glutDisplayFunc(display);
glutKeyboardFunc(keyboard);
glutMainLoop();
return 0;
}
|