K-means Clustering Using a Multilayer Perceptron (MLP) in C/C++

Materials available at: https://forejune.co/cuda/

Overview

Review

    K-means Clustering

       see https://forejune.co/cuda/ai/unsupervised/unsuper.html

    Multi-layer Perceptron (MLP)

       see https://forejune.co/cuda/ai/mlp/mlp.html
	
 
                                
// mlp.h
#ifndef __MLP_H__
#define __MLP_H__
const int n1 = 3;  //number of inputs and bias
const int m1 = 7;  //number of hidden nodes and bias
const int K =  4;   //number of outputs = number of clusters

class MLP
{
private:
    double w1[n1][m1];
    double w[n1][m1];
    double wo[m1][K];
    double a[m1];  //linear sum of products of inputs and weights
    double h[m1];  //hidden nodes h[j] = g(a[j])
    double y[K];   //predicted output
    double z[K];   //linear sum of products of weights and hidden nodes
    double eta; //learning rate
public:
    MLP(double learning_rate);
    double g(double x);
    double gd(double x);  //Derivative of sigmoid function
    double* forward(double x[]);  //Forward propagation
    void backward(double yd[], double x[]);  //Backward prop, yd is desired output
    void train(const vector<Point> &points, int epochs);  //Train the Perceptron
    ~MLP();
};
#endif
 
                                
//mlp.cpp

#include <iostream>
#include <iomanip>
#include <cmath>
#include "kmeans.h"
#include "mlp.h"

using namespace std;

// Constructor: store the learning rate, fix the hidden bias node at 1,
// and fill both weight matrices with pseudo-random values in [0, 1).
MLP::MLP(double learning_rate)
{
    eta = learning_rate;
    h[0] = 1;   // hidden node 0 is the bias input to the output layer

    // input-to-hidden weights
    for (int row = 0; row < n1; row++)
        for (int col = 0; col < m1; col++)
            w[row][col] = rand() % 1000 / 1000.0;

    // hidden-to-output weights
    for (int col = 0; col < m1; col++)
        for (int out = 0; out < K; out++)
            wo[col][out] = rand() % 1000 / 1000.0;
}

// Sigmoid activation function: maps any real x into (0, 1).
double MLP::g(double x)
{
    double e = exp(-x);
    return 1.0 / (1.0 + e);
}

// Derivative of the sigmoid: g'(x) = g(x) * (1 - g(x)).
double MLP::gd(double x)
{
    double sig = g(x);
    return sig * (1 - sig);
}


// Forward propagation: compute hidden activations and the network
// output for one input vector x (x[0] must be the bias value 1).
// Returns a pointer to the internal output array y (length K).
double* MLP::forward(double x[])
{
    // Hidden layer: a[j] = sum_i w[i][j] * x[i], h[j] = g(a[j]).
    // h[0] is the bias node and stays fixed at 1 (set in the ctor),
    // but a[0] is still computed because backward() differentiates it.
    for (int j = 0; j < m1; j++) {
        double sum = 0;
        for (int i = 0; i < n1; i++)
            sum += w[i][j] * x[i];
        a[j] = sum;
        if (j > 0)
            h[j] = g(a[j]);
    }

    // Output layer: z[l] = sum_j wo[j][l] * h[j], y[l] = g(z[l]).
    for (int l = 0; l < K; l++) {
        double sum = 0;
        for (int j = 0; j < m1; j++)
            sum += wo[j][l] * h[j];
        z[l] = sum;
        y[l] = g(z[l]);
    }

    return y;   // caller reads, does not own, this array
}

//Backward propagation, yd is the desired output
void MLP::backward(double yd[], double x[])
{
    double delta[m1];
    double deltao[K];

   for (int l = 0; l < K; l++) { 
      double e = yd[l] - y[l];  //output layer error
     
      deltao[l] = e * gd(z[l]);
   }
   
  for(int j = 0; j < m1; j++){
      delta[j] = 0;
      for(int l = 0; l < K; l++)
        delta[j] += deltao[l] * wo[j][l] * gd(a[j]);
   }

   //Update weights (hidden to output) 
   for(int j = 0; j < m1; j++)
     for(int l = 0; l < K; l++) 
        wo[j][l] += eta * deltao[l] * h[j];
     
   //Update weights (input to hidden)
   for(int i = 0; i < n1; i++)
      for(int j = 0; j < m1; j++)
        w[i][j] += eta * delta[j] * x[i];
} 

// Train the perceptron with plain stochastic gradient descent.
// points - training samples; each point's cluster id is the target class
// epochs - number of full passes over the sample set
void MLP::train(const vector<Point> &points, int epochs)
{
    double x[n1];   // network input: {bias, point.x, point.y}
    double yd[K];   // desired output: one-hot encoding of the cluster id
    int numSamples = points.size();

    x[0] = 1;   // bias
    for (int epoch = 0; epoch < epochs; epoch++) {
        for (int k = 0; k < numSamples; k++) {
            x[1] = points[k].x;
            x[2] = points[k].y;
            forward(x);
            for (int i = 0; i < K; i++)
                yd[i] = (points[k].cluster == i) ? 1 : 0;
            backward(yd, x);
        }
    }
}

// Destructor: nothing to release — all storage is in fixed-size arrays.
MLP::~MLP()
{
}

Testing K-means MLP routines
 
                                
// testKmlp.cpp 
// https://forejune.co/cuda/

#include <iostream>
#include <iomanip>
#include <cmath>
#include <cstdio>    // getchar
#include <cstdlib>   // rand, srand
#include <ctime>     // time
#include <vector>
#include "kmeans.h"
#include "mlp.h"

using namespace std;

const int screenWidth  = 700;
const int screenHeight = 600;
void graphics(int argc, char *argv[]);

// Return the index of the largest of the K network outputs — the
// predicted cluster. Ties keep the earliest index (strict '>' test).
int classifier( double y[])
{
  int best = 0;
  for (int i = 1; i < K; i++)
    if (y[i] > y[best])
      best = i;

  return best;
}

// Return true if any two centroids lie within 0.01 of each other in
// both coordinates — i.e. effectively the same starting point.
bool overlap(const vector< Point> & centroids)
{
   int n = centroids.size();
   for (int i = 0; i < n; i++) {
     for (int j = i + 1; j < n; j++) {
       double dx = fabs(centroids[i].x - centroids[j].x);
       double dy = fabs(centroids[i].y - centroids[j].y);
       if (dx < 0.01 && dy < 0.01)
         return true;
     }
   }
   return false;
}


// Classify every delta-th point with the trained MLP, print the input,
// predicted cluster and raw network outputs (two points per line), and
// store the predicted cluster back into the point.
void useMlp(MLP &mlp, vector< Point> &points, int delta)
{
  int n = points.size();
  double x[3];
  x[0] = 1;		//bias

  int j = 0;
  for (int i = 0; i < n; i+=delta) {
      x[1] = points[i].x;
      x[2] = points[i].y;
      double *output = mlp.forward(x);
      // Classify once and reuse the result — the original called
      // classifier() twice per point (and cast the int result to int).
      int cls = classifier(output);
      cout << fixed << setprecision(2) << "  " << x[1] << " " <<  x[2]
      << " : " << cls << "" << " (" << setprecision(1);
      for(int k = 0; k < K; k++) {
        cout <<  output[k];
        if (k < K - 1)
          cout << " ";
        else
          cout << ")" << "\t";
      }
      points[i].cluster = cls;
      if ( ++j % 2 == 0 ) cout << endl;
    }

}
    
MLP mlp(0.5);                 // global network (learning rate 0.5) — shared with the GLUT callbacks below
vector< Point> centroids(K);  // cluster centroids — also read by displayClusters()

// Generate random 2-D points, cluster them with k-means, train the MLP
// on the resulting labels, test the MLP, then start the GLUT loop.
int main(int argc, char *argv[])
{
   srand(time(0));
   const int numSamples = 900;
   int iterations = 50;  // Number of K-means iterations

    // Generate uniformly random data in [0,1) x [0,1).
    vector< Point> points;
    for (int i = 0; i < numSamples; ++i) {
        Point p;
        p.x = static_cast<float>(rand() % screenWidth) / screenWidth;
        p.y = static_cast<float>(rand() % screenHeight) / screenHeight;
        points.push_back(p);
    }

    // Randomly initialize centroids from points; retry until no two
    // starting centroids (nearly) coincide.
    int c[K];
    do {
      for (int i = 0; i < K; i++) {
        c[i] = rand() % points.size();
        centroids[i] = {points[c[i]].x, points[c[i]].y};
      }
    } while (overlap(centroids));

    kmeans(points, centroids, iterations);

    // Print every delta-th k-means result, two per line.
    int delta = 30;
    int k = 0;
    for (size_t j = 0; j < points.size(); j += delta) {  // size_t: no signed/unsigned mismatch
        cout << fixed << setprecision(2) << "(" << points[j].x << ", "
	<< points[j].y << ") :  " << points[j].cluster << "\t";
        if ( ++k % 2 == 0 ) cout << endl;
    }
    cout << endl;
    cout << "Training the MLP ..." << endl;

    mlp.train(points, 10000);

    // Test the MLP on the same samples.
    cout << "Testing MLP:" << endl;
    useMlp(mlp, points, delta);
    getchar();              // wait for Enter before opening the window
    graphics(argc, argv);   // never returns — glutMainLoop()

    return 0;
}

// graphics routines
#include <GL/glut.h>

// Points entered interactively with the mouse; classified on the 'm' key.
vector< Point>pointVec;

// One-time GL setup: white background, 2-D orthographic projection that
// matches the window size, small black points by default.
void init()
{
  glClearColor(1, 1, 1, 1);     //clear color buffer with white color
  glClear(GL_COLOR_BUFFER_BIT); //clear color buffer
  //define coordinate system: (0,0) bottom-left .. (width,height) top-right
  glMatrixMode(GL_PROJECTION);
  glLoadIdentity();
  gluOrtho2D(0, screenWidth, 0, screenHeight);
  glPointSize( 3 );
  glColor3f(0.0, 0.0, 0.0);             //draw with black color
}

// GLUT display callback: nothing is redrawn here — drawing happens in
// the mouse/keyboard callbacks; we only flush pending GL commands.
void display()
{
  //flush out data to color buffer
  glFlush();
}

// Draw one point at window coordinates (x, y) with the current GL
// color and point size, and flush immediately (single-buffered window).
void drawDot(int x, int y)
{
  glBegin(GL_POINTS);
    glVertex2i(x, y);
  glEnd();
  glFlush();
}


// Display cluster points with different colors and the centroids.
// Clusters 0..2 are drawn in red/green/blue, cluster 3 in black;
// centroids are larger magenta dots. Point coordinates are normalized
// to [0,1], so they are scaled back to window pixels before drawing.
void displayClusters( vector< Point> &points)
{
  glClear(GL_COLOR_BUFFER_BIT); //clear color buffer
  //display data points
  int n = points.size();
  int k = centroids.size();

  glPointSize( 5 );
  for (int i = 0; i < n; i++) {
    int c = points[i].cluster;
    float color[3] = {0};       //display color (R, G, B)
    if ( c < 3 )
      color[c] = 1;    //Red, green or blue
    //fourth color is black
    glColor3fv(color);
    drawDot(screenWidth*points[i].x, screenHeight*points[i].y);
  }
  //display centroids
  glPointSize( 9 );
  glColor3f(1, 0, 1);   //magenta
  for(int j = 0; j < k; j++)
    drawDot(screenWidth*centroids[j].x, screenHeight*centroids[j].y);
  glColor3f(0, 0, 0);   //restore black color
  glPointSize( 3 );
  glutPostRedisplay();
}

// GLUT keyboard callback:
//   ESC - quit (exit never returns, so no fall-through to 'd')
//   'd' - redraw the clustered points and centroids
//   'm' - classify every mouse-entered point with the trained MLP
void keyboard(unsigned char key, int x, int y)
{
  switch ( key ) {
    case 27:            //escape
        exit( -1 );
    case 'd':
        displayClusters(pointVec);
        break;
    case 'm':
        useMlp(mlp, pointVec, 1);
	break;
  }
}

// GLUT mouse callback: on a left click, flip y (GLUT's origin is the
// top-left, GL's is the bottom-left), draw the dot, and store the
// point normalized to [0,1] for later classification with 'm'.
void mouse(int button, int state, int mx, int my)
{
   int x = mx, y = screenHeight - my;
   if ( button == GLUT_LEFT_BUTTON && state == GLUT_DOWN ){
     drawDot(x, y);
     Point pt = {(double) x / screenWidth, (double) y / screenHeight};
     pointVec.push_back( pt );
   }
   glFlush();
}

// Create a single-buffered RGB window, register the callbacks, and
// enter the GLUT event loop. Does not return.
void graphics(int argc, char *argv[])
{
  glutInit(&argc, argv);        //Initialization
  glutInitDisplayMode(GLUT_SINGLE | GLUT_RGB );
  glutInitWindowSize(screenWidth, screenHeight);
  glutInitWindowPosition(59, 50);
  glutCreateWindow( argv[0] );
  init();
  // specifies callback functions
  glutKeyboardFunc( keyboard );
  glutMouseFunc( mouse );
  glutDisplayFunc( display );
  glutMainLoop();               //go into perpetual loop
}

Using a Makefile:

	# CC is used consistently (the original link rule hard-coded g++).
	CC = g++
	CFLAGS = -c
	LIBS = -lGL -lGLU -lglut

	testKmlp: testKmlp.o mlp.o kmeans.o
        	$(CC) -o testKmlp testKmlp.o kmeans.o mlp.o $(LIBS)

	# Object rules list their sources AND headers as prerequisites so
	# that editing a header triggers a rebuild (the originals had none).
	testKmlp.o: testKmlp.cpp kmeans.h mlp.h
        	$(CC) $(CFLAGS) testKmlp.cpp

	mlp.o: mlp.cpp kmeans.h mlp.h
        	$(CC) $(CFLAGS) mlp.cpp

	kmeans.o: kmeans.cpp kmeans.h
        	$(CC) $(CFLAGS) kmeans.cpp

	# Remove all build artifacts; -f keeps rm quiet if they are absent.
	clean:
        	rm -f testKmlp testKmlp.o mlp.o kmeans.o
	

Sample Outputs:

(0.41, 0.41) :  0	(0.71, 0.35) :  3
(0.87, 0.95) :  1	(0.33, 0.48) :  0
(0.90, 0.33) :  3	(0.30, 0.75) :  2
(0.82, 0.20) :  3	(0.58, 0.60) :  1
(0.58, 0.61) :  1	(0.55, 0.65) :  1
(0.56, 0.19) :  3	(0.46, 0.41) :  0
(0.45, 0.41) :  0	(0.17, 0.29) :  0
(0.24, 0.93) :  2	(0.97, 0.54) :  1
(0.46, 0.37) :  0	(0.61, 0.15) :  3
(0.06, 0.41) :  0	(0.35, 0.52) :  2
(0.01, 0.34) :  0	(0.54, 0.15) :  3
(0.31, 0.05) :  0	(0.48, 0.82) :  2
(0.89, 0.94) :  1	(0.36, 0.86) :  2
(0.05, 0.90) :  2	(0.92, 0.14) :  3
(0.81, 0.02) :  3	(0.71, 0.54) :  1

Training the MLP ...
Testing MLP:
  0.41 0.41 : 0 (1.0 0.0 0.0 0.0)	  0.71 0.35 : 3 (0.0 0.0 0.0 1.0)
  0.87 0.95 : 1 (0.0 1.0 0.0 0.0)	  0.33 0.48 : 0 (1.0 0.0 0.0 0.0)
  0.90 0.33 : 3 (0.0 0.0 0.0 1.0)	  0.30 0.75 : 2 (0.0 0.0 1.0 0.0)
  0.82 0.20 : 3 (0.0 0.0 0.0 1.0)	  0.58 0.60 : 1 (0.0 1.0 0.0 0.0)
  0.58 0.61 : 1 (0.0 1.0 0.0 0.0)	  0.55 0.65 : 1 (0.0 1.0 0.0 0.0)
  0.56 0.19 : 3 (0.0 0.0 0.0 1.0)	  0.46 0.41 : 0 (1.0 0.0 0.0 0.0)
  0.45 0.41 : 0 (1.0 0.0 0.0 0.0)	  0.17 0.29 : 0 (1.0 0.0 0.0 0.0)
  0.24 0.93 : 2 (0.0 0.0 1.0 0.0)	  0.97 0.54 : 1 (0.0 1.0 0.0 0.0)
  0.46 0.37 : 0 (1.0 0.0 0.0 0.0)	  0.61 0.15 : 3 (0.0 0.0 0.0 1.0)
  0.06 0.41 : 0 (1.0 0.0 0.0 0.0)	  0.35 0.52 : 2 (0.0 0.0 1.0 0.0)
  0.01 0.34 : 0 (1.0 0.0 0.0 0.0)	  0.54 0.15 : 3 (0.0 0.0 0.0 1.0)
  0.31 0.05 : 0 (1.0 0.0 0.0 0.0)	  0.48 0.82 : 2 (0.0 0.0 1.0 0.0)
  0.89 0.94 : 1 (0.0 1.0 0.0 0.0)	  0.36 0.86 : 2 (0.0 0.0 1.0 0.0)
  0.05 0.90 : 2 (0.0 0.0 1.0 0.0)	  0.92 0.14 : 3 (0.0 0.0 0.0 1.0)
  0.81 0.02 : 3 (0.0 0.0 0.0 1.0)	  0.71 0.54 : 1 (0.0 1.0 0.0 0.0)

  //graphics output
  0.15 0.87 : 2 (0.0 0.0 1.0 0.0)	  0.34 0.86 : 2 (0.0 0.0 1.0 0.0)
  0.54 0.82 : 1 (0.0 1.0 0.0 0.0)	  0.79 0.82 : 1 (0.0 1.0 0.0 0.0)
  0.15 0.71 : 2 (0.0 0.0 1.0 0.0)	  0.32 0.68 : 2 (0.0 0.0 1.0 0.0)
  0.45 0.56 : 2 (0.0 0.0 1.0 0.0)	  0.47 0.65 : 2 (0.0 0.0 1.0 0.0)
  0.64 0.67 : 1 (0.0 1.0 0.0 0.0)	  0.75 0.65 : 1 (0.0 1.0 0.0 0.0)
  0.84 0.61 : 1 (0.0 1.0 0.0 0.0)	  0.76 0.50 : 1 (0.0 1.0 0.0 0.0)
  0.63 0.52 : 1 (0.0 1.0 0.0 0.0)	  0.20 0.41 : 0 (1.0 0.0 0.0 0.0)
  0.28 0.27 : 0 (1.0 0.0 0.0 0.0)	  0.40 0.40 : 0 (1.0 0.0 0.0 0.0)
  0.59 0.20 : 3 (0.0 0.0 0.0 1.0)	  0.43 0.14 : 0 (1.0 0.0 0.0 0.0)
  ......