K-means Clustering Using a Multilayer Perceptron (MLP) in C/C++

Materials available at: https://forejune.co/cuda/

Overview

Review

    K-means Clustering

       see https://forejune.co/cuda/ai/unsupervised/unsuper.html

    Multi-layer Perceptron (MLP)

       see https://forejune.co/cuda/ai/mlp/mlp.html
	
 
                                
// mlp.h
#ifndef __MLP_H__
#define __MLP_H__
const int n1 = 3;  //number of inputs and bias
const int m1 = 7;  //number of hidden nodes and bias
const int K =  4;   //number of outputs = number of clusters

class MLP
{
private:
    double w1[n1][m1];
    double w[n1][m1];
    double wo[m1][K];
    double a[m1];  //linear sum of products of inputs and weights
    double h[m1];  //hidden nodes h[j] = g(a[j])
    double y[K];   //predicted output
    double z[K];   //linear sum of products of weights and hidden nodes
    double eta; //learning rate
public:
    MLP(double learning_rate);
    double g(double x);
    double gd(double x);  //Derivative of sigmoid function
    double* forward(double x[]);  //Forward propagation
    void backward(double yd[], double x[]);  //Backward prop, yd is desired output
    void train(const vector<Point> &points, int epochs);  //Train the Perceptron
    ~MLP();
};
#endif
 
                                
//mlp.cpp

#include <iostream>
#include <iomanip>
#include <cmath>
#include "kmeans.h"
#include "mlp.h"

using namespace std;

// Constructor: store the learning rate, fix the hidden bias node at 1,
// and fill both weight matrices with pseudo-random values in [0, 1).
MLP::MLP(double learning_rate)
{
    eta = learning_rate;
    h[0] = 1;   // hidden node 0 is the bias input to the output layer

    // input-to-hidden weights
    for (int row = 0; row < n1; row++)
        for (int col = 0; col < m1; col++)
            w[row][col] = rand() % 1000 / 1000.0;

    // hidden-to-output weights
    for (int col = 0; col < m1; col++)
        for (int out = 0; out < K; out++)
            wo[col][out] = rand() % 1000 / 1000.0;
}

// Sigmoid activation function: maps any real x into (0, 1).
double MLP::g(double x)
{
    double e = exp(-x);
    return 1.0 / (1.0 + e);
}

// Derivative of the sigmoid: g'(x) = g(x) * (1 - g(x)).
double MLP::gd(double x)
{
    double sig = g(x);
    return sig * (1 - sig);
}


// Forward propagation: compute hidden activations and the network
// output for one input vector x (x[0] must be the bias value 1).
// Returns a pointer to the internal output array y (length K).
double* MLP::forward(double x[])
{
    // Hidden layer: a[j] = sum_i w[i][j] * x[i], h[j] = g(a[j]).
    // h[0] is the bias node and stays fixed at 1 (set in the ctor),
    // but a[0] is still computed because backward() differentiates it.
    for (int j = 0; j < m1; j++) {
        double sum = 0;
        for (int i = 0; i < n1; i++)
            sum += w[i][j] * x[i];
        a[j] = sum;
        if (j > 0)
            h[j] = g(a[j]);
    }

    // Output layer: z[l] = sum_j wo[j][l] * h[j], y[l] = g(z[l]).
    for (int l = 0; l < K; l++) {
        double sum = 0;
        for (int j = 0; j < m1; j++)
            sum += wo[j][l] * h[j];
        z[l] = sum;
        y[l] = g(z[l]);
    }

    return y;   // caller reads, does not own, this array
}

//Backward propagation, yd is the desired output
void MLP::backward(double yd[], double x[])
{
    double delta[m1];
    double deltao[K];

   for (int l = 0; l < K; l++) { 
      double e = yd[l] - y[l];  //output layer error
     
      deltao[l] = e * gd(z[l]);
   }
   
  for(int j = 0; j < m1; j++){
      delta[j] = 0;
      for(int l = 0; l < K; l++)
        delta[j] += deltao[l] * wo[j][l] * gd(a[j]);
   }

   //Update weights (hidden to output) 
   for(int j = 0; j < m1; j++)
     for(int l = 0; l < K; l++) 
        wo[j][l] += eta * deltao[l] * h[j];
     
   //Update weights (input to hidden)
   for(int i = 0; i < n1; i++)
      for(int j = 0; j < m1; j++)
        w[i][j] += eta * delta[j] * x[i];
} 

// Train the perceptron with plain stochastic gradient descent.
// points - training samples; each point's cluster id is the target class
// epochs - number of full passes over the sample set
void MLP::train(const vector<Point> &points, int epochs)
{
    double x[n1];   // network input: {bias, point.x, point.y}
    double yd[K];   // desired output: one-hot encoding of the cluster id
    int numSamples = points.size();

    x[0] = 1;   // bias
    for (int epoch = 0; epoch < epochs; epoch++) {
        for (int k = 0; k < numSamples; k++) {
            x[1] = points[k].x;
            x[2] = points[k].y;
            forward(x);
            for (int i = 0; i < K; i++)
                yd[i] = (points[k].cluster == i) ? 1 : 0;
            backward(yd, x);
        }
    }
}

// Destructor: nothing to release — all storage is in fixed-size arrays.
MLP::~MLP()
{
}

Testing K-means MLP routines
 
                                
// testKmlp.cpp 
// https://forejune.co/cuda/

#include <iostream>
#include <iomanip>
#include <cmath>
#include <cstdio>    // getchar
#include <cstdlib>   // rand, srand
#include <ctime>     // time
#include <vector>
#include "kmeans.h"
#include "mlp.h"

using namespace std;

const int screenWidth  = 700;
const int screenHeight = 600;
void graphics(int argc, char *argv[]);

// Return the index of the largest of the K network outputs — the
// predicted cluster. Ties keep the earliest index (strict '>' test).
int classifier( double y[])
{
  int best = 0;
  for (int i = 1; i < K; i++)
    if (y[i] > y[best])
      best = i;

  return best;
}

// Return true if any two centroids lie within 0.01 of each other in
// both coordinates — i.e. effectively the same starting point.
bool overlap(const vector< Point> & centroids)
{
   int n = centroids.size();
   for (int i = 0; i < n; i++) {
     for (int j = i + 1; j < n; j++) {
       double dx = fabs(centroids[i].x - centroids[j].x);
       double dy = fabs(centroids[i].y - centroids[j].y);
       if (dx < 0.01 && dy < 0.01)
         return true;
     }
   }
   return false;
}


// Classify every delta-th point with the trained MLP, print the input,
// predicted cluster and raw network outputs (two points per line), and
// store the predicted cluster back into the point.
void useMlp(MLP &mlp, vector< Point> &points, int delta)
{
  int n = points.size();
  double x[3];
  x[0] = 1;		//bias

  int j = 0;
  for (int i = 0; i < n; i+=delta) {
      x[1] = points[i].x;
      x[2] = points[i].y;
      double *output = mlp.forward(x);
      // Classify once and reuse the result — the original called
      // classifier() twice per point (and cast the int result to int).
      int cls = classifier(output);
      cout << fixed << setprecision(2) << "  " << x[1] << " " <<  x[2]
      << " : " << cls << "" << " (" << setprecision(1);
      for(int k = 0; k < K; k++) {
        cout <<  output[k];
        if (k < K - 1)
          cout << " ";
        else
          cout << ")" << "\t";
      }
      points[i].cluster = cls;
      if ( ++j % 2 == 0 ) cout << endl;
    }

}
    
MLP mlp(0.5);                 // global network (learning rate 0.5) — shared with the GLUT callbacks below
vector< Point> centroids(K);  // cluster centroids — also read by displayClusters()

// Generate random 2-D points, cluster them with k-means, train the MLP
// on the resulting labels, test the MLP, then start the GLUT loop.
int main(int argc, char *argv[])
{
   srand(time(0));
   const int numSamples = 900;
   int iterations = 50;  // Number of K-means iterations

    // Generate uniformly random data in [0,1) x [0,1).
    vector< Point> points;
    for (int i = 0; i < numSamples; ++i) {
        Point p;
        p.x = static_cast<float>(rand() % screenWidth) / screenWidth;
        p.y = static_cast<float>(rand() % screenHeight) / screenHeight;
        points.push_back(p);
    }

    // Randomly initialize centroids from points; retry until no two
    // starting centroids (nearly) coincide.
    int c[K];
    do {
      for (int i = 0; i < K; i++) {
        c[i] = rand() % points.size();
        centroids[i] = {points[c[i]].x, points[c[i]].y};
      }
    } while (overlap(centroids));

    kmeans(points, centroids, iterations);

    // Print every delta-th k-means result, two per line.
    int delta = 30;
    int k = 0;
    for (size_t j = 0; j < points.size(); j += delta) {  // size_t: no signed/unsigned mismatch
        cout << fixed << setprecision(2) << "(" << points[j].x << ", "
	<< points[j].y << ") :  " << points[j].cluster << "\t";
        if ( ++k % 2 == 0 ) cout << endl;
    }
    cout << endl;
    cout << "Training the MLP ..." << endl;

    mlp.train(points, 10000);

    // Test the MLP on the same samples.
    cout << "Testing MLP:" << endl;
    useMlp(mlp, points, delta);
    getchar();              // wait for Enter before opening the window
    graphics(argc, argv);   // never returns — glutMainLoop()

    return 0;
}

// graphics routines
#include <GL/glut.h>

// Points entered interactively with the mouse; classified on the 'm' key.
vector< Point>pointVec;

// One-time GL setup: white background, 2-D orthographic projection that
// matches the window size, small black points by default.
void init()
{
  glClearColor(1, 1, 1, 1);     //clear color buffer with white color
  glClear(GL_COLOR_BUFFER_BIT); //clear color buffer
  //define coordinate system: (0,0) bottom-left .. (width,height) top-right
  glMatrixMode(GL_PROJECTION);
  glLoadIdentity();
  gluOrtho2D(0, screenWidth, 0, screenHeight);
  glPointSize( 3 );
  glColor3f(0.0, 0.0, 0.0);             //draw with black color
}

// GLUT display callback: nothing is redrawn here — drawing happens in
// the mouse/keyboard callbacks; we only flush pending GL commands.
void display()
{
  //flush out data to color buffer
  glFlush();
}

// Draw one point at window coordinates (x, y) with the current GL
// color and point size, and flush immediately (single-buffered window).
void drawDot(int x, int y)
{
  glBegin(GL_POINTS);
    glVertex2i(x, y);
  glEnd();
  glFlush();
}


// Display cluster points with different colors and the centroids.
// Clusters 0..2 are drawn in red/green/blue, cluster 3 in black;
// centroids are larger magenta dots. Point coordinates are normalized
// to [0,1], so they are scaled back to window pixels before drawing.
void displayClusters( vector< Point> &points)
{
  glClear(GL_COLOR_BUFFER_BIT); //clear color buffer
  //display data points
  int n = points.size();
  int k = centroids.size();

  glPointSize( 5 );
  for (int i = 0; i < n; i++) {
    int c = points[i].cluster;
    float color[3] = {0};       //display color (R, G, B)
    if ( c < 3 )
      color[c] = 1;    //Red, green or blue
    //fourth color is black
    glColor3fv(color);
    drawDot(screenWidth*points[i].x, screenHeight*points[i].y);
  }
  //display centroids
  glPointSize( 9 );
  glColor3f(1, 0, 1);   //magenta
  for(int j = 0; j < k; j++)
    drawDot(screenWidth*centroids[j].x, screenHeight*centroids[j].y);
  glColor3f(0, 0, 0);   //restore black color
  glPointSize( 3 );
  glutPostRedisplay();
}

// GLUT keyboard callback:
//   ESC - quit (exit never returns, so no fall-through to 'd')
//   'd' - redraw the clustered points and centroids
//   'm' - classify every mouse-entered point with the trained MLP
void keyboard(unsigned char key, int x, int y)
{
  switch ( key ) {
    case 27:            //escape
        exit( -1 );
    case 'd':
        displayClusters(pointVec);
        break;
    case 'm':
        useMlp(mlp, pointVec, 1);
	break;
  }
}

// GLUT mouse callback: on a left click, flip y (GLUT's origin is the
// top-left, GL's is the bottom-left), draw the dot, and store the
// point normalized to [0,1] for later classification with 'm'.
void mouse(int button, int state, int mx, int my)
{
   int x = mx, y = screenHeight - my;
   if ( button == GLUT_LEFT_BUTTON && state == GLUT_DOWN ){
     drawDot(x, y);
     Point pt = {(double) x / screenWidth, (double) y / screenHeight};
     pointVec.push_back( pt );
   }
   glFlush();
}

// Create a single-buffered RGB window, register the callbacks, and
// enter the GLUT event loop. Does not return.
void graphics(int argc, char *argv[])
{
  glutInit(&argc, argv);        //Initialization
  glutInitDisplayMode(GLUT_SINGLE | GLUT_RGB );
  glutInitWindowSize(screenWidth, screenHeight);
  glutInitWindowPosition(59, 50);
  glutCreateWindow( argv[0] );
  init();
  // specifies callback functions
  glutKeyboardFunc( keyboard );
  glutMouseFunc( mouse );
  glutDisplayFunc( display );
  glutMainLoop();               //go into perpetual loop
}

Using a Makefile:

	# CC is used consistently (the original link rule hard-coded g++).
	CC = g++
	CFLAGS = -c
	LIBS = -lGL -lGLU -lglut

	testKmlp: testKmlp.o mlp.o kmeans.o
        	$(CC) -o testKmlp testKmlp.o kmeans.o mlp.o $(LIBS)

	# Object rules list their sources AND headers as prerequisites so
	# that editing a header triggers a rebuild (the originals had none).
	testKmlp.o: testKmlp.cpp kmeans.h mlp.h
        	$(CC) $(CFLAGS) testKmlp.cpp

	mlp.o: mlp.cpp kmeans.h mlp.h
        	$(CC) $(CFLAGS) mlp.cpp

	kmeans.o: kmeans.cpp kmeans.h
        	$(CC) $(CFLAGS) kmeans.cpp

	# Remove all build artifacts; -f keeps rm quiet if they are absent.
	clean:
        	rm -f testKmlp testKmlp.o mlp.o kmeans.o
	

Sample Outputs:

(0.41, 0.41) :  0	(0.71, 0.35) :  3
(0.87, 0.95) :  1	(0.33, 0.48) :  0
(0.90, 0.33) :  3	(0.30, 0.75) :  2
(0.82, 0.20) :  3	(0.58, 0.60) :  1
(0.58, 0.61) :  1	(0.55, 0.65) :  1
(0.56, 0.19) :  3	(0.46, 0.41) :  0
(0.45, 0.41) :  0	(0.17, 0.29) :  0
(0.24, 0.93) :  2	(0.97, 0.54) :  1
(0.46, 0.37) :  0	(0.61, 0.15) :  3
(0.06, 0.41) :  0	(0.35, 0.52) :  2
(0.01, 0.34) :  0	(0.54, 0.15) :  3
(0.31, 0.05) :  0	(0.48, 0.82) :  2
(0.89, 0.94) :  1	(0.36, 0.86) :  2
(0.05, 0.90) :  2	(0.92, 0.14) :  3
(0.81, 0.02) :  3	(0.71, 0.54) :  1

Training the MLP ...
Testing MLP:
  0.41 0.41 : 0 (1.0 0.0 0.0 0.0)	  0.71 0.35 : 3 (0.0 0.0 0.0 1.0)
  0.87 0.95 : 1 (0.0 1.0 0.0 0.0)	  0.33 0.48 : 0 (1.0 0.0 0.0 0.0)
  0.90 0.33 : 3 (0.0 0.0 0.0 1.0)	  0.30 0.75 : 2 (0.0 0.0 1.0 0.0)
  0.82 0.20 : 3 (0.0 0.0 0.0 1.0)	  0.58 0.60 : 1 (0.0 1.0 0.0 0.0)
  0.58 0.61 : 1 (0.0 1.0 0.0 0.0)	  0.55 0.65 : 1 (0.0 1.0 0.0 0.0)
  0.56 0.19 : 3 (0.0 0.0 0.0 1.0)	  0.46 0.41 : 0 (1.0 0.0 0.0 0.0)
  0.45 0.41 : 0 (1.0 0.0 0.0 0.0)	  0.17 0.29 : 0 (1.0 0.0 0.0 0.0)
  0.24 0.93 : 2 (0.0 0.0 1.0 0.0)	  0.97 0.54 : 1 (0.0 1.0 0.0 0.0)
  0.46 0.37 : 0 (1.0 0.0 0.0 0.0)	  0.61 0.15 : 3 (0.0 0.0 0.0 1.0)
  0.06 0.41 : 0 (1.0 0.0 0.0 0.0)	  0.35 0.52 : 2 (0.0 0.0 1.0 0.0)
  0.01 0.34 : 0 (1.0 0.0 0.0 0.0)	  0.54 0.15 : 3 (0.0 0.0 0.0 1.0)
  0.31 0.05 : 0 (1.0 0.0 0.0 0.0)	  0.48 0.82 : 2 (0.0 0.0 1.0 0.0)
  0.89 0.94 : 1 (0.0 1.0 0.0 0.0)	  0.36 0.86 : 2 (0.0 0.0 1.0 0.0)
  0.05 0.90 : 2 (0.0 0.0 1.0 0.0)	  0.92 0.14 : 3 (0.0 0.0 0.0 1.0)
  0.81 0.02 : 3 (0.0 0.0 0.0 1.0)	  0.71 0.54 : 1 (0.0 1.0 0.0 0.0)

  //graphics output
  0.15 0.87 : 2 (0.0 0.0 1.0 0.0)	  0.34 0.86 : 2 (0.0 0.0 1.0 0.0)
  0.54 0.82 : 1 (0.0 1.0 0.0 0.0)	  0.79 0.82 : 1 (0.0 1.0 0.0 0.0)
  0.15 0.71 : 2 (0.0 0.0 1.0 0.0)	  0.32 0.68 : 2 (0.0 0.0 1.0 0.0)
  0.45 0.56 : 2 (0.0 0.0 1.0 0.0)	  0.47 0.65 : 2 (0.0 0.0 1.0 0.0)
  0.64 0.67 : 1 (0.0 1.0 0.0 0.0)	  0.75 0.65 : 1 (0.0 1.0 0.0 0.0)
  0.84 0.61 : 1 (0.0 1.0 0.0 0.0)	  0.76 0.50 : 1 (0.0 1.0 0.0 0.0)
  0.63 0.52 : 1 (0.0 1.0 0.0 0.0)	  0.20 0.41 : 0 (1.0 0.0 0.0 0.0)
  0.28 0.27 : 0 (1.0 0.0 0.0 0.0)	  0.40 0.40 : 0 (1.0 0.0 0.0 0.0)
  0.59 0.20 : 3 (0.0 0.0 0.0 1.0)	  0.43 0.14 : 0 (1.0 0.0 0.0 0.0)
  ......