Materials available at: https://forejune.co/cuda/
Machine Learning
|
|
Example: K-means Clustering
Groups data points into clusters based on their similarity.
Suppose we have customer data with features such as:
Age, Income, Shopping frequency
K-Means Clustering can group these customers into, say, 3 clusters:
Cluster 1: Young and budget-conscious
Cluster 2: Middle-aged professionals
Cluster 3: High-income frequent shoppers
A C/C++ Implementation of K-means
// kmeans.h
#ifndef __KMEANS__
#define __KMEANS__
#include <iostream>
#include <vector>
#include <cmath>
#include <cstdlib>
#include <ctime>
#include <limits>
using namespace std;
struct Point
{
double x, y;
int cluster;
};
void kmeans(vector<Point>& points, vector<Point>& centroids, int iterations);
#endif
|
// kmeans.cpp
// k-means clustering functions
// https://forejune.co/cuda/
#include "kmeans.h"
// distance from a point p to a centroid c
double distance(const Point& p, const Point& c)
{
return sqrt((p.x - c.x)*(p.x - c.x) + (p.y - c.y)*(p.y - c.y));
}
// Clustering 2D points stored in vector points and returns the centroids
// in vector centroids
void kmeans(vector< Point>& points, vector< Point>& centroids, int iterations)
{
int n = points.size();
int k = centroids.size();
for (int it = 0; it < iterations; it++) {
// Assign points to nearest centroid
for (int j = 0; j < n; j++ ){
double minDist = 9999;
int bestCluster = 0;
for (int c = 0; c < k; c++) {
double d = distance(points[j], centroids[c]);
if (d < minDist) {
minDist = d;
bestCluster = c;
}
}
points[j].cluster = bestCluster;
}
// Recompute centroids
int counts[k] = {0};
Point newCentroids[k] = {(0, 0, 0)};
for (int j = 0; j < n; j++){
int cl = points[j].cluster;
newCentroids[cl].x += points[j].x;
newCentroids[cl].y += points[j].y;
counts[cl]++;
}
for (int c = 0; c < k; c++) {
if (counts[c] != 0) {
centroids[c].x = newCentroids[c].x / counts[c];
centroids[c].y = newCentroids[c].y / counts[c];
}
} // for c
} // for it
}
|
Testing K-means routine
// testKmeans.cpp
// routine to test the k-means function
// https://forejune.co/cuda/
#include "kmeans.h"
int main()
{
srand(time(0));
//Some random 2D points
vector< Point> points = {
{10, 20}, {15, 18}, {50, 40},
{40, 40}, {30, 60}, {45, 55},
{18, 20}, {10, 20}, {50, 30}
};
int k = 3; // Number of clusters
int iterations = 20; // Number of K-means iterations
vector< Point> centroids(k);
int c[k];
// Randomly initialize centroids from points
for (int i = 0; i < k; i++) {
c[i] = rand() % points.size();
centroids[i] = {points[c[i]].x, points[c[i]].y};
}
kmeans(points, centroids, iterations);
//output results
for (int j = 0; j < points.size(); j++ ) {
cout << "Point (" << points[j].x << ", " << points[j].y
<< ") : Cluster " << points[j].cluster << endl;
}
return 0;
}
|
Sample Outputs:
Point (10, 20) : Cluster 0 Point (15, 18) : Cluster 0 Point (50, 40) : Cluster 1 Point (40, 40) : Cluster 1 Point (30, 60) : Cluster 2 Point (45, 55) : Cluster 1 Point (18, 20) : Cluster 0 Point (10, 20) : Cluster 0 Point (50, 30) : Cluster 1
Using a Simple Graphical Interface
// testKmeansg.cpp -- Testing kmeans routine with graphical interface
// g++ -c testKmeansg.cpp
// g++ -o testKmeansg testKmeansg.o kmeans.o -lglut -lGL -lGLU
// https://forejune.co/cuda/
#include "kmeans.h"
//calling kmeans routine
int doKmeans(vector < Point> &points, vector< Point> & centroids)
{
srand(time(0));
int n = points.size();
int k; // Number of clusters
int iterations = 20; // Number of K-means iterations
if ( n < 15 )
k = 3;
else
k = 4;
centroids.resize(k);
// Randomly initialize centroids from points
int c[k];
for (int i = 0; i < k; i++) {
c[i] = rand() % points.size();
centroids[i] = {points[c[i]].x, points[c[i]].y};
}
kmeans(points, centroids, iterations);
//output results
for (int j = 0; j < n; j++ ) {
cout << "Point (" << points[j].x << ", " <<
points[j].y << ") : Cluster " << points[j].cluster << endl;
}
cout << "Cluster size = " << k << endl;
cout << "Centroids: " << endl;
for (int j = 0; j < k; j++ ) {
cout << "Cluster " << j << " at (" << (int) centroids[j].x << ", " <<
(int) centroids[j].y << ")"<< endl;
}
return 0;
}
#include <GL/glut.h>
const int screenWidth = 600;
const int screenHeight = 500;
//vectors to store points and centroids
vector< Point>pointVec;
vector< Point>centroids;
void init()
{
glClearColor(1, 1, 1, 1); //clear color buffer with white color
glClear(GL_COLOR_BUFFER_BIT); //clear color buffer
//define coordinate system
glMatrixMode(GL_PROJECTION);
glLoadIdentity();
gluOrtho2D(0, screenWidth, 0, screenHeight);
glPointSize( 3 );
glColor3f(0.0, 0.0, 0.0); //draw with black color
}
void display()
{
//flush out data to color buffer
glFlush();
}
//draw one point
void drawDot(int x, int y)
{
glBegin(GL_POINTS);
glVertex2i(x, y);
glEnd();
glFlush();
}
//display cluster points with different colors and centroids
void displayClusters( vector< Point> &points, vector< Point> & centroids )
{
glClear(GL_COLOR_BUFFER_BIT); //clear color buffer
//display data points
int n = points.size();
int k = centroids.size();
glPointSize( 5 );
for (int i = 0; i < n; i++) {
int c = points[i].cluster;
float color[3] = {0}; //display color (R, G, B)
if ( c < 3 )
color[c] = 1; //Red, green or blue
//fourth color is black
glColor3fv(color);
drawDot(points[i].x, points[i].y);
}
//display centroids
glPointSize( 9 );
glColor3f(1, 0, 1); //magenta
for(int j = 0; j < k; j++)
drawDot(centroids[j].x, centroids[j].y);
glColor3f(0, 0, 0); //restore black color
glPointSize( 3 );
glutPostRedisplay();
}
void keyboard(unsigned char key, int x, int y)
{
switch ( key ) {
case 27: //escape
exit( -1 );
case 'd':
displayClusters(pointVec, centroids);
break;
case 'k':
doKmeans( pointVec, centroids );
}
}
void mouse(int button, int state, int mx, int my)
{
int x = mx, y = screenHeight - my;
if ( button == GLUT_LEFT_BUTTON && state == GLUT_DOWN ){
drawDot(x, y);
Point pt = {(double) x, (double) y};
pointVec.push_back( pt );
}
glFlush();
}
int main(int argc, char *argv[])
{
glutInit(&argc, argv); //Initialization
glutInitDisplayMode(GLUT_SINGLE | GLUT_RGB );
glutInitWindowSize(screenWidth, screenHeight);
glutInitWindowPosition(59, 50);
glutCreateWindow( argv[0] );
init();
// specifies calback functions
glutKeyboardFunc( keyboard );
glutMouseFunc( mouse );
glutDisplayFunc( display );
glutMainLoop(); //go into perpetual loop
return 0;
}
|
Sample graphics output: