//mlp.cpp
#include <cmath>
#include <cstdio>
#include <iomanip>
#include <iostream>
#include <string>
using namespace std;
//MLP training for an XOR gate
const int n1 = 3; //inputs per sample, including the bias input x0
const int m1 = 3; //hidden nodes, including the hidden bias node h[0]
const int K = 1; //number of output nodes
const int numSamples = 4; //training patterns: every 2-bit input combination
//Training inputs; column 0 is the bias input and is always 1.
double inputs[numSamples][n1] =
{
1, 0, 0, //x0=1, x1=0, x2=0
1, 0, 1, //x0=1, x1=0, x2=1
1, 1, 0, //x0=1, x1=1, x2=0
1, 1, 1 //x0=1, x1=1, x2=1
};
//Target XOR outputs, one row of K values per training sample.
double labels[numSamples][K] = {0, 1, 1, 0};
//Initial input-to-hidden weights w[i][j]; hard-coded (not actually
//randomly generated) so every run is reproducible.
double w[n1][m1] = {0.97, 0.2, 0.7,
0.73, 0.1, 0.9,
0.2, 0.7, 0.3};
//Initial hidden-to-output weights wo[j][l]; hard-coded as above.
double wo[m1][K] = {0.76, 0.6, 0.1};
// Simple MLP class
class MLP
{
private:
double a[m1]; //linear sum of products of inputs and weights
double h[m1]; //hidden nodes h[j] = g(a[j])
double y[K]; //predicted output
double z[K]; //linear sum of products of weights and hidden nodes
double eta; //learning rate
public:
MLP(double learning_rate)
{
eta = learning_rate;
h[0] = 1; //for bias
}
//Sigmoid activation function
double g(double x)
{
return 1.0 / (1.0 + exp(-x));
}
//Derivative of sigmoid function
double gd(double x)
{
double s = g(x);
return s * (1 - s);
}
//Forward propagation
double* forward(double x[])
{
//hidden layer activation
for (int j = 0; j < m1; j++) {
a[j] = 0;
for (int i = 0; i < n1; i++)
a[j] += w[i][j] * x[i];
if ( j > 0 )
h[j] = g( a[j] );
}
//output layer activation
for (int l = 0; l < K; l++){
z[l] = 0;
for(int j = 0; j < m1; j++)
z[l] += wo[j][l] * h[j];
y[l] = g( z[l] );
}
return y; //return output
}
//Backward propagation, yd is the desired output
void backward(double yd[], double x[])
{
double delta[m1];
double deltao[K];
for (int l = 0; l < K; l++) {
double e = yd[l] - y[l]; //output layer error
deltao[l] = e * gd(z[l]);
}
//Compute hidden layer error
for(int j = 0; j < m1; j++){
delta[j] = 0;
for(int l = 0; l < K; l++)
delta[j] += deltao[l] * wo[j][l] * gd(a[j]);
}
//Update weights (hidden to output)
for(int j = 0; j < m1; j++)
for(int l = 0; l < K; l++)
wo[j][l] += eta * deltao[l] * h[j];
//Update weights (input to hidden)
for(int i = 0; i < n1; i++)
for(int j = 0; j < m1; j++)
w[i][j] += eta * delta[j] * x[i];
}
void get_input_data(int k, double x[])
{
for(int i=0; i < n1; i++)
x[i]=inputs[k][i];
}
// Train the MLP
// epochs = number of times of training
// numSamples = number of different input sets
void train(int numSamples, int epochs)
{
double x[n1]; //inputs
double *yd = &labels[0][0];
for (int epoch = 0; epoch < epochs; epoch++){
for (int k = 0; k < numSamples; k++) {
get_input_data(k, x);
forward( x );
backward(yd+k*K, x);
}
}
}
void printWeights()
{
cout << "\nMLP input to hidden weights: ";
for(int i = 0; i < n1; i++){
cout << endl;
for(int j = 0; j < m1; j++)
printf("w[%d][%d]: %5.3f\t", i, j, w[i][j]);
}
cout << "\nMLP hidden to output weights:";
for(int i = 0; i < m1; i++) {
cout << endl;
for(int j = 0; j < K; j++)
printf("wo[%d][%d]: %5.3f\t", i, j, wo[i][j]);
}
}
~MLP()
{
}
};
//Map a sigmoid output to a class label: values near 0 (in (-0.1, 0.2))
//give 0, values near 1 (in (0.8, 1.1)) give 1, and anything else is
//treated as indeterminate and reported as -1.
int classifier( double x)
{
    const bool nearZero = (x > -0.1) && (x < 0.2);
    if (nearZero)
        return 0;
    const bool nearOne = (x > 0.8) && (x < 1.1);
    return nearOne ? 1 : -1;
}
//Entry point: train the MLP on the four XOR patterns, then evaluate
//every input combination and print the learned weights.
int main()
{
    string gates = " XOR ";
    MLP mlp(0.5);
    mlp.train(4, 10000);

    // Test the MLP on all four 2-bit input combinations.
    double x[3];
    x[0] = 1; //bias input is always 1
    cout << "\nTesting MLP:" << endl;
    for (int pattern = 0; pattern < 4; pattern++) {
        x[1] = pattern & 1;  //low bit  -> first operand
        x[2] = pattern >> 1; //high bit -> second operand
        double *output = mlp.forward(x);
        cout << fixed << setprecision(0) << " " << x[2]
             << gates << x[1] << " = " << classifier(*output) <<
             setprecision(2) << " (" << output[0] << ")" << endl;
    }
    mlp.printWeights();
    cout << endl << "Hello, AI World!" << endl;
    return 0;
}