prog

frontpage - thread list - new thread - preferences - ?

Image manipulation and OCR

3 2020-05-31 00:39 *

Start with the MNIST database of handwritten digits; it's a common entry point and you'll find countless tutorials.
http://yann.lecun.com/exdb/mnist/

Here is, for instance, an ANN with forward propagation that I wrote for this challenge. It was a lesbillion years ago, so forgive the poor code quality. You need to install CBLAS, HDF5 and GSL.

#include <stdlib.h>
#include <stdio.h>
#include <assert.h>
#include <time.h>
#include <math.h>
#include <hdf5.h>
#include <hdf5_hl.h>
#include <gsl/gsl_cblas.h>

/*
 * Fill num[0..m-1] with m distinct pseudo-random integers drawn from [0, n),
 * using Robert Floyd's sampling algorithm (one rand() call per sample, no
 * retry loop).
 *
 *   n    size of the range to draw from; must be >= 0
 *   m    how many distinct values to produce; must satisfy 0 <= m <= n
 *   num  output array with room for at least m ints (may be NULL if m == 0)
 *
 * The C library generator is seeded from the clock the first time a sample
 * is actually drawn. Invalid arguments or an allocation failure print a
 * message to stderr and abort(), since the function has no way to report an
 * error to its caller.
 */
void random_numbers(int n, int m, int *num)
{
    static int seeded = 0;
    int in, im, *is_used;

    if (n < 0 || m < 0 || m > n || (m > 0 && num == NULL)) {
        fprintf(stderr, "random_numbers: invalid arguments (n=%d, m=%d)\n", n, m);
        abort();
    }
    if (m == 0) {
        return;
    }

    /* Seed only once: reseeding with time() on every call would make two
     * calls within the same second return the exact same sample. */
    if (!seeded) {
        srand((unsigned int) time(NULL));
        seeded = 1;
    }

    /* is_used[v] != 0 once v has been emitted; calloc zero-fills it. */
    is_used = calloc((size_t) n, sizeof *is_used);
    if (is_used == NULL) {
        fprintf(stderr, "random_numbers: out of memory\n");
        abort();
    }

    im = 0;
    for (in = n - m; in < n; ++in) {
        int r = rand() % (in + 1);
        /* On a collision take `in` itself: it was outside the range of every
         * previous draw, so it cannot have been used yet. */
        if (is_used[r]) {
            r = in;
        }
        num[im++] = r;
        is_used[r] = 1;
    }
    assert(im == m);
    free(is_used);
}

/*
 * Read the float dataset `matrix_name` from the HDF5 file `filename` into
 * `data`.
 *
 *   filename     path of the HDF5 file, opened read-only
 *   matrix_name  name of the dataset inside the file
 *   data         destination buffer; the caller must size it for the whole
 *                dataset, no bounds check is done here
 *
 * Returns a non-negative value on success and a negative value if the file
 * could not be opened, the dataset could not be read, or the file could not
 * be closed. The file is always closed before returning.
 */
herr_t load_matrix(char *filename, char *matrix_name, float *data)
{
    hid_t file_id;
    herr_t read_status, close_status;

    file_id = H5Fopen(filename, H5F_ACC_RDONLY, H5P_DEFAULT);
    if (file_id < 0) {
        return -1;
    }

    read_status = H5LTread_dataset_float(file_id, matrix_name, data);

    /* Close on every path so repeated calls do not leak file handles. */
    close_status = H5Fclose(file_id);

    /* A failed read is the more informative error, so report it first. */
    if (read_status < 0) {
        return read_status;
    }
    return close_status;
}

/*
 * Run forward propagation of a 400-25-10 neural network over every sample in
 * data/dataset.h5 using the weights in data/weights.h5, print each
 * misclassified sample and finally the overall accuracy.
 *
 * Returns 0 on success, EXIT_FAILURE if memory or input data is unavailable.
 */
int main(void)
{
    enum {
        N_SAMPLES = 5000,  /* images in the data set */
        N_PIXELS  = 400,   /* input values per image */
        N_HIDDEN  = 25,    /* hidden-layer units */
        N_LABELS  = 10,    /* output-layer units */
        N_RANDOM  = 100    /* size of the random selection below */
    };
    float *X = NULL, *y = NULL, *Theta1 = NULL, *Theta2 = NULL;
    float *L2 = NULL, *L3 = NULL, *test = NULL;
    int *rand_num = NULL;
    int i, n, max;
    int errors = 0;
    int rc = EXIT_FAILURE;

    X = malloc((size_t) N_SAMPLES * N_PIXELS * sizeof *X);
    y = malloc((size_t) N_SAMPLES * sizeof *y);
    rand_num = malloc((size_t) N_RANDOM * sizeof *rand_num);
    test = malloc((size_t) (N_PIXELS + 1) * sizeof *test);
    Theta1 = malloc((size_t) (N_PIXELS + 1) * N_HIDDEN * sizeof *Theta1);
    Theta2 = malloc((size_t) (N_HIDDEN + 1) * N_LABELS * sizeof *Theta2);
    L2 = malloc((size_t) (N_HIDDEN + 1) * sizeof *L2);
    L3 = malloc((size_t) N_LABELS * sizeof *L3);
    if (!X || !y || !rand_num || !test || !Theta1 || !Theta2 || !L2 || !L3) {
        fprintf(stderr, "out of memory\n");
        goto cleanup;
    }

    if (load_matrix("data/dataset.h5", "X", X) < 0 ||
        load_matrix("data/dataset.h5", "y", y) < 0) {
        fprintf(stderr, "cannot load X and y from data/dataset.h5\n");
        goto cleanup;
    }

    /*
     * The weights never change, so load them once instead of re-reading the
     * file for every sample.
     * Theta1: 401x25 weights for the hidden layer.
     * Theta2: 26x10 weights for the output layer.
     */
    if (load_matrix("data/weights.h5", "Theta1", Theta1) < 0 ||
        load_matrix("data/weights.h5", "Theta2", Theta2) < 0) {
        fprintf(stderr, "cannot load Theta1 and Theta2 from data/weights.h5\n");
        goto cleanup;
    }

    /*
     * Choose 100 random images in X.
     * NOTE(review): the selection is computed but not used yet; the loop
     * below evaluates every sample.
     */
    random_numbers(N_SAMPLES, N_RANDOM, rand_num);

    for (n = 0; n < N_SAMPLES; ++n) {
        /* Input layer: bias unit followed by the 400 values of sample n. */
        test[0] = 1;
        for (i = 0; i < N_PIXELS; ++i) {
            test[i + 1] = X[n + i * N_SAMPLES];
        }

        /* Layer 2 (26x1): L2[1..25] = Theta1^T * test, L2[0] is the bias. */
        cblas_sgemm(CblasRowMajor, CblasTrans, CblasNoTrans,
                    N_HIDDEN, 1, N_PIXELS + 1, 1.0, Theta1, N_HIDDEN,
                    test, 1, 0.0, L2 + 1, 1);
        /* sigmoid activation */
        for (i = 1; i <= N_HIDDEN; ++i) {
            L2[i] = 1 / (1 + exp(-L2[i]));
        }
        L2[0] = 1;

        /* Layer 3 (10x1): L3 = Theta2^T * L2. */
        cblas_sgemm(CblasRowMajor, CblasTrans, CblasNoTrans,
                    N_LABELS, 1, N_HIDDEN + 1, 1.0, Theta2, N_LABELS,
                    L2, 1, 0.0, L3, 1);

        /* The prediction is the output unit with the largest value. */
        max = 0;
        for (i = 0; i < N_LABELS; ++i) {
            if (L3[i] > L3[max]) max = i;
        }

        /* Output unit k stands for label k + 1; both sides are reduced
         * modulo 10 (presumably label 10 encodes the digit 0 - confirm
         * against the data set). */
        if ((max + 1) % 10 != (int) y[n] % 10) {
            printf("%4d - predicted: %d | digit is: %d\n", n, (max + 1) % 10, (int) y[n] % 10);
            errors++;
        }
    }
    printf("accuracy: %f\n", 100 * (N_SAMPLES - (float) errors) / N_SAMPLES);
    rc = 0;

cleanup:
    /* free(NULL) is a no-op, so this is safe after a partial allocation. */
    free(X);
    free(y);
    free(test);
    free(L2);
    free(L3);
    free(Theta1);
    free(Theta2);
    free(rand_num);
    return rc;
}

I've put the MNIST dataset in HDF5, you can download it here: https://send.firefox.com/download/7f815eb602434217/#CjjlBzTawTx8CFGP_uGpxg (link will expire in 7 days)

gcc -Wall -pedantic -lhdf5 -l hdf5_hl -lgslcblas -ldl -lm fw_propagation.c
./a.out
... (list of errors)
accuracy: 97.519997

Not awful, but nothing to boast about. I found this code on an old hard disk; it doesn't seem to choose digits randomly. It's either bit rot or I got bored with it without even finishing. I don't remember; I was surprised it still compiles.

8