DAAL.KmeansStep1Mapper.java Source code

Java tutorial

Introduction

Here is the source code for DAAL.KmeansStep1Mapper.java

Source

/* file: KmeansStep1Mapper.java */
/*
 //  Copyright(C) 2014-2015 Intel Corporation. All Rights Reserved.
 //
 //  The source code, information  and  material ("Material") contained herein is
 //  owned  by Intel Corporation or its suppliers or licensors, and title to such
 //  Material remains  with Intel Corporation  or its suppliers or licensors. The
 //  Material  contains proprietary information  of  Intel or  its  suppliers and
 //  licensors. The  Material is protected by worldwide copyright laws and treaty
 //  provisions. No  part  of  the  Material  may  be  used,  copied, reproduced,
 //  modified, published, uploaded, posted, transmitted, distributed or disclosed
 //  in any way  without Intel's  prior  express written  permission. No  license
 //  under  any patent, copyright  or  other intellectual property rights  in the
 //  Material  is  granted  to  or  conferred  upon  you,  either  expressly,  by
 //  implication, inducement,  estoppel or  otherwise.  Any  license  under  such
 //  intellectual  property  rights must  be express  and  approved  by  Intel in
 //  writing.
 //
 //  *Third Party trademarks are the property of their respective owners.
 //
 //  Unless otherwise  agreed  by Intel  in writing, you may not remove  or alter
 //  this  notice or  any other notice embedded  in Materials by Intel or Intel's
 //  suppliers or licensors in any way.
 */

package DAAL;

import java.io.IOException;
import java.io.OutputStreamWriter;
import java.io.BufferedWriter;
import java.io.BufferedReader;
import java.io.FileReader;
import java.io.InputStreamReader;
import java.util.Arrays;

import org.apache.hadoop.fs.*;
import org.apache.hadoop.io.*;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Mapper.Context;
import org.apache.hadoop.conf.Configuration;

import com.intel.daal.data_management.data.HomogenNumericTable;
import com.intel.daal.algorithms.kmeans.*;
import com.intel.daal.data_management.data.*;
import com.intel.daal.services.*;

public class KmeansStep1Mapper extends Mapper<Object, Text, IntWritable, WriteableData> {

    private static final int nFeatures = 20;
    private static final int nVectorsInBlock = 10000;
    private static final long nClusters = 20;

    /* Index is supposed to be a sequence number for the split */
    private int index = 0;
    private int i = 0;
    private int totalTasks = 0;

    @Override
    public void setup(Context context) {
        index = context.getTaskAttemptID().getTaskID().getId();
        Configuration conf = context.getConfiguration();
        totalTasks = conf.getInt("mapred.map.tasks", 0);
    }

    @Override
    public void map(Object key, Text value, Context context) throws IOException, InterruptedException {

        /* Read a data set */
        String filePath = "/Hadoop/Kmeans/data/" + value;
        double[] data = new double[nFeatures * nVectorsInBlock];
        readData(filePath, nFeatures, nVectorsInBlock, data);

        DaalContext daalContext = new DaalContext();

        HomogenNumericTable ntData = new HomogenNumericTable(daalContext, data, nFeatures, nVectorsInBlock);

        /* Create an algorithm to compute k-means on local nodes */
        DistributedStep1Local kmeansLocal = new DistributedStep1Local(daalContext, Double.class,
                Method.defaultDense, nClusters);

        /* Get the centroids table computed in step 2 */
        SequenceFile.Reader reader = new SequenceFile.Reader(new Configuration(),
                SequenceFile.Reader.file(new Path("/Hadoop/Kmeans/initResults/centroids")));
        IntWritable step1key = new IntWritable();
        WriteableData step1value = new WriteableData();
        reader.next(step1key, step1value);
        reader.close();

        HomogenNumericTable centroids = (HomogenNumericTable) step1value.getObject(daalContext);

        /* Set the algorithm parameters */
        kmeansLocal.input.set(InputId.data, ntData);
        kmeansLocal.input.set(InputId.inputCentroids, centroids);

        /* Compute k-means on local nodes */
        PartialResult pres = kmeansLocal.compute();

        /* Write the data prepended with a data set sequence number. Needed to know the position of the data set in the input data */
        context.write(new IntWritable(0), new WriteableData(index, pres));

        daalContext.dispose();
        index += totalTasks;
    }

    private static void readData(String dataset, int nFeatures, int nVectors, double[] data) {
        System.out.println("readData " + dataset);
        try {
            Path pt = new Path(dataset);
            FileSystem fs = FileSystem.get(new Configuration());
            BufferedReader bufferedReader = new BufferedReader(new InputStreamReader(fs.open(pt)));

            int nLine = 0;
            for (String line; ((line = bufferedReader.readLine()) != null) && (nLine < nVectors); nLine++) {
                String[] elements = line.split(",");
                for (int j = 0; j < nFeatures; j++) {
                    data[nLine * nFeatures + j] = Double.parseDouble(elements[j]);
                }
            }
            bufferedReader.close();
        } catch (IOException e) {
            e.printStackTrace();
        } catch (NumberFormatException e) {
            e.printStackTrace();
        }
    }
}