Example usage for org.apache.hadoop.io SequenceFile createWriter

Introduction

On this page you can find example usage of org.apache.hadoop.io.SequenceFile.createWriter.

Prototype

@Deprecated
public static Writer createWriter(Configuration conf, FSDataOutputStream out, Class keyClass, Class valClass,
        CompressionType compressionType, CompressionCodec codec) throws IOException 

Document

Construct the preferred type of 'raw' SequenceFile Writer.
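
Below is a minimal, self-contained sketch of this overload. The output path, key/value classes, and codec are illustrative assumptions, not part of the API. With this stream-based overload the caller creates and owns the FSDataOutputStream, so the stream should be closed explicitly after the writer, as the Importer example below also does.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IOUtils;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.compress.DefaultCodec;

public class CreateWriterSketch {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(conf);
        Path path = new Path("/tmp/seqfile-example.seq"); // hypothetical output path

        // Caller opens the stream; the deprecated overload wraps it in a Writer.
        FSDataOutputStream out = fs.create(path);
        SequenceFile.Writer writer = SequenceFile.createWriter(conf, out, Text.class, LongWritable.class,
                SequenceFile.CompressionType.BLOCK, new DefaultCodec());
        try {
            writer.append(new Text("key"), new LongWritable(42L));
        } finally {
            IOUtils.closeStream(writer);
            out.close(); // stream was opened by the caller, so close it here
        }

        // Since this overload is deprecated, Hadoop 2.x and later expose an
        // equivalent Writer.Option-based form:
        // SequenceFile.Writer w = SequenceFile.createWriter(conf,
        //         SequenceFile.Writer.file(path),
        //         SequenceFile.Writer.keyClass(Text.class),
        //         SequenceFile.Writer.valueClass(LongWritable.class),
        //         SequenceFile.Writer.compression(SequenceFile.CompressionType.BLOCK,
        //                 new DefaultCodec()));
    }
}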

Usage

From source file:PiEstimator.java

License:Apache License

/**
 * Run a map/reduce job for estimating Pi.
 *
 * @return the estimated value of Pi
 */
public static BigDecimal estimate(int numMaps, long numPoints, JobConf jobConf) throws IOException {
    // setup job conf
    jobConf.setJobName(PiEstimator.class.getSimpleName());

    jobConf.setInputFormat(SequenceFileInputFormat.class);

    jobConf.setOutputKeyClass(BooleanWritable.class);
    jobConf.setOutputValueClass(LongWritable.class);
    jobConf.setOutputFormat(SequenceFileOutputFormat.class);

    jobConf.setMapperClass(PiMapper.class);
    jobConf.setNumMapTasks(numMaps);

    jobConf.setReducerClass(PiReducer.class);
    jobConf.setNumReduceTasks(1);

    // turn off speculative execution, because DFS doesn't handle
    // multiple writers to the same file.
    jobConf.setSpeculativeExecution(false);

    // setup input/output directories
    final Path inDir = new Path(TMP_DIR, "in");
    final Path outDir = new Path(TMP_DIR, "out");
    FileInputFormat.setInputPaths(jobConf, inDir);
    FileOutputFormat.setOutputPath(jobConf, outDir);

    final FileSystem fs = FileSystem.get(jobConf);
    if (fs.exists(TMP_DIR)) {
        throw new IOException(
                "Tmp directory " + fs.makeQualified(TMP_DIR) + " already exists.  Please remove it first.");
    }
    if (!fs.mkdirs(inDir)) {
        throw new IOException("Cannot create input directory " + inDir);
    }

    try {
        // generate an input file for each map task
        for (int i = 0; i < numMaps; ++i) {
            final Path file = new Path(inDir, "part" + i);
            final LongWritable offset = new LongWritable(i * numPoints);
            final LongWritable size = new LongWritable(numPoints);
            final SequenceFile.Writer writer = SequenceFile.createWriter(fs, jobConf, file, LongWritable.class,
                    LongWritable.class, CompressionType.NONE);
            try {
                writer.append(offset, size);
            } finally {
                writer.close();
            }
            System.out.println("Wrote input for Map #" + i);
        }

        // start a map/reduce job
        System.out.println("Starting Job");
        final long startTime = System.currentTimeMillis();
        JobClient.runJob(jobConf);
        final double duration = (System.currentTimeMillis() - startTime) / 1000.0;
        System.out.println("Job Finished in " + duration + " seconds");

        // read outputs
        Path inFile = new Path(outDir, "reduce-out");
        LongWritable numInside = new LongWritable();
        LongWritable numOutside = new LongWritable();
        SequenceFile.Reader reader = new SequenceFile.Reader(fs, inFile, jobConf);
        try {
            reader.next(numInside, numOutside);
        } finally {
            reader.close();
        }

        // compute estimated value
        return BigDecimal.valueOf(4).setScale(20).multiply(BigDecimal.valueOf(numInside.get()))
                .divide(BigDecimal.valueOf(numMaps)).divide(BigDecimal.valueOf(numPoints));
    } finally {
        fs.delete(TMP_DIR, true);
    }
}

From source file:Importer.java

License:Open Source License

public static void copyFile(File file) throws Exception {
    //    String TEST_PREFIX = "";
    File destFile = new File(outDir, file.getName() + ".seq");
    Path dest = new Path(destFile.getAbsolutePath());

    Configuration conf = new Configuration();
    FileSystem fileSys = org.apache.hadoop.fs.FileSystem.get(new java.net.URI(conf.get("fs.default.name")),
            conf);
    CompressionCodec codec = new DefaultCodec();
    fileSys.mkdirs(dest.getParent());
    FSDataOutputStream outputStr = fileSys.create(dest);
    seqFileWriter = SequenceFile.createWriter(conf, outputStr, Text.class, Text.class,
            SequenceFile.CompressionType.BLOCK, codec);
    String filename = file.getName();
    InputStream in = new BufferedInputStream(new FileInputStream(file));
    if (filename.endsWith(".bz2")) {
        in.read();
        in.read(); //snarf header
        in = new CBZip2InputStream(in);
    }
    BufferedReader br = new BufferedReader(new InputStreamReader(in, "US-ASCII"));

    System.out.println("working on file " + file);
    int records = 0;
    long bytes = 0, bytes_since_status = 0;
    long startTime = System.currentTimeMillis();
    String s = null;
    Text content = new Text();
    while ((s = br.readLine()) != null) {
        if (s.startsWith("---END.OF.DOCUMENT---")) {
            Text name = new Text(hash(content));
            seqFileWriter.append(name, content);
            records++;
            content = new Text();
        } else {
            byte[] line_as_bytes = (s + " ").getBytes();
            for (byte b : line_as_bytes) {
                assert b < 128 : "found an unexpected high-bit set";
            }

            content.append(line_as_bytes, 0, line_as_bytes.length);
            bytes += line_as_bytes.length;
            /*
            bytes_since_status += line_as_bytes.length;
            if(bytes_since_status > 10 * 1024 * 1024) { //every 10 MB
              System.err.print('.');
              bytes_since_status = 0;
            }*/
        }
    } //end while
    if (content.getLength() > 5) {
        Text name = new Text(hash(content));
        seqFileWriter.append(name, content);
        records++;
    }
    totalBytes += bytes;
    totalRecords += records;
    long time = (System.currentTimeMillis() - startTime) / 1000 + 1;
    long kbSec = bytes / 1024 / time;
    System.out.println(new java.util.Date());
    System.out.println("File " + file.getName() + " " + records + " records, " + bytes + " bytes in " + time
            + " seconds (" + kbSec + " KB/sec).");
    in.close();
    seqFileWriter.close();
    outputStr.close();
}

From source file:alluxio.client.hadoop.DFSIOIntegrationTest.java

License:Apache License

@SuppressWarnings("deprecation")
private void createControlFile(org.apache.hadoop.fs.FileSystem fs, long nrBytes, // in bytes
        int nrFiles) throws IOException {
    LOG.info("creating control file: " + nrBytes + " bytes, " + nrFiles + " files");

    Path controlDir = getControlDir(mConfig);

    if (!fs.exists(controlDir)) {

        fs.delete(controlDir, true);

        for (int i = 0; i < nrFiles; i++) {
            String name = getFileName(i);
            Path controlFile = new Path(controlDir, "in_file_" + name);
            SequenceFile.Writer writer = null;
            try {
                writer = SequenceFile.createWriter(fs, mConfig, controlFile, Text.class, LongWritable.class,
                        CompressionType.NONE);
                writer.append(new Text(name), new LongWritable(nrBytes));
            } catch (Exception e) {
                throw new IOException(e.getLocalizedMessage());
            } finally {
                if (writer != null) {
                    writer.close();
                }
                writer = null;
            }
        }
    }
    LOG.info("created control files for: " + nrFiles + " files");
}

From source file:at.illecker.hama.hybrid.examples.hellohybrid.HelloHybridBSP.java

License:Apache License

private static void prepareInput(Configuration conf, Path inputPath, Path exampleFile, int n)
        throws IOException {
    FileSystem fs = inputPath.getFileSystem(conf);

    // Create input file writers depending on bspTaskNum
    int bspTaskNum = conf.getInt("bsp.peers.num", 1);
    SequenceFile.Writer[] inputWriters = new SequenceFile.Writer[bspTaskNum];
    for (int i = 0; i < bspTaskNum; i++) {
        Path inputFile = new Path(inputPath, "input" + i + ".seq");
        LOG.info("inputFile: " + inputFile.toString());
        inputWriters[i] = SequenceFile.createWriter(fs, conf, inputFile, IntWritable.class, NullWritable.class,
                CompressionType.NONE);
    }

    // Create example file writer
    SequenceFile.Writer exampleWriter = SequenceFile.createWriter(fs, conf, exampleFile, IntWritable.class,
            NullWritable.class, CompressionType.NONE);

    // Write random values to input files and example
    IntWritable inputKey = new IntWritable();
    NullWritable nullValue = NullWritable.get();
    Random r = new Random();
    for (long i = 0; i < n; i++) {
        inputKey.set(r.nextInt(n));
        for (int j = 0; j < inputWriters.length; j++) {
            inputWriters[j].append(inputKey, nullValue);
        }
        inputKey.set(r.nextInt(n));
        exampleWriter.append(inputKey, nullValue);
    }

    // Close file writers
    for (int j = 0; j < inputWriters.length; j++) {
        inputWriters[j].close();
    }
    exampleWriter.close();
}

From source file:at.illecker.hama.hybrid.examples.kmeans.KMeansHybridBSP.java

License:Apache License

private void recalculateAssignmentsAndWrite(
        BSPPeer<PipesVectorWritable, NullWritable, IntWritable, PipesVectorWritable, CenterMessage> peer)
        throws IOException {

    IntWritable keyWrite = new IntWritable();
    for (DoubleVector v : m_cache) {
        final int lowestDistantCenter = getNearestCenter(v);
        keyWrite.set(lowestDistantCenter);
        peer.write(keyWrite, new PipesVectorWritable(v));
    }

    // just on the first task write the centers to filesystem to prevent
    // collisions
    if (peer.getPeerName().equals(peer.getPeerName(0))) {
        String pathString = m_conf.get(CONF_CENTER_OUT_PATH);
        if (pathString != null) {
            final SequenceFile.Writer dataWriter = SequenceFile.createWriter(FileSystem.get(m_conf), m_conf,
                    new Path(pathString), PipesVectorWritable.class, NullWritable.class, CompressionType.NONE);
            final NullWritable value = NullWritable.get();

            for (DoubleVector center : m_centers_cpu) {
                dataWriter.append(new PipesVectorWritable(center), value);
            }
            dataWriter.close();
        }
    }
}

From source file:at.illecker.hama.hybrid.examples.kmeans.KMeansHybridBSP.java

License:Apache License

@Override
public void bspGpu(
        BSPPeer<PipesVectorWritable, NullWritable, IntWritable, PipesVectorWritable, CenterMessage> peer,
        Rootbeer rootbeer) throws IOException, SyncException, InterruptedException {

    long startTime = 0;
    if (m_timeMeasurement) {
        startTime = System.currentTimeMillis();
    }

    // Fetch inputs
    final List<DoubleVector> inputs = new ArrayList<DoubleVector>();
    final PipesVectorWritable key = new PipesVectorWritable();
    final NullWritable nullValue = NullWritable.get();
    while (peer.readNext(key, nullValue)) {
        inputs.add(key.getVector());
    }
    // Convert inputs to double[][]
    double[][] inputsArr = new double[inputs.size()][inputs.get(0).getLength()];
    for (int i = 0; i < inputs.size(); i++) {
        double[] vector = inputs.get(i).toArray();
        for (int j = 0; j < vector.length; j++) {
            inputsArr[i][j] = vector[j];
        }
    }

    // Logging
    if (m_isDebuggingEnabled) {
        m_logger.writeChars("KMeansHybrid.bspGpu executed on GPU!\n");
        m_logger.writeChars(
                "KMeansHybrid.bspGpu blockSize: " + m_blockSize + " gridSize: " + m_gridSize + "\n");
        m_logger.writeChars("KMeansHybrid.bspGpu inputSize: " + inputs.size() + "\n");
    }

    KMeansHybridKernel kernel = new KMeansHybridKernel(inputsArr, m_centers_gpu,
            m_conf.getInt(CONF_MAX_ITERATIONS, 0), peer.getAllPeerNames());

    // Run GPU Kernels
    Context context = rootbeer.createDefaultContext();
    Stopwatch watch = new Stopwatch();
    watch.start();
    rootbeer.run(kernel, new ThreadConfig(m_blockSize, m_gridSize, m_blockSize * m_gridSize), context);
    watch.stop();

    // Output inputs with corresponding new center id
    for (int i = 0; i < inputs.size(); i++) {
        peer.write(new IntWritable(kernel.m_input_centers[i]), new PipesVectorWritable(inputs.get(i)));
    }

    // Output new Centers only on first task
    // to prevent collisions
    if (peer.getPeerName().equals(peer.getPeerName(0))) {
        String pathString = m_conf.get(CONF_CENTER_OUT_PATH);
        if (pathString != null) {
            final SequenceFile.Writer dataWriter = SequenceFile.createWriter(FileSystem.get(m_conf), m_conf,
                    new Path(pathString), PipesVectorWritable.class, NullWritable.class, CompressionType.NONE);

            for (int i = 0; i < kernel.m_centers.length; i++) {
                dataWriter.append(new PipesVectorWritable(new DenseDoubleVector(kernel.m_centers[i])),
                        nullValue);
            }
            dataWriter.close();
        }
    }

    long stopTime = System.currentTimeMillis();
    if (m_timeMeasurement) {
        LOG.info("# bspGpuTime: " + ((stopTime - startTime) / 1000.0) + " sec");

        if (m_isDebuggingEnabled) {
            m_logger.writeChars(
                    "PiEstimatorHybrid,bspGpuTime: " + ((stopTime - startTime) / 1000.0) + " sec\n");
        }
    }

    // Logging
    if (m_isDebuggingEnabled) {
        List<StatsRow> stats = context.getStats();
        for (StatsRow row : stats) {
            m_logger.writeChars("  StatsRow:\n");
            m_logger.writeChars("    serial time: " + row.getSerializationTime() + "\n");
            m_logger.writeChars("    exec time: " + row.getExecutionTime() + "\n");
            m_logger.writeChars("    deserial time: " + row.getDeserializationTime() + "\n");
            m_logger.writeChars("    num blocks: " + row.getNumBlocks() + "\n");
            m_logger.writeChars("    num threads: " + row.getNumThreads() + "\n");
            m_logger.writeChars("GPUTime: " + watch.elapsedTimeMillis() + " ms" + "\n");
        }

        m_logger.close();
    }
}

From source file:at.illecker.hama.hybrid.examples.kmeans.KMeansHybridBSP.java

License:Apache License

/**
 * prepareInputData
 * 
 */
public static void prepareInputData(Configuration conf, FileSystem fs, Path in, Path centerIn, int numBspTask,
        int numGPUBspTask, long n, int k, int vectorDimension, Random rand, int GPUPercentage)
        throws IOException {

    // Delete input files if already exist
    if (fs.exists(in)) {
        fs.delete(in, true);
    }
    if (fs.exists(centerIn)) {
        fs.delete(centerIn, true);
    }

    final NullWritable nullValue = NullWritable.get();
    final SequenceFile.Writer centerWriter = SequenceFile.createWriter(fs, conf, centerIn,
            PipesVectorWritable.class, NullWritable.class, CompressionType.NONE);

    // Compute work distributions
    int cpuTaskNum = numBspTask - numGPUBspTask;
    long inputVectorsPerGPUTask = 0;
    long inputVectorsPerCPU = 0;
    long inputVectorsPerCPUTask = 0;
    if ((numGPUBspTask > 0) && (GPUPercentage > 0) && (GPUPercentage <= 100)) {
        inputVectorsPerGPUTask = (n * GPUPercentage) / 100;
        inputVectorsPerCPU = n - inputVectorsPerGPUTask;
    } else {
        inputVectorsPerCPU = n;
    }
    if (cpuTaskNum > 0) {
        inputVectorsPerCPUTask = inputVectorsPerCPU / cpuTaskNum;
    }

    // long interval = totalNumberOfPoints / numBspTask;
    long centers = 0;

    for (int part = 0; part < numBspTask; part++) {
        Path partIn = new Path(in, "part" + part + ".seq");
        final SequenceFile.Writer dataWriter = SequenceFile.createWriter(fs, conf, partIn,
                PipesVectorWritable.class, NullWritable.class, CompressionType.NONE);

        long interval = 0;
        if (part > cpuTaskNum) {
            interval = inputVectorsPerGPUTask;
        } else {
            interval = inputVectorsPerCPUTask;
        }
        long start = interval * part;
        long end = start + interval - 1;
        if ((numBspTask - 1) == part) {
            end = n; // set to totalNumberOfPoints
        }
        LOG.info("Partition " + part + ": from " + start + " to " + end);

        for (long i = start; i <= end; i++) {
            double[] arr = new double[vectorDimension];
            for (int j = 0; j < vectorDimension; j++) {
                if (rand != null) {
                    arr[j] = rand.nextInt((int) n);
                } else {
                    arr[j] = i;
                }
            }
            PipesVectorWritable vector = new PipesVectorWritable(new DenseDoubleVector(arr));

            // LOG.info("input[" + i + "]: " + Arrays.toString(arr));
            dataWriter.append(vector, nullValue);

            if (k > centers) {
                // LOG.info("center[" + i + "]: " + Arrays.toString(arr));
                centerWriter.append(vector, nullValue);
                centers++;
            }

        }
        dataWriter.close();
    }

    centerWriter.close();
}

From source file:at.illecker.hama.hybrid.examples.kmeans.KMeansHybridBSP.java

License:Apache License

/**
 * Create testExample vectors and centers as input from
 * http://www.maplesoft.com/support/help/Maple/view.aspx?path=NAG/g03efc
 *
 * n := 20: vectorDimension := 5: k := 3: maxIterations := 10:
 * 
 * x := Matrix([ [77.3, 13, 9.699999999999999, 1.5, 6.4], [82.5, 10, 7.5, 1.5,
 * 6.5], [66.90000000000001, 20.6, 12.5, 2.3, 7], [47.2, 33.8, 19, 2.8, 5.8],
 * [65.3, 20.5, 14.2, 1.9, 6.9], [83.3, 10, 6.7, 2.2, 7], [81.59999999999999,
 * 12.7, 5.7, 2.9, 6.7], [47.8, 36.5, 15.7, 2.3, 7.2], [48.6, 37.1, 14.3, 2.1,
 * 7.2], [61.6, 25.5, 12.9, 1.9, 7.3], [58.6, 26.5, 14.9, 2.4, 6.7], [69.3,
 * 22.3, 8.4, 4, 7], [61.8, 30.8, 7.4, 2.7, 6.4], [67.7, 25.3, 7, 4.8, 7.3],
 * [57.2, 31.2, 11.6, 2.4, 6.5], [67.2, 22.7, 10.1, 3.3, 6.2], [59.2, 31.2,
 * 9.6, 2.4, 6], [80.2, 13.2, 6.6, 2, 5.8], [82.2, 11.1, 6.7, 2.2, 7.2],
 * [69.7, 20.7, 9.6, 3.1, 5.9]], datatype=float[8], order='C_order'):
 * 
 * cmeans := Matrix( [[82.5, 10, 7.5, 1.5, 6.5], [47.8, 36.5, 15.7, 2.3, 7.2],
 * [67.2, 22.7, 10.1, 3.3, 6.2]], datatype=float[8], order='C_order'):
 * 
 * 
 * Results
 * 
 * cmeans := Matrix([ [81.1833333333333371, 11.6666666666666661,
 * 7.1499999999999947, 2.0500000000000027, 6.6000000000000052],
 * [47.8666666666666671, 35.8000000000000043, 16.3333333333333321,
 * 2.3999999999999992, 6.7333333333333340], [64.0454545454545610,
 * 25.2090909090909037, 10.7454545454545425, 2.83636363636363642,
 * 6.65454545454545521]]):
 * 
 * inc := Vector([0, 0, 2, 1, 2, 0, 0, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 0, 0,
 * 2]):
 * 
 * nic := Vector([6, 3, 11]):
 * 
 * css := Vector([46.5716666666666583, 20.3800000000000097,
 * 468.896363636363503]):
 * 
 */
public static void prepareTestInput(Configuration conf, FileSystem fs, Path in, Path centerIn)
        throws IOException {

    // Delete input files if already exist
    if (fs.exists(in)) {
        fs.delete(in, true);
    }
    if (fs.exists(centerIn)) {
        fs.delete(centerIn, true);
    }

    double[][] input = { { 77.3, 13, 9.699999999999999, 1.5, 6.4 }, { 82.5, 10, 7.5, 1.5, 6.5 },
            { 66.90000000000001, 20.6, 12.5, 2.3, 7 }, { 47.2, 33.8, 19, 2.8, 5.8 },
            { 65.3, 20.5, 14.2, 1.9, 6.9 }, { 83.3, 10, 6.7, 2.2, 7 },
            { 81.59999999999999, 12.7, 5.7, 2.9, 6.7 }, { 47.8, 36.5, 15.7, 2.3, 7.2 },
            { 48.6, 37.1, 14.3, 2.1, 7.2 }, { 61.6, 25.5, 12.9, 1.9, 7.3 }, { 58.6, 26.5, 14.9, 2.4, 6.7 },
            { 69.3, 22.3, 8.4, 4, 7 }, { 61.8, 30.8, 7.4, 2.7, 6.4 }, { 67.7, 25.3, 7, 4.8, 7.3 },
            { 57.2, 31.2, 11.6, 2.4, 6.5 }, { 67.2, 22.7, 10.1, 3.3, 6.2 }, { 59.2, 31.2, 9.6, 2.4, 6 },
            { 80.2, 13.2, 6.6, 2, 5.8 }, { 82.2, 11.1, 6.7, 2.2, 7.2 }, { 69.7, 20.7, 9.6, 3.1, 5.9 } };
    double[][] centers = { { 82.5, 10, 7.5, 1.5, 6.5 }, { 47.8, 36.5, 15.7, 2.3, 7.2 },
            { 67.2, 22.7, 10.1, 3.3, 6.2 } };

    final NullWritable nullValue = NullWritable.get();

    // Write inputs
    LOG.info("inputs: ");
    final SequenceFile.Writer dataWriter = SequenceFile.createWriter(fs, conf, in, PipesVectorWritable.class,
            NullWritable.class, CompressionType.NONE);

    for (int i = 0; i < input.length; i++) {
        dataWriter.append(new PipesVectorWritable(new DenseDoubleVector(input[i])), nullValue);
        LOG.info("input[" + i + "]: " + Arrays.toString(input[i]));
    }

    dataWriter.close();

    // Write centers
    LOG.info("centers: ");
    final SequenceFile.Writer centerWriter = SequenceFile.createWriter(fs, conf, centerIn,
            PipesVectorWritable.class, NullWritable.class, CompressionType.NONE);

    for (int i = 0; i < centers.length; i++) {
        centerWriter.append(new PipesVectorWritable(new DenseDoubleVector(centers[i])), nullValue);
        LOG.info("center[" + i + "]: " + Arrays.toString(centers[i]));
    }

    centerWriter.close();
}

From source file:at.illecker.hama.hybrid.examples.matrixmultiplication2.DistributedRowMatrix.java

License:Apache License

public static List<Path> writeDistributedRowMatrix(Configuration conf, double[][] matrix, int rows, int columns,
        Path path, int numBspTask, int numGPUBspTask, int GPUPercentage) throws IOException {

    List<Path> splittedFiles = new ArrayList<Path>();

    // Compute work distributions
    int cpuTaskNum = numBspTask - numGPUBspTask;
    int inputVectorsPerGPUTask = 0;
    int inputVectorsPerCPU = 0;
    int inputVectorsPerCPUTask = 0;
    if ((numGPUBspTask > 0) && (GPUPercentage > 0) && (GPUPercentage <= 100)) {
        inputVectorsPerGPUTask = (rows * GPUPercentage) / 100;
        inputVectorsPerCPU = rows - inputVectorsPerGPUTask;
    } else {
        inputVectorsPerCPU = rows;
    }
    if (cpuTaskNum > 0) {
        inputVectorsPerCPUTask = inputVectorsPerCPU / cpuTaskNum;
    }

    for (int part = 0; part < numBspTask; part++) {

        Path partIn = new Path(path, "part" + part + ".seq");
        splittedFiles.add(partIn);
        FileSystem fs = FileSystem.get(conf);
        final SequenceFile.Writer dataWriter = SequenceFile.createWriter(fs, conf, partIn, IntWritable.class,
                VectorWritable.class, CompressionType.NONE);

        int interval = 0;
        if (part > cpuTaskNum) {
            interval = inputVectorsPerGPUTask;
        } else {
            interval = inputVectorsPerCPUTask;
        }
        int start = interval * part;
        int end = start + interval;
        if ((numBspTask - 1) == part) {
            end = rows; // set to totalRows
        }
        LOG.info("Partition " + part + " file " + partIn.getParent().getName() + "/" + partIn.getName()
                + " from " + start + " to " + (end - 1));

        for (int i = start; i < end; i++) {
            DenseDoubleVector rowVector = new DenseDoubleVector(matrix[i]);
            dataWriter.append(new IntWritable(i), new VectorWritable(rowVector));
        }
        dataWriter.close();
    }

    return splittedFiles;
}

From source file:at.illecker.hama.hybrid.examples.onlinecf.OnlineCFHybridBenchmark.java

License:Apache License

public static List<double[]> generateRandomInputData(Configuration conf, FileSystem fs, Path in, int numBspTask,
        int numGPUBspTask, int userCount, int itemCount, int percentNonZeroValues, int GPUPercentage,
        int maxTestPrefs) throws IOException {

    // Delete input directory if already exist
    if (fs.exists(in)) {
        fs.delete(in, true);
    }

    Random rand = new Random(32L);
    Set<Map.Entry<Long, Long>> userItemPairs = new HashSet<Map.Entry<Long, Long>>();
    List<double[]> testItems = new ArrayList<double[]>();

    int possibleUserItemRatings = userCount * itemCount;
    int userItemRatings = possibleUserItemRatings * percentNonZeroValues / 100;
    System.out.println("generateRandomInputData possibleRatings: " + possibleUserItemRatings + " ratings: "
            + userItemRatings);

    // Compute work distributions
    int cpuTaskNum = numBspTask - numGPUBspTask;
    long ratingsPerGPUTask = 0;
    long ratingsPerCPU = 0;
    long ratingsPerCPUTask = 0;
    if ((numGPUBspTask > 0) && (GPUPercentage > 0) && (GPUPercentage <= 100)) {
        ratingsPerGPUTask = (userItemRatings * GPUPercentage) / 100;
        ratingsPerCPU = userItemRatings - ratingsPerGPUTask;
    } else {
        ratingsPerCPU = userItemRatings;
    }
    if (cpuTaskNum > 0) {
        ratingsPerCPUTask = ratingsPerCPU / cpuTaskNum;
    }

    System.out.println("generateRandomInputData ratingsPerGPUTask: " + ratingsPerGPUTask + " ratingsPerCPU: "
            + ratingsPerCPU + " ratingsPerCPUTask: " + ratingsPerCPUTask);

    for (int part = 0; part < numBspTask; part++) {
        Path partIn = new Path(in, "part" + part + ".seq");
        final SequenceFile.Writer dataWriter = SequenceFile.createWriter(fs, conf, partIn, LongWritable.class,
                PipesVectorWritable.class, CompressionType.NONE);

        long interval = 0;
        if (part > cpuTaskNum) {
            interval = ratingsPerGPUTask;
        } else {
            interval = ratingsPerCPUTask;
        }
        long start = interval * part;
        long end = start + interval - 1;
        if ((numBspTask - 1) == part) {
            end = userItemRatings;
        }
        System.out.println("Partition " + part + ": from " + start + " to " + end);

        for (long i = start; i <= end; i++) {

            // Find new user item rating which was not used before
            Map.Entry<Long, Long> userItemPair;
            do {
                long userId = rand.nextInt(userCount);
                long itemId = rand.nextInt(itemCount);
                userItemPair = new AbstractMap.SimpleImmutableEntry<Long, Long>(userId, itemId);
            } while (userItemPairs.contains(userItemPair));

            // Add user item rating
            userItemPairs.add(userItemPair);

            // Generate rating
            int rating = rand.nextInt(5) + 1; // values between 1 and 5

            // Add user item rating to test data
            if (i < maxTestPrefs) {
                testItems.add(new double[] { userItemPair.getKey(), userItemPair.getValue(), rating });
            }

            // Write out user item rating
            dataWriter.append(new LongWritable(userItemPair.getKey()), new PipesVectorWritable(
                    new DenseDoubleVector(new double[] { userItemPair.getValue(), rating })));
        }
        dataWriter.close();
    }

    return testItems;
}