Example usage for org.apache.hadoop.io IntWritable IntWritable

List of usage examples for org.apache.hadoop.io IntWritable IntWritable

Introduction

On this page you can find example usage for the org.apache.hadoop.io IntWritable no-argument constructor, IntWritable().

Prototype

public IntWritable() 
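
This no-argument constructor creates an IntWritable whose value defaults to 0; the instance is then typically reused via set() and read via get(), or filled by Hadoop's Writable deserialization. A minimal standalone sketch (the class name IntWritableDemo is illustrative, not taken from the examples below):

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.DataInputStream;
import java.io.DataOutputStream;
import java.io.IOException;

import org.apache.hadoop.io.IntWritable;

public class IntWritableDemo {
    public static void main(String[] args) throws IOException {
        IntWritable w = new IntWritable(); // value defaults to 0
        w.set(42);                         // reuse the same instance
        System.out.println(w.get());      // prints 42

        // Round-trip through the Writable serialization used by SequenceFiles
        ByteArrayOutputStream bos = new ByteArrayOutputStream();
        w.write(new DataOutputStream(bos));

        IntWritable copy = new IntWritable();
        copy.readFields(new DataInputStream(new ByteArrayInputStream(bos.toByteArray())));
        System.out.println(copy.get());   // prints 42
    }
}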

Usage

From source file:RunPageRankSchimmy.java

License:Apache License

private float phase1(String path, int i, int j, int n, boolean useCombiner, boolean useInmapCombiner,
        boolean useRange) throws Exception {
    Configuration conf = getConf();

    String in = path + "/iter" + FORMAT.format(i);
    String out = path + "/iter" + FORMAT.format(j) + "t";
    String outm = out + "-mass";

    FileSystem fs = FileSystem.get(conf);

    // We need to actually count the number of part files to get the number
    // of partitions (because the directory might contain _log).
    int numPartitions = 0;
    for (FileStatus s : FileSystem.get(conf).listStatus(new Path(in))) {
        if (s.getPath().getName().contains("part-")) {
            numPartitions++;
        }
    }

    conf.setInt("NodeCount", n);

    Partitioner<IntWritable, Writable> p = null;

    if (useRange) {
        p = new RangePartitioner();
        ((Configurable) p).setConf(conf);
    } else {
        p = new HashPartitioner<IntWritable, Writable>();
    }

    // This is really annoying: the mapping between the partition numbers on
    // disk (i.e., part-XXXX) and the partition a file actually contains (i.e.,
    // key.hash % #reducers) is arbitrary... so we need to open each
    // partition and peek inside to find out.
    IntWritable key = new IntWritable();
    PageRankNode value = new PageRankNode();
    FileStatus[] status = fs.listStatus(new Path(in));

    StringBuilder sb = new StringBuilder();

    for (FileStatus f : status) {
        if (!f.getPath().getName().contains("part-")) {
            continue;
        }

        SequenceFile.Reader reader = new SequenceFile.Reader(conf, SequenceFile.Reader.file(f.getPath()));

        reader.next(key, value);
        int np = p.getPartition(key, value, numPartitions);
        reader.close();

        LOG.info(f.getPath() + "\t" + np);
        sb.append(np + "=" + f.getPath() + ";");
    }

    LOG.info(sb.toString().trim());

    LOG.info("PageRankSchimmy: iteration " + j + ": Phase1");
    LOG.info(" - input: " + in);
    LOG.info(" - output: " + out);
    LOG.info(" - nodeCnt: " + n);
    LOG.info(" - useCombiner: " + useCombiner);
    LOG.info(" - useInmapCombiner: " + useInmapCombiner);
    LOG.info(" - numPartitions: " + numPartitions);
    LOG.info(" - useRange: " + useRange);
    LOG.info("computed number of partitions: " + numPartitions);

    int numReduceTasks = numPartitions;

    conf.setInt("mapred.min.split.size", 1024 * 1024 * 1024);
    //conf.set("mapred.child.java.opts", "-Xmx2048m");

    conf.set("PageRankMassPath", outm);
    conf.set("BasePath", in);
    conf.set("PartitionMapping", sb.toString().trim());

    conf.setBoolean("mapred.map.tasks.speculative.execution", false);
    conf.setBoolean("mapred.reduce.tasks.speculative.execution", false);

    Job job = Job.getInstance(conf);
    job.setJobName("PageRankSchimmy:iteration" + j + ":Phase1");
    job.setJarByClass(RunPageRankSchimmy.class);

    job.setNumReduceTasks(numReduceTasks);

    FileInputFormat.setInputPaths(job, new Path(in));
    FileOutputFormat.setOutputPath(job, new Path(out));

    job.setInputFormatClass(SequenceFileInputFormat.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);

    job.setMapOutputKeyClass(IntWritable.class);
    job.setMapOutputValueClass(FloatWritable.class);

    job.setOutputKeyClass(IntWritable.class);
    job.setOutputValueClass(PageRankNode.class);

    if (useInmapCombiner) {
        job.setMapperClass(MapWithInMapperCombiningClass.class);
    } else {
        job.setMapperClass(MapClass.class);
    }

    if (useCombiner) {
        job.setCombinerClass(CombineClass.class);
    }

    if (useRange) {
        job.setPartitionerClass(RangePartitioner.class);
    }

    job.setReducerClass(ReduceClass.class);

    FileSystem.get(conf).delete(new Path(out), true);
    FileSystem.get(conf).delete(new Path(outm), true);

    long startTime = System.currentTimeMillis();
    job.waitForCompletion(true);
    System.out.println("Job Finished in " + (System.currentTimeMillis() - startTime) / 1000.0 + " seconds");

    float mass = Float.NEGATIVE_INFINITY;
    for (FileStatus f : fs.listStatus(new Path(outm))) {
        FSDataInputStream fin = fs.open(f.getPath());
        mass = sumLogProbs(mass, fin.readFloat());
        fin.close();
    }

    return mass;
}

From source file:ac.keio.sslab.nlp.lda.RowIdJob.java

License:Apache License

@SuppressWarnings("deprecation")
@Override
public int run(String[] args) throws Exception {

    addInputOption();
    addOutputOption();

    Map<String, List<String>> parsedArgs = parseArguments(args);
    if (parsedArgs == null) {
        return -1;
    }

    Configuration conf = getConf();
    FileSystem fs = FileSystem.get(conf);

    Path outputPath = getOutputPath();
    Path indexPath = new Path(outputPath, "docIndex");
    Path matrixPath = new Path(outputPath, "matrix");

    try (SequenceFile.Writer indexWriter = SequenceFile.createWriter(fs, conf, indexPath, IntWritable.class,
            Text.class);
            SequenceFile.Writer matrixWriter = SequenceFile.createWriter(fs, conf, matrixPath,
                    IntWritable.class, VectorWritable.class)) {
        IntWritable docId = new IntWritable();
        int i = 0;
        int numCols = 0;
        for (Pair<Text, VectorWritable> record : new SequenceFileDirIterable<Text, VectorWritable>(
                getInputPath(), PathType.LIST, PathFilters.logsCRCFilter(), null, true, conf)) {
            VectorWritable value = record.getSecond();
            docId.set(i);
            indexWriter.append(docId, record.getFirst());
            matrixWriter.append(docId, value);
            i++;
            numCols = value.get().size();
        }

        log.info("Wrote out matrix with {} rows and {} columns to {}", i, numCols, matrixPath);
        return 0;
    }
}

From source file:Analysis.A4_High_Traffic_Countries.Top_10_Countries_by_User_Traffic_Reducer.java

@Override
protected void cleanup(Context context) throws IOException, InterruptedException {
    for (Map.Entry<Integer, String> entry : top10.entrySet()) {

        IntWritable result = new IntWritable();

        //Integer key = entry.getKey();
        String value = entry.getValue().substring(0, 1).toUpperCase() + entry.getValue().substring(1);

        result.set(entry.getKey());

        // print top 10 countries
        context.write(new Text(value), result);
    }
}
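
The cleanup above allocates a fresh IntWritable (and Text) on every loop iteration. Hadoop code more commonly hoists a single instance outside the loop and reuses it via set(), as the other examples on this page do. A hedged sketch of that variant, assuming the same top10 map and Context as above:

    IntWritable result = new IntWritable();
    Text country = new Text();
    for (Map.Entry<Integer, String> entry : top10.entrySet()) {
        String value = entry.getValue().substring(0, 1).toUpperCase() + entry.getValue().substring(1);
        result.set(entry.getKey());
        country.set(value);
        context.write(country, result);
    }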

From source file:ar.edu.ungs.garules.CensusJob.java

License:Apache License

/**
 * Takes the reducer output from the distributed file system and loads it into the in-memory "ocurrencias" map.
 * @param conf
 * @param path
 * @throws IOException
 */
@SuppressWarnings("deprecation")
private static void llenarOcurrencias(Configuration conf, String path) throws IOException {
    FileSystem fs = new DistributedFileSystem(
            new InetSocketAddress(DEFAULT_FILE_SYSTEM_HOST, DEFAULT_FILE_SYSTEM_PORT), conf);
    SequenceFile.Reader reader = new SequenceFile.Reader(fs, new Path(path + "/part-r-00000"), conf);

    Text key = new Text();
    IntWritable value = new IntWritable();
    while (reader.next(key, value))
        ocurrencias.put(key.toString(), value.get());
    reader.close();
}
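
The SequenceFile.Reader(FileSystem, Path, Configuration) constructor used above is deprecated (hence the @SuppressWarnings annotation). The first example on this page already shows the replacement, SequenceFile.Reader(Configuration, Reader.file(Path)); the same read loop against that API would look roughly like this:

    SequenceFile.Reader reader = new SequenceFile.Reader(conf,
            SequenceFile.Reader.file(new Path(path + "/part-r-00000")));
    Text key = new Text();
    IntWritable value = new IntWritable();
    while (reader.next(key, value))
        ocurrencias.put(key.toString(), value.get());
    reader.close();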

From source file:at.illecker.hama.hybrid.examples.hellohybrid.HelloHybridBSP.java

License:Apache License

@Override
public void bsp(BSPPeer<IntWritable, NullWritable, IntWritable, NullWritable, NullWritable> peer)
        throws IOException, SyncException, InterruptedException {

    BSPJob job = new BSPJob((HamaConfiguration) peer.getConfiguration());
    FileSystem fs = FileSystem.get(peer.getConfiguration());
    FSDataOutputStream outStream = fs
            .create(new Path(FileOutputFormat.getOutputPath(job), peer.getTaskId() + ".log"));

    outStream.writeChars("HelloHybrid.bsp executed on CPU!\n");

    ArrayList<Integer> summation = new ArrayList<Integer>();

    // test input
    IntWritable key = new IntWritable();
    NullWritable nullValue = NullWritable.get();

    while (peer.readNext(key, nullValue)) {
        outStream.writeChars("input: key: '" + key.get() + "'\n");
        summation.add(key.get());
    }

    // test sequenceFileReader
    Path example = new Path(peer.getConfiguration().get(CONF_EXAMPLE_PATH));
    SequenceFile.Reader reader = null;
    try {
        reader = new SequenceFile.Reader(fs, example, peer.getConfiguration());

        int i = 0;
        while (reader.next(key, nullValue)) {
            outStream.writeChars("sequenceFileReader: key: '" + key.get() + "'\n");
            if (i < summation.size()) {
                summation.set(i, summation.get(i) + key.get());
            }
            i++;
        }
    } catch (IOException e) {
        throw new RuntimeException(e);
    } finally {
        if (reader != null) {
            reader.close();
        }
    }

    // test output
    for (Integer i : summation) {
        key.set(i);
        outStream.writeChars("output: key: '" + key.get() + "'\n");
        peer.write(key, nullValue);
    }

    // test getAllPeerNames
    outStream.writeChars("getAllPeerNames: '" + Arrays.toString(peer.getAllPeerNames()) + "'\n");

    // test String.split
    String splitString = "boo:and:foo";
    String[] splits;

    outStream.writeChars("splitString: '" + splitString + "'\n");

    splits = splitString.split(":");
    outStream.writeChars("split(\":\") len: " + splits.length + " values: '" + Arrays.toString(splits) + "'\n");

    splits = splitString.split(":", 2);
    outStream.writeChars(
            "split(\":\",2) len: " + splits.length + " values: '" + Arrays.toString(splits) + "'\n");

    splits = splitString.split(":", 5);
    outStream.writeChars(
            "split(\":\",5) len: " + splits.length + " values: '" + Arrays.toString(splits) + "'\n");

    splits = splitString.split(":", -2);
    outStream.writeChars(
            "split(\":\",-2) len: " + splits.length + " values: '" + Arrays.toString(splits) + "'\n");

    splits = splitString.split(";");
    outStream.writeChars("split(\";\") len: " + splits.length + " values: '" + Arrays.toString(splits) + "'\n");

    outStream.close();
}

From source file:at.illecker.hama.hybrid.examples.hellohybrid.HelloHybridBSP.java

License:Apache License

private static void prepareInput(Configuration conf, Path inputPath, Path exampleFile, int n)
        throws IOException {
    FileSystem fs = inputPath.getFileSystem(conf);

    // Create input file writers depending on bspTaskNum
    int bspTaskNum = conf.getInt("bsp.peers.num", 1);
    SequenceFile.Writer[] inputWriters = new SequenceFile.Writer[bspTaskNum];
    for (int i = 0; i < bspTaskNum; i++) {
        Path inputFile = new Path(inputPath, "input" + i + ".seq");
        LOG.info("inputFile: " + inputFile.toString());
        inputWriters[i] = SequenceFile.createWriter(fs, conf, inputFile, IntWritable.class, NullWritable.class,
                CompressionType.NONE);
    }

    // Create example file writer
    SequenceFile.Writer exampleWriter = SequenceFile.createWriter(fs, conf, exampleFile, IntWritable.class,
            NullWritable.class, CompressionType.NONE);

    // Write random values to input files and example
    IntWritable inputKey = new IntWritable();
    NullWritable nullValue = NullWritable.get();
    Random r = new Random();
    for (long i = 0; i < n; i++) {
        inputKey.set(r.nextInt(n));
        for (int j = 0; j < inputWriters.length; j++) {
            inputWriters[j].append(inputKey, nullValue);
        }
        inputKey.set(r.nextInt(n));
        exampleWriter.append(inputKey, nullValue);
    }

    // Close file writers
    for (int j = 0; j < inputWriters.length; j++) {
        inputWriters[j].close();
    }
    exampleWriter.close();
}

From source file:at.illecker.hama.hybrid.examples.hellohybrid.HelloHybridBSP.java

License:Apache License

static void printOutput(BSPJob job, Path path) throws IOException {
    FileSystem fs = path.getFileSystem(job.getConfiguration());
    FileStatus[] files = fs.listStatus(path);
    for (int i = 0; i < files.length; i++) {
        if (files[i].getLen() > 0) {
            System.out.println("File " + files[i].getPath());
            SequenceFile.Reader reader = null;
            try {
                reader = new SequenceFile.Reader(fs, files[i].getPath(), job.getConfiguration());

                IntWritable key = new IntWritable();
                NullWritable value = NullWritable.get();
                while (reader.next(key, value)) {
                    System.out.println("key: '" + key.get() + "' value: '" + value + "'\n");
            }
            } catch (IOException e) {
                FSDataInputStream in = fs.open(files[i].getPath());
                IOUtils.copyBytes(in, System.out, job.getConfiguration(), false);
                in.close();
            } finally {
                if (reader != null) {
                    reader.close();
                }
            }
        }
    }
    // fs.delete(FileOutputFormat.getOutputPath(job), true);
}

From source file:at.illecker.hama.hybrid.examples.kmeans.KMeansHybridBSP.java

License:Apache License

private void recalculateAssignmentsAndWrite(
        BSPPeer<PipesVectorWritable, NullWritable, IntWritable, PipesVectorWritable, CenterMessage> peer)
        throws IOException {

    IntWritable keyWrite = new IntWritable();
    for (DoubleVector v : m_cache) {
        final int lowestDistantCenter = getNearestCenter(v);
        keyWrite.set(lowestDistantCenter);
        peer.write(keyWrite, new PipesVectorWritable(v));
    }

    // just on the first task write the centers to filesystem to prevent
    // collisions
    if (peer.getPeerName().equals(peer.getPeerName(0))) {
        String pathString = m_conf.get(CONF_CENTER_OUT_PATH);
        if (pathString != null) {
            final SequenceFile.Writer dataWriter = SequenceFile.createWriter(FileSystem.get(m_conf), m_conf,
                    new Path(pathString), PipesVectorWritable.class, NullWritable.class, CompressionType.NONE);
            final NullWritable value = NullWritable.get();

            for (DoubleVector center : m_centers_cpu) {
                dataWriter.append(new PipesVectorWritable(center), value);
            }
            dataWriter.close();
        }
    }
}

From source file:at.illecker.hama.hybrid.examples.kmeans.KMeansHybridBSP.java

License:Apache License

public static void main(String[] args) throws Exception {

    // Defaults
    int numBspTask = 1;
    int numGpuBspTask = 1;
    int blockSize = BLOCK_SIZE;
    int gridSize = GRID_SIZE;
    long n = 10; // input vectors
    int k = 3; // start vectors
    int vectorDimension = 2;
    int maxIteration = 10;
    boolean useTestExampleInput = false;
    boolean isDebugging = false;
    boolean timeMeasurement = false;
    int GPUPercentage = 80;

    Configuration conf = new HamaConfiguration();
    FileSystem fs = FileSystem.get(conf);

    // Set numBspTask to maxTasks
    // BSPJobClient jobClient = new BSPJobClient(conf);
    // ClusterStatus cluster = jobClient.getClusterStatus(true);
    // numBspTask = cluster.getMaxTasks();

    if (args.length > 0) {
        if (args.length == 12) {
            numBspTask = Integer.parseInt(args[0]);
            numGpuBspTask = Integer.parseInt(args[1]);
            blockSize = Integer.parseInt(args[2]);
            gridSize = Integer.parseInt(args[3]);
            n = Long.parseLong(args[4]);
            k = Integer.parseInt(args[5]);
            vectorDimension = Integer.parseInt(args[6]);
            maxIteration = Integer.parseInt(args[7]);
            useTestExampleInput = Boolean.parseBoolean(args[8]);
            GPUPercentage = Integer.parseInt(args[9]);
            isDebugging = Boolean.parseBoolean(args[10]);
            timeMeasurement = Boolean.parseBoolean(args[11]);

        } else {
            System.out.println("Wrong argument size!");
            System.out.println("    Argument1=numBspTask");
            System.out.println("    Argument2=numGpuBspTask");
            System.out.println("    Argument3=blockSize");
            System.out.println("    Argument4=gridSize");
            System.out.println("    Argument5=n | Number of input vectors (" + n + ")");
            System.out.println("    Argument6=k | Number of start vectors (" + k + ")");
            System.out.println(
                    "    Argument7=vectorDimension | Dimension of each vector (" + vectorDimension + ")");
            System.out.println(
                    "    Argument8=maxIterations | Number of maximal iterations (" + maxIteration + ")");
            System.out.println("    Argument9=testExample | Use testExample input (true|false=default)");
            System.out.println("    Argument10=GPUPercentage (percentage of input)");
            System.out.println("    Argument11=isDebugging (true|false=defaul)");
            System.out.println("    Argument12=timeMeasurement (true|false=defaul)");
            return;
        }
    }

    // Set config variables
    conf.setBoolean(CONF_DEBUG, isDebugging);
    conf.setBoolean("hama.pipes.logging", false);
    conf.setBoolean(CONF_TIME, timeMeasurement);

    // Set CPU tasks
    conf.setInt("bsp.peers.num", numBspTask);
    // Set GPU tasks
    conf.setInt("bsp.peers.gpu.num", numGpuBspTask);
    // Set GPU blockSize and gridSize
    conf.set(CONF_BLOCKSIZE, "" + blockSize);
    conf.set(CONF_GRIDSIZE, "" + gridSize);
    // Set maxIterations for KMeans
    conf.setInt(CONF_MAX_ITERATIONS, maxIteration);
    // Set n for KMeans
    conf.setLong(CONF_N, n);
    // Set GPU workload
    conf.setInt(CONF_GPU_PERCENTAGE, GPUPercentage);

    LOG.info("NumBspTask: " + conf.getInt("bsp.peers.num", 0));
    LOG.info("NumGpuBspTask: " + conf.getInt("bsp.peers.gpu.num", 0));
    LOG.info("bsp.tasks.maximum: " + conf.get("bsp.tasks.maximum"));
    LOG.info("GPUPercentage: " + conf.get(CONF_GPU_PERCENTAGE));
    LOG.info("BlockSize: " + conf.get(CONF_BLOCKSIZE));
    LOG.info("GridSize: " + conf.get(CONF_GRIDSIZE));
    LOG.info("isDebugging: " + conf.get(CONF_DEBUG));
    LOG.info("timeMeasurement: " + conf.get(CONF_TIME));
    LOG.info("useTestExampleInput: " + useTestExampleInput);
    LOG.info("inputPath: " + CONF_INPUT_DIR);
    LOG.info("centersPath: " + CONF_CENTER_DIR);
    LOG.info("outputPath: " + CONF_OUTPUT_DIR);
    LOG.info("n: " + n);
    LOG.info("k: " + k);
    LOG.info("vectorDimension: " + vectorDimension);
    LOG.info("maxIteration: " + maxIteration);

    Path centerIn = new Path(CONF_CENTER_DIR, "center_in.seq");
    Path centerOut = new Path(CONF_CENTER_DIR, "center_out.seq");
    conf.set(CONF_CENTER_IN_PATH, centerIn.toString());
    conf.set(CONF_CENTER_OUT_PATH, centerOut.toString());

    // prepare Input
    if (useTestExampleInput) {
        // prepareTestInput(conf, fs, input, centerIn);
        prepareInputData(conf, fs, CONF_INPUT_DIR, centerIn, numBspTask, numGpuBspTask, n, k, vectorDimension,
                null, GPUPercentage);
    } else {
        prepareInputData(conf, fs, CONF_INPUT_DIR, centerIn, numBspTask, numGpuBspTask, n, k, vectorDimension,
                new Random(3337L), GPUPercentage);
    }

    BSPJob job = createKMeansHybridBSPConf(conf, CONF_INPUT_DIR, CONF_OUTPUT_DIR);

    long startTime = System.currentTimeMillis();
    if (job.waitForCompletion(true)) {
        LOG.info("Job Finished in " + (System.currentTimeMillis() - startTime) / 1000.0 + " seconds");

        if (isDebugging) {
            printFile(conf, fs, centerOut, new PipesVectorWritable(), NullWritable.get());
            printOutput(conf, fs, ".log", new IntWritable(), new PipesVectorWritable());
        }

        if (k < 50) {
            printFile(conf, fs, centerOut, new PipesVectorWritable(), NullWritable.get());
        }
    }
}

From source file:at.illecker.hama.hybrid.examples.matrixmultiplication.MatrixMultiplicationHybridBSP.java

License:Apache License

/********************************* CPU *********************************/
@Override
public void setup(
        BSPPeer<IntWritable, PipesVectorWritable, IntWritable, PipesVectorWritable, MatrixRowMessage> peer)
        throws IOException {

    HamaConfiguration conf = peer.getConfiguration();
    this.m_isDebuggingEnabled = conf.getBoolean(CONF_DEBUG, false);

    // Choose one as a master, who sorts the matrix rows at the end
    // m_masterTask = peer.getPeerName(peer.getNumPeers() / 2);

    // TODO
    // task must be 0 otherwise write out does NOT work!
    this.m_masterTask = peer.getPeerName(0);

    // Init logging
    if (m_isDebuggingEnabled) {
        try {
            FileSystem fs = FileSystem.get(conf);
            m_logger = fs.create(new Path(FileOutputFormat.getOutputPath(new BSPJob((HamaConfiguration) conf))
                    + "/BSP_" + peer.getTaskId() + ".log"));

        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    // Load transposed Matrix B
    SequenceFile.Reader reader = new SequenceFile.Reader(FileSystem.get(conf),
            new Path(conf.get(CONF_MATRIX_MULT_B_PATH)), conf);

    IntWritable bKey = new IntWritable();
    PipesVectorWritable bVector = new PipesVectorWritable();

    // for each col of matrix B (cause by transposed B)
    while (reader.next(bKey, bVector)) {
        m_bColumns.add(new KeyValuePair<Integer, DoubleVector>(bKey.get(), bVector.getVector()));
        if (m_isDebuggingEnabled) {
            m_logger.writeChars("setup,read,transposedMatrixB,key=" + bKey.get() + ",value="
                    + bVector.getVector().toString() + "\n");
        }
    }
    reader.close();
}