List of usage examples for the org.apache.hadoop.io.IntWritable constructor
public IntWritable()
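The no-argument constructor creates a writable whose value defaults to 0; in the project examples below it is typically used as a reusable key or value buffer that is filled in later with set(). As a minimal standalone sketch (the class name IntWritableDemo is illustrative, not taken from any of the source files):

import org.apache.hadoop.io.IntWritable;

public class IntWritableDemo {
    public static void main(String[] args) {
        IntWritable writable = new IntWritable(); // value starts at 0
        writable.set(42);                         // reuse the same instance as a mutable buffer
        System.out.println(writable.get());      // prints 42
    }
}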
From source file:com.philiphubbard.digraph.MRBuildVerticesTest.java
License:Open Source License
private static void readVertices(FileStatus status, ArrayList<MRVertex> vertices, Configuration conf)
        throws IOException {
    Path path = status.getPath();
    if (path.getName().startsWith("part")) {
        System.out.println(path);
        SequenceFile.Reader reader = new SequenceFile.Reader(conf, SequenceFile.Reader.file(path));
        IntWritable key = new IntWritable();
        BytesWritable value = new BytesWritable();
        while (reader.next(key, value))
            vertices.add(new MRVertex(value, conf));
        reader.close();
    }
}
From source file:com.philiphubbard.digraph.MRCompressChainsTest.java
License:Open Source License
private static void cleanupTest(Configuration conf) throws IOException {
    FileSystem fileSystem = FileSystem.get(conf);
    ArrayList<MRVertex> vertices = new ArrayList<MRVertex>();
    FileStatus[] files = fileSystem.listStatus(new Path(testOutput));
    for (FileStatus status : files) {
        Path path = status.getPath();
        if (path.getName().startsWith("part")) {
            System.out.println(path);
            SequenceFile.Reader reader = new SequenceFile.Reader(conf, SequenceFile.Reader.file(path));
            IntWritable key = new IntWritable();
            BytesWritable value = new BytesWritable();
            while (reader.next(key, value))
                vertices.add(new MRVertex(value, conf));
            reader.close();
        }
    }
    for (MRVertex vertex : vertices)
        System.out.println(vertex.toDisplayString());
    fileSystem.delete(new Path(testInput), true);
    fileSystem.delete(new Path(testOutput), true);
    fileSystem.close();
}
From source file:com.philiphubbard.sabe.MRAssembler.java
License:Open Source License
private void readVertices(FileStatus status, ArrayList<MRMerVertex> vertices, Configuration conf)
        throws IOException {
    Path path = status.getPath();
    if (path.getName().startsWith("part")) {
        SequenceFile.Reader reader = new SequenceFile.Reader(conf, SequenceFile.Reader.file(path));
        IntWritable key = new IntWritable();
        BytesWritable value = new BytesWritable();
        while (reader.next(key, value))
            vertices.add(new MRMerVertex(value, conf));
        reader.close();
    }
}
From source file:com.scaleoutsoftware.soss.hserver.examples.SingleWordCount.java
License:Apache License
public static void main(String[] args) throws Exception {
    if (args.length != 1) {
        throw new RuntimeException("Required args: wordToCount (string)");
    }

    // Create parameter argument to send to the reducers
    MapArguments mapArgs = new MapArguments(args[0]);

    // Create the invocation grid
    InvocationGrid grid = HServerJob.getInvocationGridBuilder("WordCountIG").addClass(TokenizerMapper.class)
            .addClass(IntSumReducer.class).addClass(MapArguments.class).load();

    // Create a default configuration
    Configuration conf = new Configuration();

    // Create the input map
    NamedMap<IntWritable, Text> inputMap = NamedMapFactory.getMap("InputMap",
            new WritableSerializer<IntWritable>(IntWritable.class), new WritableSerializer<Text>(Text.class));

    // Create the output map
    NamedMap<Text, IntWritable> outputMap = NamedMapFactory.getMap("OutputMap",
            new WritableSerializer<Text>(Text.class), new WritableSerializer<IntWritable>(IntWritable.class));

    // Clear the input and output maps
    inputMap.clear();
    outputMap.clear();

    // Create a bulk loader
    BulkLoader<IntWritable, Text> loader = inputMap.getBulkLoader();
    IntWritable key = new IntWritable();
    Text value = new Text();

    // Build the input map from generated text
    Scanner scanner = new Scanner(SAMPLE_INPUT);
    for (int count = 0; scanner.hasNext(); count++) {
        value.set(scanner.next());
        key.set(count);
        loader.put(key, value);
    }
    scanner.close();

    // Close the bulk loader
    loader.close();

    // Assign the invocation grid to the maps, so parallel operations can be performed
    inputMap.setInvocationGrid(grid);
    outputMap.setInvocationGrid(grid);

    // Create an hServer job
    HServerJob job = new HServerJob(conf, "word count", false, grid);
    job.setJarByClass(SingleWordCount.class);
    job.setMapperClass(TokenizerMapper.class);
    job.setCombinerClass(IntSumReducer.class);
    job.setMapOutputKeyClass(NullWritable.class);
    job.setMapOutputValueClass(IntWritable.class);
    job.setInputFormatClass(NamedMapInputFormat.class);
    job.setOutputFormatClass(GridOutputFormat.class);

    // Pass the map arguments object to the job
    job.setJobParameter(mapArgs);

    // Set named maps for the input and output formats
    NamedMapInputFormat.setNamedMap(job, inputMap);
    GridOutputFormat.setNamedMap(job, outputMap);

    // Execute the job
    IntWritable result = (IntWritable) job.runAndGetResult();

    // Unload the invocation grid
    grid.unload();

    // Output the single result and its frequency
    System.out.println("The word: " + args[0] + " was used - " + result + " times!");
}
From source file:com.scaleoutsoftware.soss.hserver.examples.WordCountParameterPassing.java
License:Apache License
public static void main(String[] args) throws Exception {
    if (args.length != 2) {
        throw new RuntimeException("Required args: wordMinLength wordMaxLength");
    }
    int minLength = Integer.parseInt(args[0]);
    int maxLength = Integer.parseInt(args[1]);

    // Create parameter argument to send to the reducers
    MapArguments mapArgs = new MapArguments(minLength, maxLength);

    // Create the invocation grid
    InvocationGrid grid = HServerJob.getInvocationGridBuilder("WordCountIG").addClass(TokenizerMapper.class)
            .addClass(IntSumReducer.class).addClass(MapArguments.class).load();

    // Create a default configuration
    Configuration conf = new Configuration();

    // Create the input map
    NamedMap<IntWritable, Text> inputMap = NamedMapFactory.getMap("InputMap",
            new WritableSerializer<IntWritable>(IntWritable.class), new WritableSerializer<Text>(Text.class));

    // Create the output map
    NamedMap<Text, IntWritable> outputMap = NamedMapFactory.getMap("OutputMap",
            new WritableSerializer<Text>(Text.class), new WritableSerializer<IntWritable>(IntWritable.class));

    // Clear the input and output maps
    inputMap.clear();
    outputMap.clear();

    // Create a bulk loader
    BulkLoader<IntWritable, Text> loader = inputMap.getBulkLoader();
    IntWritable key = new IntWritable();
    Text value = new Text();

    // Build the input map from generated text
    Scanner scanner = new Scanner(SAMPLE_INPUT);
    for (int count = 0; scanner.hasNext(); count++) {
        value.set(scanner.next());
        key.set(count);
        loader.put(key, value);
    }
    scanner.close();

    // Close the bulk loader
    loader.close();

    // Assign the invocation grid to the maps, so parallel operations can be performed
    inputMap.setInvocationGrid(grid);
    outputMap.setInvocationGrid(grid);

    // Create an hServer job
    HServerJob job = new HServerJob(conf, "word count", false, grid);
    job.setJarByClass(WordCountParameterPassing.class);
    job.setMapperClass(TokenizerMapper.class);
    job.setCombinerClass(IntSumReducer.class);
    job.setReducerClass(IntSumReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    job.setInputFormatClass(NamedMapInputFormat.class);
    job.setOutputFormatClass(GridOutputFormat.class);

    // Pass the map arguments object to the job
    job.setJobParameter(mapArgs);

    // Set named maps for the input and output formats
    NamedMapInputFormat.setNamedMap(job, inputMap);
    GridOutputFormat.setNamedMap(job, outputMap);

    // Execute the job
    job.waitForCompletion(true);

    // Unload the invocation grid
    grid.unload();

    // Output resulting words and their frequencies
    Iterable<Text> results = outputMap.keySet();
    System.out.println("Following words were longer than " + mapArgs.minWordLength + " and shorter than "
            + mapArgs.maxWordLength + ":");
    for (Text word : results) {
        System.out.println("\"" + word.toString() + "\" was used " + outputMap.get(word) + " times.");
    }
}
From source file:com.scaleoutsoftware.soss.hserver.Test_MapToMapCopy.java
License:Apache License
public static void main(String argv[]) throws Exception {
    final NamedMap<IntWritable, Text> inputMap = NamedMapFactory.getMap("map-i",
            new WritableSerializer(IntWritable.class), new WritableSerializer(Text.class));
    final NamedMap<IntWritable, Text> outputMap = NamedMapFactory.getMap("map-o",
            new WritableSerializer(IntWritable.class), new WritableSerializer(Text.class));
    inputMap.clear();
    outputMap.clear();
    Thread.sleep(10000);
    BulkLoader<IntWritable, Text> put = inputMap.getBulkLoader();
    String content = "xcccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx";
    Text contentW = new Text(content);
    IntWritable count = new IntWritable();
    for (int i = 0; i < 1000; i++) {
        count.set(i);
        put.put(count, contentW);
    }
    put.close();
    InvocationGrid grid = HServerJob.getInvocationGridBuilder("MyGrid" + System.currentTimeMillis())
            .addClass(Test_MapToMapCopy.class).load();
    HServerJob job;
    Configuration configuration;
    for (int i = 0; i < 100; i++) {
        // MMF: enable memory-mapped files for the reducer and the named map
        configuration = new Configuration();
        configuration.setInt("mapred.hserver.setting.reducer.usememorymappedfiles", 1);
        configuration.setInt("mapred.hserver.setting.namedmap.usememorymappedfiles", 1);
        configuration.setInt("mapred.hserver.setting.map.maxtempmemorykb", 100000);
        job = new HServerJob(configuration, "Sample job");
        job.setGrid(grid);
        job.setMapOutputKeyClass(IntWritable.class);
        job.setMapOutputValueClass(Text.class);
        job.setOutputKeyClass(IntWritable.class);
        job.setOutputValueClass(Text.class);
        job.setInputFormatClass(NamedMapInputFormat.class);
        job.setOutputFormatClass(GridOutputFormat.class);
        NamedMapInputFormat.setNamedMap(job, inputMap);
        NamedMapInputFormat.setSuggestedNumberOfSplits(job, 64);
        GridOutputFormat.setNamedMap(job, outputMap);
        job.waitForCompletion(false);
        assertEquals(inputMap.size(), outputMap.size());
        outputMap.clear();
    }
    grid.unload();
}
From source file:com.scaleoutsoftware.soss.hserver.Test_MapToMapCopyMapred.java
License:Apache License
public int run(String[] args) throws Exception {
    final NamedMap<IntWritable, Text> inputMap = NamedMapFactory.getMap("mapr-i",
            new WritableSerializer(IntWritable.class), new WritableSerializer(Text.class));
    final NamedMap<IntWritable, Text> outputMap = NamedMapFactory.getMap("mapr-o",
            new WritableSerializer(IntWritable.class), new WritableSerializer(Text.class));
    inputMap.clear();
    outputMap.clear();
    Thread.sleep(15000);
    BulkLoader<IntWritable, Text> put = inputMap.getBulkLoader();
    String content = "xcccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx";
    Text contentW = new Text(content);
    IntWritable count = new IntWritable();
    int expectedSize = 10000;
    for (int i = 0; i < expectedSize; i++) {
        count.set(i);
        put.put(count, contentW);
    }
    put.close();
    InvocationGrid grid = HServerJob.getInvocationGridBuilder("MyGrid" + System.currentTimeMillis())
            .addClass(Test_MapToMapCopyMapred.class).load();
    JobConf configuration = new JobConf(getConf(), Test_MapToMapCopyMapred.class);
    configuration.setInt("mapred.hserver.setting.reducer.usememorymappedfiles", 0);
    configuration.setMapOutputKeyClass(IntWritable.class);
    configuration.setMapOutputValueClass(Text.class);
    configuration.setOutputKeyClass(IntWritable.class);
    configuration.setOutputValueClass(Text.class);
    configuration.setInputFormat(NamedMapInputFormatMapred.class);
    configuration.setOutputFormat(NamedMapOutputFormatMapred.class);
    NamedMapInputFormatMapred.setNamedMap(configuration, inputMap);
    NamedMapOutputFormatMapred.setNamedMap(configuration, outputMap);
    // Before the job runs, the output map should still be empty (0 + expectedSize)
    assertEquals(inputMap.size(), outputMap.size() + expectedSize);
    HServerJobClient.runJob(configuration, false, grid);
    assertEquals(inputMap.size(), outputMap.size());
    inputMap.clear();
    outputMap.clear();
    grid.unload();
    return 1;
}
From source file:com.twitter.algebra.AlgebraCommon.java
License:Apache License
/**
 * Convert an in-memory representation of a matrix to a distributed MapDir
 * format. It can then be used in distributed jobs.
 *
 * @param origMatrix the in-memory matrix to convert
 * @return path that will contain the matrix files
 * @throws Exception
 */
public static DistributedRowMatrix toMapDir(Matrix origMatrix, Path outPath, Path tmpPath, String label)
        throws Exception {
    Configuration conf = new Configuration();
    Path outputDir = new Path(outPath, label + origMatrix.numRows() + "x" + origMatrix.numCols());
    FileSystem fs = FileSystem.get(outputDir.toUri(), conf);
    if (!fs.exists(outputDir)) {
        Path mapDir = new Path(outputDir, "matrix-k-0");
        Path outputFile = new Path(mapDir, "data");
        @SuppressWarnings("deprecation")
        SequenceFile.Writer writer = new SequenceFile.Writer(fs, conf, outputFile, IntWritable.class,
                VectorWritable.class);
        VectorWritable vectorw = new VectorWritable();
        IntWritable intw = new IntWritable();
        try {
            for (int r = 0; r < origMatrix.numRows(); r++) {
                Vector vector = origMatrix.viewRow(r);
                vectorw.set(vector);
                intw.set(r);
                writer.append(intw, vectorw);
            }
        } finally {
            writer.close();
        }
        MapFile.fix(fs, mapDir, IntWritable.class, VectorWritable.class, false, conf);
    } else {
        log.warn("----------- Skip matrix " + outputDir + " - already exists");
    }
    DistributedRowMatrix dMatrix = new DistributedRowMatrix(outputDir, tmpPath, origMatrix.numRows(),
            origMatrix.numCols());
    dMatrix.setConf(conf);
    return dMatrix;
}
From source file:com.twitter.algebra.matrix.text.TestSequenceFile.java
License:Apache License
private static void printSequenceFile(String inputStr, int printRow) throws IOException {
    Configuration conf = new Configuration();
    Path finalNumberFile = new Path(inputStr);
    SequenceFile.Reader reader = new SequenceFile.Reader(FileSystem.get(conf), finalNumberFile, conf);
    double sum = 0;
    IntWritable key = new IntWritable();
    VectorWritable value = new VectorWritable();
    Vector printVector = null;
    while (reader.next(key, value)) {
        if (key.get() == printRow)
            printVector = value.get();
        // Count the non-zero elements of the row
        int cnt = 0;
        Iterator<Element> iter = value.get().nonZeroes().iterator();
        for (; iter.hasNext(); iter.next())
            cnt++;
        sum += value.get().zSum();
        System.out.println("# " + key + " " + cnt + " " + value.get().zSum());
    }
    System.out.println("SUM " + sum);
    reader.close();
    if (printVector != null)
        System.out.println("##### " + printRow + " " + printVector);
    else
        System.out.println("##### " + key + " " + value.get());
}
From source file:com.twitter.algebra.nmf.NMFCommon.java
License:Apache License
public static HashMap<Long, Integer> readHashMap(String inputStr) throws IOException {
    HashMap<Long, Integer> hashMap = new HashMap<Long, Integer>();
    Configuration conf = new Configuration();
    Path finalNumberFile = new Path(inputStr + "/part-r-00000");
    @SuppressWarnings("deprecation")
    SequenceFile.Reader reader = new SequenceFile.Reader(FileSystem.get(conf), finalNumberFile, conf);
    LongWritable key = new LongWritable();
    IntWritable value = new IntWritable();
    while (reader.next(key, value)) {
        hashMap.put(key.get(), value.get());
    }
    reader.close();
    return hashMap;
}