Example usage for org.apache.hadoop.io IntWritable IntWritable

Introduction

On this page you can find example usage for the org.apache.hadoop.io.IntWritable no-argument constructor, IntWritable().

Prototype

public IntWritable() 
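
The no-argument constructor creates an IntWritable whose value defaults to 0; in practice the same instance is reused via set(int) and read back with get(), as the examples below show. A minimal self-contained sketch (the class name is illustrative):

import org.apache.hadoop.io.IntWritable;

public class IntWritableDemo {
    public static void main(String[] args) {
        IntWritable w = new IntWritable(); // holds 0 until set
        w.set(42);                         // reuse the instance instead of allocating a new one
        System.out.println(w.get());       // prints 42
    }
}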

Usage

From source file:com.philiphubbard.digraph.MRBuildVerticesTest.java

License:Open Source License

private static void readVertices(FileStatus status, ArrayList<MRVertex> vertices, Configuration conf)
        throws IOException {
    Path path = status.getPath();
    if (path.getName().startsWith("part")) {
        System.out.println(path);

        SequenceFile.Reader reader = new SequenceFile.Reader(conf, SequenceFile.Reader.file(path));
        IntWritable key = new IntWritable();
        BytesWritable value = new BytesWritable();
        while (reader.next(key, value))
            vertices.add(new MRVertex(value, conf));
        reader.close();
    }
}

From source file:com.philiphubbard.digraph.MRCompressChainsTest.java

License:Open Source License

private static void cleanupTest(Configuration conf) throws IOException {
    FileSystem fileSystem = FileSystem.get(conf);

    ArrayList<MRVertex> vertices = new ArrayList<MRVertex>();

    FileStatus[] files = fileSystem.listStatus(new Path(testOutput));
    for (FileStatus status : files) {
        Path path = status.getPath();
        if (path.getName().startsWith("part")) {
            System.out.println(path);

            SequenceFile.Reader reader = new SequenceFile.Reader(conf, SequenceFile.Reader.file(path));
            IntWritable key = new IntWritable();
            BytesWritable value = new BytesWritable();
            while (reader.next(key, value))
                vertices.add(new MRVertex(value, conf));
            reader.close();
        }
    }

    for (MRVertex vertex : vertices)
        System.out.println(vertex.toDisplayString());

    fileSystem.delete(new Path(testInput), true);
    fileSystem.delete(new Path(testOutput), true);

    fileSystem.close();
}

From source file:com.philiphubbard.sabe.MRAssembler.java

License:Open Source License

private void readVertices(FileStatus status, ArrayList<MRMerVertex> vertices, Configuration conf)
        throws IOException {
    Path path = status.getPath();
    if (path.getName().startsWith("part")) {
        SequenceFile.Reader reader = new SequenceFile.Reader(conf, SequenceFile.Reader.file(path));
        IntWritable key = new IntWritable();
        BytesWritable value = new BytesWritable();
        while (reader.next(key, value))
            vertices.add(new MRMerVertex(value, conf));
        reader.close();
    }
}

From source file:com.scaleoutsoftware.soss.hserver.examples.SingleWordCount.java

License:Apache License

public static void main(String[] args) throws Exception {
    if (args.length != 1) {
        throw new RuntimeException("Required args: wordToCount (string)");
    }

    // Create parameter argument to send to the reducers
    MapArguments mapArgs = new MapArguments(args[0]);

    // Create the invocation grid
    InvocationGrid grid = HServerJob.getInvocationGridBuilder("WordCountIG").addClass(TokenizerMapper.class)
            .addClass(IntSumReducer.class).addClass(MapArguments.class).load();

    // Create a default configuration
    Configuration conf = new Configuration();

    // Create the input map
    NamedMap<IntWritable, Text> inputMap = NamedMapFactory.getMap("InputMap",
            new WritableSerializer<IntWritable>(IntWritable.class), new WritableSerializer<Text>(Text.class));
    // Create the output map
    NamedMap<Text, IntWritable> outputMap = NamedMapFactory.getMap("OutputMap",
            new WritableSerializer<Text>(Text.class), new WritableSerializer<IntWritable>(IntWritable.class));

    // Clear the input and output maps
    inputMap.clear();
    outputMap.clear();

    // Create a BulkPut object
    BulkLoader<IntWritable, Text> loader = inputMap.getBulkLoader();

    IntWritable key = new IntWritable();
    Text value = new Text();

    // Build the input map from generated text
    Scanner scanner = new Scanner(SAMPLE_INPUT);

    for (int count = 0; scanner.hasNext(); count++) {
        value.set(scanner.next());
        key.set(count);
        loader.put(key, value);
    }

    scanner.close();

    // Close the bulk loader
    loader.close();

    // Assign the invocation grid to the maps, so parallel operations can be performed
    inputMap.setInvocationGrid(grid);
    outputMap.setInvocationGrid(grid);

    // Create an hServer job
    HServerJob job = new HServerJob(conf, "word count", false, grid);
    job.setJarByClass(SingleWordCount.class);
    job.setMapperClass(TokenizerMapper.class);
    job.setCombinerClass(IntSumReducer.class);
    job.setMapOutputKeyClass(NullWritable.class);
    job.setMapOutputValueClass(IntWritable.class);
    job.setInputFormatClass(NamedMapInputFormat.class);
    job.setOutputFormatClass(GridOutputFormat.class);

    // pass the map arguments object to the job
    job.setJobParameter(mapArgs);

    // Set named maps for the input and output formats
    NamedMapInputFormat.setNamedMap(job, inputMap);
    GridOutputFormat.setNamedMap(job, outputMap);

    // Execute the job
    IntWritable result = (IntWritable) job.runAndGetResult();

    // Unload the invocation grid
    grid.unload();

    // Output the single result and its frequency.
    System.out.println("The word: " + args[0] + " was used - " + result + " times!");
}

From source file:com.scaleoutsoftware.soss.hserver.examples.WordCountParameterPassing.java

License:Apache License

public static void main(String[] args) throws Exception {
    if (args.length != 2) {
        throw new RuntimeException("Required args: wordMinLength wordMaxLength");
    }

    int minLength = Integer.parseInt(args[0]);
    int maxLength = Integer.parseInt(args[1]);

    // Create parameter argument to send to the reducers
    MapArguments mapArgs = new MapArguments(minLength, maxLength);

    // Create the invocation grid
    InvocationGrid grid = HServerJob.getInvocationGridBuilder("WordCountIG").addClass(TokenizerMapper.class)
            .addClass(IntSumReducer.class).addClass(MapArguments.class).load();

    // Create a default configuration
    Configuration conf = new Configuration();

    // Create the input map
    NamedMap<IntWritable, Text> inputMap = NamedMapFactory.getMap("InputMap",
            new WritableSerializer<IntWritable>(IntWritable.class), new WritableSerializer<Text>(Text.class));

    // Create the output map
    NamedMap<Text, IntWritable> outputMap = NamedMapFactory.getMap("OutputMap",
            new WritableSerializer<Text>(Text.class), new WritableSerializer<IntWritable>(IntWritable.class));

    // Clear the input and output maps
    inputMap.clear();
    outputMap.clear();

    // Create a BulkPut object
    BulkLoader<IntWritable, Text> loader = inputMap.getBulkLoader();

    IntWritable key = new IntWritable();
    Text value = new Text();

    // Build the input map from generated text
    Scanner scanner = new Scanner(SAMPLE_INPUT);

    for (int count = 0; scanner.hasNext(); count++) {
        value.set(scanner.next());
        key.set(count);
        loader.put(key, value);
    }

    scanner.close();

    // Close the bulk loader
    loader.close();

    // Assign the invocation grid to the maps, so parallel operations can be performed
    inputMap.setInvocationGrid(grid);
    outputMap.setInvocationGrid(grid);

    // Create an hServer job
    HServerJob job = new HServerJob(conf, "word count", false, grid);
    job.setJarByClass(WordCountParameterPassing.class);
    job.setMapperClass(TokenizerMapper.class);
    job.setCombinerClass(IntSumReducer.class);
    job.setReducerClass(IntSumReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    job.setInputFormatClass(NamedMapInputFormat.class);
    job.setOutputFormatClass(GridOutputFormat.class);

    // Pass the map arguments object to the job
    job.setJobParameter(mapArgs);

    // Set named maps for the input and output formats
    NamedMapInputFormat.setNamedMap(job, inputMap);
    GridOutputFormat.setNamedMap(job, outputMap);

    // Execute the job
    job.waitForCompletion(true);

    // Unload the invocation grid
    grid.unload();

    // Output resulting words and their frequencies
    Iterable<Text> results = outputMap.keySet();
    System.out.println("Following words were longer than " + mapArgs.minWordLength + " and shorter than "
            + mapArgs.maxWordLength + ":");
    for (Text word : results) {
        System.out.println("\"" + word.toString() + "\" was used " + outputMap.get(word) + " times.");
    }
}

From source file:com.scaleoutsoftware.soss.hserver.Test_MapToMapCopy.java

License:Apache License

public static void main(String argv[]) throws Exception {
    final NamedMap<IntWritable, Text> inputMap = NamedMapFactory.getMap("map-i",
            new WritableSerializer(IntWritable.class), new WritableSerializer(Text.class));
    final NamedMap<IntWritable, Text> outputMap = NamedMapFactory.getMap("map-o",
            new WritableSerializer(IntWritable.class), new WritableSerializer(Text.class));
    inputMap.clear();
    outputMap.clear();
    Thread.sleep(10000);
    BulkLoader<IntWritable, Text> put = inputMap.getBulkLoader();
    String content = "xcccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx";
    Text contentW = new Text(content);

    IntWritable count = new IntWritable();
    for (int i = 0; i < 1000; i++) {
        count.set(i);
        put.put(count, contentW);
    }
    put.close();

    InvocationGrid grid = HServerJob.getInvocationGridBuilder("MyGrid" + System.currentTimeMillis())
            .addClass(Test_MapToMapCopy.class).load();

    HServerJob job;
    Configuration configuration;

    for (int i = 0; i < 100; i++) {
        // MMF
        configuration = new Configuration();
        configuration.setInt("mapred.hserver.setting.reducer.usememorymappedfiles", 1);
        configuration.setInt("mapred.hserver.setting.namedmap.usememorymappedfiles", 1);
        configuration.setInt("mapred.hserver.setting.map.maxtempmemorykb", 100000);
        job = new HServerJob(configuration, "Sample job");
        job.setGrid(grid);
        job.setMapOutputKeyClass(IntWritable.class);
        job.setMapOutputValueClass(Text.class);
        job.setOutputKeyClass(IntWritable.class);
        job.setOutputValueClass(Text.class);
        job.setInputFormatClass(NamedMapInputFormat.class);
        job.setOutputFormatClass(GridOutputFormat.class);
        NamedMapInputFormat.setNamedMap(job, inputMap);
        NamedMapInputFormat.setSuggestedNumberOfSplits(job, 64);
        GridOutputFormat.setNamedMap(job, outputMap);
        job.waitForCompletion(false);
        assertEquals(inputMap.size(), outputMap.size());
        outputMap.clear();
    }
    grid.unload();
}

From source file:com.scaleoutsoftware.soss.hserver.Test_MapToMapCopyMapred.java

License:Apache License

public int run(String[] args) throws Exception {
    final NamedMap<IntWritable, Text> inputMap = NamedMapFactory.getMap("mapr-i",
            new WritableSerializer(IntWritable.class), new WritableSerializer(Text.class));
    final NamedMap<IntWritable, Text> outputMap = NamedMapFactory.getMap("mapr-o",
            new WritableSerializer(IntWritable.class), new WritableSerializer(Text.class));
    inputMap.clear();
    outputMap.clear();
    Thread.sleep(15000);
    BulkLoader<IntWritable, Text> put = inputMap.getBulkLoader();
    String content = "xcccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx";
    Text contentW = new Text(content);
    IntWritable count = new IntWritable();
    int expectedSize = 10000;

    for (int i = 0; i < expectedSize; i++) {
        count.set(i);
        put.put(count, contentW);
    }
    put.close();
    InvocationGrid grid = HServerJob.getInvocationGridBuilder("MyGrid" + System.currentTimeMillis())
            .addClass(Test_MapToMapCopyMapred.class).load();

    JobConf configuration = new JobConf(getConf(), Test_MapToMapCopyMapred.class);
    configuration.setInt("mapred.hserver.setting.reducer.usememorymappedfiles", 0);
    configuration.setMapOutputKeyClass(IntWritable.class);
    configuration.setMapOutputValueClass(Text.class);
    configuration.setOutputKeyClass(IntWritable.class);
    configuration.setOutputValueClass(Text.class);
    configuration.setInputFormat(NamedMapInputFormatMapred.class);
    configuration.setOutputFormat(NamedMapOutputFormatMapred.class);
    NamedMapInputFormatMapred.setNamedMap(configuration, inputMap);
    NamedMapOutputFormatMapred.setNamedMap(configuration, outputMap);
    assertEquals(inputMap.size(), outputMap.size() + expectedSize); // the job has not run yet: the output map is empty, so the input map must hold expectedSize entries
    HServerJobClient.runJob(configuration, false, grid);
    assertEquals(inputMap.size(), outputMap.size());
    inputMap.clear();
    outputMap.clear();
    grid.unload();
    return 1;
}

From source file:com.twitter.algebra.AlgebraCommon.java

License:Apache License

/**
 * Convert an in-memory representation of a matrix to the distributed MapDir
 * format, so that it can be used in distributed jobs.
 *
 * @param origMatrix the in-memory matrix to convert
 * @param outPath the directory under which the matrix files are written
 * @param tmpPath the temporary path used by the resulting DistributedRowMatrix
 * @param label a prefix for the name of the output directory
 * @return a DistributedRowMatrix backed by the files written under outPath
 * @throws Exception
 */
public static DistributedRowMatrix toMapDir(Matrix origMatrix, Path outPath, Path tmpPath, String label)
        throws Exception {
    Configuration conf = new Configuration();
    Path outputDir = new Path(outPath, label + origMatrix.numRows() + "x" + origMatrix.numCols());
    FileSystem fs = FileSystem.get(outputDir.toUri(), conf);
    if (!fs.exists(outputDir)) {
        Path mapDir = new Path(outputDir, "matrix-k-0");
        Path outputFile = new Path(mapDir, "data");
        @SuppressWarnings("deprecation")
        SequenceFile.Writer writer = new SequenceFile.Writer(fs, conf, outputFile, IntWritable.class,
                VectorWritable.class);
        VectorWritable vectorw = new VectorWritable();
        IntWritable intw = new IntWritable();
        try {
            for (int r = 0; r < origMatrix.numRows(); r++) {
                Vector vector = origMatrix.viewRow(r);
                vectorw.set(vector);
                intw.set(r);
                writer.append(intw, vectorw);
            }
        } finally {
            writer.close();
        }
        MapFile.fix(fs, mapDir, IntWritable.class, VectorWritable.class, false, conf);
    } else {
        log.warn("----------- Skip matrix " + outputDir + " - already exists");
    }
    DistributedRowMatrix dMatrix = new DistributedRowMatrix(outputDir, tmpPath, origMatrix.numRows(),
            origMatrix.numCols());
    dMatrix.setConf(conf);
    return dMatrix;
}
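
For context, here is a minimal, hypothetical sketch of how toMapDir might be called; the matrix values, paths, and class name are illustrative assumptions, not taken from the source:

import org.apache.hadoop.fs.Path;
import org.apache.mahout.math.DenseMatrix;
import org.apache.mahout.math.Matrix;
import org.apache.mahout.math.hadoop.DistributedRowMatrix;

import com.twitter.algebra.AlgebraCommon;

public class ToMapDirDemo {
    public static void main(String[] args) throws Exception {
        // Illustrative 2x2 matrix; the output and temp paths are placeholders.
        Matrix m = new DenseMatrix(new double[][] { { 1.0, 2.0 }, { 3.0, 4.0 } });
        DistributedRowMatrix drm = AlgebraCommon.toMapDir(m, new Path("/tmp/matrix-out"),
                new Path("/tmp/matrix-tmp"), "demo-");
        System.out.println("Matrix written under " + drm.getRowPath());
    }
}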

From source file:com.twitter.algebra.matrix.text.TestSequenceFile.java

License:Apache License

private static void printSequenceFile(String inputStr, int printRow) throws IOException {
    Configuration conf = new Configuration();
    Path finalNumberFile = new Path(inputStr);
    SequenceFile.Reader reader = new SequenceFile.Reader(FileSystem.get(conf), finalNumberFile, conf);
    double sum = 0;
    IntWritable key = new IntWritable();
    VectorWritable value = new VectorWritable();
    Vector printVector = null;
    while (reader.next(key, value)) {
        if (key.get() == printRow)
            printVector = value.get();
        int cnt = 0;
        Iterator<Element> iter = value.get().nonZeroes().iterator();
        for (; iter.hasNext(); iter.next())
            cnt++;
        sum += value.get().zSum();
        System.out.println("# " + key + " " + cnt + " " + value.get().zSum());
    }
    System.out.println("SUM " + sum);
    reader.close();
    if (printVector != null)
        System.out.println("##### " + printRow + " " + printVector);
    else
        System.out.println("##### " + key + " " + value.get());
}

From source file:com.twitter.algebra.nmf.NMFCommon.java

License:Apache License

public static HashMap<Long, Integer> readHashMap(String inputStr) throws IOException {
    HashMap<Long, Integer> hashMap = new HashMap<Long, Integer>();

    Configuration conf = new Configuration();
    Path finalNumberFile = new Path(inputStr + "/part-r-00000");
    @SuppressWarnings("deprecation")
    SequenceFile.Reader reader = new SequenceFile.Reader(FileSystem.get(conf), finalNumberFile, conf);
    LongWritable key = new LongWritable();
    IntWritable value = new IntWritable();
    while (reader.next(key, value)) {
        hashMap.put(key.get(), value.get());
    }
    reader.close();
    return hashMap;
}