Example usage for org.apache.hadoop.io.IntWritable.get()

List of usage examples for org.apache.hadoop.io.IntWritable.get()

Introduction

On this page you can find usage examples for org.apache.hadoop.io.IntWritable.get().

Prototype

public int get() 

Document

Return the value of this IntWritable.
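
As a quick illustration before the project examples below, here is a minimal, self-contained sketch (the class name IntWritableGetExample is purely illustrative) showing the set()/get() round trip on an IntWritable:

import org.apache.hadoop.io.IntWritable;

public class IntWritableGetExample {
    public static void main(String[] args) {
        // Wrap a primitive int in a Hadoop Writable.
        IntWritable writable = new IntWritable(42);

        // get() returns the wrapped primitive value.
        int primitive = writable.get();
        System.out.println(primitive); // prints 42

        // set() replaces the stored value; a subsequent get() reflects the change.
        writable.set(7);
        System.out.println(writable.get()); // prints 7
    }
}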

Usage

From source file:com.talis.mapreduce.dicenc.FirstReducer.java

License:Apache License

@Override
protected void reduce(Text key, Iterable<IntWritable> values, Context context)
        throws IOException, InterruptedException {
    int sum = 0;
    for (IntWritable value : values) {
        sum += value.get();
    }

    if (sum > threshold) {
        v.set(counter++);
        context.write(key, v);
    }
}

From source file:com.talis.mapreduce.wordcount.newapi.WordCountReducer.java

License:Apache License

@Override
protected void reduce(Text key, Iterable<IntWritable> values, Context context)
        throws IOException, InterruptedException {
    int sum = 0;
    for (IntWritable value : values) {
        sum += value.get();
    }
    result.set(sum);
    context.write(key, result);
}

From source file:com.tdunning.plume.local.lazy.MapRedSequenceFileTest.java

License:Apache License

@Test
public void test() throws Exception {
    /*
     * Create input which is SequenceFile<int,int> with data 1,2\n3,4
     */
    Configuration conf = new Configuration();
    Path p = new Path(inputPath);
    FileSystem localFS = FileSystem.getLocal(conf);
    if (localFS.exists(p)) {
        localFS.delete(p, true); // wipe it if needed
    }
    SequenceFile.Writer writer = SequenceFile.createWriter(localFS, conf, p, IntWritable.class,
            IntWritable.class);
    writer.append(new IntWritable(1), new IntWritable(2));
    writer.append(new IntWritable(3), new IntWritable(4));
    writer.close();
    String outputPath = "/tmp/output-plume-simpletest";
    // Prepare input for test
    FileSystem system = FileSystem.getLocal(new Configuration());
    // Prepare output for test
    system.delete(new Path(outputPath), true);
    // Prepare workflow
    OtherWorkflow workFlow = new OtherWorkflow();
    // Execute it
    MapRedExecutor executor = new MapRedExecutor();
    executor.execute(workFlow, outputPath);
    /*
     * Read output which is SequenceFile<int,int> and assert that it has data 2,3\n4,5
     */
    p = new Path(outputPath + "/1_1/1-r-00000");
    SequenceFile.Reader reader = new SequenceFile.Reader(localFS, p, conf);
    IntWritable key = new IntWritable(1);
    IntWritable value = new IntWritable(1);
    reader.next(key, value);
    assertEquals(key.get(), 2);
    assertEquals(value.get(), 3);
    reader.next(key, value);
    assertEquals(key.get(), 4);
    assertEquals(value.get(), 5);
    reader.close();
}

From source file:com.twitter.algebra.matrix.format.MapDir.java

License:Apache License

/**
 * Jump to the reader that contains the key
 *
 * @param key
 * @throws IOException
 */
private MapFile.Reader loadReader(IntWritable key) throws IOException {
    int partitionIndex = partitioner.getPartitionIndex(key.get());
    if (partitionIndex == partitioner.getLastPartitionIndex())
        noMorePartitions = true;
    log.info("Partition index is " + partitionIndex + " key was: " + key.get());
    lastReader = readers.getReader(partitionIndex);
    return lastReader;
}

From source file:com.twitter.algebra.matrix.format.MapDir.java

License:Apache License

/**
 * Get the value associated with the key
 * @param key
 * @param val the object that will be filled with the retrieved value 
 * @return the retrieved value
 * @throws IOException
 */
public VectorWritable get(IntWritable key, VectorWritable val) throws IOException {
    if (lastReader == null && noMorePartitions)
        return null;
    if (lastReader == null) {
        loadReader(key);
        nextKey.set(key.get());
        boolean eof = lastReader.getClosest(nextKey, nextValue, true) == null;
        if (eof) {
            lastReader = null;
            return null;
        }
    }
    boolean eof = false;
    // skip over keys until we find the one the user is asking for; this should rarely
    // occur, as the user normally asks for sequential keys
    while (!eof && nextKey.compareTo(key) < 0)
        eof = !lastReader.next(nextKey, nextValue);
    // If the requested key is not in the current MapFile, reset the process and
    // search in the next MapFile using a recursive call
    if (eof) {
        lastReader = null;
        return get(key, val);
    }
    if (nextKey.equals(key)) {
        val.set(nextValue.get());
        //update nextKey and nextValue for the next call
        eof = !lastReader.next(nextKey, nextValue);
        if (eof)
            lastReader = null;
        return val;
    }
    return null;
}

From source file:com.twitter.algebra.matrix.text.TestSequenceFile.java

License:Apache License

private static void printSequenceFile(String inputStr, int printRow) throws IOException {
    Configuration conf = new Configuration();
    Path finalNumberFile = new Path(inputStr);
    SequenceFile.Reader reader = new SequenceFile.Reader(FileSystem.get(conf), finalNumberFile, conf);
    double sum = 0;
    IntWritable key = new IntWritable();
    VectorWritable value = new VectorWritable();
    Vector printVector = null;
    while (reader.next(key, value)) {
        if (key.get() == printRow)
            printVector = value.get();
        int cnt = 0;
        Iterator<Element> iter = value.get().nonZeroes().iterator();
        for (; iter.hasNext(); iter.next())
            cnt++;
        sum += value.get().zSum();
        System.out.println("# " + key + " " + cnt + " " + value.get().zSum());
    }
    System.out.println("SUM " + sum);
    reader.close();
    if (printVector != null)
        System.out.println("##### " + printRow + " " + printVector);
    else
        System.out.println("##### " + key + " " + value.get());
}

From source file:com.twitter.algebra.nmf.NMFCommon.java

License:Apache License

public static HashMap<Long, Integer> readHashMap(String inputStr) throws IOException {
    HashMap<Long, Integer> hashMap = new HashMap<Long, Integer>();

    Configuration conf = new Configuration();
    Path finalNumberFile = new Path(inputStr + "/part-r-00000");
    @SuppressWarnings("deprecation")
    SequenceFile.Reader reader = new SequenceFile.Reader(FileSystem.get(conf), finalNumberFile, conf);
    LongWritable key = new LongWritable();
    IntWritable value = new IntWritable();
    while (reader.next(key, value)) {
        hashMap.put(key.get(), value.get());
    }
    System.out.println("SUM " + sum);
    reader.close();
    return hashMap;
}

From source file:com.uber.hoodie.hadoop.realtime.HoodieRealtimeRecordReaderTest.java

License:Apache License

@Test
public void testReaderWithNestedAndComplexSchema() throws Exception {
    // initial commit
    Schema schema = HoodieAvroUtils.addMetadataFields(SchemaTestUtil.getComplexEvolvedSchema());
    HoodieTestUtils.initTableType(hadoopConf, basePath.getRoot().getAbsolutePath(),
            HoodieTableType.MERGE_ON_READ);
    String commitTime = "100";
    int numberOfRecords = 100;
    int numberOfLogRecords = numberOfRecords / 2;
    File partitionDir = InputFormatTestUtil.prepareParquetDataset(basePath, schema, 1, numberOfRecords,
            commitTime);
    InputFormatTestUtil.commit(basePath, commitTime);
    // Add the paths
    FileInputFormat.setInputPaths(jobConf, partitionDir.getPath());

    // update files or generate new log file
    String newCommitTime = "101";
    HoodieLogFormat.Writer writer = writeLogFile(partitionDir, schema, "fileid0", commitTime, newCommitTime,
            numberOfLogRecords);
    long size = writer.getCurrentSize();
    writer.close();
    assertTrue("block - size should be > 0", size > 0);
    InputFormatTestUtil.deltaCommit(basePath, newCommitTime);

    //create a split with baseFile (parquet file written earlier) and new log file(s)
    String logFilePath = writer.getLogFile().getPath().toString();
    HoodieRealtimeFileSplit split = new HoodieRealtimeFileSplit(
            new FileSplit(new Path(partitionDir + "/fileid0_1-0-1_" + commitTime + ".parquet"), 0, 1, jobConf),
            basePath.getRoot().getPath(), Arrays.asList(logFilePath), newCommitTime);

    //create a RecordReader to be used by HoodieRealtimeRecordReader
    RecordReader<NullWritable, ArrayWritable> reader = new MapredParquetInputFormat().getRecordReader(
            new FileSplit(split.getPath(), 0, fs.getLength(split.getPath()), (String[]) null), jobConf, null);
    JobConf jobConf = new JobConf();
    List<Schema.Field> fields = schema.getFields();

    String names = fields.stream().map(f -> f.name()).collect(Collectors.joining(","));
    String positions = fields.stream().map(f -> String.valueOf(f.pos())).collect(Collectors.joining(","));
    jobConf.set(ColumnProjectionUtils.READ_COLUMN_NAMES_CONF_STR, names);
    jobConf.set(ColumnProjectionUtils.READ_COLUMN_IDS_CONF_STR, positions);
    jobConf.set("partition_columns", "datestr");

    // validate record reader compaction
    HoodieRealtimeRecordReader recordReader = new HoodieRealtimeRecordReader(split, jobConf, reader);

    // use reader to read base Parquet File and log file, merge in flight and return latest commit
    // here the first 50 records should be updated, see above
    NullWritable key = recordReader.createKey();
    ArrayWritable value = recordReader.createValue();
    int numRecordsRead = 0;
    while (recordReader.next(key, value)) {
        int currentRecordNo = numRecordsRead;
        ++numRecordsRead;
        Writable[] values = value.get();
        String recordCommitTime;
        // check whether the record was written with the latest commit, here "101"
        if (numRecordsRead > numberOfLogRecords) {
            recordCommitTime = commitTime;
        } else {
            recordCommitTime = newCommitTime;
        }
        String recordCommitTimeSuffix = "@" + recordCommitTime;

        Assert.assertEquals(values[0].toString(), recordCommitTime);
        key = recordReader.createKey();
        value = recordReader.createValue();

        // Assert type STRING
        Assert.assertEquals("test value for field: field1", values[5].toString(), "field" + currentRecordNo);
        Assert.assertEquals("test value for field: field2", values[6].toString(),
                "field" + currentRecordNo + recordCommitTimeSuffix);
        Assert.assertEquals("test value for field: name", values[7].toString(), "name" + currentRecordNo);

        // Assert type INT
        IntWritable intWritable = (IntWritable) values[8];
        Assert.assertEquals("test value for field: favoriteIntNumber", intWritable.get(),
                currentRecordNo + recordCommitTime.hashCode());

        // Assert type LONG
        LongWritable longWritable = (LongWritable) values[9];
        Assert.assertEquals("test value for field: favoriteNumber", longWritable.get(),
                currentRecordNo + recordCommitTime.hashCode());

        // Assert type FLOAT
        FloatWritable floatWritable = (FloatWritable) values[10];
        Assert.assertEquals("test value for field: favoriteFloatNumber", floatWritable.get(),
                (float) ((currentRecordNo + recordCommitTime.hashCode()) / 1024.0), 0);

        // Assert type DOUBLE
        DoubleWritable doubleWritable = (DoubleWritable) values[11];
        Assert.assertEquals("test value for field: favoriteDoubleNumber", doubleWritable.get(),
                (currentRecordNo + recordCommitTime.hashCode()) / 1024.0, 0);

        // Assert type MAP
        ArrayWritable mapItem = (ArrayWritable) values[12];
        Writable mapItemValue1 = mapItem.get()[0];
        Writable mapItemValue2 = mapItem.get()[1];

        Assert.assertEquals("test value for field: tags", ((ArrayWritable) mapItemValue1).get()[0].toString(),
                "mapItem1");
        Assert.assertEquals("test value for field: tags", ((ArrayWritable) mapItemValue2).get()[0].toString(),
                "mapItem2");
        Assert.assertEquals("test value for field: tags", ((ArrayWritable) mapItemValue1).get().length, 2);
        Assert.assertEquals("test value for field: tags", ((ArrayWritable) mapItemValue2).get().length, 2);
        Writable mapItemValue1value = ((ArrayWritable) mapItemValue1).get()[1];
        Writable mapItemValue2value = ((ArrayWritable) mapItemValue2).get()[1];
        Assert.assertEquals("test value for field: tags[\"mapItem1\"].item1",
                ((ArrayWritable) mapItemValue1value).get()[0].toString(), "item" + currentRecordNo);
        Assert.assertEquals("test value for field: tags[\"mapItem2\"].item1",
                ((ArrayWritable) mapItemValue2value).get()[0].toString(), "item2" + currentRecordNo);
        Assert.assertEquals("test value for field: tags[\"mapItem1\"].item2",
                ((ArrayWritable) mapItemValue1value).get()[1].toString(),
                "item" + currentRecordNo + recordCommitTimeSuffix);
        Assert.assertEquals("test value for field: tags[\"mapItem2\"].item2",
                ((ArrayWritable) mapItemValue2value).get()[1].toString(),
                "item2" + currentRecordNo + recordCommitTimeSuffix);

        // Assert type RECORD
        ArrayWritable recordItem = (ArrayWritable) values[13];
        Writable[] nestedRecord = recordItem.get();
        Assert.assertEquals("test value for field: testNestedRecord.isAdmin",
                ((BooleanWritable) nestedRecord[0]).get(), false);
        Assert.assertEquals("test value for field: testNestedRecord.userId", nestedRecord[1].toString(),
                "UserId" + currentRecordNo + recordCommitTimeSuffix);

        // Assert type ARRAY
        ArrayWritable arrayValue = (ArrayWritable) values[14];
        Writable[] arrayValues = arrayValue.get();
        for (int i = 0; i < arrayValues.length; i++) {
            Assert.assertEquals("test value for field: stringArray", "stringArray" + i + recordCommitTimeSuffix,
                    arrayValues[i].toString());
        }
    }
}

From source file:com.wipro.ats.bdre.dq.DQFileReportReducer.java

License:Apache License

@Override
protected void reduce(Text key, Iterable<IntWritable> values, Context context)
        throws IOException, InterruptedException {
    int sum = 0;
    for (IntWritable value : values) {
        sum += value.get();
    }
    if (key.toString().equals(DQConstants.GOOD_RECORDS_FILE)) {
        goodRecords = sum;
    } else if (key.toString().equals(DQConstants.BAD_RECORDS_FILE)) {
        badRecords = sum;
    }

}

From source file:com.yahoo.glimmer.indexing.generator.IndexRecordWriter.java

License:Open Source License

@Override
public void write(IntWritable key, IndexRecordWriterValue value) throws IOException, InterruptedException {
    IndexWrapper index = indices.get(key.get());
    index.write(value);
}