List of usage examples for org.apache.hadoop.io.IntWritable.get()
public int get()
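Before the examples from real projects, here is a minimal, self-contained sketch of what get() does: it returns the primitive int previously stored via the constructor or set(). The class name IntWritableGetDemo is illustrative; only hadoop-common is assumed on the classpath.

import org.apache.hadoop.io.IntWritable;

public class IntWritableGetDemo {
    public static void main(String[] args) {
        IntWritable w = new IntWritable(41); // wrap a primitive int
        w.set(w.get() + 1);                  // read with get(), update with set()
        System.out.println(w.get());         // prints 42
    }
}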
From source file:com.talis.mapreduce.dicenc.FirstReducer.java
License:Apache License
@Override
protected void reduce(Text key, Iterable<IntWritable> values, Context context)
        throws IOException, InterruptedException {
    // Sum the counts for this key; v, counter, and threshold are fields of the reducer.
    int sum = 0;
    for (IntWritable value : values) {
        sum += value.get();
    }
    // Emit only keys whose total count exceeds the threshold, assigning each a fresh id.
    if (sum > threshold) {
        v.set(counter++);
        context.write(key, v);
    }
}
From source file:com.talis.mapreduce.wordcount.newapi.WordCountReducer.java
License:Apache License
@Override
protected void reduce(Text key, Iterable<IntWritable> values, Context context)
        throws IOException, InterruptedException {
    int sum = 0;
    for (IntWritable value : values) {
        sum += value.get();
    }
    result.set(sum);
    context.write(key, result);
}
From source file:com.tdunning.plume.local.lazy.MapRedSequenceFileTest.java
License:Apache License
@Test
public void test() throws Exception {
    // Create the input, which is a SequenceFile<int, int> with data 1,2\n3,4
    Configuration conf = new Configuration();
    Path p = new Path(inputPath);
    FileSystem localFS = FileSystem.getLocal(conf);
    if (localFS.exists(p)) {
        localFS.delete(p, true); // wipe it if needed
    }
    SequenceFile.Writer writer = SequenceFile.createWriter(localFS, conf, p, IntWritable.class,
            IntWritable.class);
    writer.append(new IntWritable(1), new IntWritable(2));
    writer.append(new IntWritable(3), new IntWritable(4));
    writer.close();

    String outputPath = "/tmp/output-plume-simpletest";
    // Prepare output for test
    FileSystem system = FileSystem.getLocal(new Configuration());
    system.delete(new Path(outputPath), true);

    // Prepare the workflow and execute it
    OtherWorkflow workFlow = new OtherWorkflow();
    MapRedExecutor executor = new MapRedExecutor();
    executor.execute(workFlow, outputPath);

    // Read the output, which is a SequenceFile<int, int>, and assert that it has data 2,3\n4,5
    p = new Path(outputPath + "/1_1/1-r-00000");
    SequenceFile.Reader reader = new SequenceFile.Reader(localFS, p, conf);
    IntWritable key = new IntWritable(1);
    IntWritable value = new IntWritable(1);
    reader.next(key, value);
    assertEquals(key.get(), 2);
    assertEquals(value.get(), 3);
    reader.next(key, value);
    assertEquals(key.get(), 4);
    assertEquals(value.get(), 5);
    reader.close();
}
From source file:com.twitter.algebra.matrix.format.MapDir.java
License:Apache License
/**
 * Jump to the reader that contains the key.
 *
 * @param key
 * @throws IOException
 */
private MapFile.Reader loadReader(IntWritable key) throws IOException {
    int partitionIndex = partitioner.getPartitionIndex(key.get());
    if (partitionIndex == partitioner.getLastPartitionIndex())
        noMorePartitions = true;
    log.info("Partition index is " + partitionIndex + " key was: " + key.get());
    lastReader = readers.getReader(partitionIndex);
    return lastReader;
}
From source file:com.twitter.algebra.matrix.format.MapDir.java
License:Apache License
/**
 * Get the value associated with the key.
 *
 * @param key
 * @param val the object that will be filled with the retrieved value
 * @return the retrieved value
 * @throws IOException
 */
public VectorWritable get(IntWritable key, VectorWritable val) throws IOException {
    if (lastReader == null && noMorePartitions)
        return null;
    if (lastReader == null) {
        loadReader(key);
        nextKey.set(key.get());
        boolean eof = lastReader.getClosest(nextKey, nextValue, true) == null;
        if (eof) {
            lastReader = null;
            return null;
        }
    }
    boolean eof = false;
    // Skip over keys until we find the one the user is asking for. This should rarely
    // occur, as the user normally asks for sequential keys.
    while (!eof && nextKey.compareTo(key) < 0)
        eof = !lastReader.next(nextKey, nextValue);
    // If the requested key is not in the current MapFile, reset the process and
    // search in the next MapFile via a recursive call.
    if (eof) {
        lastReader = null;
        return get(key, val);
    }
    if (nextKey.equals(key)) {
        val.set(nextValue.get());
        // Update nextKey and nextValue for the next call.
        eof = !lastReader.next(nextKey, nextValue);
        if (eof)
            lastReader = null;
        return val;
    }
    return null;
}
From source file:com.twitter.algebra.matrix.text.TestSequenceFile.java
License:Apache License
private static void printSequenceFile(String inputStr, int printRow) throws IOException {
    Configuration conf = new Configuration();
    Path finalNumberFile = new Path(inputStr);
    SequenceFile.Reader reader = new SequenceFile.Reader(FileSystem.get(conf), finalNumberFile, conf);
    double sum = 0;
    IntWritable key = new IntWritable();
    VectorWritable value = new VectorWritable();
    Vector printVector = null;
    while (reader.next(key, value)) {
        if (key.get() == printRow)
            printVector = value.get();
        // Count the non-zero elements of the row vector.
        int cnt = 0;
        Iterator<Element> iter = value.get().nonZeroes().iterator();
        for (; iter.hasNext(); iter.next())
            cnt++;
        sum += value.get().zSum();
        System.out.println("# " + key + " " + cnt + " " + value.get().zSum());
    }
    System.out.println("SUM " + sum);
    reader.close();
    if (printVector != null)
        System.out.println("##### " + printRow + " " + printVector);
    else
        System.out.println("##### " + key + " " + value.get());
}
From source file:com.twitter.algebra.nmf.NMFCommon.java
License:Apache License
public static HashMap<Long, Integer> readHashMap(String inputStr) throws IOException {
    HashMap<Long, Integer> hashMap = new HashMap<Long, Integer>();
    Configuration conf = new Configuration();
    Path finalNumberFile = new Path(inputStr + "/part-r-00000");
    @SuppressWarnings("deprecation")
    SequenceFile.Reader reader = new SequenceFile.Reader(FileSystem.get(conf), finalNumberFile, conf);
    LongWritable key = new LongWritable();
    IntWritable value = new IntWritable();
    while (reader.next(key, value)) {
        hashMap.put(key.get(), value.get());
    }
    reader.close();
    return hashMap;
}
From source file:com.uber.hoodie.hadoop.realtime.HoodieRealtimeRecordReaderTest.java
License:Apache License
@Test
public void testReaderWithNestedAndComplexSchema() throws Exception {
    // Initial commit
    Schema schema = HoodieAvroUtils.addMetadataFields(SchemaTestUtil.getComplexEvolvedSchema());
    HoodieTestUtils.initTableType(hadoopConf, basePath.getRoot().getAbsolutePath(),
            HoodieTableType.MERGE_ON_READ);
    String commitTime = "100";
    int numberOfRecords = 100;
    int numberOfLogRecords = numberOfRecords / 2;
    File partitionDir = InputFormatTestUtil.prepareParquetDataset(basePath, schema, 1, numberOfRecords,
            commitTime);
    InputFormatTestUtil.commit(basePath, commitTime);
    // Add the paths
    FileInputFormat.setInputPaths(jobConf, partitionDir.getPath());

    // Update files, i.e. generate a new log file
    String newCommitTime = "101";
    HoodieLogFormat.Writer writer = writeLogFile(partitionDir, schema, "fileid0", commitTime, newCommitTime,
            numberOfLogRecords);
    long size = writer.getCurrentSize();
    writer.close();
    assertTrue("block - size should be > 0", size > 0);
    InputFormatTestUtil.deltaCommit(basePath, newCommitTime);

    // Create a split with the base file (Parquet file written earlier) and the new log file(s)
    String logFilePath = writer.getLogFile().getPath().toString();
    HoodieRealtimeFileSplit split = new HoodieRealtimeFileSplit(
            new FileSplit(new Path(partitionDir + "/fileid0_1-0-1_" + commitTime + ".parquet"), 0, 1, jobConf),
            basePath.getRoot().getPath(), Arrays.asList(logFilePath), newCommitTime);

    // Create a RecordReader to be used by HoodieRealtimeRecordReader
    RecordReader<NullWritable, ArrayWritable> reader = new MapredParquetInputFormat().getRecordReader(
            new FileSplit(split.getPath(), 0, fs.getLength(split.getPath()), (String[]) null), jobConf, null);
    JobConf jobConf = new JobConf();
    List<Schema.Field> fields = schema.getFields();
    String names = fields.stream().map(f -> f.name()).collect(Collectors.joining(","));
    String positions = fields.stream().map(f -> String.valueOf(f.pos())).collect(Collectors.joining(","));
    jobConf.set(ColumnProjectionUtils.READ_COLUMN_NAMES_CONF_STR, names);
    jobConf.set(ColumnProjectionUtils.READ_COLUMN_IDS_CONF_STR, positions);
    jobConf.set("partition_columns", "datestr");

    // Validate record reader compaction
    HoodieRealtimeRecordReader recordReader = new HoodieRealtimeRecordReader(split, jobConf, reader);

    // Use the reader to read the base Parquet file and the log file, merge in flight,
    // and return the latest commit; here the first 50 records should be updated (see above).
    NullWritable key = recordReader.createKey();
    ArrayWritable value = recordReader.createValue();
    int numRecordsRead = 0;
    while (recordReader.next(key, value)) {
        int currentRecordNo = numRecordsRead;
        ++numRecordsRead;
        Writable[] values = value.get();
        String recordCommitTime;
        // Check whether the record written is from the latest commit, here "101"
        if (numRecordsRead > numberOfLogRecords) {
            recordCommitTime = commitTime;
        } else {
            recordCommitTime = newCommitTime;
        }
        String recordCommitTimeSuffix = "@" + recordCommitTime;
        Assert.assertEquals(values[0].toString(), recordCommitTime);
        key = recordReader.createKey();
        value = recordReader.createValue();

        // Assert type STRING
        Assert.assertEquals("test value for field: field1", values[5].toString(), "field" + currentRecordNo);
        Assert.assertEquals("test value for field: field2", values[6].toString(),
                "field" + currentRecordNo + recordCommitTimeSuffix);
        Assert.assertEquals("test value for field: name", values[7].toString(), "name" + currentRecordNo);

        // Assert type INT
        IntWritable intWritable = (IntWritable) values[8];
        Assert.assertEquals("test value for field: favoriteIntNumber", intWritable.get(),
                currentRecordNo + recordCommitTime.hashCode());

        // Assert type LONG
        LongWritable longWritable = (LongWritable) values[9];
        Assert.assertEquals("test value for field: favoriteNumber", longWritable.get(),
                currentRecordNo + recordCommitTime.hashCode());

        // Assert type FLOAT
        FloatWritable floatWritable = (FloatWritable) values[10];
        Assert.assertEquals("test value for field: favoriteFloatNumber", floatWritable.get(),
                (float) ((currentRecordNo + recordCommitTime.hashCode()) / 1024.0), 0);

        // Assert type DOUBLE
        DoubleWritable doubleWritable = (DoubleWritable) values[11];
        Assert.assertEquals("test value for field: favoriteDoubleNumber", doubleWritable.get(),
                (currentRecordNo + recordCommitTime.hashCode()) / 1024.0, 0);

        // Assert type MAP
        ArrayWritable mapItem = (ArrayWritable) values[12];
        Writable mapItemValue1 = mapItem.get()[0];
        Writable mapItemValue2 = mapItem.get()[1];
        Assert.assertEquals("test value for field: tags",
                ((ArrayWritable) mapItemValue1).get()[0].toString(), "mapItem1");
        Assert.assertEquals("test value for field: tags",
                ((ArrayWritable) mapItemValue2).get()[0].toString(), "mapItem2");
        Assert.assertEquals("test value for field: tags", ((ArrayWritable) mapItemValue1).get().length, 2);
        Assert.assertEquals("test value for field: tags", ((ArrayWritable) mapItemValue2).get().length, 2);
        Writable mapItemValue1value = ((ArrayWritable) mapItemValue1).get()[1];
        Writable mapItemValue2value = ((ArrayWritable) mapItemValue2).get()[1];
        Assert.assertEquals("test value for field: tags[\"mapItem1\"].item1",
                ((ArrayWritable) mapItemValue1value).get()[0].toString(), "item" + currentRecordNo);
        Assert.assertEquals("test value for field: tags[\"mapItem2\"].item1",
                ((ArrayWritable) mapItemValue2value).get()[0].toString(), "item2" + currentRecordNo);
        Assert.assertEquals("test value for field: tags[\"mapItem1\"].item2",
                ((ArrayWritable) mapItemValue1value).get()[1].toString(),
                "item" + currentRecordNo + recordCommitTimeSuffix);
        Assert.assertEquals("test value for field: tags[\"mapItem2\"].item2",
                ((ArrayWritable) mapItemValue2value).get()[1].toString(),
                "item2" + currentRecordNo + recordCommitTimeSuffix);

        // Assert type RECORD
        ArrayWritable recordItem = (ArrayWritable) values[13];
        Writable[] nestedRecord = recordItem.get();
        Assert.assertEquals("test value for field: testNestedRecord.isAdmin",
                ((BooleanWritable) nestedRecord[0]).get(), false);
        Assert.assertEquals("test value for field: testNestedRecord.userId", nestedRecord[1].toString(),
                "UserId" + currentRecordNo + recordCommitTimeSuffix);

        // Assert type ARRAY
        ArrayWritable arrayValue = (ArrayWritable) values[14];
        Writable[] arrayValues = arrayValue.get();
        for (int i = 0; i < arrayValues.length; i++) {
            Assert.assertEquals("test value for field: stringArray",
                    "stringArray" + i + recordCommitTimeSuffix, arrayValues[i].toString());
        }
    }
}
From source file:com.wipro.ats.bdre.dq.DQFileReportReducer.java
License:Apache License
@Override
protected void reduce(Text key, Iterable<IntWritable> values, Context context)
        throws IOException, InterruptedException {
    int sum = 0;
    for (IntWritable value : values) {
        sum += value.get();
    }
    if (key.toString().equals(DQConstants.GOOD_RECORDS_FILE)) {
        goodRecords = sum;
    } else if (key.toString().equals(DQConstants.BAD_RECORDS_FILE)) {
        badRecords = sum;
    }
}
From source file:com.yahoo.glimmer.indexing.generator.IndexRecordWriter.java
License:Open Source License
@Override
public void write(IntWritable key, IndexRecordWriterValue value) throws IOException, InterruptedException {
    IndexWrapper index = indices.get(key.get());
    index.write(value);
}