List of usage examples for org.apache.hadoop.io.DataOutputBuffer.reset()
public DataOutputBuffer reset()
From source file:org.commoncrawl.util.JoinByTextSortByTagMapper.java
License:Open Source License
/**
 * Assembles a composite key in {@code compositeBuffer} and hands the result to {@code textOut}.
 *
 * <p>The buffer is reset first, then filled with the bytes of {@code textKey}, followed by
 * {@code tagDelimiter}, followed by the bytes of {@code tagValue}.
 *
 * @param compositeBuffer scratch buffer; its previous contents are discarded
 * @param textKey first component of the composite key
 * @param tagValue second component of the composite key
 * @param textOut receives the buffer's data array together with the written length
 * @throws IOException if writing to the buffer fails
 */
public static void makeCompositeKey(DataOutputBuffer compositeBuffer, TextBytes textKey, TextBytes tagValue,
    TextBytes textOut) throws IOException {
  // Rewind so that only the bytes written below are part of this key.
  compositeBuffer.reset();

  final byte[] keyBytes = textKey.getBytes();
  final int keyLength = textKey.getLength();
  compositeBuffer.write(keyBytes, 0, keyLength);

  compositeBuffer.write(tagDelimiter);

  final byte[] tagBytes = tagValue.getBytes();
  final int tagLength = tagValue.getLength();
  compositeBuffer.write(tagBytes, 0, tagLength);

  // Only the first getLength() bytes of the backing array are valid.
  final int compositeLength = compositeBuffer.getLength();
  textOut.set(compositeBuffer.getData(), 0, compositeLength);
}
From source file:org.commoncrawl.util.MultiFileMergeUtils.java
License:Open Source License
/**
 * Scans the sequence file at {@code path} for the first key that compares equal to
 * {@code targetItem} and logs the key that immediately follows it.
 *
 * <p>If the target is the last key in the file, a message saying so is logged instead. If the
 * target is never found, nothing is logged.
 *
 * @param fs file system that holds the sequence file
 * @param path location of the sequence file
 * @param conf configuration used to open the reader
 * @param targetItem key to search for
 * @throws IOException if the file cannot be opened or read
 */
static void scanToItemThenDisplayNext(FileSystem fs, Path path, Configuration conf, URLFPV2 targetItem)
    throws IOException {
  DataOutputBuffer rawKey = new DataOutputBuffer();
  DataInputBuffer keyDataStream = new DataInputBuffer();
  SequenceFile.Reader reader = new SequenceFile.Reader(fs, path, conf);
  try {
    ValueBytes valueBytes = reader.createValueBytes();
    while (reader.nextRawKey(rawKey) != -1) {
      URLFPV2 keyObject = new URLFPV2();
      keyDataStream.reset(rawKey.getData(), 0, rawKey.getLength());
      keyObject.readFields(keyDataStream);
      // Rewind the key buffer so the next nextRawKey call starts from an empty buffer.
      rawKey.reset();
      // The value is read only to advance the reader past this record.
      reader.nextRawValue(valueBytes);

      if (keyObject.compareTo(targetItem) == 0) {
        // The target may be the final record; do not deserialize from an empty buffer.
        if (reader.nextRawKey(rawKey) == -1) {
          LOG.info("Target Domain:" + targetItem.getDomainHash() + " FP:" + targetItem.getUrlHash()
              + " is the last item; no next item");
          break;
        }
        URLFPV2 nextKeyObject = new URLFPV2();
        keyDataStream.reset(rawKey.getData(), 0, rawKey.getLength());
        nextKeyObject.readFields(keyDataStream);
        LOG.info("Target Domain:" + targetItem.getDomainHash() + " FP:" + targetItem.getUrlHash()
            + " NextDomain:" + nextKeyObject.getDomainHash() + " NextHash:" + nextKeyObject.getUrlHash());
        break;
      }
    }
  } finally {
    // Release the reader even when a read or deserialization step throws.
    reader.close();
  }
}
From source file:org.commoncrawl.util.MultiFileMergeUtils.java
License:Open Source License
static void addFirstNFPItemsToSet(FileSystem fs, Path path, Configuration conf, Set<URLFPV2> outputSet, int nItems) throws IOException { DataOutputBuffer rawKey = new DataOutputBuffer(); DataInputBuffer keyDataStream = new DataInputBuffer(); SequenceFile.Reader reader = new SequenceFile.Reader(fs, path, conf); ValueBytes valueBytes = reader.createValueBytes(); int i = 0;// ww w .j av a2 s . c om while (reader.nextRawKey(rawKey) != -1) { URLFPV2 keyObject = new URLFPV2(); keyDataStream.reset(rawKey.getData(), 0, rawKey.getLength()); keyObject.readFields(keyDataStream); outputSet.add(keyObject); rawKey.reset(); reader.nextRawValue(valueBytes); if (++i == nItems) { break; } } reader.close(); }
From source file:org.goldenorb.io.InputSplitAllocator.java
License:Apache License
/**
 * Gets the raw splits for the configured input format and passes them to the
 * {@code assignInputSplits} overload that takes the raw split list.
 *
 * <p>Each split is serialized into a shared buffer and captured as a {@code RawSplit} holding its
 * class name, data length, serialized bytes and locations. When the input format produces no
 * splits, an empty list is passed on.
 *
 * @return the result of the overload that takes the raw split list
 * @throws RuntimeException wrapping any {@link ClassNotFoundException}, {@link IOException} or
 *         {@link InterruptedException} raised while computing or serializing the splits
 */
@SuppressWarnings({ "deprecation", "rawtypes", "unchecked" })
public Map<OrbPartitionMember, List<RawSplit>> assignInputSplits() {
  List<RawSplit> rawSplits = null;
  JobConf job = new JobConf(orbConf);
  LOG.debug(orbConf.getJobNumber().toString());
  JobContext jobContext = new JobContext(job, new JobID(orbConf.getJobNumber(), 0));
  org.apache.hadoop.mapreduce.InputFormat<?, ?> input;
  try {
    input = ReflectionUtils.newInstance(jobContext.getInputFormatClass(), orbConf);
    List<org.apache.hadoop.mapreduce.InputSplit> splits = input.getSplits(jobContext);
    rawSplits = new ArrayList<RawSplit>(splits.size());
    // With no splits there is no element to pick a serializer from, so skip serialization.
    if (!splits.isEmpty()) {
      DataOutputBuffer buffer = new DataOutputBuffer();
      SerializationFactory factory = new SerializationFactory(orbConf);
      // The serializer is chosen from the first split's class and reused for every split.
      Serializer serializer = factory.getSerializer(splits.get(0).getClass());
      serializer.open(buffer);
      try {
        for (int i = 0; i < splits.size(); i++) {
          org.apache.hadoop.mapreduce.InputSplit split = splits.get(i);
          // Rewind the shared buffer so it holds only the current split's bytes.
          buffer.reset();
          serializer.serialize(split);
          RawSplit rawSplit = new RawSplit();
          rawSplit.setClassName(split.getClass().getName());
          rawSplit.setDataLength(split.getLength());
          rawSplit.setBytes(buffer.getData(), 0, buffer.getLength());
          rawSplit.setLocations(split.getLocations());
          rawSplits.add(rawSplit);
        }
      } finally {
        serializer.close();
      }
    }
  } catch (ClassNotFoundException e) {
    throw new RuntimeException(e);
  } catch (IOException e) {
    throw new RuntimeException(e);
  } catch (InterruptedException e) {
    // Restore the interrupt flag so code further up the stack can still observe it.
    Thread.currentThread().interrupt();
    throw new RuntimeException(e);
  }
  return assignInputSplits(rawSplits);
}