List of usage examples for org.apache.hadoop.io.WritableUtils.readVInt
public static int readVInt(DataInput stream) throws IOException
From source file:org.apache.tez.common.counters.AbstractCounters.java
License:Apache License
@Override public synchronized void readFields(DataInput in) throws IOException { int version = WritableUtils.readVInt(in); if (version != groupFactory.version()) { throw new IOException( "Counters version mismatch, expected " + groupFactory.version() + " got " + version); }//from ww w . j a va2 s . c o m int numFGroups = WritableUtils.readVInt(in); fgroups.clear(); GroupType[] groupTypes = GroupType.values(); while (numFGroups-- > 0) { GroupType groupType = groupTypes[WritableUtils.readVInt(in)]; G group; switch (groupType) { case FILESYSTEM: // with nothing group = groupFactory.newFileSystemGroup(); break; case FRAMEWORK: // with group id group = groupFactory.newFrameworkGroup(WritableUtils.readVInt(in)); break; default: // Silence dumb compiler, as it would've thrown earlier throw new IOException("Unexpected counter group type: " + groupType); } group.readFields(in); fgroups.put(group.getName(), group); } int numGroups = WritableUtils.readVInt(in); while (numGroups-- > 0) { limits.checkGroups(groups.size() + 1); G group = groupFactory.newGenericGroup(Text.readString(in), null, limits); group.readFields(in); groups.put(group.getName(), group); } }
From source file:org.apache.tez.common.counters.FileSystemCounterGroup.java
License:Apache License
@Override public void readFields(DataInput in) throws IOException { int numSchemes = WritableUtils.readVInt(in); // #scheme FileSystemCounter[] enums = FileSystemCounter.values(); for (int i = 0; i < numSchemes; ++i) { String scheme = WritableUtils.readString(in); // scheme int numCounters = WritableUtils.readVInt(in); // #counter for (int j = 0; j < numCounters; ++j) { findCounter(scheme, enums[WritableUtils.readVInt(in)]) // key .setValue(WritableUtils.readVLong(in)); // value }/*from w ww . j av a 2 s . c om*/ } }
From source file:org.apache.tez.common.counters.FrameworkCounterGroup.java
License:Apache License
@Override public void readFields(DataInput in) throws IOException { clear();//from w w w . j a va 2 s .com int len = WritableUtils.readVInt(in); T[] enums = enumClass.getEnumConstants(); for (int i = 0; i < len; ++i) { int ord = WritableUtils.readVInt(in); TezCounter counter = newCounter(enums[ord]); counter.setValue(WritableUtils.readVLong(in)); counters[ord] = counter; } }
From source file:org.apache.tez.engine.common.shuffle.impl.ShuffleHeader.java
License:Apache License
/**
 * Populates this header from {@code in}.
 *
 * <p>Fields are read in this order: map id (a string bounded by
 * {@code MAX_ID_LENGTH}), compressed length (vlong), uncompressed length
 * (vlong) and the reduce id (vint).
 *
 * @param in the stream to read from
 * @throws IOException if the underlying read fails
 */
public void readFields(DataInput in) throws IOException {
    this.mapId = WritableUtils.readStringSafely(in, MAX_ID_LENGTH);

    // Lengths are variable-length encoded longs.
    this.compressedLength = WritableUtils.readVLong(in);
    this.uncompressedLength = WritableUtils.readVLong(in);

    this.forReduce = WritableUtils.readVInt(in);
}
From source file:org.apache.tez.engine.records.TezDependentTaskCompletionEvent.java
License:Apache License
@Override public void readFields(DataInput in) throws IOException { taskAttemptId.readFields(in);//from w w w .j av a2 s . co m // isMap = in.readBoolean(); status = WritableUtils.readEnum(in, Status.class); taskTrackerHttp = WritableUtils.readString(in); taskRunTime = WritableUtils.readVInt(in); eventId = WritableUtils.readVInt(in); }
From source file:org.apache.tinkerpop.gremlin.hadoop.process.computer.giraph.RuleWritable.java
License:Apache License
/**
 * Populates this writable from {@code input}.
 *
 * <p>Stream layout, in read order: a vint {@code Rule} ordinal, a vint
 * payload length, then that many bytes holding the serialized object.
 *
 * @param input the stream to read from
 * @throws IOException if the payload length is negative, the stream ends
 *         early, the underlying read fails, or the payload's class cannot
 *         be found
 */
@Override
public void readFields(final DataInput input) throws IOException {
    this.rule = Rule.values()[WritableUtils.readVInt(input)];

    final int objectLength = WritableUtils.readVInt(input);
    if (objectLength < 0) {
        // Would otherwise surface as a NegativeArraySizeException.
        throw new IOException("Invalid serialized object length: " + objectLength);
    }

    // readFully fills the whole array (or throws EOFException), replacing
    // the previous one-byte-at-a-time loop.
    final byte[] objectBytes = new byte[objectLength];
    input.readFully(objectBytes);

    try {
        this.object = Serializer.deserializeObject(objectBytes);
    } catch (final ClassNotFoundException e) {
        throw new IOException(e.getMessage(), e);
    }
}
From source file:org.apache.tinkerpop.gremlin.hadoop.process.computer.util.Rule.java
License:Apache License
@Override public void readFields(final DataInput input) throws IOException { this.operation = Operation.values()[WritableUtils.readVInt(input)]; final int objectLength = WritableUtils.readVInt(input); final byte[] objectBytes = new byte[objectLength]; for (int i = 0; i < objectLength; i++) { objectBytes[i] = input.readByte(); }//from w w w. ja va 2s .c o m try { this.object = Serializer.deserializeObject(objectBytes); } catch (final ClassNotFoundException e) { throw new IOException(e.getMessage(), e); } }
From source file:org.clueweb.data.SelectiveTermStatistics.java
License:Apache License
/**
 * Creates a {@code SelectiveTermStatistics} object by loading the
 * collection-frequency and document-frequency tables found under
 * {@code file}.
 *
 * <p>Each data file starts with an int entry count followed by that many
 * variable-length values (vlong for collection frequencies, vint for
 * document frequencies). While loading, the collection size and the
 * maximum cf/df values are tracked; the matching term is recorded as
 * {@code index + 1} (presumably term ids are 1-based - confirm against
 * the dictionary builder).
 *
 * @param file base path containing the cf and df data files
 * @param fs FileSystem to read from
 * @throws IOException if a file cannot be read, or if the two files
 *         disagree on the number of entries
 */
public SelectiveTermStatistics(Path file, FileSystem fs) throws IOException {
    Preconditions.checkNotNull(file);
    Preconditions.checkNotNull(fs);

    // Collection frequencies. The stream is closed in finally so that a
    // failed read does not leak the open file.
    FSDataInputStream cfIn = fs.open(new Path(file, BuildDictionary.CF_BY_ID_DATA));
    try {
        this.numTerms = cfIn.readInt();
        cfs = new long[numTerms];
        for (int i = 0; i < numTerms; i++) {
            long cf = WritableUtils.readVLong(cfIn);
            cfs[i] = cf;
            collectionSize += cf;
            if (cf > maxCf) {
                maxCf = cf;
                maxCfTerm = i + 1;
            }
        }
    } finally {
        cfIn.close();
    }

    // Document frequencies. Must describe the same number of terms.
    FSDataInputStream dfIn = fs.open(new Path(file, BuildDictionary.DF_BY_ID_DATA));
    try {
        if (numTerms != dfIn.readInt()) {
            throw new IOException("df data and cf data should have the same number of entries!");
        }
        dfs = new int[numTerms];
        for (int i = 0; i < numTerms; i++) {
            int df = WritableUtils.readVInt(dfIn);
            dfs[i] = df;
            if (df > maxDf) {
                maxDf = df;
                maxDfTerm = i + 1;
            }
        }
    } finally {
        dfIn.close();
    }
}
From source file:org.commoncrawl.mapred.ec2.postprocess.linkCollector.LinkMergerJob.java
License:Open Source License
@Override public void reduce(IntWritable key, Iterator<Text> values, OutputCollector<TextBytes, TextBytes> output, Reporter reporter) throws IOException { if (_skipPartition) return;//www . j a v a 2 s . c o m // collect all incoming paths first Vector<Path> incomingPaths = new Vector<Path>(); while (values.hasNext()) { String path = values.next().toString(); LOG.info("Found Incoming Path:" + path); incomingPaths.add(new Path(path)); } FlexBuffer scanArray[] = LinkKey.allocateScanArray(); // set up merge attributes Configuration localMergeConfig = new Configuration(_conf); localMergeConfig.setClass(MultiFileInputReader.MULTIFILE_COMPARATOR_CLASS, LinkKeyGroupingComparator.class, RawComparator.class); localMergeConfig.setClass(MultiFileInputReader.MULTIFILE_KEY_CLASS, TextBytes.class, WritableComparable.class); // ok now spawn merger MultiFileInputReader<TextBytes> multiFileInputReader = new MultiFileInputReader<TextBytes>(_fs, incomingPaths, localMergeConfig); TextBytes keyBytes = new TextBytes(); TextBytes valueBytes = new TextBytes(); DataInputBuffer inputBuffer = new DataInputBuffer(); int processedKeysCount = 0; Pair<KeyAndValueData<TextBytes>, Iterable<RawRecordValue>> nextItem = null; while ((nextItem = multiFileInputReader.getNextItemIterator()) != null) { summaryRecord = null; linkSummaryRecord = null; types.clear(); linkSources = null; outputKeyString = null; outputKeyFromInternalLink = false; outputKeyURLObj = null; int statusCount = 0; int linkCount = 0; // scan key components LinkKey.scanForComponents(nextItem.e0._keyObject, ':', scanArray); // pick up source fp from key ... 
URLFPV2 fpSource = new URLFPV2(); fpSource.setRootDomainHash(LinkKey.getLongComponentFromComponentArray(scanArray, LinkKey.ComponentId.ROOT_DOMAIN_HASH_COMPONENT_ID)); fpSource.setDomainHash(LinkKey.getLongComponentFromComponentArray(scanArray, LinkKey.ComponentId.DOMAIN_HASH_COMPONENT_ID)); fpSource.setUrlHash(LinkKey.getLongComponentFromComponentArray(scanArray, LinkKey.ComponentId.URL_HASH_COMPONENT_ID)); for (RawRecordValue rawValue : nextItem.e1) { inputBuffer.reset(rawValue.key.getData(), 0, rawValue.key.getLength()); int length = WritableUtils.readVInt(inputBuffer); keyBytes.set(rawValue.key.getData(), inputBuffer.getPosition(), length); inputBuffer.reset(rawValue.data.getData(), 0, rawValue.data.getLength()); length = WritableUtils.readVInt(inputBuffer); valueBytes.set(rawValue.data.getData(), inputBuffer.getPosition(), length); long linkType = LinkKey.getLongComponentFromKey(keyBytes, LinkKey.ComponentId.TYPE_COMPONENT_ID); if (linkType == LinkKey.Type.KEY_TYPE_CRAWL_STATUS.ordinal()) { statusCount++; try { JsonObject object = _parser.parse(valueBytes.toString()).getAsJsonObject(); if (object != null) { updateCrawlStatsFromJSONObject(object, fpSource, reporter); } } catch (Exception e) { LOG.error("Error Parsing JSON:" + valueBytes.toString()); throw new IOException(e); } } else { linkCount++; JsonObject object = _parser.parse(valueBytes.toString()).getAsJsonObject(); // ok this is a link ... 
updateLinkStatsFromLinkJSONObject(object, fpSource, reporter); } reporter.progress(); } if (statusCount > 1) { reporter.incrCounter(Counters.TWO_REDUNDANT_STATUS_IN_REDUCER, 1); } if (statusCount == 0 && linkCount != 0) { reporter.incrCounter(Counters.DISCOVERED_NEW_LINK, 1); } else { if (statusCount >= 1 && linkCount >= 1) { reporter.incrCounter(Counters.GOT_CRAWL_STATUS_WITH_LINK, 1); } else if (statusCount >= 1 && linkCount == 0) { reporter.incrCounter(Counters.GOT_CRAWL_STATUS_NO_LINK, 1); } } if (summaryRecord != null || linkSummaryRecord != null) { JsonObject compositeObject = new JsonObject(); if (summaryRecord != null) { compositeObject.add("crawl_status", summaryRecord); } if (linkSummaryRecord != null) { if (types != null && types.size() != 0) { stringCollectionToJsonArray(linkSummaryRecord, "typeAndRels", types); if (linkSources != null) { stringCollectionToJsonArray(linkSummaryRecord, "sources", linkSources.values()); } } compositeObject.add("link_status", linkSummaryRecord); } if (outputKeyString != null && outputKeyURLObj != null && outputKeyURLObj.isValid()) { if (outputKeyFromInternalLink) { reporter.incrCounter(Counters.OUTPUT_KEY_FROM_INTERNAL_LINK, 1); } else { reporter.incrCounter(Counters.OUTPUT_KEY_FROM_EXTERNAL_LINK, 1); } output.collect(new TextBytes(outputKeyString), new TextBytes(compositeObject.toString())); } else { reporter.incrCounter(Counters.FAILED_TO_GET_SOURCE_HREF, 1); } } } }
From source file:org.commoncrawl.mapred.pipelineV3.crawllistgen.GenBundlesStep.java
License:Open Source License
/**
 * Points {@code textOut} at the payload held in {@code dataBuffer}.
 *
 * <p>The buffer is expected to start with a vint length; the bytes that
 * follow it, for that length, are handed to {@code textOut.set}.
 *
 * @param dataBuffer buffer holding the length-prefixed value
 * @param inputBuffer scratch reader, reset onto {@code dataBuffer}'s bytes
 * @param textOut receives the payload
 * @throws IOException if the length prefix cannot be read
 */
private static void rawValueToTextBytes(DataOutputBuffer dataBuffer, DataInputBuffer inputBuffer,
        TextBytes textOut) throws IOException {
    // Aim the reader at the bytes written so far.
    inputBuffer.reset(dataBuffer.getData(), dataBuffer.getLength());

    // After the vint is consumed, the reader's position is where the payload starts.
    final int payloadLength = WritableUtils.readVInt(inputBuffer);
    final int payloadOffset = inputBuffer.getPosition();

    textOut.set(inputBuffer.getData(), payloadOffset, payloadLength);
}