Example usage for org.apache.hadoop.io WritableUtils readVInt

Introduction

This page collects example usages of the org.apache.hadoop.io.WritableUtils method readVInt, drawn from open-source projects.

Prototype

public static int readVInt(DataInput stream) throws IOException 

Document

Reads a zero-compressed encoded integer from the input stream and returns it.
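
A minimal round-trip sketch (standalone, not taken from the projects below) pairing readVInt with its counterpart WritableUtils.writeVInt. Small values occupy a single byte on the wire; larger magnitudes take up to five bytes.

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.DataInputStream;
import java.io.DataOutputStream;
import java.io.IOException;

import org.apache.hadoop.io.WritableUtils;

public class VIntRoundTrip {
    public static void main(String[] args) throws IOException {
        ByteArrayOutputStream bytes = new ByteArrayOutputStream();
        DataOutputStream out = new DataOutputStream(bytes);

        WritableUtils.writeVInt(out, 42);      // fits in a single byte
        WritableUtils.writeVInt(out, 1000000); // takes several bytes

        DataInputStream in = new DataInputStream(new ByteArrayInputStream(bytes.toByteArray()));
        System.out.println(WritableUtils.readVInt(in)); // 42
        System.out.println(WritableUtils.readVInt(in)); // 1000000
    }
}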

Usage

From source file: org.apache.tez.common.counters.AbstractCounters.java

License: Apache License

@Override
public synchronized void readFields(DataInput in) throws IOException {
    int version = WritableUtils.readVInt(in);
    if (version != groupFactory.version()) {
        throw new IOException(
                "Counters version mismatch, expected " + groupFactory.version() + " got " + version);
    }
    int numFGroups = WritableUtils.readVInt(in);
    fgroups.clear();
    GroupType[] groupTypes = GroupType.values();
    while (numFGroups-- > 0) {
        GroupType groupType = groupTypes[WritableUtils.readVInt(in)];
        G group;
        switch (groupType) {
        case FILESYSTEM: // with nothing
            group = groupFactory.newFileSystemGroup();
            break;
        case FRAMEWORK: // with group id
            group = groupFactory.newFrameworkGroup(WritableUtils.readVInt(in));
            break;
        default: // Silence dumb compiler, as it would've thrown earlier
            throw new IOException("Unexpected counter group type: " + groupType);
        }
        group.readFields(in);
        fgroups.put(group.getName(), group);
    }
    int numGroups = WritableUtils.readVInt(in);
    while (numGroups-- > 0) {
        limits.checkGroups(groups.size() + 1);
        G group = groupFactory.newGenericGroup(Text.readString(in), null, limits);
        group.readFields(in);
        groups.put(group.getName(), group);
    }
}

From source file: org.apache.tez.common.counters.FileSystemCounterGroup.java

License: Apache License

@Override
public void readFields(DataInput in) throws IOException {
    int numSchemes = WritableUtils.readVInt(in); // #scheme
    FileSystemCounter[] enums = FileSystemCounter.values();
    for (int i = 0; i < numSchemes; ++i) {
        String scheme = WritableUtils.readString(in); // scheme
        int numCounters = WritableUtils.readVInt(in); // #counter
        for (int j = 0; j < numCounters; ++j) {
            findCounter(scheme, enums[WritableUtils.readVInt(in)]) // key
                    .setValue(WritableUtils.readVLong(in)); // value
        }
    }
}

From source file: org.apache.tez.common.counters.FrameworkCounterGroup.java

License: Apache License

@Override
public void readFields(DataInput in) throws IOException {
    clear();
    int len = WritableUtils.readVInt(in);
    T[] enums = enumClass.getEnumConstants();
    for (int i = 0; i < len; ++i) {
        int ord = WritableUtils.readVInt(in);
        TezCounter counter = newCounter(enums[ord]);
        counter.setValue(WritableUtils.readVLong(in));
        counters[ord] = counter;
    }
}
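
The readFields above implies a symmetric write format: a vInt entry count, then each counter's enum ordinal as a vInt followed by its value as a vLong. A hedged sketch of that write side (the counters array and a size() helper mirror the fields used above; this is illustrative, not the actual FrameworkCounterGroup code):

// Illustrative write side implied by the readFields above.
public void write(DataOutput out) throws IOException {
    WritableUtils.writeVInt(out, size()); // number of non-null counters
    for (int ord = 0; ord < counters.length; ++ord) {
        TezCounter counter = counters[ord];
        if (counter != null) {
            WritableUtils.writeVInt(out, ord);                 // enum ordinal
            WritableUtils.writeVLong(out, counter.getValue()); // counter value
        }
    }
}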

From source file: org.apache.tez.engine.common.shuffle.impl.ShuffleHeader.java

License: Apache License

public void readFields(DataInput in) throws IOException {
    mapId = WritableUtils.readStringSafely(in, MAX_ID_LENGTH);
    compressedLength = WritableUtils.readVLong(in);
    uncompressedLength = WritableUtils.readVLong(in);
    forReduce = WritableUtils.readVInt(in);
}
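
The header layout read above (a string map id, two vLongs, one vInt) suggests a matching write side. A hedged sketch, assuming the Text.writeString framing (vInt length plus UTF-8 bytes) that readStringSafely expects; this is illustrative, not the project's actual write method:

// Illustrative write side matching the readFields above.
public void write(DataOutput out) throws IOException {
    Text.writeString(out, mapId); // vInt length + UTF-8 bytes
    WritableUtils.writeVLong(out, compressedLength);
    WritableUtils.writeVLong(out, uncompressedLength);
    WritableUtils.writeVInt(out, forReduce);
}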

From source file: org.apache.tez.engine.records.TezDependentTaskCompletionEvent.java

License: Apache License

@Override
public void readFields(DataInput in) throws IOException {
    taskAttemptId.readFields(in);
    //    isMap = in.readBoolean();
    status = WritableUtils.readEnum(in, Status.class);
    taskTrackerHttp = WritableUtils.readString(in);
    taskRunTime = WritableUtils.readVInt(in);
    eventId = WritableUtils.readVInt(in);
}

From source file: org.apache.tinkerpop.gremlin.hadoop.process.computer.giraph.RuleWritable.java

License: Apache License

@Override
public void readFields(final DataInput input) throws IOException {
    this.rule = Rule.values()[WritableUtils.readVInt(input)];
    final int objectLength = WritableUtils.readVInt(input);
    final byte[] objectBytes = new byte[objectLength];
    for (int i = 0; i < objectLength; i++) {
        objectBytes[i] = input.readByte();
    }
    try {
        this.object = Serializer.deserializeObject(objectBytes);
    } catch (final ClassNotFoundException e) {
        throw new IOException(e.getMessage(), e);
    }

    /*this.rule = Rule.values()[WritableUtils.readVInt(input)];
    int objectLength = WritableUtils.readVInt(input);
    byte[] bytes = new byte[objectLength];
    for (int i = 0; i < objectLength; i++) {
    bytes[i] = input.readByte();
    }
    final Input in = new Input(new ByteArrayInputStream(bytes));
    this.object = Constants.GRYO.readClassAndObject(in);
    in.close();*/
}

From source file: org.apache.tinkerpop.gremlin.hadoop.process.computer.util.Rule.java

License: Apache License

@Override
public void readFields(final DataInput input) throws IOException {
    this.operation = Operation.values()[WritableUtils.readVInt(input)];
    final int objectLength = WritableUtils.readVInt(input);
    final byte[] objectBytes = new byte[objectLength];
    for (int i = 0; i < objectLength; i++) {
        objectBytes[i] = input.readByte();
    }
    try {
        this.object = Serializer.deserializeObject(objectBytes);
    } catch (final ClassNotFoundException e) {
        throw new IOException(e.getMessage(), e);
    }
}

From source file: org.clueweb.data.SelectiveTermStatistics.java

License: Apache License

/**
 * Creates a {@code CfTable} object.
 * 
 * @param file collection frequency data file
 * @param fs FileSystem to read from
 * @throws IOException
 */
public SelectiveTermStatistics(Path file, FileSystem fs) throws IOException {
    Preconditions.checkNotNull(file);
    Preconditions.checkNotNull(fs);

    FSDataInputStream in = fs.open(new Path(file, BuildDictionary.CF_BY_ID_DATA));
    this.numTerms = in.readInt();

    cfs = new long[numTerms];

    for (int i = 0; i < numTerms; i++) {
        long cf = WritableUtils.readVLong(in);

        cfs[i] = cf;
        collectionSize += cf;

        if (cf > maxCf) {
            maxCf = cf;
            maxCfTerm = i + 1;
        }
    }

    in.close();

    in = fs.open(new Path(file, BuildDictionary.DF_BY_ID_DATA));
    if (numTerms != in.readInt()) {
        throw new IOException("df data and cf data should have the same number of entries!");
    }

    dfs = new int[numTerms];

    for (int i = 0; i < numTerms; i++) {
        int df = WritableUtils.readVInt(in);

        dfs[i] = df;

        if (df > maxDf) {
            maxDf = df;
            maxDfTerm = i + 1;
        }
    }

    in.close();
}

From source file: org.commoncrawl.mapred.ec2.postprocess.linkCollector.LinkMergerJob.java

License: Open Source License

@Override
public void reduce(IntWritable key, Iterator<Text> values, OutputCollector<TextBytes, TextBytes> output,
        Reporter reporter) throws IOException {

    if (_skipPartition)
        return;
    // collect all incoming paths first
    Vector<Path> incomingPaths = new Vector<Path>();

    while (values.hasNext()) {
        String path = values.next().toString();
        LOG.info("Found Incoming Path:" + path);
        incomingPaths.add(new Path(path));
    }

    FlexBuffer scanArray[] = LinkKey.allocateScanArray();

    // set up merge attributes
    Configuration localMergeConfig = new Configuration(_conf);

    localMergeConfig.setClass(MultiFileInputReader.MULTIFILE_COMPARATOR_CLASS, LinkKeyGroupingComparator.class,
            RawComparator.class);
    localMergeConfig.setClass(MultiFileInputReader.MULTIFILE_KEY_CLASS, TextBytes.class,
            WritableComparable.class);

    // ok now spawn merger
    MultiFileInputReader<TextBytes> multiFileInputReader = new MultiFileInputReader<TextBytes>(_fs,
            incomingPaths, localMergeConfig);

    TextBytes keyBytes = new TextBytes();
    TextBytes valueBytes = new TextBytes();
    DataInputBuffer inputBuffer = new DataInputBuffer();

    int processedKeysCount = 0;

    Pair<KeyAndValueData<TextBytes>, Iterable<RawRecordValue>> nextItem = null;
    while ((nextItem = multiFileInputReader.getNextItemIterator()) != null) {

        summaryRecord = null;
        linkSummaryRecord = null;
        types.clear();
        linkSources = null;
        outputKeyString = null;
        outputKeyFromInternalLink = false;
        outputKeyURLObj = null;

        int statusCount = 0;
        int linkCount = 0;

        // scan key components 
        LinkKey.scanForComponents(nextItem.e0._keyObject, ':', scanArray);

        // pick up source fp from key ... 
        URLFPV2 fpSource = new URLFPV2();

        fpSource.setRootDomainHash(LinkKey.getLongComponentFromComponentArray(scanArray,
                LinkKey.ComponentId.ROOT_DOMAIN_HASH_COMPONENT_ID));
        fpSource.setDomainHash(LinkKey.getLongComponentFromComponentArray(scanArray,
                LinkKey.ComponentId.DOMAIN_HASH_COMPONENT_ID));
        fpSource.setUrlHash(LinkKey.getLongComponentFromComponentArray(scanArray,
                LinkKey.ComponentId.URL_HASH_COMPONENT_ID));

        for (RawRecordValue rawValue : nextItem.e1) {

            inputBuffer.reset(rawValue.key.getData(), 0, rawValue.key.getLength());
            int length = WritableUtils.readVInt(inputBuffer);
            keyBytes.set(rawValue.key.getData(), inputBuffer.getPosition(), length);
            inputBuffer.reset(rawValue.data.getData(), 0, rawValue.data.getLength());
            length = WritableUtils.readVInt(inputBuffer);
            valueBytes.set(rawValue.data.getData(), inputBuffer.getPosition(), length);

            long linkType = LinkKey.getLongComponentFromKey(keyBytes, LinkKey.ComponentId.TYPE_COMPONENT_ID);

            if (linkType == LinkKey.Type.KEY_TYPE_CRAWL_STATUS.ordinal()) {
                statusCount++;

                try {
                    JsonObject object = _parser.parse(valueBytes.toString()).getAsJsonObject();
                    if (object != null) {
                        updateCrawlStatsFromJSONObject(object, fpSource, reporter);
                    }
                } catch (Exception e) {
                    LOG.error("Error Parsing JSON:" + valueBytes.toString());
                    throw new IOException(e);
                }
            } else {
                linkCount++;
                JsonObject object = _parser.parse(valueBytes.toString()).getAsJsonObject();
                // ok this is a link ... 
                updateLinkStatsFromLinkJSONObject(object, fpSource, reporter);
            }

            reporter.progress();
        }

        if (statusCount > 1) {
            reporter.incrCounter(Counters.TWO_REDUNDANT_STATUS_IN_REDUCER, 1);
        }

        if (statusCount == 0 && linkCount != 0) {
            reporter.incrCounter(Counters.DISCOVERED_NEW_LINK, 1);
        } else {
            if (statusCount >= 1 && linkCount >= 1) {
                reporter.incrCounter(Counters.GOT_CRAWL_STATUS_WITH_LINK, 1);
            } else if (statusCount >= 1 && linkCount == 0) {
                reporter.incrCounter(Counters.GOT_CRAWL_STATUS_NO_LINK, 1);
            }
        }

        if (summaryRecord != null || linkSummaryRecord != null) {
            JsonObject compositeObject = new JsonObject();
            if (summaryRecord != null) {
                compositeObject.add("crawl_status", summaryRecord);
            }
            if (linkSummaryRecord != null) {
                if (types != null && types.size() != 0) {
                    stringCollectionToJsonArray(linkSummaryRecord, "typeAndRels", types);
                    if (linkSources != null) {
                        stringCollectionToJsonArray(linkSummaryRecord, "sources", linkSources.values());
                    }
                }
                compositeObject.add("link_status", linkSummaryRecord);
            }

            if (outputKeyString != null && outputKeyURLObj != null && outputKeyURLObj.isValid()) {
                if (outputKeyFromInternalLink) {
                    reporter.incrCounter(Counters.OUTPUT_KEY_FROM_INTERNAL_LINK, 1);
                } else {
                    reporter.incrCounter(Counters.OUTPUT_KEY_FROM_EXTERNAL_LINK, 1);
                }
                output.collect(new TextBytes(outputKeyString), new TextBytes(compositeObject.toString()));
            } else {
                reporter.incrCounter(Counters.FAILED_TO_GET_SOURCE_HREF, 1);
            }
        }
    }
}

From source file: org.commoncrawl.mapred.pipelineV3.crawllistgen.GenBundlesStep.java

License: Open Source License

private static void rawValueToTextBytes(DataOutputBuffer dataBuffer, DataInputBuffer inputBuffer,
        TextBytes textOut) throws IOException {
    inputBuffer.reset(dataBuffer.getData(), dataBuffer.getLength());
    int newLength = WritableUtils.readVInt(inputBuffer);
    textOut.set(inputBuffer.getData(), inputBuffer.getPosition(), newLength);
}
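
For completeness, a hedged write-side counterpart to the helper above (illustrative only; it assumes TextBytes exposes Text-style getBytes()/getLength() accessors): the value is framed as a vInt length followed by the raw bytes, which is exactly what rawValueToTextBytes unpacks.

// Illustrative counterpart: frame a TextBytes value as vInt length + raw bytes.
private static void textBytesToRawValue(TextBytes textIn, DataOutputBuffer dataBuffer)
        throws IOException {
    dataBuffer.reset();
    WritableUtils.writeVInt(dataBuffer, textIn.getLength());
    dataBuffer.write(textIn.getBytes(), 0, textIn.getLength());
}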