Example usage for org.apache.hadoop.io Text readString

List of usage examples for org.apache.hadoop.io Text readString

Introduction

On this page you can find example usage of org.apache.hadoop.io.Text.readString.

Prototype

public static String readString(DataInput in) throws IOException 

Document

Read a UTF-8 encoded string from in.
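
Before the project examples below, here is a minimal, self-contained round trip sketch, assuming a plain in-memory stream; the class name TextReadStringDemo is illustrative. Text.writeString is the matching writer in the same class: it writes a vint length prefix followed by the UTF-8 bytes, which readString then decodes.

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.DataInputStream;
import java.io.DataOutputStream;
import java.io.IOException;

import org.apache.hadoop.io.Text;

public class TextReadStringDemo {
    public static void main(String[] args) throws IOException {
        // Write a string with the matching writer: a vint length prefix
        // followed by the UTF-8 encoded bytes.
        ByteArrayOutputStream buffer = new ByteArrayOutputStream();
        DataOutputStream out = new DataOutputStream(buffer);
        Text.writeString(out, "hello, hadoop");

        // readString accepts any DataInput and returns the decoded String.
        DataInputStream in = new DataInputStream(new ByteArrayInputStream(buffer.toByteArray()));
        System.out.println(Text.readString(in)); // prints: hello, hadoop
    }
}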

Usage

From source file:org.apache.tez.mapreduce.input.SimpleInput.java

License:Apache License

public org.apache.hadoop.mapred.InputSplit getOldSplitDetails(TaskSplitIndex splitMetaInfo) throws IOException {
    Path file = new Path(splitMetaInfo.getSplitLocation());
    FileSystem fs = FileSystem.getLocal(jobConf);
    file = fs.makeQualified(file);
    LOG.info("Reading input split file from : " + file);
    long offset = splitMetaInfo.getStartOffset();

    FSDataInputStream inFile = fs.open(file);
    inFile.seek(offset);
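    // The serialized split file stores the split class name as a
    // Text-encoded string, followed by the split's own serialized fields.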
    String className = Text.readString(inFile);
    Class<org.apache.hadoop.mapred.InputSplit> cls;
    try {
        cls = (Class<org.apache.hadoop.mapred.InputSplit>) jobConf.getClassByName(className);
    } catch (ClassNotFoundException ce) {
        IOException wrap = new IOException("Split class " + className + " not found");
        wrap.initCause(ce);
        throw wrap;
    }
    SerializationFactory factory = new SerializationFactory(jobConf);
    Deserializer<org.apache.hadoop.mapred.InputSplit> deserializer = (Deserializer<org.apache.hadoop.mapred.InputSplit>) factory
            .getDeserializer(cls);
    deserializer.open(inFile);
    org.apache.hadoop.mapred.InputSplit split = deserializer.deserialize(null);
    long pos = inFile.getPos();
    reporter.getCounter(TaskCounter.SPLIT_RAW_BYTES).increment(pos - offset);
    inFile.close();
    return split;
}

From source file:org.apache.tez.mapreduce.input.SimpleInput.java

License:Apache License

public org.apache.hadoop.mapreduce.InputSplit getNewSplitDetails(TaskSplitIndex splitMetaInfo)
        throws IOException {
    Path file = new Path(splitMetaInfo.getSplitLocation());
    long offset = splitMetaInfo.getStartOffset();

    // Split information read from local filesystem.
    FileSystem fs = FileSystem.getLocal(jobConf);
    file = fs.makeQualified(file);
    LOG.info("Reading input split file from : " + file);
    FSDataInputStream inFile = fs.open(file);
    inFile.seek(offset);
    String className = Text.readString(inFile);
    Class<org.apache.hadoop.mapreduce.InputSplit> cls;
    try {
        cls = (Class<org.apache.hadoop.mapreduce.InputSplit>) jobConf.getClassByName(className);
    } catch (ClassNotFoundException ce) {
        IOException wrap = new IOException("Split class " + className + " not found");
        wrap.initCause(ce);
        throw wrap;
    }
    SerializationFactory factory = new SerializationFactory(jobConf);
    Deserializer<org.apache.hadoop.mapreduce.InputSplit> deserializer = (Deserializer<org.apache.hadoop.mapreduce.InputSplit>) factory
            .getDeserializer(cls);
    deserializer.open(inFile);
    org.apache.hadoop.mapreduce.InputSplit split = deserializer.deserialize(null);
    long pos = inFile.getPos();
    reporter.getCounter(TaskCounter.SPLIT_RAW_BYTES).increment(pos - offset);
    inFile.close();
    return split;
}

From source file:org.apache.tez.mapreduce.lib.MRInputUtils.java

License:Apache License

@SuppressWarnings("unchecked")
public static org.apache.hadoop.mapreduce.InputSplit getNewSplitDetailsFromDisk(TaskSplitIndex splitMetaInfo,
        JobConf jobConf, TezCounter splitBytesCounter) throws IOException {
    Path file = new Path(splitMetaInfo.getSplitLocation());
    long offset = splitMetaInfo.getStartOffset();

    // Split information read from local filesystem.
    FileSystem fs = FileSystem.getLocal(jobConf);
    file = fs.makeQualified(file);
    LOG.info("Reading input split file from : " + file);
    FSDataInputStream inFile = fs.open(file);
    inFile.seek(offset);
    String className = Text.readString(inFile);
    Class<org.apache.hadoop.mapreduce.InputSplit> cls;
    try {
        cls = (Class<org.apache.hadoop.mapreduce.InputSplit>) jobConf.getClassByName(className);
    } catch (ClassNotFoundException ce) {
        IOException wrap = new IOException("Split class " + className + " not found");
        wrap.initCause(ce);
        throw wrap;
    }
    SerializationFactory factory = new SerializationFactory(jobConf);
    Deserializer<org.apache.hadoop.mapreduce.InputSplit> deserializer = (Deserializer<org.apache.hadoop.mapreduce.InputSplit>) factory
            .getDeserializer(cls);
    deserializer.open(inFile);
    org.apache.hadoop.mapreduce.InputSplit split = deserializer.deserialize(null);
    long pos = inFile.getPos();
    if (splitBytesCounter != null) {
        splitBytesCounter.increment(pos - offset);
    }
    inFile.close();
    return split;
}

From source file:org.apache.tez.mapreduce.lib.MRInputUtils.java

License:Apache License

@SuppressWarnings("unchecked")
public static InputSplit getOldSplitDetailsFromDisk(TaskSplitIndex splitMetaInfo, JobConf jobConf,
        TezCounter splitBytesCounter) throws IOException {
    Path file = new Path(splitMetaInfo.getSplitLocation());
    FileSystem fs = FileSystem.getLocal(jobConf);
    file = fs.makeQualified(file);
    LOG.info("Reading input split file from : " + file);
    long offset = splitMetaInfo.getStartOffset();

    FSDataInputStream inFile = fs.open(file);
    inFile.seek(offset);
    String className = Text.readString(inFile);
    Class<org.apache.hadoop.mapred.InputSplit> cls;
    try {
        cls = (Class<org.apache.hadoop.mapred.InputSplit>) jobConf.getClassByName(className);
    } catch (ClassNotFoundException ce) {
        IOException wrap = new IOException("Split class " + className + " not found");
        wrap.initCause(ce);
        throw wrap;
    }
    SerializationFactory factory = new SerializationFactory(jobConf);
    Deserializer<org.apache.hadoop.mapred.InputSplit> deserializer = (Deserializer<org.apache.hadoop.mapred.InputSplit>) factory
            .getDeserializer(cls);
    deserializer.open(inFile);
    org.apache.hadoop.mapred.InputSplit split = deserializer.deserialize(null);
    long pos = inFile.getPos();
    if (splitBytesCounter != null) {
        splitBytesCounter.increment(pos - offset);
    }
    inFile.close();
    return split;
}

From source file:org.apache.tez.runtime.api.impl.GroupInputSpec.java

License:Apache License

@Override
public void readFields(DataInput in) throws IOException {
    groupName = StringInterner.weakIntern(Text.readString(in));
    int numMembers = in.readInt();
    groupVertices = Lists.newArrayListWithCapacity(numMembers);
    for (int i = 0; i < numMembers; ++i) {
        groupVertices.add(StringInterner.weakIntern(Text.readString(in)));
    }
    mergedInputDescriptor = new InputDescriptor();
    mergedInputDescriptor.readFields(in);
}

From source file:org.apache.tez.runtime.api.impl.TaskStatistics.java

License:Apache License

@Override
public void readFields(DataInput in) throws IOException {
    int numEntries = in.readInt();
    for (int i = 0; i < numEntries; ++i) {
        String edgeName = Text.readString(in);
        IOStatistics edgeStats = new IOStatistics();
        edgeStats.readFields(in);
        addIO(edgeName, edgeStats);
    }
}

From source file:org.apache.tez.runtime.api.impl.TezHeartbeatRequest.java

License:Apache License

@Override
public void readFields(DataInput in) throws IOException {
    if (in.readBoolean()) {
        int eventsCount = in.readInt();
        events = new ArrayList<TezEvent>(eventsCount);
        for (int i = 0; i < eventsCount; ++i) {
            TezEvent e = new TezEvent();
            e.readFields(in);
            events.add(e);
        }
    }
    if (in.readBoolean()) {
        currentTaskAttemptID = TezTaskAttemptID.readTezTaskAttemptID(in);
    } else {
        currentTaskAttemptID = null;
    }
    startIndex = in.readInt();
    maxEvents = in.readInt();
    requestId = in.readLong();
    containerIdentifier = Text.readString(in);
}

From source file:org.broadinstitute.sting.gatk.hadoop.XFileSplit.java

License:Apache License

@Override
public void readFields(DataInput in) throws IOException {
    file = new Path(Text.readString(in));
    start = in.readLong();
    length = in.readLong();
    fileId = in.readLong();
    splitId = in.readLong();
    hosts = null;
}

From source file:org.commoncrawl.hadoop.io.ARCResource.java

License:Open Source License

/**
 * {@inheritDoc}
 */
public void readFields(DataInput in) throws IOException {
    name = Text.readString(in);
    size = in.readLong();
}

From source file:org.commoncrawl.hadoop.io.ARCSplit.java

License:Open Source License

/**
 * {@inheritDoc}
 */
public void readFields(DataInput in) throws IOException {
    int nResources = in.readInt();
    resources = new ARCResource[nResources];
    for (int i = 0; i < nResources; i++) {
        resources[i] = new ARCResource(Text.readString(in), in.readLong());
    }
    size = in.readLong();
    hosts = null;
}