Example usage for org.apache.hadoop.io.WritableUtils.readStringArray

Introduction

On this page you can find example usages of org.apache.hadoop.io.WritableUtils.readStringArray.

Prototype

public static String[] readStringArray(DataInput in) throws IOException 
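
The method deserializes a String[] written by the symmetric WritableUtils.writeStringArray: a vint element count followed by each element as a vint-length-prefixed UTF-8 string. A minimal round-trip sketch (class and variable names here are illustrative, not taken from the sources below):

import java.io.IOException;

import org.apache.hadoop.io.DataInputBuffer;
import org.apache.hadoop.io.DataOutputBuffer;
import org.apache.hadoop.io.WritableUtils;

public class ReadStringArrayDemo {
    public static void main(String[] args) throws IOException {
        // Write an array with the symmetric writeStringArray call.
        DataOutputBuffer out = new DataOutputBuffer();
        WritableUtils.writeStringArray(out, new String[] { "alpha", "beta", "gamma" });

        // Wrap the serialized bytes and read the array back.
        DataInputBuffer in = new DataInputBuffer();
        in.reset(out.getData(), out.getLength());
        String[] roundTripped = WritableUtils.readStringArray(in);

        for (String s : roundTripped) {
            System.out.println(s); // prints alpha, beta, gamma
        }
    }
}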

Usage

From source file: cascading.tap.hadoop.io.MultiInputSplit.java

License: Open Source License

public void readFields(DataInput in) throws IOException {
    String splitType = in.readUTF();
    config = new HashMap<String, String>();

    String[] keys = WritableUtils.readStringArray(in);
    String[] values = WritableUtils.readStringArray(in);

    for (int i = 0; i < keys.length; i++)
        config.put(keys[i], values[i]);

    if (LOG.isDebugEnabled()) {
        LOG.debug("current split config diff:");
        for (Map.Entry<String, String> entry : config.entrySet())
            LOG.debug("key: {}, value: {}", entry.getKey(), entry.getValue());
    }

    JobConf currentConf = HadoopUtil.mergeConf(jobConf, config, false);

    try {
        inputSplit = (InputSplit) ReflectionUtils.newInstance(currentConf.getClassByName(splitType),
                currentConf);
    } catch (ClassNotFoundException exp) {
        throw new IOException("split class " + splitType + " not found");
    }

    inputSplit.readFields(in);

    if (inputSplit instanceof FileSplit) {
        Path path = ((FileSplit) inputSplit).getPath();

        if (path != null) {
            jobConf.set(CASCADING_SOURCE_PATH, path.toString());

            LOG.info("current split input path: {}", path);
        }
    }
}
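
The two readStringArray calls above only line up because the write side serializes the config map as two parallel arrays; the older cascading.tap.hadoop.MultiInputSplit below follows the same pattern. A hypothetical sketch of that write-side counterpart (the actual Cascading write method is not shown on this page, so names and ordering are assumptions):

public void write(DataOutput out) throws IOException {
    // Record the concrete split class so readFields can re-instantiate it.
    out.writeUTF(inputSplit.getClass().getName());

    // Serialize the config map as two parallel string arrays.
    String[] keys = config.keySet().toArray(new String[0]);
    String[] values = new String[keys.length];
    for (int i = 0; i < keys.length; i++)
        values[i] = config.get(keys[i]);

    WritableUtils.writeStringArray(out, keys);
    WritableUtils.writeStringArray(out, values);

    // Let the wrapped split serialize its own fields.
    inputSplit.write(out);
}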

From source file: cascading.tap.hadoop.MultiInputSplit.java

License: Open Source License

public void readFields(DataInput in) throws IOException {
    String splitType = in.readUTF();
    config = new HashMap<String, String>();

    String[] keys = WritableUtils.readStringArray(in);
    String[] values = WritableUtils.readStringArray(in);

    for (int i = 0; i < keys.length; i++)
        config.put(keys[i], values[i]);

    JobConf currentConf = MultiInputFormat.mergeConf(jobConf, config, false);

    try {
        inputSplit = (InputSplit) ReflectionUtils.newInstance(currentConf.getClassByName(splitType),
                currentConf);
    } catch (ClassNotFoundException exp) {
        throw new IOException("split class " + splitType + " not found");
    }

    inputSplit.readFields(in);

    if (inputSplit instanceof FileSplit) {
        Path path = ((FileSplit) inputSplit).getPath();

        if (path != null) {
            jobConf.set("cascading.source.path", path.toString());

            if (LOG.isInfoEnabled())
                LOG.info("current split input path: " + path.toString());
        }
    }
}

From source file: com.asakusafw.bridge.hadoop.directio.Util.java

License: Apache License

static DirectInputFragment readFragment(DataInput in) throws IOException {
    String path = WritableUtils.readString(in);
    long offset = WritableUtils.readVLong(in);
    long length = WritableUtils.readVLong(in);
    String[] locations = WritableUtils.readStringArray(in);
    Map<String, String> attributes = readMap(in);
    return new DirectInputFragment(path, offset, length, Arrays.asList(locations), attributes);
}
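
A fragment read this way must have been written field-for-field in the same order. A plausible sketch of the write side, assuming DirectInputFragment getters and a writeMap helper that mirrors readMap (all of these names are assumptions, not the actual Asakusa code):

static void writeFragment(DataOutput out, DirectInputFragment fragment) throws IOException {
    // Field order must match readFragment exactly.
    WritableUtils.writeString(out, fragment.getPath());
    WritableUtils.writeVLong(out, fragment.getOffset());
    WritableUtils.writeVLong(out, fragment.getSize());
    List<String> locations = fragment.getOwnerNodeNames();
    WritableUtils.writeStringArray(out, locations.toArray(new String[0]));
    writeMap(out, fragment.getAttributes()); // assumed counterpart of readMap
}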

From source file: com.asakusafw.runtime.stage.input.StageInputDriver.java

License: Apache License

@SuppressWarnings("rawtypes")
private static List<StageInput> decode(Configuration conf, String encoded)
        throws IOException, ClassNotFoundException {
    assert conf != null;
    assert encoded != null;
    ByteArrayInputStream source = new ByteArrayInputStream(encoded.getBytes(ASCII));
    DataInputStream input = new DataInputStream(new GZIPInputStream(new Base64InputStream(source)));
    long version = WritableUtils.readVLong(input);
    if (version != SERIAL_VERSION) {
        throw new IOException(MessageFormat.format("Invalid StageInput version: framework={0}, saw={1}",
                SERIAL_VERSION, version));
    }
    String[] dictionary = WritableUtils.readStringArray(input);
    int inputListSize = WritableUtils.readVInt(input);
    List<StageInput> results = new ArrayList<>();
    for (int inputListIndex = 0; inputListIndex < inputListSize; inputListIndex++) {
        String pathString = readEncoded(input, dictionary);
        String formatName = readEncoded(input, dictionary);
        String mapperName = readEncoded(input, dictionary);
        int attributeCount = WritableUtils.readVInt(input);
        Map<String, String> attributes = new HashMap<>();
        for (int attributeIndex = 0; attributeIndex < attributeCount; attributeIndex++) {
            String keyString = readEncoded(input, dictionary);
            String valueString = readEncoded(input, dictionary);
            attributes.put(keyString, valueString);
        }
        Class<? extends InputFormat> formatClass = conf.getClassByName(formatName)
                .asSubclass(InputFormat.class);
        Class<? extends Mapper> mapperClass = conf.getClassByName(mapperName).asSubclass(Mapper.class);
        results.add(new StageInput(pathString, formatClass, mapperClass, attributes));
    }
    return results;
}
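
readEncoded is a private helper not shown above. Given that decode materializes a shared string dictionary up front, a plausible reading is a vint index lookup; this sketch is an assumption about the helper, not the actual Asakusa implementation:

private static String readEncoded(DataInput input, String[] dictionary) throws IOException {
    // Each distinct string is stored once in the dictionary; later
    // references are written as a vint index into it.
    int index = WritableUtils.readVInt(input);
    return dictionary[index];
}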

From source file: com.iflytek.spider.protocol.ProtocolStatus.java

License: Apache License

public void readFields(DataInput in) throws IOException {
    byte version = in.readByte();
    switch (version) {
    case 1:
        code = in.readByte();
        lastModified = in.readLong();
        args = WritableUtils.readCompressedStringArray(in);
        break;
    case VERSION:
        code = in.readByte();
        lastModified = in.readLong();
        args = WritableUtils.readStringArray(in);
        break;
    default:
        throw new VersionMismatchException(VERSION, version);
    }
}
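
The version switch keeps old payloads readable: version 1 stored args with readCompressedStringArray, while the current VERSION stores a plain string array. A hypothetical sketch of the matching write side, which only ever emits the current format:

public void write(DataOutput out) throws IOException {
    // Always emit the current VERSION; the version-1 branch in
    // readFields exists only to deserialize old data.
    out.writeByte(VERSION);
    out.writeByte(code);
    out.writeLong(lastModified);
    WritableUtils.writeStringArray(out, args);
}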

From source file: com.marklogic.mapreduce.ForestDocument.java

License: Apache License

@Override
public void readFields(DataInput in) throws IOException {
    fragmentOrdinal = in.readLong();
    collections = WritableUtils.readStringArray(in);
}

From source file: com.netflix.aegisthus.input.AegSplit.java

License: Apache License

@Override
public void readFields(DataInput in) throws IOException {
    end = in.readLong();
    hosts = WritableUtils.readStringArray(in);
    path = new Path(WritableUtils.readString(in));
    compressed = in.readBoolean();
    if (compressed) {
        compressedPath = new Path(WritableUtils.readString(in));
    }
    start = in.readLong();
    type = WritableUtils.readEnum(in, Type.class);
    int size = in.readInt();
    if (type == Type.sstable) {
        convertors = Maps.newHashMap();
        for (int i = 0; i < size; i++) {
            String[] parts = WritableUtils.readStringArray(in);
            try {
                convertors.put(parts[0], TypeParser.parse(parts[1]));
            } catch (ConfigurationException e) {
                throw new IOException(e);
            } catch (SyntaxException e) {
                throw new IOException(e);
            }
        }
    }
}
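
Each convertor entry is round-tripped as a two-element string array: parts[0] carries the column name and parts[1] the serialized Cassandra type. A hypothetical sketch of the write side for that map (field and type names are assumed):

// Inside a matching write(DataOutput out) method:
out.writeInt(convertors.size());
for (Map.Entry<String, AbstractType<?>> entry : convertors.entrySet()) {
    // Pack each entry as a two-element array, matching the
    // parts[0]/parts[1] reads in readFields above.
    String[] parts = { entry.getKey(), entry.getValue().toString() };
    WritableUtils.writeStringArray(out, parts);
}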

From source file: com.netflix.aegisthus.input.splits.AegSplit.java

License: Apache License

@Override
public void readFields(@Nonnull DataInput in) throws IOException {
    end = in.readLong();
    hosts = WritableUtils.readStringArray(in);
    path = new Path(WritableUtils.readString(in));
    start = in.readLong();
}

From source file: org.apache.mahout.classifier.chi_rw.data.Dataset.java

License: Apache License

@Override
public void readFields(DataInput in) throws IOException {
    int nbAttributes = in.readInt();
    attributes = new Attribute[nbAttributes];
    for (int attr = 0; attr < nbAttributes; attr++) {
        String name = WritableUtils.readString(in);
        attributes[attr] = Attribute.valueOf(name);
    }

    ignored = Chi_RWUtils.readIntArray(in);

    // only CATEGORICAL attributes have values
    values = new String[nbAttributes][];
    for (int attr = 0; attr < nbAttributes; attr++) {
        if (attributes[attr].isCategorical()) {
            values[attr] = WritableUtils.readStringArray(in);
        }
    }

    // only NUMERICAL attributes have values
    nvalues = new double[nbAttributes][];
    for (int attr = 0; attr < nbAttributes; attr++) {
        if (attributes[attr].isNumerical()) {
            nvalues[attr] = Chi_RWUtils.readDoubleArray(in);
        }
    }

    minmaxvalues = new double[nbAttributes][];
    for (int attr = 0; attr < nbAttributes; attr++) {
        minmaxvalues[attr] = Chi_RWUtils.readDoubleArray(in);
    }

    labelId = in.readInt();
    nbInstances = in.readInt();
}

From source file: org.apache.mahout.classifier.chi_rwcs.data.Dataset.java

License: Apache License

@Override
public void readFields(DataInput in) throws IOException {
    int nbAttributes = in.readInt();
    attributes = new Attribute[nbAttributes];
    for (int attr = 0; attr < nbAttributes; attr++) {
        String name = WritableUtils.readString(in);
        attributes[attr] = Attribute.valueOf(name);
    }

    ignored = Chi_RWCSUtils.readIntArray(in);

    // only CATEGORICAL attributes have values
    values = new String[nbAttributes][];
    for (int attr = 0; attr < nbAttributes; attr++) {
        if (attributes[attr].isCategorical()) {
            values[attr] = WritableUtils.readStringArray(in);
        }
    }

    // only NUMERICAL attributes have values
    nvalues = new double[nbAttributes][];
    for (int attr = 0; attr < nbAttributes; attr++) {
        if (attributes[attr].isNumerical()) {
            nvalues[attr] = DFUtils.readDoubleArray(in);
        }
    }

    minmaxvalues = new double[nbAttributes][];
    for (int attr = 0; attr < nbAttributes; attr++) {
        minmaxvalues[attr] = DFUtils.readDoubleArray(in);
    }

    labelId = in.readInt();
    nbInstances = in.readInt();
}