Example usage for org.apache.hadoop.io.WritableUtils.readStringArray

Introduction

On this page you can find example usages of org.apache.hadoop.io.WritableUtils.readStringArray.

Prototype

public static String[] readStringArray(DataInput in) throws IOException 
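
The method deserializes a String[] written by the symmetric WritableUtils.writeStringArray: a vint element count followed by each element as a vint-length-prefixed UTF-8 string. A minimal round-trip sketch (class and variable names here are illustrative, not taken from the sources below):

import java.io.IOException;

import org.apache.hadoop.io.DataInputBuffer;
import org.apache.hadoop.io.DataOutputBuffer;
import org.apache.hadoop.io.WritableUtils;

public class ReadStringArrayDemo {
    public static void main(String[] args) throws IOException {
        // Write an array with the symmetric writeStringArray call.
        DataOutputBuffer out = new DataOutputBuffer();
        WritableUtils.writeStringArray(out, new String[] { "alpha", "beta", "gamma" });

        // Wrap the serialized bytes and read the array back.
        DataInputBuffer in = new DataInputBuffer();
        in.reset(out.getData(), out.getLength());
        String[] roundTripped = WritableUtils.readStringArray(in);

        for (String s : roundTripped) {
            System.out.println(s); // prints alpha, beta, gamma
        }
    }
}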

Usage

From source file: cascading.tap.hadoop.io.MultiInputSplit.java

License: Open Source License

public void readFields(DataInput in) throws IOException {
    String splitType = in.readUTF();
    config = new HashMap<String, String>();

    String[] keys = WritableUtils.readStringArray(in);
    String[] values = WritableUtils.readStringArray(in);

    for (int i = 0; i < keys.length; i++)
        config.put(keys[i], values[i]);

    if (LOG.isDebugEnabled()) {
        LOG.debug("current split config diff:");
        for (Map.Entry<String, String> entry : config.entrySet())
            LOG.debug("key: {}, value: {}", entry.getKey(), entry.getValue());
    }

    JobConf currentConf = HadoopUtil.mergeConf(jobConf, config, false);

    try {
        inputSplit = (InputSplit) ReflectionUtils.newInstance(currentConf.getClassByName(splitType),
                currentConf);
    } catch (ClassNotFoundException exp) {
        throw new IOException("split class " + splitType + " not found");
    }

    inputSplit.readFields(in);

    if (inputSplit instanceof FileSplit) {
        Path path = ((FileSplit) inputSplit).getPath();

        if (path != null) {
            jobConf.set(CASCADING_SOURCE_PATH, path.toString());

            LOG.info("current split input path: {}", path);
        }
    }
}
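
The two readStringArray calls above only line up because the write side serializes the config map as two parallel arrays; the older cascading.tap.hadoop.MultiInputSplit below follows the same pattern. A hypothetical sketch of that write-side counterpart (the actual Cascading write method is not shown on this page, so names and ordering are assumptions):

public void write(DataOutput out) throws IOException {
    // Record the concrete split class so readFields can re-instantiate it.
    out.writeUTF(inputSplit.getClass().getName());

    // Serialize the config map as two parallel string arrays.
    String[] keys = config.keySet().toArray(new String[0]);
    String[] values = new String[keys.length];
    for (int i = 0; i < keys.length; i++)
        values[i] = config.get(keys[i]);

    WritableUtils.writeStringArray(out, keys);
    WritableUtils.writeStringArray(out, values);

    // Let the wrapped split serialize its own fields.
    inputSplit.write(out);
}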

From source file: cascading.tap.hadoop.MultiInputSplit.java

License: Open Source License

public void readFields(DataInput in) throws IOException {
    String splitType = in.readUTF();
    config = new HashMap<String, String>();

    String[] keys = WritableUtils.readStringArray(in);
    String[] values = WritableUtils.readStringArray(in);

    for (int i = 0; i < keys.length; i++)
        config.put(keys[i], values[i]);

    JobConf currentConf = MultiInputFormat.mergeConf(jobConf, config, false);

    try {
        inputSplit = (InputSplit) ReflectionUtils.newInstance(currentConf.getClassByName(splitType),
                currentConf);
    } catch (ClassNotFoundException exp) {
        throw new IOException("split class " + splitType + " not found");
    }

    inputSplit.readFields(in);

    if (inputSplit instanceof FileSplit) {
        Path path = ((FileSplit) inputSplit).getPath();

        if (path != null) {
            jobConf.set("cascading.source.path", path.toString());

            if (LOG.isInfoEnabled())
                LOG.info("current split input path: " + path.toString());
        }
    }
}

From source file: com.asakusafw.bridge.hadoop.directio.Util.java

License: Apache License

static DirectInputFragment readFragment(DataInput in) throws IOException {
    String path = WritableUtils.readString(in);
    long offset = WritableUtils.readVLong(in);
    long length = WritableUtils.readVLong(in);
    String[] locations = WritableUtils.readStringArray(in);
    Map<String, String> attributes = readMap(in);
    return new DirectInputFragment(path, offset, length, Arrays.asList(locations), attributes);
}
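
A fragment read this way must have been written field-for-field in the same order. A plausible sketch of the write side, assuming DirectInputFragment getters and a writeMap helper that mirrors readMap (all of these names are assumptions, not the actual Asakusa code):

static void writeFragment(DataOutput out, DirectInputFragment fragment) throws IOException {
    // Field order must match readFragment exactly.
    WritableUtils.writeString(out, fragment.getPath());
    WritableUtils.writeVLong(out, fragment.getOffset());
    WritableUtils.writeVLong(out, fragment.getSize());
    List<String> locations = fragment.getOwnerNodeNames();
    WritableUtils.writeStringArray(out, locations.toArray(new String[0]));
    writeMap(out, fragment.getAttributes()); // assumed counterpart of readMap
}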

From source file: com.asakusafw.runtime.stage.input.StageInputDriver.java

License: Apache License

@SuppressWarnings("rawtypes")
private static List<StageInput> decode(Configuration conf, String encoded)
        throws IOException, ClassNotFoundException {
    assert conf != null;
    assert encoded != null;
    ByteArrayInputStream source = new ByteArrayInputStream(encoded.getBytes(ASCII));
    DataInputStream input = new DataInputStream(new GZIPInputStream(new Base64InputStream(source)));
    long version = WritableUtils.readVLong(input);
    if (version != SERIAL_VERSION) {
        throw new IOException(MessageFormat.format("Invalid StageInput version: framework={0}, saw={1}",
                SERIAL_VERSION, version));
    }
    String[] dictionary = WritableUtils.readStringArray(input);
    int inputListSize = WritableUtils.readVInt(input);
    List<StageInput> results = new ArrayList<>();
    for (int inputListIndex = 0; inputListIndex < inputListSize; inputListIndex++) {
        String pathString = readEncoded(input, dictionary);
        String formatName = readEncoded(input, dictionary);
        String mapperName = readEncoded(input, dictionary);
        int attributeCount = WritableUtils.readVInt(input);
        Map<String, String> attributes = new HashMap<>();
        for (int attributeIndex = 0; attributeIndex < attributeCount; attributeIndex++) {
            String keyString = readEncoded(input, dictionary);
            String valueString = readEncoded(input, dictionary);
            attributes.put(keyString, valueString);
        }
        Class<? extends InputFormat> formatClass = conf.getClassByName(formatName)
                .asSubclass(InputFormat.class);
        Class<? extends Mapper> mapperClass = conf.getClassByName(mapperName).asSubclass(Mapper.class);
        results.add(new StageInput(pathString, formatClass, mapperClass, attributes));
    }
    return results;
}
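
readEncoded is a private helper not shown above. Given that decode materializes a shared string dictionary up front, a plausible reading is a vint index lookup; this sketch is an assumption about the helper, not the actual Asakusa implementation:

private static String readEncoded(DataInput input, String[] dictionary) throws IOException {
    // Each distinct string is stored once in the dictionary; later
    // references are written as a vint index into it.
    int index = WritableUtils.readVInt(input);
    return dictionary[index];
}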

From source file: com.iflytek.spider.protocol.ProtocolStatus.java

License: Apache License

public void readFields(DataInput in) throws IOException {
    byte version = in.readByte();
    switch (version) {
    case 1:
        code = in.readByte();
        lastModified = in.readLong();
        args = WritableUtils.readCompressedStringArray(in);
        break;
    case VERSION:
        code = in.readByte();
        lastModified = in.readLong();
        args = WritableUtils.readStringArray(in);
        break;
    default:
        throw new VersionMismatchException(VERSION, version);
    }
}
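
The version switch keeps old payloads readable: version 1 stored args with readCompressedStringArray, while the current VERSION stores a plain string array. A hypothetical sketch of the matching write side, which only ever emits the current format:

public void write(DataOutput out) throws IOException {
    // Always emit the current VERSION; the version-1 branch in
    // readFields exists only to deserialize old data.
    out.writeByte(VERSION);
    out.writeByte(code);
    out.writeLong(lastModified);
    WritableUtils.writeStringArray(out, args);
}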

From source file: com.marklogic.mapreduce.ForestDocument.java

License: Apache License

@Override
public void readFields(DataInput in) throws IOException {
    fragmentOrdinal = in.readLong();
    collections = WritableUtils.readStringArray(in);
}

From source file: com.netflix.aegisthus.input.AegSplit.java

License: Apache License

@Override
public void readFields(DataInput in) throws IOException {
    end = in.readLong();
    hosts = WritableUtils.readStringArray(in);
    path = new Path(WritableUtils.readString(in));
    compressed = in.readBoolean();
    if (compressed) {
        compressedPath = new Path(WritableUtils.readString(in));
    }
    start = in.readLong();
    type = WritableUtils.readEnum(in, Type.class);
    int size = in.readInt();
    if (type == Type.sstable) {
        convertors = Maps.newHashMap();
        for (int i = 0; i < size; i++) {
            String[] parts = WritableUtils.readStringArray(in);
            try {
                convertors.put(parts[0], TypeParser.parse(parts[1]));
            } catch (ConfigurationException e) {
                throw new IOException(e);
            } catch (SyntaxException e) {
                throw new IOException(e);
            }
        }
    }
}
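
Each convertor entry is round-tripped as a two-element string array: parts[0] carries the column name and parts[1] the serialized Cassandra type. A hypothetical sketch of the write side for that map (field and type names are assumed):

// Inside a matching write(DataOutput out) method:
out.writeInt(convertors.size());
for (Map.Entry<String, AbstractType<?>> entry : convertors.entrySet()) {
    // Pack each entry as a two-element array, matching the
    // parts[0]/parts[1] reads in readFields above.
    String[] parts = { entry.getKey(), entry.getValue().toString() };
    WritableUtils.writeStringArray(out, parts);
}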

From source file: com.netflix.aegisthus.input.splits.AegSplit.java

License: Apache License

@Override
public void readFields(@Nonnull DataInput in) throws IOException {
    end = in.readLong();
    hosts = WritableUtils.readStringArray(in);
    path = new Path(WritableUtils.readString(in));
    start = in.readLong();
}

From source file: org.apache.mahout.classifier.chi_rw.data.Dataset.java

License: Apache License

@Override
public void readFields(DataInput in) throws IOException {
    int nbAttributes = in.readInt();
    attributes = new Attribute[nbAttributes];
    for (int attr = 0; attr < nbAttributes; attr++) {
        String name = WritableUtils.readString(in);
        attributes[attr] = Attribute.valueOf(name);
    }

    ignored = Chi_RWUtils.readIntArray(in);

    // only CATEGORICAL attributes have values
    values = new String[nbAttributes][];
    for (int attr = 0; attr < nbAttributes; attr++) {
        if (attributes[attr].isCategorical()) {
            values[attr] = WritableUtils.readStringArray(in);
        }
    }

    // only NUMERICAL attributes have values
    nvalues = new double[nbAttributes][];
    for (int attr = 0; attr < nbAttributes; attr++) {
        if (attributes[attr].isNumerical()) {
            nvalues[attr] = Chi_RWUtils.readDoubleArray(in);
        }
    }

    minmaxvalues = new double[nbAttributes][];
    for (int attr = 0; attr < nbAttributes; attr++) {
        minmaxvalues[attr] = Chi_RWUtils.readDoubleArray(in);
    }

    labelId = in.readInt();
    nbInstances = in.readInt();
}

From source file: org.apache.mahout.classifier.chi_rwcs.data.Dataset.java

License: Apache License

@Override
public void readFields(DataInput in) throws IOException {
    int nbAttributes = in.readInt();
    attributes = new Attribute[nbAttributes];
    for (int attr = 0; attr < nbAttributes; attr++) {
        String name = WritableUtils.readString(in);
        attributes[attr] = Attribute.valueOf(name);
    }

    ignored = Chi_RWCSUtils.readIntArray(in);

    // only CATEGORICAL attributes have values
    values = new String[nbAttributes][];
    for (int attr = 0; attr < nbAttributes; attr++) {
        if (attributes[attr].isCategorical()) {
            values[attr] = WritableUtils.readStringArray(in);
        }
    }

    // only NUMERICAL attributes have values
    nvalues = new double[nbAttributes][];
    for (int attr = 0; attr < nbAttributes; attr++) {
        if (attributes[attr].isNumerical()) {
            nvalues[attr] = DFUtils.readDoubleArray(in);
        }
    }

    minmaxvalues = new double[nbAttributes][];
    for (int attr = 0; attr < nbAttributes; attr++) {
        minmaxvalues[attr] = DFUtils.readDoubleArray(in);
    }

    labelId = in.readInt();
    nbInstances = in.readInt();
}