Example usage for org.apache.hadoop.io Text getLength

Introduction

This page collects example usages of the getLength() method of org.apache.hadoop.io.Text.

Prototype

@Override
public int getLength()

Source Link

Document

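Returns the number of valid bytes in the backing byte array. The array returned by getBytes() may be longer than the content, so callers should read only the first getLength() bytes, as the examples below do.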

Usage

From source file:com.gotometrics.orderly.RowKeyUtils.java

License:Apache License

/**
 * Converts a Text object to a byte array, copying only if necessary.
 *
 * @param t the Text whose contents are converted
 * @return the result of the {@code toBytes(byte[], int, int)} overload applied
 *         to {@code t.getBytes()}, offset 0 and length {@code t.getLength()}
 */
public static byte[] toBytes(Text t) {
    final byte[] backing = t.getBytes();
    final int validLength = t.getLength();
    return toBytes(backing, 0, validLength);
}

From source file:com.hadoop.mapreduce.TestLzoTextInputFormat.java

License:Open Source License

/**
 * Generate random data, compress it, index and md5 hash the data.
 * Then read it all back and md5 that too, to verify that it all went ok.
 * /*from  w w w.  j  a  v a2s  .  co m*/
 * @param testWithIndex Should we index or not?
 * @param charsToOutput How many characters of random data should we output.
 * @throws IOException
 * @throws NoSuchAlgorithmException
 * @throws InterruptedException
 */
private void runTest(boolean testWithIndex, int charsToOutput)
        throws IOException, NoSuchAlgorithmException, InterruptedException {

    if (!GPLNativeCodeLoader.isNativeCodeLoaded()) {
        LOG.warn("Cannot run this test without the native lzo libraries");
        return;
    }

    Configuration conf = new Configuration();
    conf.setLong("fs.local.block.size", charsToOutput / 2);
    // reducing block size to force a split of the tiny file
    conf.set("io.compression.codecs", LzopCodec.class.getName());

    FileSystem localFs = FileSystem.getLocal(conf);
    localFs.delete(outputDir, true);
    localFs.mkdirs(outputDir);

    Job job = new Job(conf);
    TextOutputFormat.setCompressOutput(job, true);
    TextOutputFormat.setOutputCompressorClass(job, LzopCodec.class);
    TextOutputFormat.setOutputPath(job, outputDir);

    TaskAttemptContext attemptContext = new TaskAttemptContextImpl(job.getConfiguration(),
            new TaskAttemptID("123", 0, TaskType.REDUCE, 1, 2));

    // create some input data
    byte[] expectedMd5 = createTestInput(outputDir, localFs, attemptContext, charsToOutput);

    if (testWithIndex) {
        Path lzoFile = new Path(outputDir, lzoFileName);
        LzoTextInputFormat.createIndex(localFs, lzoFile);
    }

    LzoTextInputFormat inputFormat = new LzoTextInputFormat();
    TextInputFormat.setInputPaths(job, outputDir);

    List<InputSplit> is = inputFormat.getSplits(job);
    //verify we have the right number of lzo chunks
    if (testWithIndex && OUTPUT_BIG == charsToOutput) {
        assertEquals(3, is.size());
    } else {
        assertEquals(1, is.size());
    }

    // let's read it all and calculate the md5 hash
    for (InputSplit inputSplit : is) {
        RecordReader<LongWritable, Text> rr = inputFormat.createRecordReader(inputSplit, attemptContext);
        rr.initialize(inputSplit, attemptContext);

        while (rr.nextKeyValue()) {
            Text value = rr.getCurrentValue();

            md5.update(value.getBytes(), 0, value.getLength());
        }

        rr.close();
    }

    localFs.close();
    assertTrue(Arrays.equals(expectedMd5, md5.digest()));
}

From source file:com.hadoop.mapreduce.TestLzoTextInputFormat.java

License:Open Source License

/**
 * Creates an lzo file with random data.
 * /*from  w  w  w .  ja  v  a  2  s.  c  o m*/
 * @param outputDir Output directory.
 * @param fs File system we're using.
 * @param attemptContext Task attempt context, contains task id etc. 
 * @throws IOException
 * @throws InterruptedException
 */
private byte[] createTestInput(Path outputDir, FileSystem fs, TaskAttemptContext attemptContext,
        int charsToOutput) throws IOException, InterruptedException {

    TextOutputFormat<Text, Text> output = new TextOutputFormat<Text, Text>();
    RecordWriter<Text, Text> rw = null;

    md5.reset();

    try {
        rw = output.getRecordWriter(attemptContext);

        char[] chars = "abcdefghijklmnopqrstuvwxyz\u00E5\u00E4\u00F6".toCharArray();

        Random r = new Random(System.currentTimeMillis());
        Text key = new Text();
        Text value = new Text();
        int charsMax = chars.length - 1;
        for (int i = 0; i < charsToOutput;) {
            i += fillText(chars, r, charsMax, key);
            i += fillText(chars, r, charsMax, value);
            rw.write(key, value);
            md5.update(key.getBytes(), 0, key.getLength());
            // text output format writes tab between the key and value
            md5.update("\t".getBytes("UTF-8"));
            md5.update(value.getBytes(), 0, value.getLength());
        }
    } finally {
        if (rw != null) {
            rw.close(attemptContext);
            OutputCommitter committer = output.getOutputCommitter(attemptContext);
            committer.commitTask(attemptContext);
            committer.cleanupJob(attemptContext);
        }
    }

    byte[] result = md5.digest();
    md5.reset();
    return result;
}

From source file:com.ibm.jaql.io.hadoop.converter.FromDelConverter.java

License:Apache License

/**
 * Converts the given line into a JSON value.
 *
 * @param key    wrapper whose {@code get()} value is forwarded to the delegate
 * @param value  the line; its byte array and {@code getLength()} are forwarded
 * @param target passed through unchanged to the delegate overload
 * @return whatever the four-argument {@code convert} overload returns
 */
@Override
public JsonValue convert(LongWritable key, Text value, JsonValue target) {
    final long keyValue = key.get();
    final byte[] lineBytes = value.getBytes();
    final int lineLength = value.getLength();
    return convert(keyValue, lineBytes, lineLength, target);
}

From source file:com.ibm.jaql.io.hadoop.converter.TextToJsonString.java

License:Apache License

/**
 * Copies the bytes of {@code src} into a {@code MutableJsonString}.
 *
 * @param src    text whose first {@code getLength()} bytes are stored
 * @param target reused as the result when it is already a
 *               {@code MutableJsonString}; otherwise a new instance is created
 * @return the populated {@code MutableJsonString}
 */
@Override
public JsonValue convert(Text src, JsonValue target) {
    final MutableJsonString result = (target instanceof MutableJsonString)
            ? (MutableJsonString) target
            : new MutableJsonString();
    result.set(src.getBytes(), src.getLength());
    return result;
}

From source file:com.ibm.spss.hive.serde2.xml.XmlSerDe.java

License:Open Source License

/**
 * Parses the given writable, cast to {@code Text}, with the XML processor.
 *
 * @param writable the value to deserialize; cast to {@code Text}
 * @return {@code null} when the text is {@code null} or has length 0,
 *         otherwise the result of {@code xmlProcessor.parse}
 * @throws SerDeException wrapping any exception raised while parsing
 * @see org.apache.hadoop.hive.serde2.Deserializer#deserialize(org.apache.hadoop.io.Writable)
 */
@Override
public Object deserialize(Writable writable) throws SerDeException {
    final Text xml = (Text) writable;
    final boolean nothingToParse = (xml == null) || (xml.getLength() == 0);
    if (nothingToParse) {
        return null;
    }
    try {
        return this.xmlProcessor.parse(xml.toString());
    } catch (Exception cause) {
        throw new SerDeException(cause);
    }
}

From source file:com.inmobi.messaging.consumer.databus.mapreduce.DatabusRecordReader.java

License:Apache License

/**
 * Returns the current line decoded as a {@code Message}.
 *
 * @return the result of {@code DatabusUtil.decodeMessage} applied to a copy
 *         of the current line's bytes
 */
@Override
public Message getCurrentValue() throws IOException, InterruptedException {
    final Text line = lineReader.getCurrentValue();
    // Copy exactly getLength() bytes into a fresh array before decoding.
    final int validLength = line.getLength();
    final byte[] payload = new byte[validLength];
    System.arraycopy(line.getBytes(), 0, payload, 0, validLength);
    return DatabusUtil.decodeMessage(payload);
}

From source file:com.kasabi.labs.freebase.mr.Freebase2RDFReducer.java

License:Apache License

/**
 * Emits one record per value whose key is the bytes of {@code key}
 * immediately followed by the bytes of that value, paired with
 * {@code nullWritable}.
 *
 * <p>The value's bytes are appended directly, bounded by
 * {@code getLength()}, instead of going through
 * {@code toString().getBytes()}. That round trip re-encoded the text with
 * the platform default charset, which corrupts non-ASCII data on JVMs whose
 * default is not UTF-8.
 *
 * @param key     the grouping key; its bytes form the prefix of each output key
 * @param values  the values for this key; each yields one output record
 * @param context receives the {@code (k, nullWritable)} pairs
 * @throws IOException          if writing to the context fails
 * @throws InterruptedException if writing to the context is interrupted
 */
@Override
public void reduce(Text key, Iterable<Text> values, Context context) throws IOException, InterruptedException {
    for (Text value : values) {
        log.debug("< ({}, {})", key, value);
        // k is a reusable buffer declared outside this method; reset it for every record.
        k.clear();
        // NOTE(review): the key bytes are deliberately re-read on each pass; Hadoop may
        // refresh the key object while iterating values -- confirm before hoisting.
        byte[] kb = key.getBytes();
        k.append(kb, 0, key.getLength());
        // Append only the first getLength() bytes of the value's array.
        k.append(value.getBytes(), 0, value.getLength());
        context.write(k, nullWritable);
        log.debug("> ({}, {})", k, nullWritable);
    }
}

From source file:com.kylinolap.cube.common.BytesSplitter.java

License:Apache License

/**
 * Tries each entry of {@code COMMON_DELIMS} in order and returns the first
 * one for which {@code split} yields exactly {@code expectedParts} parts.
 *
 * @param value         the line to probe
 * @param expectedParts the number of parts a correct delimiter must produce
 * @return the matching delimiter
 * @throws RuntimeException if no candidate produces {@code expectedParts} parts
 */
public int detectDelim(Text value, int expectedParts) {
    for (int candidate : COMMON_DELIMS) {
        final int partCount = split(value.getBytes(), value.getLength(), (byte) candidate);
        if (partCount == expectedParts) {
            return candidate;
        }
    }
    final String message = "Cannot detect delimeter from first line -- " + value.toString()
            + " -- expect " + expectedParts + " columns";
    throw new RuntimeException(message);
}

From source file:com.kylinolap.cube.measure.MeasureCodec.java

License:Apache License

/**
 * Decodes measures from the content of {@code bytes} into {@code result}.
 *
 * @param bytes  source text; the first {@code getLength()} bytes of its array
 *               are wrapped in a {@code ByteBuffer} without copying
 * @param result array handed to the {@code ByteBuffer}-based {@code decode} overload
 */
public void decode(Text bytes, Object[] result) {
    final byte[] backing = bytes.getBytes();
    final int validLength = bytes.getLength();
    final ByteBuffer view = ByteBuffer.wrap(backing, 0, validLength);
    decode(view, result);
}