Example usage for org.apache.hadoop.io Text getBytes

Introduction

On this page you can find example usage of org.apache.hadoop.io.Text#getBytes().

Prototype

@Override
public byte[] getBytes() 
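
The @Override here is against BinaryComparable, which Text extends and which declares getBytes() as abstract.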

Document

Returns the raw bytes of the backing buffer; however, only data up to getLength() is valid.
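
A minimal standalone sketch of that contract (the class name and sample values are illustrative, not taken from the examples below). Whether stale bytes actually survive a set() depends on Text's internal buffer reuse, but the contract only guarantees the first getLength() bytes:

import java.nio.charset.StandardCharsets;

import org.apache.hadoop.io.Text;

public class TextGetBytesDemo {
    public static void main(String[] args) {
        Text text = new Text("hello, world");
        // set(byte[]) reuses the existing backing buffer, leaving stale bytes
        // past the new length
        text.set("hi".getBytes(StandardCharsets.UTF_8));

        // Wrong: decodes the entire backing array, including stale bytes
        String wrong = new String(text.getBytes(), StandardCharsets.UTF_8);

        // Right: decode only the valid region [0, getLength())
        String right = new String(text.getBytes(), 0, text.getLength(), StandardCharsets.UTF_8);

        System.out.println(wrong); // likely "hillo, world" (stale tail)
        System.out.println(right); // "hi"
    }
}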

Usage

From source file:com.facebook.presto.hive.DwrfHiveRecordCursor.java

License:Apache License

private void parseStringColumn(int column) {
    // don't include column number in message because it causes boxing which is expensive here
    checkArgument(!isPartitionColumn[column], "Column is a partition key");

    loaded[column] = true;
    nulls[column] = false;

    OrcLazyObject lazyObject = getRawValue(column);
    if (lazyObject == null) {
        nulls[column] = true;
        return;
    }

    Object value = materializeValue(lazyObject);
    if (value == null) {
        nulls[column] = true;
        return;
    }

    HiveType type = hiveTypes[column];
    if (type.getCategory() == Category.MAP || type.getCategory() == Category.LIST
            || type.getCategory() == Category.STRUCT) {
        slices[column] = Slices
                .wrappedBuffer(getJsonBytes(sessionTimeZone, lazyObject, fieldInspectors[column]));
    } else if (type.equals(HIVE_STRING)) {
        Text text = checkWritable(value, Text.class);
        slices[column] = Slices.copyOf(Slices.wrappedBuffer(text.getBytes()), 0, text.getLength());
    } else if (type.equals(HIVE_BINARY)) {
        BytesWritable bytesWritable = checkWritable(value, BytesWritable.class);
        slices[column] = Slices.copyOf(Slices.wrappedBuffer(bytesWritable.getBytes()), 0,
                bytesWritable.getLength());
    } else {
        throw new RuntimeException(String.format("%s is not a valid STRING type", type));
    }
}
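
Note how the copy is bounded by text.getLength(): Slices.wrappedBuffer(text.getBytes()) wraps the entire backing array, and the explicit length truncates the slice to the valid region. The same idiom appears in the BytesWritable branch and in the cursor variants below.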

From source file:com.facebook.presto.hive.orc.DwrfHiveRecordCursor.java

License:Apache License

private void parseStringColumn(int column) {
    // don't include column number in message because it causes boxing which is expensive here
    checkArgument(!isPartitionColumn[column], "Column is a partition key");

    loaded[column] = true;
    nulls[column] = false;

    OrcLazyObject lazyObject = getRawValue(column);
    if (lazyObject == null) {
        nulls[column] = true;
        return;
    }

    Object value = materializeValue(lazyObject);
    if (value == null) {
        nulls[column] = true;
        return;
    }

    HiveType type = hiveTypes[column];
    if (type.equals(HIVE_STRING)) {
        Text text = checkWritable(value, Text.class);
        slices[column] = Slices.copyOf(Slices.wrappedBuffer(text.getBytes()), 0, text.getLength());
    } else if (type.equals(HIVE_BINARY)) {
        BytesWritable bytesWritable = checkWritable(value, BytesWritable.class);
        slices[column] = Slices.copyOf(Slices.wrappedBuffer(bytesWritable.getBytes()), 0,
                bytesWritable.getLength());
    } else {
        throw new RuntimeException(String.format("%s is not a valid STRING type", type));
    }
}

From source file:com.facebook.presto.hive.orc.OrcHiveRecordCursor.java

License:Apache License

private void parseStringColumn(int column) {
    // don't include column number in message because it causes boxing which is expensive here
    checkArgument(!isPartitionColumn[column], "Column is a partition key");

    loaded[column] = true;
    nulls[column] = false;

    Object object = getFieldValue(row, hiveColumnIndexes[column]);
    if (object == null) {
        nulls[column] = true;
        return;
    }

    HiveType type = hiveTypes[column];
    if (type.equals(HIVE_STRING)) {
        Text text = Types.checkType(object, Text.class, "materialized string value");
        slices[column] = Slices.copyOf(Slices.wrappedBuffer(text.getBytes()), 0, text.getLength());
    } else if (type.equals(HIVE_BINARY)) {
        BytesWritable bytesWritable = Types.checkType(object, BytesWritable.class, "materialized binary value");
        slices[column] = Slices.copyOf(Slices.wrappedBuffer(bytesWritable.getBytes()), 0,
                bytesWritable.getLength());
    } else {
        throw new RuntimeException(String.format("%s is not a valid STRING type", type));
    }
}

From source file:com.facebook.presto.hive.OrcHiveRecordCursor.java

License:Apache License

private void parseStringColumn(int column) {
    // don't include column number in message because it causes boxing which is expensive here
    checkArgument(!isPartitionColumn[column], "Column is a partition key");

    loaded[column] = true;
    nulls[column] = false;

    Object object = getFieldValue(row, hiveColumnIndexes[column]);
    if (object == null) {
        nulls[column] = true;
        return;
    }

    HiveType type = hiveTypes[column];
    if (type.getCategory() == Category.MAP || type.getCategory() == Category.LIST
            || type.getCategory() == Category.STRUCT) {
        slices[column] = Slices.wrappedBuffer(getJsonBytes(sessionTimeZone, object, fieldInspectors[column]));
    } else if (type.equals(HIVE_STRING)) {
        Text text = Types.checkType(object, Text.class, "materialized string value");
        slices[column] = Slices.copyOf(Slices.wrappedBuffer(text.getBytes()), 0, text.getLength());
    } else if (type.equals(HIVE_BINARY)) {
        BytesWritable bytesWritable = Types.checkType(object, BytesWritable.class, "materialized binary value");
        slices[column] = Slices.copyOf(Slices.wrappedBuffer(bytesWritable.getBytes()), 0,
                bytesWritable.getLength());
    } else {
        throw new RuntimeException(String.format("%s is not a valid STRING type", type));
    }
}

From source file:com.foobar.store.FromJSONSeqConverter.java

License:Apache License

/**
 * Converts the given writable to a JSON value by parsing its text as JSON.
 *
 * @param w   the writable to convert; non-Text writables are converted via toString()
 * @param val the target value, reassigned to the parse result
 */
private JsonValue convertWritableToItem(Writable w, JsonValue val) {
    if (w == null)
        return null;
    Text t = null;
    if (w instanceof Text) {
        t = (Text) w;
    } else {
        t = new Text(w.toString());
    }

    // Only the first t.getLength() bytes are valid; bound the stream to that region.
    ByteArrayInputStream input = new ByteArrayInputStream(t.getBytes(), 0, t.getLength());
    JsonParser parser = new JsonParser(input);

    try {
        val = parser.JsonVal();
    } catch (Exception e) {
        throw new RuntimeException(e);
    }
    return val;
}

From source file:com.gotometrics.orderly.RowKeyUtils.java

License:Apache License

/** Converts a Text object to a byte array, copying only if
 * necessary.
 */
public static byte[] toBytes(Text t) {
    return toBytes(t.getBytes(), 0, t.getLength());
}
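
The three-argument overload it delegates to is not shown on this page; presumably it returns the backing array unchanged when the valid region spans it exactly, and copies otherwise. A hypothetical sketch of that idiom:

// Hypothetical sketch; the actual RowKeyUtils overload may differ.
public static byte[] toBytes(byte[] b, int offset, int length) {
    // Fast path: the valid region is the whole array, so return it as-is.
    if (offset == 0 && length == b.length) {
        return b;
    }
    // Otherwise copy out just the valid region.
    byte[] copy = new byte[length];
    System.arraycopy(b, offset, copy, 0, length);
    return copy;
}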

From source file:com.hadoop.mapreduce.TestLzoTextInputFormat.java

License:Open Source License

/**
 * Generate random data, compress it, index and md5 hash the data.
 * Then read it all back and md5 that too, to verify that it all went ok.
 *
 * @param testWithIndex Should we index or not?
 * @param charsToOutput How many characters of random data should we output.
 * @throws IOException
 * @throws NoSuchAlgorithmException
 * @throws InterruptedException
 */
private void runTest(boolean testWithIndex, int charsToOutput)
        throws IOException, NoSuchAlgorithmException, InterruptedException {

    if (!GPLNativeCodeLoader.isNativeCodeLoaded()) {
        LOG.warn("Cannot run this test without the native lzo libraries");
        return;
    }

    Configuration conf = new Configuration();
    conf.setLong("fs.local.block.size", charsToOutput / 2);
    // reducing block size to force a split of the tiny file
    conf.set("io.compression.codecs", LzopCodec.class.getName());

    FileSystem localFs = FileSystem.getLocal(conf);
    localFs.delete(outputDir, true);
    localFs.mkdirs(outputDir);

    Job job = new Job(conf);
    TextOutputFormat.setCompressOutput(job, true);
    TextOutputFormat.setOutputCompressorClass(job, LzopCodec.class);
    TextOutputFormat.setOutputPath(job, outputDir);

    TaskAttemptContext attemptContext = new TaskAttemptContextImpl(job.getConfiguration(),
            new TaskAttemptID("123", 0, TaskType.REDUCE, 1, 2));

    // create some input data
    byte[] expectedMd5 = createTestInput(outputDir, localFs, attemptContext, charsToOutput);

    if (testWithIndex) {
        Path lzoFile = new Path(outputDir, lzoFileName);
        LzoTextInputFormat.createIndex(localFs, lzoFile);
    }

    LzoTextInputFormat inputFormat = new LzoTextInputFormat();
    TextInputFormat.setInputPaths(job, outputDir);

    List<InputSplit> is = inputFormat.getSplits(job);
    //verify we have the right number of lzo chunks
    if (testWithIndex && OUTPUT_BIG == charsToOutput) {
        assertEquals(3, is.size());
    } else {
        assertEquals(1, is.size());
    }

    // let's read it all and calculate the md5 hash
    for (InputSplit inputSplit : is) {
        RecordReader<LongWritable, Text> rr = inputFormat.createRecordReader(inputSplit, attemptContext);
        rr.initialize(inputSplit, attemptContext);

        while (rr.nextKeyValue()) {
            Text value = rr.getCurrentValue();

            md5.update(value.getBytes(), 0, value.getLength());
        }

        rr.close();
    }

    localFs.close();
    assertTrue(Arrays.equals(expectedMd5, md5.digest()));
}

From source file:com.hadoop.mapreduce.TestLzoTextInputFormat.java

License:Open Source License

/**
 * Creates an lzo file with random data.
 *
 * @param outputDir Output directory.
 * @param fs File system we're using.
 * @param attemptContext Task attempt context, contains task id etc. 
 * @throws IOException
 * @throws InterruptedException
 */
private byte[] createTestInput(Path outputDir, FileSystem fs, TaskAttemptContext attemptContext,
        int charsToOutput) throws IOException, InterruptedException {

    TextOutputFormat<Text, Text> output = new TextOutputFormat<Text, Text>();
    RecordWriter<Text, Text> rw = null;

    md5.reset();

    try {
        rw = output.getRecordWriter(attemptContext);

        char[] chars = "abcdefghijklmnopqrstuvwxyz\u00E5\u00E4\u00F6".toCharArray();

        Random r = new Random(System.currentTimeMillis());
        Text key = new Text();
        Text value = new Text();
        int charsMax = chars.length - 1;
        for (int i = 0; i < charsToOutput;) {
            i += fillText(chars, r, charsMax, key);
            i += fillText(chars, r, charsMax, value);
            rw.write(key, value);
            md5.update(key.getBytes(), 0, key.getLength());
            // text output format writes tab between the key and value
            md5.update("\t".getBytes("UTF-8"));
            md5.update(value.getBytes(), 0, value.getLength());
        }
    } finally {
        if (rw != null) {
            rw.close(attemptContext);
            OutputCommitter committer = output.getOutputCommitter(attemptContext);
            committer.commitTask(attemptContext);
            committer.cleanupJob(attemptContext);
        }
    }

    byte[] result = md5.digest();
    md5.reset();
    return result;
}

From source file:com.ibm.jaql.io.hadoop.converter.FromDelConverter.java

License:Apache License

/** Converts the given line into a JSON value. */
@Override
public JsonValue convert(LongWritable key, Text value, JsonValue target) {
    return convert(key.get(), value.getBytes(), value.getLength(), target);
}

From source file:com.ibm.jaql.io.hadoop.converter.TextToJsonString.java

License:Apache License

@Override
public JsonValue convert(Text src, JsonValue target) {
    MutableJsonString str;
    if (target instanceof MutableJsonString) {
        str = (MutableJsonString) target;
    } else {
        str = new MutableJsonString();
    }
    str.set(src.getBytes(), src.getLength());
    return str;
}