List of usage examples for org.apache.hadoop.io Text getLength
@Override public int getLength()
From source file:com.gotometrics.orderly.RowKeyUtils.java
License:Apache License
/**
 * Converts a Text object to a byte array, copying only if necessary.
 *
 * <p>Delegates to the {@code (array, offset, length)} overload with offset 0
 * and the Text's reported length, so only the bytes up to
 * {@code t.getLength()} are considered.
 *
 * @param t the Text whose contents are wanted
 * @return the result of the three-argument {@code toBytes} overload
 */
public static byte[] toBytes(Text t) {
    final byte[] backing = t.getBytes();
    final int validLength = t.getLength();
    return toBytes(backing, 0, validLength);
}
From source file:com.hadoop.mapreduce.TestLzoTextInputFormat.java
License:Open Source License
/** * Generate random data, compress it, index and md5 hash the data. * Then read it all back and md5 that too, to verify that it all went ok. * /*from w w w. j a v a2s . co m*/ * @param testWithIndex Should we index or not? * @param charsToOutput How many characters of random data should we output. * @throws IOException * @throws NoSuchAlgorithmException * @throws InterruptedException */ private void runTest(boolean testWithIndex, int charsToOutput) throws IOException, NoSuchAlgorithmException, InterruptedException { if (!GPLNativeCodeLoader.isNativeCodeLoaded()) { LOG.warn("Cannot run this test without the native lzo libraries"); return; } Configuration conf = new Configuration(); conf.setLong("fs.local.block.size", charsToOutput / 2); // reducing block size to force a split of the tiny file conf.set("io.compression.codecs", LzopCodec.class.getName()); FileSystem localFs = FileSystem.getLocal(conf); localFs.delete(outputDir, true); localFs.mkdirs(outputDir); Job job = new Job(conf); TextOutputFormat.setCompressOutput(job, true); TextOutputFormat.setOutputCompressorClass(job, LzopCodec.class); TextOutputFormat.setOutputPath(job, outputDir); TaskAttemptContext attemptContext = new TaskAttemptContextImpl(job.getConfiguration(), new TaskAttemptID("123", 0, TaskType.REDUCE, 1, 2)); // create some input data byte[] expectedMd5 = createTestInput(outputDir, localFs, attemptContext, charsToOutput); if (testWithIndex) { Path lzoFile = new Path(outputDir, lzoFileName); LzoTextInputFormat.createIndex(localFs, lzoFile); } LzoTextInputFormat inputFormat = new LzoTextInputFormat(); TextInputFormat.setInputPaths(job, outputDir); List<InputSplit> is = inputFormat.getSplits(job); //verify we have the right number of lzo chunks if (testWithIndex && OUTPUT_BIG == charsToOutput) { assertEquals(3, is.size()); } else { assertEquals(1, is.size()); } // let's read it all and calculate the md5 hash for (InputSplit inputSplit : is) { RecordReader<LongWritable, Text> rr = 
inputFormat.createRecordReader(inputSplit, attemptContext); rr.initialize(inputSplit, attemptContext); while (rr.nextKeyValue()) { Text value = rr.getCurrentValue(); md5.update(value.getBytes(), 0, value.getLength()); } rr.close(); } localFs.close(); assertTrue(Arrays.equals(expectedMd5, md5.digest())); }
From source file:com.hadoop.mapreduce.TestLzoTextInputFormat.java
License:Open Source License
/** * Creates an lzo file with random data. * /*from w w w . ja v a 2 s. c o m*/ * @param outputDir Output directory. * @param fs File system we're using. * @param attemptContext Task attempt context, contains task id etc. * @throws IOException * @throws InterruptedException */ private byte[] createTestInput(Path outputDir, FileSystem fs, TaskAttemptContext attemptContext, int charsToOutput) throws IOException, InterruptedException { TextOutputFormat<Text, Text> output = new TextOutputFormat<Text, Text>(); RecordWriter<Text, Text> rw = null; md5.reset(); try { rw = output.getRecordWriter(attemptContext); char[] chars = "abcdefghijklmnopqrstuvwxyz\u00E5\u00E4\u00F6".toCharArray(); Random r = new Random(System.currentTimeMillis()); Text key = new Text(); Text value = new Text(); int charsMax = chars.length - 1; for (int i = 0; i < charsToOutput;) { i += fillText(chars, r, charsMax, key); i += fillText(chars, r, charsMax, value); rw.write(key, value); md5.update(key.getBytes(), 0, key.getLength()); // text output format writes tab between the key and value md5.update("\t".getBytes("UTF-8")); md5.update(value.getBytes(), 0, value.getLength()); } } finally { if (rw != null) { rw.close(attemptContext); OutputCommitter committer = output.getOutputCommitter(attemptContext); committer.commitTask(attemptContext); committer.cleanupJob(attemptContext); } } byte[] result = md5.digest(); md5.reset(); return result; }
From source file:com.ibm.jaql.io.hadoop.converter.FromDelConverter.java
License:Apache License
/** Converts the given line into a JSON value. */ @Override//from w w w. j av a2s. c o m public JsonValue convert(LongWritable key, Text value, JsonValue target) { return convert(key.get(), value.getBytes(), value.getLength(), target); }
From source file:com.ibm.jaql.io.hadoop.converter.TextToJsonString.java
License:Apache License
@Override public JsonValue convert(Text src, JsonValue target) { MutableJsonString str;//from w ww . ja v a2 s . co m if (target instanceof MutableJsonString) { str = (MutableJsonString) target; } else { str = new MutableJsonString(); } str.set(src.getBytes(), src.getLength()); return str; }
From source file:com.ibm.spss.hive.serde2.xml.XmlSerDe.java
License:Open Source License
/**
 * Parses the given writable (which must be a {@code Text}) with the XML
 * processor. A null or zero-length text yields {@code null}; any failure while
 * parsing is rethrown wrapped in a {@code SerDeException}.
 *
 * @see org.apache.hadoop.hive.serde2.Deserializer#deserialize(org.apache.hadoop.io.Writable)
 */
@Override
public Object deserialize(Writable writable) throws SerDeException {
    final Text text = (Text) writable;
    if (text != null && text.getLength() != 0) {
        try {
            return this.xmlProcessor.parse(text.toString());
        } catch (Exception e) {
            throw new SerDeException(e);
        }
    }
    // Nothing to parse.
    return null;
}
From source file:com.inmobi.messaging.consumer.databus.mapreduce.DatabusRecordReader.java
License:Apache License
@Override public Message getCurrentValue() throws IOException, InterruptedException { Text text = lineReader.getCurrentValue(); // get the byte array corresponding to the value read int length = text.getLength(); byte[] msg = new byte[length]; System.arraycopy(text.getBytes(), 0, msg, 0, length); return DatabusUtil.decodeMessage(msg); }
From source file:com.kasabi.labs.freebase.mr.Freebase2RDFReducer.java
License:Apache License
@Override public void reduce(Text key, Iterable<Text> values, Context context) throws IOException, InterruptedException { for (Text value : values) { log.debug("< ({}, {})", key, value); k.clear();//from w w w . j a va2 s.c om byte[] kb = key.getBytes(); k.append(kb, 0, key.getLength()); byte[] vb = value.toString().getBytes(); k.append(vb, 0, vb.length); context.write(k, nullWritable); log.debug("> ({}, {})", k, nullWritable); } }
From source file:com.kylinolap.cube.common.BytesSplitter.java
License:Apache License
public int detectDelim(Text value, int expectedParts) { for (int i = 0; i < COMMON_DELIMS.length; i++) { int nParts = split(value.getBytes(), value.getLength(), (byte) COMMON_DELIMS[i]); if (nParts == expectedParts) return COMMON_DELIMS[i]; }//ww w .j av a2 s .c om throw new RuntimeException("Cannot detect delimeter from first line -- " + value.toString() + " -- expect " + expectedParts + " columns"); }
From source file:com.kylinolap.cube.measure.MeasureCodec.java
License:Apache License
/**
 * Decodes the contents of {@code bytes} into {@code result} by wrapping the
 * first {@code bytes.getLength()} bytes of its array in a {@code ByteBuffer}
 * and delegating to the {@code ByteBuffer} overload.
 *
 * @param bytes the Text holding the encoded data
 * @param result the array handed to the {@code ByteBuffer} overload
 */
public void decode(Text bytes, Object[] result) {
    final ByteBuffer buffer = ByteBuffer.wrap(bytes.getBytes(), 0, bytes.getLength());
    decode(buffer, result);
}