List of usage examples for the getLength() method of org.apache.hadoop.io.Text
@Override public int getLength()
From source file:io.aos.hdfs.TextTest.java
License:Apache License
@Test public void test() throws IOException { // vv TextTest Text t = new Text("hadoop"); assertThat(t.getLength(), is(6)); assertThat(t.getBytes().length, is(6)); assertThat(t.charAt(2), is((int) 'd')); assertThat("Out of bounds", t.charAt(100), is(-1)); // ^^ TextTest }
From source file:io.aos.hdfs.TextTest.java
License:Apache License
@Test public void mutability() throws IOException { // vv TextTest-Mutability Text t = new Text("hadoop"); t.set("pig"); assertThat(t.getLength(), is(3)); assertThat(t.getBytes().length, is(3)); // ^^ TextTest-Mutability }
From source file:io.aos.hdfs.TextTest.java
License:Apache License
@Test public void byteArrayNotShortened() throws IOException { // vv TextTest-ByteArrayNotShortened Text t = new Text("hadoop"); t.set(/*[*/new Text("pig")/*]*/); assertThat(t.getLength(), is(3)); assertThat("Byte length not shortened", t.getBytes().length, /*[*/is(6)/*]*/); // ^^ TextTest-ByteArrayNotShortened }
From source file:io.dataapps.chlorine.hive.ScanUDF.java
License:Apache License
@Override public void process(Object[] arguments) throws HiveException { long matches = 0; for (int i = 0; i < arguments.length; i++) { if (arguments[i] != null && converters[i] != null) { Text value = (Text) converters[i].convert(arguments[i]); Map<String, List<String>> matchesByType = engine.findWithType(value.toString()); totalSize += value.getLength(); for (Map.Entry<String, List<String>> entry : matchesByType.entrySet()) { Collection<String> result = entry.getValue(); if (result.size() > 0) { matches += result.size(); forward(entry.getKey(), result.size(), i, StringUtils.join(result, ',')); }// ww w .j ava 2s . co m } } } totalRecords++; if (matches > 0) { totalMatches += matches; matchedRecords++; } }
From source file:io.fluo.core.util.ByteUtil.java
License:Apache License
/** * Convert from Hadoop Text to Bytes object * /*from www . j ava 2 s . c o m*/ * @param t Text * @return Bytes object */ public static Bytes toBytes(Text t) { return Bytes.wrap(t.getBytes(), 0, t.getLength()); }
From source file:it.crs4.pydoop.mapreduce.pipes.BinaryProtocol.java
License:Apache License
/** * Write the given object to the stream. If it is a Text or BytesWritable, * write it directly. Otherwise, write it to a buffer and then write the * length and data to the stream./* w w w. j a va 2 s .com*/ * @param obj the object to write * @throws IOException */ private void writeObject(Writable obj) throws IOException { // For Text and BytesWritable, encode them directly, so that they end up // in C++ as the natural translations. if (obj instanceof Text) { Text t = (Text) obj; int len = t.getLength(); WritableUtils.writeVInt(stream, len); stream.write(t.getBytes(), 0, len); } else if (obj instanceof BytesWritable) { BytesWritable b = (BytesWritable) obj; int len = b.getLength(); WritableUtils.writeVInt(stream, len); stream.write(b.getBytes(), 0, len); } else if (obj == null) { // write a zero length string WritableUtils.writeVInt(stream, 0); } else { buffer.reset(); obj.write(buffer); int length = buffer.getLength(); WritableUtils.writeVInt(stream, length); stream.write(buffer.getData(), 0, length); } }
From source file:it.crs4.pydoop.mapreduce.pipes.CommonStub.java
License:Apache License
protected void writeObject(Writable obj, DataOutputStream stream) throws IOException { // For Text and BytesWritable, encode them directly, so that they end up // in C++ as the natural translations. System.err.println("obj: " + obj); DataOutputBuffer buffer = new DataOutputBuffer(); if (obj instanceof Text) { Text t = (Text) obj; int len = t.getLength(); WritableUtils.writeVLong(stream, len); stream.flush();//from ww w. j a va 2s .c o m stream.write(t.getBytes(), 0, len); stream.flush(); System.err.println("len: " + len); } else if (obj instanceof BytesWritable) { BytesWritable b = (BytesWritable) obj; int len = b.getLength(); WritableUtils.writeVLong(stream, len); stream.write(b.getBytes(), 0, len); System.err.println("len: " + len); } else { buffer.reset(); obj.write(buffer); int length = buffer.getLength(); WritableUtils.writeVInt(stream, length); stream.write(buffer.getData(), 0, length); System.err.println("len: " + length); } stream.flush(); }
From source file:it.crs4.pydoop.pipes.BinaryProtocol.java
License:Apache License
/** * Write the given object to the stream. If it is a Text or BytesWritable, * write it directly. Otherwise, write it to a buffer and then write the * length and data to the stream./*from w ww . java 2 s . c o m*/ * @param obj the object to write * @throws IOException */ private void writeObject(Writable obj) throws IOException { // For Text and BytesWritable, encode them directly, so that they end up // in C++ as the natural translations. if (obj instanceof Text) { Text t = (Text) obj; int len = t.getLength(); WritableUtils.writeVInt(stream, len); stream.write(t.getBytes(), 0, len); } else if (obj instanceof BytesWritable) { BytesWritable b = (BytesWritable) obj; int len = b.getLength(); WritableUtils.writeVInt(stream, len); stream.write(b.getBytes(), 0, len); } else { buffer.reset(); obj.write(buffer); int length = buffer.getLength(); WritableUtils.writeVInt(stream, length); stream.write(buffer.getData(), 0, length); } }
From source file:it.crs4.seal.common.CutText.java
License:Open Source License
public void loadRecord(Text record) throws FormatException { int pos = 0; // the byte position within the record int fieldno = 0; // the field index within the record int colno = 0; // the index within the list of requested fields (columns) try {/* ww w .j a va 2s .c o m*/ while (pos < record.getLength() && colno < columns.size()) // iterate over each field { int endpos = record.find(delim, pos); // the field's end position if (endpos < 0) endpos = record.getLength(); if (columns.get(colno) == fieldno) // if we're at a requested field { extractedFields[colno] = Text.decode(record.getBytes(), pos, endpos - pos); extractedFieldPositions[colno] = pos; colno += 1; // advance column } pos = endpos + 1; // the next starting position is the current end + 1 fieldno += 1; } } catch (java.nio.charset.CharacterCodingException e) { throw new FormatException("character coding exception. Message: " + e.getMessage(), record); } if (colno < columns.size()) throw new FormatException("Missing field(s) in record. Field " + colno + " (zero-based) not found.", record); }
From source file:it.crs4.seal.common.TextSamMapping.java
License:Open Source License
public TextSamMapping(Text sam) throws FormatException { unparsedData = new Text(); cutter = new CutText(Delim, 0, 1, 2, 3, 4, 5, 6, 7, 8); // all fields up to and including insert size try {//from www . j a v a2 s. com cutter.loadRecord(sam); flag = Integer.parseInt(cutter.getField(1)); // set flag first so we can use the flag methods mapQ = Byte.parseByte(cutter.getField(4)); if (isMapped()) pos5 = Integer.parseInt(cutter.getField(3)); if (isMateMapped()) matePos5 = Integer.parseInt(cutter.getField(7)); if (isMapped() && isMateMapped()) insertSize = Integer.parseInt(cutter.getField(8)); } catch (CutText.FormatException e) { throw new FormatException("sam formatting problem: " + e + ". Record: " + sam); } catch (NumberFormatException e) { throw new FormatException("sam formatting problem. Found text in place of a number. Record: " + sam); } int seqStart = cutter.getFieldPos(8) + cutter.getField(8).length() + 1; if (seqStart > sam.getLength()) throw new FormatException("Incomplete SAM record -- missing fields. Record: " + sam); // copy the sequence and tag data to our internal buffer unparsedData.set(sam.getBytes(), seqStart, sam.getLength() - seqStart); // Find the end of the sequence field. Search for a Delim after the insert size field. int end = unparsedData.find(Delim); if (end < 0) throw new FormatException("Bad SAM format. Missing terminator for sequence field. SAM: " + sam); seqLen = end; // now repeat for the quality field qualityStart = end + 1; if (qualityStart > unparsedData.getLength()) throw new FormatException("Incomplete SAM record -- missing quality field. Record: " + sam); end = unparsedData.find(Delim, qualityStart); if (end < 0) end = unparsedData.getLength(); if (seqLen != end - qualityStart) { throw new FormatException( "Length of sequence (" + seqLen + ") is different from length of quality string (" + (end - qualityStart) + "). Record: " + sam); } tagsStart = end + 1; }