Example usage for org.apache.hadoop.io Text getLength

Introduction

This page collects example usages of the getLength() method of org.apache.hadoop.io.Text.

Prototype

@Override
public int getLength()

Source Link

Document

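Returns the number of valid bytes in the backing byte array. This can be smaller than getBytes().length, because the backing array is not necessarily shrunk when shorter content is set.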

Usage

From source file:io.aos.hdfs.TextTest.java

License:Apache License

/**
 * Checks the byte length of a {@code Text} built from "hadoop" and the values
 * returned by {@code charAt} for an in-range and an out-of-range position.
 */
@Test
public void test() throws IOException {
    // vv TextTest
    final Text hadoop = new Text("hadoop");

    final int validBytes = hadoop.getLength();
    assertThat(validBytes, is(6));
    final int backingBytes = hadoop.getBytes().length;
    assertThat(backingBytes, is(6));

    final int atTwo = hadoop.charAt(2);
    assertThat(atTwo, is((int) 'd'));
    // a position past the end is expected to yield -1
    final int pastEnd = hadoop.charAt(100);
    assertThat("Out of bounds", pastEnd, is(-1));
    // ^^ TextTest
}

From source file:io.aos.hdfs.TextTest.java

License:Apache License

/**
 * Checks that a {@code Text} can be reassigned from a String and that both its
 * length and its byte array length then reflect the new, shorter content.
 */
@Test
public void mutability() throws IOException {
    // vv TextTest-Mutability
    final Text reused = new Text("hadoop");
    reused.set("pig");

    final int validBytes = reused.getLength();
    assertThat(validBytes, is(3));
    final int backingBytes = reused.getBytes().length;
    assertThat(backingBytes, is(3));
    // ^^ TextTest-Mutability
}

From source file:io.aos.hdfs.TextTest.java

License:Apache License

/**
 * Checks that assigning a shorter {@code Text} to an existing one updates the
 * reported length to 3 while the byte array keeps its previous length of 6.
 */
@Test
public void byteArrayNotShortened() throws IOException {
    // vv TextTest-ByteArrayNotShortened
    final Text reused = new Text("hadoop");
    reused.set(/*[*/new Text("pig")/*]*/);

    final int validBytes = reused.getLength();
    assertThat(validBytes, is(3));
    final int backingBytes = reused.getBytes().length;
    assertThat("Byte length not shortened", backingBytes, /*[*/is(6)/*]*/);
    // ^^ TextTest-ByteArrayNotShortened
}

From source file:io.dataapps.chlorine.hive.ScanUDF.java

License:Apache License

/**
 * Scans each argument that is non-null and has a converter, forwarding one row
 * per match type that produced at least one match, and updates the running
 * totals (size, records, matches, matched records) kept by the enclosing object.
 *
 * @param arguments the values of the current row
 * @throws HiveException declared by the overridden method
 */
@Override
public void process(Object[] arguments) throws HiveException {
    long rowMatches = 0;
    for (int col = 0; col < arguments.length; col++) {
        // skip columns with nothing to scan or no way to convert them
        if (arguments[col] == null || converters[col] == null) {
            continue;
        }
        Text text = (Text) converters[col].convert(arguments[col]);
        Map<String, List<String>> foundByType = engine.findWithType(text.toString());
        totalSize += text.getLength();
        for (Map.Entry<String, List<String>> typed : foundByType.entrySet()) {
            Collection<String> hits = typed.getValue();
            int hitCount = hits.size();
            if (hitCount > 0) {
                rowMatches += hitCount;
                forward(typed.getKey(), hitCount, col, StringUtils.join(hits, ','));
            }
        }
    }

    totalRecords++;
    if (rowMatches > 0) {
        totalMatches += rowMatches;
        matchedRecords++;
    }
}

From source file:io.fluo.core.util.ByteUtil.java

License:Apache License

/**
 * Convert from Hadoop Text to Bytes object
 * /*from  www .  j  ava  2 s .  c  o  m*/
 * @param t Text
 * @return Bytes object
 */
public static Bytes toBytes(Text t) {
    return Bytes.wrap(t.getBytes(), 0, t.getLength());
}

From source file:it.crs4.pydoop.mapreduce.pipes.BinaryProtocol.java

License:Apache License

/**
 * Write the given object to the stream. If it is a Text or BytesWritable,
 * write it directly. Otherwise, write it to a buffer and then write the
 * length and data to the stream./* w w  w.  j  a va 2 s  .com*/
 * @param obj the object to write
 * @throws IOException
 */
private void writeObject(Writable obj) throws IOException {
    // For Text and BytesWritable, encode them directly, so that they end up
    // in C++ as the natural translations.
    if (obj instanceof Text) {
        Text t = (Text) obj;
        int len = t.getLength();
        WritableUtils.writeVInt(stream, len);
        stream.write(t.getBytes(), 0, len);
    } else if (obj instanceof BytesWritable) {
        BytesWritable b = (BytesWritable) obj;
        int len = b.getLength();
        WritableUtils.writeVInt(stream, len);
        stream.write(b.getBytes(), 0, len);
    } else if (obj == null) {
        // write a zero length string
        WritableUtils.writeVInt(stream, 0);
    } else {
        buffer.reset();
        obj.write(buffer);
        int length = buffer.getLength();
        WritableUtils.writeVInt(stream, length);
        stream.write(buffer.getData(), 0, length);
    }
}

From source file:it.crs4.pydoop.mapreduce.pipes.CommonStub.java

License:Apache License

/**
 * Writes the given object to the stream as a variable-length length prefix
 * followed by the raw bytes, logging the object and its length to
 * {@code System.err}.
 *
 * <p>Text and BytesWritable are encoded directly; any other Writable is first
 * serialized into a temporary buffer. A null object is encoded as a zero
 * length with no data instead of failing with a NullPointerException.
 *
 * @param obj the object to write; may be null
 * @param stream the stream to write to; flushed before returning
 * @throws IOException if writing to the stream or serializing the object fails
 */
protected void writeObject(Writable obj, DataOutputStream stream) throws IOException {
    // For Text and BytesWritable, encode them directly, so that they end up
    // in C++ as the natural translations.
    System.err.println("obj: " + obj);

    if (obj == null) {
        // nothing to serialize: emit a zero length prefix only
        WritableUtils.writeVInt(stream, 0);
        System.err.println("len: " + 0);
    } else if (obj instanceof Text) {
        Text t = (Text) obj;
        int len = t.getLength();
        // writeVInt produces the same encoding as writeVLong for an int value
        WritableUtils.writeVInt(stream, len);
        stream.flush();

        stream.write(t.getBytes(), 0, len);
        stream.flush();
        System.err.println("len: " + len);

    } else if (obj instanceof BytesWritable) {
        BytesWritable b = (BytesWritable) obj;
        int len = b.getLength();
        WritableUtils.writeVInt(stream, len);
        stream.write(b.getBytes(), 0, len);
        System.err.println("len: " + len);
    } else {
        // the scratch buffer is only needed here, so allocate it here
        DataOutputBuffer buffer = new DataOutputBuffer();
        obj.write(buffer);
        int length = buffer.getLength();
        WritableUtils.writeVInt(stream, length);
        stream.write(buffer.getData(), 0, length);
        System.err.println("len: " + length);
    }
    stream.flush();

}

From source file:it.crs4.pydoop.pipes.BinaryProtocol.java

License:Apache License

/**
 * Write the given object to the stream. If it is a Text or BytesWritable,
 * write it directly. Otherwise, write it to a buffer and then write the
 * length and data to the stream./*from   w  ww . java 2 s . c o m*/
 * @param obj the object to write
 * @throws IOException
 */
private void writeObject(Writable obj) throws IOException {
    // For Text and BytesWritable, encode them directly, so that they end up
    // in C++ as the natural translations.
    if (obj instanceof Text) {
        Text t = (Text) obj;
        int len = t.getLength();
        WritableUtils.writeVInt(stream, len);
        stream.write(t.getBytes(), 0, len);
    } else if (obj instanceof BytesWritable) {
        BytesWritable b = (BytesWritable) obj;
        int len = b.getLength();
        WritableUtils.writeVInt(stream, len);
        stream.write(b.getBytes(), 0, len);
    } else {
        buffer.reset();
        obj.write(buffer);
        int length = buffer.getLength();
        WritableUtils.writeVInt(stream, length);
        stream.write(buffer.getData(), 0, length);
    }
}

From source file:it.crs4.seal.common.CutText.java

License:Open Source License

/**
 * Walks the delimited fields of {@code record} from left to right and stores
 * the text and starting byte position of every requested column.
 *
 * @param record the delimited record to cut
 * @throws FormatException if a field cannot be decoded, or if the record ends
 *         before all requested columns have been found
 */
public void loadRecord(Text record) throws FormatException {
    int cursor = 0; // byte offset of the current field within the record
    int fieldIndex = 0; // ordinal of the current field within the record
    int wanted = 0; // index into the list of requested columns
    try {
        // one iteration per field; stops at the end of the record or once every
        // requested column has been filled
        // NOTE(review): a single forward pass only works if columns is in
        // ascending order -- confirm where columns is built.
        for (; cursor < record.getLength() && wanted < columns.size(); fieldIndex += 1) {
            int fieldEnd = record.find(delim, cursor);
            if (fieldEnd < 0) {
                // no further delimiter: the field runs to the end of the record
                fieldEnd = record.getLength();
            }

            if (columns.get(wanted) == fieldIndex) {
                extractedFields[wanted] = Text.decode(record.getBytes(), cursor, fieldEnd - cursor);
                extractedFieldPositions[wanted] = cursor;
                wanted += 1;
            }

            // NOTE(review): stepping by one assumes the delimiter occupies a single byte.
            cursor = fieldEnd + 1;
        }
    } catch (java.nio.charset.CharacterCodingException e) {
        throw new FormatException("character coding exception.  Message: " + e.getMessage(), record);
    }

    if (wanted < columns.size()) {
        throw new FormatException("Missing field(s) in record. Field " + wanted + " (zero-based) not found.",
                record);
    }
}

From source file:it.crs4.seal.common.TextSamMapping.java

License:Open Source License

/**
 * Builds a mapping from a SAM record held in a {@code Text}.
 *
 * <p>The first nine fields are cut out and the numeric ones parsed; the rest of
 * the record (sequence, quality and whatever follows) is copied into an
 * internal buffer, where the sequence and quality fields are located and
 * checked to have equal length.
 *
 * @param sam the SAM record
 * @throws FormatException if fields are missing, a numeric field does not
 *         parse, or the sequence and quality lengths differ
 */
public TextSamMapping(Text sam) throws FormatException {
    unparsedData = new Text();
    // fields 0..8: everything up to and including the insert size
    cutter = new CutText(Delim, 0, 1, 2, 3, 4, 5, 6, 7, 8);

    try {
        cutter.loadRecord(sam);
        // the flag is parsed first so that the flag methods can be used below
        flag = Integer.parseInt(cutter.getField(1));
        mapQ = Byte.parseByte(cutter.getField(4));

        if (isMapped()) {
            pos5 = Integer.parseInt(cutter.getField(3));
        }
        if (isMateMapped()) {
            matePos5 = Integer.parseInt(cutter.getField(7));
        }
        if (isMapped() && isMateMapped()) {
            insertSize = Integer.parseInt(cutter.getField(8));
        }
    } catch (CutText.FormatException badRecord) {
        throw new FormatException("sam formatting problem: " + badRecord + ". Record: " + sam);
    } catch (NumberFormatException badNumber) {
        throw new FormatException("sam formatting problem.  Found text in place of a number.  Record: " + sam);
    }

    // The remainder starts one past the end of field 8.
    // NOTE(review): this adds a String length (chars) to a field position; that
    // only lines up with byte offsets if field 8 is single-byte text -- confirm.
    final int remainderStart = cutter.getFieldPos(8) + cutter.getField(8).length() + 1;
    if (remainderStart > sam.getLength()) {
        throw new FormatException("Incomplete SAM record -- missing fields. Record: " + sam);
    }
    // keep the sequence and everything after it in our own buffer
    unparsedData.set(sam.getBytes(), remainderStart, sam.getLength() - remainderStart);

    // The sequence field runs from the start of the buffer to the first delimiter.
    final int seqEnd = unparsedData.find(Delim);
    if (seqEnd < 0) {
        throw new FormatException("Bad SAM format.  Missing terminator for sequence field.  SAM: " + sam);
    }
    seqLen = seqEnd;

    // The quality field follows; it ends at the next delimiter or at the end of the buffer.
    qualityStart = seqEnd + 1;
    if (qualityStart > unparsedData.getLength()) {
        throw new FormatException("Incomplete SAM record -- missing quality field. Record: " + sam);
    }
    int qualityEnd = unparsedData.find(Delim, qualityStart);
    if (qualityEnd < 0) {
        qualityEnd = unparsedData.getLength();
    }
    if (seqLen != qualityEnd - qualityStart) {
        throw new FormatException(
                "Length of sequence (" + seqLen + ") is different from length of quality string ("
                        + (qualityEnd - qualityStart) + "). Record: " + sam);
    }

    tagsStart = qualityEnd + 1;
}