Example usage for org.apache.hadoop.io Text getBytes

List of usage examples for org.apache.hadoop.io Text getBytes

Introduction

On this page you can find example usages of org.apache.hadoop.io Text getBytes.

Prototype

@Override
public byte[] getBytes() 

Source Link

Document

Returns the raw bytes; however, only data up to getLength() is valid.

Usage

From source file:RunText.java

License:Apache License

/**
 * Splits the valid bytes of {@code value} into fields separated by {@code delim}.
 *
 * <p>Delimiter positions are located with {@code find}, so a delimiter that sits inside a
 * double-quoted section does not split the field. Each field is decoded as UTF-8, the
 * encoding Hadoop {@code Text} stores its content in, rather than with the platform
 * default charset.
 *
 * <p>Any bytes that follow the last delimiter are not returned: the loop stops as soon as
 * no further delimiter is found.
 *
 * @param value the record to split; only its first {@code getLength()} bytes are examined
 * @return the fields found, in order of appearance
 */
private static List<String> parse(Text value) {
    // getBytes() exposes the backing array, which may be longer than the valid content.
    final byte[] bytes = value.getBytes();
    final int length = value.getLength();
    List<String> strings = Lists.newArrayList();
    int p = 0;
    while (p < length) {
        int next = find(value, delim, p);
        if (next == -1) {
            break;
        }
        strings.add(new String(bytes, p, next - p, java.nio.charset.StandardCharsets.UTF_8));
        // Resume scanning just past the delimiter.
        p = next + 1;
    }
    return strings;
}

From source file:RunText.java

License:Apache License

/**
 * Finds the first occurrence of {@code what} at or after {@code start} that is not inside
 * a double-quoted section.
 *
 * <p>Quote tracking begins in the "not quoted" state at {@code start}; every {@code '"'}
 * byte encountered flips it. Only the first {@code getLength()} bytes are scanned.
 *
 * @param text  the text whose bytes are scanned
 * @param what  the byte to look for
 * @param start the index at which scanning begins
 * @return the index of the match, or {@code -1} if there is none
 */
private static int find(Text text, byte what, int start) {
    final byte[] data = text.getBytes();
    final int limit = text.getLength();
    boolean quoted = false;
    for (int i = start; i < limit; i++) {
        final byte current = data[i];
        // The quote state is updated before the match test, as a quote byte itself may be the target.
        if (current == '"') {
            quoted = !quoted;
        }
        if (quoted) {
            continue;
        }
        if (current == what) {
            return i;
        }
    }
    return -1;
}

From source file:Importer.java

License:Open Source License

/**
 * Builds a key of the form {@code post_<md5>} for the given content.
 *
 * <p>The MD5 digest is computed over the first {@code getLength()} bytes of
 * {@code content} and rendered as two hexadecimal digits per byte.
 *
 * @param content the text to digest
 * @return a new {@code Text} holding {@code "post_"} followed by the hex digest
 * @throws Exception if the MD5 algorithm is unavailable
 */
public static Text hash(Text content) throws Exception {
    final MessageDigest digest = MessageDigest.getInstance("MD5");
    digest.update(content.getBytes(), 0, content.getLength());

    final StringBuilder key = new StringBuilder("post_");
    for (byte b : digest.digest()) {
        final int unsigned = b & 0xFF;
        // toHexString drops the leading zero for values below 0x10, so pad it back.
        if (unsigned < 0x10) {
            key.append('0');
        }
        key.append(Integer.toHexString(unsigned));
    }
    return new Text(key.toString());
}

From source file:TweetTweetTweet.java

License:Open Source License

/**
 * Populates {@code tweet1}, {@code tweet2} and {@code tweet3} from {@code text}, in that order.
 *
 * <p>Between consecutive tweets the first byte of {@code text} is dropped (the tab
 * separator). NOTE(review): this presumes each {@code fromText} call consumes its own
 * portion from the front of {@code text} -- confirm against the tweet type.
 *
 * @param text the serialized form; it is modified in place
 */
@Override
public void fromText(Text text) {
    tweet1.fromText(text);
    dropLeadingByte(text);
    tweet2.fromText(text);
    dropLeadingByte(text);
    tweet3.fromText(text);
}

/** Removes the first byte of {@code text} by re-setting it from its own bytes at offset 1. */
private void dropLeadingByte(Text text) {
    final int remaining = text.getLength() - 1;
    text.set(text.getBytes(), 1, remaining);
}

From source file:TestString.java

License:Apache License

/**
 * Micro-benchmark: repeatedly copies two byte ranges of a fixed {@code Text} into reusable
 * {@code Text} targets via {@code set(byte[], int, int)}, then prints both results and the
 * elapsed wall-clock time.
 */
@Test
public void testTextSubstring() throws Exception {
    final Text source = new Text("string");
    final Text head = new Text();
    final Text tail = new Text();
    final int iterations = 100000000;

    final long begin = System.nanoTime();
    int done = 0;
    while (done < iterations) {
        // Bytes [0, 2) of the source.
        head.set(source.getBytes(), 0, 2);
        // Everything from byte 3 up to the end of the valid content.
        final int tailLength = source.getLength() - 3;
        tail.set(source.getBytes(), 3, tailLength);
        done++;
    }
    final long finish = System.nanoTime();
    final float seconds = (finish - begin) / 1000000000f;

    System.out.println("TestTextSubString");
    System.out.println("text1: " + head.toString());
    System.out.println("text2: " + tail.toString());
    System.out.println("Elapsed Time: " + seconds + " seconds.");
}

From source file:accumulo.ingest.AbstractAccumuloCsvIngest.java

License:Apache License

/**
 * Rebuilds {@code buffer} as the bytes of {@code fileName} followed by the encoded
 * {@code recordCount}.
 *
 * @param buffer      the text that receives the row id; its previous content is discarded
 * @param fileName    the text whose valid bytes form the row id prefix
 * @param recordCount the value passed to {@code lex.encode} to produce the suffix
 */
protected void setRowId(Text buffer, Text fileName, long recordCount) {
    // Encode first so the buffer is left untouched if encoding fails.
    final byte[] encodedCount = lex.encode(recordCount);

    buffer.clear();
    final byte[] nameBytes = fileName.getBytes();
    final int nameLength = fileName.getLength();
    buffer.append(nameBytes, 0, nameLength);
    buffer.append(encodedCount, 0, encodedCount.length);
}

From source file:brush.FastqRecordReader.java

License:Apache License

/**
 * Position the input stream at the start of the first record.
 *
 * <p>Beginning at the {@code start} field, lines are read one at a time. A line is treated
 * as a candidate record start when {@code checkBuffer} returns true for it; the candidate
 * is confirmed when the second line after it begins with {@code '+'}. Lines that are not
 * candidates advance {@code start}; a rejected candidate moves {@code start} to the end
 * of that candidate line. The search ends on a confirmed candidate or when a read returns
 * no bytes. Finally the stream is seeked to {@code start} and {@code pos} is set to it.
 *
 * @param stream The stream to reposition.
 * @throws IOException propagated from seeking or reading the stream.
 */
protected void positionAtFirstRecord(FSDataInputStream stream) throws IOException {
    Text buffer = new Text();

    if (true) { // (start > 0) // use start>0 to assume that files start with valid data
        // Advance to the start of the first record that ends with /1
        // We use a temporary LineReader to read lines until we find the
        // position of the right one.  We then seek the file to that position.
        stream.seek(start);
        LineReader reader = new LineReader(stream);

        int bytesRead = 0;
        do {
            // Each read is capped at the smaller of MAX_LINE_LENGTH and the bytes left before end.
            bytesRead = reader.readLine(buffer, (int) Math.min(MAX_LINE_LENGTH, end - start));
            int bufferLength = buffer.getLength();
            // NOTE(review): checkBuffer is not visible here; presumably it tests for a
            // leading '@' -- confirm. Not a candidate: skip past this line.
            if (bytesRead > 0 && !checkBuffer(bufferLength, buffer)) {
                start += bytesRead;
            } else {
                // This branch is also taken when the read above returned no bytes.
                // line starts with @.  Read two more and verify that it starts with a +
                //
                // If this isn't the start of a record, we want to backtrack to its end
                long backtrackPosition = start + bytesRead;

                // The first of these two lines is read only to be skipped; buffer ends up
                // holding the second one.
                bytesRead = reader.readLine(buffer, (int) Math.min(MAX_LINE_LENGTH, end - start));
                bytesRead = reader.readLine(buffer, (int) Math.min(MAX_LINE_LENGTH, end - start));
                if (bytesRead > 0 && buffer.getLength() > 0 && buffer.getBytes()[0] == '+') {
                    // start is left unchanged, i.e. at the beginning of the candidate line.
                    break; // all good!
                } else {
                    // backtrack to the end of the record we thought was the start.
                    start = backtrackPosition;
                    stream.seek(start);
                    // A fresh reader is created after the seek so reading restarts from the new position.
                    reader = new LineReader(stream);
                }
            }
        } while (bytesRead > 0);

        // Rewind the underlying stream to the position settled on above.
        stream.seek(start);
    }

    pos = start;
}

From source file:brush.FastqRecordReader.java

License:Apache License

/**
 * Parses a read from an interleaved FASTQ file.
 *
 * Only reads a single record: the ID line, the sequence line, the separator line and the
 * quality line, each read via {@code appendLineInto}.
 *
 * @param readName Text record containing read name. Output parameter; it is cleared
 *   before the ID line is read into it.
 * @param value Text record containing full record. Output parameter; the four lines are
 *   appended to it, and it is not cleared here.
 * @return Returns true if read was successful (did not hit EOF).
 *
 * @throws IOException Propagated from the underlying line reads; in particular an
 *   {@code EOFException} if the input ends after the ID line but before the record is
 *   complete.
 * @throws RuntimeException Throws exception if FASTQ record doesn't
 *   have proper formatting (e.g., record doesn't start with @).
 */
protected boolean lowLevelFastqRead(Text readName, Text value) throws IOException {
    // ID line
    readName.clear();
    // appendLineInto already advances pos by the number of bytes it consumed, so pos must
    // not be incremented again here; doing so would count the ID line twice.
    final int idLineBytes = appendLineInto(readName, true);
    if (idLineBytes == 0) {
        return false; // EOF
    }

    if (readName.getBytes()[0] != '@') {
        throw new RuntimeException("unexpected fastq record didn't start with '@' at " + makePositionMessage()
                + ". Line: " + readName + ". \n");
    }

    value.append(readName.getBytes(), 0, readName.getLength());

    // sequence
    appendLineInto(value, false);

    // separator line
    appendLineInto(value, false);

    // quality
    appendLineInto(value, false);

    return true;
}

From source file:brush.FastqRecordReader.java

License:Apache License

/**
 * Reads one line from the underlying line reader and appends it, followed by a single
 * newline byte, to a text record. Also advances {@code pos} by the number of bytes read.
 *
 * @param dest Text record to append the line to.
 * @param eofOk Whether an EOF (a read of zero bytes) is acceptable for this line.
 * @return Returns the number of bytes read.
 *
 * @throws EOFException Throws if eofOk was false and we hit an EOF in
 *    the current line, or if the reader reports a negative byte count.
 * @throws IOException Propagated from the underlying line reader.
 */
private int appendLineInto(final Text dest, final boolean eofOk) throws EOFException, IOException {
    final Text line = new Text();
    final int consumed = lineReader.readLine(line, MAX_LINE_LENGTH);

    if (consumed < 0) {
        throw new EOFException();
    }
    if (consumed == 0 && !eofOk) {
        throw new EOFException();
    }

    // The reader strips the line terminator, so one newline byte is appended explicitly.
    dest.append(line.getBytes(), 0, line.getLength());
    dest.append(newline, 0, 1);

    pos += consumed;
    return consumed;
}

From source file:cascading.scheme.hadoop.TextLine.java

License:Open Source License

/**
 * Decodes the valid bytes of the {@code Text} held in the context into a {@code String}.
 *
 * @param context an array whose element 1 is the {@code Text} to decode and whose
 *     element 2 is the {@code Charset} to decode it with
 * @return the decoded string, built from the first {@code getLength()} bytes only
 */
protected String makeEncodedString(Object[] context) {
    final Text line = (Text) context[1];
    final byte[] raw = line.getBytes();
    final int validLength = line.getLength();
    final Charset charset = (Charset) context[2];
    return new String(raw, 0, validLength, charset);
}