Example usage for org.apache.hadoop.io.Text.getBytes()

List of usage examples for org.apache.hadoop.io.Text.getBytes()

Introduction

On this page you can find example usages of org.apache.hadoop.io.Text.getBytes().

Prototype

@Override
public byte[] getBytes() 

Document

Returns the raw bytes; however, only data up to getLength() is valid.
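
Because the returned array is the Text object's internal buffer, it can be larger than the content and keeps stale bytes around when the object is reused, so getBytes() should always be paired with getLength(). A minimal sketch of the distinction:

// Reusing a Text with a shorter value keeps the old, larger backing buffer,
// much like Hadoop record readers do between records.
Text text = new Text("first");
text.set("ab".getBytes(java.nio.charset.StandardCharsets.UTF_8));

byte[] raw = text.getBytes();          // the backing buffer; may be longer than the content
int len = text.getLength();            // only bytes [0, len) are valid

String safe = new String(raw, 0, len, java.nio.charset.StandardCharsets.UTF_8);   // "ab"
String stale = new String(raw, java.nio.charset.StandardCharsets.UTF_8);          // may include leftover bytes ("abrst")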

Usage

From source file:es.pic.astro.hadoop.io.BinaryOutputFormat.java

License:Apache License

/**
 * create the final out file, and output row by row. After one row is
 * appended, a configured row separator is appended
 *
 * @param jc
 *          the job configuration file
 * @param outPath
 *          the final output file to be created
 * @param valueClass
 *          the value class used for create
 * @param isCompressed
 *          whether the content is compressed or not
 * @param tableProperties
 *          the tableProperties of this file's corresponding table
 * @param progress
 *          progress used for status report
 * @return the RecordWriter
 */
@Override
public RecordWriter getHiveRecordWriter(JobConf jc, Path outPath, Class<? extends Writable> valueClass,
        boolean isCompressed, Properties tableProperties, Progressable progress) throws IOException {

    FileSystem fs = outPath.getFileSystem(jc);
    final OutputStream outStream = Utilities.createCompressedStream(jc, fs.create(outPath, progress),
            isCompressed);
    return new RecordWriter() {
        @Override
        public void write(Writable r) throws IOException {
            if (r instanceof Text) {
                Text tr = (Text) r;
                outStream.write(tr.getBytes(), 0, tr.getLength());
            } else {
                // DynamicSerDe always writes out BytesWritable
                BytesWritable bw = (BytesWritable) r;
                outStream.write(bw.get(), 0, bw.getSize());
            }
        }

        @Override
        public void close(boolean abort) throws IOException {
            outStream.close();
        }
    };
}

From source file:eu.larkc.RDFPig.io.NTriplesReader.java

License:Apache License

@Override
public Tuple getNext() throws IOException {

    while (true) {
        mProtoTuple = null;
        try {
            boolean notDone = in.nextKeyValue();
            if (!notDone) {
                return null;
            }
            Text value = null;
            try {
                value = (Text) in.getCurrentValue();

                byte[] buf = value.getBytes();
                int len = value.getLength();

                if (len < 3)
                    continue; // Ignore lines with less than 3 bytes

                //Get rid of any trailing whitespace
                while (Character.isWhitespace(buf[len - 1]))
                    len--;

                if (buf[len - 1] != '.')
                    continue;//throw new ExecException("Could not parse triple, no trailing \'.\': " + value);
                else
                    len--;

                //Get rid of any trailing whitespace
                while (Character.isWhitespace(buf[len - 1]))
                    len--;

                int start = 0;
                while (Character.isWhitespace(buf[start]))
                    start++;

                // Parse subject
                // Use the whitespace-trimmed start offset, not index 0.
                boolean isURI = buf[start] == '<';
                for (int i = start; i < len; i++) {
                    if (isURI && buf[i] == '>') {
                        readField(buf, start, i + 1);
                        start = i + 1;
                        break;
                    } else if (Character.isWhitespace(buf[i])) {
                        readField(buf, start, i);
                        start = i + 1;
                        break;
                    }
                }

                while (Character.isWhitespace(buf[start]))
                    start++;

                // Parse predicate (always URI)
                for (int i = start; i < len; i++) {
                    if (buf[i] == '>') {
                        readField(buf, start, i + 1);
                        start = i + 1;
                        break;
                    }
                }

                while (Character.isWhitespace(buf[start]))
                    start++;

                // Parse object
                if (buf[start] == '<') //URI
                    for (int i = start + 1; i < len; i++) {
                        if (buf[i] == '>') {
                            readField(buf, start, i + 1);
                            start = i + 1;
                            break;
                        }
                    }
                else if (buf[start] == '"') //Literal
                    for (int i = start + 1; i < len; i++) {
                        if (buf[i] == '"' && i > 0 && buf[i - 1] != '\\') {
                            readField(buf, start, i + 1);
                            start = i + 1;
                            break;
                        }
                    }
                else if (buf[start] == '_') {//BNode
                    int i = start + 1;
                    for (; i < len; i++) {
                        if (Character.isWhitespace(buf[i])) {
                            readField(buf, start, i);
                            start = i + 1;
                            break;
                        }
                    }
                    // We are at end of line, read it
                    readField(buf, start, i);

                } else
                    continue;//throw new ExecException("Could not parse triple, invalid term in object position: " + value);
                // After the first three terms, the rest are ignored

                if (mProtoTuple.size() != 3)
                    continue;

                Tuple t = mTupleFactory.newTupleNoCopy(mProtoTuple);
                mProtoTuple = null;
                return t;
            } catch (Exception e) {
                e.printStackTrace();
                System.err.println("For line: " + value);
                mProtoTuple = null;
            }
        } catch (Exception e) {
            int errCode = 6018;
            String errMsg = "Error while reading input";
            throw new ExecException(errMsg, errCode, PigException.REMOTE_ENVIRONMENT, e);
        }
    }
}

From source file:ezbake.protect.test.security.AutoSecurityTest.java

License:Apache License

protected void writeRow(Map.Entry<Key, Value> row, Text value) throws RegistrationException {
    Connector connector = null;
    BatchWriter writer = null;
    try {
        connector = new AccumuloHelper(configuration).getConnector();
        writer = connector.createBatchWriter(REG_TABLE, 1000000L, 1000L, 10);
        Mutation m = new Mutation(row.getKey().getRow());
        m.put(row.getKey().getColumnFamily(), row.getKey().getColumnQualifier(), new ColumnVisibility("U"),
                new Value(value.getBytes()));
        writer.addMutation(m);
    } catch (IOException e) {
        throw new RegistrationException("Error: IOException " + e);
    } catch (TableNotFoundException e) {
        throw new RegistrationException("Error: Accumulo Misconfigured - table is not found " + e);
    } catch (MutationsRejectedException e) {
        throw new RegistrationException("Error: Mutation Rejected " + e);
    } finally {
        if (writer != null) {
            try {
                writer.close();
            } catch (MutationsRejectedException e) {
                throw new RegistrationException("Error: Mutation Rejected " + e);
            }
        }
    }
}

From source file:ezbake.security.persistence.impl.AccumuloRegistrationManager.java

License:Apache License

protected void writeRow(Map.Entry<Key, Value> row, Text value) throws RegistrationException {
    try {
        Connector connector = new AccumuloHelper(configuration).getConnector();
        BatchWriter writer = connector.createBatchWriter(REG_TABLE, 1000000L, 1000L, 10);
        Mutation m = new Mutation(row.getKey().getRow());
        m.put(row.getKey().getColumnFamily(), row.getKey().getColumnQualifier(),
                new ColumnVisibility(visibilityToken), new Value(value.getBytes()));
        writer.addMutation(m);
        writer.close();
    } catch (IOException e) {
        throw new RegistrationException("Error: IOException " + e);
    } catch (TableNotFoundException e) {
        throw new RegistrationException("Error: Accumulo Misconfigured - table is not found " + e);
    } catch (MutationsRejectedException e) {
        throw new RegistrationException("Error: Mutation Rejected " + e);
    }
}
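
In both Accumulo examples above, new Value(value.getBytes()) copies the Text object's whole backing array, which can include stale bytes beyond getLength(). A hedged sketch of a tighter construction, assuming Text.copyBytes() (available in recent Hadoop releases) and Accumulo's Value(byte[], int, int) constructor:

// Copy only the valid portion of the Text value into the Accumulo Value.
Value trimmed = new Value(value.copyBytes());
// or, bounding the copy without the intermediate array:
Value bounded = new Value(value.getBytes(), 0, value.getLength());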

From source file:fi.tkk.ics.hadoop.bam.cli.plugins.chipster.SummarySort.java

License:Open Source License

@Override
public boolean nextKeyValue() throws IOException, CharacterCodingException {
    if (!lineRR.nextKeyValue())
        return false;

    Text line = getCurrentValue();
    int tabOne = line.find("\t");

    int rid = Integer.parseInt(Text.decode(line.getBytes(), 0, tabOne));

    int tabTwo = line.find("\t", tabOne + 1);
    int posBeg = tabOne + 1;
    int posEnd = tabTwo - 1;

    int pos = Integer.parseInt(Text.decode(line.getBytes(), posBeg, posEnd - posBeg + 1));

    key.set(BAMRecordReader.getKey0(rid, pos));
    return true;
}
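
The example above pairs Text.find(), which returns a byte offset into the raw buffer, with the static Text.decode(bytes, start, length) helper so that only the selected field is decoded. A minimal sketch of the same pairing (the helper name is illustrative only):

// Hypothetical helper: parse the integer field before the first tab.
static int leadingField(Text line) throws CharacterCodingException {
    int tab = line.find("\t");                                       // byte offset of the first tab
    return Integer.parseInt(Text.decode(line.getBytes(), 0, tab));   // decode only bytes [0, tab)
}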

From source file:fi.tkk.ics.hadoop.bam.SequencedFragment.java

License:Open Source License

/**
 * Convert quality scores in place.
 *
 * @throws FormatException if quality scores are out of the range
 *         allowed by the current encoding.
 * @throws IllegalArgumentException if the current and target quality encodings are the same.
 */
public static void convertQuality(Text quality, BaseQualityEncoding current, BaseQualityEncoding target) {
    if (current == target)
        throw new IllegalArgumentException(
                "current and target quality encodinds are the same (" + current + ")");

    byte[] bytes = quality.getBytes();
    final int len = quality.getLength();
    final int illuminaSangerDistance = FormatConstants.ILLUMINA_OFFSET - FormatConstants.SANGER_OFFSET;

    if (current == BaseQualityEncoding.Illumina && target == BaseQualityEncoding.Sanger) {
        for (int i = 0; i < len; ++i) {
            if (bytes[i] < FormatConstants.ILLUMINA_OFFSET
                    || bytes[i] > (FormatConstants.ILLUMINA_OFFSET + FormatConstants.ILLUMINA_MAX)) {
                throw new FormatException("base quality score out of range for Illumina Phred+64 format (found "
                        + (bytes[i] - FormatConstants.ILLUMINA_OFFSET) + " but acceptable range is [0,"
                        + FormatConstants.ILLUMINA_MAX + "]).\n"
                        + "Maybe qualities are encoded in Sanger format?\n");
            }
            bytes[i] -= illuminaSangerDistance;
        }
    } else if (current == BaseQualityEncoding.Sanger && target == BaseQualityEncoding.Illumina) {
        for (int i = 0; i < len; ++i) {
            if (bytes[i] < FormatConstants.SANGER_OFFSET
                    || bytes[i] > (FormatConstants.SANGER_OFFSET + FormatConstants.SANGER_MAX)) {
                throw new FormatException("base quality score out of range for Sanger Phred+64 format (found "
                        + (bytes[i] - FormatConstants.SANGER_OFFSET) + " but acceptable range is [0,"
                        + FormatConstants.SANGER_MAX + "]).\n"
                        + "Maybe qualities are encoded in Illumina format?\n");
            }
            bytes[i] += illuminaSangerDistance;
        }
    } else
        throw new IllegalArgumentException(
                "unsupported BaseQualityEncoding transformation from " + current + " to " + target);
}
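
Because getBytes() exposes the live backing array rather than a copy, the loops above rewrite the quality scores in place and no call to set() is needed afterwards. A hedged usage sketch, assuming the BaseQualityEncoding enum referenced in the signature above:

// Hypothetical usage: convert Illumina Phred+64 scores to Sanger Phred+33 in place.
Text quality = new Text("hhhh");   // 'h' = 104, i.e. quality 40 in Phred+64
SequencedFragment.convertQuality(quality, BaseQualityEncoding.Illumina, BaseQualityEncoding.Sanger);
System.out.println(quality);       // prints "IIII" ('I' = 73, i.e. quality 40 in Phred+33)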

From source file:fi.tkk.ics.hadoop.bam.SequencedFragment.java

License:Open Source License

/**
 * Verify that the given quality bytes are within the range allowed for the specified encoding.
 *
 * In theory, the Sanger encoding uses the entire range of characters from ASCII 33 to 126,
 * giving a value range of [0,93]. However, values over 60 are unlikely in practice and are
 * more likely to be caused by mistaking a file that uses Illumina encoding for Sanger. So we
 * enforce the same range supported by the Illumina encoding ([0,62]) for Sanger as well.
 *
 * @return -1 if all quality scores are in range; otherwise the index of the first
 *         out-of-range value.
 */
public static int verifyQuality(Text quality, BaseQualityEncoding encoding) {
    // set allowed quality range
    int max, min;

    if (encoding == BaseQualityEncoding.Illumina) {
        max = FormatConstants.ILLUMINA_OFFSET + FormatConstants.ILLUMINA_MAX;
        min = FormatConstants.ILLUMINA_OFFSET;
    } else if (encoding == BaseQualityEncoding.Sanger) {
        max = FormatConstants.SANGER_OFFSET + FormatConstants.SANGER_MAX;
        min = FormatConstants.SANGER_OFFSET;
    } else
        throw new IllegalArgumentException("Unsupported base encoding quality " + encoding);

    // verify
    final byte[] bytes = quality.getBytes();
    final int len = quality.getLength();

    for (int i = 0; i < len; ++i) {
        if (bytes[i] < min || bytes[i] > max)
            return i;
    }
    return -1;
}
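
A hedged usage sketch of the companion check, again assuming the BaseQualityEncoding enum from the signature above:

// Hypothetical usage: -1 means every score is in range; otherwise the index of the first bad byte.
Text quality = new Text("II I");   // the space (ASCII 32) is below the Sanger minimum of '!' (33)
int bad = SequencedFragment.verifyQuality(quality, BaseQualityEncoding.Sanger);
if (bad >= 0)
    System.err.println("Invalid quality score at position " + bad);   // position 2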

From source file:fm.last.darling.hbase.HBaseJSONOutputReader.java

License:Apache License

private byte[] trimOuterBytes(Text text) {
    byte[] bytes = new byte[text.getLength() - 2];
    System.arraycopy(text.getBytes(), 1, bytes, 0, bytes.length);
    return bytes;
}
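
Since the method above must not rely on text.getBytes().length, the copy is sized from getLength(). A hedged equivalent using Arrays.copyOfRange, assuming the Text holds at least two bytes:

// Hypothetical equivalent: keep bytes 1 .. getLength() - 2, dropping the first and last byte.
private byte[] trimOuterBytes(Text text) {
    return java.util.Arrays.copyOfRange(text.getBytes(), 1, text.getLength() - 1);
}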

From source file:fr.ens.biologie.genomique.eoulsan.bio.io.hadoop.ExpressionRecordWriter.java

License:LGPL

@Override
public synchronized void write(final Text key, final LongWritable value)
        throws IOException, InterruptedException {

    this.context.getCounter(COUNTERS_GROUP, INPUT_ENTRIES).increment(1);

    if (value == null) {
        return;
    }

    this.out.write(key.getBytes(), 0, key.getLength());
    this.out.write(separator);
    this.out.write(value.toString().getBytes(StandardCharsets.UTF_8));
    this.out.write(newline);

    this.context.getCounter(COUNTERS_GROUP, ENTRIES_WRITTEN).increment(1);
}

From source file:fr.ens.biologie.genomique.eoulsan.bio.io.hadoop.SAMRecordWriter.java

License:LGPL

@Override
public synchronized void write(final Text key, final Text value) throws IOException, InterruptedException {

    this.context.getCounter(COUNTERS_GROUP, INPUT_ENTRIES).increment(1);

    if (value == null) {
        return;
    }

    this.out.write(value.getBytes(), 0, value.getLength());
    this.out.write(newline);

    this.context.getCounter(COUNTERS_GROUP, ENTRIES_WRITTEN).increment(1);
}