Example usage for org.apache.hadoop.io Text getBytes

Introduction

On this page you can find example usages of org.apache.hadoop.io.Text#getBytes(), collected from open-source projects.

Prototype

@Override
public byte[] getBytes() 

Document

Returns the raw bytes; however, only data up to getLength() is valid.
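
A minimal sketch of why the caveat matters (the class and variable names here are illustrative, not from any of the projects below): the backing buffer can be larger than the stored value, so decoding the whole array may pick up stale bytes.

import java.nio.charset.StandardCharsets;
import java.util.Arrays;
import org.apache.hadoop.io.Text;

public class TextGetBytesDemo {
    public static void main(String[] args) {
        Text text = new Text("hello, hadoop");
        byte[] shorter = "hi".getBytes(StandardCharsets.UTF_8);
        text.set(shorter, 0, shorter.length); // Text keeps its larger buffer when capacity suffices

        byte[] raw = text.getBytes();         // raw.length may exceed text.getLength()
        // Wrong: new String(raw, UTF_8) could include stale trailing bytes.
        // Right: bound every read by getLength().
        byte[] valid = Arrays.copyOf(raw, text.getLength());
        System.out.println(new String(valid, StandardCharsets.UTF_8)); // prints "hi"
    }
}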

Usage

From source file:org.apache.kylin.storage.hbase.steps.KeyValueCreator.java

License:Apache License

public KeyValue create(Text key, byte[] value, int voffset, int vlen) {
    return create(key.getBytes(), 0, key.getLength(), value, voffset, vlen);
}
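
This one-liner applies the contract correctly: the raw buffer travels together with offset 0 and key.getLength(), so the resulting HBase KeyValue never covers bytes past the valid region.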

From source file:org.apache.kylin.storage.hbase.steps.RangeKeyDistributionReducer.java

License:Apache License

@Override
protected void doCleanup(Context context) throws IOException, InterruptedException {
    int nRegion = Math.round((float) gbPoints.size() / cut);
    nRegion = Math.max(minRegionCount, nRegion);
    nRegion = Math.min(maxRegionCount, nRegion);

    int gbPerRegion = gbPoints.size() / nRegion;
    gbPerRegion = Math.max(1, gbPerRegion);

    if (hfileSizeGB <= 0) {
        hfileSizeGB = gbPerRegion;
    }
    int hfilePerRegion = (int) (gbPerRegion / hfileSizeGB);
    hfilePerRegion = Math.max(1, hfilePerRegion);

    System.out.println(nRegion + " regions");
    System.out.println(gbPerRegion + " GB per region");
    System.out.println(hfilePerRegion + " hfile per region");

    Path hfilePartitionFile = new Path(output + "/part-r-00000_hfile");
    SequenceFile.Writer hfilePartitionWriter = new SequenceFile.Writer(
            hfilePartitionFile.getFileSystem(context.getConfiguration()), context.getConfiguration(),
            hfilePartitionFile, ImmutableBytesWritable.class, NullWritable.class);
    int hfileCountInOneRegion = 0;
    for (int i = hfileSizeGB; i < gbPoints.size(); i += hfileSizeGB) {
        hfilePartitionWriter.append(new ImmutableBytesWritable(gbPoints.get(i).getBytes()), NullWritable.get());
        if (++hfileCountInOneRegion >= hfilePerRegion) {
            Text key = gbPoints.get(i);
            outputValue.set(i);
            System.out.println(StringUtils.byteToHexString(key.getBytes()) + "\t" + outputValue.get());
            context.write(key, outputValue);

            hfileCountInOneRegion = 0;
        }
    }
    hfilePartitionWriter.close();
}
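
Note that new ImmutableBytesWritable(gbPoints.get(i).getBytes()) wraps the entire backing array, so an oversized Text buffer would leak trailing garbage into the written split key. A more defensive variant, assuming the (byte[], int, int) constructor that HBase's ImmutableBytesWritable provides, bounds the wrap explicitly:

    Text splitKey = gbPoints.get(i);
    hfilePartitionWriter.append(new ImmutableBytesWritable(splitKey.getBytes(), 0, splitKey.getLength()),
            NullWritable.get());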

From source file:org.apache.mahout.text.ChunkedWriter.java

License:Apache License

public void write(String key, String value) throws IOException {
    if (currentChunkSize > maxChunkSizeInBytes) {
        writer.close();
        writer = new SequenceFile.Writer(fs, conf, getPath(currentChunkID++), Text.class, Text.class);
        currentChunkSize = 0;
    }

    Text keyT = new Text(key);
    Text valueT = new Text(value);
    currentChunkSize += keyT.getBytes().length + valueT.getBytes().length; // Overhead
    writer.append(keyT, valueT);
}

From source file:org.apache.mahout.utils.io.ChunkedWriter.java

License:Apache License

/** Writes a new key-value pair, creating a new sequence file if necessary.*/
public void write(String key, String value) throws IOException {
    if (currentChunkSize > maxChunkSizeInBytes) {
        Closeables.close(writer, false);
        currentChunkID++;
        writer = new SequenceFile.Writer(fs, conf, getPath(currentChunkID), Text.class, Text.class);
        currentChunkSize = 0;
    }

    Text keyT = new Text(key);
    Text valueT = new Text(value);
    currentChunkSize += keyT.getBytes().length + valueT.getBytes().length; // Overhead
    writer.append(keyT, valueT);
}
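
In both ChunkedWriter variants, keyT.getBytes().length measures the backing buffer, which is not guaranteed to equal the encoded string's byte count. The Text objects are freshly built from Strings here, so the two usually agree, but getLength() states the intent exactly; a sketch of the stricter accounting:

    currentChunkSize += keyT.getLength() + valueT.getLength();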

From source file:org.apache.orc.impl.writer.StringBaseTreeWriter.java

License:Apache License

private void flushDictionary() throws IOException {
    final int[] dumpOrder = new int[dictionary.size()];

    if (useDictionaryEncoding) {
        // Write the dictionary by traversing the red-black tree writing out
        // the bytes and lengths; and creating the map from the original order
        // to the final sorted order.

        dictionary.visit(new StringRedBlackTree.Visitor() {
            private int currentId = 0;

            @Override
            public void visit(StringRedBlackTree.VisitorContext context) throws IOException {
                context.writeBytes(stringOutput);
                lengthOutput.write(context.getLength());
                dumpOrder[context.getOriginalPosition()] = currentId++;
            }
        });
    } else {
        // for direct encoding, we don't want the dictionary data stream
        stringOutput.suppress();
    }
    int length = rows.size();
    int rowIndexEntry = 0;
    OrcProto.RowIndex.Builder rowIndex = getRowIndex();
    Text text = new Text();
    // write the values translated into the dump order.
    for (int i = 0; i <= length; ++i) {
        // now that we are writing out the row values, we can finalize the
        // row index
        if (buildIndex) {
            while (i == rowIndexValueCount.get(rowIndexEntry) && rowIndexEntry < savedRowIndex.size()) {
                OrcProto.RowIndexEntry.Builder base = savedRowIndex.get(rowIndexEntry++).toBuilder();
                if (useDictionaryEncoding) {
                    rowOutput.getPosition(new RowIndexPositionRecorder(base));
                } else {
                    PositionRecorder posn = new RowIndexPositionRecorder(base);
                    directStreamOutput.getPosition(posn);
                    lengthOutput.getPosition(posn);
                }
                rowIndex.addEntry(base.build());
            }
        }
        if (i != length) {
            if (useDictionaryEncoding) {
                rowOutput.write(dumpOrder[rows.get(i)]);
            } else {
                dictionary.getText(text, rows.get(i));
                directStreamOutput.write(text.getBytes(), 0, text.getLength());
                lengthOutput.write(text.getLength());
            }
        }
    }
    rows.clear();
}
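
Two details are worth noting in the direct-encoding branch: a single Text instance is reused across the whole loop (dictionary.getText refills its buffer on each iteration), and the write is bounded as text.getBytes(), 0, text.getLength(), so the potentially oversized buffer is never flushed to the stream.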

From source file:org.apache.orc.mapred.OrcMapredRecordWriter.java

License:Apache License

static void setCharValue(BytesColumnVector vector, int row, Text value, int length) {
    // we need to trim or pad the string with spaces to required length
    int actualLength = value.getLength();
    if (actualLength >= length) {
        setBinaryValue(vector, row, value, length);
    } else {
        byte[] spaces = SPACE_BUFFER.get();
        if (length - actualLength > spaces.length) {
            spaces = new byte[length - actualLength];
            Arrays.fill(spaces, (byte) ' ');
            SPACE_BUFFER.set(spaces);
        }
        vector.setConcat(row, value.getBytes(), 0, actualLength, spaces, 0, length - actualLength);
    }
}
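
Here actualLength caches value.getLength() once and both branches respect it: the truncating path hands setBinaryValue the fixed CHAR width, while setConcat copies only the valid actualLength bytes before appending padding from the thread-local SPACE_BUFFER, which is grown on demand and reused to avoid reallocating the spaces for every row.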

From source file:org.apache.pig.builtin.JsonLoader.java

License:Apache License

public Tuple getNext() throws IOException {
    Text val = null;
    try {
        // Read the next key value pair from the record reader.  If it's
        // finished, return null
        if (!reader.nextKeyValue())
            return null;

        // Get the current value.  We don't use the key.
        val = (Text) reader.getCurrentValue();
    } catch (InterruptedException ie) {
        throw new IOException(ie);
    }

    // Create a parser specific for this input line.  This may not be the
    // most efficient approach.
    byte[] newBytes = new byte[val.getLength()];
    System.arraycopy(val.getBytes(), 0, newBytes, 0, val.getLength());
    ByteArrayInputStream bais = new ByteArrayInputStream(newBytes);
    JsonParser p = jsonFactory.createJsonParser(bais);

    // Create the tuple we will be returning.  We create it with the right
    // number of fields, as the Tuple object is optimized for this case.
    ResourceFieldSchema[] fields = schema.getFields();
    Tuple t = tupleFactory.newTuple(fields.length);

    // Read the start object marker.  Throughout this file if the parsing
    // isn't what we expect we return a tuple with null fields rather than
    // throwing an exception.  That way a few mangled lines don't fail the
    // job.
    if (p.nextToken() != JsonToken.START_OBJECT) {
        warn("Bad record, could not find start of record " + val.toString(), PigWarning.UDF_WARNING_1);
        return t;
    }

    // Read each field in the record
    for (int i = 0; i < fields.length; i++) {
        t.set(i, readField(p, fields[i], i));
    }

    if (p.nextToken() != JsonToken.END_OBJECT) {
        warn("Bad record, could not find end of record " + val.toString(), PigWarning.UDF_WARNING_1);
        return t;
    }
    p.close();
    return t;
}
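
The System.arraycopy bounded by val.getLength() is the standard defensive copy: the record reader reuses its Text instance, so the loader must not hold onto the backing array. On Hadoop 2.x and later the same copy can be written more compactly (a hedged alternative, not what the Pig source shown here does):

    byte[] newBytes = val.copyBytes(); // allocates and fills exactly getLength() bytes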

From source file:org.apache.pig.builtin.PigStorage.java

License:Apache License

@Override
public Tuple getNext() throws IOException {
    mProtoTuple = new ArrayList<Object>();
    if (!mRequiredColumnsInitialized) {
        if (signature != null) {
            Properties p = UDFContext.getUDFContext().getUDFProperties(this.getClass());
            mRequiredColumns = (boolean[]) ObjectSerializer.deserialize(p.getProperty(signature));
        }
        mRequiredColumnsInitialized = true;
    }
    //Prepend input source path if source tagging is enabled
    if (tagFile) {
        mProtoTuple.add(new DataByteArray(sourcePath.getName()));
    } else if (tagPath) {
        mProtoTuple.add(new DataByteArray(sourcePath.toString()));
    }

    try {
        boolean notDone = in.nextKeyValue();
        if (!notDone) {
            return null;
        }
        Text value = (Text) in.getCurrentValue();
        byte[] buf = value.getBytes();
        int len = value.getLength();
        int start = 0;
        int fieldID = 0;
        for (int i = 0; i < len; i++) {
            if (buf[i] == fieldDel) {
                if (mRequiredColumns == null
                        || (mRequiredColumns.length > fieldID && mRequiredColumns[fieldID]))
                    addTupleValue(mProtoTuple, buf, start, i);
                start = i + 1;
                fieldID++;
            }
        }
        // pick up the last field
        if (start <= len && (mRequiredColumns == null
                || (mRequiredColumns.length > fieldID && mRequiredColumns[fieldID]))) {
            addTupleValue(mProtoTuple, buf, start, len);
        }
        Tuple t = mTupleFactory.newTupleNoCopy(mProtoTuple);

        return dontLoadSchema ? t : applySchema(t);
    } catch (InterruptedException e) {
        int errCode = 6018;
        String errMsg = "Error while reading input";
        throw new ExecException(errMsg, errCode, PigException.REMOTE_ENVIRONMENT, e);
    }
}
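
The field-splitting loop scans buf only up to len, i.e. value.getLength(); iterating to buf.length instead could walk into stale delimiter bytes left over from an earlier, longer record and produce phantom fields.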

From source file:org.apache.pig.builtin.TextLoader.java

License:Apache License

@Override
public Tuple getNext() throws IOException {
    try {
        boolean notDone = in.nextKeyValue();
        if (!notDone) {
            return null;
        }
        Text value = (Text) in.getCurrentValue();
        byte[] ba = value.getBytes();
        // make a copy of the bytes representing the input since
        // TextInputFormat will reuse the byte array
        return mTupleFactory.newTuple(new DataByteArray(ba, 0, value.getLength()));
    } catch (InterruptedException e) {
        throw new IOException("Error getting input");
    }
}
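
DataByteArray(ba, 0, value.getLength()) copies just the valid range into the tuple, which is exactly the defensive copy the comment calls for: TextInputFormat reuses the Text value, so handing the raw array to the tuple would let the next record overwrite it.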

From source file:org.apache.pig.impl.streaming.OutputHandler.java

License:Apache License

private byte[] readNextLine() throws IOException {
    Text line = new Text();
    int num = in.readLine(line);
    byte[] lineBytes = line.getBytes();
    if (num <= 0) {
        return null;
    }

    return lineBytes;
}
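
readNextLine hands back the Text's backing array itself. Because Text grows its array geometrically on append, a long line can leave the buffer larger than the data just read, so callers may see trailing bytes that are not part of the line. A bounded copy avoids this (a hedged correction using java.util.Arrays, not the shipped Pig code):

    if (num <= 0) {
        return null;
    }
    return Arrays.copyOf(line.getBytes(), line.getLength());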