Example usage for org.apache.hadoop.io Text getBytes

List of usage examples for org.apache.hadoop.io Text getBytes

Introduction

On this page you can find example usages of org.apache.hadoop.io.Text.getBytes().

Prototype

@Override
public byte[] getBytes() 

Source Link

Document

Returns the raw backing byte array; only the bytes in the range [0, getLength()) are valid, and the array itself may be longer than that.

Usage

From source file:org.apache.kylin.engine.mr.steps.InMemCuboidFromBaseCuboidMapper.java

License:Apache License

/**
 * Concatenates the key (without its first {@code keyOffset} bytes) and the value into a
 * single record.
 *
 * <p>{@code Text.getBytes()} hands out the backing array, which may be longer than the
 * logical content; only the first {@code getLength()} bytes are valid. Both copies are
 * therefore bounded by {@code getLength()} instead of the array length, so stale bytes
 * left over from a previously held, longer value never end up in the record.
 *
 * @param key   row key; the bytes before {@code keyOffset} are skipped
 * @param value encoded value bytes, appended right after the key bytes
 * @return a new {@code ByteArray} wrapping a private copy of the combined bytes
 */
@Override
protected ByteArray getRecordFromKeyValue(Text key, Text value) {
    keyValueBuffer.clear();
    keyValueBuffer.put(key.getBytes(), keyOffset, key.getLength() - keyOffset);
    keyValueBuffer.put(value.getBytes(), 0, value.getLength());

    // Copy out exactly what was written so the shared buffer can be reused for the next record.
    int recordLength = keyValueBuffer.position();
    byte[] keyValue = new byte[recordLength];
    System.arraycopy(keyValueBuffer.array(), 0, keyValue, 0, recordLength);

    return new ByteArray(keyValue);
}

From source file:org.apache.kylin.engine.mr.steps.MergeCuboidMapper.java

License:Apache License

/**
 * Rewrites one cuboid row for the merged segment and writes it to the context.
 *
 * <p>The key is split into columns. Columns for which {@code checkNeedMerging} returns true
 * have their dictionary id translated from the source segment's dictionary to the merged
 * dictionary; all other columns are copied through unchanged. The rebuilt key body is then
 * handed to the row key encoder. When {@code dictMeasures} is non-empty, the value is decoded,
 * the listed measures are re-encoded, and the re-encoded value is written instead of the
 * input value.
 *
 * @param key     encoded row key read from the source segment
 * @param value   encoded measures read from the source segment
 * @param context context the rewritten key/value pair is written to
 * @throws IOException          declared by the signature (e.g. from {@code context.write})
 * @throws InterruptedException declared by the signature (e.g. from {@code context.write})
 */
@Override
public void doMap(Text key, Text value, Context context) throws IOException, InterruptedException {
    // NOTE(review): getBytes() returns the whole backing array, not just getLength() bytes —
    // confirm that split() does not depend on the array length.
    long cuboidID = rowKeySplitter.split(key.getBytes());
    Cuboid cuboid = Cuboid.findById(cubeDesc, cuboidID);
    RowKeyEncoder rowkeyEncoder = rowKeyEncoderProvider.getRowkeyEncoder(cuboid);

    SplittedBytes[] splittedByteses = rowKeySplitter.getSplitBuffers();
    // write position inside newKeyBodyBuf
    int bufOffset = 0;
    // index of the first split that belongs to the key body
    int bodySplitOffset = rowKeySplitter.getBodySplitOffset();

    for (int i = 0; i < cuboid.getColumns().size(); ++i) {
        int useSplit = i + bodySplitOffset;
        TblColRef col = cuboid.getColumns().get(i);

        if (this.checkNeedMerging(col)) {
            // if dictionary on fact table column, needs rewrite
            DictionaryManager dictMgr = DictionaryManager.getInstance(config);
            Dictionary<String> mergedDict = dictMgr.getDictionary(mergedCubeSegment.getDictResPath(col));

            Dictionary<String> sourceDict;
            // handle the column that all records is null
            if (sourceCubeSegment.getDictionary(col) == null) {
                // no source dictionary: emit the merged dictionary's null id for this column
                BytesUtil.writeUnsigned(mergedDict.nullId(), newKeyBodyBuf, bufOffset,
                        mergedDict.getSizeOfId());
                bufOffset += mergedDict.getSizeOfId();
                continue;
            } else {
                sourceDict = dictMgr.getDictionary(sourceCubeSegment.getDictResPath(col));
            }

            // double newKeyBodyBuf (keeping what was already written) until the remaining
            // space can hold the largest of the three sizes checked below
            while (sourceDict.getSizeOfValue() > newKeyBodyBuf.length - bufOffset || //
                    mergedDict.getSizeOfValue() > newKeyBodyBuf.length - bufOffset || //
                    mergedDict.getSizeOfId() > newKeyBodyBuf.length - bufOffset) {
                byte[] oldBuf = newKeyBodyBuf;
                newKeyBodyBuf = new byte[2 * newKeyBodyBuf.length];
                System.arraycopy(oldBuf, 0, newKeyBodyBuf, 0, oldBuf.length);
            }

            // the split holds the column's id in the source dictionary
            int idInSourceDict = BytesUtil.readUnsigned(splittedByteses[useSplit].value, 0,
                    splittedByteses[useSplit].length);
            int idInMergedDict;

            //int size = sourceDict.getValueBytesFromId(idInSourceDict, newKeyBodyBuf, bufOffset);
            // translate source id -> value -> merged id; a null value maps to the merged null id
            String v = sourceDict.getValueFromId(idInSourceDict);
            if (v == null) {
                idInMergedDict = mergedDict.nullId();
            } else {
                idInMergedDict = mergedDict.getIdFromValue(v);
            }

            BytesUtil.writeUnsigned(idInMergedDict, newKeyBodyBuf, bufOffset, mergedDict.getSizeOfId());
            bufOffset += mergedDict.getSizeOfId();
        } else {
            // keep as it is
            // grow the body buffer (preserving its content) until the split fits
            while (splittedByteses[useSplit].length > newKeyBodyBuf.length - bufOffset) {
                byte[] oldBuf = newKeyBodyBuf;
                newKeyBodyBuf = new byte[2 * newKeyBodyBuf.length];
                System.arraycopy(oldBuf, 0, newKeyBodyBuf, 0, oldBuf.length);
            }

            System.arraycopy(splittedByteses[useSplit].value, 0, newKeyBodyBuf, bufOffset,
                    splittedByteses[useSplit].length);
            bufOffset += splittedByteses[useSplit].length;
        }
    }

    int fullKeySize = rowkeyEncoder.getBytesLength();
    // NOTE(review): the loop tests array().length but grows from length() * 2 — confirm the two
    // cannot diverge in a way that stops the buffer from growing.
    while (newKeyBuf.array().length < fullKeySize) {
        newKeyBuf.set(new byte[newKeyBuf.length() * 2]);
    }
    newKeyBuf.set(0, fullKeySize);

    // encode the first bufOffset bytes of the rebuilt body into newKeyBuf
    rowkeyEncoder.encode(new ByteArray(newKeyBodyBuf, 0, bufOffset), newKeyBuf);
    outputKey.set(newKeyBuf.array(), 0, fullKeySize);

    // re-encode measures if dictionary is used
    if (dictMeasures.size() > 0) {
        // only the first getLength() bytes of the value's backing array are decoded
        codec.decode(ByteBuffer.wrap(value.getBytes(), 0, value.getLength()), measureObjs);
        for (Pair<Integer, MeasureIngester> pair : dictMeasures) {
            int i = pair.getFirst();
            MeasureIngester ingester = pair.getSecond();
            measureObjs[i] = ingester.reEncodeDictionary(measureObjs[i], measureDescs.get(i), oldDicts,
                    newDicts);
        }
        ByteBuffer valueBuf = codec.encode(measureObjs);
        outputValue.set(valueBuf.array(), 0, valueBuf.position());
        // from here on the re-encoded value replaces the input value
        value = outputValue;
    }

    context.write(outputKey, value);
}

From source file:org.apache.kylin.engine.mr.steps.NDCuboidMapper.java

License:Apache License

/**
 * Emits the incoming value once for every child cuboid spanned by the row's cuboid.
 *
 * <p>Rows whose cuboid has no children are counted under "Skipped records" and dropped;
 * all others are counted under "Processed records". Progress is logged whenever the
 * respective running counter hits a multiple of the log threshold.
 *
 * @param key     encoded row key of the parent cuboid
 * @param value   encoded measures, written unchanged for every child
 * @param context context the child records and counters are written to
 * @throws IOException          declared by the signature (e.g. from {@code context.write})
 * @throws InterruptedException declared by the signature (e.g. from {@code context.write})
 */
@Override
public void doMap(Text key, Text value, Context context) throws IOException, InterruptedException {
    final long parentId = rowKeySplitter.split(key.getBytes());
    final Cuboid parentCuboid = Cuboid.findById(cubeDesc, parentId);
    final Collection<Long> children = cuboidScheduler.getSpanningCuboid(parentId);

    final boolean hasChildren = children != null && children.size() != 0;
    if (!hasChildren) {
        // nothing can be derived from this cuboid
        context.getCounter(BatchConstants.MAPREDUCE_COUNTER_GROUP_NAME, "Skipped records").increment(1L);
        // the threshold is checked against the pre-increment value, the log shows the new one
        final boolean logSkip = skipCounter % BatchConstants.NORMAL_RECORD_LOG_THRESHOLD == 0;
        skipCounter++;
        if (logSkip) {
            logger.info("Skipping record with ordinal: " + skipCounter);
        }
        return;
    }

    context.getCounter(BatchConstants.MAPREDUCE_COUNTER_GROUP_NAME, "Processed records").increment(1L);

    final boolean logHandle = handleCounter % BatchConstants.NORMAL_RECORD_LOG_THRESHOLD == 0;
    handleCounter++;
    if (logHandle) {
        logger.info("Handling record with ordinal: " + handleCounter);
    }

    for (Long childId : children) {
        final Cuboid childCuboid = Cuboid.findById(cubeDesc, childId);
        final Pair<Integer, ByteArray> built = ndCuboidBuilder.buildKey(parentCuboid, childCuboid,
                rowKeySplitter.getSplitBuffers());
        // second = buffer holding the child key, first = number of bytes of it to use
        final byte[] childKeyBytes = built.getSecond().array();
        final int childKeyLength = built.getFirst();
        outputKey.set(childKeyBytes, 0, childKeyLength);
        context.write(outputKey, value);
    }
}

From source file:org.apache.kylin.engine.mr.steps.NumberDictionaryForestTest.java

License:Apache License

/**
 * Decodes the field value carried by the key, prints it together with the key's type id
 * to standard output, and returns it.
 *
 * @param key sortable key whose text is decoded
 * @return the text content after its first byte, limited to the valid length
 */
private String printKey(SelfDefineSortableKey key) {
    final Text raw = key.getText();
    // byte 0 is skipped; only getLength() bytes of the backing array are valid
    final byte[] backing = raw.getBytes();
    final int valueLength = raw.getLength() - 1;
    final String fieldValue = Bytes.toString(backing, 1, valueLength);

    final String line = "type flag:" + key.getTypeId() + " fieldValue:" + fieldValue;
    System.out.println(line);
    return fieldValue;
}

From source file:org.apache.kylin.engine.mr.steps.NumberDictionaryForestTest.java

License:Apache License

/**
 * Decodes the field value carried by the key.
 *
 * @param key sortable key whose text is decoded
 * @return the text content after its first byte, limited to the valid length
 */
private String getFieldValue(SelfDefineSortableKey key) {
    final Text raw = key.getText();
    // byte 0 is skipped; only getLength() bytes of the backing array are valid
    final byte[] backing = raw.getBytes();
    final int valueLength = raw.getLength() - 1;
    return Bytes.toString(backing, 1, valueLength);
}

From source file:org.apache.kylin.engine.mr.steps.SegmentReEncoder.java

License:Apache License

/**
 * Re-encodes a row whose dimensions and measures are both in encoded (Text) form.
 *
 * <p>The key is always rewritten through {@code processKey}. The value is decoded,
 * re-encoded and copied into a fresh {@code Text} only when {@code dictMeasures} is
 * non-empty; otherwise the incoming value instance is returned as-is.
 *
 * @param key   encoded row key
 * @param value encoded measures
 * @return pair of the rewritten key and the (possibly re-encoded) value
 * @throws IOException           declared by the signature; may propagate from {@code processKey}
 * @throws IllegalStateException if this encoder has not been initialized
 */
public Pair<Text, Text> reEncode(Text key, Text value) throws IOException {
    if (!initialized) {
        throw new IllegalStateException("Not initialized");
    }

    final Object[] measureObjs = new Object[measureDescs.size()];
    if (dictMeasures.size() <= 0) {
        // no dictionary-backed measures: only the key needs rewriting
        return Pair.newPair(processKey(key), value);
    }

    // re-encode measures if dictionary is used; decode only the valid part of the backing array
    final ByteBuffer encodedInput = ByteBuffer.wrap(value.getBytes(), 0, value.getLength());
    codec.decode(encodedInput, measureObjs);
    for (Pair<Integer, MeasureIngester> entry : dictMeasures) {
        final int idx = entry.getFirst();
        final MeasureIngester ingester = entry.getSecond();
        measureObjs[idx] = ingester.reEncodeDictionary(measureObjs[idx], measureDescs.get(idx),
                oldDicts, newDicts);
    }

    // copy out the bytes the codec wrote, i.e. everything up to the buffer's position
    final ByteBuffer encodedOutput = codec.encode(measureObjs);
    final int encodedLength = encodedOutput.position();
    final byte[] resultValue = new byte[encodedLength];
    System.arraycopy(encodedOutput.array(), 0, resultValue, 0, encodedLength);

    return Pair.newPair(processKey(key), new Text(resultValue));
}

From source file:org.apache.kylin.engine.mr.steps.SegmentReEncoder.java

License:Apache License

/**
 * Re-encodes a row and returns the measures in {@code Object[]} form.
 *
 * <p>The value is always decoded into a fresh measure array. When {@code dictMeasures} is
 * non-empty, the listed measures are replaced by their re-encoded counterparts. The key is
 * rewritten through {@code processKey}.
 *
 * @param key   encoded row key
 * @param value encoded measures; only the first {@code getLength()} bytes are decoded
 * @return pair of the rewritten key and the decoded (possibly re-encoded) measure objects
 * @throws IOException           declared by the signature; may propagate from {@code processKey}
 * @throws IllegalStateException if this encoder has not been initialized
 */
public Pair<Text, Object[]> reEncode2(Text key, Text value) throws IOException {
    if (!initialized) {
        throw new IllegalStateException("Not initialized");
    }

    Object[] measureObjs = new Object[measureDescs.size()];
    codec.decode(ByteBuffer.wrap(value.getBytes(), 0, value.getLength()), measureObjs);
    // re-encode measures if dictionary is used
    if (dictMeasures.size() > 0) {
        for (Pair<Integer, MeasureIngester> pair : dictMeasures) {
            int i = pair.getFirst();
            MeasureIngester ingester = pair.getSecond();
            measureObjs[i] = ingester.reEncodeDictionary(measureObjs[i], measureDescs.get(i), oldDicts,
                    newDicts);
        }

        // The byte-array copy of the encoded measures that used to follow was never read and
        // has been dropped.
        // NOTE(review): the encode call is kept only in case callers rely on its effect on the
        // codec's state; its result is unused here — confirm and remove if it has none.
        codec.encode(measureObjs);
    }
    return Pair.newPair(processKey(key), measureObjs);
}

From source file:org.apache.kylin.engine.mr.steps.SegmentReEncoder.java

License:Apache License

/**
 * Rebuilds a row key of the merging segment so that it is valid for the merged segment.
 *
 * <p>The key is split into columns. Columns that use a dictionary according to the cube's
 * row key description have their id translated from the merging segment's dictionary to the
 * merged segment's dictionary; all other columns are copied through unchanged. The rebuilt
 * body is encoded by the cuboid's row key encoder and returned in a freshly allocated Text.
 *
 * @param key encoded row key of the merging segment
 * @return a new Text holding exactly the re-encoded key bytes
 * @throws IOException declared by the signature
 */
private Text processKey(Text key) throws IOException {
    // NOTE(review): getBytes() returns the whole backing array, not just getLength() bytes —
    // confirm that split() does not depend on the array length.
    long cuboidID = rowKeySplitter.split(key.getBytes());
    Cuboid cuboid = Cuboid.findForMandatory(cubeDesc, cuboidID);
    RowKeyEncoder rowkeyEncoder = rowKeyEncoderProvider.getRowkeyEncoder(cuboid);

    ByteArray[] splittedByteses = rowKeySplitter.getSplitBuffers();
    // write position inside newKeyBodyBuf
    int bufOffset = 0;
    // index of the first split that belongs to the key body
    int bodySplitOffset = rowKeySplitter.getBodySplitOffset();

    for (int i = 0; i < cuboid.getColumns().size(); ++i) {
        int useSplit = i + bodySplitOffset;
        TblColRef col = cuboid.getColumns().get(i);

        if (cubeDesc.getRowkey().isUseDictionary(col)) {
            // if dictionary on fact table column, needs rewrite
            DictionaryManager dictMgr = DictionaryManager.getInstance(kylinConfig);
            Dictionary<String> mergedDict = dictMgr.getDictionary(mergedSeg.getDictResPath(col));

            // handle the dict of all merged segments is null
            // NOTE(review): this skips the column without writing anything or advancing
            // bufOffset — confirm the encoder expects no bytes for such a column.
            if (mergedDict == null) {
                continue;
            }

            Dictionary<String> sourceDict;
            // handle the column that all records is null
            if (mergingSeg.getDictionary(col) == null) {
                // no source dictionary: emit the merged dictionary's null id for this column
                BytesUtil.writeUnsigned(mergedDict.nullId(), newKeyBodyBuf, bufOffset,
                        mergedDict.getSizeOfId());
                bufOffset += mergedDict.getSizeOfId();
                continue;
            } else {
                sourceDict = dictMgr.getDictionary(mergingSeg.getDictResPath(col));
            }

            // double newKeyBodyBuf (keeping what was already written) until the remaining
            // space can hold the largest of the three sizes checked below
            while (sourceDict.getSizeOfValue() > newKeyBodyBuf.length - bufOffset || //
                    mergedDict.getSizeOfValue() > newKeyBodyBuf.length - bufOffset || //
                    mergedDict.getSizeOfId() > newKeyBodyBuf.length - bufOffset) {
                byte[] oldBuf = newKeyBodyBuf;
                newKeyBodyBuf = new byte[2 * newKeyBodyBuf.length];
                System.arraycopy(oldBuf, 0, newKeyBodyBuf, 0, oldBuf.length);
            }

            // the split holds the column's id in the source dictionary
            int idInSourceDict = BytesUtil.readUnsigned(splittedByteses[useSplit].array(),
                    splittedByteses[useSplit].offset(), splittedByteses[useSplit].length());
            int idInMergedDict;

            //int size = sourceDict.getValueBytesFromId(idInSourceDict, newKeyBodyBuf, bufOffset);
            // translate source id -> value -> merged id; a null value maps to the merged null id
            String v = sourceDict.getValueFromId(idInSourceDict);
            if (v == null) {
                idInMergedDict = mergedDict.nullId();
            } else {
                idInMergedDict = mergedDict.getIdFromValue(v);
            }

            BytesUtil.writeUnsigned(idInMergedDict, newKeyBodyBuf, bufOffset, mergedDict.getSizeOfId());
            bufOffset += mergedDict.getSizeOfId();
        } else {
            // keep as it is
            // grow the body buffer (preserving its content) until the split fits
            while (splittedByteses[useSplit].length() > newKeyBodyBuf.length - bufOffset) {
                byte[] oldBuf = newKeyBodyBuf;
                newKeyBodyBuf = new byte[2 * newKeyBodyBuf.length];
                System.arraycopy(oldBuf, 0, newKeyBodyBuf, 0, oldBuf.length);
            }

            System.arraycopy(splittedByteses[useSplit].array(), splittedByteses[useSplit].offset(),
                    newKeyBodyBuf, bufOffset, splittedByteses[useSplit].length());
            bufOffset += splittedByteses[useSplit].length();
        }
    }

    int fullKeySize = rowkeyEncoder.getBytesLength();
    // NOTE(review): the loop tests array().length but sizes the replacement from length() * 2,
    // and length is set to fullKeySize below — confirm the two cannot diverge in a way that
    // stops the buffer from growing (e.g. length() == 0).
    while (newKeyBuf.array().length < fullKeySize) {
        newKeyBuf = new ByteArray(newKeyBuf.length() * 2);
    }
    newKeyBuf.setLength(fullKeySize);

    // encode the first bufOffset bytes of the rebuilt body into newKeyBuf
    rowkeyEncoder.encode(new ByteArray(newKeyBodyBuf, 0, bufOffset), newKeyBuf);

    // copy out exactly fullKeySize bytes so the result does not alias the reusable buffer
    byte[] resultKey = new byte[fullKeySize];
    System.arraycopy(newKeyBuf.array(), 0, resultKey, 0, fullKeySize);

    return new Text(resultKey);
}

From source file:org.apache.kylin.engine.mr.steps.SelfDefineSortableKey.java

License:Apache License

/**
 * Binds this sortable key to the given raw text and type id.
 *
 * <p>For number-family types the text after its first byte is parsed into a {@code long}
 * (integer family) or a {@code double} (otherwise) and stored as the comparison object;
 * for all other types the Text itself is stored.
 *
 * @param key    raw key; byte 0 is skipped when parsing a numeric value
 * @param typeId type id that decides how the key is interpreted
 * @throws NumberFormatException if a number-family key does not contain a parsable number
 */
public void init(Text key, byte typeId) {
    this.typeId = typeId;
    this.rawKey = key;
    if (isNumberFamily()) {
        // Text holds UTF-8 bytes, so decode explicitly as UTF-8 instead of relying on the
        // platform default charset. Only getLength() bytes of the backing array are valid.
        String valueStr = new String(key.getBytes(), 1, key.getLength() - 1,
                java.nio.charset.StandardCharsets.UTF_8);
        if (isIntegerFamily()) {
            this.keyInObj = Long.parseLong(valueStr);
        } else {
            this.keyInObj = Double.parseDouble(valueStr);
        }
    } else {
        this.keyInObj = key;
    }
}

From source file:org.apache.kylin.engine.mr.steps.UHCDictionaryMapper.java

License:Apache License

/**
 * Prefixes the incoming value with one byte derived from {@code index} and writes the
 * result as a sortable key with a null value.
 *
 * @param key     ignored
 * @param value   text whose valid bytes follow the prefix byte in the output key
 * @param context context the sortable key is written to
 * @throws IOException          declared by the signature (e.g. from {@code context.write})
 * @throws InterruptedException declared by the signature (e.g. from {@code context.write})
 */
@Override
public void doMap(NullWritable key, Text value, Context context) throws IOException, InterruptedException {
    tmpBuf.clear();

    final int valueLength = value.getLength();
    // one extra byte for the prefix
    final int required = valueLength + 1;
    if (tmpBuf.capacity() <= required) {
        // replace the scratch buffer with a larger one; it was just cleared, nothing to carry over
        tmpBuf = ByteBuffer.allocate(countNewSize(tmpBuf.capacity(), required));
    }

    // presumably the low-order byte of an int index — confirm against Bytes.toBytes
    final byte[] indexBytes = Bytes.toBytes(index);
    tmpBuf.put(indexBytes[3]);
    // only the first getLength() bytes of the backing array are valid
    tmpBuf.put(value.getBytes(), 0, valueLength);
    outputKey.set(tmpBuf.array(), 0, tmpBuf.position());

    sortableKey.init(outputKey, type);
    context.write(sortableKey, NullWritable.get());
}