Example usage for org.apache.hadoop.io Text getLength

Introduction

On this page you can find example usage of org.apache.hadoop.io.Text#getLength().

Prototype

@Override
public int getLength() 

Document

Returns the number of bytes in the byte array.
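
Note that this is a byte count over the Text's UTF-8 encoding, not a character count; for multi-byte characters the two differ. A minimal standalone sketch:

Text t = new Text("na\u00efve");        // "naïve"; U+00EF encodes as two bytes in UTF-8
int byteLen = t.getLength();            // 6 -- bytes in the backing array
int charLen = t.toString().length();    // 5 -- characters after decoding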

Usage

From source file: com.kylinolap.job.hadoop.cube.BaseCuboidMapper.java

License: Apache License

@Override
public void map(KEYIN key, Text value, Context context) throws IOException, InterruptedException {
    counter++;
    if (counter % BatchConstants.COUNTER_MAX == 0) {
        logger.info("Handled " + counter + " records!");
    }

    try {
        bytesSplitter.split(value.getBytes(), value.getLength(), byteRowDelimiter);
        intermediateTableDesc.sanityCheck(bytesSplitter);

        byte[] rowKey = buildKey(bytesSplitter.getSplitBuffers());
        outputKey.set(rowKey, 0, rowKey.length);

        buildValue(bytesSplitter.getSplitBuffers());
        outputValue.set(valueBuf.array(), 0, valueBuf.position());

        context.write(outputKey, outputValue);
    } catch (Exception ex) {
        handleErrorRecord(bytesSplitter, ex);
    }
}
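
A note on the getBytes()/getLength() pairing used above (and throughout the examples below): Text.getBytes() returns the internal backing array, which can be longer than the valid data once the instance has been reused for a shorter value, so every read must be bounded by getLength(). A minimal sketch of the pitfall, using java.util.Arrays:

Text t = new Text("a-long-initial-value");
t.set("short");                                      // the backing array keeps its old capacity
byte[] raw = t.getBytes();                           // raw.length may exceed t.getLength()
byte[] valid = Arrays.copyOf(raw, t.getLength());    // only the first getLength() bytes are valid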

From source file: com.kylinolap.job.hadoop.cube.BaseCuboidMapperTest.java

License: Apache License

@Test
public void testMapperWithHeader() throws Exception {
    String cubeName = "test_kylin_cube_with_slr_1_new_segment";
    String segmentName = "20130331080000_20131212080000";
    mapDriver.getConfiguration().set(BatchConstants.CFG_CUBE_NAME, cubeName);
    mapDriver.getConfiguration().set(BatchConstants.CFG_CUBE_SEGMENT_NAME, segmentName);
    // mapDriver.getConfiguration().set(BatchConstants.CFG_METADATA_URL, metadata);
    mapDriver.withInput(new Text("key"),
            new Text("2012-12-15118480Health & BeautyFragrancesWomenAuction15123456789132.331"));
    List<Pair<Text, Text>> result = mapDriver.run();

    CubeManager cubeMgr = CubeManager.getInstance(this.getTestConfig());
    CubeInstance cube = cubeMgr.getCube(cubeName);

    assertEquals(1, result.size());
    Text rowkey = result.get(0).getFirst();
    byte[] key = rowkey.getBytes();
    byte[] header = Bytes.head(key, 26);
    byte[] sellerId = Bytes.tail(header, 18);
    byte[] cuboidId = Bytes.head(header, 8);
    byte[] restKey = Bytes.tail(key, rowkey.getLength() - 26);

    RowKeyDecoder decoder = new RowKeyDecoder(cube.getFirstSegment());
    decoder.decode(key);
    assertEquals("[123456789, 2012-12-15, 11848, Health & Beauty, Fragrances, Women, Auction, 0, 15]",
            decoder.getValues().toString());

    assertTrue(Bytes.toString(sellerId).startsWith("123456789"));
    assertEquals(511, Bytes.toLong(cuboidId));
    assertEquals(22, restKey.length);

    verifyMeasures(cube.getDescriptor().getMeasures(), result.get(0).getSecond(), "132.33", "132.33", "132.33",
            1);
}

From source file: com.kylinolap.job.hadoop.cube.BaseCuboidMapperTest.java

License: Apache License

@Test
public void testMapperWithNull() throws Exception {
    String cubeName = "test_kylin_cube_with_slr_1_new_segment";
    String segmentName = "20130331080000_20131212080000";
    mapDriver.getConfiguration().set(BatchConstants.CFG_CUBE_NAME, cubeName);
    mapDriver.getConfiguration().set(BatchConstants.CFG_CUBE_SEGMENT_NAME, segmentName);
    // mapDriver.getConfiguration().set(BatchConstants.CFG_METADATA_URL, metadata);
    mapDriver.withInput(new Text("key"),
            new Text("2012-12-15118480Health & BeautyFragrances\\NAuction15123456789\\N\\N"));
    List<Pair<Text, Text>> result = mapDriver.run();

    CubeManager cubeMgr = CubeManager.getInstance(this.getTestConfig());
    CubeInstance cube = cubeMgr.getCube(cubeName);

    assertEquals(1, result.size());
    Text rowkey = result.get(0).getFirst();
    byte[] key = rowkey.getBytes();
    byte[] header = Bytes.head(key, 26);
    byte[] sellerId = Bytes.tail(header, 18);
    byte[] cuboidId = Bytes.head(header, 8);
    byte[] restKey = Bytes.tail(key, rowkey.getLength() - 26);

    RowKeyDecoder decoder = new RowKeyDecoder(cube.getFirstSegment());
    decoder.decode(key);
    assertEquals("[123456789, 2012-12-15, 11848, Health & Beauty, Fragrances, null, Auction, 0, 15]",
            decoder.getValues().toString());

    assertTrue(Bytes.toString(sellerId).startsWith("123456789"));
    assertEquals(511, Bytes.toLong(cuboidId));
    assertEquals(22, restKey.length);

    verifyMeasures(cube.getDescriptor().getMeasures(), result.get(0).getSecond(), "0", "0", "0", 1L);
}

From source file: com.kylinolap.job.hadoop.cube.CubeHFileMapper.java

License: Apache License

@Override
public void map(Text key, Text value, Context context) throws IOException, InterruptedException {
    outputKey.set(key.getBytes(), 0, key.getLength());
    KeyValue outputValue;

    int n = keyValueCreators.size();
    if (n == 1 && keyValueCreators.get(0).isFullCopy) { // shortcut for simple full copy

        outputValue = keyValueCreators.get(0).create(key, value.getBytes(), 0, value.getLength());
        context.write(outputKey, outputValue);

    } else { // normal (complex) case that distributes measures to multiple HBase columns

        inputCodec.decode(value, inputMeasures);

        for (int i = 0; i < n; i++) {
            outputValue = keyValueCreators.get(i).create(key, inputMeasures);
            context.write(outputKey, outputValue);
        }
    }
}

From source file: com.kylinolap.job.hadoop.cube.CubeHFileMapper2Test.java

License: Apache License

@Test
public void testBasic() throws Exception {

    Configuration hconf = new Configuration();
    Context context = MockupMapContext.create(hconf, this.getTestConfig().getMetadataUrl(), cubeName, outKV);

    CubeHFileMapper mapper = new CubeHFileMapper();
    mapper.setup(context);

    Text key = new Text("not important");
    Text value = new Text(new byte[] { 2, 2, 51, -79, 1 });

    mapper.map(key, value, context);

    ImmutableBytesWritable outKey = (ImmutableBytesWritable) outKV[0];
    KeyValue outValue = (KeyValue) outKV[1];

    assertTrue(Bytes.compareTo(key.getBytes(), 0, key.getLength(), outKey.get(), outKey.getOffset(),
            outKey.getLength()) == 0);

    assertTrue(Bytes.compareTo(value.getBytes(), 0, value.getLength(), outValue.getValueArray(),
            outValue.getValueOffset(), outValue.getValueLength()) == 0);
}

From source file: com.kylinolap.job.hadoop.cube.FactDistinctColumnsCombiner.java

License: Apache License

@Override
public void reduce(ShortWritable key, Iterable<Text> values, Context context)
        throws IOException, InterruptedException {

    HashSet<ByteArray> set = new HashSet<ByteArray>();
    for (Text textValue : values) {
        ByteArray value = new ByteArray(Bytes.copy(textValue.getBytes(), 0, textValue.getLength()));
        set.add(value);
    }

    for (ByteArray value : set) {
        outputValue.set(value.data);
        context.write(key, outputValue);
    }
}
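
The Bytes.copy(textValue.getBytes(), 0, textValue.getLength()) above is deliberate: the MapReduce framework reuses a single Text instance across the values iterator, so each value must be deep-copied, again bounded by getLength(), before it is stored in the set. A minimal sketch of the contrast:

for (Text v : values) {
    // set.add(new ByteArray(v.getBytes()));  // unsafe: stores the framework's reused buffer,
    //                                        // which may also hold stale bytes past getLength()
    set.add(new ByteArray(Bytes.copy(v.getBytes(), 0, v.getLength())));  // safe, independent copy
}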

From source file: com.kylinolap.job.hadoop.cube.FactDistinctColumnsMapper.java

License: Apache License

@Override
public void map(KEYIN key, Text value, Context context) throws IOException, InterruptedException {

    try {
        bytesSplitter.split(value.getBytes(), value.getLength(), byteRowDelimiter);
        intermediateTableDesc.sanityCheck(bytesSplitter);
        SplittedBytes[] splitBuffers = bytesSplitter.getSplitBuffers();

        int[] flatTableIndexes = intermediateTableDesc.getRowKeyColumnIndexes();
        for (int i : factDictCols) {
            outputKey.set((short) i);
            SplittedBytes bytes = splitBuffers[flatTableIndexes[i]];
            outputValue.set(bytes.value, 0, bytes.length);
            context.write(outputKey, outputValue);
        }
    } catch (Exception ex) {
        handleErrorRecord(bytesSplitter, ex);
    }

}

From source file: com.kylinolap.job.hadoop.cube.FactDistinctColumnsReducer.java

License: Apache License

@Override
public void reduce(ShortWritable key, Iterable<Text> values, Context context)
        throws IOException, InterruptedException {
    TblColRef col = columnList.get(key.get());

    HashSet<ByteArray> set = new HashSet<ByteArray>();
    for (Text textValue : values) {
        ByteArray value = new ByteArray(Bytes.copy(textValue.getBytes(), 0, textValue.getLength()));
        set.add(value);
    }

    Configuration conf = context.getConfiguration();
    FileSystem fs = FileSystem.get(conf);
    String outputPath = conf.get(BatchConstants.OUTPUT_PATH);
    FSDataOutputStream out = fs.create(new Path(outputPath, col.getName()));

    try {
        for (ByteArray value : set) {
            out.write(value.data);
            out.write('\n');
        }
    } finally {
        out.close();
    }

}

From source file: com.kylinolap.job.hadoop.cube.NDCuboidMapper.java

License: Apache License

@Override
public void map(Text key, Text value, Context context) throws IOException, InterruptedException {
    long cuboidId = rowKeySplitter.split(key.getBytes(), key.getLength());
    Cuboid parentCuboid = Cuboid.findById(cubeDesc, cuboidId);

    Collection<Long> myChildren = cuboidScheduler.getSpanningCuboid(cuboidId);

    // if still empty or null
    if (myChildren == null || myChildren.size() == 0) {
        context.getCounter(BatchConstants.MAPREDUCE_COUTNER_GROUP_NAME, "Skipped records").increment(1L);
        skipCounter++;
        if (skipCounter % BatchConstants.COUNTER_MAX == 0) {
            logger.info("Skipped " + skipCounter + " records!");
        }
        return;
    }

    context.getCounter(BatchConstants.MAPREDUCE_COUTNER_GROUP_NAME, "Processed records").increment(1L);

    handleCounter++;
    if (handleCounter % BatchConstants.COUNTER_MAX == 0) {
        logger.info("Handled " + handleCounter + " records!");
    }

    for (Long child : myChildren) {
        Cuboid childCuboid = Cuboid.findById(cubeDesc, child);
        int keyLength = buildKey(parentCuboid, childCuboid, rowKeySplitter.getSplitBuffers());
        outputKey.set(keyBuf, 0, keyLength);
        context.write(outputKey, value);
    }

}

From source file: com.kylinolap.job.hadoop.cube.NewBaseCuboidMapper.java

License: Apache License

@Override
public void map(KEYIN key, Text value, Context context) throws IOException, InterruptedException {
    // combining the hive table flattening logic into base cuboid building.
    // the input of this mapper is the fact table rows

    counter++;
    if (counter % BatchConstants.COUNTER_MAX == 0) {
        logger.info("Handled " + counter + " records!");
    }

    if (!byteRowDelimiterInferred)
        byteRowDelimiter = bytesSplitter.inferByteRowDelimiter(value.getBytes(), value.getLength(),
                factTableDesc.getColumns().length);

    bytesSplitter.split(value.getBytes(), value.getLength(), byteRowDelimiter);

    try {
        byte[] rowKey = buildKey(bytesSplitter.getSplitBuffers());
        if (rowKey == null)
            return; // skip this fact table row

        outputKey.set(rowKey, 0, rowKey.length);

        buildValue(bytesSplitter.getSplitBuffers());
        outputValue.set(valueBuf.array(), 0, valueBuf.position());

        context.write(outputKey, outputValue);

    } catch (Throwable t) {
        logger.error("", t);
        context.getCounter(BatchConstants.MAPREDUCE_COUTNER_GROUP_NAME, "Error records").increment(1L);
        return;
    }
}