List of usage examples for org.apache.hadoop.io.Text.getLength()
@Override public int getLength()
From source file:com.kylinolap.job.hadoop.cube.BaseCuboidMapper.java
License:Apache License
@Override public void map(KEYIN key, Text value, Context context) throws IOException, InterruptedException { counter++;//from w ww .j a v a2s . c o m if (counter % BatchConstants.COUNTER_MAX == 0) { logger.info("Handled " + counter + " records!"); } try { bytesSplitter.split(value.getBytes(), value.getLength(), byteRowDelimiter); intermediateTableDesc.sanityCheck(bytesSplitter); byte[] rowKey = buildKey(bytesSplitter.getSplitBuffers()); outputKey.set(rowKey, 0, rowKey.length); buildValue(bytesSplitter.getSplitBuffers()); outputValue.set(valueBuf.array(), 0, valueBuf.position()); context.write(outputKey, outputValue); } catch (Exception ex) { handleErrorRecord(bytesSplitter, ex); } }
From source file:com.kylinolap.job.hadoop.cube.BaseCuboidMapperTest.java
License:Apache License
@Test public void testMapperWithHeader() throws Exception { String cubeName = "test_kylin_cube_with_slr_1_new_segment"; String segmentName = "20130331080000_20131212080000"; mapDriver.getConfiguration().set(BatchConstants.CFG_CUBE_NAME, cubeName); mapDriver.getConfiguration().set(BatchConstants.CFG_CUBE_SEGMENT_NAME, segmentName); // mapDriver.getConfiguration().set(BatchConstants.CFG_METADATA_URL, // metadata); mapDriver.withInput(new Text("key"), new Text("2012-12-15118480Health & BeautyFragrancesWomenAuction15123456789132.331")); List<Pair<Text, Text>> result = mapDriver.run(); CubeManager cubeMgr = CubeManager.getInstance(this.getTestConfig()); CubeInstance cube = cubeMgr.getCube(cubeName); assertEquals(1, result.size());//from w ww . j a v a 2 s . co m Text rowkey = result.get(0).getFirst(); byte[] key = rowkey.getBytes(); byte[] header = Bytes.head(key, 26); byte[] sellerId = Bytes.tail(header, 18); byte[] cuboidId = Bytes.head(header, 8); byte[] restKey = Bytes.tail(key, rowkey.getLength() - 26); RowKeyDecoder decoder = new RowKeyDecoder(cube.getFirstSegment()); decoder.decode(key); assertEquals("[123456789, 2012-12-15, 11848, Health & Beauty, Fragrances, Women, Auction, 0, 15]", decoder.getValues().toString()); assertTrue(Bytes.toString(sellerId).startsWith("123456789")); assertEquals(511, Bytes.toLong(cuboidId)); assertEquals(22, restKey.length); verifyMeasures(cube.getDescriptor().getMeasures(), result.get(0).getSecond(), "132.33", "132.33", "132.33", 1); }
From source file:com.kylinolap.job.hadoop.cube.BaseCuboidMapperTest.java
License:Apache License
@Test public void testMapperWithNull() throws Exception { String cubeName = "test_kylin_cube_with_slr_1_new_segment"; String segmentName = "20130331080000_20131212080000"; mapDriver.getConfiguration().set(BatchConstants.CFG_CUBE_NAME, cubeName); mapDriver.getConfiguration().set(BatchConstants.CFG_CUBE_SEGMENT_NAME, segmentName); // mapDriver.getConfiguration().set(BatchConstants.CFG_METADATA_URL, // metadata); mapDriver.withInput(new Text("key"), new Text("2012-12-15118480Health & BeautyFragrances\\NAuction15123456789\\N\\N")); List<Pair<Text, Text>> result = mapDriver.run(); CubeManager cubeMgr = CubeManager.getInstance(this.getTestConfig()); CubeInstance cube = cubeMgr.getCube(cubeName); assertEquals(1, result.size());/* w ww .j a va 2s. c o m*/ Text rowkey = result.get(0).getFirst(); byte[] key = rowkey.getBytes(); byte[] header = Bytes.head(key, 26); byte[] sellerId = Bytes.tail(header, 18); byte[] cuboidId = Bytes.head(header, 8); byte[] restKey = Bytes.tail(key, rowkey.getLength() - 26); RowKeyDecoder decoder = new RowKeyDecoder(cube.getFirstSegment()); decoder.decode(key); assertEquals("[123456789, 2012-12-15, 11848, Health & Beauty, Fragrances, null, Auction, 0, 15]", decoder.getValues().toString()); assertTrue(Bytes.toString(sellerId).startsWith("123456789")); assertEquals(511, Bytes.toLong(cuboidId)); assertEquals(22, restKey.length); verifyMeasures(cube.getDescriptor().getMeasures(), result.get(0).getSecond(), "0", "0", "0", 1L); }
From source file:com.kylinolap.job.hadoop.cube.CubeHFileMapper.java
License:Apache License
@Override public void map(Text key, Text value, Context context) throws IOException, InterruptedException { outputKey.set(key.getBytes(), 0, key.getLength()); KeyValue outputValue;// www. j a v a 2 s . co m int n = keyValueCreators.size(); if (n == 1 && keyValueCreators.get(0).isFullCopy) { // shortcut for // simple full copy outputValue = keyValueCreators.get(0).create(key, value.getBytes(), 0, value.getLength()); context.write(outputKey, outputValue); } else { // normal (complex) case that distributes measures to multiple // HBase columns inputCodec.decode(value, inputMeasures); for (int i = 0; i < n; i++) { outputValue = keyValueCreators.get(i).create(key, inputMeasures); context.write(outputKey, outputValue); } } }
From source file:com.kylinolap.job.hadoop.cube.CubeHFileMapper2Test.java
License:Apache License
/**
 * Maps a single record through {@code CubeHFileMapper} and checks that the captured output
 * key and output value carry the same bytes as the input key and value.
 */
@Test
public void testBasic() throws Exception {
    Text key = new Text("not important");
    Text value = new Text(new byte[] { 2, 2, 51, -79, 1 });

    Configuration hconf = new Configuration();
    Context context = MockupMapContext.create(hconf, this.getTestConfig().getMetadataUrl(), cubeName, outKV);

    CubeHFileMapper mapper = new CubeHFileMapper();
    mapper.setup(context);
    mapper.map(key, value, context);

    // The mock context stores the last written pair in outKV.
    ImmutableBytesWritable writtenKey = (ImmutableBytesWritable) outKV[0];
    KeyValue writtenCell = (KeyValue) outKV[1];

    int keyDiff = Bytes.compareTo(key.getBytes(), 0, key.getLength(),
            writtenKey.get(), writtenKey.getOffset(), writtenKey.getLength());
    assertTrue(keyDiff == 0);

    int valueDiff = Bytes.compareTo(value.getBytes(), 0, value.getLength(),
            writtenCell.getValueArray(), writtenCell.getValueOffset(), writtenCell.getValueLength());
    assertTrue(valueDiff == 0);
}
From source file:com.kylinolap.job.hadoop.cube.FactDistinctColumnsCombiner.java
License:Apache License
@Override public void reduce(ShortWritable key, Iterable<Text> values, Context context) throws IOException, InterruptedException { HashSet<ByteArray> set = new HashSet<ByteArray>(); for (Text textValue : values) { ByteArray value = new ByteArray(Bytes.copy(textValue.getBytes(), 0, textValue.getLength())); set.add(value);//from ww w. j av a 2s.co m } for (ByteArray value : set) { outputValue.set(value.data); context.write(key, outputValue); } }
From source file:com.kylinolap.job.hadoop.cube.FactDistinctColumnsMapper.java
License:Apache License
@Override public void map(KEYIN key, Text value, Context context) throws IOException, InterruptedException { try {//from w ww .ja va 2 s . co m bytesSplitter.split(value.getBytes(), value.getLength(), byteRowDelimiter); intermediateTableDesc.sanityCheck(bytesSplitter); SplittedBytes[] splitBuffers = bytesSplitter.getSplitBuffers(); int[] flatTableIndexes = intermediateTableDesc.getRowKeyColumnIndexes(); for (int i : factDictCols) { outputKey.set((short) i); SplittedBytes bytes = splitBuffers[flatTableIndexes[i]]; outputValue.set(bytes.value, 0, bytes.length); context.write(outputKey, outputValue); } } catch (Exception ex) { handleErrorRecord(bytesSplitter, ex); } }
From source file:com.kylinolap.job.hadoop.cube.FactDistinctColumnsReducer.java
License:Apache License
@Override public void reduce(ShortWritable key, Iterable<Text> values, Context context) throws IOException, InterruptedException { TblColRef col = columnList.get(key.get()); HashSet<ByteArray> set = new HashSet<ByteArray>(); for (Text textValue : values) { ByteArray value = new ByteArray(Bytes.copy(textValue.getBytes(), 0, textValue.getLength())); set.add(value);//from ww w .j a va2 s.co m } Configuration conf = context.getConfiguration(); FileSystem fs = FileSystem.get(conf); String outputPath = conf.get(BatchConstants.OUTPUT_PATH); FSDataOutputStream out = fs.create(new Path(outputPath, col.getName())); try { for (ByteArray value : set) { out.write(value.data); out.write('\n'); } } finally { out.close(); } }
From source file:com.kylinolap.job.hadoop.cube.NDCuboidMapper.java
License:Apache License
@Override public void map(Text key, Text value, Context context) throws IOException, InterruptedException { long cuboidId = rowKeySplitter.split(key.getBytes(), key.getLength()); Cuboid parentCuboid = Cuboid.findById(cubeDesc, cuboidId); Collection<Long> myChildren = cuboidScheduler.getSpanningCuboid(cuboidId); // if still empty or null if (myChildren == null || myChildren.size() == 0) { context.getCounter(BatchConstants.MAPREDUCE_COUTNER_GROUP_NAME, "Skipped records").increment(1L); skipCounter++;/*from w ww . j ava 2s . c o m*/ if (skipCounter % BatchConstants.COUNTER_MAX == 0) { logger.info("Skipped " + skipCounter + " records!"); } return; } context.getCounter(BatchConstants.MAPREDUCE_COUTNER_GROUP_NAME, "Processed records").increment(1L); handleCounter++; if (handleCounter % BatchConstants.COUNTER_MAX == 0) { logger.info("Handled " + handleCounter + " records!"); } for (Long child : myChildren) { Cuboid childCuboid = Cuboid.findById(cubeDesc, child); int keyLength = buildKey(parentCuboid, childCuboid, rowKeySplitter.getSplitBuffers()); outputKey.set(keyBuf, 0, keyLength); context.write(outputKey, value); } }
From source file:com.kylinolap.job.hadoop.cube.NewBaseCuboidMapper.java
License:Apache License
@Override public void map(KEYIN key, Text value, Context context) throws IOException, InterruptedException { // combining the hive table flattening logic into base cuboid building. // the input of this mapper is the fact table rows counter++;/*from ww w . jav a2 s. co m*/ if (counter % BatchConstants.COUNTER_MAX == 0) { logger.info("Handled " + counter + " records!"); } if (!byteRowDelimiterInferred) byteRowDelimiter = bytesSplitter.inferByteRowDelimiter(value.getBytes(), value.getLength(), factTableDesc.getColumns().length); bytesSplitter.split(value.getBytes(), value.getLength(), byteRowDelimiter); try { byte[] rowKey = buildKey(bytesSplitter.getSplitBuffers()); if (rowKey == null) return;// skip this fact table row outputKey.set(rowKey, 0, rowKey.length); buildValue(bytesSplitter.getSplitBuffers()); outputValue.set(valueBuf.array(), 0, valueBuf.position()); context.write(outputKey, outputValue); } catch (Throwable t) { logger.error("", t); context.getCounter(BatchConstants.MAPREDUCE_COUTNER_GROUP_NAME, "Error records").increment(1L); return; } }