List of usage examples for org.apache.hadoop.io.BytesWritable.getBytes()
@Override public byte[] getBytes()
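A caveat that applies to every example below: getBytes() returns the backing buffer itself, whose length is often larger than the valid data, so it must be paired with getLength(). A minimal sketch of the safe pattern (the class name GetBytesDemo is illustrative; copyBytes() is available on recent Hadoop releases):

import java.util.Arrays;

import org.apache.hadoop.io.BytesWritable;

public class GetBytesDemo {
  public static void main(String[] args) {
    BytesWritable bw = new BytesWritable();
    bw.set(new byte[] { 1, 2, 3, 4 }, 0, 4);

    // getBytes() exposes the backing array, which may be padded past the data.
    byte[] raw = bw.getBytes(); // raw.length can be greater than 4
    // Trim to the valid region before handing the bytes to other code.
    byte[] valid = Arrays.copyOf(raw, bw.getLength()); // exactly 4 bytes
    // On recent Hadoop versions, copyBytes() does the same trim in one call.
    byte[] copy = bw.copyBytes();

    System.out.println(raw.length + " / " + valid.length + " / " + copy.length);
  }
}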
From source file:org.apache.hama.bsp.TestBSPMessageBundle.java
License:Apache License
public void testSerializationDeserialization() throws IOException {
    BSPMessageBundle<BytesWritable> bundle = new BSPMessageBundle<BytesWritable>();
    BytesWritable[] testMessages = new BytesWritable[16];
    for (int i = 0; i < testMessages.length; ++i) {
      // Create a one byte tag containing the number of the message.
      byte[] tag = new byte[1];
      tag[0] = (byte) i;
      // Create a four bytes data part containing serialized number of the
      // message.
      ByteArrayOutputStream baos = new ByteArrayOutputStream();
      baos.write(i);
      baos.close();
      byte[] data = baos.toByteArray();
      BytesWritable msg = new BytesWritable();
      msg.set(data, 0, data.length);
      testMessages[i] = msg;
      bundle.addMessage(testMessages[i]);
    }
    // Serialize it.
    ByteArrayOutputStream baos = new ByteArrayOutputStream();
    bundle.write(new DataOutputStream(baos));
    baos.close();
    // Deserialize it.
    BSPMessageBundle<BytesWritable> readBundle = new BSPMessageBundle<BytesWritable>();
    readBundle.readFields(new DataInputStream(new ByteArrayInputStream(baos.toByteArray())));
    // Check contents.
    int messageNumber = 0;
    Iterator<BytesWritable> it = readBundle.iterator();
    while (it.hasNext()) {
      BytesWritable byteMessage = it.next();
      assertTrue(Arrays.equals(testMessages[messageNumber].getBytes(), byteMessage.getBytes()));
      ++messageNumber;
    }
    assertEquals(testMessages.length, messageNumber);
  }
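A side note on the assertion above: Arrays.equals on two raw getBytes() arrays compares the padded backing buffers, which happens to pass here because both sides are built and padded the same way. A more defensive form of the check, trimming each side to its valid length first (a sketch, not the project's code):

// Compare only the valid regions of the two buffers.
BytesWritable expected = testMessages[messageNumber];
assertTrue(Arrays.equals(
    Arrays.copyOf(expected.getBytes(), expected.getLength()),
    Arrays.copyOf(byteMessage.getBytes(), byteMessage.getLength())));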
From source file:org.apache.hama.pipes.BinaryProtocol.java
License:Apache License
/**
 * Write the given object to the stream. If it is a Text or BytesWritable,
 * write it directly. Otherwise, write it to a buffer and then write the
 * length and data to the stream.
 *
 * @param obj the object to write
 * @throws IOException
 */
protected void writeObject(Writable obj) throws IOException {
    // For Text and BytesWritable, encode them directly, so that they end up
    // in C++ as the natural translations.
    if (obj instanceof Text) {
      Text t = (Text) obj;
      int len = t.getLength();
      WritableUtils.writeVInt(stream, len);
      stream.write(t.getBytes(), 0, len);
    } else if (obj instanceof BytesWritable) {
      BytesWritable b = (BytesWritable) obj;
      int len = b.getLength();
      WritableUtils.writeVInt(stream, len);
      stream.write(b.getBytes(), 0, len);
    } else {
      buffer.reset();
      obj.write(buffer);
      int length = buffer.getLength();
      WritableUtils.writeVInt(stream, length);
      stream.write(buffer.getData(), 0, length);
    }
  }
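The read side of this framing is not shown in the snippet, but it would mirror the write path: a varint length followed by exactly that many payload bytes. A minimal sketch, assuming a DataInput named inStream (a hypothetical name, not part of the original class):

// Mirror of the write path: read the varint length, then exactly that many bytes.
int len = WritableUtils.readVInt(inStream);
byte[] buf = new byte[len];
inStream.readFully(buf, 0, len);
BytesWritable b = new BytesWritable();
b.set(buf, 0, len); // the BytesWritable now holds exactly the framed bytes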
From source file:org.apache.hama.pipes.protocol.BinaryProtocol.java
License:Apache License
/**
 * Write the given object to the stream. If it is an IntWritable, LongWritable,
 * FloatWritable, DoubleWritable, Text or BytesWritable, write it directly.
 * Otherwise, write it to a buffer and then write the length and data to the
 * stream.
 *
 * @param obj the object to write
 * @throws IOException
 */
protected void writeObject(Writable obj) throws IOException {
    // For basic types IntWritable, LongWritable, Text and BytesWritable,
    // encode them directly, so that they end up in C++ as the natural
    // translations.
    if (obj instanceof Text) {
      Text t = (Text) obj;
      int len = t.getLength();
      WritableUtils.writeVInt(this.outStream, len);
      this.outStream.write(t.getBytes(), 0, len);
    } else if (obj instanceof BytesWritable) {
      BytesWritable b = (BytesWritable) obj;
      int len = b.getLength();
      WritableUtils.writeVInt(this.outStream, len);
      this.outStream.write(b.getBytes(), 0, len);
    } else if (obj instanceof IntWritable) {
      WritableUtils.writeVInt(this.outStream, ((IntWritable) obj).get());
    } else if (obj instanceof LongWritable) {
      WritableUtils.writeVLong(this.outStream, ((LongWritable) obj).get());
    } else {
      // Note: FloatWritable and DoubleWritable are written here
      obj.write(this.outStream);
    }
  }
From source file:org.apache.hawq.pxf.plugins.hive.HiveResolver.java
License:Apache License
private void resolvePrimitive(Object o, PrimitiveObjectInspector oi, List<OneField> record, boolean toFlatten)
        throws IOException {
    Object val;
    switch (oi.getPrimitiveCategory()) {
    case BOOLEAN: {
      val = (o != null) ? ((BooleanObjectInspector) oi).get(o) : null;
      addOneFieldToRecord(record, BOOLEAN, val);
      break;
    }
    case SHORT: {
      val = (o != null) ? ((ShortObjectInspector) oi).get(o) : null;
      addOneFieldToRecord(record, SMALLINT, val);
      break;
    }
    case INT: {
      val = (o != null) ? ((IntObjectInspector) oi).get(o) : null;
      addOneFieldToRecord(record, INTEGER, val);
      break;
    }
    case LONG: {
      val = (o != null) ? ((LongObjectInspector) oi).get(o) : null;
      addOneFieldToRecord(record, BIGINT, val);
      break;
    }
    case FLOAT: {
      val = (o != null) ? ((FloatObjectInspector) oi).get(o) : null;
      addOneFieldToRecord(record, REAL, val);
      break;
    }
    case DOUBLE: {
      val = (o != null) ? ((DoubleObjectInspector) oi).get(o) : null;
      addOneFieldToRecord(record, FLOAT8, val);
      break;
    }
    case DECIMAL: {
      String sVal = null;
      if (o != null) {
        HiveDecimal hd = ((HiveDecimalObjectInspector) oi).getPrimitiveJavaObject(o);
        if (hd != null) {
          BigDecimal bd = hd.bigDecimalValue();
          sVal = bd.toString();
        }
      }
      addOneFieldToRecord(record, NUMERIC, sVal);
      break;
    }
    case STRING: {
      val = (o != null) ? ((StringObjectInspector) oi).getPrimitiveJavaObject(o) : null;
      addOneFieldToRecord(record, TEXT, toFlatten ? String.format("\"%s\"", val) : val);
      break;
    }
    case VARCHAR:
      val = (o != null) ? ((HiveVarcharObjectInspector) oi).getPrimitiveJavaObject(o) : null;
      addOneFieldToRecord(record, VARCHAR, toFlatten ? String.format("\"%s\"", val) : val);
      break;
    case CHAR:
      val = (o != null) ? ((HiveCharObjectInspector) oi).getPrimitiveJavaObject(o) : null;
      addOneFieldToRecord(record, BPCHAR, toFlatten ? String.format("\"%s\"", val) : val);
      break;
    case BINARY: {
      byte[] toEncode = null;
      if (o != null) {
        BytesWritable bw = ((BinaryObjectInspector) oi).getPrimitiveWritableObject(o);
        toEncode = new byte[bw.getLength()];
        System.arraycopy(bw.getBytes(), 0, toEncode, 0, bw.getLength());
      }
      addOneFieldToRecord(record, BYTEA, toEncode);
      break;
    }
    case TIMESTAMP: {
      val = (o != null) ? ((TimestampObjectInspector) oi).getPrimitiveJavaObject(o) : null;
      addOneFieldToRecord(record, TIMESTAMP, val);
      break;
    }
    case DATE:
      val = (o != null) ? ((DateObjectInspector) oi).getPrimitiveJavaObject(o) : null;
      addOneFieldToRecord(record, DATE, val);
      break;
    case BYTE: { /* TINYINT */
      val = (o != null) ? new Short(((ByteObjectInspector) oi).get(o)) : null;
      addOneFieldToRecord(record, SMALLINT, val);
      break;
    }
    default: {
      throw new UnsupportedTypeException(
          oi.getTypeName() + " conversion is not supported by " + getClass().getSimpleName());
    }
    }
  }
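In the BINARY branch above, the new byte[] plus System.arraycopy trims the padded backing array down to getLength() bytes. On Hadoop versions that ship BytesWritable.copyBytes(), the same trim can be written in one call; a sketch of that alternative (not the project's actual code):

case BINARY: {
  byte[] toEncode = null;
  if (o != null) {
    BytesWritable bw = ((BinaryObjectInspector) oi).getPrimitiveWritableObject(o);
    toEncode = bw.copyBytes(); // right-sized copy of just the valid bytes
  }
  addOneFieldToRecord(record, BYTEA, toEncode);
  break;
}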
From source file:org.apache.ignite.hadoop.io.BytesWritablePartiallyRawComparator.java
License:Apache License
/** {@inheritDoc} */
@Override
public int compare(BytesWritable val1, long val2Ptr, int val2Len) {
    return HadoopUtils.compareBytes(val1.getBytes(), val1.getLength(), val2Ptr + LEN_BYTES,
        val2Len - LEN_BYTES);
}
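The LEN_BYTES offset exists because a serialized BytesWritable starts with a four-byte length prefix before the payload, so a raw comparator has to skip past it. Conceptually, the write path that produces this layout looks like the following sketch (out and bw are illustrative names):

// What BytesWritable.write(DataOutput) emits, conceptually:
DataOutputStream out = new DataOutputStream(new ByteArrayOutputStream());
BytesWritable bw = new BytesWritable(new byte[] { 1, 2, 3 });
out.writeInt(bw.getLength());                // 4-byte length prefix (LEN_BYTES)
out.write(bw.getBytes(), 0, bw.getLength()); // then the payload itself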
From source file:org.apache.kylin.engine.mr.common.CubeStatsReader.java
License:Apache License
public CubeStatsReader(CubeSegment cubeSegment, KylinConfig kylinConfig) throws IOException {
    ResourceStore store = ResourceStore.getStore(kylinConfig);
    cuboidScheduler = new CuboidScheduler(cubeSegment.getCubeDesc());
    String statsKey = cubeSegment.getStatisticsResourcePath();
    File tmpSeqFile = writeTmpSeqFile(store.getResource(statsKey).inputStream);
    Reader reader = null;
    try {
      Configuration hadoopConf = HadoopUtil.getCurrentConfiguration();
      Path path = new Path(HadoopUtil.fixWindowsPath("file://" + tmpSeqFile.getAbsolutePath()));
      Option seqInput = SequenceFile.Reader.file(path);
      reader = new SequenceFile.Reader(hadoopConf, seqInput);

      int percentage = 100;
      int mapperNumber = 0;
      double mapperOverlapRatio = 0;
      Map<Long, HLLCounter> counterMap = Maps.newHashMap();

      LongWritable key = (LongWritable) ReflectionUtils.newInstance(reader.getKeyClass(), hadoopConf);
      BytesWritable value = (BytesWritable) ReflectionUtils.newInstance(reader.getValueClass(), hadoopConf);
      while (reader.next(key, value)) {
        if (key.get() == 0L) {
          percentage = Bytes.toInt(value.getBytes());
        } else if (key.get() == -1) {
          mapperOverlapRatio = Bytes.toDouble(value.getBytes());
        } else if (key.get() == -2) {
          mapperNumber = Bytes.toInt(value.getBytes());
        } else if (key.get() > 0) {
          HLLCounter hll = new HLLCounter(kylinConfig.getCubeStatsHLLPrecision());
          ByteArray byteArray = new ByteArray(value.getBytes());
          hll.readRegisters(byteArray.asBuffer());
          counterMap.put(key.get(), hll);
        }
      }

      this.seg = cubeSegment;
      this.samplingPercentage = percentage;
      this.mapperNumberOfFirstBuild = mapperNumber;
      this.mapperOverlapRatioOfFirstBuild = mapperOverlapRatio;
      this.cuboidRowEstimatesHLL = counterMap;
    } finally {
      IOUtils.closeStream(reader);
      tmpSeqFile.delete();
    }
  }
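A detail that recurs in the Kylin readers below: Bytes.toInt(value.getBytes()) consumes only the first four bytes of the buffer, so any padding at the end of the backing array is harmless, and the HLL register reader likewise consumes only as many bytes as its encoding needs. Wrapping the valid range explicitly makes that intent visible; a sketch of the stricter variant:

// Bound the buffer to the valid bytes instead of the whole backing array.
ByteBuffer buf = ByteBuffer.wrap(value.getBytes(), 0, value.getLength());
int percentage = buf.getInt(); // same first four bytes that Bytes.toInt() reads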
From source file:org.apache.kylin.engine.mr.steps.CreateDictionaryJob.java
License:Apache License
@Override
public int run(String[] args) throws Exception {
    Options options = new Options();
    options.addOption(OPTION_CUBE_NAME);
    options.addOption(OPTION_SEGMENT_ID);
    options.addOption(OPTION_INPUT_PATH);
    parseOptions(options, args);

    final String cubeName = getOptionValue(OPTION_CUBE_NAME);
    final String segmentID = getOptionValue(OPTION_SEGMENT_ID);
    final String factColumnsInputPath = getOptionValue(OPTION_INPUT_PATH);
    final KylinConfig config = KylinConfig.getInstanceFromEnv();

    DictionaryGeneratorCLI.processSegment(config, cubeName, segmentID, new DistinctColumnValuesProvider() {
      @Override
      public IReadableTable getDistinctValuesFor(TblColRef col) {
        return new SortedColumnDFSFile(factColumnsInputPath + "/" + col.getIdentity(), col.getType());
      }
    }, new DictionaryProvider() {
      @Override
      public Dictionary<String> getDictionary(TblColRef col) throws IOException {
        Path colDir = new Path(factColumnsInputPath, col.getIdentity());
        FileSystem fs = HadoopUtil.getWorkingFileSystem();
        Path dictFile = HadoopUtil.getFilterOnlyPath(fs, colDir,
            col.getName() + FactDistinctColumnsReducer.DICT_FILE_POSTFIX);
        if (dictFile == null) {
          logger.info("Dict for '" + col.getName() + "' not pre-built.");
          return null;
        }

        try (SequenceFile.Reader reader = new SequenceFile.Reader(HadoopUtil.getCurrentConfiguration(),
            SequenceFile.Reader.file(dictFile))) {
          NullWritable key = NullWritable.get();
          BytesWritable value = new BytesWritable();
          reader.next(key, value);

          ByteBuffer buffer = new ByteArray(value.getBytes()).asBuffer();
          try (DataInputStream is = new DataInputStream(new ByteBufferBackedInputStream(buffer))) {
            String dictClassName = is.readUTF();
            Dictionary<String> dict = (Dictionary<String>) ClassUtil.newInstance(dictClassName);
            dict.readFields(is);
            logger.info("DictionaryProvider read dict from file: " + dictFile);
            return dict;
          }
        }
      }
    });

    return 0;
  }
From source file:org.apache.kylin.engine.mr.steps.MergeDictionaryMapper.java
License:Apache License
@Override
protected void doMap(IntWritable key, NullWritable value, Context context)
        throws IOException, InterruptedException {
    int index = key.get();

    if (index < tblColRefs.length) {
      // merge dictionary
      TblColRef col = tblColRefs[index];
      List<DictionaryInfo> dictInfos = Lists.newArrayList();
      for (CubeSegment segment : mergingSegments) {
        if (segment.getDictResPath(col) != null) {
          DictionaryInfo dictInfo = dictMgr.getDictionaryInfo(segment.getDictResPath(col));
          if (dictInfo != null && !dictInfos.contains(dictInfo)) {
            dictInfos.add(dictInfo);
          }
        }
      }

      DictionaryInfo mergedDictInfo = dictMgr.mergeDictionary(dictInfos);
      String tblCol = col.getTableAlias() + ":" + col.getName();
      String dictInfoPath = mergedDictInfo == null ? "" : mergedDictInfo.getResourcePath();

      context.write(new IntWritable(-1), new Text(tblCol + "=" + dictInfoPath));
    } else {
      // merge statistics
      KylinConfig kylinConfig = AbstractHadoopJob.loadKylinConfigFromHdfs(
          new SerializableConfiguration(context.getConfiguration()),
          context.getConfiguration().get(BatchConstants.ARG_META_URL));

      final String cubeName = context.getConfiguration().get(BatchConstants.ARG_CUBE_NAME);
      final String segmentId = context.getConfiguration().get(BatchConstants.ARG_SEGMENT_ID);
      final String statOutputPath = context.getConfiguration()
          .get(MergeDictionaryJob.OPTION_OUTPUT_PATH_STAT.getOpt());

      CubeInstance cubeInstance = CubeManager.getInstance(kylinConfig).getCube(cubeName);

      logger.info("Statistics output path: {}", statOutputPath);

      CubeSegment newSegment = cubeInstance.getSegmentById(segmentId);
      ResourceStore rs = ResourceStore.getStore(kylinConfig);

      Map<Long, HLLCounter> cuboidHLLMap = Maps.newHashMap();
      Configuration conf = null;
      int averageSamplingPercentage = 0;

      for (CubeSegment cubeSegment : mergingSegments) {
        String filePath = cubeSegment.getStatisticsResourcePath();
        InputStream is = rs.getResource(filePath).inputStream;
        File tempFile;
        FileOutputStream tempFileStream = null;
        try {
          tempFile = File.createTempFile(segmentId, ".seq");
          tempFileStream = new FileOutputStream(tempFile);
          org.apache.commons.io.IOUtils.copy(is, tempFileStream);
        } finally {
          IOUtils.closeStream(is);
          IOUtils.closeStream(tempFileStream);
        }

        FileSystem fs = HadoopUtil.getFileSystem("file:///" + tempFile.getAbsolutePath());
        SequenceFile.Reader reader = null;
        try {
          conf = HadoopUtil.getCurrentConfiguration();
          //noinspection deprecation
          reader = new SequenceFile.Reader(fs, new Path(tempFile.getAbsolutePath()), conf);
          LongWritable keyW = (LongWritable) ReflectionUtils.newInstance(reader.getKeyClass(), conf);
          BytesWritable valueW = (BytesWritable) ReflectionUtils.newInstance(reader.getValueClass(), conf);

          while (reader.next(keyW, valueW)) {
            if (keyW.get() == 0L) {
              // sampling percentage
              averageSamplingPercentage += Bytes.toInt(valueW.getBytes());
            } else if (keyW.get() > 0) {
              HLLCounter hll = new HLLCounter(kylinConfig.getCubeStatsHLLPrecision());
              ByteArray byteArray = new ByteArray(valueW.getBytes());
              hll.readRegisters(byteArray.asBuffer());

              if (cuboidHLLMap.get(keyW.get()) != null) {
                cuboidHLLMap.get(keyW.get()).merge(hll);
              } else {
                cuboidHLLMap.put(keyW.get(), hll);
              }
            }
          }
        } catch (Exception e) {
          e.printStackTrace();
          throw e;
        } finally {
          IOUtils.closeStream(reader);
        }
      }

      averageSamplingPercentage = averageSamplingPercentage / mergingSegments.size();
      CubeStatsWriter.writeCuboidStatistics(conf, new Path(statOutputPath), cuboidHLLMap,
          averageSamplingPercentage);

      Path statisticsFilePath = new Path(statOutputPath,
          BatchConstants.CFG_STATISTICS_CUBOID_ESTIMATION_FILENAME);

      FileSystem fs = HadoopUtil.getFileSystem(statisticsFilePath, conf);
      FSDataInputStream fis = fs.open(statisticsFilePath);

      try {
        // put the statistics to metadata store
        String statisticsFileName = newSegment.getStatisticsResourcePath();
        rs.putResource(statisticsFileName, fis, System.currentTimeMillis());
      } finally {
        IOUtils.closeStream(fis);
      }

      context.write(new IntWritable(-1), new Text(""));
    }
  }
From source file:org.apache.kylin.engine.mr.steps.MergeStatisticsStep.java
License:Apache License
@Override
@SuppressWarnings("deprecation")
protected ExecuteResult doWork(ExecutableContext context) throws ExecuteException {
    final CubeManager mgr = CubeManager.getInstance(context.getConfig());
    final CubeInstance cube = mgr.getCube(CubingExecutableUtil.getCubeName(this.getParams()));
    final CubeSegment newSegment = cube.getSegmentById(CubingExecutableUtil.getSegmentId(this.getParams()));
    KylinConfig kylinConf = cube.getConfig();
    Configuration conf = HadoopUtil.getCurrentConfiguration();
    ResourceStore rs = ResourceStore.getStore(kylinConf);
    try {
      int averageSamplingPercentage = 0;
      for (String segmentId : CubingExecutableUtil.getMergingSegmentIds(this.getParams())) {
        String fileKey = CubeSegment
            .getStatisticsResourcePath(CubingExecutableUtil.getCubeName(this.getParams()), segmentId);
        InputStream is = rs.getResource(fileKey).inputStream;
        File tempFile = null;
        FileOutputStream tempFileStream = null;
        try {
          tempFile = File.createTempFile(segmentId, ".seq");
          tempFileStream = new FileOutputStream(tempFile);
          org.apache.commons.io.IOUtils.copy(is, tempFileStream);
        } finally {
          IOUtils.closeStream(is);
          IOUtils.closeStream(tempFileStream);
        }

        FileSystem fs = HadoopUtil.getFileSystem("file:///" + tempFile.getAbsolutePath());
        SequenceFile.Reader reader = null;
        try {
          reader = new SequenceFile.Reader(fs, new Path(tempFile.getAbsolutePath()), conf);
          LongWritable key = (LongWritable) ReflectionUtils.newInstance(reader.getKeyClass(), conf);
          BytesWritable value = (BytesWritable) ReflectionUtils.newInstance(reader.getValueClass(), conf);
          while (reader.next(key, value)) {
            if (key.get() == 0L) {
              // sampling percentage
              averageSamplingPercentage += Bytes.toInt(value.getBytes());
            } else if (key.get() > 0) {
              HLLCounter hll = new HLLCounter(kylinConf.getCubeStatsHLLPrecision());
              ByteArray byteArray = new ByteArray(value.getBytes());
              hll.readRegisters(byteArray.asBuffer());
              if (cuboidHLLMap.get(key.get()) != null) {
                cuboidHLLMap.get(key.get()).merge(hll);
              } else {
                cuboidHLLMap.put(key.get(), hll);
              }
            }
          }
        } catch (Exception e) {
          e.printStackTrace();
          throw e;
        } finally {
          IOUtils.closeStream(reader);
          if (tempFile != null)
            tempFile.delete();
        }
      }
      averageSamplingPercentage = averageSamplingPercentage
          / CubingExecutableUtil.getMergingSegmentIds(this.getParams()).size();
      CubeStatsWriter.writeCuboidStatistics(conf,
          new Path(CubingExecutableUtil.getMergedStatisticsPath(this.getParams())), cuboidHLLMap,
          averageSamplingPercentage);
      Path statisticsFilePath = new Path(CubingExecutableUtil.getMergedStatisticsPath(this.getParams()),
          BatchConstants.CFG_STATISTICS_CUBOID_ESTIMATION_FILENAME);
      FileSystem fs = HadoopUtil.getFileSystem(statisticsFilePath, conf);
      FSDataInputStream is = fs.open(statisticsFilePath);
      try {
        // put the statistics to metadata store
        String statisticsFileName = newSegment.getStatisticsResourcePath();
        rs.putResource(statisticsFileName, is, System.currentTimeMillis());
      } finally {
        IOUtils.closeStream(is);
      }
      return new ExecuteResult(ExecuteResult.State.SUCCEED, "succeed");
    } catch (IOException e) {
      logger.error("fail to merge cuboid statistics", e);
      return new ExecuteResult(ExecuteResult.State.ERROR, e.getLocalizedMessage());
    }
  }
From source file:org.apache.kylin.job.hadoop.cardinality.ColumnCardinalityReducer.java
License:Apache License
@Override
public void reduce(IntWritable key, Iterable<BytesWritable> values, Context context)
        throws IOException, InterruptedException {
    int skey = key.get();
    for (BytesWritable v : values) {
      ByteBuffer buffer = ByteBuffer.wrap(v.getBytes());
      HyperLogLogPlusCounter hll = new HyperLogLogPlusCounter();
      hll.readRegisters(buffer);
      getHllc(skey).merge(hll);
      hll.clear();
    }
  }
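Here too, ByteBuffer.wrap(v.getBytes()) hands the HLL reader the entire backing array, padding included; readRegisters consumes only the bytes it needs, so this is benign, but bounding the buffer to the valid region is the more defensive form (a sketch, not the project's code):

for (BytesWritable v : values) {
  // Limit the buffer to the valid region rather than the whole backing array.
  ByteBuffer buffer = ByteBuffer.wrap(v.getBytes(), 0, v.getLength());
  HyperLogLogPlusCounter hll = new HyperLogLogPlusCounter();
  hll.readRegisters(buffer);
  getHllc(skey).merge(hll);
  hll.clear();
}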