Example usage for org.apache.hadoop.io BytesWritable getBytes

List of usage examples for org.apache.hadoop.io BytesWritable getBytes

Introduction

On this page you can find example usages of org.apache.hadoop.io.BytesWritable#getBytes().

Prototype

@Override
public byte[] getBytes() 

Document

Get the data backing the BytesWritable.
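
As a minimal, self-contained illustration (not taken from any of the source files below, and using an illustrative class name of its own): the array returned by getBytes() is the backing buffer and may be longer than the valid data, so reads are typically bounded by getLength().

import org.apache.hadoop.io.BytesWritable;

import java.nio.charset.StandardCharsets;
import java.util.Arrays;

// Illustrative example class, not part of Hadoop or of the sources on this page.
public class GetBytesExample {
    public static void main(String[] args) {
        byte[] payload = "hello".getBytes(StandardCharsets.UTF_8);

        BytesWritable writable = new BytesWritable();
        writable.set(payload, 0, payload.length);

        // getBytes() hands back the backing buffer; it can be padded beyond
        // the valid data, so copy or read only up to getLength().
        byte[] backing = writable.getBytes();
        byte[] valid = Arrays.copyOfRange(backing, 0, writable.getLength());

        System.out.println(new String(valid, StandardCharsets.UTF_8)); // prints "hello"
    }
}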

Usage

From source file:org.apache.hama.bsp.TestBSPMessageBundle.java

License:Apache License

public void testSerializationDeserialization() throws IOException {
    BSPMessageBundle<BytesWritable> bundle = new BSPMessageBundle<BytesWritable>();
    BytesWritable[] testMessages = new BytesWritable[16];
    for (int i = 0; i < testMessages.length; ++i) {
        // Create a one byte tag containing the number of the message.
        byte[] tag = new byte[1];
        tag[0] = (byte) i;
        // Create a data part containing the message number (write(int) stores
        // only the low-order byte).
        ByteArrayOutputStream baos = new ByteArrayOutputStream();
        baos.write(i);
        baos.close();
        byte[] data = baos.toByteArray();
        BytesWritable msg = new BytesWritable();
        msg.set(data, 0, data.length);
        testMessages[i] = msg;
        bundle.addMessage(testMessages[i]);
    }

    // Serialize it.
    ByteArrayOutputStream baos = new ByteArrayOutputStream();
    bundle.write(new DataOutputStream(baos));
    baos.close();
    // Deserialize it.
    BSPMessageBundle<BytesWritable> readBundle = new BSPMessageBundle<BytesWritable>();
    readBundle.readFields(new DataInputStream(new ByteArrayInputStream(baos.toByteArray())));
    // Check contents.
    int messageNumber = 0;

    Iterator<BytesWritable> it = readBundle.iterator();
    while (it.hasNext()) {
        BytesWritable byteMessage = it.next();

        assertTrue(Arrays.equals(testMessages[messageNumber].getBytes(), byteMessage.getBytes()));
        ++messageNumber;
    }

    assertEquals(testMessages.length, messageNumber);
}

From source file:org.apache.hama.pipes.BinaryProtocol.java

License:Apache License

/**
 * Write the given object to the stream. If it is a Text or BytesWritable,
 * write it directly. Otherwise, write it to a buffer and then write the
 * length and data to the stream.
 * 
 * @param obj the object to write
 * @throws IOException
 */
protected void writeObject(Writable obj) throws IOException {
    // For Text and BytesWritable, encode them directly, so that they end up
    // in C++ as the natural translations.
    if (obj instanceof Text) {
        Text t = (Text) obj;
        int len = t.getLength();
        WritableUtils.writeVInt(stream, len);
        stream.write(t.getBytes(), 0, len);
    } else if (obj instanceof BytesWritable) {
        BytesWritable b = (BytesWritable) obj;
        int len = b.getLength();
        WritableUtils.writeVInt(stream, len);
        stream.write(b.getBytes(), 0, len);
    } else {
        buffer.reset();
        obj.write(buffer);
        int length = buffer.getLength();
        WritableUtils.writeVInt(stream, length);
        stream.write(buffer.getData(), 0, length);
    }
}

From source file:org.apache.hama.pipes.protocol.BinaryProtocol.java

License:Apache License

/**
 * Write the given object to the stream. If it is an IntWritable, LongWritable,
 * FloatWritable, DoubleWritable, Text or BytesWritable, write it directly.
 * Otherwise, write it to a buffer and then write the length and data to the
 * stream.
 * 
 * @param obj the object to write
 * @throws IOException
 */
protected void writeObject(Writable obj) throws IOException {
    // For basic types IntWritable, LongWritable, Text and BytesWritable,
    // encode them directly, so that they end up
    // in C++ as the natural translations.
    if (obj instanceof Text) {
        Text t = (Text) obj;
        int len = t.getLength();
        WritableUtils.writeVInt(this.outStream, len);
        this.outStream.write(t.getBytes(), 0, len);

    } else if (obj instanceof BytesWritable) {
        BytesWritable b = (BytesWritable) obj;
        int len = b.getLength();
        WritableUtils.writeVInt(this.outStream, len);
        this.outStream.write(b.getBytes(), 0, len);

    } else if (obj instanceof IntWritable) {
        WritableUtils.writeVInt(this.outStream, ((IntWritable) obj).get());

    } else if (obj instanceof LongWritable) {
        WritableUtils.writeVLong(this.outStream, ((LongWritable) obj).get());

    } else {
        // Note: FloatWritable and DoubleWritable are written here
        obj.write(this.outStream);
    }
}

From source file:org.apache.hawq.pxf.plugins.hive.HiveResolver.java

License:Apache License

private void resolvePrimitive(Object o, PrimitiveObjectInspector oi, List<OneField> record, boolean toFlatten)
        throws IOException {
    Object val;
    switch (oi.getPrimitiveCategory()) {
    case BOOLEAN: {
        val = (o != null) ? ((BooleanObjectInspector) oi).get(o) : null;
        addOneFieldToRecord(record, BOOLEAN, val);
        break;
    }
    case SHORT: {
        val = (o != null) ? ((ShortObjectInspector) oi).get(o) : null;
        addOneFieldToRecord(record, SMALLINT, val);
        break;
    }
    case INT: {
        val = (o != null) ? ((IntObjectInspector) oi).get(o) : null;
        addOneFieldToRecord(record, INTEGER, val);
        break;
    }
    case LONG: {
        val = (o != null) ? ((LongObjectInspector) oi).get(o) : null;
        addOneFieldToRecord(record, BIGINT, val);
        break;
    }
    case FLOAT: {
        val = (o != null) ? ((FloatObjectInspector) oi).get(o) : null;
        addOneFieldToRecord(record, REAL, val);
        break;
    }
    case DOUBLE: {
        val = (o != null) ? ((DoubleObjectInspector) oi).get(o) : null;
        addOneFieldToRecord(record, FLOAT8, val);
        break;
    }
    case DECIMAL: {
        String sVal = null;
        if (o != null) {
            HiveDecimal hd = ((HiveDecimalObjectInspector) oi).getPrimitiveJavaObject(o);
            if (hd != null) {
                BigDecimal bd = hd.bigDecimalValue();
                sVal = bd.toString();
            }
        }
        addOneFieldToRecord(record, NUMERIC, sVal);
        break;
    }
    case STRING: {
        val = (o != null) ? ((StringObjectInspector) oi).getPrimitiveJavaObject(o) : null;
        addOneFieldToRecord(record, TEXT, toFlatten ? String.format("\"%s\"", val) : val);
        break;
    }
    case VARCHAR:
        val = (o != null) ? ((HiveVarcharObjectInspector) oi).getPrimitiveJavaObject(o) : null;
        addOneFieldToRecord(record, VARCHAR, toFlatten ? String.format("\"%s\"", val) : val);
        break;
    case CHAR:
        val = (o != null) ? ((HiveCharObjectInspector) oi).getPrimitiveJavaObject(o) : null;
        addOneFieldToRecord(record, BPCHAR, toFlatten ? String.format("\"%s\"", val) : val);
        break;
    case BINARY: {
        byte[] toEncode = null;
        if (o != null) {
            BytesWritable bw = ((BinaryObjectInspector) oi).getPrimitiveWritableObject(o);
            toEncode = new byte[bw.getLength()];
            System.arraycopy(bw.getBytes(), 0, toEncode, 0, bw.getLength());
        }
        addOneFieldToRecord(record, BYTEA, toEncode);
        break;
    }
    case TIMESTAMP: {
        val = (o != null) ? ((TimestampObjectInspector) oi).getPrimitiveJavaObject(o) : null;
        addOneFieldToRecord(record, TIMESTAMP, val);
        break;
    }
    case DATE:
        val = (o != null) ? ((DateObjectInspector) oi).getPrimitiveJavaObject(o) : null;
        addOneFieldToRecord(record, DATE, val);
        break;
    case BYTE: { /* TINYINT */
        val = (o != null) ? new Short(((ByteObjectInspector) oi).get(o)) : null;
        addOneFieldToRecord(record, SMALLINT, val);
        break;
    }
    default: {
        throw new UnsupportedTypeException(
                oi.getTypeName() + " conversion is not supported by " + getClass().getSimpleName());
    }
    }
}

From source file:org.apache.ignite.hadoop.io.BytesWritablePartiallyRawComparator.java

License:Apache License

/** {@inheritDoc} */
@Override
public int compare(BytesWritable val1, long val2Ptr, int val2Len) {
    return HadoopUtils.compareBytes(val1.getBytes(), val1.getLength(), val2Ptr + LEN_BYTES,
            val2Len - LEN_BYTES);
}

From source file:org.apache.kylin.engine.mr.common.CubeStatsReader.java

License:Apache License

public CubeStatsReader(CubeSegment cubeSegment, KylinConfig kylinConfig) throws IOException {
    ResourceStore store = ResourceStore.getStore(kylinConfig);
    cuboidScheduler = new CuboidScheduler(cubeSegment.getCubeDesc());
    String statsKey = cubeSegment.getStatisticsResourcePath();
    File tmpSeqFile = writeTmpSeqFile(store.getResource(statsKey).inputStream);
    Reader reader = null;

    try {
        Configuration hadoopConf = HadoopUtil.getCurrentConfiguration();

        Path path = new Path(HadoopUtil.fixWindowsPath("file://" + tmpSeqFile.getAbsolutePath()));
        Option seqInput = SequenceFile.Reader.file(path);
        reader = new SequenceFile.Reader(hadoopConf, seqInput);

        int percentage = 100;
        int mapperNumber = 0;
        double mapperOverlapRatio = 0;
        Map<Long, HLLCounter> counterMap = Maps.newHashMap();

        LongWritable key = (LongWritable) ReflectionUtils.newInstance(reader.getKeyClass(), hadoopConf);
        BytesWritable value = (BytesWritable) ReflectionUtils.newInstance(reader.getValueClass(), hadoopConf);
        while (reader.next(key, value)) {
            if (key.get() == 0L) {
                percentage = Bytes.toInt(value.getBytes());
            } else if (key.get() == -1) {
                mapperOverlapRatio = Bytes.toDouble(value.getBytes());
            } else if (key.get() == -2) {
                mapperNumber = Bytes.toInt(value.getBytes());
            } else if (key.get() > 0) {
                HLLCounter hll = new HLLCounter(kylinConfig.getCubeStatsHLLPrecision());
                ByteArray byteArray = new ByteArray(value.getBytes());
                hll.readRegisters(byteArray.asBuffer());
                counterMap.put(key.get(), hll);
            }
        }

        this.seg = cubeSegment;
        this.samplingPercentage = percentage;
        this.mapperNumberOfFirstBuild = mapperNumber;
        this.mapperOverlapRatioOfFirstBuild = mapperOverlapRatio;
        this.cuboidRowEstimatesHLL = counterMap;

    } finally {
        IOUtils.closeStream(reader);
        tmpSeqFile.delete();
    }
}

From source file:org.apache.kylin.engine.mr.steps.CreateDictionaryJob.java

License:Apache License

@Override
public int run(String[] args) throws Exception {
    Options options = new Options();
    options.addOption(OPTION_CUBE_NAME);
    options.addOption(OPTION_SEGMENT_ID);
    options.addOption(OPTION_INPUT_PATH);
    parseOptions(options, args);

    final String cubeName = getOptionValue(OPTION_CUBE_NAME);
    final String segmentID = getOptionValue(OPTION_SEGMENT_ID);
    final String factColumnsInputPath = getOptionValue(OPTION_INPUT_PATH);

    final KylinConfig config = KylinConfig.getInstanceFromEnv();

    DictionaryGeneratorCLI.processSegment(config, cubeName, segmentID, new DistinctColumnValuesProvider() {
        @Override
        public IReadableTable getDistinctValuesFor(TblColRef col) {
            return new SortedColumnDFSFile(factColumnsInputPath + "/" + col.getIdentity(), col.getType());
        }
    }, new DictionaryProvider() {

        @Override
        public Dictionary<String> getDictionary(TblColRef col) throws IOException {
            Path colDir = new Path(factColumnsInputPath, col.getIdentity());
            FileSystem fs = HadoopUtil.getWorkingFileSystem();

            Path dictFile = HadoopUtil.getFilterOnlyPath(fs, colDir,
                    col.getName() + FactDistinctColumnsReducer.DICT_FILE_POSTFIX);
            if (dictFile == null) {
                logger.info("Dict for '" + col.getName() + "' not pre-built.");
                return null;
            }

            try (SequenceFile.Reader reader = new SequenceFile.Reader(HadoopUtil.getCurrentConfiguration(),
                    SequenceFile.Reader.file(dictFile))) {
                NullWritable key = NullWritable.get();
                BytesWritable value = new BytesWritable();
                reader.next(key, value);

                ByteBuffer buffer = new ByteArray(value.getBytes()).asBuffer();
                try (DataInputStream is = new DataInputStream(new ByteBufferBackedInputStream(buffer))) {
                    String dictClassName = is.readUTF();
                    Dictionary<String> dict = (Dictionary<String>) ClassUtil.newInstance(dictClassName);
                    dict.readFields(is);
                    logger.info("DictionaryProvider read dict from file: " + dictFile);
                    return dict;
                }
            }
        }
    });

    return 0;
}

From source file:org.apache.kylin.engine.mr.steps.MergeDictionaryMapper.java

License:Apache License

@Override
protected void doMap(IntWritable key, NullWritable value, Context context)
        throws IOException, InterruptedException {

    int index = key.get();

    if (index < tblColRefs.length) {
        // merge dictionary
        TblColRef col = tblColRefs[index];
        List<DictionaryInfo> dictInfos = Lists.newArrayList();
        for (CubeSegment segment : mergingSegments) {
            if (segment.getDictResPath(col) != null) {
                DictionaryInfo dictInfo = dictMgr.getDictionaryInfo(segment.getDictResPath(col));
                if (dictInfo != null && !dictInfos.contains(dictInfo)) {
                    dictInfos.add(dictInfo);
                }
            }
        }

        DictionaryInfo mergedDictInfo = dictMgr.mergeDictionary(dictInfos);
        String tblCol = col.getTableAlias() + ":" + col.getName();
        String dictInfoPath = mergedDictInfo == null ? "" : mergedDictInfo.getResourcePath();

        context.write(new IntWritable(-1), new Text(tblCol + "=" + dictInfoPath));

    } else {
        // merge statistics
        KylinConfig kylinConfig = AbstractHadoopJob.loadKylinConfigFromHdfs(
                new SerializableConfiguration(context.getConfiguration()),
                context.getConfiguration().get(BatchConstants.ARG_META_URL));

        final String cubeName = context.getConfiguration().get(BatchConstants.ARG_CUBE_NAME);
        final String segmentId = context.getConfiguration().get(BatchConstants.ARG_SEGMENT_ID);
        final String statOutputPath = context.getConfiguration()
                .get(MergeDictionaryJob.OPTION_OUTPUT_PATH_STAT.getOpt());
        CubeInstance cubeInstance = CubeManager.getInstance(kylinConfig).getCube(cubeName);

        logger.info("Statistics output path: {}", statOutputPath);

        CubeSegment newSegment = cubeInstance.getSegmentById(segmentId);
        ResourceStore rs = ResourceStore.getStore(kylinConfig);

        Map<Long, HLLCounter> cuboidHLLMap = Maps.newHashMap();
        Configuration conf = null;
        int averageSamplingPercentage = 0;

        for (CubeSegment cubeSegment : mergingSegments) {
            String filePath = cubeSegment.getStatisticsResourcePath();
            InputStream is = rs.getResource(filePath).inputStream;
            File tempFile;
            FileOutputStream tempFileStream = null;

            try {
                tempFile = File.createTempFile(segmentId, ".seq");
                tempFileStream = new FileOutputStream(tempFile);
                org.apache.commons.io.IOUtils.copy(is, tempFileStream);
            } finally {
                IOUtils.closeStream(is);
                IOUtils.closeStream(tempFileStream);
            }

            FileSystem fs = HadoopUtil.getFileSystem("file:///" + tempFile.getAbsolutePath());
            SequenceFile.Reader reader = null;
            try {
                conf = HadoopUtil.getCurrentConfiguration();
                //noinspection deprecation
                reader = new SequenceFile.Reader(fs, new Path(tempFile.getAbsolutePath()), conf);
                LongWritable keyW = (LongWritable) ReflectionUtils.newInstance(reader.getKeyClass(), conf);
                BytesWritable valueW = (BytesWritable) ReflectionUtils.newInstance(reader.getValueClass(),
                        conf);

                while (reader.next(keyW, valueW)) {
                    if (keyW.get() == 0L) {
                        // sampling percentage;
                        averageSamplingPercentage += Bytes.toInt(valueW.getBytes());
                    } else if (keyW.get() > 0) {
                        HLLCounter hll = new HLLCounter(kylinConfig.getCubeStatsHLLPrecision());
                        ByteArray byteArray = new ByteArray(valueW.getBytes());
                        hll.readRegisters(byteArray.asBuffer());

                        if (cuboidHLLMap.get(keyW.get()) != null) {
                            cuboidHLLMap.get(keyW.get()).merge(hll);
                        } else {
                            cuboidHLLMap.put(keyW.get(), hll);
                        }
                    }
                }
            } catch (Exception e) {
                e.printStackTrace();
                throw e;
            } finally {
                IOUtils.closeStream(reader);
            }
        }

        averageSamplingPercentage = averageSamplingPercentage / mergingSegments.size();
        CubeStatsWriter.writeCuboidStatistics(conf, new Path(statOutputPath), cuboidHLLMap,
                averageSamplingPercentage);
        Path statisticsFilePath = new Path(statOutputPath,
                BatchConstants.CFG_STATISTICS_CUBOID_ESTIMATION_FILENAME);

        FileSystem fs = HadoopUtil.getFileSystem(statisticsFilePath, conf);
        FSDataInputStream fis = fs.open(statisticsFilePath);

        try {
            // put the statistics to metadata store
            String statisticsFileName = newSegment.getStatisticsResourcePath();
            rs.putResource(statisticsFileName, fis, System.currentTimeMillis());
        } finally {
            IOUtils.closeStream(fis);
        }

        context.write(new IntWritable(-1), new Text(""));
    }
}

From source file:org.apache.kylin.engine.mr.steps.MergeStatisticsStep.java

License:Apache License

@Override
@SuppressWarnings("deprecation")
protected ExecuteResult doWork(ExecutableContext context) throws ExecuteException {
    final CubeManager mgr = CubeManager.getInstance(context.getConfig());
    final CubeInstance cube = mgr.getCube(CubingExecutableUtil.getCubeName(this.getParams()));
    final CubeSegment newSegment = cube.getSegmentById(CubingExecutableUtil.getSegmentId(this.getParams()));
    KylinConfig kylinConf = cube.getConfig();

    Configuration conf = HadoopUtil.getCurrentConfiguration();
    ResourceStore rs = ResourceStore.getStore(kylinConf);
    try {

        int averageSamplingPercentage = 0;
        for (String segmentId : CubingExecutableUtil.getMergingSegmentIds(this.getParams())) {
            String fileKey = CubeSegment
                    .getStatisticsResourcePath(CubingExecutableUtil.getCubeName(this.getParams()), segmentId);
            InputStream is = rs.getResource(fileKey).inputStream;
            File tempFile = null;
            FileOutputStream tempFileStream = null;
            try {
                tempFile = File.createTempFile(segmentId, ".seq");
                tempFileStream = new FileOutputStream(tempFile);
                org.apache.commons.io.IOUtils.copy(is, tempFileStream);
            } finally {
                IOUtils.closeStream(is);
                IOUtils.closeStream(tempFileStream);
            }

            FileSystem fs = HadoopUtil.getFileSystem("file:///" + tempFile.getAbsolutePath());
            SequenceFile.Reader reader = null;
            try {
                reader = new SequenceFile.Reader(fs, new Path(tempFile.getAbsolutePath()), conf);
                LongWritable key = (LongWritable) ReflectionUtils.newInstance(reader.getKeyClass(), conf);
                BytesWritable value = (BytesWritable) ReflectionUtils.newInstance(reader.getValueClass(), conf);
                while (reader.next(key, value)) {
                    if (key.get() == 0L) {
                        // sampling percentage;
                        averageSamplingPercentage += Bytes.toInt(value.getBytes());
                    } else if (key.get() > 0) {
                        HLLCounter hll = new HLLCounter(kylinConf.getCubeStatsHLLPrecision());
                        ByteArray byteArray = new ByteArray(value.getBytes());
                        hll.readRegisters(byteArray.asBuffer());

                        if (cuboidHLLMap.get(key.get()) != null) {
                            cuboidHLLMap.get(key.get()).merge(hll);
                        } else {
                            cuboidHLLMap.put(key.get(), hll);
                        }
                    }
                }
            } catch (Exception e) {
                e.printStackTrace();
                throw e;
            } finally {
                IOUtils.closeStream(reader);
                if (tempFile != null)
                    tempFile.delete();
            }
        }
        averageSamplingPercentage = averageSamplingPercentage
                / CubingExecutableUtil.getMergingSegmentIds(this.getParams()).size();
        CubeStatsWriter.writeCuboidStatistics(conf,
                new Path(CubingExecutableUtil.getMergedStatisticsPath(this.getParams())), cuboidHLLMap,
                averageSamplingPercentage);
        Path statisticsFilePath = new Path(CubingExecutableUtil.getMergedStatisticsPath(this.getParams()),
                BatchConstants.CFG_STATISTICS_CUBOID_ESTIMATION_FILENAME);
        FileSystem fs = HadoopUtil.getFileSystem(statisticsFilePath, conf);
        FSDataInputStream is = fs.open(statisticsFilePath);
        try {
            // put the statistics to metadata store
            String statisticsFileName = newSegment.getStatisticsResourcePath();
            rs.putResource(statisticsFileName, is, System.currentTimeMillis());
        } finally {
            IOUtils.closeStream(is);
        }

        return new ExecuteResult(ExecuteResult.State.SUCCEED, "succeed");
    } catch (IOException e) {
        logger.error("fail to merge cuboid statistics", e);
        return new ExecuteResult(ExecuteResult.State.ERROR, e.getLocalizedMessage());
    }
}

From source file:org.apache.kylin.job.hadoop.cardinality.ColumnCardinalityReducer.java

License:Apache License

@Override
public void reduce(IntWritable key, Iterable<BytesWritable> values, Context context)
        throws IOException, InterruptedException {
    int skey = key.get();
    for (BytesWritable v : values) {
        ByteBuffer buffer = ByteBuffer.wrap(v.getBytes());
        HyperLogLogPlusCounter hll = new HyperLogLogPlusCounter();
        hll.readRegisters(buffer);
        getHllc(skey).merge(hll);
        hll.clear();
    }
}
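
Several of the SequenceFile-reading examples above wrap the array returned by getBytes() directly. Here is a small defensive variant, sketched for illustration rather than taken from those sources (the class and method names are hypothetical), that bounds the buffer with getLength(), since the backing array can be padded beyond the valid data.

import org.apache.hadoop.io.BytesWritable;

import java.nio.ByteBuffer;

// Illustrative example class, not part of Hadoop or of the sources on this page.
public class BoundedWrapExample {
    /** Wrap only the valid region of a BytesWritable's backing array. */
    static ByteBuffer wrapValid(BytesWritable value) {
        return ByteBuffer.wrap(value.getBytes(), 0, value.getLength());
    }

    public static void main(String[] args) {
        BytesWritable value = new BytesWritable(new byte[] { 1, 2, 3 });
        ByteBuffer buffer = wrapValid(value);
        System.out.println(buffer.remaining()); // prints 3 even if the backing array is larger
    }
}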