Example usage for org.apache.hadoop.io.LongWritable.get()

Introduction

This page collects example usages of the org.apache.hadoop.io.LongWritable.get() method.

Prototype

public long get()

Source Link

Document

Return the value of this LongWritable.

Usage

From source file:org.apache.hama.bsp.TestCheckpoint.java

License:Apache License

/**
 * Reads the "checkpoint" record stored for the given peer and asserts that its first element
 * equals {@code step} and its second element equals {@code count}.
 */
private static void checkSuperstepMsgCount(PeerSyncClient syncClient,
        @SuppressWarnings("rawtypes") BSPPeer bspTask, BSPJob job, long step, long count) {

    ArrayWritable checkpoint = new ArrayWritable(LongWritable.class);

    // The lookup itself must succeed before the payload is inspected.
    assertTrue(syncClient.getInformation(
            syncClient.constructKey(job.getJobID(), "checkpoint", "" + bspTask.getPeerIndex()), checkpoint));

    // Fetch the backing array once; index 0 is compared with step, index 1 with count.
    Object[] fields = checkpoint.get();
    LongWritable storedStep = (LongWritable) fields[0];
    LongWritable storedCount = (LongWritable) fields[1];

    assertEquals(step, storedStep.get());
    assertEquals(count, storedCount.get());
}

From source file:org.apache.hcatalog.hcatmix.load.HadoopLoadGenerator.java

License:Apache License

/**
 * Read result from HDFS reduce output directory and return the results
 * @param outputDir where to read the data from. Expects the file to be {SequenceFile}
 * @param jobConf/*w w  w.j a  va  2  s  . com*/
 * @return
 * @throws IOException
 */
private SortedMap<Long, ReduceResult> readResult(Path outputDir, JobConf jobConf) throws IOException {
    SortedMap<Long, ReduceResult> timeseriesResults = new TreeMap<Long, ReduceResult>();
    FileStatus[] files = fs.listStatus(outputDir, new PathFilter() {
        @Override
        public boolean accept(Path path) {
            return path.getName().startsWith("part");
        }
    });
    for (FileStatus status : files) {
        Path path = status.getPath();
        SequenceFile.Reader reader = new SequenceFile.Reader(fs, path, jobConf);
        LongWritable timeStamp = new LongWritable();
        ReduceResult result = new ReduceResult();
        while (reader.next(timeStamp, result)) {
            LOG.info("Timestamp: " + timeStamp);
            LOG.info("ThreadCount: " + result.getThreadCount());
            LOG.info("Stats:\n" + result.getStatistics());
            LOG.info("Errors: " + result.getNumErrors());
            timeseriesResults.put(timeStamp.get(), result);
            timeStamp = new LongWritable(); // initialize, so as to use new objects for next round reading
            result = new ReduceResult();
        }
        reader.close();
    }
    return timeseriesResults;
}

From source file:org.apache.kudu.mapreduce.tools.ImportCsvMapper.java

License:Apache License

/**
 * Converts one line of CSV text into a Kudu {@code Insert} and writes it to the context.
 *
 * <p>Each parsed column is converted according to the type of the schema column with the same
 * name. When {@code skipBadLines} is set, a line that fails to parse or convert is reported on
 * stderr, counted in {@code badLineCount} and skipped; otherwise the failure is rethrown as an
 * {@link IOException}.
 *
 * @param offset key of the input record; only used in bad-line diagnostics
 * @param value the CSV line
 * @param context sink for the generated insert
 * @throws IOException if the line is bad and bad lines are not skipped, or if the task is
 *         interrupted while writing
 */
@Override
public void map(LongWritable offset, Text value, Context context) throws IOException {
    // Text#getBytes exposes the backing array, so the valid length is passed separately below.
    byte[] lineBytes = value.getBytes();

    try {
        CsvParser.ParsedLine parsed = this.parser.parse(lineBytes, value.getLength());

        Insert insert = this.table.newInsert();
        PartialRow row = insert.getRow();
        for (int i = 0; i < parsed.getColumnCount(); i++) {
            String colName = parsed.getColumnName(i);
            ColumnSchema col = this.schema.getColumn(colName);
            String colValue = Bytes.getString(parsed.getLineBytes(), parsed.getColumnOffset(i),
                    parsed.getColumnLength(i));
            switch (col.getType()) {
            case BOOL:
                row.addBoolean(colName, Boolean.parseBoolean(colValue));
                break;
            case INT8:
                row.addByte(colName, Byte.parseByte(colValue));
                break;
            case INT16:
                row.addShort(colName, Short.parseShort(colValue));
                break;
            case INT32:
                row.addInt(colName, Integer.parseInt(colValue));
                break;
            case INT64:
                row.addLong(colName, Long.parseLong(colValue));
                break;
            case STRING:
                row.addString(colName, colValue);
                break;
            case FLOAT:
                row.addFloat(colName, Float.parseFloat(colValue));
                break;
            case DOUBLE:
                row.addDouble(colName, Double.parseDouble(colValue));
                break;
            default:
                throw new IllegalArgumentException("Type " + col.getType() + " not recognized");
            }
        }
        context.write(NULL_KEY, insert);
    } catch (CsvParser.BadCsvLineException badLine) {
        if (!this.skipBadLines) {
            throw new IOException("Failing task because of a bad line", badLine);
        }
        System.err.println("Bad line at offset: " + offset.get() + ":\n" + badLine.getMessage());
        this.badLineCount.increment(1);
    } catch (IllegalArgumentException e) {
        // Also covers NumberFormatException raised by the numeric parse calls above.
        if (!this.skipBadLines) {
            throw new IOException("Failing task because of an illegal argument", e);
        }
        System.err.println("Bad line at offset: " + offset.get() + ":\n" + e.getMessage());
        this.badLineCount.increment(1);
    } catch (InterruptedException e) {
        // Restore the interrupt status so code further up the stack can still observe it.
        Thread.currentThread().interrupt();
        throw new IOException("Failing task since it was interrupted", e);
    }
}

From source file:org.apache.kylin.engine.mr.common.CubeStatsReader.java

License:Apache License

/**
 * Loads the statistics resource of {@code cubeSegment} from the resource store.
 *
 * <p>The resource is copied to a temporary sequence file and read record by record. The record
 * key selects what the value holds: {@code 0} is the sampling percentage, {@code -1} the mapper
 * overlap ratio, {@code -2} the mapper number, and any positive key an HLL counter stored under
 * that key. The temporary file is deleted before the constructor returns or throws.
 *
 * @param cubeSegment segment whose statistics are read
 * @param kylinConfig configuration used to locate the resource store and the HLL precision
 * @throws IOException if the statistics cannot be read
 */
public CubeStatsReader(CubeSegment cubeSegment, KylinConfig kylinConfig) throws IOException {
    ResourceStore store = ResourceStore.getStore(kylinConfig);
    cuboidScheduler = new CuboidScheduler(cubeSegment.getCubeDesc());
    File tmpSeqFile = writeTmpSeqFile(
            store.getResource(cubeSegment.getStatisticsResourcePath()).inputStream);
    Reader reader = null;

    try {
        Configuration hadoopConf = HadoopUtil.getCurrentConfiguration();

        Path path = new Path(HadoopUtil.fixWindowsPath("file://" + tmpSeqFile.getAbsolutePath()));
        reader = new SequenceFile.Reader(hadoopConf, SequenceFile.Reader.file(path));

        // Defaults that apply when the corresponding record is absent from the file.
        int percentage = 100;
        int mapperNumber = 0;
        double mapperOverlapRatio = 0;
        Map<Long, HLLCounter> counterMap = Maps.newHashMap();

        LongWritable key = (LongWritable) ReflectionUtils.newInstance(reader.getKeyClass(), hadoopConf);
        BytesWritable value = (BytesWritable) ReflectionUtils.newInstance(reader.getValueClass(), hadoopConf);
        while (reader.next(key, value)) {
            final long id = key.get();
            if (id > 0) {
                HLLCounter hll = new HLLCounter(kylinConfig.getCubeStatsHLLPrecision());
                hll.readRegisters(new ByteArray(value.getBytes()).asBuffer());
                counterMap.put(id, hll);
            } else if (id == 0L) {
                percentage = Bytes.toInt(value.getBytes());
            } else if (id == -1) {
                mapperOverlapRatio = Bytes.toDouble(value.getBytes());
            } else if (id == -2) {
                mapperNumber = Bytes.toInt(value.getBytes());
            }
        }

        this.seg = cubeSegment;
        this.samplingPercentage = percentage;
        this.mapperNumberOfFirstBuild = mapperNumber;
        this.mapperOverlapRatioOfFirstBuild = mapperOverlapRatio;
        this.cuboidRowEstimatesHLL = counterMap;

    } finally {
        IOUtils.closeStream(reader);
        tmpSeqFile.delete();
    }
}

From source file:org.apache.kylin.engine.mr.steps.MergeDictionaryMapper.java

License:Apache License

/**
 * Merges either one dictionary or the cuboid statistics of the merging segments, depending on
 * the input key.
 *
 * <p>A key below {@code tblColRefs.length} selects the column at that index: the dictionaries of
 * all merging segments for that column are merged and {@code "<tableAlias>:<column>=<path>"} is
 * written. Any other key triggers the statistics merge: the HLL counters of all merging segments
 * are merged per cuboid, the sampling percentages are averaged, and the result is written to the
 * statistics output path and stored in the metadata store under the new segment's statistics
 * path.
 *
 * @param key index selecting the column to merge, or a value past the column range to merge
 *        statistics
 * @param value unused
 * @param context receives one record with key {@code -1}
 * @throws IOException if reading or writing dictionaries or statistics fails
 * @throws InterruptedException if writing to the context is interrupted
 */
@Override
protected void doMap(IntWritable key, NullWritable value, Context context)
        throws IOException, InterruptedException {

    int index = key.get();

    if (index < tblColRefs.length) {
        // merge dictionary
        TblColRef col = tblColRefs[index];
        List<DictionaryInfo> dictInfos = Lists.newArrayList();
        for (CubeSegment segment : mergingSegments) {
            if (segment.getDictResPath(col) != null) {
                DictionaryInfo dictInfo = dictMgr.getDictionaryInfo(segment.getDictResPath(col));
                if (dictInfo != null && !dictInfos.contains(dictInfo)) {
                    dictInfos.add(dictInfo);
                }
            }
        }

        DictionaryInfo mergedDictInfo = dictMgr.mergeDictionary(dictInfos);
        String tblCol = col.getTableAlias() + ":" + col.getName();
        String dictInfoPath = mergedDictInfo == null ? "" : mergedDictInfo.getResourcePath();

        context.write(new IntWritable(-1), new Text(tblCol + "=" + dictInfoPath));

    } else {
        // merge statistics
        KylinConfig kylinConfig = AbstractHadoopJob.loadKylinConfigFromHdfs(
                new SerializableConfiguration(context.getConfiguration()),
                context.getConfiguration().get(BatchConstants.ARG_META_URL));

        final String cubeName = context.getConfiguration().get(BatchConstants.ARG_CUBE_NAME);
        final String segmentId = context.getConfiguration().get(BatchConstants.ARG_SEGMENT_ID);
        final String statOutputPath = context.getConfiguration()
                .get(MergeDictionaryJob.OPTION_OUTPUT_PATH_STAT.getOpt());
        CubeInstance cubeInstance = CubeManager.getInstance(kylinConfig).getCube(cubeName);

        logger.info("Statistics output path: {}", statOutputPath);

        CubeSegment newSegment = cubeInstance.getSegmentById(segmentId);
        ResourceStore rs = ResourceStore.getStore(kylinConfig);

        Map<Long, HLLCounter> cuboidHLLMap = Maps.newHashMap();
        Configuration conf = null;
        int averageSamplingPercentage = 0;

        for (CubeSegment cubeSegment : mergingSegments) {
            String filePath = cubeSegment.getStatisticsResourcePath();
            // The statistics are copied to a local temp file so they can be read as a sequence file.
            File tempFile = File.createTempFile(segmentId, ".seq");
            SequenceFile.Reader reader = null;
            try {
                InputStream is = rs.getResource(filePath).inputStream;
                FileOutputStream tempFileStream = null;
                try {
                    tempFileStream = new FileOutputStream(tempFile);
                    org.apache.commons.io.IOUtils.copy(is, tempFileStream);
                } finally {
                    IOUtils.closeStream(is);
                    IOUtils.closeStream(tempFileStream);
                }

                FileSystem fs = HadoopUtil.getFileSystem("file:///" + tempFile.getAbsolutePath());
                conf = HadoopUtil.getCurrentConfiguration();
                //noinspection deprecation
                reader = new SequenceFile.Reader(fs, new Path(tempFile.getAbsolutePath()), conf);
                LongWritable keyW = (LongWritable) ReflectionUtils.newInstance(reader.getKeyClass(), conf);
                BytesWritable valueW = (BytesWritable) ReflectionUtils.newInstance(reader.getValueClass(),
                        conf);

                while (reader.next(keyW, valueW)) {
                    long cuboidId = keyW.get();
                    if (cuboidId == 0L) {
                        // sampling percentage;
                        averageSamplingPercentage += Bytes.toInt(valueW.getBytes());
                    } else if (cuboidId > 0) {
                        HLLCounter hll = new HLLCounter(kylinConfig.getCubeStatsHLLPrecision());
                        ByteArray byteArray = new ByteArray(valueW.getBytes());
                        hll.readRegisters(byteArray.asBuffer());

                        HLLCounter existing = cuboidHLLMap.get(cuboidId);
                        if (existing != null) {
                            existing.merge(hll);
                        } else {
                            cuboidHLLMap.put(cuboidId, hll);
                        }
                    }
                }
            } catch (Exception e) {
                logger.error("Failed to read statistics from " + filePath, e);
                throw e;
            } finally {
                IOUtils.closeStream(reader);
                // Remove the temp copy on every path; otherwise each segment leaves a file behind.
                if (!tempFile.delete()) {
                    logger.warn("Failed to delete temp file {}", tempFile);
                }
            }
        }

        averageSamplingPercentage = averageSamplingPercentage / mergingSegments.size();
        CubeStatsWriter.writeCuboidStatistics(conf, new Path(statOutputPath), cuboidHLLMap,
                averageSamplingPercentage);
        Path statisticsFilePath = new Path(statOutputPath,
                BatchConstants.CFG_STATISTICS_CUBOID_ESTIMATION_FILENAME);

        FileSystem fs = HadoopUtil.getFileSystem(statisticsFilePath, conf);
        FSDataInputStream fis = fs.open(statisticsFilePath);

        try {
            // put the statistics to metadata store
            String statisticsFileName = newSegment.getStatisticsResourcePath();
            rs.putResource(statisticsFileName, fis, System.currentTimeMillis());
        } finally {
            IOUtils.closeStream(fis);
        }

        context.write(new IntWritable(-1), new Text(""));
    }
}

From source file:org.apache.kylin.engine.mr.steps.MergeStatisticsStep.java

License:Apache License

/**
 * Merges the cuboid statistics of all merging segments and stores the result for the new segment.
 *
 * <p>For each merging segment the statistics resource is copied to a local temp file and read as
 * a sequence file: key {@code 0} contributes to the averaged sampling percentage and positive
 * keys carry HLL counters that are merged per key into {@code cuboidHLLMap}. The merged
 * statistics are written to the merged-statistics path and then uploaded to the metadata store
 * under the new segment's statistics path.
 *
 * @param context supplies the configuration used to look up the cube
 * @return {@code SUCCEED} on success, or {@code ERROR} carrying the message of the
 *         {@link IOException} that aborted the merge
 * @throws ExecuteException declared by the overridden method; not thrown directly here
 */
@Override
@SuppressWarnings("deprecation")
protected ExecuteResult doWork(ExecutableContext context) throws ExecuteException {
    final CubeManager mgr = CubeManager.getInstance(context.getConfig());
    final CubeInstance cube = mgr.getCube(CubingExecutableUtil.getCubeName(this.getParams()));
    final CubeSegment newSegment = cube.getSegmentById(CubingExecutableUtil.getSegmentId(this.getParams()));
    KylinConfig kylinConf = cube.getConfig();

    Configuration conf = HadoopUtil.getCurrentConfiguration();
    ResourceStore rs = ResourceStore.getStore(kylinConf);
    try {

        int averageSamplingPercentage = 0;
        for (String segmentId : CubingExecutableUtil.getMergingSegmentIds(this.getParams())) {
            String fileKey = CubeSegment
                    .getStatisticsResourcePath(CubingExecutableUtil.getCubeName(this.getParams()), segmentId);
            // The statistics are copied to a local temp file so they can be read as a sequence file.
            File tempFile = File.createTempFile(segmentId, ".seq");
            SequenceFile.Reader reader = null;
            try {
                InputStream is = rs.getResource(fileKey).inputStream;
                FileOutputStream tempFileStream = null;
                try {
                    tempFileStream = new FileOutputStream(tempFile);
                    org.apache.commons.io.IOUtils.copy(is, tempFileStream);
                } finally {
                    IOUtils.closeStream(is);
                    IOUtils.closeStream(tempFileStream);
                }

                FileSystem fs = HadoopUtil.getFileSystem("file:///" + tempFile.getAbsolutePath());
                reader = new SequenceFile.Reader(fs, new Path(tempFile.getAbsolutePath()), conf);
                LongWritable key = (LongWritable) ReflectionUtils.newInstance(reader.getKeyClass(), conf);
                BytesWritable value = (BytesWritable) ReflectionUtils.newInstance(reader.getValueClass(), conf);
                while (reader.next(key, value)) {
                    long cuboidId = key.get();
                    if (cuboidId == 0L) {
                        // sampling percentage;
                        averageSamplingPercentage += Bytes.toInt(value.getBytes());
                    } else if (cuboidId > 0) {
                        HLLCounter hll = new HLLCounter(kylinConf.getCubeStatsHLLPrecision());
                        ByteArray byteArray = new ByteArray(value.getBytes());
                        hll.readRegisters(byteArray.asBuffer());

                        HLLCounter existing = cuboidHLLMap.get(cuboidId);
                        if (existing != null) {
                            existing.merge(hll);
                        } else {
                            cuboidHLLMap.put(cuboidId, hll);
                        }
                    }
                }
            } catch (Exception e) {
                logger.error("fail to read cuboid statistics of segment " + segmentId, e);
                throw e;
            } finally {
                IOUtils.closeStream(reader);
                // Covers the copy step as well, so a failed copy no longer leaves the file behind.
                tempFile.delete();
            }
        }
        averageSamplingPercentage = averageSamplingPercentage
                / CubingExecutableUtil.getMergingSegmentIds(this.getParams()).size();
        CubeStatsWriter.writeCuboidStatistics(conf,
                new Path(CubingExecutableUtil.getMergedStatisticsPath(this.getParams())), cuboidHLLMap,
                averageSamplingPercentage);
        Path statisticsFilePath = new Path(CubingExecutableUtil.getMergedStatisticsPath(this.getParams()),
                BatchConstants.CFG_STATISTICS_CUBOID_ESTIMATION_FILENAME);
        FileSystem fs = HadoopUtil.getFileSystem(statisticsFilePath, conf);
        FSDataInputStream is = fs.open(statisticsFilePath);
        try {
            // put the statistics to metadata store
            String statisticsFileName = newSegment.getStatisticsResourcePath();
            rs.putResource(statisticsFileName, is, System.currentTimeMillis());
        } finally {
            IOUtils.closeStream(is);
        }

        return new ExecuteResult(ExecuteResult.State.SUCCEED, "succeed");
    } catch (IOException e) {
        logger.error("fail to merge cuboid statistics", e);
        return new ExecuteResult(ExecuteResult.State.ERROR, e.getLocalizedMessage());
    }
}

From source file:org.apache.kylin.engine.mr.steps.RowKeyDistributionCheckerReducer.java

License:Apache License

/**
 * Sums all values received for {@code key} and emits the key together with that sum.
 *
 * @param key the key being reduced; written through unchanged
 * @param values the values to add up
 * @param context receives the key and the total
 */
@Override
public void doReduce(Text key, Iterable<LongWritable> values, Context context)
        throws IOException, InterruptedException {

    long total = 0;
    for (LongWritable part : values) {
        total = total + part.get();
    }

    // outputKey is a reused writable that carries the total.
    outputKey.set(total);
    context.write(key, outputKey);
}

From source file:org.apache.kylin.metadata.measure.fixedlen.FixedPointLongCodec.java

License:Apache License

/**
 * Writes the value of {@code v} into {@code buf} at {@code offset} using {@code SIZE} bytes;
 * a {@code null} value is written as {@code 0}.
 */
@Override
public void write(LongWritable v, byte[] buf, int offset) {
    long raw = 0;
    if (v != null) {
        raw = v.get();
    }
    BytesUtil.writeLong(raw, buf, offset, SIZE);
}

From source file:org.apache.kylin.source.hive.cardinality.ColumnCardinalityReducerTest.java

License:Apache License

/**
 * Feeds five keys (1..5) with one value each into the reducer and checks that five results come
 * back, the first of which has key 1 and a value of 9 or 10.
 */
@Test
public void testReducer() throws IOException {
    // Key 1 gets the plain string; keys 2..5 get it with " x", " xx", " xxx", " xxxx" appended.
    StringBuilder suffix = new StringBuilder();
    for (int k = 1; k <= 5; k++) {
        List<BytesWritable> values = new ArrayList<BytesWritable>();
        values.add(new BytesWritable(getBytes(strArr + suffix.toString())));
        reduceDriver.withInput(new IntWritable(k), values);
        suffix.append(k == 1 ? " x" : "x");
    }

    List<Pair<IntWritable, LongWritable>> result = reduceDriver.run();

    assertEquals(5, result.size());

    Pair<IntWritable, LongWritable> first = result.get(0);
    int firstKey = first.getFirst().get();
    LongWritable firstValue = first.getSecond();
    assertTrue(firstKey == 1);

    long cardinality = firstValue.get();
    assertTrue(cardinality == 10 || cardinality == 9);

}

From source file:org.apache.kylin.source.kafka.hadoop.KafkaFlatTableMapper.java

License:Apache License

/**
 * Emits the record with its key converted to bytes and its value limited to the valid length.
 *
 * @param key converted with {@code Bytes.toBytes} to form the output key
 * @param value payload; only the first {@code value.getLength()} bytes are forwarded
 * @param context receives the output key/value pair
 */
@Override
public void doMap(LongWritable key, BytesWritable value, Context context)
        throws IOException, InterruptedException {
    outKey.set(Bytes.toBytes(key.get()));

    // BytesWritable#getBytes returns the backing buffer, so bound it by the valid length.
    final byte[] payload = value.getBytes();
    final int payloadLength = value.getLength();
    outValue.set(payload, 0, payloadLength);

    context.write(outKey, outValue);
}