List of usage examples for org.apache.hadoop.io.LongWritable.get()
public long get()
From source file:org.apache.hama.bsp.TestCheckpoint.java
License:Apache License
/**
 * Asserts that the checkpoint entry published for a peer carries the expected values.
 *
 * <p>The entry is looked up under the key built from the job ID, the literal
 * {@code "checkpoint"} and the peer index. Its first element is compared with
 * {@code step} and its second element with {@code count}.
 *
 * @param syncClient client used to build the key and to fetch the stored value
 * @param bspTask peer whose index is part of the lookup key
 * @param job job whose ID is part of the lookup key
 * @param step expected value of the first stored element (superstep number)
 * @param count expected value of the second stored element (message count)
 */
private static void checkSuperstepMsgCount(PeerSyncClient syncClient,
        @SuppressWarnings("rawtypes") BSPPeer bspTask, BSPJob job, long step, long count) {
    ArrayWritable stored = new ArrayWritable(LongWritable.class);

    boolean found = syncClient.getInformation(
            syncClient.constructKey(job.getJobID(), "checkpoint", "" + bspTask.getPeerIndex()),
            stored);
    assertTrue(found);

    // Extract both elements before asserting, so a malformed entry fails on extraction.
    LongWritable storedStep = (LongWritable) stored.get()[0];
    LongWritable storedCount = (LongWritable) stored.get()[1];

    assertEquals(step, storedStep.get());
    assertEquals(count, storedCount.get());
}
From source file:org.apache.hcatalog.hcatmix.load.HadoopLoadGenerator.java
License:Apache License
/** * Read result from HDFS reduce output directory and return the results * @param outputDir where to read the data from. Expects the file to be {SequenceFile} * @param jobConf/*w w w.j a va 2 s . com*/ * @return * @throws IOException */ private SortedMap<Long, ReduceResult> readResult(Path outputDir, JobConf jobConf) throws IOException { SortedMap<Long, ReduceResult> timeseriesResults = new TreeMap<Long, ReduceResult>(); FileStatus[] files = fs.listStatus(outputDir, new PathFilter() { @Override public boolean accept(Path path) { return path.getName().startsWith("part"); } }); for (FileStatus status : files) { Path path = status.getPath(); SequenceFile.Reader reader = new SequenceFile.Reader(fs, path, jobConf); LongWritable timeStamp = new LongWritable(); ReduceResult result = new ReduceResult(); while (reader.next(timeStamp, result)) { LOG.info("Timestamp: " + timeStamp); LOG.info("ThreadCount: " + result.getThreadCount()); LOG.info("Stats:\n" + result.getStatistics()); LOG.info("Errors: " + result.getNumErrors()); timeseriesResults.put(timeStamp.get(), result); timeStamp = new LongWritable(); // initialize, so as to use new objects for next round reading result = new ReduceResult(); } reader.close(); } return timeseriesResults; }
From source file:org.apache.kudu.mapreduce.tools.ImportCsvMapper.java
License:Apache License
/**
 * Converts one line of CSV text into a Kudu {@code Insert} and writes it to the context.
 *
 * <p>Each parsed column is converted according to the type of the schema column with the
 * same name. When a line cannot be parsed or converted and {@code skipBadLines} is set, the
 * problem is reported on {@code System.err}, the bad-line counter is incremented and the
 * line is dropped; otherwise the task fails.
 *
 * @param offset offset of the line in the input, used only for error reporting
 * @param value raw bytes of the CSV line
 * @param context mapper context that receives the insert
 * @throws IOException if a bad line is encountered while {@code skipBadLines} is off, or if
 *     writing to the context is interrupted
 */
@Override
public void map(LongWritable offset, Text value, Context context) throws IOException {
    byte[] lineBytes = value.getBytes();
    try {
        CsvParser.ParsedLine parsed = this.parser.parse(lineBytes, value.getLength());
        Insert insert = this.table.newInsert();
        PartialRow row = insert.getRow();
        for (int i = 0; i < parsed.getColumnCount(); i++) {
            String colName = parsed.getColumnName(i);
            ColumnSchema col = this.schema.getColumn(colName);
            String colValue = Bytes.getString(parsed.getLineBytes(), parsed.getColumnOffset(i),
                    parsed.getColumnLength(i));
            switch (col.getType()) {
            case BOOL:
                row.addBoolean(colName, Boolean.parseBoolean(colValue));
                break;
            case INT8:
                row.addByte(colName, Byte.parseByte(colValue));
                break;
            case INT16:
                row.addShort(colName, Short.parseShort(colValue));
                break;
            case INT32:
                row.addInt(colName, Integer.parseInt(colValue));
                break;
            case INT64:
                row.addLong(colName, Long.parseLong(colValue));
                break;
            case STRING:
                row.addString(colName, colValue);
                break;
            case FLOAT:
                row.addFloat(colName, Float.parseFloat(colValue));
                break;
            case DOUBLE:
                row.addDouble(colName, Double.parseDouble(colValue));
                break;
            default:
                throw new IllegalArgumentException("Type " + col.getType() + " not recognized");
            }
        }
        context.write(NULL_KEY, insert);
    } catch (CsvParser.BadCsvLineException badLine) {
        if (this.skipBadLines) {
            System.err.println("Bad line at offset: " + offset.get() + ":\n" + badLine.getMessage());
            this.badLineCount.increment(1);
        } else {
            throw new IOException("Failing task because of a bad line", badLine);
        }
    } catch (IllegalArgumentException e) {
        // Also covers NumberFormatException raised by the numeric parsers above.
        if (this.skipBadLines) {
            System.err.println("Bad line at offset: " + offset.get() + ":\n" + e.getMessage());
            this.badLineCount.increment(1);
        } else {
            throw new IOException("Failing task because of an illegal argument", e);
        }
    } catch (InterruptedException e) {
        // Catching InterruptedException clears the thread's interrupt status; restore it so
        // code further up the stack can still observe that the task was interrupted.
        Thread.currentThread().interrupt();
        throw new IOException("Failing task since it was interrupted", e);
    }
}
From source file:org.apache.kylin.engine.mr.common.CubeStatsReader.java
License:Apache License
/**
 * Loads the statistics of a cube segment from the resource store.
 *
 * <p>The statistics resource is copied to a temporary sequence file, which is then read
 * record by record. The record key selects what the value holds:
 * <ul>
 *   <li>{@code 0}: sampling percentage (int)</li>
 *   <li>{@code -1}: mapper overlap ratio (double)</li>
 *   <li>{@code -2}: mapper number (int)</li>
 *   <li>any positive key: serialized HLL registers, stored in the estimate map under that key</li>
 * </ul>
 * Records with any other key are ignored. The temporary file is deleted when reading ends,
 * whether or not it succeeded.
 *
 * @param cubeSegment segment whose statistics are loaded
 * @param kylinConfig configuration used to locate the resource store and the HLL precision
 * @throws IOException if the statistics cannot be read
 */
public CubeStatsReader(CubeSegment cubeSegment, KylinConfig kylinConfig) throws IOException {
    ResourceStore store = ResourceStore.getStore(kylinConfig);
    cuboidScheduler = new CuboidScheduler(cubeSegment.getCubeDesc());
    String statsKey = cubeSegment.getStatisticsResourcePath();
    File tmpSeqFile = writeTmpSeqFile(store.getResource(statsKey).inputStream);

    Reader reader = null;
    try {
        Configuration hadoopConf = HadoopUtil.getCurrentConfiguration();
        Path localPath = new Path(HadoopUtil.fixWindowsPath("file://" + tmpSeqFile.getAbsolutePath()));
        reader = new SequenceFile.Reader(hadoopConf, SequenceFile.Reader.file(localPath));

        // Defaults used when the corresponding record is absent from the file.
        int samplingPct = 100;
        int mappers = 0;
        double overlapRatio = 0;
        Map<Long, HLLCounter> estimates = Maps.newHashMap();

        LongWritable key = (LongWritable) ReflectionUtils.newInstance(reader.getKeyClass(), hadoopConf);
        BytesWritable value = (BytesWritable) ReflectionUtils.newInstance(reader.getValueClass(), hadoopConf);
        while (reader.next(key, value)) {
            long id = key.get();
            if (id > 0) {
                HLLCounter hll = new HLLCounter(kylinConfig.getCubeStatsHLLPrecision());
                ByteArray registers = new ByteArray(value.getBytes());
                hll.readRegisters(registers.asBuffer());
                estimates.put(id, hll);
            } else if (id == 0L) {
                samplingPct = Bytes.toInt(value.getBytes());
            } else if (id == -1) {
                overlapRatio = Bytes.toDouble(value.getBytes());
            } else if (id == -2) {
                mappers = Bytes.toInt(value.getBytes());
            }
        }

        this.seg = cubeSegment;
        this.samplingPercentage = samplingPct;
        this.mapperNumberOfFirstBuild = mappers;
        this.mapperOverlapRatioOfFirstBuild = overlapRatio;
        this.cuboidRowEstimatesHLL = estimates;
    } finally {
        IOUtils.closeStream(reader);
        tmpSeqFile.delete();
    }
}
From source file:org.apache.kylin.engine.mr.steps.MergeDictionaryMapper.java
License:Apache License
@Override protected void doMap(IntWritable key, NullWritable value, Context context) throws IOException, InterruptedException { int index = key.get(); if (index < tblColRefs.length) { // merge dictionary TblColRef col = tblColRefs[index]; List<DictionaryInfo> dictInfos = Lists.newArrayList(); for (CubeSegment segment : mergingSegments) { if (segment.getDictResPath(col) != null) { DictionaryInfo dictInfo = dictMgr.getDictionaryInfo(segment.getDictResPath(col)); if (dictInfo != null && !dictInfos.contains(dictInfo)) { dictInfos.add(dictInfo); }//from ww w . j ava 2 s. c o m } } DictionaryInfo mergedDictInfo = dictMgr.mergeDictionary(dictInfos); String tblCol = col.getTableAlias() + ":" + col.getName(); String dictInfoPath = mergedDictInfo == null ? "" : mergedDictInfo.getResourcePath(); context.write(new IntWritable(-1), new Text(tblCol + "=" + dictInfoPath)); } else { // merge statistics KylinConfig kylinConfig = AbstractHadoopJob.loadKylinConfigFromHdfs( new SerializableConfiguration(context.getConfiguration()), context.getConfiguration().get(BatchConstants.ARG_META_URL)); final String cubeName = context.getConfiguration().get(BatchConstants.ARG_CUBE_NAME); final String segmentId = context.getConfiguration().get(BatchConstants.ARG_SEGMENT_ID); final String statOutputPath = context.getConfiguration() .get(MergeDictionaryJob.OPTION_OUTPUT_PATH_STAT.getOpt()); CubeInstance cubeInstance = CubeManager.getInstance(kylinConfig).getCube(cubeName); logger.info("Statistics output path: {}", statOutputPath); CubeSegment newSegment = cubeInstance.getSegmentById(segmentId); ResourceStore rs = ResourceStore.getStore(kylinConfig); Map<Long, HLLCounter> cuboidHLLMap = Maps.newHashMap(); Configuration conf = null; int averageSamplingPercentage = 0; for (CubeSegment cubeSegment : mergingSegments) { String filePath = cubeSegment.getStatisticsResourcePath(); InputStream is = rs.getResource(filePath).inputStream; File tempFile; FileOutputStream tempFileStream = null; try { tempFile = 
File.createTempFile(segmentId, ".seq"); tempFileStream = new FileOutputStream(tempFile); org.apache.commons.io.IOUtils.copy(is, tempFileStream); } finally { IOUtils.closeStream(is); IOUtils.closeStream(tempFileStream); } FileSystem fs = HadoopUtil.getFileSystem("file:///" + tempFile.getAbsolutePath()); SequenceFile.Reader reader = null; try { conf = HadoopUtil.getCurrentConfiguration(); //noinspection deprecation reader = new SequenceFile.Reader(fs, new Path(tempFile.getAbsolutePath()), conf); LongWritable keyW = (LongWritable) ReflectionUtils.newInstance(reader.getKeyClass(), conf); BytesWritable valueW = (BytesWritable) ReflectionUtils.newInstance(reader.getValueClass(), conf); while (reader.next(keyW, valueW)) { if (keyW.get() == 0L) { // sampling percentage; averageSamplingPercentage += Bytes.toInt(valueW.getBytes()); } else if (keyW.get() > 0) { HLLCounter hll = new HLLCounter(kylinConfig.getCubeStatsHLLPrecision()); ByteArray byteArray = new ByteArray(valueW.getBytes()); hll.readRegisters(byteArray.asBuffer()); if (cuboidHLLMap.get(keyW.get()) != null) { cuboidHLLMap.get(keyW.get()).merge(hll); } else { cuboidHLLMap.put(keyW.get(), hll); } } } } catch (Exception e) { e.printStackTrace(); throw e; } finally { IOUtils.closeStream(reader); } } averageSamplingPercentage = averageSamplingPercentage / mergingSegments.size(); CubeStatsWriter.writeCuboidStatistics(conf, new Path(statOutputPath), cuboidHLLMap, averageSamplingPercentage); Path statisticsFilePath = new Path(statOutputPath, BatchConstants.CFG_STATISTICS_CUBOID_ESTIMATION_FILENAME); FileSystem fs = HadoopUtil.getFileSystem(statisticsFilePath, conf); FSDataInputStream fis = fs.open(statisticsFilePath); try { // put the statistics to metadata store String statisticsFileName = newSegment.getStatisticsResourcePath(); rs.putResource(statisticsFileName, fis, System.currentTimeMillis()); } finally { IOUtils.closeStream(fis); } context.write(new IntWritable(-1), new Text("")); } }
From source file:org.apache.kylin.engine.mr.steps.MergeStatisticsStep.java
License:Apache License
@Override @SuppressWarnings("deprecation") protected ExecuteResult doWork(ExecutableContext context) throws ExecuteException { final CubeManager mgr = CubeManager.getInstance(context.getConfig()); final CubeInstance cube = mgr.getCube(CubingExecutableUtil.getCubeName(this.getParams())); final CubeSegment newSegment = cube.getSegmentById(CubingExecutableUtil.getSegmentId(this.getParams())); KylinConfig kylinConf = cube.getConfig(); Configuration conf = HadoopUtil.getCurrentConfiguration(); ResourceStore rs = ResourceStore.getStore(kylinConf); try {//from w w w. j av a 2 s . c o m int averageSamplingPercentage = 0; for (String segmentId : CubingExecutableUtil.getMergingSegmentIds(this.getParams())) { String fileKey = CubeSegment .getStatisticsResourcePath(CubingExecutableUtil.getCubeName(this.getParams()), segmentId); InputStream is = rs.getResource(fileKey).inputStream; File tempFile = null; FileOutputStream tempFileStream = null; try { tempFile = File.createTempFile(segmentId, ".seq"); tempFileStream = new FileOutputStream(tempFile); org.apache.commons.io.IOUtils.copy(is, tempFileStream); } finally { IOUtils.closeStream(is); IOUtils.closeStream(tempFileStream); } FileSystem fs = HadoopUtil.getFileSystem("file:///" + tempFile.getAbsolutePath()); SequenceFile.Reader reader = null; try { reader = new SequenceFile.Reader(fs, new Path(tempFile.getAbsolutePath()), conf); LongWritable key = (LongWritable) ReflectionUtils.newInstance(reader.getKeyClass(), conf); BytesWritable value = (BytesWritable) ReflectionUtils.newInstance(reader.getValueClass(), conf); while (reader.next(key, value)) { if (key.get() == 0L) { // sampling percentage; averageSamplingPercentage += Bytes.toInt(value.getBytes()); } else if (key.get() > 0) { HLLCounter hll = new HLLCounter(kylinConf.getCubeStatsHLLPrecision()); ByteArray byteArray = new ByteArray(value.getBytes()); hll.readRegisters(byteArray.asBuffer()); if (cuboidHLLMap.get(key.get()) != null) { cuboidHLLMap.get(key.get()).merge(hll); } 
else { cuboidHLLMap.put(key.get(), hll); } } } } catch (Exception e) { e.printStackTrace(); throw e; } finally { IOUtils.closeStream(reader); if (tempFile != null) tempFile.delete(); } } averageSamplingPercentage = averageSamplingPercentage / CubingExecutableUtil.getMergingSegmentIds(this.getParams()).size(); CubeStatsWriter.writeCuboidStatistics(conf, new Path(CubingExecutableUtil.getMergedStatisticsPath(this.getParams())), cuboidHLLMap, averageSamplingPercentage); Path statisticsFilePath = new Path(CubingExecutableUtil.getMergedStatisticsPath(this.getParams()), BatchConstants.CFG_STATISTICS_CUBOID_ESTIMATION_FILENAME); FileSystem fs = HadoopUtil.getFileSystem(statisticsFilePath, conf); FSDataInputStream is = fs.open(statisticsFilePath); try { // put the statistics to metadata store String statisticsFileName = newSegment.getStatisticsResourcePath(); rs.putResource(statisticsFileName, is, System.currentTimeMillis()); } finally { IOUtils.closeStream(is); } return new ExecuteResult(ExecuteResult.State.SUCCEED, "succeed"); } catch (IOException e) { logger.error("fail to merge cuboid statistics", e); return new ExecuteResult(ExecuteResult.State.ERROR, e.getLocalizedMessage()); } }
From source file:org.apache.kylin.engine.mr.steps.RowKeyDistributionCheckerReducer.java
License:Apache License
@Override public void doReduce(Text key, Iterable<LongWritable> values, Context context) throws IOException, InterruptedException { long length = 0; for (LongWritable v : values) { length += v.get(); }// w w w . jav a 2 s . c o m outputKey.set(length); context.write(key, outputKey); }
From source file:org.apache.kylin.metadata.measure.fixedlen.FixedPointLongCodec.java
License:Apache License
/**
 * Writes the value of {@code v} into {@code buf} at {@code offset} using {@code SIZE} bytes.
 * A {@code null} argument is written as zero.
 *
 * @param v value to write, may be {@code null}
 * @param buf destination buffer
 * @param offset position in {@code buf} at which writing starts
 */
@Override
public void write(LongWritable v, byte[] buf, int offset) {
    long raw = 0;
    if (v != null) {
        raw = v.get();
    }
    BytesUtil.writeLong(raw, buf, offset, SIZE);
}
From source file:org.apache.kylin.source.hive.cardinality.ColumnCardinalityReducerTest.java
License:Apache License
@Test public void testReducer() throws IOException { IntWritable key1 = new IntWritable(1); List<BytesWritable> values1 = new ArrayList<BytesWritable>(); values1.add(new BytesWritable(getBytes(strArr))); IntWritable key2 = new IntWritable(2); List<BytesWritable> values2 = new ArrayList<BytesWritable>(); values2.add(new BytesWritable(getBytes(strArr + " x"))); IntWritable key3 = new IntWritable(3); List<BytesWritable> values3 = new ArrayList<BytesWritable>(); values3.add(new BytesWritable(getBytes(strArr + " xx"))); IntWritable key4 = new IntWritable(4); List<BytesWritable> values4 = new ArrayList<BytesWritable>(); values4.add(new BytesWritable(getBytes(strArr + " xxx"))); IntWritable key5 = new IntWritable(5); List<BytesWritable> values5 = new ArrayList<BytesWritable>(); values5.add(new BytesWritable(getBytes(strArr + " xxxx"))); reduceDriver.withInput(key1, values1); reduceDriver.withInput(key2, values2); reduceDriver.withInput(key3, values3); reduceDriver.withInput(key4, values4); reduceDriver.withInput(key5, values5); List<Pair<IntWritable, LongWritable>> result = reduceDriver.run(); assertEquals(5, result.size());//from w w w.j av a2s . c o m int outputKey1 = result.get(0).getFirst().get(); LongWritable value1 = result.get(0).getSecond(); assertTrue(outputKey1 == 1); assertTrue((10 == value1.get()) || (9 == value1.get())); }
From source file:org.apache.kylin.source.kafka.hadoop.KafkaFlatTableMapper.java
License:Apache License
@Override public void doMap(LongWritable key, BytesWritable value, Context context) throws IOException, InterruptedException { outKey.set(Bytes.toBytes(key.get())); outValue.set(value.getBytes(), 0, value.getLength()); context.write(outKey, outValue);//from www. ja v a 2 s . c o m }