List of usage examples for org.apache.hadoop.io IOUtils closeStream
public static void closeStream(java.io.Closeable stream)
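closeStream is a null-safe convenience for finally blocks: it ignores a null argument and silently discards any IOException thrown by close(), so cleanup can never mask the primary exception. A minimal sketch of the canonical pattern, using a hypothetical input path:

import java.io.InputStream;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IOUtils;

public class CloseStreamExample {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(conf);
        InputStream in = null;
        try {
            in = fs.open(new Path("/tmp/example.txt")); // hypothetical path
            IOUtils.copyBytes(in, System.out, 4096, false); // do something with the stream
        } finally {
            // Safe even if in is null or open() failed; close() errors are swallowed.
            IOUtils.closeStream(in);
        }
    }
}

Because closeStream discards failures from close(), output streams whose final flush must succeed are usually closed with a plain close() call, with closeStream reserved for cleanup paths.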
From source file:org.apache.kylin.engine.mr.steps.MergeStatisticsStep.java
License:Apache License
@Override
@SuppressWarnings("deprecation")
protected ExecuteResult doWork(ExecutableContext context) throws ExecuteException {
    final CubeManager mgr = CubeManager.getInstance(context.getConfig());
    final CubeInstance cube = mgr.getCube(CubingExecutableUtil.getCubeName(this.getParams()));
    final CubeSegment newSegment = cube.getSegmentById(CubingExecutableUtil.getSegmentId(this.getParams()));
    KylinConfig kylinConf = cube.getConfig();
    Configuration conf = HadoopUtil.getCurrentConfiguration();
    ResourceStore rs = ResourceStore.getStore(kylinConf);
    try {
        int averageSamplingPercentage = 0;
        for (String segmentId : CubingExecutableUtil.getMergingSegmentIds(this.getParams())) {
            String fileKey = CubeSegment.getStatisticsResourcePath(CubingExecutableUtil.getCubeName(this.getParams()), segmentId);
            InputStream is = rs.getResource(fileKey).inputStream;
            File tempFile = null;
            FileOutputStream tempFileStream = null;
            try {
                tempFile = File.createTempFile(segmentId, ".seq");
                tempFileStream = new FileOutputStream(tempFile);
                org.apache.commons.io.IOUtils.copy(is, tempFileStream);
            } finally {
                IOUtils.closeStream(is);
                IOUtils.closeStream(tempFileStream);
            }
            FileSystem fs = HadoopUtil.getFileSystem("file:///" + tempFile.getAbsolutePath());
            SequenceFile.Reader reader = null;
            try {
                reader = new SequenceFile.Reader(fs, new Path(tempFile.getAbsolutePath()), conf);
                LongWritable key = (LongWritable) ReflectionUtils.newInstance(reader.getKeyClass(), conf);
                BytesWritable value = (BytesWritable) ReflectionUtils.newInstance(reader.getValueClass(), conf);
                while (reader.next(key, value)) {
                    if (key.get() == 0L) {
                        // sampling percentage
                        averageSamplingPercentage += Bytes.toInt(value.getBytes());
                    } else if (key.get() > 0) {
                        HLLCounter hll = new HLLCounter(kylinConf.getCubeStatsHLLPrecision());
                        ByteArray byteArray = new ByteArray(value.getBytes());
                        hll.readRegisters(byteArray.asBuffer());
                        if (cuboidHLLMap.get(key.get()) != null) {
                            cuboidHLLMap.get(key.get()).merge(hll);
                        } else {
                            cuboidHLLMap.put(key.get(), hll);
                        }
                    }
                }
            } catch (Exception e) {
                e.printStackTrace();
                throw e;
            } finally {
                IOUtils.closeStream(reader);
                if (tempFile != null)
                    tempFile.delete();
            }
        }
        averageSamplingPercentage = averageSamplingPercentage / CubingExecutableUtil.getMergingSegmentIds(this.getParams()).size();
        CubeStatsWriter.writeCuboidStatistics(conf, new Path(CubingExecutableUtil.getMergedStatisticsPath(this.getParams())), cuboidHLLMap, averageSamplingPercentage);
        Path statisticsFilePath = new Path(CubingExecutableUtil.getMergedStatisticsPath(this.getParams()), BatchConstants.CFG_STATISTICS_CUBOID_ESTIMATION_FILENAME);
        FileSystem fs = HadoopUtil.getFileSystem(statisticsFilePath, conf);
        FSDataInputStream is = fs.open(statisticsFilePath);
        try {
            // put the statistics to metadata store
            String statisticsFileName = newSegment.getStatisticsResourcePath();
            rs.putResource(statisticsFileName, is, System.currentTimeMillis());
        } finally {
            IOUtils.closeStream(is);
        }
        return new ExecuteResult(ExecuteResult.State.SUCCEED, "succeed");
    } catch (IOException e) {
        logger.error("fail to merge cuboid statistics", e);
        return new ExecuteResult(ExecuteResult.State.ERROR, e.getLocalizedMessage());
    }
}
From source file:org.apache.kylin.engine.mr.steps.RowKeyDistributionCheckerMapper.java
License:Apache License
@SuppressWarnings("deprecation") public byte[][] getSplits(Configuration conf, Path path) { List<byte[]> rowkeyList = new ArrayList<byte[]>(); SequenceFile.Reader reader = null; try {/*from w w w . j ava 2 s. com*/ reader = new SequenceFile.Reader(HadoopUtil.getFileSystem(path, conf), path, conf); Writable key = (Writable) ReflectionUtils.newInstance(reader.getKeyClass(), conf); Writable value = (Writable) ReflectionUtils.newInstance(reader.getValueClass(), conf); while (reader.next(key, value)) { byte[] tmp = ((Text) key).copyBytes(); if (rowkeyList.contains(tmp) == false) { rowkeyList.add(tmp); } } } catch (Exception e) { e.printStackTrace(); } finally { IOUtils.closeStream(reader); } byte[][] retValue = rowkeyList.toArray(new byte[rowkeyList.size()][]); return retValue; }
From source file:org.apache.kylin.engine.mr.steps.SaveStatisticsStep.java
License:Apache License
@Override
protected ExecuteResult doWork(ExecutableContext context) throws ExecuteException {
    CubeSegment newSegment = CubingExecutableUtil.findSegment(context, CubingExecutableUtil.getCubeName(this.getParams()), CubingExecutableUtil.getSegmentId(this.getParams()));
    KylinConfig kylinConf = newSegment.getConfig();
    ResourceStore rs = ResourceStore.getStore(kylinConf);
    try {
        FileSystem fs = HadoopUtil.getWorkingFileSystem();
        Path statisticsDir = new Path(CubingExecutableUtil.getStatisticsPath(this.getParams()));
        Path statisticsFilePath = HadoopUtil.getFilterOnlyPath(fs, statisticsDir, BatchConstants.CFG_OUTPUT_STATISTICS);
        if (statisticsFilePath == null) {
            throw new IOException("fail to find the statistics file in base dir: " + statisticsDir);
        }
        FSDataInputStream is = fs.open(statisticsFilePath);
        try {
            // put the statistics to metadata store
            String statisticsFileName = newSegment.getStatisticsResourcePath();
            rs.putResource(statisticsFileName, is, System.currentTimeMillis());
        } finally {
            IOUtils.closeStream(is);
        }
        decideCubingAlgorithm(newSegment, kylinConf);
        return new ExecuteResult(ExecuteResult.State.SUCCEED, "succeed");
    } catch (IOException e) {
        logger.error("fail to save cuboid statistics", e);
        return new ExecuteResult(ExecuteResult.State.ERROR, e.getLocalizedMessage());
    }
}
From source file:org.apache.kylin.engine.mr.steps.UpdateDictionaryStep.java
License:Apache License
@Override
protected ExecuteResult doWork(ExecutableContext context) throws ExecuteException {
    final CubeManager cubeMgr = CubeManager.getInstance(context.getConfig());
    final DictionaryManager dictMgrHdfs;
    final DictionaryManager dictMgrHbase;
    final CubeInstance cube = cubeMgr.getCube(CubingExecutableUtil.getCubeName(this.getParams()));
    final CubeSegment newSegment = cube.getSegmentById(CubingExecutableUtil.getSegmentId(this.getParams()));
    final List<CubeSegment> mergingSegments = getMergingSegments(cube);
    final String dictInfoPath = this.getParams().get(BatchConstants.ARG_DICT_PATH);
    final String metadataUrl = this.getParams().get(BatchConstants.ARG_META_URL);
    final KylinConfig kylinConfHbase = cube.getConfig();
    final KylinConfig kylinConfHdfs = KylinConfig.createInstanceFromUri(metadataUrl);

    Collections.sort(mergingSegments);

    try {
        Configuration conf = HadoopUtil.getCurrentConfiguration();
        FileSystem fs = HadoopUtil.getWorkingFileSystem();
        ResourceStore hbaseRS = ResourceStore.getStore(kylinConfHbase);
        ResourceStore hdfsRS = ResourceStore.getStore(kylinConfHdfs);
        dictMgrHdfs = DictionaryManager.getInstance(kylinConfHdfs);
        dictMgrHbase = DictionaryManager.getInstance(kylinConfHbase);

        // work on copy instead of cached objects
        CubeInstance cubeCopy = cube.latestCopyForWrite();
        CubeSegment newSegCopy = cubeCopy.getSegmentById(newSegment.getUuid());

        // update cube segment dictionary
        FileStatus[] fileStatuss = fs.listStatus(new Path(dictInfoPath), new PathFilter() {
            @Override
            public boolean accept(Path path) {
                return path.getName().startsWith("part");
            }
        });

        for (FileStatus fileStatus : fileStatuss) {
            Path filePath = fileStatus.getPath();
            SequenceFile.Reader reader = new SequenceFile.Reader(fs, filePath, conf);
            try {
                Text key = (Text) ReflectionUtils.newInstance(reader.getKeyClass(), conf);
                Text value = (Text) ReflectionUtils.newInstance(reader.getValueClass(), conf);
                while (reader.next(key, value)) {
                    String tblCol = key.toString();
                    String dictInfoResource = value.toString();
                    if (StringUtils.isNotEmpty(dictInfoResource)) {
                        logger.info(dictInfoResource);
                        // put dictionary file to metadata store
                        DictionaryInfo dictInfoHdfs = dictMgrHdfs.getDictionaryInfo(dictInfoResource);
                        DictionaryInfo dicInfoHbase = dictMgrHbase.trySaveNewDict(dictInfoHdfs.getDictionaryObject(), dictInfoHdfs);
                        if (dicInfoHbase != null) {
                            TblColRef tblColRef = cube.getDescriptor().findColumnRef(tblCol.split(":")[0], tblCol.split(":")[1]);
                            newSegCopy.putDictResPath(tblColRef, dicInfoHbase.getResourcePath());
                        }
                    }
                }
            } finally {
                // close in finally so the reader is released even if reading fails
                IOUtils.closeStream(reader);
            }
        }

        CubeSegment lastSeg = mergingSegments.get(mergingSegments.size() - 1);
        for (Map.Entry<String, String> entry : lastSeg.getSnapshots().entrySet()) {
            newSegCopy.putSnapshotResPath(entry.getKey(), entry.getValue());
        }

        // update statistics: put the statistics to metadata store
        String statisticsFileName = newSegment.getStatisticsResourcePath();
        hbaseRS.putResource(statisticsFileName, hdfsRS.getResource(newSegment.getStatisticsResourcePath()).inputStream, System.currentTimeMillis());

        CubeUpdate update = new CubeUpdate(cubeCopy);
        update.setToUpdateSegs(newSegCopy);
        cubeMgr.updateCube(update);

        return ExecuteResult.createSucceed();
    } catch (IOException e) {
        logger.error("fail to merge dictionary", e);
        return ExecuteResult.createError(e);
    }
}
From source file:org.apache.kylin.job.hadoop.hbase.CreateHTableJob.java
License:Apache License
@SuppressWarnings("deprecation") public byte[][] getSplits(Configuration conf, Path path) throws Exception { FileSystem fs = path.getFileSystem(conf); if (fs.exists(path) == false) { System.err.println("Path " + path + " not found, no region split, HTable will be one region"); return null; }/*ww w .jav a 2 s. c om*/ List<byte[]> rowkeyList = new ArrayList<byte[]>(); SequenceFile.Reader reader = null; try { reader = new SequenceFile.Reader(fs, path, conf); Writable key = (Writable) ReflectionUtils.newInstance(reader.getKeyClass(), conf); Writable value = (Writable) ReflectionUtils.newInstance(reader.getValueClass(), conf); while (reader.next(key, value)) { rowkeyList.add(((Text) key).copyBytes()); } } catch (Exception e) { e.printStackTrace(); throw e; } finally { IOUtils.closeStream(reader); } logger.info((rowkeyList.size() + 1) + " regions"); logger.info(rowkeyList.size() + " splits"); for (byte[] split : rowkeyList) { System.out.println(StringUtils.byteToHexString(split)); } byte[][] retValue = rowkeyList.toArray(new byte[rowkeyList.size()][]); return retValue.length == 0 ? null : retValue; }
From source file:org.apache.kylin.storage.hbase.steps.CreateHTableJob.java
License:Apache License
@SuppressWarnings("deprecation") public byte[][] getRegionSplits(Configuration conf, Path path) throws Exception { FileSystem fs = path.getFileSystem(conf); if (fs.exists(path) == false) { System.err.println("Path " + path + " not found, no region split, HTable will be one region"); return null; }//from ww w .j a va 2s . c o m List<byte[]> rowkeyList = new ArrayList<byte[]>(); SequenceFile.Reader reader = null; try { reader = new SequenceFile.Reader(fs, path, conf); Writable key = (Writable) ReflectionUtils.newInstance(reader.getKeyClass(), conf); Writable value = (Writable) ReflectionUtils.newInstance(reader.getValueClass(), conf); while (reader.next(key, value)) { rowkeyList.add(((Text) key).copyBytes()); } } catch (Exception e) { e.printStackTrace(); throw e; } finally { IOUtils.closeStream(reader); } logger.info((rowkeyList.size() + 1) + " regions"); logger.info(rowkeyList.size() + " splits"); for (byte[] split : rowkeyList) { logger.info(StringUtils.byteToHexString(split)); } byte[][] retValue = rowkeyList.toArray(new byte[rowkeyList.size()][]); return retValue.length == 0 ? null : retValue; }
From source file:org.apache.mahout.h2obindings.H2OHdfs.java
License:Apache License
/**
 * Internal method called from <code>drmFromFile</code> if the format is verified.
 */
public static H2ODrm drmFromSeqfile(String filename, int parMin) {
    long rows = 0;
    int cols = 0;
    Frame frame = null;
    Vec labels = null;
    SequenceFile.Reader reader = null;
    try {
        String uri = filename;
        Configuration conf = new Configuration();
        Path path = new Path(uri);
        FileSystem fs = FileSystem.get(URI.create(uri), conf);
        Vec.Writer[] writers;
        Vec.Writer labelwriter = null;
        boolean isIntKey = false, isLongKey = false, isStringKey = false;

        reader = new SequenceFile.Reader(fs, path, conf);
        if (reader.getValueClass() != VectorWritable.class) {
            System.out.println("ValueClass in file " + filename + " must be VectorWritable, but found " + reader.getValueClassName());
            return null;
        }

        Writable key = (Writable) ReflectionUtils.newInstance(reader.getKeyClass(), conf);
        VectorWritable value = (VectorWritable) ReflectionUtils.newInstance(reader.getValueClass(), conf);
        long start = reader.getPosition();

        if (reader.getKeyClass() == Text.class) {
            isStringKey = true;
        } else if (reader.getKeyClass() == LongWritable.class) {
            isLongKey = true;
        } else {
            isIntKey = true;
        }

        // First pass: determine the matrix dimensions.
        while (reader.next(key, value)) {
            if (cols == 0) {
                Vector v = value.get();
                cols = Math.max(v.size(), cols);
            }
            if (isLongKey) {
                rows = Math.max(((LongWritable) key).get() + 1, rows);
            }
            if (isIntKey) {
                rows = Math.max(((IntWritable) key).get() + 1, rows);
            }
            if (isStringKey) {
                rows++;
            }
        }
        reader.seek(start);

        frame = H2OHelper.emptyFrame(rows, cols, parMin, -1);
        writers = new Vec.Writer[cols];
        for (int i = 0; i < writers.length; i++) {
            writers[i] = frame.vecs()[i].open();
        }
        if (reader.getKeyClass() == Text.class) {
            labels = frame.anyVec().makeZero();
            labelwriter = labels.open();
        }

        // Second pass: fill the frame.
        long r = 0;
        while (reader.next(key, value)) {
            Vector v = value.get();
            if (isLongKey) {
                r = ((LongWritable) key).get();
            }
            if (isIntKey) {
                r = ((IntWritable) key).get();
            }
            for (int c = 0; c < v.size(); c++) {
                writers[c].set(r, v.getQuick(c));
            }
            if (labels != null) {
                labelwriter.set(r, ((Text) key).toString());
            }
            if (isStringKey) {
                r++;
            }
        }

        Futures fus = new Futures();
        for (Vec.Writer w : writers) {
            w.close(fus);
        }
        if (labelwriter != null) {
            labelwriter.close(fus);
        }
        fus.blockForPending();
    } catch (java.io.IOException e) {
        return null;
    } finally {
        IOUtils.closeStream(reader);
    }
    return new H2ODrm(frame, labels);
}
From source file:org.apache.mahout.utils.SplitInputTest.java
License:Apache License
/**
 * Create a SequenceFile for testing, consisting of IntWritable
 * keys and VectorWritable values.
 *
 * @param path       path for the test SequenceFile
 * @param testPoints number of records in the test SequenceFile
 */
private void writeVectorSequenceFile(Path path, int testPoints) throws IOException {
    Path tempSequenceFile = new Path(path, "part-00000");
    Configuration conf = getConfiguration();
    IntWritable key = new IntWritable();
    VectorWritable value = new VectorWritable();
    SequenceFile.Writer writer = null;
    try {
        writer = SequenceFile.createWriter(fs, conf, tempSequenceFile, IntWritable.class, VectorWritable.class);
        for (int i = 0; i < testPoints; i++) {
            key.set(i);
            Vector v = new SequentialAccessSparseVector(4);
            v.assign(i);
            value.set(v);
            writer.append(key, value);
        }
    } finally {
        IOUtils.closeStream(writer);
    }
}
From source file:org.apache.mahout.utils.SplitInputTest.java
License:Apache License
/**
 * Create a SequenceFile for testing, consisting of IntWritable keys and Text values.
 *
 * @param path       path for the test SequenceFile
 * @param testPoints number of records in the test SequenceFile
 */
private void writeTextSequenceFile(Path path, int testPoints) throws IOException {
    Path tempSequenceFile = new Path(path, "part-00000");
    Configuration conf = getConfiguration();
    Text key = new Text();
    Text value = new Text();
    SequenceFile.Writer writer = null;
    try {
        writer = SequenceFile.createWriter(fs, conf, tempSequenceFile, Text.class, Text.class);
        for (int i = 0; i < testPoints; i++) {
            key.set(Integer.toString(i));
            value.set("Line " + i);
            writer.append(key, value);
        }
    } finally {
        IOUtils.closeStream(writer);
    }
}
From source file:org.apache.mahout.utils.vectors.lucene.SeqFilePrint.java
License:Apache License
@Override
public int run(String[] strings) throws Exception {
    Configuration conf = getConf();
    FileSystem fs = FileSystem.get(conf);
    Path inputPath = new Path(this.inputSeqFile);
    File textOutFile = new File(this.outFile);
    Writer writer = Files.newWriter(textOutFile, Charsets.UTF_8);
    SequenceFile.Reader reader = null;
    try {
        reader = new SequenceFile.Reader(fs, inputPath, conf);
        Writable key = (Writable) ReflectionUtils.newInstance(reader.getKeyClass(), conf);
        Writable value = (Writable) ReflectionUtils.newInstance(reader.getValueClass(), conf);
        while (reader.next(key, value)) {
            writer.write(key.toString());
            writer.write(value.toString());
            writer.write('\n');
        }
    } catch (Exception e) {
        e.printStackTrace();
    } finally {
        IOUtils.closeStream(reader);
        Closeables.close(writer, false);
    }
    return 0;
}