Example usage for org.apache.hadoop.io IOUtils closeStream

List of usage examples for org.apache.hadoop.io IOUtils closeStream

Introduction

On this page you can find example usage of org.apache.hadoop.io IOUtils closeStream.

Prototype

public static void closeStream(java.io.Closeable stream) 

Document

Closes the stream, ignoring any Throwable.
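
Before the project examples below, here is a minimal sketch (not taken from any of the projects listed) of the typical pattern: do the I/O inside a try block and call closeStream on each stream in the finally block, so that a failed or null stream can neither throw nor mask an earlier exception. The class name CloseStreamExample and the copyLocalFile helper are illustrative only; copyBytes is Hadoop's own helper for copying between streams.

import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;

import org.apache.hadoop.io.IOUtils;

public class CloseStreamExample {

    // Illustrative helper: copy a local file, closing both streams with closeStream.
    public static void copyLocalFile(String src, String dst) throws IOException {
        FileInputStream in = null;
        FileOutputStream out = null;
        try {
            in = new FileInputStream(src);
            out = new FileOutputStream(dst);
            // copy with a 4 KB buffer; 'false' means copyBytes does not close the streams itself
            IOUtils.copyBytes(in, out, 4096, false);
        } finally {
            // closeStream ignores null arguments and swallows anything thrown by close(),
            // so an exception from the try block above is never masked
            IOUtils.closeStream(in);
            IOUtils.closeStream(out);
        }
    }

    public static void main(String[] args) throws IOException {
        copyLocalFile(args[0], args[1]);
    }
}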

Usage

From source file:org.apache.kylin.engine.mr.steps.MergeStatisticsStep.java

License:Apache License

@Override
@SuppressWarnings("deprecation")
protected ExecuteResult doWork(ExecutableContext context) throws ExecuteException {
    final CubeManager mgr = CubeManager.getInstance(context.getConfig());
    final CubeInstance cube = mgr.getCube(CubingExecutableUtil.getCubeName(this.getParams()));
    final CubeSegment newSegment = cube.getSegmentById(CubingExecutableUtil.getSegmentId(this.getParams()));
    KylinConfig kylinConf = cube.getConfig();

    Configuration conf = HadoopUtil.getCurrentConfiguration();
    ResourceStore rs = ResourceStore.getStore(kylinConf);
    try {

        int averageSamplingPercentage = 0;
        for (String segmentId : CubingExecutableUtil.getMergingSegmentIds(this.getParams())) {
            String fileKey = CubeSegment
                    .getStatisticsResourcePath(CubingExecutableUtil.getCubeName(this.getParams()), segmentId);
            InputStream is = rs.getResource(fileKey).inputStream;
            File tempFile = null;
            FileOutputStream tempFileStream = null;
            try {
                tempFile = File.createTempFile(segmentId, ".seq");
                tempFileStream = new FileOutputStream(tempFile);
                org.apache.commons.io.IOUtils.copy(is, tempFileStream);
            } finally {
                IOUtils.closeStream(is);
                IOUtils.closeStream(tempFileStream);
            }

            FileSystem fs = HadoopUtil.getFileSystem("file:///" + tempFile.getAbsolutePath());
            SequenceFile.Reader reader = null;
            try {
                reader = new SequenceFile.Reader(fs, new Path(tempFile.getAbsolutePath()), conf);
                LongWritable key = (LongWritable) ReflectionUtils.newInstance(reader.getKeyClass(), conf);
                BytesWritable value = (BytesWritable) ReflectionUtils.newInstance(reader.getValueClass(), conf);
                while (reader.next(key, value)) {
                    if (key.get() == 0L) {
                        // sampling percentage;
                        averageSamplingPercentage += Bytes.toInt(value.getBytes());
                    } else if (key.get() > 0) {
                        HLLCounter hll = new HLLCounter(kylinConf.getCubeStatsHLLPrecision());
                        ByteArray byteArray = new ByteArray(value.getBytes());
                        hll.readRegisters(byteArray.asBuffer());

                        if (cuboidHLLMap.get(key.get()) != null) {
                            cuboidHLLMap.get(key.get()).merge(hll);
                        } else {
                            cuboidHLLMap.put(key.get(), hll);
                        }
                    }
                }
            } catch (Exception e) {
                e.printStackTrace();
                throw e;
            } finally {
                IOUtils.closeStream(reader);
                if (tempFile != null)
                    tempFile.delete();
            }
        }
        averageSamplingPercentage = averageSamplingPercentage
                / CubingExecutableUtil.getMergingSegmentIds(this.getParams()).size();
        CubeStatsWriter.writeCuboidStatistics(conf,
                new Path(CubingExecutableUtil.getMergedStatisticsPath(this.getParams())), cuboidHLLMap,
                averageSamplingPercentage);
        Path statisticsFilePath = new Path(CubingExecutableUtil.getMergedStatisticsPath(this.getParams()),
                BatchConstants.CFG_STATISTICS_CUBOID_ESTIMATION_FILENAME);
        FileSystem fs = HadoopUtil.getFileSystem(statisticsFilePath, conf);
        FSDataInputStream is = fs.open(statisticsFilePath);
        try {
            // put the statistics to metadata store
            String statisticsFileName = newSegment.getStatisticsResourcePath();
            rs.putResource(statisticsFileName, is, System.currentTimeMillis());
        } finally {
            IOUtils.closeStream(is);
        }

        return new ExecuteResult(ExecuteResult.State.SUCCEED, "succeed");
    } catch (IOException e) {
        logger.error("fail to merge cuboid statistics", e);
        return new ExecuteResult(ExecuteResult.State.ERROR, e.getLocalizedMessage());
    }
}

From source file:org.apache.kylin.engine.mr.steps.RowKeyDistributionCheckerMapper.java

License:Apache License

@SuppressWarnings("deprecation")
public byte[][] getSplits(Configuration conf, Path path) {
    List<byte[]> rowkeyList = new ArrayList<byte[]>();
    SequenceFile.Reader reader = null;
    try {
        reader = new SequenceFile.Reader(HadoopUtil.getFileSystem(path, conf), path, conf);
        Writable key = (Writable) ReflectionUtils.newInstance(reader.getKeyClass(), conf);
        Writable value = (Writable) ReflectionUtils.newInstance(reader.getValueClass(), conf);
        while (reader.next(key, value)) {
            byte[] tmp = ((Text) key).copyBytes();
            if (rowkeyList.contains(tmp) == false) {
                rowkeyList.add(tmp);
            }
        }
    } catch (Exception e) {
        e.printStackTrace();
    } finally {
        IOUtils.closeStream(reader);
    }

    byte[][] retValue = rowkeyList.toArray(new byte[rowkeyList.size()][]);

    return retValue;
}

From source file:org.apache.kylin.engine.mr.steps.SaveStatisticsStep.java

License:Apache License

@Override
protected ExecuteResult doWork(ExecutableContext context) throws ExecuteException {
    CubeSegment newSegment = CubingExecutableUtil.findSegment(context,
            CubingExecutableUtil.getCubeName(this.getParams()),
            CubingExecutableUtil.getSegmentId(this.getParams()));
    KylinConfig kylinConf = newSegment.getConfig();

    ResourceStore rs = ResourceStore.getStore(kylinConf);
    try {
        FileSystem fs = HadoopUtil.getWorkingFileSystem();
        Path statisticsDir = new Path(CubingExecutableUtil.getStatisticsPath(this.getParams()));
        Path statisticsFilePath = HadoopUtil.getFilterOnlyPath(fs, statisticsDir,
                BatchConstants.CFG_OUTPUT_STATISTICS);
        if (statisticsFilePath == null) {
            throw new IOException("fail to find the statistics file in base dir: " + statisticsDir);
        }

        FSDataInputStream is = fs.open(statisticsFilePath);
        try {
            // put the statistics to metadata store
            String statisticsFileName = newSegment.getStatisticsResourcePath();
            rs.putResource(statisticsFileName, is, System.currentTimeMillis());
        } finally {
            IOUtils.closeStream(is);
        }

        decideCubingAlgorithm(newSegment, kylinConf);

        return new ExecuteResult(ExecuteResult.State.SUCCEED, "succeed");
    } catch (IOException e) {
        logger.error("fail to save cuboid statistics", e);
        return new ExecuteResult(ExecuteResult.State.ERROR, e.getLocalizedMessage());
    }
}

From source file:org.apache.kylin.engine.mr.steps.UpdateDictionaryStep.java

License:Apache License

@Override
protected ExecuteResult doWork(ExecutableContext context) throws ExecuteException {
    final CubeManager cubeMgr = CubeManager.getInstance(context.getConfig());
    final DictionaryManager dictMgrHdfs;
    final DictionaryManager dictMgrHbase;
    final CubeInstance cube = cubeMgr.getCube(CubingExecutableUtil.getCubeName(this.getParams()));
    final CubeSegment newSegment = cube.getSegmentById(CubingExecutableUtil.getSegmentId(this.getParams()));
    final List<CubeSegment> mergingSegments = getMergingSegments(cube);
    final String dictInfoPath = this.getParams().get(BatchConstants.ARG_DICT_PATH);
    final String metadataUrl = this.getParams().get(BatchConstants.ARG_META_URL);

    final KylinConfig kylinConfHbase = cube.getConfig();
    final KylinConfig kylinConfHdfs = KylinConfig.createInstanceFromUri(metadataUrl);

    Collections.sort(mergingSegments);

    try {
        Configuration conf = HadoopUtil.getCurrentConfiguration();
        FileSystem fs = HadoopUtil.getWorkingFileSystem();
        ResourceStore hbaseRS = ResourceStore.getStore(kylinConfHbase);
        ResourceStore hdfsRS = ResourceStore.getStore(kylinConfHdfs);
        dictMgrHdfs = DictionaryManager.getInstance(kylinConfHdfs);
        dictMgrHbase = DictionaryManager.getInstance(kylinConfHbase);

        // work on copy instead of cached objects
        CubeInstance cubeCopy = cube.latestCopyForWrite();
        CubeSegment newSegCopy = cubeCopy.getSegmentById(newSegment.getUuid());

        // update cube segment dictionary

        FileStatus[] fileStatuss = fs.listStatus(new Path(dictInfoPath), new PathFilter() {
            @Override
            public boolean accept(Path path) {
                return path.getName().startsWith("part");
            }
        });

        for (FileStatus fileStatus : fileStatuss) {
            Path filePath = fileStatus.getPath();

            SequenceFile.Reader reader = new SequenceFile.Reader(fs, filePath, conf);
            Text key = (Text) ReflectionUtils.newInstance(reader.getKeyClass(), conf);
            Text value = (Text) ReflectionUtils.newInstance(reader.getValueClass(), conf);

            while (reader.next(key, value)) {
                String tblCol = key.toString();
                String dictInfoResource = value.toString();

                if (StringUtils.isNotEmpty(dictInfoResource)) {
                    logger.info(dictInfoResource);
                    // put dictionary file to metadata store
                    DictionaryInfo dictInfoHdfs = dictMgrHdfs.getDictionaryInfo(dictInfoResource);
                    DictionaryInfo dicInfoHbase = dictMgrHbase
                            .trySaveNewDict(dictInfoHdfs.getDictionaryObject(), dictInfoHdfs);

                    if (dicInfoHbase != null) {
                        TblColRef tblColRef = cube.getDescriptor().findColumnRef(tblCol.split(":")[0],
                                tblCol.split(":")[1]);
                        newSegCopy.putDictResPath(tblColRef, dicInfoHbase.getResourcePath());
                    }
                }
            }

            IOUtils.closeStream(reader);
        }

        CubeSegment lastSeg = mergingSegments.get(mergingSegments.size() - 1);
        for (Map.Entry<String, String> entry : lastSeg.getSnapshots().entrySet()) {
            newSegCopy.putSnapshotResPath(entry.getKey(), entry.getValue());
        }

        // update statistics
        // put the statistics to metadata store
        String statisticsFileName = newSegment.getStatisticsResourcePath();
        hbaseRS.putResource(statisticsFileName,
                hdfsRS.getResource(newSegment.getStatisticsResourcePath()).inputStream,
                System.currentTimeMillis());

        CubeUpdate update = new CubeUpdate(cubeCopy);
        update.setToUpdateSegs(newSegCopy);
        cubeMgr.updateCube(update);

        return ExecuteResult.createSucceed();
    } catch (IOException e) {
        logger.error("fail to merge dictionary", e);
        return ExecuteResult.createError(e);
    }
}

From source file:org.apache.kylin.job.hadoop.hbase.CreateHTableJob.java

License:Apache License

@SuppressWarnings("deprecation")
public byte[][] getSplits(Configuration conf, Path path) throws Exception {
    FileSystem fs = path.getFileSystem(conf);
    if (fs.exists(path) == false) {
        System.err.println("Path " + path + " not found, no region split, HTable will be one region");
        return null;
    }

    List<byte[]> rowkeyList = new ArrayList<byte[]>();
    SequenceFile.Reader reader = null;
    try {
        reader = new SequenceFile.Reader(fs, path, conf);
        Writable key = (Writable) ReflectionUtils.newInstance(reader.getKeyClass(), conf);
        Writable value = (Writable) ReflectionUtils.newInstance(reader.getValueClass(), conf);
        while (reader.next(key, value)) {
            rowkeyList.add(((Text) key).copyBytes());
        }
    } catch (Exception e) {
        e.printStackTrace();
        throw e;
    } finally {
        IOUtils.closeStream(reader);
    }

    logger.info((rowkeyList.size() + 1) + " regions");
    logger.info(rowkeyList.size() + " splits");
    for (byte[] split : rowkeyList) {
        System.out.println(StringUtils.byteToHexString(split));
    }

    byte[][] retValue = rowkeyList.toArray(new byte[rowkeyList.size()][]);
    return retValue.length == 0 ? null : retValue;
}

From source file:org.apache.kylin.storage.hbase.steps.CreateHTableJob.java

License:Apache License

@SuppressWarnings("deprecation")
public byte[][] getRegionSplits(Configuration conf, Path path) throws Exception {
    FileSystem fs = path.getFileSystem(conf);
    if (fs.exists(path) == false) {
        System.err.println("Path " + path + " not found, no region split, HTable will be one region");
        return null;
    }

    List<byte[]> rowkeyList = new ArrayList<byte[]>();
    SequenceFile.Reader reader = null;
    try {
        reader = new SequenceFile.Reader(fs, path, conf);
        Writable key = (Writable) ReflectionUtils.newInstance(reader.getKeyClass(), conf);
        Writable value = (Writable) ReflectionUtils.newInstance(reader.getValueClass(), conf);
        while (reader.next(key, value)) {
            rowkeyList.add(((Text) key).copyBytes());
        }
    } catch (Exception e) {
        e.printStackTrace();
        throw e;
    } finally {
        IOUtils.closeStream(reader);
    }

    logger.info((rowkeyList.size() + 1) + " regions");
    logger.info(rowkeyList.size() + " splits");
    for (byte[] split : rowkeyList) {
        logger.info(StringUtils.byteToHexString(split));
    }

    byte[][] retValue = rowkeyList.toArray(new byte[rowkeyList.size()][]);
    return retValue.length == 0 ? null : retValue;
}

From source file:org.apache.mahout.h2obindings.H2OHdfs.java

License:Apache License

/**
 * Internal method called from <code>drmFromFile</code> if format verified.
 */
public static H2ODrm drmFromSeqfile(String filename, int parMin) {
    long rows = 0;
    int cols = 0;
    Frame frame = null;
    Vec labels = null;

    SequenceFile.Reader reader = null;
    try {
        String uri = filename;
        Configuration conf = new Configuration();
        Path path = new Path(uri);
        FileSystem fs = FileSystem.get(URI.create(uri), conf);
        Vec.Writer writers[];
        Vec.Writer labelwriter = null;
        boolean isIntKey = false, isLongKey = false, isStringKey = false;

        reader = new SequenceFile.Reader(fs, path, conf);

        if (reader.getValueClass() != VectorWritable.class) {
            System.out.println("ValueClass in file " + filename + " must be VectorWritable, but found "
                    + reader.getValueClassName());
            return null;
        }

        Writable key = (Writable) ReflectionUtils.newInstance(reader.getKeyClass(), conf);
        VectorWritable value = (VectorWritable) ReflectionUtils.newInstance(reader.getValueClass(), conf);

        long start = reader.getPosition();

        if (reader.getKeyClass() == Text.class) {
            isStringKey = true;
        } else if (reader.getKeyClass() == LongWritable.class) {
            isLongKey = true;
        } else {
            isIntKey = true;
        }

        while (reader.next(key, value)) {
            if (cols == 0) {
                Vector v = value.get();
                cols = Math.max(v.size(), cols);
            }
            if (isLongKey) {
                rows = Math.max(((LongWritable) (key)).get() + 1, rows);
            }
            if (isIntKey) {
                rows = Math.max(((IntWritable) (key)).get() + 1, rows);
            }
            if (isStringKey) {
                rows++;
            }
        }
        reader.seek(start);

        frame = H2OHelper.emptyFrame(rows, cols, parMin, -1);
        writers = new Vec.Writer[cols];
        for (int i = 0; i < writers.length; i++) {
            writers[i] = frame.vecs()[i].open();
        }

        if (reader.getKeyClass() == Text.class) {
            labels = frame.anyVec().makeZero();
            labelwriter = labels.open();
        }

        long r = 0;
        while (reader.next(key, value)) {
            Vector v = value.get();
            if (isLongKey) {
                r = ((LongWritable) (key)).get();
            }
            if (isIntKey) {
                r = ((IntWritable) (key)).get();
            }
            for (int c = 0; c < v.size(); c++) {
                writers[c].set(r, v.getQuick(c));
            }
            if (labels != null) {
                labelwriter.set(r, ((Text) key).toString());
            }
            if (isStringKey) {
                r++;
            }
        }

        Futures fus = new Futures();
        for (Vec.Writer w : writers) {
            w.close(fus);
        }
        if (labelwriter != null) {
            labelwriter.close(fus);
        }
        fus.blockForPending();
    } catch (java.io.IOException e) {
        return null;
    } finally {
        IOUtils.closeStream(reader);
    }
    return new H2ODrm(frame, labels);
}

From source file:org.apache.mahout.utils.SplitInputTest.java

License:Apache License

/**
 * Create a Sequencefile for testing consisting of IntWritable
 * keys and VectorWritable values
 * @param path path for test SequenceFile
 * @param testPoints number of records in test SequenceFile
 */
private void writeVectorSequenceFile(Path path, int testPoints) throws IOException {
    Path tempSequenceFile = new Path(path, "part-00000");
    Configuration conf = getConfiguration();
    IntWritable key = new IntWritable();
    VectorWritable value = new VectorWritable();
    SequenceFile.Writer writer = null;
    try {
        writer = SequenceFile.createWriter(fs, conf, tempSequenceFile, IntWritable.class, VectorWritable.class);
        for (int i = 0; i < testPoints; i++) {
            key.set(i);
            Vector v = new SequentialAccessSparseVector(4);
            v.assign(i);
            value.set(v);
            writer.append(key, value);
        }
    } finally {
        IOUtils.closeStream(writer);
    }
}

From source file:org.apache.mahout.utils.SplitInputTest.java

License:Apache License

/**
 * Create a Sequencefile for testing consisting of IntWritable keys and Text values
 * @param path path for test SequenceFile
 * @param testPoints number of records in test SequenceFile
 */
private void writeTextSequenceFile(Path path, int testPoints) throws IOException {
    Path tempSequenceFile = new Path(path, "part-00000");
    Configuration conf = getConfiguration();
    Text key = new Text();
    Text value = new Text();
    SequenceFile.Writer writer = null;
    try {
        writer = SequenceFile.createWriter(fs, conf, tempSequenceFile, Text.class, Text.class);
        for (int i = 0; i < testPoints; i++) {
            key.set(Integer.toString(i));
            value.set("Line " + i);
            writer.append(key, value);
        }
    } finally {
        IOUtils.closeStream(writer);
    }
}

From source file:org.apache.mahout.utils.vectors.lucene.SeqFilePrint.java

License:Apache License

@Override
public int run(String[] strings) throws Exception {
    Configuration conf = getConf();
    FileSystem fs = FileSystem.get(conf);
    Path inputPath = new Path(this.inputSeqFile);
    BufferedWriter br = null;

    File textOutFile = new File(this.outFile);
    Writer writer = Files.newWriter(textOutFile, Charsets.UTF_8);

    SequenceFile.Reader reader = null;
    try {
        reader = new SequenceFile.Reader(fs, inputPath, conf);
        Writable key = (Writable) ReflectionUtils.newInstance(reader.getKeyClass(), conf);
        Writable value = (Writable) ReflectionUtils.newInstance(reader.getValueClass(), conf);
        while (reader.next(key, value)) {
            writer.write(key.toString());
            writer.write(value.toString());
            writer.write('\n');
        }
    } catch (Exception e) {
        e.printStackTrace();
    } finally {
        IOUtils.closeStream(reader);
        Closeables.close(writer, false);
    }

    return 0;
}