Example usage for org.apache.hadoop.fs FileSystem open

List of usage examples for org.apache.hadoop.fs FileSystem open

Introduction

On this page you can find example usage for org.apache.hadoop.fs FileSystem open.

Prototype

public FSDataInputStream open(PathHandle fd) throws IOException 

Document

Open an FSDataInputStream matching the PathHandle instance.
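Note that FileSystem also provides the more common open(Path f) and open(Path f, int bufferSize) overloads; the usage examples below all open files by Path. The minimal sketch that follows is not taken from the examples on this page: the path is a placeholder, and the PathHandle variant requires a Hadoop 3.x release and a file system that implements getPathHandle (file systems without handle support throw UnsupportedOperationException).

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.PathHandle;
import org.apache.hadoop.io.IOUtils;

public class OpenExample {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(conf);
        Path file = new Path("/tmp/example.txt");   // placeholder path

        // Common overload: open the file by Path and copy it to stdout.
        try (FSDataInputStream in = fs.open(file)) {
            IOUtils.copyBytes(in, System.out, 4096, false);
        }

        // PathHandle overload from the prototype above: resolve a handle
        // from a FileStatus, then reopen the same file through the handle.
        // Only some file systems support handles.
        FileStatus stat = fs.getFileStatus(file);
        PathHandle handle = fs.getPathHandle(stat);
        try (FSDataInputStream in = fs.open(handle)) {
            IOUtils.copyBytes(in, System.out, 4096, false);
        }
    }
}

The Path overload is the one exercised by the examples in the Usage section below.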

Usage

From source file:com.alexholmes.hdfsslurper.WorkerThread.java

License:Apache License

private void process(FileStatus srcFileStatus) throws IOException, InterruptedException {

    Path stagingFile = null;
    FileSystem destFs = null;
    String filenameBatchidDelimiter = config.getFileNameBatchIdDelimiter();

    try {
        FileSystem srcFs = srcFileStatus.getPath().getFileSystem(config.getConfig());

        // run a script which can change the name of the file as well as
        // write out a new version of the file
        //
        if (config.getWorkScript() != null) {
            Path newSrcFile = stageSource(srcFileStatus);
            srcFileStatus = srcFileStatus.getPath().getFileSystem(config.getConfig()).getFileStatus(newSrcFile);
        }

        Path srcFile = srcFileStatus.getPath();

        // get the target HDFS file
        //
        Path destFile = getHdfsTargetPath(srcFileStatus);

        if (config.getCodec() != null) {
            String ext = config.getCodec().getDefaultExtension();
            if (!destFile.getName().endsWith(ext)) {
                destFile = new Path(destFile.toString() + ext);
            }
        }

        destFs = destFile.getFileSystem(config.getConfig());

        // get the staging HDFS file
        //
        stagingFile = fileSystemManager.getStagingFile(srcFileStatus, destFile);
        String batchId = srcFile.toString().substring(
                srcFile.toString().lastIndexOf(filenameBatchidDelimiter) + 1, srcFile.toString().length());

        log.info("event#Copying source file '" + srcFile + "' to staging destination '" + stagingFile + "'"
                + "$batchId#" + batchId);

        // if the directory of the target file doesn't exist, attempt to
        // create it
        //
        Path destParentDir = destFile.getParent();
        if (!destFs.exists(destParentDir)) {
            log.info("event#Attempting creation of target directory: " + destParentDir.toUri());
            if (!destFs.mkdirs(destParentDir)) {
                throw new IOException("event#Failed to create target directory: " + destParentDir.toUri());
            }
        }

        // if the staging directory doesn't exist, attempt to create it
        //
        Path destStagingParentDir = stagingFile.getParent();
        if (!destFs.exists(destStagingParentDir)) {
            log.info("event#Attempting creation of staging directory: " + destStagingParentDir.toUri());
            if (!destFs.mkdirs(destStagingParentDir)) {
                throw new IOException("event#Failed to create staging directory: " + destParentDir.toUri());
            }
        }

        // copy the file
        //
        InputStream is = null;
        OutputStream os = null;
        CRC32 crc = new CRC32();
        try {
            is = new BufferedInputStream(srcFs.open(srcFile));
            if (config.isVerify()) {
                is = new CheckedInputStream(is, crc);
            }
            os = destFs.create(stagingFile);

            if (config.getCodec() != null) {
                os = config.getCodec().createOutputStream(os);
            }

            IOUtils.copyBytes(is, os, 4096, false);
        } finally {
            IOUtils.closeStream(is);
            IOUtils.closeStream(os);
        }

        long srcFileSize = srcFs.getFileStatus(srcFile).getLen();
        long destFileSize = destFs.getFileStatus(stagingFile).getLen();
        if (config.getCodec() == null && srcFileSize != destFileSize) {
            throw new IOException(
                    "event#File sizes don't match, source = " + srcFileSize + ", dest = " + destFileSize);
        }

        log.info("event#Local file size = " + srcFileSize + ", HDFS file size = " + destFileSize + "$batchId#"
                + batchId);

        if (config.isVerify()) {
            verify(stagingFile, crc.getValue());
        }

        if (destFs.exists(destFile)) {
            destFs.delete(destFile, false);
        }

        log.info("event#Moving staging file '" + stagingFile + "' to destination '" + destFile + "'"
                + "$batchId#" + batchId);
        if (!destFs.rename(stagingFile, destFile)) {
            throw new IOException("event#Failed to rename file");
        }

        if (config.isCreateLzopIndex() && destFile.getName().endsWith(lzopExt)) {
            Path lzoIndexPath = new Path(destFile.toString() + LzoIndex.LZO_INDEX_SUFFIX);
            if (destFs.exists(lzoIndexPath)) {
                log.info("event#Deleting index file as it already exists");
                destFs.delete(lzoIndexPath, false);
            }
            indexer.index(destFile);
        }

        fileSystemManager.fileCopyComplete(srcFileStatus);

    } catch (Throwable t) {
        log.error("event#Caught exception working on file " + srcFileStatus.getPath(), t);

        // delete the staging file if it still exists
        //
        try {
            if (destFs != null && destFs.exists(stagingFile)) {
                destFs.delete(stagingFile, false);
            }
        } catch (Throwable t2) {
            log.error("event#Failed to delete staging file " + stagingFile, t2);
        }

        fileSystemManager.fileCopyError(srcFileStatus);
    }

}

From source file:com.alibaba.jstorm.hdfs.spout.FileLock.java

License:Apache License

/**
 * Returns the last entry in the given lock file.
 * @param fs the file system that holds the lock file
 * @param lockFile the lock file to read
 * @return the last log entry in the file
 * @throws IOException if the lock file cannot be read
 */
public static LogEntry getLastEntry(FileSystem fs, Path lockFile) throws IOException {
    FSDataInputStream in = fs.open(lockFile);
    BufferedReader reader = new BufferedReader(new InputStreamReader(in));
    String lastLine = null;
    for (String line = reader.readLine(); line != null; line = reader.readLine()) {
        lastLine = line;
    }
    return LogEntry.deserialize(lastLine);
}

From source file:com.alibaba.jstorm.hdfs.spout.TextFileReader.java

License:Apache License

private TextFileReader(FileSystem fs, Path file, Map conf, TextFileReader.Offset startOffset)
        throws IOException {
    super(fs, file);
    offset = startOffset;
    FSDataInputStream in = fs.open(file);

    String charSet = (conf == null || !conf.containsKey(CHARSET)) ? "UTF-8" : conf.get(CHARSET).toString();
    int buffSz = (conf == null || !conf.containsKey(BUFFER_SIZE)) ? DEFAULT_BUFF_SIZE
            : Integer.parseInt(conf.get(BUFFER_SIZE).toString());
    reader = new BufferedReader(new InputStreamReader(in, charSet), buffSz);
    if (offset.charOffset > 0) {
        reader.skip(offset.charOffset);
    }

}

From source file:com.aliyun.fs.oss.common.OssRecordReader.java

License:Apache License

public OssRecordReader(Configuration job, FileSplit split, FileSystem fs, byte[] recordDelimiter)
        throws IOException {
    this.maxLineLength = job.getInt(org.apache.hadoop.mapreduce.lib.input.LineRecordReader.MAX_LINE_LENGTH,
            Integer.MAX_VALUE);
    start = split.getStart();
    end = start + split.getLength();
    final Path file = split.getPath();
    compressionCodecs = new CompressionCodecFactory(job);
    codec = compressionCodecs.getCodec(file);

    // open the file and seek to the start of the split
    fileIn = fs.open(file);
    if (isCompressedInput()) {
        decompressor = CodecPool.getDecompressor(codec);
        if (codec instanceof SplittableCompressionCodec) {
            final SplitCompressionInputStream cIn = ((SplittableCompressionCodec) codec).createInputStream(
                    fileIn, decompressor, start, end, SplittableCompressionCodec.READ_MODE.BYBLOCK);
            in = new LineReader(cIn, job, recordDelimiter);
            start = cIn.getAdjustedStart();
            end = cIn.getAdjustedEnd();
            filePosition = cIn; // take pos from compressed stream
        } else {
            in = new LineReader(codec.createInputStream(fileIn, decompressor), job, recordDelimiter);
            filePosition = fileIn;
        }
    } else {
        fileIn.seek(start);
        in = new LineReader(fileIn, job, recordDelimiter);
        filePosition = fileIn;
    }
    // If this is not the first split, we always throw away first record
    // because we always (except the last split) read one extra line in
    // next() method.
    if (start != 0) {
        start += in.readLine(new Text(), 0, maxBytesToConsume(start));
    }
    this.pos = start;
}

From source file:com.anhth12.lambda.ml.MLUpdate.java

@Override
public void runUpdate(JavaSparkContext sparkContext, long timestamp, JavaPairRDD<String, M> newKeyMessageData,
        JavaPairRDD<String, M> pastKeyMessageData, String modelDirString,
        TopicProducer<String, String> modelUpdateTopic) throws IOException, InterruptedException {

    Preconditions.checkNotNull(newKeyMessageData);

    JavaRDD<M> newData = newKeyMessageData.values();
    JavaRDD<M> pastData = pastKeyMessageData == null ? null : pastKeyMessageData.values();

    if (newData != null) {
        newData.cache();
        newData.foreachPartition(Functions.<Iterator<M>>noOp());
    }
    if (pastData != null) {
        pastData.cache();
        pastData.foreachPartition(Functions.<Iterator<M>>noOp());
    }

    List<HyperParamValues<?>> hyperParamValues = getHyperParamValues();

    int valuesPerHyperParam = HyperParams.chooseValuesPerHyperParam(hyperParamValues.size(), candidates);

    List<List<?>> hyperParameterCombos = HyperParams.chooseHyperParameterCombos(hyperParamValues, candidates,
            valuesPerHyperParam);

    FileSystem fs = FileSystem.get(sparkContext.hadoopConfiguration());

    Path modelDir = new Path(modelDirString);
    Path tempModelPath = new Path(modelDir, ".temporary");
    Path candiatesPath = new Path(tempModelPath, Long.toString(System.currentTimeMillis()));
    fs.mkdirs(candiatesPath);

    Path bestCandidatePath = findBestCandidatePath(sparkContext, newData, pastData, hyperParameterCombos,
            candiatesPath);

    Path finalPath = new Path(modelDir, Long.toString(System.currentTimeMillis()));
    if (bestCandidatePath == null) {
        log.info("Unable to build any model");
    } else {
        fs.rename(bestCandidatePath, finalPath);
    }

    fs.delete(candiatesPath, true);

    Path bestModelPath = new Path(finalPath, MODEL_FILE_NAME);

    if (fs.exists(bestModelPath)) {
        PMML bestModel;
        try (InputStream in = new GZIPInputStream(fs.open(bestModelPath), 1 << 16)) {
            bestModel = PMMLUtils.read(in);
        }

        modelUpdateTopic.send("MODEL", PMMLUtils.toString(bestModel));
        publishAdditionalModelData(sparkContext, bestModel, newData, pastData, candiatesPath, modelUpdateTopic);
    }

    if (newData != null) {
        newData.unpersist();
    }

    if (pastData != null) {
        pastData.unpersist();
    }

}

From source file:com.asakusafw.compiler.directio.DirectFileIoProcessorRunTest.java

License:Apache License

private List<String> get(String target) throws IOException {
    FileSystem fs = FileSystem.get(tester.configuration());
    List<String> results = new ArrayList<>();
    for (Path path : find(target)) {
        try (InputStream input = fs.open(path);
                Scanner s = new Scanner(new InputStreamReader(input, "UTF-8"))) {
            while (s.hasNextLine()) {
                results.add(s.nextLine());
            }
        }
    }
    return results;
}

From source file:com.asakusafw.dag.runtime.internalio.HadoopInternalInputTaskInfo.java

License:Apache License

/**
 * Opens a model input.
 * @param <T> the input data type
 * @param fileSystem the Hadoop file system
 * @param file the target file
 * @param blockOffset the block offset
 * @param blockLength the block length
 * @return the opened file
 * @throws IOException if an I/O error occurs while opening the file
 */
public static <T extends Writable> ModelInput<T> open(FileSystem fileSystem, Path file, int blockOffset,
        int blockLength) throws IOException {
    try (Initializer<FSDataInputStream> init = new Initializer<>(fileSystem.open(file))) {
        if (blockOffset > 0) {
            init.get().seek((long) blockOffset * TemporaryFile.BLOCK_SIZE);
        }
        return new TemporaryFileInput<>(init.done(), blockLength);
    }
}

From source file:com.asakusafw.m3bp.compiler.tester.externalio.TestIoTaskExecutor.java

License:Apache License

private <T extends Writable> void executeOutput(String name, Class<T> dataType, List<Path> paths)
        throws IOException {
    Action<Object, Exception> action = outputs.get(name);
    Invariants.requireNonNull(action, () -> MessageFormat.format("missing output: {0}", name));
    List<T> results = new ArrayList<>();
    for (Path pattern : paths) {
        FileSystem fs = pattern.getFileSystem(configuration);
        FileStatus[] stats = fs.globStatus(pattern);
        if (stats == null) {
            continue;
        }
        for (FileStatus stat : stats) {
            try (ModelInput<T> in = new TemporaryFileInput<>(fs.open(stat.getPath()), 0)) {
                while (true) {
                    T instance = dataType.newInstance();
                    if (in.readTo(instance)) {
                        results.add(instance);
                    } else {
                        break;
                    }
                }
            } catch (Error | RuntimeException | IOException e) {
                throw e;
            } catch (Exception e) {
                throw new AssertionError(e);
            }
        }
    }
    try {
        action.perform(results);
    } catch (Error | RuntimeException | IOException e) {
        throw e;
    } catch (Exception e) {
        throw new AssertionError(e);
    }
}

From source file:com.asakusafw.runtime.directio.hadoop.DirectIoTransactionEditor.java

License:Apache License

private TransactionInfo toInfoObject(FileStatus stat) throws IOException {
    assert stat != null;
    Path path = stat.getPath();
    String executionId = HadoopDataSourceUtil.getTransactionInfoExecutionId(path);
    long timestamp = stat.getModificationTime();
    List<String> comment = new ArrayList<>();
    Path commitMarkPath = HadoopDataSourceUtil.getCommitMarkPath(getConf(), executionId);
    FileSystem fs = path.getFileSystem(getConf());
    boolean committed = fs.exists(commitMarkPath);
    try (FSDataInputStream input = fs.open(path);
            Scanner scanner = new Scanner(new InputStreamReader(input, HadoopDataSourceUtil.COMMENT_CHARSET))) {
        while (scanner.hasNextLine()) {
            comment.add(scanner.nextLine());
        }
    } catch (IOException e) {
        comment.add(e.toString());
    }

    return new TransactionInfo(executionId, timestamp, committed, comment);
}

From source file:com.asakusafw.runtime.directio.hadoop.HadoopFileFormatAdapter.java

License:Apache License

@Override
public ModelInput<T> createInput(Class<? extends T> dataType, FileSystem fileSystem, final Path path,
        final long offset, final long fragmentSize, Counter counter) throws IOException, InterruptedException {
    FSDataInputStream stream = fileSystem.open(path);
    boolean succeed = false;
    try {
        if (LOG.isDebugEnabled()) {
            LOG.debug(MessageFormat.format(
                    "Process opening input [stream opened] (path={0}, offset={1}, size={2})", //$NON-NLS-1$
                    path, offset, fragmentSize));
        }
        if (offset != 0) {
            stream.seek(offset);
            if (LOG.isDebugEnabled()) {
                LOG.debug(MessageFormat.format(
                        "Process opening input [sought to offset] (path={0}, offset={1}, size={2})", //$NON-NLS-1$
                        path, offset, fragmentSize));
            }
        }
        CountInputStream cstream;
        if (LOG.isDebugEnabled()) {
            cstream = new CountInputStream(stream, counter) {
                @Override
                public void close() throws IOException {
                    LOG.debug(MessageFormat.format("Start closing input (path={0}, offset={1}, size={2})", //$NON-NLS-1$
                            path, offset, fragmentSize));
                    super.close();
                    LOG.debug(MessageFormat.format("Finish closing input (path={0}, offset={1}, size={2})", //$NON-NLS-1$
                            path, offset, fragmentSize));
                }
            };
        } else {
            cstream = new CountInputStream(stream, counter);
        }
        ModelInput<T> input = streamFormat.createInput(dataType, path.toString(), cstream, offset,
                fragmentSize);
        succeed = true;
        return input;
    } finally {
        if (succeed == false) {
            try {
                stream.close();
            } catch (IOException e) {
                LOG.warn(MessageFormat.format("Failed to close input (path={0}, offset={1}, size={2})", path,
                        offset, fragmentSize), e);
            }
        }
    }
}