List of usage examples for org.apache.hadoop.fs.FileSystem#open
public FSDataInputStream open(Path f) throws IOException
public FSDataInputStream open(PathHandle fd) throws IOException
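All of the examples below call the Path overload. As a minimal sketch of the common pattern before the longer examples (open a path from a configured FileSystem and read it line by line with try-with-resources); the class name, HDFS path, and configuration here are placeholders, not values taken from the examples:

// Minimal sketch, assuming a standard Hadoop client classpath; the file path
// below is hypothetical.
import java.io.BufferedReader;
import java.io.InputStreamReader;
import java.nio.charset.StandardCharsets;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class OpenExample {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        Path file = new Path("/tmp/example.txt"); // hypothetical path
        FileSystem fs = file.getFileSystem(conf);

        // open() returns an FSDataInputStream; closing it via try-with-resources
        // also releases the underlying stream it wraps.
        try (FSDataInputStream in = fs.open(file);
                BufferedReader reader = new BufferedReader(
                        new InputStreamReader(in, StandardCharsets.UTF_8))) {
            String line;
            while ((line = reader.readLine()) != null) {
                System.out.println(line);
            }
        }
    }
}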
From source file:com.alexholmes.hdfsslurper.WorkerThread.java
License:Apache License
private void process(FileStatus srcFileStatus) throws IOException, InterruptedException {
    Path stagingFile = null;
    FileSystem destFs = null;
    String filenameBatchidDelimiter = config.getFileNameBatchIdDelimiter();
    try {
        FileSystem srcFs = srcFileStatus.getPath().getFileSystem(config.getConfig());

        // run a script which can change the name of the file as well as
        // write out a new version of the file
        //
        if (config.getWorkScript() != null) {
            Path newSrcFile = stageSource(srcFileStatus);
            srcFileStatus = srcFileStatus.getPath().getFileSystem(config.getConfig()).getFileStatus(newSrcFile);
        }

        Path srcFile = srcFileStatus.getPath();

        // get the target HDFS file
        //
        Path destFile = getHdfsTargetPath(srcFileStatus);

        if (config.getCodec() != null) {
            String ext = config.getCodec().getDefaultExtension();
            if (!destFile.getName().endsWith(ext)) {
                destFile = new Path(destFile.toString() + ext);
            }
        }

        destFs = destFile.getFileSystem(config.getConfig());

        // get the staging HDFS file
        //
        stagingFile = fileSystemManager.getStagingFile(srcFileStatus, destFile);
        String batchId = srcFile.toString().substring(
                srcFile.toString().lastIndexOf(filenameBatchidDelimiter) + 1, srcFile.toString().length());

        log.info("event#Copying source file '" + srcFile + "' to staging destination '" + stagingFile + "'"
                + "$batchId#" + batchId);

        // if the directory of the target file doesn't exist, attempt to
        // create it
        //
        Path destParentDir = destFile.getParent();
        if (!destFs.exists(destParentDir)) {
            log.info("event#Attempting creation of target directory: " + destParentDir.toUri());
            if (!destFs.mkdirs(destParentDir)) {
                throw new IOException("event#Failed to create target directory: " + destParentDir.toUri());
            }
        }

        // if the staging directory doesn't exist, attempt to create it
        //
        Path destStagingParentDir = stagingFile.getParent();
        if (!destFs.exists(destStagingParentDir)) {
            log.info("event#Attempting creation of staging directory: " + destStagingParentDir.toUri());
            if (!destFs.mkdirs(destStagingParentDir)) {
                throw new IOException("event#Failed to create staging directory: " + destParentDir.toUri());
            }
        }

        // copy the file
        //
        InputStream is = null;
        OutputStream os = null;
        CRC32 crc = new CRC32();
        try {
            is = new BufferedInputStream(srcFs.open(srcFile));
            if (config.isVerify()) {
                is = new CheckedInputStream(is, crc);
            }
            os = destFs.create(stagingFile);

            if (config.getCodec() != null) {
                os = config.getCodec().createOutputStream(os);
            }

            IOUtils.copyBytes(is, os, 4096, false);
        } finally {
            IOUtils.closeStream(is);
            IOUtils.closeStream(os);
        }

        long srcFileSize = srcFs.getFileStatus(srcFile).getLen();
        long destFileSize = destFs.getFileStatus(stagingFile).getLen();
        if (config.getCodec() == null && srcFileSize != destFileSize) {
            throw new IOException(
                    "event#File sizes don't match, source = " + srcFileSize + ", dest = " + destFileSize);
        }

        log.info("event#Local file size = " + srcFileSize + ", HDFS file size = " + destFileSize
                + "$batchId#" + batchId);

        if (config.isVerify()) {
            verify(stagingFile, crc.getValue());
        }

        if (destFs.exists(destFile)) {
            destFs.delete(destFile, false);
        }

        log.info("event#Moving staging file '" + stagingFile + "' to destination '" + destFile + "'"
                + "$batchId#" + batchId);
        if (!destFs.rename(stagingFile, destFile)) {
            throw new IOException("event#Failed to rename file");
        }

        if (config.isCreateLzopIndex() && destFile.getName().endsWith(lzopExt)) {
            Path lzoIndexPath = new Path(destFile.toString() + LzoIndex.LZO_INDEX_SUFFIX);
            if (destFs.exists(lzoIndexPath)) {
                log.info("event#Deleting index file as it already exists");
                destFs.delete(lzoIndexPath, false);
            }
            indexer.index(destFile);
        }

        fileSystemManager.fileCopyComplete(srcFileStatus);
    } catch (Throwable t) {
        log.error("event#Caught exception working on file " + srcFileStatus.getPath(), t);

        // delete the staging file if it still exists
        //
        try {
            if (destFs != null && destFs.exists(stagingFile)) {
                destFs.delete(stagingFile, false);
            }
        } catch (Throwable t2) {
            log.error("event#Failed to delete staging file " + stagingFile, t2);
        }

        fileSystemManager.fileCopyError(srcFileStatus);
    }
}
From source file:com.alibaba.jstorm.hdfs.spout.FileLock.java
License:Apache License
/**
 * returns the last log entry
 * @param fs
 * @param lockFile
 * @return
 * @throws IOException
 */
public static LogEntry getLastEntry(FileSystem fs, Path lockFile) throws IOException {
    FSDataInputStream in = fs.open(lockFile);
    BufferedReader reader = new BufferedReader(new InputStreamReader(in));
    String lastLine = null;
    for (String line = reader.readLine(); line != null; line = reader.readLine()) {
        lastLine = line;
    }
    return LogEntry.deserialize(lastLine);
}
From source file:com.alibaba.jstorm.hdfs.spout.TextFileReader.java
License:Apache License
private TextFileReader(FileSystem fs, Path file, Map conf, TextFileReader.Offset startOffset) throws IOException {
    super(fs, file);
    offset = startOffset;
    FSDataInputStream in = fs.open(file);

    String charSet = (conf == null || !conf.containsKey(CHARSET)) ? "UTF-8" : conf.get(CHARSET).toString();
    int buffSz = (conf == null || !conf.containsKey(BUFFER_SIZE)) ? DEFAULT_BUFF_SIZE
            : Integer.parseInt(conf.get(BUFFER_SIZE).toString());
    reader = new BufferedReader(new InputStreamReader(in, charSet), buffSz);
    if (offset.charOffset > 0) {
        reader.skip(offset.charOffset);
    }
}
From source file:com.aliyun.fs.oss.common.OssRecordReader.java
License:Apache License
public OssRecordReader(Configuration job, FileSplit split, FileSystem fs, byte[] recordDelimiter)
        throws IOException {
    this.maxLineLength = job.getInt(org.apache.hadoop.mapreduce.lib.input.LineRecordReader.MAX_LINE_LENGTH,
            Integer.MAX_VALUE);
    start = split.getStart();
    end = start + split.getLength();
    final Path file = split.getPath();
    compressionCodecs = new CompressionCodecFactory(job);
    codec = compressionCodecs.getCodec(file);

    // open the file and seek to the start of the split
    fileIn = fs.open(file);
    if (isCompressedInput()) {
        decompressor = CodecPool.getDecompressor(codec);
        if (codec instanceof SplittableCompressionCodec) {
            final SplitCompressionInputStream cIn = ((SplittableCompressionCodec) codec).createInputStream(
                    fileIn, decompressor, start, end, SplittableCompressionCodec.READ_MODE.BYBLOCK);
            in = new LineReader(cIn, job, recordDelimiter);
            start = cIn.getAdjustedStart();
            end = cIn.getAdjustedEnd();
            filePosition = cIn; // take pos from compressed stream
        } else {
            in = new LineReader(codec.createInputStream(fileIn, decompressor), job, recordDelimiter);
            filePosition = fileIn;
        }
    } else {
        fileIn.seek(start);
        in = new LineReader(fileIn, job, recordDelimiter);
        filePosition = fileIn;
    }
    // If this is not the first split, we always throw away first record
    // because we always (except the last split) read one extra line in
    // next() method.
    if (start != 0) {
        start += in.readLine(new Text(), 0, maxBytesToConsume(start));
    }
    this.pos = start;
}
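Several of these examples seek within the opened stream before reading (the record reader above, and the Asakusa examples below). A minimal sketch of seeking and positioned reads on the FSDataInputStream returned by open(); the path and offsets here are hypothetical, chosen only to illustrate the calls:

// Minimal sketch, assuming a standard Hadoop client classpath.
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class SeekExample {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        Path file = new Path("/tmp/split-input.dat"); // hypothetical path
        FileSystem fs = file.getFileSystem(conf);
        try (FSDataInputStream in = fs.open(file)) {
            // Seekable: move the stream to the start of a split, then read forward.
            in.seek(128L);
            int firstByteOfSplit = in.read();

            // PositionedReadable: read a range at an absolute offset without
            // disturbing the current stream position.
            byte[] header = new byte[16];
            in.readFully(0L, header);

            System.out.println("pos=" + in.getPos() + ", firstByte=" + firstByteOfSplit);
        }
    }
}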
From source file:com.anhth12.lambda.ml.MLUpdate.java
@Override
public void runUpdate(JavaSparkContext sparkContext, long timestamp, JavaPairRDD<String, M> newKeyMessageData,
        JavaPairRDD<String, M> pastKeyMessageData, String modelDirString,
        TopicProducer<String, String> modelUpdateTopic) throws IOException, InterruptedException {
    Preconditions.checkNotNull(newKeyMessageData);

    JavaRDD<M> newData = newKeyMessageData.values();
    JavaRDD<M> pastData = pastKeyMessageData == null ? null : pastKeyMessageData.values();

    if (newData != null) {
        newData.cache();
        newData.foreachPartition(Functions.<Iterator<M>>noOp());
    }
    if (pastData != null) {
        pastData.cache();
        pastData.foreachPartition(Functions.<Iterator<M>>noOp());
    }

    List<HyperParamValues<?>> hyperParamValues = getHyperParamValues();
    int valuesPerHyperParam = HyperParams.chooseValuesPerHyperParam(hyperParamValues.size(), candidates);
    List<List<?>> hyperParameterCombos = HyperParams.chooseHyperParameterCombos(hyperParamValues, candidates,
            valuesPerHyperParam);

    FileSystem fs = FileSystem.get(sparkContext.hadoopConfiguration());

    Path modelDir = new Path(modelDirString);
    Path tempModelPath = new Path(modelDir, ".temporary");
    Path candiatesPath = new Path(tempModelPath, Long.toString(System.currentTimeMillis()));

    fs.mkdirs(candiatesPath);

    Path bestCandidatePath = findBestCandidatePath(sparkContext, newData, pastData, hyperParameterCombos,
            candiatesPath);

    Path finalPath = new Path(modelDir, Long.toString(System.currentTimeMillis()));

    if (bestCandidatePath == null) {
        log.info("Unable to build any model");
    } else {
        fs.rename(bestCandidatePath, finalPath);
    }

    fs.delete(candiatesPath, true);

    Path bestModelPath = new Path(finalPath, MODEL_FILE_NAME);

    if (fs.exists(bestModelPath)) {
        PMML bestModel;
        try (InputStream in = new GZIPInputStream(fs.open(finalPath), 1 << 16)) {
            bestModel = PMMLUtils.read(in);
        }

        modelUpdateTopic.send("MODEL", PMMLUtils.toString(bestModel));
        publishAdditionalModelData(sparkContext, bestModel, newData, pastData, candiatesPath, modelUpdateTopic);
    }

    if (newData != null) {
        newData.unpersist();
    }
    if (pastData != null) {
        pastData.unpersist();
    }
}
From source file:com.asakusafw.compiler.directio.DirectFileIoProcessorRunTest.java
License:Apache License
private List<String> get(String target) throws IOException {
    FileSystem fs = FileSystem.get(tester.configuration());
    List<String> results = new ArrayList<>();
    for (Path path : find(target)) {
        try (InputStream input = fs.open(path);
                Scanner s = new Scanner(new InputStreamReader(input, "UTF-8"))) {
            while (s.hasNextLine()) {
                results.add(s.nextLine());
            }
        }
    }
    return results;
}
From source file:com.asakusafw.dag.runtime.internalio.HadoopInternalInputTaskInfo.java
License:Apache License
/**
 * Opens a model input.
 * @param <T> the input data type
 * @param fileSystem the Hadoop file system
 * @param file the target file
 * @param blockOffset the block offset
 * @param blockLength the block length
 * @return the opened file
 * @throws IOException if I/O error was occurred while opening the file
 */
public static <T extends Writable> ModelInput<T> open(FileSystem fileSystem, Path file, int blockOffset,
        int blockLength) throws IOException {
    try (Initializer<FSDataInputStream> init = new Initializer<>(fileSystem.open(file))) {
        if (blockOffset > 0) {
            init.get().seek((long) blockOffset * TemporaryFile.BLOCK_SIZE);
        }
        return new TemporaryFileInput<>(init.done(), blockLength);
    }
}
From source file:com.asakusafw.m3bp.compiler.tester.externalio.TestIoTaskExecutor.java
License:Apache License
private <T extends Writable> void executeOutput(String name, Class<T> dataType, List<Path> paths)
        throws IOException {
    Action<Object, Exception> action = outputs.get(name);
    Invariants.requireNonNull(action, () -> MessageFormat.format("missing output: {0}", name));
    List<T> results = new ArrayList<>();
    for (Path pattern : paths) {
        FileSystem fs = pattern.getFileSystem(configuration);
        FileStatus[] stats = fs.globStatus(pattern);
        if (stats == null) {
            continue;
        }
        for (FileStatus stat : stats) {
            try (ModelInput<T> in = new TemporaryFileInput<>(fs.open(stat.getPath()), 0)) {
                while (true) {
                    T instance = dataType.newInstance();
                    if (in.readTo(instance)) {
                        results.add(instance);
                    } else {
                        break;
                    }
                }
            } catch (Error | RuntimeException | IOException e) {
                throw e;
            } catch (Exception e) {
                throw new AssertionError(e);
            }
        }
    }
    try {
        action.perform(results);
    } catch (Error | RuntimeException | IOException e) {
        throw e;
    } catch (Exception e) {
        throw new AssertionError(e);
    }
}
From source file:com.asakusafw.runtime.directio.hadoop.DirectIoTransactionEditor.java
License:Apache License
private TransactionInfo toInfoObject(FileStatus stat) throws IOException {
    assert stat != null;
    Path path = stat.getPath();
    String executionId = HadoopDataSourceUtil.getTransactionInfoExecutionId(path);
    long timestamp = stat.getModificationTime();
    List<String> comment = new ArrayList<>();
    Path commitMarkPath = HadoopDataSourceUtil.getCommitMarkPath(getConf(), executionId);
    FileSystem fs = path.getFileSystem(getConf());
    boolean committed = fs.exists(commitMarkPath);
    try (FSDataInputStream input = fs.open(path);
            Scanner scanner = new Scanner(new InputStreamReader(input, HadoopDataSourceUtil.COMMENT_CHARSET))) {
        while (scanner.hasNextLine()) {
            comment.add(scanner.nextLine());
        }
    } catch (IOException e) {
        comment.add(e.toString());
    }
    return new TransactionInfo(executionId, timestamp, committed, comment);
}
From source file:com.asakusafw.runtime.directio.hadoop.HadoopFileFormatAdapter.java
License:Apache License
@Override
public ModelInput<T> createInput(Class<? extends T> dataType, FileSystem fileSystem, final Path path,
        final long offset, final long fragmentSize, Counter counter) throws IOException, InterruptedException {
    FSDataInputStream stream = fileSystem.open(path);
    boolean succeed = false;
    try {
        if (LOG.isDebugEnabled()) {
            LOG.debug(MessageFormat.format(
                    "Process opening input [stream opened] (path={0}, offset={1}, size={2})", //$NON-NLS-1$
                    path, offset, fragmentSize));
        }
        if (offset != 0) {
            stream.seek(offset);
            if (LOG.isDebugEnabled()) {
                LOG.debug(MessageFormat.format(
                        "Process opening input [sought to offset] (path={0}, offset={1}, size={2})", //$NON-NLS-1$
                        path, offset, fragmentSize));
            }
        }
        CountInputStream cstream;
        if (LOG.isDebugEnabled()) {
            cstream = new CountInputStream(stream, counter) {
                @Override
                public void close() throws IOException {
                    LOG.debug(MessageFormat.format("Start closing input (path={0}, offset={1}, size={2})", //$NON-NLS-1$
                            path, offset, fragmentSize));
                    super.close();
                    LOG.debug(MessageFormat.format("Finish closing input (path={0}, offset={1}, size={2})", //$NON-NLS-1$
                            path, offset, fragmentSize));
                }
            };
        } else {
            cstream = new CountInputStream(stream, counter);
        }
        ModelInput<T> input = streamFormat.createInput(dataType, path.toString(), cstream, offset, fragmentSize);
        succeed = true;
        return input;
    } finally {
        if (succeed == false) {
            try {
                stream.close();
            } catch (IOException e) {
                LOG.warn(MessageFormat.format("Failed to close input (path={0}, offset={1}, size={2})",
                        path, offset, fragmentSize), e);
            }
        }
    }
}