List of usage examples for org.apache.hadoop.fs.FileSystem#append
public FSDataOutputStream append(Path f, int bufferSize) throws IOException
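Before the project-specific examples, here is a minimal, self-contained sketch of the call itself. The path and the 4096-byte buffer size are illustrative: the target file must already exist, and not every FileSystem implementation supports append, so the call can fail with an IOException (or UnsupportedOperationException) on stores that do not.

import java.io.IOException;
import java.nio.charset.StandardCharsets;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class AppendExample {
    public static void main(String[] args) throws IOException {
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(conf); // cached, shared instance; deliberately not closed here
        Path path = new Path("/tmp/example.log"); // illustrative; must already exist
        try (FSDataOutputStream out = fs.append(path, 4096)) {
            out.write("one more line\n".getBytes(StandardCharsets.UTF_8));
            out.hsync(); // push the new bytes to the datanodes before close, if durability matters
        }
    }
}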
From source file: com.cloudera.hoop.fs.FSAppend.java
License: Open Source License
/**
 * Executes the filesystem operation.
 *
 * @param fs filesystem instance to use.
 * @return void.
 * @throws IOException thrown if an IO error occurred.
 */
@Override
public Void execute(FileSystem fs) throws IOException {
    int bufferSize = fs.getConf().getInt("hoop.buffer.size", 4096);
    OutputStream os = fs.append(path, bufferSize);
    IOUtils.copy(is, os);
    os.close();
    return null;
}
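One caveat with the example above: if IOUtils.copy throws, the freshly opened append stream is never closed. A minimal reworking with try-with-resources (same fields as the original, purely illustrative) would be:

@Override
public Void execute(FileSystem fs) throws IOException {
    int bufferSize = fs.getConf().getInt("hoop.buffer.size", 4096);
    // try-with-resources closes the stream even when the copy fails midway
    try (OutputStream os = fs.append(path, bufferSize)) {
        IOUtils.copy(is, os);
    }
    return null;
}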
From source file: com.mellanox.r4h.DistributedFileSystem.java
License: Apache License
/**
 * Append to an existing file (optional operation).
 *
 * @param f the existing file to be appended.
 * @param flag Flags for the Append operation. CreateFlag.APPEND is mandatory to be present.
 * @param bufferSize the size of the buffer to be used.
 * @param progress for reporting progress if it is not null.
 * @return Returns instance of {@link FSDataOutputStream}
 * @throws IOException
 */
public FSDataOutputStream append(Path f, final EnumSet<CreateFlag> flag, final int bufferSize,
        final Progressable progress) throws IOException {
    statistics.incrementWriteOps(1);
    Path absF = fixRelativePart(f);
    return new FileSystemLinkResolver<FSDataOutputStream>() {
        @Override
        public FSDataOutputStream doCall(final Path p) throws IOException {
            return dfs.append(getPathName(p), bufferSize, flag, progress, statistics);
        }

        @Override
        public FSDataOutputStream next(final FileSystem fs, final Path p) throws IOException {
            return fs.append(p, bufferSize);
        }
    }.resolve(this, absF);
}
From source file: com.mellanox.r4h.DistributedFileSystem.java
License: Apache License
/**
 * Append to an existing file (optional operation).
 *
 * @param f the existing file to be appended.
 * @param flag Flags for the Append operation. CreateFlag.APPEND is mandatory to be present.
 * @param bufferSize the size of the buffer to be used.
 * @param progress for reporting progress if it is not null.
 * @param favoredNodes Favored nodes for new blocks
 * @return Returns instance of {@link FSDataOutputStream}
 * @throws IOException
 */
public FSDataOutputStream append(Path f, final EnumSet<CreateFlag> flag, final int bufferSize,
        final Progressable progress, final InetSocketAddress[] favoredNodes) throws IOException {
    statistics.incrementWriteOps(1);
    Path absF = fixRelativePart(f);
    return new FileSystemLinkResolver<FSDataOutputStream>() {
        @Override
        public FSDataOutputStream doCall(final Path p) throws IOException {
            return dfs.append(getPathName(p), bufferSize, flag, progress, statistics, favoredNodes);
        }

        @Override
        public FSDataOutputStream next(final FileSystem fs, final Path p) throws IOException {
            return fs.append(p, bufferSize);
        }
    }.resolve(this, absF);
}
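A caller-side sketch for the two overloads above. The path and buffer size are made up; the flag set must contain CreateFlag.APPEND, and passing null for progress is allowed since it is only used when non-null. (In Apache Hadoop, DistributedFileSystem gained this overload around 2.7.)

DistributedFileSystem dfs = (DistributedFileSystem) FileSystem.get(conf);
Path log = new Path("/data/app/events.log"); // illustrative path
try (FSDataOutputStream out = dfs.append(log, EnumSet.of(CreateFlag.APPEND), 4096, null)) {
    out.writeBytes("appended via the CreateFlag overload\n");
}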
From source file: com.uber.hoodie.common.table.log.avro.AvroLogAppender.java
License: Apache License
public AvroLogAppender(HoodieLogAppendConfig config) throws IOException, InterruptedException {
    FileSystem fs = config.getFs();
    this.config = config;
    this.autoFlush = config.isAutoFlush();
    GenericDatumWriter<IndexedRecord> datumWriter = new GenericDatumWriter<>(config.getSchema());
    this.writer = new DataFileWriter<>(datumWriter);
    Path path = config.getLogFile().getPath();

    if (fs.exists(path)) {
        // TODO - check for log corruption and roll over if needed
        log.info(config.getLogFile() + " exists. Appending to existing file");
        // this log path exists, we will append to it
        fs = FileSystem.get(fs.getConf());
        try {
            this.output = fs.append(path, config.getBufferSize());
        } catch (RemoteException e) {
            // this happens when another task executor writing to this file died,
            // or a data node is going down
            if (e.getClassName().equals(AlreadyBeingCreatedException.class.getName())
                    && fs instanceof DistributedFileSystem) {
                log.warn("Trying to recover log on path " + path);
                if (FSUtils.recoverDFSFileLease((DistributedFileSystem) fs, path)) {
                    log.warn("Recovered lease on path " + path);
                    // try again
                    this.output = fs.append(path, config.getBufferSize());
                } else {
                    log.warn("Failed to recover lease on path " + path);
                    throw new HoodieException(e);
                }
            }
        }
        this.writer.appendTo(new AvroFSInput(FileContext.getFileContext(fs.getConf()), path), output);
        // we always want to flush to disk every time an Avro block is written
        this.writer.setFlushOnEveryBlock(true);
    } else {
        log.info(config.getLogFile() + " does not exist. Create a new file");
        this.output = fs.create(path, false, config.getBufferSize(), config.getReplication(),
                config.getBlockSize(), null);
        this.writer.create(config.getSchema(), output);
        this.writer.setFlushOnEveryBlock(true);
        // We need to close the writer to be able to tell the name node that we created this file
        // this.writer.close();
    }
}
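FSUtils.recoverDFSFileLease is Hudi-internal, but the underlying mechanism is plain HDFS API: ask the NameNode to start lease recovery, then poll until the file is reported closed. A hedged sketch of what such a helper typically looks like (the 60-second budget and 1-second poll interval are arbitrary choices here, not Hudi's):

// Sketch only: production code wants bounded retries and better error reporting.
static boolean recoverLease(DistributedFileSystem dfs, Path path)
        throws IOException, InterruptedException {
    boolean recovered = dfs.recoverLease(path); // true if the lease was released immediately
    long deadline = System.currentTimeMillis() + 60_000L;
    while (!recovered && System.currentTimeMillis() < deadline) {
        Thread.sleep(1000L);
        recovered = dfs.isFileClosed(path);
    }
    return recovered;
}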
From source file: com.uber.hoodie.common.table.log.HoodieLogFormatWriter.java
License: Apache License
/**
 * @param fs
 * @param logFile
 * @param bufferSize
 * @param replication
 * @param sizeThreshold
 */
HoodieLogFormatWriter(FileSystem fs, HoodieLogFile logFile, Integer bufferSize, Short replication,
        Long sizeThreshold, String logWriteToken, String rolloverLogWriteToken)
        throws IOException, InterruptedException {
    this.fs = fs;
    this.logFile = logFile;
    this.sizeThreshold = sizeThreshold;
    this.bufferSize = bufferSize;
    this.replication = replication;
    this.logWriteToken = logWriteToken;
    this.rolloverLogWriteToken = rolloverLogWriteToken;
    Path path = logFile.getPath();

    if (fs.exists(path)) {
        boolean isAppendSupported = StorageSchemes.isAppendSupported(fs.getScheme());
        if (isAppendSupported) {
            log.info(logFile + " exists. Appending to existing file");
            try {
                this.output = fs.append(path, bufferSize);
            } catch (RemoteException e) {
                log.warn("Remote Exception, attempting to handle or recover lease", e);
                handleAppendExceptionOrRecoverLease(path, e);
            } catch (IOException ioe) {
                if (ioe.getMessage().toLowerCase().contains("not supported")) {
                    // may still happen if scheme is viewfs.
                    isAppendSupported = false;
                } else {
                    throw ioe;
                }
            }
        }
        if (!isAppendSupported) {
            this.logFile = logFile.rollOver(fs, rolloverLogWriteToken);
            log.info("Append not supported.. Rolling over to " + logFile);
            createNewFile();
        }
    } else {
        log.info(logFile + " does not exist. Create a new file");
        // Block size does not matter as we will always manually autoflush
        createNewFile();
    }
}
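StorageSchemes.isAppendSupported is a Hudi-maintained whitelist keyed on fs.getScheme(); the FileSystem API of this era has no general capability query for append, which is why the code above also treats a "not supported" IOException as a signal to roll over. A stripped-down sketch of that probe-and-fallback pattern, with a hypothetical helper name and an illustrative rollover naming scheme:

// Hypothetical helper: try to append, and fall back to a fresh file when the
// underlying FileSystem (e.g. a viewfs mount) rejects append outright.
static FSDataOutputStream appendOrRollOver(FileSystem fs, Path path, int bufferSize) throws IOException {
    try {
        return fs.append(path, bufferSize);
    } catch (IOException ioe) {
        String msg = ioe.getMessage();
        if (msg != null && msg.toLowerCase().contains("not supported")) {
            Path rolled = new Path(path.getParent(), path.getName() + ".1"); // illustrative name
            return fs.create(rolled, false, bufferSize);
        }
        throw ioe;
    }
}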
From source file: org.apache.carbondata.core.datastorage.store.impl.FileFactory.java
License: Apache License
public static DataOutputStream getDataOutputStream(String path, FileType fileType, int bufferSize,
        boolean append) throws IOException {
    path = path.replace("\\", "/");
    switch (fileType) {
    case LOCAL:
        return new DataOutputStream(new BufferedOutputStream(new FileOutputStream(path, append), bufferSize));
    case HDFS:
    case VIEWFS:
        Path pt = new Path(path);
        FileSystem fs = pt.getFileSystem(configuration);
        FSDataOutputStream stream = null;
        if (append) {
            // append only if the file already exists; otherwise HDFS would throw
            // a file-not-found exception
            if (CarbonUtil.isFileExists(path)) {
                stream = fs.append(pt, bufferSize);
            } else {
                stream = fs.create(pt, true, bufferSize);
            }
        } else {
            stream = fs.create(pt, true, bufferSize);
        }
        return stream;
    default:
        return new DataOutputStream(new BufferedOutputStream(new FileOutputStream(path), bufferSize));
    }
}
From source file: org.apache.carbondata.core.datastore.impl.FileFactory.java
License: Apache License
public static DataOutputStream getDataOutputStream(String path, FileType fileType, int bufferSize,
        boolean append) throws IOException {
    path = path.replace("\\", "/");
    switch (fileType) {
    case LOCAL:
        path = getUpdatedFilePath(path, fileType);
        return new DataOutputStream(new BufferedOutputStream(new FileOutputStream(path, append), bufferSize));
    case HDFS:
    case ALLUXIO:
    case VIEWFS:
        Path pt = new Path(path);
        FileSystem fs = pt.getFileSystem(configuration);
        FSDataOutputStream stream = null;
        if (append) {
            // append only if the file already exists; otherwise HDFS would throw
            // a file-not-found exception
            if (CarbonUtil.isFileExists(path)) {
                stream = fs.append(pt, bufferSize);
            } else {
                stream = fs.create(pt, true, bufferSize);
            }
        } else {
            stream = fs.create(pt, true, bufferSize);
        }
        return stream;
    default:
        path = getUpdatedFilePath(path, fileType);
        return new DataOutputStream(new BufferedOutputStream(new FileOutputStream(path), bufferSize));
    }
}
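The exists-then-append pattern in both FileFactory variants is inherently racy when another writer can create or delete the file between the two calls. On HDFS a missing file typically surfaces as a FileNotFoundException from append, so an alternative (sketched here under that assumption; this is not what CarbonData does) is to attempt the append first and fall back on the exception:

static FSDataOutputStream openForAppend(FileSystem fs, Path pt, int bufferSize) throws IOException {
    try {
        return fs.append(pt, bufferSize);
    } catch (FileNotFoundException fnfe) {
        // the file did not exist yet; create it instead (one round trip fewer, still racy)
        return fs.create(pt, true, bufferSize);
    }
}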
From source file: org.apache.ignite.internal.processors.hadoop.impl.delegate.HadoopIgfsSecondaryFileSystemDelegateImpl.java
License: Apache License
/** {@inheritDoc} */
@Override
public OutputStream append(IgfsPath path, int bufSize, boolean create, @Nullable Map<String, String> props) {
    try {
        Path hadoopPath = convert(path);
        FileSystem fs = fileSystemForUser();

        if (create && !fs.exists(hadoopPath))
            return fs.create(hadoopPath, false, bufSize);
        else
            return fs.append(convert(path), bufSize);
    } catch (IOException e) {
        throw handleSecondaryFsError(e, "Failed to append file [path=" + path + ", bufSize=" + bufSize + "]");
    }
}
From source file: org.apache.nifi.processors.hadoop.PutHDFS.java
License: Apache License
@Override
public void onTrigger(ProcessContext context, ProcessSession session) throws ProcessException {
    final FlowFile flowFile = session.get();
    if (flowFile == null) {
        return;
    }

    final FileSystem hdfs = getFileSystem();
    final Configuration configuration = getConfiguration();
    final UserGroupInformation ugi = getUserGroupInformation();

    if (configuration == null || hdfs == null || ugi == null) {
        getLogger().error("HDFS not configured properly");
        session.transfer(flowFile, REL_FAILURE);
        context.yield();
        return;
    }

    ugi.doAs(new PrivilegedAction<Object>() {
        @Override
        public Object run() {
            Path tempDotCopyFile = null;
            FlowFile putFlowFile = flowFile;
            try {
                final String dirValue = context.getProperty(DIRECTORY).evaluateAttributeExpressions(putFlowFile).getValue();
                final Path configuredRootDirPath = new Path(dirValue);

                final String conflictResponse = context.getProperty(CONFLICT_RESOLUTION).getValue();

                final Double blockSizeProp = context.getProperty(BLOCK_SIZE).asDataSize(DataUnit.B);
                final long blockSize = blockSizeProp != null ? blockSizeProp.longValue()
                        : hdfs.getDefaultBlockSize(configuredRootDirPath);

                final Double bufferSizeProp = context.getProperty(BUFFER_SIZE).asDataSize(DataUnit.B);
                final int bufferSize = bufferSizeProp != null ? bufferSizeProp.intValue()
                        : configuration.getInt(BUFFER_SIZE_KEY, BUFFER_SIZE_DEFAULT);

                final Integer replicationProp = context.getProperty(REPLICATION_FACTOR).asInteger();
                final short replication = replicationProp != null ? replicationProp.shortValue()
                        : hdfs.getDefaultReplication(configuredRootDirPath);

                final CompressionCodec codec = getCompressionCodec(context, configuration);

                final String filename = codec != null
                        ? putFlowFile.getAttribute(CoreAttributes.FILENAME.key()) + codec.getDefaultExtension()
                        : putFlowFile.getAttribute(CoreAttributes.FILENAME.key());

                final Path tempCopyFile = new Path(configuredRootDirPath, "." + filename);
                final Path copyFile = new Path(configuredRootDirPath, filename);

                // Create destination directory if it does not exist
                try {
                    if (!hdfs.getFileStatus(configuredRootDirPath).isDirectory()) {
                        throw new IOException(configuredRootDirPath.toString() + " already exists and is not a directory");
                    }
                } catch (FileNotFoundException fe) {
                    if (!hdfs.mkdirs(configuredRootDirPath)) {
                        throw new IOException(configuredRootDirPath.toString() + " could not be created");
                    }
                    changeOwner(context, hdfs, configuredRootDirPath, flowFile);
                }

                final boolean destinationExists = hdfs.exists(copyFile);

                // If destination file already exists, resolve that based on processor configuration
                if (destinationExists) {
                    switch (conflictResponse) {
                    case REPLACE_RESOLUTION:
                        if (hdfs.delete(copyFile, false)) {
                            getLogger().info("deleted {} in order to replace with the contents of {}",
                                    new Object[] { copyFile, putFlowFile });
                        }
                        break;
                    case IGNORE_RESOLUTION:
                        session.transfer(putFlowFile, REL_SUCCESS);
                        getLogger().info("transferring {} to success because file with same name already exists",
                                new Object[] { putFlowFile });
                        return null;
                    case FAIL_RESOLUTION:
                        session.transfer(session.penalize(putFlowFile), REL_FAILURE);
                        getLogger().warn("penalizing {} and routing to failure because file with same name already exists",
                                new Object[] { putFlowFile });
                        return null;
                    default:
                        break;
                    }
                }

                // Write FlowFile to temp file on HDFS
                final StopWatch stopWatch = new StopWatch(true);
                session.read(putFlowFile, new InputStreamCallback() {
                    @Override
                    public void process(InputStream in) throws IOException {
                        OutputStream fos = null;
                        Path createdFile = null;
                        try {
                            if (conflictResponse.equals(APPEND_RESOLUTION_AV.getValue()) && destinationExists) {
                                fos = hdfs.append(copyFile, bufferSize);
                            } else {
                                fos = hdfs.create(tempCopyFile, true, bufferSize, replication, blockSize);
                            }
                            if (codec != null) {
                                fos = codec.createOutputStream(fos);
                            }
                            createdFile = tempCopyFile;
                            BufferedInputStream bis = new BufferedInputStream(in);
                            StreamUtils.copy(bis, fos);
                            bis = null;
                            fos.flush();
                        } finally {
                            try {
                                if (fos != null) {
                                    fos.close();
                                }
                            } catch (RemoteException re) {
                                // when talking to remote HDFS clusters, we don't notice problems until fos.close()
                                if (createdFile != null) {
                                    try {
                                        hdfs.delete(createdFile, false);
                                    } catch (Throwable ignore) {
                                    }
                                }
                                throw re;
                            } catch (Throwable ignore) {
                            }
                            fos = null;
                        }
                    }
                });
                stopWatch.stop();

                final String dataRate = stopWatch.calculateDataRate(putFlowFile.getSize());
                final long millis = stopWatch.getDuration(TimeUnit.MILLISECONDS);
                tempDotCopyFile = tempCopyFile;

                if (!conflictResponse.equals(APPEND_RESOLUTION_AV.getValue())
                        || (conflictResponse.equals(APPEND_RESOLUTION_AV.getValue()) && !destinationExists)) {
                    boolean renamed = false;
                    for (int i = 0; i < 10; i++) { // try to rename multiple times.
                        if (hdfs.rename(tempCopyFile, copyFile)) {
                            renamed = true;
                            break; // rename was successful
                        }
                        Thread.sleep(200L); // try waiting to let whatever might cause rename failure to resolve
                    }
                    if (!renamed) {
                        hdfs.delete(tempCopyFile, false);
                        throw new ProcessException("Copied file to HDFS but could not rename dot file " + tempCopyFile
                                + " to its final filename");
                    }
                    changeOwner(context, hdfs, copyFile, flowFile);
                }

                getLogger().info("copied {} to HDFS at {} in {} milliseconds at a rate of {}",
                        new Object[] { putFlowFile, copyFile, millis, dataRate });

                final String newFilename = copyFile.getName();
                final String hdfsPath = copyFile.getParent().toString();
                putFlowFile = session.putAttribute(putFlowFile, CoreAttributes.FILENAME.key(), newFilename);
                putFlowFile = session.putAttribute(putFlowFile, ABSOLUTE_HDFS_PATH_ATTRIBUTE, hdfsPath);
                final Path qualifiedPath = copyFile.makeQualified(hdfs.getUri(), hdfs.getWorkingDirectory());
                session.getProvenanceReporter().send(putFlowFile, qualifiedPath.toString());

                session.transfer(putFlowFile, REL_SUCCESS);
            } catch (final Throwable t) {
                if (tempDotCopyFile != null) {
                    try {
                        hdfs.delete(tempDotCopyFile, false);
                    } catch (Exception e) {
                        getLogger().error("Unable to remove temporary file {} due to {}",
                                new Object[] { tempDotCopyFile, e });
                    }
                }
                getLogger().error("Failed to write to HDFS due to {}", new Object[] { t });
                session.transfer(session.penalize(putFlowFile), REL_FAILURE);
                context.yield();
            }
            return null;
        }
    });
}