Example usage for org.apache.hadoop.fs FileSystem append

Introduction

On this page you can find example usages of the org.apache.hadoop.fs FileSystem append method.

Prototype

public FSDataOutputStream append(Path f, int bufferSize) throws IOException 

Document

Append to an existing file (optional operation).
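
Before the collected examples, here is a minimal, self-contained sketch of calling append(Path, int). The path and buffer size are illustrative assumptions, and the target file must live on a filesystem that actually supports append (plain local and checksum filesystems throw "Not supported").

import java.nio.charset.StandardCharsets;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class FileSystemAppendSketch {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        Path file = new Path("hdfs:///tmp/append-example.log"); // illustrative path
        FileSystem fs = file.getFileSystem(conf);

        // append(Path, int) fails if the file does not exist, so create it first
        if (!fs.exists(file)) {
            fs.create(file, false).close();
        }

        // 4096 is an arbitrary buffer size chosen for this sketch
        try (FSDataOutputStream out = fs.append(file, 4096)) {
            out.write("one more line\n".getBytes(StandardCharsets.UTF_8));
        }
    }
}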

Usage

From source file:com.cloudera.hoop.fs.FSAppend.java

License:Open Source License

/**
 * Executes the filesystem operation.
 *
 * @param fs filesystem instance to use.
 * @return void.
 * @throws IOException thrown if an IO error occurred.
 */
@Override
public Void execute(FileSystem fs) throws IOException {
    int bufferSize = fs.getConf().getInt("hoop.buffer.size", 4096);
    OutputStream os = fs.append(path, bufferSize);
    IOUtils.copy(is, os);
    os.close();
    return null;
}
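
The example above closes the output stream only on the success path; a variant of the same copy using try-with-resources (a sketch, not part of the original Hoop source) releases the stream even if the copy throws:

int bufferSize = fs.getConf().getInt("hoop.buffer.size", 4096);
try (OutputStream os = fs.append(path, bufferSize)) {
    IOUtils.copy(is, os);
}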

From source file:com.mellanox.r4h.DistributedFileSystem.java

License:Apache License

/**
 * Append to an existing file (optional operation).
 *
 * @param f
 *            the existing file to be appended.
 * @param flag
 *            Flags for the Append operation. CreateFlag.APPEND is mandatory
 *            to be present.
 * @param bufferSize
 *            the size of the buffer to be used.
 * @param progress
 *            for reporting progress if it is not null.
 * @return Returns instance of {@link FSDataOutputStream}
 * @throws IOException
 */
public FSDataOutputStream append(Path f, final EnumSet<CreateFlag> flag, final int bufferSize,
        final Progressable progress) throws IOException {
    statistics.incrementWriteOps(1);
    Path absF = fixRelativePart(f);
    return new FileSystemLinkResolver<FSDataOutputStream>() {
        @Override
        public FSDataOutputStream doCall(final Path p) throws IOException {
            return dfs.append(getPathName(p), bufferSize, flag, progress, statistics);
        }

        @Override
        public FSDataOutputStream next(final FileSystem fs, final Path p) throws IOException {
            return fs.append(p, bufferSize);
        }
    }.resolve(this, absF);
}

From source file:com.mellanox.r4h.DistributedFileSystem.java

License:Apache License

/**
 * Append to an existing file (optional operation).
 *
 * @param f
 *            the existing file to be appended.
 * @param flag
 *            Flags for the Append operation. CreateFlag.APPEND is mandatory
 *            to be present.
 * @param bufferSize
 *            the size of the buffer to be used.
 * @param progress
 *            for reporting progress if it is not null.
 * @param favoredNodes
 *            Favored nodes for new blocks
 * @return Returns instance of {@link FSDataOutputStream}
 * @throws IOException
 */
public FSDataOutputStream append(Path f, final EnumSet<CreateFlag> flag, final int bufferSize,
        final Progressable progress, final InetSocketAddress[] favoredNodes) throws IOException {
    statistics.incrementWriteOps(1);
    Path absF = fixRelativePart(f);
    return new FileSystemLinkResolver<FSDataOutputStream>() {
        @Override
        public FSDataOutputStream doCall(final Path p) throws IOException {
            return dfs.append(getPathName(p), bufferSize, flag, progress, statistics, favoredNodes);
        }

        @Override
        public FSDataOutputStream next(final FileSystem fs, final Path p) throws IOException {
            return fs.append(p, bufferSize);
        }
    }.resolve(this, absF);
}

From source file:com.uber.hoodie.common.table.log.avro.AvroLogAppender.java

License:Apache License

public AvroLogAppender(HoodieLogAppendConfig config) throws IOException, InterruptedException {
    FileSystem fs = config.getFs();
    this.config = config;
    this.autoFlush = config.isAutoFlush();
    GenericDatumWriter<IndexedRecord> datumWriter = new GenericDatumWriter<>(config.getSchema());
    this.writer = new DataFileWriter<>(datumWriter);
    Path path = config.getLogFile().getPath();

    if (fs.exists(path)) {
        //TODO - check for log corruption and roll over if needed
        log.info(config.getLogFile() + " exists. Appending to existing file");
        // this log path exists, we will append to it
        fs = FileSystem.get(fs.getConf());
        try {
            this.output = fs.append(path, config.getBufferSize());
        } catch (RemoteException e) {
            // this happens when another task executor writing to this file died, or a data node is going down
            if (e.getClassName().equals(AlreadyBeingCreatedException.class.getName())
                    && fs instanceof DistributedFileSystem) {
                log.warn("Trying to recover log on path " + path);
                if (FSUtils.recoverDFSFileLease((DistributedFileSystem) fs, path)) {
                    log.warn("Recovered lease on path " + path);
                    // try again
                    this.output = fs.append(path, config.getBufferSize());
                } else {
                    log.warn("Failed to recover lease on path " + path);
                    throw new HoodieException(e);
                }
            }
        }
        this.writer.appendTo(new AvroFSInput(FileContext.getFileContext(fs.getConf()), path), output);
        // we always want to flush to disk every time an Avro block is written
        this.writer.setFlushOnEveryBlock(true);
    } else {
        log.info(config.getLogFile() + " does not exist. Create a new file");
        this.output = fs.create(path, false, config.getBufferSize(), config.getReplication(),
                config.getBlockSize(), null);
        this.writer.create(config.getSchema(), output);
        this.writer.setFlushOnEveryBlock(true);
        // We need to close the writer to be able to tell the name node that we created this file
        // this.writer.close();
    }
}

From source file:com.uber.hoodie.common.table.log.HoodieLogFormatWriter.java

License:Apache License

/**
 * @param fs
 * @param logFile
 * @param bufferSize
 * @param replication
 * @param sizeThreshold
 */
HoodieLogFormatWriter(FileSystem fs, HoodieLogFile logFile, Integer bufferSize, Short replication,
        Long sizeThreshold, String logWriteToken, String rolloverLogWriteToken)
        throws IOException, InterruptedException {
    this.fs = fs;
    this.logFile = logFile;
    this.sizeThreshold = sizeThreshold;
    this.bufferSize = bufferSize;
    this.replication = replication;
    this.logWriteToken = logWriteToken;
    this.rolloverLogWriteToken = rolloverLogWriteToken;
    Path path = logFile.getPath();
    if (fs.exists(path)) {
        boolean isAppendSupported = StorageSchemes.isAppendSupported(fs.getScheme());
        if (isAppendSupported) {
            log.info(logFile + " exists. Appending to existing file");
            try {
                this.output = fs.append(path, bufferSize);
            } catch (RemoteException e) {
                log.warn("Remote Exception, attempting to handle or recover lease", e);
                handleAppendExceptionOrRecoverLease(path, e);
            } catch (IOException ioe) {
                if (ioe.getMessage().toLowerCase().contains("not supported")) {
                    // may still happen if scheme is viewfs.
                    isAppendSupported = false;
                } else {
                    throw ioe;
                }
            }
        }
        if (!isAppendSupported) {
            this.logFile = logFile.rollOver(fs, rolloverLogWriteToken);
            log.info("Append not supported.. Rolling over to " + logFile);
            createNewFile();
        }
    } else {
        log.info(logFile + " does not exist. Create a new file");
        // Block size does not matter as we will always manually autoflush
        createNewFile();
    }
}

From source file:org.apache.carbondata.core.datastorage.store.impl.FileFactory.java

License:Apache License

public static DataOutputStream getDataOutputStream(String path, FileType fileType, int bufferSize,
        boolean append) throws IOException {
    path = path.replace("\\", "/");
    switch (fileType) {
    case LOCAL:
        return new DataOutputStream(new BufferedOutputStream(new FileOutputStream(path, append), bufferSize));
    case HDFS:
    case VIEWFS:
        Path pt = new Path(path);
        FileSystem fs = pt.getFileSystem(configuration);
        FSDataOutputStream stream = null;
        if (append) {
            // append only if the file already exists; otherwise HDFS throws a
            // FileNotFoundException on append
            if (CarbonUtil.isFileExists(path)) {
                stream = fs.append(pt, bufferSize);
            } else {
                stream = fs.create(pt, true, bufferSize);
            }
        } else {
            stream = fs.create(pt, true, bufferSize);
        }
        return stream;
    default:
        return new DataOutputStream(new BufferedOutputStream(new FileOutputStream(path), bufferSize));
    }
}

From source file:org.apache.carbondata.core.datastore.impl.FileFactory.java

License:Apache License

public static DataOutputStream getDataOutputStream(String path, FileType fileType, int bufferSize,
        boolean append) throws IOException {
    path = path.replace("\\", "/");
    switch (fileType) {
    case LOCAL:
        path = getUpdatedFilePath(path, fileType);
        return new DataOutputStream(new BufferedOutputStream(new FileOutputStream(path, append), bufferSize));
    case HDFS:
    case ALLUXIO:
    case VIEWFS:
        Path pt = new Path(path);
        FileSystem fs = pt.getFileSystem(configuration);
        FSDataOutputStream stream = null;
        if (append) {
            // append only if the file already exists; otherwise HDFS throws a
            // FileNotFoundException on append
            if (CarbonUtil.isFileExists(path)) {
                stream = fs.append(pt, bufferSize);
            } else {
                stream = fs.create(pt, true, bufferSize);
            }
        } else {
            stream = fs.create(pt, true, bufferSize);
        }
        return stream;
    default:
        path = getUpdatedFilePath(path, fileType);
        return new DataOutputStream(new BufferedOutputStream(new FileOutputStream(path), bufferSize));
    }
}

From source file:org.apache.ignite.internal.processors.hadoop.impl.delegate.HadoopIgfsSecondaryFileSystemDelegateImpl.java

License:Apache License

/** {@inheritDoc} */
@Override
public OutputStream append(IgfsPath path, int bufSize, boolean create, @Nullable Map<String, String> props) {
    try {
        Path hadoopPath = convert(path);

        FileSystem fs = fileSystemForUser();

        if (create && !fs.exists(hadoopPath))
            return fs.create(hadoopPath, false, bufSize);
        else
            return fs.append(convert(path), bufSize);
    } catch (IOException e) {
        throw handleSecondaryFsError(e, "Failed to append file [path=" + path + ", bufSize=" + bufSize + "]");
    }
}

From source file:org.apache.nifi.processors.hadoop.PutHDFS.java

License:Apache License

@Override
public void onTrigger(ProcessContext context, ProcessSession session) throws ProcessException {
    final FlowFile flowFile = session.get();
    if (flowFile == null) {
        return;
    }

    final FileSystem hdfs = getFileSystem();
    final Configuration configuration = getConfiguration();
    final UserGroupInformation ugi = getUserGroupInformation();

    if (configuration == null || hdfs == null || ugi == null) {
        getLogger().error("HDFS not configured properly");
        session.transfer(flowFile, REL_FAILURE);
        context.yield();
        return;
    }

    ugi.doAs(new PrivilegedAction<Object>() {
        @Override
        public Object run() {
            Path tempDotCopyFile = null;
            FlowFile putFlowFile = flowFile;
            try {
                final String dirValue = context.getProperty(DIRECTORY).evaluateAttributeExpressions(putFlowFile)
                        .getValue();
                final Path configuredRootDirPath = new Path(dirValue);

                final String conflictResponse = context.getProperty(CONFLICT_RESOLUTION).getValue();

                final Double blockSizeProp = context.getProperty(BLOCK_SIZE).asDataSize(DataUnit.B);
                final long blockSize = blockSizeProp != null ? blockSizeProp.longValue()
                        : hdfs.getDefaultBlockSize(configuredRootDirPath);

                final Double bufferSizeProp = context.getProperty(BUFFER_SIZE).asDataSize(DataUnit.B);
                final int bufferSize = bufferSizeProp != null ? bufferSizeProp.intValue()
                        : configuration.getInt(BUFFER_SIZE_KEY, BUFFER_SIZE_DEFAULT);

                final Integer replicationProp = context.getProperty(REPLICATION_FACTOR).asInteger();
                final short replication = replicationProp != null ? replicationProp.shortValue()
                        : hdfs.getDefaultReplication(configuredRootDirPath);

                final CompressionCodec codec = getCompressionCodec(context, configuration);

                final String filename = codec != null
                        ? putFlowFile.getAttribute(CoreAttributes.FILENAME.key()) + codec.getDefaultExtension()
                        : putFlowFile.getAttribute(CoreAttributes.FILENAME.key());

                final Path tempCopyFile = new Path(configuredRootDirPath, "." + filename);
                final Path copyFile = new Path(configuredRootDirPath, filename);

                // Create destination directory if it does not exist
                try {
                    if (!hdfs.getFileStatus(configuredRootDirPath).isDirectory()) {
                        throw new IOException(
                                configuredRootDirPath.toString() + " already exists and is not a directory");
                    }
                } catch (FileNotFoundException fe) {
                    if (!hdfs.mkdirs(configuredRootDirPath)) {
                        throw new IOException(configuredRootDirPath.toString() + " could not be created");
                    }
                    changeOwner(context, hdfs, configuredRootDirPath, flowFile);
                }

                final boolean destinationExists = hdfs.exists(copyFile);

                // If destination file already exists, resolve that based on processor configuration
                if (destinationExists) {
                    switch (conflictResponse) {
                    case REPLACE_RESOLUTION:
                        if (hdfs.delete(copyFile, false)) {
                            getLogger().info("deleted {} in order to replace with the contents of {}",
                                    new Object[] { copyFile, putFlowFile });
                        }
                        break;
                    case IGNORE_RESOLUTION:
                        session.transfer(putFlowFile, REL_SUCCESS);
                        getLogger().info(
                                "transferring {} to success because file with same name already exists",
                                new Object[] { putFlowFile });
                        return null;
                    case FAIL_RESOLUTION:
                        session.transfer(session.penalize(putFlowFile), REL_FAILURE);
                        getLogger().warn(
                                "penalizing {} and routing to failure because file with same name already exists",
                                new Object[] { putFlowFile });
                        return null;
                    default:
                        break;
                    }
                }

                // Write FlowFile to temp file on HDFS
                final StopWatch stopWatch = new StopWatch(true);
                session.read(putFlowFile, new InputStreamCallback() {

                    @Override
                    public void process(InputStream in) throws IOException {
                        OutputStream fos = null;
                        Path createdFile = null;
                        try {
                            if (conflictResponse.equals(APPEND_RESOLUTION_AV.getValue()) && destinationExists) {
                                fos = hdfs.append(copyFile, bufferSize);
                            } else {
                                fos = hdfs.create(tempCopyFile, true, bufferSize, replication, blockSize);
                            }
                            if (codec != null) {
                                fos = codec.createOutputStream(fos);
                            }
                            createdFile = tempCopyFile;
                            BufferedInputStream bis = new BufferedInputStream(in);
                            StreamUtils.copy(bis, fos);
                            bis = null;
                            fos.flush();
                        } finally {
                            try {
                                if (fos != null) {
                                    fos.close();
                                }
                            } catch (RemoteException re) {
                                // when talking to remote HDFS clusters, we don't notice problems until fos.close()
                                if (createdFile != null) {
                                    try {
                                        hdfs.delete(createdFile, false);
                                    } catch (Throwable ignore) {
                                    }
                                }
                                throw re;
                            } catch (Throwable ignore) {
                            }
                            fos = null;
                        }
                    }

                });
                stopWatch.stop();
                final String dataRate = stopWatch.calculateDataRate(putFlowFile.getSize());
                final long millis = stopWatch.getDuration(TimeUnit.MILLISECONDS);
                tempDotCopyFile = tempCopyFile;

                if (!conflictResponse.equals(APPEND_RESOLUTION_AV.getValue())
                        || (conflictResponse.equals(APPEND_RESOLUTION_AV.getValue()) && !destinationExists)) {
                    boolean renamed = false;
                    for (int i = 0; i < 10; i++) { // try to rename multiple times.
                        if (hdfs.rename(tempCopyFile, copyFile)) {
                            renamed = true;
                            break;// rename was successful
                        }
                        Thread.sleep(200L);// try waiting to let whatever might cause rename failure to resolve
                    }
                    if (!renamed) {
                        hdfs.delete(tempCopyFile, false);
                        throw new ProcessException("Copied file to HDFS but could not rename dot file "
                                + tempCopyFile + " to its final filename");
                    }

                    changeOwner(context, hdfs, copyFile, flowFile);
                }

                getLogger().info("copied {} to HDFS at {} in {} milliseconds at a rate of {}",
                        new Object[] { putFlowFile, copyFile, millis, dataRate });

                final String newFilename = copyFile.getName();
                final String hdfsPath = copyFile.getParent().toString();
                putFlowFile = session.putAttribute(putFlowFile, CoreAttributes.FILENAME.key(), newFilename);
                putFlowFile = session.putAttribute(putFlowFile, ABSOLUTE_HDFS_PATH_ATTRIBUTE, hdfsPath);
                final Path qualifiedPath = copyFile.makeQualified(hdfs.getUri(), hdfs.getWorkingDirectory());
                session.getProvenanceReporter().send(putFlowFile, qualifiedPath.toString());

                session.transfer(putFlowFile, REL_SUCCESS);

            } catch (final Throwable t) {
                if (tempDotCopyFile != null) {
                    try {
                        hdfs.delete(tempDotCopyFile, false);
                    } catch (Exception e) {
                        getLogger().error("Unable to remove temporary file {} due to {}",
                                new Object[] { tempDotCopyFile, e });
                    }
                }
                getLogger().error("Failed to write to HDFS due to {}", new Object[] { t });
                session.transfer(session.penalize(putFlowFile), REL_FAILURE);
                context.yield();
            }

            return null;
        }
    });
}