Example usage for org.apache.hadoop.fs FileSystem getDefaultReplication

Introduction

In this page you can find the example usage for org.apache.hadoop.fs FileSystem getDefaultReplication.

Prototype

public short getDefaultReplication(Path path)

Source Link

Document

Get the default replication for a path.

Usage

From source file:cn.uway.util.apache.parquet.hadoop.ParquetFileWriter.java

License:Apache License

/**
 * @param configuration Hadoop configuration
 * @param schema the schema of the data/*  w w  w .  j a va2s  .c  o m*/
 * @param file the file to write to
 * @param mode file creation mode
 * @param rowGroupSize the row group size
 * @throws IOException if the file can not be created
 */
public ParquetFileWriter(Configuration configuration, MessageType schema, Path file, Mode mode,
        long rowGroupSize, int maxPaddingSize) throws IOException {
    TypeUtil.checkValidWriteSchema(schema);
    this.schema = schema;
    FileSystem fs = file.getFileSystem(configuration);
    boolean overwriteFlag = (mode == Mode.OVERWRITE);
    if (supportsBlockSize(fs)) {
        // use the default block size, unless row group size is larger
        long dfsBlockSize = Math.max(fs.getDefaultBlockSize(file), rowGroupSize);

        this.alignment = PaddingAlignment.get(dfsBlockSize, rowGroupSize, maxPaddingSize);
        this.out = fs.create(file, overwriteFlag, DFS_BUFFER_SIZE_DEFAULT, fs.getDefaultReplication(file),
                dfsBlockSize);

    } else {
        this.alignment = NoAlignment.get(rowGroupSize);
        this.out = fs.create(file, overwriteFlag);
    }
}

From source file:cn.uway.util.apache.parquet.hadoop.ParquetFileWriter.java

License:Apache License

/**
 * FOR TESTING ONLY.// w  w  w .j  a  v  a2 s  . c o  m
 *
 * @param configuration Hadoop configuration
 * @param schema the schema of the data
 * @param file the file to write to
 * @param rowAndBlockSize the row group size
 * @throws IOException if the file can not be created
 */
ParquetFileWriter(Configuration configuration, MessageType schema, Path file, long rowAndBlockSize,
        int maxPaddingSize) throws IOException {
    FileSystem fs = file.getFileSystem(configuration);
    this.schema = schema;
    this.alignment = PaddingAlignment.get(rowAndBlockSize, rowAndBlockSize, maxPaddingSize);
    this.out = fs.create(file, true, DFS_BUFFER_SIZE_DEFAULT, fs.getDefaultReplication(file), rowAndBlockSize);
}

From source file:com.thinkbiganalytics.nifi.v2.hdfs.AbstractHadoopProcessor.java

License:Apache License

/**
 * Reset Hadoop Configuration and FileSystem based on the supplied configuration resources.
 *
 * @param configResources for configuration
 * @param dir             the target directory
 * @param context         for context, which gives access to the principal
 * @return An HdfsResources object//from   w  w w  . j  a  v a 2s. c  om
 * @throws IOException if unable to access HDFS
 */
HdfsResources resetHDFSResources(String configResources, String dir, ProcessContext context)
        throws IOException {
    // org.apache.hadoop.conf.Configuration saves its current thread context class loader to use for threads that it creates
    // later to do I/O. We need this class loader to be the NarClassLoader instead of the magical
    // NarThreadContextClassLoader.
    ClassLoader savedClassLoader = Thread.currentThread().getContextClassLoader();
    Thread.currentThread().setContextClassLoader(this.getClass().getClassLoader());

    try {
        Configuration config = getConfigurationFromResources(configResources);

        // first check for timeout on HDFS connection, because FileSystem has a hard coded 15 minute timeout
        checkHdfsUriForTimeout(config);

        // disable caching of Configuration and FileSystem objects, else we cannot reconfigure the processor without a complete
        // restart
        String disableCacheName = String.format("fs.%s.impl.disable.cache",
                FileSystem.getDefaultUri(config).getScheme());
        config.set(disableCacheName, "true");

        // If kerberos is enabled, create the file system as the kerberos principal
        // -- use RESOURCE_LOCK to guarantee UserGroupInformation is accessed by only a single thread at at time
        FileSystem fs = null;
        UserGroupInformation ugi = null;
        synchronized (RESOURCES_LOCK) {
            if (config.get("hadoop.security.authentication").equalsIgnoreCase("kerberos")) {
                String principal = context.getProperty(kerberosPrincipal).getValue();
                String keyTab = context.getProperty(kerberosKeytab).getValue();
                UserGroupInformation.setConfiguration(config);
                ugi = UserGroupInformation.loginUserFromKeytabAndReturnUGI(principal, keyTab);
                modifyConfig(context, config);
                fs = getFileSystemAsUser(config, ugi);
                lastKerberosReloginTime = System.currentTimeMillis() / 1000;
            } else {
                config.set("ipc.client.fallback-to-simple-auth-allowed", "true");
                config.set("hadoop.security.authentication", "simple");
                modifyConfig(context, config);
                fs = getFileSystem(config);
            }
        }
        getLog().info(
                "Initialized a new HDFS File System with working dir: {} default block size: {} default replication: {} config: {}",
                new Object[] { fs.getWorkingDirectory(), fs.getDefaultBlockSize(new Path(dir)),
                        fs.getDefaultReplication(new Path(dir)), config.toString() });
        return new HdfsResources(config, fs, ugi);
    } finally {
        Thread.currentThread().setContextClassLoader(savedClassLoader);
    }
}

From source file:com.uber.hoodie.common.util.FSUtils.java

License:Apache License

public static Short getDefaultReplication(FileSystem fs, Path path) {
    return fs.getDefaultReplication(path);
}

From source file:gobblin.util.SerializationUtils.java

License:Apache License

/**
 * Serialize a {@link State} instance to a file.
 *
 * @param fs the {@link FileSystem} instance for creating the file
 * @param jobStateFilePath the path to the file
 * @param state the {@link State} to serialize
 * @param <T> the {@link State} object type
 * @throws IOException if it fails to serialize the {@link State} instance
 *//*from   w  ww  . ja va  2s .co m*/
public static <T extends State> void serializeState(FileSystem fs, Path jobStateFilePath, T state)
        throws IOException {
    serializeState(fs, jobStateFilePath, state, fs.getDefaultReplication(jobStateFilePath));
}

From source file:org.apache.carbondata.core.datastorage.store.impl.FileFactory.java

License:Apache License

public static DataOutputStream getDataOutputStream(String path, FileType fileType, int bufferSize,
        long blockSize) throws IOException {
    path = path.replace("\\", "/");
    switch (fileType) {
    case LOCAL:/*from  w w  w  . j  a  v  a  2  s  .  c  o  m*/
        return new DataOutputStream(new BufferedOutputStream(new FileOutputStream(path), bufferSize));
    case HDFS:
    case VIEWFS:
        Path pt = new Path(path);
        FileSystem fs = pt.getFileSystem(configuration);
        FSDataOutputStream stream = fs.create(pt, true, bufferSize, fs.getDefaultReplication(pt), blockSize);
        return stream;
    default:
        return new DataOutputStream(new BufferedOutputStream(new FileOutputStream(path), bufferSize));
    }
}

From source file:org.apache.carbondata.core.datastore.impl.FileFactory.java

License:Apache License

public static DataOutputStream getDataOutputStream(String path, FileType fileType, int bufferSize,
        long blockSize) throws IOException {
    path = path.replace("\\", "/");
    switch (fileType) {
    case LOCAL:/*from  w  w  w  .j  a  v  a2  s  .  c o  m*/
        path = getUpdatedFilePath(path, fileType);
        return new DataOutputStream(new BufferedOutputStream(new FileOutputStream(path), bufferSize));
    case HDFS:
    case ALLUXIO:
    case VIEWFS:
        Path pt = new Path(path);
        FileSystem fs = pt.getFileSystem(configuration);
        FSDataOutputStream stream = fs.create(pt, true, bufferSize, fs.getDefaultReplication(pt), blockSize);
        return stream;
    default:
        path = getUpdatedFilePath(path, fileType);
        return new DataOutputStream(new BufferedOutputStream(new FileOutputStream(path), bufferSize));
    }
}

From source file:org.apache.gobblin.data.management.copy.CopyableFile.java

License:Apache License

/**
 * @return desired replication for destination file.
 *///from  w  w  w .  j a  v  a  2 s  . c o m
public short getReplication(FileSystem targetFs) {
    return getPreserve().preserve(PreserveAttributes.Option.REPLICATION) ? getOrigin().getReplication()
            : targetFs.getDefaultReplication(this.destination);
}

From source file:org.apache.nifi.processors.hadoop.AbstractHadoopProcessor.java

License:Apache License

HdfsResources resetHDFSResources(String configResources, ProcessContext context) throws IOException {
    Configuration config = new ExtendedConfiguration(getLogger());
    config.setClassLoader(Thread.currentThread().getContextClassLoader());

    getConfigurationFromResources(config, configResources);

    // give sub-classes a chance to process configuration
    preProcessConfiguration(config, context);

    // first check for timeout on HDFS connection, because FileSystem has a hard coded 15 minute timeout
    checkHdfsUriForTimeout(config);/*from  w  w  w .j  a v a 2s.  c o m*/

    // disable caching of Configuration and FileSystem objects, else we cannot reconfigure the processor without a complete
    // restart
    String disableCacheName = String.format("fs.%s.impl.disable.cache",
            FileSystem.getDefaultUri(config).getScheme());
    config.set(disableCacheName, "true");

    // If kerberos is enabled, create the file system as the kerberos principal
    // -- use RESOURCE_LOCK to guarantee UserGroupInformation is accessed by only a single thread at at time
    FileSystem fs;
    UserGroupInformation ugi;
    synchronized (RESOURCES_LOCK) {
        if (SecurityUtil.isSecurityEnabled(config)) {
            String principal = context.getProperty(kerberosProperties.getKerberosPrincipal())
                    .evaluateAttributeExpressions().getValue();
            String keyTab = context.getProperty(kerberosProperties.getKerberosKeytab())
                    .evaluateAttributeExpressions().getValue();
            ugi = SecurityUtil.loginKerberos(config, principal, keyTab);
            fs = getFileSystemAsUser(config, ugi);
        } else {
            config.set("ipc.client.fallback-to-simple-auth-allowed", "true");
            config.set("hadoop.security.authentication", "simple");
            ugi = SecurityUtil.loginSimple(config);
            fs = getFileSystemAsUser(config, ugi);
        }
    }
    getLogger().debug("resetHDFSResources UGI {}", new Object[] { ugi });

    final Path workingDir = fs.getWorkingDirectory();
    getLogger().info(
            "Initialized a new HDFS File System with working dir: {} default block size: {} default replication: {} config: {}",
            new Object[] { workingDir, fs.getDefaultBlockSize(workingDir), fs.getDefaultReplication(workingDir),
                    config.toString() });

    return new HdfsResources(config, fs, ugi);
}

From source file:org.apache.nifi.processors.hadoop.PutHDFS.java

License:Apache License

@Override
public void onTrigger(ProcessContext context, ProcessSession session) throws ProcessException {
    final FlowFile flowFile = session.get();
    if (flowFile == null) {
        return;//  www  .j  a  v a 2 s.  com
    }

    final FileSystem hdfs = getFileSystem();
    final Configuration configuration = getConfiguration();
    final UserGroupInformation ugi = getUserGroupInformation();

    if (configuration == null || hdfs == null || ugi == null) {
        getLogger().error("HDFS not configured properly");
        session.transfer(flowFile, REL_FAILURE);
        context.yield();
        return;
    }

    ugi.doAs(new PrivilegedAction<Object>() {
        @Override
        public Object run() {
            Path tempDotCopyFile = null;
            FlowFile putFlowFile = flowFile;
            try {
                final String dirValue = context.getProperty(DIRECTORY).evaluateAttributeExpressions(putFlowFile)
                        .getValue();
                final Path configuredRootDirPath = new Path(dirValue);

                final String conflictResponse = context.getProperty(CONFLICT_RESOLUTION).getValue();

                final Double blockSizeProp = context.getProperty(BLOCK_SIZE).asDataSize(DataUnit.B);
                final long blockSize = blockSizeProp != null ? blockSizeProp.longValue()
                        : hdfs.getDefaultBlockSize(configuredRootDirPath);

                final Double bufferSizeProp = context.getProperty(BUFFER_SIZE).asDataSize(DataUnit.B);
                final int bufferSize = bufferSizeProp != null ? bufferSizeProp.intValue()
                        : configuration.getInt(BUFFER_SIZE_KEY, BUFFER_SIZE_DEFAULT);

                final Integer replicationProp = context.getProperty(REPLICATION_FACTOR).asInteger();
                final short replication = replicationProp != null ? replicationProp.shortValue()
                        : hdfs.getDefaultReplication(configuredRootDirPath);

                final CompressionCodec codec = getCompressionCodec(context, configuration);

                final String filename = codec != null
                        ? putFlowFile.getAttribute(CoreAttributes.FILENAME.key()) + codec.getDefaultExtension()
                        : putFlowFile.getAttribute(CoreAttributes.FILENAME.key());

                final Path tempCopyFile = new Path(configuredRootDirPath, "." + filename);
                final Path copyFile = new Path(configuredRootDirPath, filename);

                // Create destination directory if it does not exist
                try {
                    if (!hdfs.getFileStatus(configuredRootDirPath).isDirectory()) {
                        throw new IOException(
                                configuredRootDirPath.toString() + " already exists and is not a directory");
                    }
                } catch (FileNotFoundException fe) {
                    if (!hdfs.mkdirs(configuredRootDirPath)) {
                        throw new IOException(configuredRootDirPath.toString() + " could not be created");
                    }
                    changeOwner(context, hdfs, configuredRootDirPath, flowFile);
                }

                final boolean destinationExists = hdfs.exists(copyFile);

                // If destination file already exists, resolve that based on processor configuration
                if (destinationExists) {
                    switch (conflictResponse) {
                    case REPLACE_RESOLUTION:
                        if (hdfs.delete(copyFile, false)) {
                            getLogger().info("deleted {} in order to replace with the contents of {}",
                                    new Object[] { copyFile, putFlowFile });
                        }
                        break;
                    case IGNORE_RESOLUTION:
                        session.transfer(putFlowFile, REL_SUCCESS);
                        getLogger().info(
                                "transferring {} to success because file with same name already exists",
                                new Object[] { putFlowFile });
                        return null;
                    case FAIL_RESOLUTION:
                        session.transfer(session.penalize(putFlowFile), REL_FAILURE);
                        getLogger().warn(
                                "penalizing {} and routing to failure because file with same name already exists",
                                new Object[] { putFlowFile });
                        return null;
                    default:
                        break;
                    }
                }

                // Write FlowFile to temp file on HDFS
                final StopWatch stopWatch = new StopWatch(true);
                session.read(putFlowFile, new InputStreamCallback() {

                    @Override
                    public void process(InputStream in) throws IOException {
                        OutputStream fos = null;
                        Path createdFile = null;
                        try {
                            if (conflictResponse.equals(APPEND_RESOLUTION_AV.getValue()) && destinationExists) {
                                fos = hdfs.append(copyFile, bufferSize);
                            } else {
                                fos = hdfs.create(tempCopyFile, true, bufferSize, replication, blockSize);
                            }
                            if (codec != null) {
                                fos = codec.createOutputStream(fos);
                            }
                            createdFile = tempCopyFile;
                            BufferedInputStream bis = new BufferedInputStream(in);
                            StreamUtils.copy(bis, fos);
                            bis = null;
                            fos.flush();
                        } finally {
                            try {
                                if (fos != null) {
                                    fos.close();
                                }
                            } catch (RemoteException re) {
                                // when talking to remote HDFS clusters, we don't notice problems until fos.close()
                                if (createdFile != null) {
                                    try {
                                        hdfs.delete(createdFile, false);
                                    } catch (Throwable ignore) {
                                    }
                                }
                                throw re;
                            } catch (Throwable ignore) {
                            }
                            fos = null;
                        }
                    }

                });
                stopWatch.stop();
                final String dataRate = stopWatch.calculateDataRate(putFlowFile.getSize());
                final long millis = stopWatch.getDuration(TimeUnit.MILLISECONDS);
                tempDotCopyFile = tempCopyFile;

                if (!conflictResponse.equals(APPEND_RESOLUTION_AV.getValue())
                        || (conflictResponse.equals(APPEND_RESOLUTION_AV.getValue()) && !destinationExists)) {
                    boolean renamed = false;
                    for (int i = 0; i < 10; i++) { // try to rename multiple times.
                        if (hdfs.rename(tempCopyFile, copyFile)) {
                            renamed = true;
                            break;// rename was successful
                        }
                        Thread.sleep(200L);// try waiting to let whatever might cause rename failure to resolve
                    }
                    if (!renamed) {
                        hdfs.delete(tempCopyFile, false);
                        throw new ProcessException("Copied file to HDFS but could not rename dot file "
                                + tempCopyFile + " to its final filename");
                    }

                    changeOwner(context, hdfs, copyFile, flowFile);
                }

                getLogger().info("copied {} to HDFS at {} in {} milliseconds at a rate of {}",
                        new Object[] { putFlowFile, copyFile, millis, dataRate });

                final String newFilename = copyFile.getName();
                final String hdfsPath = copyFile.getParent().toString();
                putFlowFile = session.putAttribute(putFlowFile, CoreAttributes.FILENAME.key(), newFilename);
                putFlowFile = session.putAttribute(putFlowFile, ABSOLUTE_HDFS_PATH_ATTRIBUTE, hdfsPath);
                final Path qualifiedPath = copyFile.makeQualified(hdfs.getUri(), hdfs.getWorkingDirectory());
                session.getProvenanceReporter().send(putFlowFile, qualifiedPath.toString());

                session.transfer(putFlowFile, REL_SUCCESS);

            } catch (final Throwable t) {
                if (tempDotCopyFile != null) {
                    try {
                        hdfs.delete(tempDotCopyFile, false);
                    } catch (Exception e) {
                        getLogger().error("Unable to remove temporary file {} due to {}",
                                new Object[] { tempDotCopyFile, e });
                    }
                }
                getLogger().error("Failed to write to HDFS due to {}", new Object[] { t });
                session.transfer(session.penalize(putFlowFile), REL_FAILURE);
                context.yield();
            }

            return null;
        }
    });
}