Example usage for org.apache.hadoop.fs FileSystem getUri

List of usage examples for org.apache.hadoop.fs FileSystem getUri

Introduction

On this page you can find example usages of org.apache.hadoop.fs FileSystem getUri.

Prototype

public abstract URI getUri();

Document

Returns a URI which identifies this FileSystem.
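
A minimal sketch of calling getUri() directly (the configuration, host and path below are illustrative assumptions, not taken from the examples that follow):

import java.net.URI;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class GetUriExample {
    public static void main(String[] args) throws Exception {
        // Picks up core-site.xml / hdfs-site.xml from the classpath if present,
        // otherwise defaults to the local file system (file:///).
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(conf);

        // The URI identifies this FileSystem, e.g. hdfs://namenode:8020 or file:///
        URI uri = fs.getUri();
        System.out.println("FileSystem URI: " + uri);

        // The recurring pattern in the examples below: use getUri() together with
        // the working directory to turn a plain path into a fully qualified one.
        Path qualified = new Path("/tmp/example.txt").makeQualified(fs.getUri(), fs.getWorkingDirectory());
        System.out.println("Qualified path: " + qualified);
    }
}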

Usage

From source file:org.apache.kylin.storage.hbase.util.IIDeployCoprocessorCLI.java

License:Apache License
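
In this example, getUri() appears at the end of the method: the path of the uploaded coprocessor jar is qualified against the FileSystem's URI via makeQualified, so the caller receives an absolute, scheme-qualified path.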

private static Path uploadCoprocessorJar(String localCoprocessorJar, FileSystem fileSystem,
        Set<String> oldJarPaths) throws IOException {
    Path uploadPath = null;
    File localCoprocessorFile = new File(localCoprocessorJar);

    // check existing jars
    if (oldJarPaths == null) {
        oldJarPaths = new HashSet<String>();
    }
    Path coprocessorDir = getCoprocessorHDFSDir(fileSystem, KylinConfig.getInstanceFromEnv());
    for (FileStatus fileStatus : fileSystem.listStatus(coprocessorDir)) {
        if (isSame(localCoprocessorFile, fileStatus)) {
            uploadPath = fileStatus.getPath();
            break;
        }
        String filename = fileStatus.getPath().toString();
        if (filename.endsWith(".jar")) {
            oldJarPaths.add(filename);
        }
    }

    // upload if not existing
    if (uploadPath == null) {
        // figure out a unique new jar file name
        Set<String> oldJarNames = new HashSet<String>();
        for (String path : oldJarPaths) {
            oldJarNames.add(new Path(path).getName());
        }
        String baseName = getBaseFileName(localCoprocessorJar);
        String newName = null;
        int i = 0;
        while (newName == null) {
            newName = baseName + "-" + (i++) + ".jar";
            if (oldJarNames.contains(newName))
                newName = null;
        }

        // upload
        uploadPath = new Path(coprocessorDir, newName);
        FileInputStream in = null;
        FSDataOutputStream out = null;
        try {
            in = new FileInputStream(localCoprocessorFile);
            out = fileSystem.create(uploadPath);
            IOUtils.copy(in, out);
        } finally {
            IOUtils.closeQuietly(in);
            IOUtils.closeQuietly(out);
        }

        fileSystem.setTimes(uploadPath, localCoprocessorFile.lastModified(), -1);

    }

    uploadPath = uploadPath.makeQualified(fileSystem.getUri(), null);
    return uploadPath;
}

From source file:org.apache.metron.maas.service.Client.java

License:Apache License
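
Here getUri() is used only for logging: when security is enabled, the client obtains delegation tokens for the default file system and logs each token together with fs.getUri().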

/**
 * Main run function for the client
 * @return true if application completed successfully
 * @throws IOException
 * @throws YarnException
 */
public boolean run() throws IOException, YarnException {

    LOG.info("Running Client");
    yarnClient.start();

    YarnClusterMetrics clusterMetrics = yarnClient.getYarnClusterMetrics();
    LOG.info("Got Cluster metric info from ASM" + ", numNodeManagers=" + clusterMetrics.getNumNodeManagers());

    List<NodeReport> clusterNodeReports = yarnClient.getNodeReports(NodeState.RUNNING);
    LOG.info("Got Cluster node info from ASM");
    for (NodeReport node : clusterNodeReports) {
        LOG.info("Got node report from ASM for" + ", nodeId=" + node.getNodeId() + ", nodeAddress"
                + node.getHttpAddress() + ", nodeRackName" + node.getRackName() + ", nodeNumContainers"
                + node.getNumContainers());
    }

    QueueInfo queueInfo = yarnClient.getQueueInfo(this.amQueue);
    LOG.info("Queue info" + ", queueName=" + queueInfo.getQueueName() + ", queueCurrentCapacity="
            + queueInfo.getCurrentCapacity() + ", queueMaxCapacity=" + queueInfo.getMaximumCapacity()
            + ", queueApplicationCount=" + queueInfo.getApplications().size() + ", queueChildQueueCount="
            + queueInfo.getChildQueues().size());

    List<QueueUserACLInfo> listAclInfo = yarnClient.getQueueAclsInfo();
    for (QueueUserACLInfo aclInfo : listAclInfo) {
        for (QueueACL userAcl : aclInfo.getUserAcls()) {
            LOG.info("User ACL Info for Queue" + ", queueName=" + aclInfo.getQueueName() + ", userAcl="
                    + userAcl.name());
        }
    }

    if (domainId != null && domainId.length() > 0 && toCreateDomain) {
        prepareTimelineDomain();
    }

    // Get a new application id
    YarnClientApplication app = yarnClient.createApplication();
    GetNewApplicationResponse appResponse = app.getNewApplicationResponse();
    // TODO get min/max resource capabilities from RM and change memory ask if needed
    // If we do not have min/max, we may not be able to correctly request
    // the required resources from the RM for the app master
    // Memory ask has to be a multiple of min and less than max.
    // Dump out information about cluster capability as seen by the resource manager
    int maxMem = appResponse.getMaximumResourceCapability().getMemory();
    LOG.info("Max mem capabililty of resources in this cluster " + maxMem);

    // A resource ask cannot exceed the max.
    if (amMemory > maxMem) {
        LOG.info("AM memory specified above max threshold of cluster. Using max value." + ", specified="
                + amMemory + ", max=" + maxMem);
        amMemory = maxMem;
    }

    int maxVCores = appResponse.getMaximumResourceCapability().getVirtualCores();
    LOG.info("Max virtual cores capabililty of resources in this cluster " + maxVCores);

    if (amVCores > maxVCores) {
        LOG.info("AM virtual cores specified above max threshold of cluster. " + "Using max value."
                + ", specified=" + amVCores + ", max=" + maxVCores);
        amVCores = maxVCores;
    }

    // set the application name
    ApplicationSubmissionContext appContext = app.getApplicationSubmissionContext();
    ApplicationId appId = appContext.getApplicationId();

    appContext.setKeepContainersAcrossApplicationAttempts(keepContainers);
    appContext.setApplicationName(appName);

    if (attemptFailuresValidityInterval >= 0) {
        appContext.setAttemptFailuresValidityInterval(attemptFailuresValidityInterval);
    }

    // set local resources for the application master
    // local files or archives as needed
    // In this scenario, the jar file for the application master is part of the local resources
    Map<String, LocalResource> localResources = new HashMap<String, LocalResource>();

    LOG.info("Copy App Master jar from local filesystem and add to local environment");
    // Copy the application master jar to the filesystem
    // Create a local resource to point to the destination jar path
    FileSystem fs = FileSystem.get(conf);
    Path ajPath = addToLocalResources(fs, appMasterJar, appMasterJarPath, appId.toString(), localResources,
            null);

    // Set the log4j properties if needed
    if (!log4jPropFile.isEmpty()) {
        addToLocalResources(fs, log4jPropFile, log4jPath, appId.toString(), localResources, null);
    }

    // Set the necessary security tokens as needed
    //amContainer.setContainerTokens(containerToken);

    // Set the env variables to be setup in the env where the application master will be run
    LOG.info("Set the environment for the application master");
    Map<String, String> env = new HashMap<String, String>();

    // put location of shell script into env
    // using the env info, the application master will create the correct local resource for the
    // eventual containers that will be launched to execute the shell scripts
    if (domainId != null && domainId.length() > 0) {
        env.put(Constants.TIMELINEDOMAIN, domainId);
    }

    // Add AppMaster.jar location to classpath
    // At some point we should not be required to add
    // the hadoop specific classpaths to the env.
    // It should be provided out of the box.
    // For now setting all required classpaths including
    // the classpath to "." for the application jar
    StringBuilder classPathEnv = new StringBuilder(Environment.CLASSPATH.$$())
            .append(ApplicationConstants.CLASS_PATH_SEPARATOR).append("./*");
    for (String c : conf.getStrings(YarnConfiguration.YARN_APPLICATION_CLASSPATH,
            YarnConfiguration.DEFAULT_YARN_CROSS_PLATFORM_APPLICATION_CLASSPATH)) {
        classPathEnv.append(ApplicationConstants.CLASS_PATH_SEPARATOR);
        classPathEnv.append(c.trim());
    }
    classPathEnv.append(ApplicationConstants.CLASS_PATH_SEPARATOR).append("./log4j.properties");

    // add the runtime classpath needed for tests to work
    if (conf.getBoolean(YarnConfiguration.IS_MINI_YARN_CLUSTER, false)) {
        classPathEnv.append(':');
        classPathEnv.append(System.getProperty("java.class.path"));
    }

    env.put("CLASSPATH", classPathEnv.toString());

    // Set the necessary command to execute the application master
    Vector<CharSequence> vargs = new Vector<CharSequence>(30);

    // Set java executable command
    LOG.info("Setting up app master command");
    vargs.add(Environment.JAVA_HOME.$$() + "/bin/java");
    // Set Xmx based on am memory size
    vargs.add("-Xmx" + amMemory + "m");
    // Set class name
    vargs.add(appMasterMainClass);
    // Set params for Application Master
    vargs.add(ApplicationMaster.AMOptions.toArgs(ApplicationMaster.AMOptions.ZK_QUORUM.of(zkQuorum),
            ApplicationMaster.AMOptions.ZK_ROOT.of(zkRoot),
            ApplicationMaster.AMOptions.APP_JAR_PATH.of(ajPath.toString())));
    if (null != nodeLabelExpression) {
        appContext.setNodeLabelExpression(nodeLabelExpression);
    }
    for (Map.Entry<String, String> entry : shellEnv.entrySet()) {
        vargs.add("--shell_env " + entry.getKey() + "=" + entry.getValue());
    }

    vargs.add("1>" + ApplicationConstants.LOG_DIR_EXPANSION_VAR + "/AppMaster.stdout");
    vargs.add("2>" + ApplicationConstants.LOG_DIR_EXPANSION_VAR + "/AppMaster.stderr");

    // Get final command
    StringBuilder command = new StringBuilder();
    for (CharSequence str : vargs) {
        command.append(str).append(" ");
    }

    LOG.info("Completed setting up app master command " + command.toString());
    List<String> commands = new ArrayList<String>();
    commands.add(command.toString());

    // Set up the container launch context for the application master
    ContainerLaunchContext amContainer = ContainerLaunchContext.newInstance(localResources, env, commands, null,
            null, null);

    // Set up resource type requirements
    // For now, both memory and vcores are supported, so we set memory and
    // vcores requirements
    Resource capability = Resource.newInstance(amMemory, amVCores);
    appContext.setResource(capability);

    // Service data is a binary blob that can be passed to the application
    // Not needed in this scenario

    // Setup security tokens
    if (UserGroupInformation.isSecurityEnabled()) {
        // Note: Credentials class is marked as LimitedPrivate for HDFS and MapReduce
        Credentials credentials = new Credentials();
        String tokenRenewer = conf.get(YarnConfiguration.RM_PRINCIPAL);
        if (tokenRenewer == null || tokenRenewer.length() == 0) {
            throw new IOException("Can't get Master Kerberos principal for the RM to use as renewer");
        }

        // For now, only getting tokens for the default file-system.
        final Token<?> tokens[] = fs.addDelegationTokens(tokenRenewer, credentials);
        if (tokens != null) {
            for (Token<?> token : tokens) {
                LOG.info("Got dt for " + fs.getUri() + "; " + token);
            }
        }
        DataOutputBuffer dob = new DataOutputBuffer();
        credentials.writeTokenStorageToStream(dob);
        ByteBuffer fsTokens = ByteBuffer.wrap(dob.getData(), 0, dob.getLength());
        amContainer.setTokens(fsTokens);
    }

    appContext.setAMContainerSpec(amContainer);

    // Set the priority for the application master
    Priority pri = Priority.newInstance(amPriority);
    appContext.setPriority(pri);

    // Set the queue to which this application is to be submitted in the RM
    appContext.setQueue(amQueue);

    // Submit the application to the applications manager
    // SubmitApplicationResponse submitResp = applicationsManager.submitApplication(appRequest);
    // Ignore the response as either a valid response object is returned on success
    // or an exception thrown to denote some form of a failure
    LOG.info("Submitting application to ASM");

    yarnClient.submitApplication(appContext);

    // Monitor the application
    return monitorApplication(appId);

}

From source file:org.apache.nifi.processors.hadoop.AbstractFetchHDFSRecord.java

License:Apache License
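
In this processor, getUri() and getWorkingDirectory() are combined in makeQualified to build the fully qualified path that is logged and reported as the provenance fetch location after a successful read.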

@Override
public void onTrigger(final ProcessContext context, final ProcessSession session) throws ProcessException {
    // do this before getting a flow file so that we always get a chance to attempt Kerberos relogin
    final FileSystem fileSystem = getFileSystem();
    final Configuration configuration = getConfiguration();
    final UserGroupInformation ugi = getUserGroupInformation();

    if (configuration == null || fileSystem == null || ugi == null) {
        getLogger().error(
                "Processor not configured properly because Configuration, FileSystem, or UserGroupInformation was null");
        context.yield();
        return;
    }

    final FlowFile originalFlowFile = session.get();
    if (originalFlowFile == null) {
        context.yield();
        return;
    }

    ugi.doAs((PrivilegedAction<Object>) () -> {
        FlowFile child = null;
        final String filenameValue = context.getProperty(FILENAME)
                .evaluateAttributeExpressions(originalFlowFile).getValue();
        try {
            final Path path = new Path(filenameValue);
            final AtomicReference<Throwable> exceptionHolder = new AtomicReference<>(null);
            final AtomicReference<WriteResult> writeResult = new AtomicReference<>();

            final RecordSetWriterFactory recordSetWriterFactory = context.getProperty(RECORD_WRITER)
                    .asControllerService(RecordSetWriterFactory.class);

            final StopWatch stopWatch = new StopWatch(true);

            // use a child FlowFile so that if any error occurs we can route the original untouched FlowFile to retry/failure
            child = session.create(originalFlowFile);

            final AtomicReference<String> mimeTypeRef = new AtomicReference<>();
            child = session.write(child, (final OutputStream rawOut) -> {
                try (final BufferedOutputStream out = new BufferedOutputStream(rawOut);
                        final HDFSRecordReader recordReader = createHDFSRecordReader(context, originalFlowFile,
                                configuration, path)) {

                    Record record = recordReader.nextRecord();
                    final RecordSchema schema = recordSetWriterFactory.getSchema(
                            originalFlowFile.getAttributes(), record == null ? null : record.getSchema());

                    try (final RecordSetWriter recordSetWriter = recordSetWriterFactory
                            .createWriter(getLogger(), schema, out)) {
                        recordSetWriter.beginRecordSet();
                        if (record != null) {
                            recordSetWriter.write(record);
                        }

                        while ((record = recordReader.nextRecord()) != null) {
                            recordSetWriter.write(record);
                        }

                        writeResult.set(recordSetWriter.finishRecordSet());
                        mimeTypeRef.set(recordSetWriter.getMimeType());
                    }
                } catch (Exception e) {
                    exceptionHolder.set(e);
                }
            });

            stopWatch.stop();

            // if any errors happened within the session.write then throw the exception so we jump
            // into one of the appropriate catch blocks below
            if (exceptionHolder.get() != null) {
                throw exceptionHolder.get();
            }

            FlowFile successFlowFile = postProcess(context, session, child, path);

            final Map<String, String> attributes = new HashMap<>(writeResult.get().getAttributes());
            attributes.put(RECORD_COUNT_ATTR, String.valueOf(writeResult.get().getRecordCount()));
            attributes.put(CoreAttributes.MIME_TYPE.key(), mimeTypeRef.get());
            successFlowFile = session.putAllAttributes(successFlowFile, attributes);

            final Path qualifiedPath = path.makeQualified(fileSystem.getUri(),
                    fileSystem.getWorkingDirectory());
            getLogger().info("Successfully received content from {} for {} in {} milliseconds",
                    new Object[] { qualifiedPath, successFlowFile, stopWatch.getDuration() });
            session.getProvenanceReporter().fetch(successFlowFile, qualifiedPath.toString(),
                    stopWatch.getDuration(TimeUnit.MILLISECONDS));
            session.transfer(successFlowFile, REL_SUCCESS);
            session.remove(originalFlowFile);
            return null;

        } catch (final FileNotFoundException | AccessControlException e) {
            getLogger().error("Failed to retrieve content from {} for {} due to {}; routing to failure",
                    new Object[] { filenameValue, originalFlowFile, e });
            final FlowFile failureFlowFile = session.putAttribute(originalFlowFile, FETCH_FAILURE_REASON_ATTR,
                    e.getMessage() == null ? e.toString() : e.getMessage());
            session.transfer(failureFlowFile, REL_FAILURE);
        } catch (final IOException | FlowFileAccessException e) {
            getLogger().error("Failed to retrieve content from {} for {} due to {}; routing to retry",
                    new Object[] { filenameValue, originalFlowFile, e });
            session.transfer(session.penalize(originalFlowFile), REL_RETRY);
            context.yield();
        } catch (final Throwable t) {
            getLogger().error("Failed to retrieve content from {} for {} due to {}; routing to failure",
                    new Object[] { filenameValue, originalFlowFile, t });
            final FlowFile failureFlowFile = session.putAttribute(originalFlowFile, FETCH_FAILURE_REASON_ATTR,
                    t.getMessage() == null ? t.toString() : t.getMessage());
            session.transfer(failureFlowFile, REL_FAILURE);
        }

        // if we got this far then we weren't successful so we need to clean up the child flow file if it got initialized
        if (child != null) {
            session.remove(child);
        }

        return null;
    });

}

From source file:org.apache.nifi.processors.hadoop.AbstractPutHDFSRecord.java

License:Apache License
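
As in the fetch processor above, getUri() is used near the end of the success path to qualify the destination file before reporting the provenance send event.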

@Override
public void onTrigger(final ProcessContext context, final ProcessSession session) throws ProcessException {
    // do this before getting a flow file so that we always get a chance to attempt Kerberos relogin
    final FileSystem fileSystem = getFileSystem();
    final Configuration configuration = getConfiguration();
    final UserGroupInformation ugi = getUserGroupInformation();

    if (configuration == null || fileSystem == null || ugi == null) {
        getLogger().error(
                "Processor not configured properly because Configuration, FileSystem, or UserGroupInformation was null");
        context.yield();
        return;
    }

    final FlowFile flowFile = session.get();
    if (flowFile == null) {
        context.yield();
        return;
    }

    ugi.doAs((PrivilegedAction<Object>) () -> {
        Path tempDotCopyFile = null;
        FlowFile putFlowFile = flowFile;
        try {
            final String filenameValue = putFlowFile.getAttribute(CoreAttributes.FILENAME.key()); // TODO codec extension
            final String directoryValue = context.getProperty(DIRECTORY)
                    .evaluateAttributeExpressions(putFlowFile).getValue();

            // create the directory if it doesn't exist
            final Path directoryPath = new Path(directoryValue);
            createDirectory(fileSystem, directoryPath, remoteOwner, remoteGroup);

            // write to tempFile first and on success rename to destFile
            final Path tempFile = new Path(directoryPath, "." + filenameValue);
            final Path destFile = new Path(directoryPath, filenameValue);

            final boolean destinationExists = fileSystem.exists(destFile) || fileSystem.exists(tempFile);
            final boolean shouldOverwrite = context.getProperty(OVERWRITE).asBoolean();

            // if the tempFile or destFile already exist, and overwrite is set to false, then transfer to failure
            if (destinationExists && !shouldOverwrite) {
                session.transfer(session.penalize(putFlowFile), REL_FAILURE);
                getLogger().warn(
                        "penalizing {} and routing to failure because file with same name already exists",
                        new Object[] { putFlowFile });
                return null;
            }

            final AtomicReference<Throwable> exceptionHolder = new AtomicReference<>(null);
            final AtomicReference<WriteResult> writeResult = new AtomicReference<>();
            final RecordReaderFactory recordReaderFactory = context.getProperty(RECORD_READER)
                    .asControllerService(RecordReaderFactory.class);

            final FlowFile flowFileIn = putFlowFile;
            final StopWatch stopWatch = new StopWatch(true);

            // Read records from the incoming FlowFile and write them to the tempFile
            session.read(putFlowFile, (final InputStream rawIn) -> {
                RecordReader recordReader = null;
                HDFSRecordWriter recordWriter = null;

                try (final BufferedInputStream in = new BufferedInputStream(rawIn)) {

                    // if we fail to create the RecordReader then we want to route to failure, so we need to
                    // handle this separately from the other IOExceptions which normally route to retry
                    try {
                        recordReader = recordReaderFactory.createRecordReader(flowFileIn, in, getLogger());
                    } catch (Exception e) {
                        final RecordReaderFactoryException rrfe = new RecordReaderFactoryException(
                                "Unable to create RecordReader", e);
                        exceptionHolder.set(rrfe);
                        return;
                    }

                    final RecordSet recordSet = recordReader.createRecordSet();

                    recordWriter = createHDFSRecordWriter(context, flowFile, configuration, tempFile,
                            recordReader.getSchema());
                    writeResult.set(recordWriter.write(recordSet));
                } catch (Exception e) {
                    exceptionHolder.set(e);
                } finally {
                    IOUtils.closeQuietly(recordReader);
                    IOUtils.closeQuietly(recordWriter);
                }
            });
            stopWatch.stop();

            final String dataRate = stopWatch.calculateDataRate(putFlowFile.getSize());
            final long millis = stopWatch.getDuration(TimeUnit.MILLISECONDS);
            tempDotCopyFile = tempFile;

            // if any errors happened within the session.read then throw the exception so we jump
            // into one of the appropriate catch blocks below
            if (exceptionHolder.get() != null) {
                throw exceptionHolder.get();
            }

            // Attempt to rename from the tempFile to destFile, and change owner if successfully renamed
            rename(fileSystem, tempFile, destFile);
            changeOwner(fileSystem, destFile, remoteOwner, remoteGroup);

            getLogger().info("Wrote {} to {} in {} milliseconds at a rate of {}",
                    new Object[] { putFlowFile, destFile, millis, dataRate });

            putFlowFile = postProcess(context, session, putFlowFile, destFile);

            final String newFilename = destFile.getName();
            final String hdfsPath = destFile.getParent().toString();

            // Update the filename and absolute path attributes
            final Map<String, String> attributes = new HashMap<>(writeResult.get().getAttributes());
            attributes.put(CoreAttributes.FILENAME.key(), newFilename);
            attributes.put(ABSOLUTE_HDFS_PATH_ATTRIBUTE, hdfsPath);
            attributes.put(RECORD_COUNT_ATTR, String.valueOf(writeResult.get().getRecordCount()));
            putFlowFile = session.putAllAttributes(putFlowFile, attributes);

            // Send a provenance event and transfer to success
            final Path qualifiedPath = destFile.makeQualified(fileSystem.getUri(),
                    fileSystem.getWorkingDirectory());
            session.getProvenanceReporter().send(putFlowFile, qualifiedPath.toString());
            session.transfer(putFlowFile, REL_SUCCESS);

        } catch (IOException | FlowFileAccessException e) {
            deleteQuietly(fileSystem, tempDotCopyFile);
            getLogger().error("Failed to write due to {}", new Object[] { e });
            session.transfer(session.penalize(putFlowFile), REL_RETRY);
            context.yield();
        } catch (Throwable t) {
            deleteQuietly(fileSystem, tempDotCopyFile);
            getLogger().error("Failed to write due to {}", new Object[] { t });
            session.transfer(putFlowFile, REL_FAILURE);
        }

        return null;
    });
}

From source file:org.apache.nifi.processors.hadoop.DeleteHDFS.java

License:Apache License
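
Here each deleted path is qualified with getUri() and the working directory so the provenance reporter records the full HDFS location of the remote delete.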

@Override
public void onTrigger(ProcessContext context, ProcessSession session) throws ProcessException {
    final FlowFile originalFlowFile = session.get();

    // If this processor has an incoming connection, then do not run unless a
    // FlowFile is actually sent through
    if (originalFlowFile == null && context.hasIncomingConnection()) {
        context.yield();
        return;
    }

    // We need a FlowFile to report provenance correctly.
    FlowFile flowFile = originalFlowFile != null ? originalFlowFile : session.create();

    final String fileOrDirectoryName = context.getProperty(FILE_OR_DIRECTORY)
            .evaluateAttributeExpressions(flowFile).getValue();

    final FileSystem fileSystem = getFileSystem();
    try {
        // Check if the user has supplied a file or directory pattern
        List<Path> pathList = Lists.newArrayList();
        if (GLOB_MATCHER.reset(fileOrDirectoryName).find()) {
            FileStatus[] fileStatuses = fileSystem.globStatus(new Path(fileOrDirectoryName));
            if (fileStatuses != null) {
                for (FileStatus fileStatus : fileStatuses) {
                    pathList.add(fileStatus.getPath());
                }
            }
        } else {
            pathList.add(new Path(fileOrDirectoryName));
        }

        int failedPath = 0;
        for (Path path : pathList) {
            if (fileSystem.exists(path)) {
                try {
                    Map<String, String> attributes = Maps.newHashMapWithExpectedSize(2);
                    attributes.put("hdfs.filename", path.getName());
                    attributes.put("hdfs.path", path.getParent().toString());
                    flowFile = session.putAllAttributes(flowFile, attributes);

                    fileSystem.delete(path, context.getProperty(RECURSIVE).asBoolean());
                    getLogger().debug("For flowfile {} Deleted file at path {} with name {}",
                            new Object[] { originalFlowFile, path.getParent().toString(), path.getName() });
                    final Path qualifiedPath = path.makeQualified(fileSystem.getUri(),
                            fileSystem.getWorkingDirectory());
                    session.getProvenanceReporter().invokeRemoteProcess(flowFile, qualifiedPath.toString());
                } catch (IOException ioe) {
                    // One possible scenario is that the IOException is permissions based, however it would be impractical to check every possible
                    // external HDFS authorization tool (Ranger, Sentry, etc). Local ACLs could be checked but the operation would be expensive.
                    getLogger().warn("Failed to delete file or directory", ioe);

                    Map<String, String> attributes = Maps.newHashMapWithExpectedSize(1);
                    // The error message is helpful in understanding at a flowfile level what caused the IOException (which ACL is denying the operation, e.g.)
                    attributes.put("hdfs.error.message", ioe.getMessage());

                    session.transfer(session.putAllAttributes(session.clone(flowFile), attributes),
                            REL_FAILURE);
                    failedPath++;
                }
            }
        }

        if (failedPath == 0) {
            session.transfer(flowFile, DeleteHDFS.REL_SUCCESS);
        } else {
            // If any path has been failed to be deleted, remove the FlowFile as it's been cloned and sent to failure.
            session.remove(flowFile);
        }
    } catch (IOException e) {
        getLogger().error("Error processing delete for flowfile {} due to {}",
                new Object[] { flowFile, e.getMessage() }, e);
        session.transfer(flowFile, DeleteHDFS.REL_FAILURE);
    }

}

From source file:org.apache.nifi.processors.hadoop.FetchHDFS.java

License:Apache License
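
getUri() is used up front to build the qualified path that is then reused in log messages, provenance fetch events, and error reporting.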

@Override
public void onTrigger(final ProcessContext context, final ProcessSession session) throws ProcessException {
    FlowFile flowFile = session.get();
    if (flowFile == null) {
        return;
    }

    final FileSystem hdfs = getFileSystem();
    final UserGroupInformation ugi = getUserGroupInformation();
    final String filenameValue = context.getProperty(FILENAME).evaluateAttributeExpressions(flowFile)
            .getValue();

    final Path path;
    try {
        path = new Path(filenameValue);
    } catch (IllegalArgumentException e) {
        getLogger().error("Failed to retrieve content from {} for {} due to {}; routing to failure",
                new Object[] { filenameValue, flowFile, e });
        flowFile = session.putAttribute(flowFile, "hdfs.failure.reason", e.getMessage());
        flowFile = session.penalize(flowFile);
        session.transfer(flowFile, REL_FAILURE);
        return;
    }

    final StopWatch stopWatch = new StopWatch(true);
    final FlowFile finalFlowFile = flowFile;

    ugi.doAs(new PrivilegedAction<Object>() {
        @Override
        public Object run() {
            InputStream stream = null;
            CompressionCodec codec = null;
            Configuration conf = getConfiguration();
            final CompressionCodecFactory compressionCodecFactory = new CompressionCodecFactory(conf);
            final CompressionType compressionType = CompressionType
                    .valueOf(context.getProperty(COMPRESSION_CODEC).toString());
            final boolean inferCompressionCodec = compressionType == CompressionType.AUTOMATIC;

            if (inferCompressionCodec) {
                codec = compressionCodecFactory.getCodec(path);
            } else if (compressionType != CompressionType.NONE) {
                codec = getCompressionCodec(context, getConfiguration());
            }

            FlowFile flowFile = finalFlowFile;
            final Path qualifiedPath = path.makeQualified(hdfs.getUri(), hdfs.getWorkingDirectory());
            try {
                final String outputFilename;
                final String originalFilename = path.getName();
                stream = hdfs.open(path, 16384);

                // Check if compression codec is defined (inferred or otherwise)
                if (codec != null) {
                    stream = codec.createInputStream(stream);
                    outputFilename = StringUtils.removeEnd(originalFilename, codec.getDefaultExtension());
                } else {
                    outputFilename = originalFilename;
                }

                flowFile = session.importFrom(stream, finalFlowFile);
                flowFile = session.putAttribute(flowFile, CoreAttributes.FILENAME.key(), outputFilename);

                stopWatch.stop();
                getLogger().info("Successfully received content from {} for {} in {}",
                        new Object[] { qualifiedPath, flowFile, stopWatch.getDuration() });
                session.getProvenanceReporter().fetch(flowFile, qualifiedPath.toString(),
                        stopWatch.getDuration(TimeUnit.MILLISECONDS));
                session.transfer(flowFile, REL_SUCCESS);
            } catch (final FileNotFoundException | AccessControlException e) {
                getLogger().error("Failed to retrieve content from {} for {} due to {}; routing to failure",
                        new Object[] { qualifiedPath, flowFile, e });
                flowFile = session.putAttribute(flowFile, "hdfs.failure.reason", e.getMessage());
                flowFile = session.penalize(flowFile);
                session.transfer(flowFile, REL_FAILURE);
            } catch (final IOException e) {
                getLogger().error(
                        "Failed to retrieve content from {} for {} due to {}; routing to comms.failure",
                        new Object[] { qualifiedPath, flowFile, e });
                flowFile = session.penalize(flowFile);
                session.transfer(flowFile, REL_COMMS_FAILURE);
            } finally {
                IOUtils.closeQuietly(stream);
            }

            return null;
        }
    });

}

From source file:org.apache.nifi.processors.hadoop.PutHDFS.java

License:Apache License
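
After the dot-file rename succeeds, the final copy target is qualified with getUri() and the working directory and reported as the provenance send location.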

@Override
public void onTrigger(ProcessContext context, ProcessSession session) throws ProcessException {
    final FlowFile flowFile = session.get();
    if (flowFile == null) {
        return;
    }

    final FileSystem hdfs = getFileSystem();
    final Configuration configuration = getConfiguration();
    final UserGroupInformation ugi = getUserGroupInformation();

    if (configuration == null || hdfs == null || ugi == null) {
        getLogger().error("HDFS not configured properly");
        session.transfer(flowFile, REL_FAILURE);
        context.yield();
        return;
    }

    ugi.doAs(new PrivilegedAction<Object>() {
        @Override
        public Object run() {
            Path tempDotCopyFile = null;
            FlowFile putFlowFile = flowFile;
            try {
                final String dirValue = context.getProperty(DIRECTORY).evaluateAttributeExpressions(putFlowFile)
                        .getValue();
                final Path configuredRootDirPath = new Path(dirValue);

                final String conflictResponse = context.getProperty(CONFLICT_RESOLUTION).getValue();

                final Double blockSizeProp = context.getProperty(BLOCK_SIZE).asDataSize(DataUnit.B);
                final long blockSize = blockSizeProp != null ? blockSizeProp.longValue()
                        : hdfs.getDefaultBlockSize(configuredRootDirPath);

                final Double bufferSizeProp = context.getProperty(BUFFER_SIZE).asDataSize(DataUnit.B);
                final int bufferSize = bufferSizeProp != null ? bufferSizeProp.intValue()
                        : configuration.getInt(BUFFER_SIZE_KEY, BUFFER_SIZE_DEFAULT);

                final Integer replicationProp = context.getProperty(REPLICATION_FACTOR).asInteger();
                final short replication = replicationProp != null ? replicationProp.shortValue()
                        : hdfs.getDefaultReplication(configuredRootDirPath);

                final CompressionCodec codec = getCompressionCodec(context, configuration);

                final String filename = codec != null
                        ? putFlowFile.getAttribute(CoreAttributes.FILENAME.key()) + codec.getDefaultExtension()
                        : putFlowFile.getAttribute(CoreAttributes.FILENAME.key());

                final Path tempCopyFile = new Path(configuredRootDirPath, "." + filename);
                final Path copyFile = new Path(configuredRootDirPath, filename);

                // Create destination directory if it does not exist
                try {
                    if (!hdfs.getFileStatus(configuredRootDirPath).isDirectory()) {
                        throw new IOException(
                                configuredRootDirPath.toString() + " already exists and is not a directory");
                    }
                } catch (FileNotFoundException fe) {
                    if (!hdfs.mkdirs(configuredRootDirPath)) {
                        throw new IOException(configuredRootDirPath.toString() + " could not be created");
                    }
                    changeOwner(context, hdfs, configuredRootDirPath, flowFile);
                }

                final boolean destinationExists = hdfs.exists(copyFile);

                // If destination file already exists, resolve that based on processor configuration
                if (destinationExists) {
                    switch (conflictResponse) {
                    case REPLACE_RESOLUTION:
                        if (hdfs.delete(copyFile, false)) {
                            getLogger().info("deleted {} in order to replace with the contents of {}",
                                    new Object[] { copyFile, putFlowFile });
                        }
                        break;
                    case IGNORE_RESOLUTION:
                        session.transfer(putFlowFile, REL_SUCCESS);
                        getLogger().info(
                                "transferring {} to success because file with same name already exists",
                                new Object[] { putFlowFile });
                        return null;
                    case FAIL_RESOLUTION:
                        session.transfer(session.penalize(putFlowFile), REL_FAILURE);
                        getLogger().warn(
                                "penalizing {} and routing to failure because file with same name already exists",
                                new Object[] { putFlowFile });
                        return null;
                    default:
                        break;
                    }
                }

                // Write FlowFile to temp file on HDFS
                final StopWatch stopWatch = new StopWatch(true);
                session.read(putFlowFile, new InputStreamCallback() {

                    @Override
                    public void process(InputStream in) throws IOException {
                        OutputStream fos = null;
                        Path createdFile = null;
                        try {
                            if (conflictResponse.equals(APPEND_RESOLUTION_AV.getValue()) && destinationExists) {
                                fos = hdfs.append(copyFile, bufferSize);
                            } else {
                                fos = hdfs.create(tempCopyFile, true, bufferSize, replication, blockSize);
                            }
                            if (codec != null) {
                                fos = codec.createOutputStream(fos);
                            }
                            createdFile = tempCopyFile;
                            BufferedInputStream bis = new BufferedInputStream(in);
                            StreamUtils.copy(bis, fos);
                            bis = null;
                            fos.flush();
                        } finally {
                            try {
                                if (fos != null) {
                                    fos.close();
                                }
                            } catch (RemoteException re) {
                                // when talking to remote HDFS clusters, we don't notice problems until fos.close()
                                if (createdFile != null) {
                                    try {
                                        hdfs.delete(createdFile, false);
                                    } catch (Throwable ignore) {
                                    }
                                }
                                throw re;
                            } catch (Throwable ignore) {
                            }
                            fos = null;
                        }
                    }

                });
                stopWatch.stop();
                final String dataRate = stopWatch.calculateDataRate(putFlowFile.getSize());
                final long millis = stopWatch.getDuration(TimeUnit.MILLISECONDS);
                tempDotCopyFile = tempCopyFile;

                if (!conflictResponse.equals(APPEND_RESOLUTION_AV.getValue())
                        || (conflictResponse.equals(APPEND_RESOLUTION_AV.getValue()) && !destinationExists)) {
                    boolean renamed = false;
                    for (int i = 0; i < 10; i++) { // try to rename multiple times.
                        if (hdfs.rename(tempCopyFile, copyFile)) {
                            renamed = true;
                            break;// rename was successful
                        }
                        Thread.sleep(200L);// try waiting to let whatever might cause rename failure to resolve
                    }
                    if (!renamed) {
                        hdfs.delete(tempCopyFile, false);
                        throw new ProcessException("Copied file to HDFS but could not rename dot file "
                                + tempCopyFile + " to its final filename");
                    }

                    changeOwner(context, hdfs, copyFile, flowFile);
                }

                getLogger().info("copied {} to HDFS at {} in {} milliseconds at a rate of {}",
                        new Object[] { putFlowFile, copyFile, millis, dataRate });

                final String newFilename = copyFile.getName();
                final String hdfsPath = copyFile.getParent().toString();
                putFlowFile = session.putAttribute(putFlowFile, CoreAttributes.FILENAME.key(), newFilename);
                putFlowFile = session.putAttribute(putFlowFile, ABSOLUTE_HDFS_PATH_ATTRIBUTE, hdfsPath);
                final Path qualifiedPath = copyFile.makeQualified(hdfs.getUri(), hdfs.getWorkingDirectory());
                session.getProvenanceReporter().send(putFlowFile, qualifiedPath.toString());

                session.transfer(putFlowFile, REL_SUCCESS);

            } catch (final Throwable t) {
                if (tempDotCopyFile != null) {
                    try {
                        hdfs.delete(tempDotCopyFile, false);
                    } catch (Exception e) {
                        getLogger().error("Unable to remove temporary file {} due to {}",
                                new Object[] { tempDotCopyFile, e });
                    }
                }
                getLogger().error("Failed to write to HDFS due to {}", new Object[] { t });
                session.transfer(session.penalize(putFlowFile), REL_FAILURE);
                context.yield();
            }

            return null;
        }
    });
}

From source file:org.apache.nifi.processors.hadoop.PutHDFSTest.java

License:Apache License
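
In this test, getUri() qualifies a local target directory so it can be cleaned up with fs.delete() once the failure handling has been verified.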

@Test
public void testPutFileWithException() throws IOException {
    // Refer to comment in the BeforeClass method for an explanation
    assumeTrue(isNotWindows());

    String dirName = "target/testPutFileWrongPermissions";
    File file = new File(dirName);
    file.mkdirs();
    Configuration config = new Configuration();
    FileSystem fs = FileSystem.get(config);
    Path p = new Path(dirName).makeQualified(fs.getUri(), fs.getWorkingDirectory());

    final KerberosProperties testKerberosProperties = kerberosProperties;
    TestRunner runner = TestRunners.newTestRunner(new PutHDFS() {
        @Override
        protected void changeOwner(ProcessContext context, FileSystem hdfs, Path name, FlowFile flowFile) {
            throw new ProcessException("Forcing Exception to get thrown in order to verify proper handling");
        }

        @Override
        protected KerberosProperties getKerberosProperties(File kerberosConfigFile) {
            return testKerberosProperties;
        }
    });
    runner.setProperty(PutHDFS.DIRECTORY, dirName);
    runner.setProperty(PutHDFS.CONFLICT_RESOLUTION, "replace");

    try (FileInputStream fis = new FileInputStream("src/test/resources/testdata/randombytes-1");) {
        Map<String, String> attributes = new HashMap<String, String>();
        attributes.put(CoreAttributes.FILENAME.key(), "randombytes-1");
        runner.enqueue(fis, attributes);
        runner.run();
    }

    List<MockFlowFile> failedFlowFiles = runner
            .getFlowFilesForRelationship(new Relationship.Builder().name("failure").build());
    assertFalse(failedFlowFiles.isEmpty());
    assertTrue(failedFlowFiles.get(0).isPenalized());

    fs.delete(p, true);
}

From source file:org.apache.oozie.action.hadoop.HadoopUriFinder.java

License:Apache License
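
This helper compares the scheme, host and port of fs.getUri() against an incoming URI and, when they are compatible, rebuilds the URI with the file system's own authority.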

/**
 * Spark compares URIs based on scheme, host and port. Here we convert URIs
 * into the default format so that Spark won't think they belong to
 * different file systems. This avoids an extra copy of files that
 * already exist on the same HDFS.
 *
 * @param fs
 * @param fileUri
 * @return fixed uri
 * @throws URISyntaxException
 */
static URI getFixedUri(final FileSystem fs, final URI fileUri) throws URISyntaxException {
    if (fs.getUri().getScheme().equals(fileUri.getScheme())
            && (fs.getUri().getHost().equals(fileUri.getHost()) || fileUri.getHost() == null)
            && (fs.getUri().getPort() == -1 || fileUri.getPort() == -1
                    || fs.getUri().getPort() == fileUri.getPort())) {
        return new URI(fs.getUri().getScheme(), fileUri.getUserInfo(), fs.getUri().getHost(),
                fs.getUri().getPort(), fileUri.getPath(), fileUri.getQuery(), fileUri.getFragment());
    }
    return fileUri;
}
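
A hedged usage sketch of the helper above, assuming the default FileSystem is HDFS; getFixedUri is package-private, so this also assumes a caller inside org.apache.oozie.action.hadoop, and the input URI is illustrative:

// A URI with the right scheme but no host/port, as often produced for sharelib jars.
FileSystem fs = FileSystem.get(new Configuration());
URI raw = new URI("hdfs:///user/test/share/lib/spark/example.jar");
URI fixed = HadoopUriFinder.getFixedUri(fs, raw);
// 'fixed' now carries the scheme, host and port of fs.getUri(), so Spark treats it
// as belonging to the same file system as the default one.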

From source file:org.apache.oozie.service.TestShareLibService.java

License:Apache License
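
Here the test prepends fs.getUri() to a path string to produce an absolute sharelib mapping file location for the Oozie configuration.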

@Test
public void testShareLibLoadFile() throws Exception {
    services = new Services();
    FileSystem fs = getFileSystem();
    setSystemProps();
    createTestShareLibMetaFile(fs);
    Configuration conf = services.get(ConfigurationService.class).getConf();
    conf.set(ShareLibService.SHARELIB_MAPPING_FILE, fs.getUri() + "/user/test/config.properties");
    conf.set(ShareLibService.SHIP_LAUNCHER_JAR, "true");
    try {
        services.init();
        ShareLibService shareLibService = Services.get().get(ShareLibService.class);
        assertTrue(
                shareLibService.getShareLibJars("something_new").get(0).getName().endsWith("somethingNew.jar"));
        assertTrue(shareLibService.getShareLibJars("pig").get(0).getName().endsWith("pig.jar"));
        fs.delete(new Path("shareLibPath/"), true);
    } finally {
        services.destroy();
    }
}