List of usage examples for org.apache.hadoop.fs FileSystem getUri
public abstract URI getUri();
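Before the project examples, here is a minimal, self-contained sketch of the typical use of getUri(): obtaining the filesystem's identifying URI and using it to fully qualify a Path, which is the pattern most of the examples below follow. The configuration and the path "/tmp/example.txt" are illustrative assumptions, not taken from any of the projects listed.

import java.net.URI;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class GetUriExample {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(conf);

        // getUri() returns the URI identifying this filesystem (scheme and authority),
        // e.g. hdfs://namenode:8020 or file:///
        URI fsUri = fs.getUri();
        System.out.println("Default filesystem URI: " + fsUri);

        // Typical use: fully qualify a path against this filesystem
        Path qualified = new Path("/tmp/example.txt").makeQualified(fs.getUri(), fs.getWorkingDirectory());
        System.out.println("Qualified path: " + qualified);
    }
}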
From source file:org.apache.kylin.storage.hbase.util.IIDeployCoprocessorCLI.java
License:Apache License
private static Path uploadCoprocessorJar(String localCoprocessorJar, FileSystem fileSystem, Set<String> oldJarPaths)
        throws IOException {
    Path uploadPath = null;
    File localCoprocessorFile = new File(localCoprocessorJar);

    // check existing jars
    if (oldJarPaths == null) {
        oldJarPaths = new HashSet<String>();
    }
    Path coprocessorDir = getCoprocessorHDFSDir(fileSystem, KylinConfig.getInstanceFromEnv());
    for (FileStatus fileStatus : fileSystem.listStatus(coprocessorDir)) {
        if (isSame(localCoprocessorFile, fileStatus)) {
            uploadPath = fileStatus.getPath();
            break;
        }
        String filename = fileStatus.getPath().toString();
        if (filename.endsWith(".jar")) {
            oldJarPaths.add(filename);
        }
    }

    // upload if not existing
    if (uploadPath == null) {
        // figure out a unique new jar file name
        Set<String> oldJarNames = new HashSet<String>();
        for (String path : oldJarPaths) {
            oldJarNames.add(new Path(path).getName());
        }
        String baseName = getBaseFileName(localCoprocessorJar);
        String newName = null;
        int i = 0;
        while (newName == null) {
            newName = baseName + "-" + (i++) + ".jar";
            if (oldJarNames.contains(newName))
                newName = null;
        }

        // upload
        uploadPath = new Path(coprocessorDir, newName);
        FileInputStream in = null;
        FSDataOutputStream out = null;
        try {
            in = new FileInputStream(localCoprocessorFile);
            out = fileSystem.create(uploadPath);
            IOUtils.copy(in, out);
        } finally {
            IOUtils.closeQuietly(in);
            IOUtils.closeQuietly(out);
        }

        fileSystem.setTimes(uploadPath, localCoprocessorFile.lastModified(), -1);
    }

    uploadPath = uploadPath.makeQualified(fileSystem.getUri(), null);
    return uploadPath;
}
From source file:org.apache.metron.maas.service.Client.java
License:Apache License
/**
 * Main run function for the client
 * @return true if application completed successfully
 * @throws IOException
 * @throws YarnException
 */
public boolean run() throws IOException, YarnException {
    LOG.info("Running Client");
    yarnClient.start();

    YarnClusterMetrics clusterMetrics = yarnClient.getYarnClusterMetrics();
    LOG.info("Got Cluster metric info from ASM" + ", numNodeManagers=" + clusterMetrics.getNumNodeManagers());

    List<NodeReport> clusterNodeReports = yarnClient.getNodeReports(NodeState.RUNNING);
    LOG.info("Got Cluster node info from ASM");
    for (NodeReport node : clusterNodeReports) {
        LOG.info("Got node report from ASM for" + ", nodeId=" + node.getNodeId() + ", nodeAddress"
                + node.getHttpAddress() + ", nodeRackName" + node.getRackName() + ", nodeNumContainers"
                + node.getNumContainers());
    }

    QueueInfo queueInfo = yarnClient.getQueueInfo(this.amQueue);
    LOG.info("Queue info" + ", queueName=" + queueInfo.getQueueName() + ", queueCurrentCapacity="
            + queueInfo.getCurrentCapacity() + ", queueMaxCapacity=" + queueInfo.getMaximumCapacity()
            + ", queueApplicationCount=" + queueInfo.getApplications().size() + ", queueChildQueueCount="
            + queueInfo.getChildQueues().size());

    List<QueueUserACLInfo> listAclInfo = yarnClient.getQueueAclsInfo();
    for (QueueUserACLInfo aclInfo : listAclInfo) {
        for (QueueACL userAcl : aclInfo.getUserAcls()) {
            LOG.info("User ACL Info for Queue" + ", queueName=" + aclInfo.getQueueName() + ", userAcl="
                    + userAcl.name());
        }
    }

    if (domainId != null && domainId.length() > 0 && toCreateDomain) {
        prepareTimelineDomain();
    }

    // Get a new application id
    YarnClientApplication app = yarnClient.createApplication();
    GetNewApplicationResponse appResponse = app.getNewApplicationResponse();
    // TODO get min/max resource capabilities from RM and change memory ask if needed
    // If we do not have min/max, we may not be able to correctly request
    // the required resources from the RM for the app master
    // Memory ask has to be a multiple of min and less than max.
    // Dump out information about cluster capability as seen by the resource manager
    int maxMem = appResponse.getMaximumResourceCapability().getMemory();
    LOG.info("Max mem capability of resources in this cluster " + maxMem);

    // A resource ask cannot exceed the max.
    if (amMemory > maxMem) {
        LOG.info("AM memory specified above max threshold of cluster. Using max value." + ", specified="
                + amMemory + ", max=" + maxMem);
        amMemory = maxMem;
    }

    int maxVCores = appResponse.getMaximumResourceCapability().getVirtualCores();
    LOG.info("Max virtual cores capability of resources in this cluster " + maxVCores);

    if (amVCores > maxVCores) {
        LOG.info("AM virtual cores specified above max threshold of cluster. Using max value."
                + ", specified=" + amVCores + ", max=" + maxVCores);
        amVCores = maxVCores;
    }

    // set the application name
    ApplicationSubmissionContext appContext = app.getApplicationSubmissionContext();
    ApplicationId appId = appContext.getApplicationId();

    appContext.setKeepContainersAcrossApplicationAttempts(keepContainers);
    appContext.setApplicationName(appName);

    if (attemptFailuresValidityInterval >= 0) {
        appContext.setAttemptFailuresValidityInterval(attemptFailuresValidityInterval);
    }

    // set local resources for the application master
    // local files or archives as needed
    // In this scenario, the jar file for the application master is part of the local resources
    Map<String, LocalResource> localResources = new HashMap<String, LocalResource>();

    LOG.info("Copy App Master jar from local filesystem and add to local environment");
    // Copy the application master jar to the filesystem
    // Create a local resource to point to the destination jar path
    FileSystem fs = FileSystem.get(conf);
    Path ajPath = addToLocalResources(fs, appMasterJar, appMasterJarPath, appId.toString(), localResources, null);

    // Set the log4j properties if needed
    if (!log4jPropFile.isEmpty()) {
        addToLocalResources(fs, log4jPropFile, log4jPath, appId.toString(), localResources, null);
    }

    // Set the necessary security tokens as needed
    //amContainer.setContainerTokens(containerToken);

    // Set the env variables to be setup in the env where the application master will be run
    LOG.info("Set the environment for the application master");
    Map<String, String> env = new HashMap<String, String>();

    // put location of shell script into env
    // using the env info, the application master will create the correct local resource for the
    // eventual containers that will be launched to execute the shell scripts
    if (domainId != null && domainId.length() > 0) {
        env.put(Constants.TIMELINEDOMAIN, domainId);
    }

    // Add AppMaster.jar location to classpath
    // At some point we should not be required to add
    // the hadoop specific classpaths to the env.
    // It should be provided out of the box.
    // For now setting all required classpaths including
    // the classpath to "." for the application jar
    StringBuilder classPathEnv = new StringBuilder(Environment.CLASSPATH.$$())
            .append(ApplicationConstants.CLASS_PATH_SEPARATOR).append("./*");
    for (String c : conf.getStrings(YarnConfiguration.YARN_APPLICATION_CLASSPATH,
            YarnConfiguration.DEFAULT_YARN_CROSS_PLATFORM_APPLICATION_CLASSPATH)) {
        classPathEnv.append(ApplicationConstants.CLASS_PATH_SEPARATOR);
        classPathEnv.append(c.trim());
    }
    classPathEnv.append(ApplicationConstants.CLASS_PATH_SEPARATOR).append("./log4j.properties");

    // add the runtime classpath needed for tests to work
    if (conf.getBoolean(YarnConfiguration.IS_MINI_YARN_CLUSTER, false)) {
        classPathEnv.append(':');
        classPathEnv.append(System.getProperty("java.class.path"));
    }

    env.put("CLASSPATH", classPathEnv.toString());

    // Set the necessary command to execute the application master
    Vector<CharSequence> vargs = new Vector<CharSequence>(30);

    // Set java executable command
    LOG.info("Setting up app master command");
    vargs.add(Environment.JAVA_HOME.$$() + "/bin/java");
    // Set Xmx based on am memory size
    vargs.add("-Xmx" + amMemory + "m");
    // Set class name
    vargs.add(appMasterMainClass);
    // Set params for Application Master
    vargs.add(ApplicationMaster.AMOptions.toArgs(ApplicationMaster.AMOptions.ZK_QUORUM.of(zkQuorum),
            ApplicationMaster.AMOptions.ZK_ROOT.of(zkRoot),
            ApplicationMaster.AMOptions.APP_JAR_PATH.of(ajPath.toString())));
    if (null != nodeLabelExpression) {
        appContext.setNodeLabelExpression(nodeLabelExpression);
    }
    for (Map.Entry<String, String> entry : shellEnv.entrySet()) {
        vargs.add("--shell_env " + entry.getKey() + "=" + entry.getValue());
    }

    vargs.add("1>" + ApplicationConstants.LOG_DIR_EXPANSION_VAR + "/AppMaster.stdout");
    vargs.add("2>" + ApplicationConstants.LOG_DIR_EXPANSION_VAR + "/AppMaster.stderr");

    // Get final command
    StringBuilder command = new StringBuilder();
    for (CharSequence str : vargs) {
        command.append(str).append(" ");
    }

    LOG.info("Completed setting up app master command " + command.toString());
    List<String> commands = new ArrayList<String>();
    commands.add(command.toString());

    // Set up the container launch context for the application master
    ContainerLaunchContext amContainer = ContainerLaunchContext.newInstance(localResources, env, commands,
            null, null, null);

    // Set up resource type requirements
    // For now, both memory and vcores are supported, so we set memory and
    // vcores requirements
    Resource capability = Resource.newInstance(amMemory, amVCores);
    appContext.setResource(capability);

    // Service data is a binary blob that can be passed to the application
    // Not needed in this scenario

    // Setup security tokens
    if (UserGroupInformation.isSecurityEnabled()) {
        // Note: Credentials class is marked as LimitedPrivate for HDFS and MapReduce
        Credentials credentials = new Credentials();
        String tokenRenewer = conf.get(YarnConfiguration.RM_PRINCIPAL);
        if (tokenRenewer == null || tokenRenewer.length() == 0) {
            throw new IOException("Can't get Master Kerberos principal for the RM to use as renewer");
        }

        // For now, only getting tokens for the default file-system.
        final Token<?> tokens[] = fs.addDelegationTokens(tokenRenewer, credentials);
        if (tokens != null) {
            for (Token<?> token : tokens) {
                LOG.info("Got dt for " + fs.getUri() + "; " + token);
            }
        }
        DataOutputBuffer dob = new DataOutputBuffer();
        credentials.writeTokenStorageToStream(dob);
        ByteBuffer fsTokens = ByteBuffer.wrap(dob.getData(), 0, dob.getLength());
        amContainer.setTokens(fsTokens);
    }

    appContext.setAMContainerSpec(amContainer);

    // Set the priority for the application master
    Priority pri = Priority.newInstance(amPriority);
    appContext.setPriority(pri);

    // Set the queue to which this application is to be submitted in the RM
    appContext.setQueue(amQueue);

    // Submit the application to the applications manager
    // SubmitApplicationResponse submitResp = applicationsManager.submitApplication(appRequest);
    // Ignore the response as either a valid response object is returned on success
    // or an exception thrown to denote some form of a failure
    LOG.info("Submitting application to ASM");

    yarnClient.submitApplication(appContext);

    // Monitor the application
    return monitorApplication(appId);
}
From source file:org.apache.nifi.processors.hadoop.AbstractFetchHDFSRecord.java
License:Apache License
@Override
public void onTrigger(final ProcessContext context, final ProcessSession session) throws ProcessException {
    // do this before getting a flow file so that we always get a chance to attempt Kerberos relogin
    final FileSystem fileSystem = getFileSystem();
    final Configuration configuration = getConfiguration();
    final UserGroupInformation ugi = getUserGroupInformation();

    if (configuration == null || fileSystem == null || ugi == null) {
        getLogger().error("Processor not configured properly because Configuration, FileSystem, or UserGroupInformation was null");
        context.yield();
        return;
    }

    final FlowFile originalFlowFile = session.get();
    if (originalFlowFile == null) {
        context.yield();
        return;
    }

    ugi.doAs((PrivilegedAction<Object>) () -> {
        FlowFile child = null;
        final String filenameValue = context.getProperty(FILENAME).evaluateAttributeExpressions(originalFlowFile).getValue();
        try {
            final Path path = new Path(filenameValue);

            final AtomicReference<Throwable> exceptionHolder = new AtomicReference<>(null);
            final AtomicReference<WriteResult> writeResult = new AtomicReference<>();

            final RecordSetWriterFactory recordSetWriterFactory = context.getProperty(RECORD_WRITER)
                    .asControllerService(RecordSetWriterFactory.class);

            final StopWatch stopWatch = new StopWatch(true);

            // use a child FlowFile so that if any error occurs we can route the original untouched FlowFile to retry/failure
            child = session.create(originalFlowFile);

            final AtomicReference<String> mimeTypeRef = new AtomicReference<>();
            child = session.write(child, (final OutputStream rawOut) -> {
                try (final BufferedOutputStream out = new BufferedOutputStream(rawOut);
                        final HDFSRecordReader recordReader = createHDFSRecordReader(context, originalFlowFile,
                                configuration, path)) {

                    Record record = recordReader.nextRecord();
                    final RecordSchema schema = recordSetWriterFactory.getSchema(originalFlowFile.getAttributes(),
                            record == null ? null : record.getSchema());

                    try (final RecordSetWriter recordSetWriter = recordSetWriterFactory.createWriter(getLogger(), schema, out)) {
                        recordSetWriter.beginRecordSet();
                        if (record != null) {
                            recordSetWriter.write(record);
                        }

                        while ((record = recordReader.nextRecord()) != null) {
                            recordSetWriter.write(record);
                        }

                        writeResult.set(recordSetWriter.finishRecordSet());
                        mimeTypeRef.set(recordSetWriter.getMimeType());
                    }
                } catch (Exception e) {
                    exceptionHolder.set(e);
                }
            });
            stopWatch.stop();

            // if any errors happened within the session.write then throw the exception so we jump
            // into one of the appropriate catch blocks below
            if (exceptionHolder.get() != null) {
                throw exceptionHolder.get();
            }

            FlowFile successFlowFile = postProcess(context, session, child, path);

            final Map<String, String> attributes = new HashMap<>(writeResult.get().getAttributes());
            attributes.put(RECORD_COUNT_ATTR, String.valueOf(writeResult.get().getRecordCount()));
            attributes.put(CoreAttributes.MIME_TYPE.key(), mimeTypeRef.get());
            successFlowFile = session.putAllAttributes(successFlowFile, attributes);

            final Path qualifiedPath = path.makeQualified(fileSystem.getUri(), fileSystem.getWorkingDirectory());
            getLogger().info("Successfully received content from {} for {} in {} milliseconds",
                    new Object[] { qualifiedPath, successFlowFile, stopWatch.getDuration() });
            session.getProvenanceReporter().fetch(successFlowFile, qualifiedPath.toString(),
                    stopWatch.getDuration(TimeUnit.MILLISECONDS));
            session.transfer(successFlowFile, REL_SUCCESS);
            session.remove(originalFlowFile);
            return null;

        } catch (final FileNotFoundException | AccessControlException e) {
            getLogger().error("Failed to retrieve content from {} for {} due to {}; routing to failure",
                    new Object[] { filenameValue, originalFlowFile, e });
            final FlowFile failureFlowFile = session.putAttribute(originalFlowFile, FETCH_FAILURE_REASON_ATTR,
                    e.getMessage() == null ? e.toString() : e.getMessage());
            session.transfer(failureFlowFile, REL_FAILURE);
        } catch (final IOException | FlowFileAccessException e) {
            getLogger().error("Failed to retrieve content from {} for {} due to {}; routing to retry",
                    new Object[] { filenameValue, originalFlowFile, e });
            session.transfer(session.penalize(originalFlowFile), REL_RETRY);
            context.yield();
        } catch (final Throwable t) {
            getLogger().error("Failed to retrieve content from {} for {} due to {}; routing to failure",
                    new Object[] { filenameValue, originalFlowFile, t });
            final FlowFile failureFlowFile = session.putAttribute(originalFlowFile, FETCH_FAILURE_REASON_ATTR,
                    t.getMessage() == null ? t.toString() : t.getMessage());
            session.transfer(failureFlowFile, REL_FAILURE);
        }

        // if we got this far then we weren't successful so we need to clean up the child flow file if it got initialized
        if (child != null) {
            session.remove(child);
        }

        return null;
    });
}
From source file:org.apache.nifi.processors.hadoop.AbstractPutHDFSRecord.java
License:Apache License
@Override
public void onTrigger(final ProcessContext context, final ProcessSession session) throws ProcessException {
    // do this before getting a flow file so that we always get a chance to attempt Kerberos relogin
    final FileSystem fileSystem = getFileSystem();
    final Configuration configuration = getConfiguration();
    final UserGroupInformation ugi = getUserGroupInformation();

    if (configuration == null || fileSystem == null || ugi == null) {
        getLogger().error("Processor not configured properly because Configuration, FileSystem, or UserGroupInformation was null");
        context.yield();
        return;
    }

    final FlowFile flowFile = session.get();
    if (flowFile == null) {
        context.yield();
        return;
    }

    ugi.doAs((PrivilegedAction<Object>) () -> {
        Path tempDotCopyFile = null;
        FlowFile putFlowFile = flowFile;
        try {
            final String filenameValue = putFlowFile.getAttribute(CoreAttributes.FILENAME.key()); // TODO codec extension
            final String directoryValue = context.getProperty(DIRECTORY).evaluateAttributeExpressions(putFlowFile).getValue();

            // create the directory if it doesn't exist
            final Path directoryPath = new Path(directoryValue);
            createDirectory(fileSystem, directoryPath, remoteOwner, remoteGroup);

            // write to tempFile first and on success rename to destFile
            final Path tempFile = new Path(directoryPath, "." + filenameValue);
            final Path destFile = new Path(directoryPath, filenameValue);

            final boolean destinationExists = fileSystem.exists(destFile) || fileSystem.exists(tempFile);
            final boolean shouldOverwrite = context.getProperty(OVERWRITE).asBoolean();

            // if the tempFile or destFile already exist, and overwrite is set to false, then transfer to failure
            if (destinationExists && !shouldOverwrite) {
                session.transfer(session.penalize(putFlowFile), REL_FAILURE);
                getLogger().warn("penalizing {} and routing to failure because file with same name already exists",
                        new Object[] { putFlowFile });
                return null;
            }

            final AtomicReference<Throwable> exceptionHolder = new AtomicReference<>(null);
            final AtomicReference<WriteResult> writeResult = new AtomicReference<>();

            final RecordReaderFactory recordReaderFactory = context.getProperty(RECORD_READER)
                    .asControllerService(RecordReaderFactory.class);

            final FlowFile flowFileIn = putFlowFile;
            final StopWatch stopWatch = new StopWatch(true);

            // Read records from the incoming FlowFile and write them the tempFile
            session.read(putFlowFile, (final InputStream rawIn) -> {
                RecordReader recordReader = null;
                HDFSRecordWriter recordWriter = null;

                try (final BufferedInputStream in = new BufferedInputStream(rawIn)) {

                    // if we fail to create the RecordReader then we want to route to failure, so we need to
                    // handle this separately from the other IOExceptions which normally route to retry
                    try {
                        recordReader = recordReaderFactory.createRecordReader(flowFileIn, in, getLogger());
                    } catch (Exception e) {
                        final RecordReaderFactoryException rrfe = new RecordReaderFactoryException("Unable to create RecordReader", e);
                        exceptionHolder.set(rrfe);
                        return;
                    }

                    final RecordSet recordSet = recordReader.createRecordSet();
                    recordWriter = createHDFSRecordWriter(context, flowFile, configuration, tempFile, recordReader.getSchema());
                    writeResult.set(recordWriter.write(recordSet));
                } catch (Exception e) {
                    exceptionHolder.set(e);
                } finally {
                    IOUtils.closeQuietly(recordReader);
                    IOUtils.closeQuietly(recordWriter);
                }
            });
            stopWatch.stop();

            final String dataRate = stopWatch.calculateDataRate(putFlowFile.getSize());
            final long millis = stopWatch.getDuration(TimeUnit.MILLISECONDS);
            tempDotCopyFile = tempFile;

            // if any errors happened within the session.read then throw the exception so we jump
            // into one of the appropriate catch blocks below
            if (exceptionHolder.get() != null) {
                throw exceptionHolder.get();
            }

            // Attempt to rename from the tempFile to destFile, and change owner if successfully renamed
            rename(fileSystem, tempFile, destFile);
            changeOwner(fileSystem, destFile, remoteOwner, remoteGroup);

            getLogger().info("Wrote {} to {} in {} milliseconds at a rate of {}",
                    new Object[] { putFlowFile, destFile, millis, dataRate });

            putFlowFile = postProcess(context, session, putFlowFile, destFile);

            final String newFilename = destFile.getName();
            final String hdfsPath = destFile.getParent().toString();

            // Update the filename and absolute path attributes
            final Map<String, String> attributes = new HashMap<>(writeResult.get().getAttributes());
            attributes.put(CoreAttributes.FILENAME.key(), newFilename);
            attributes.put(ABSOLUTE_HDFS_PATH_ATTRIBUTE, hdfsPath);
            attributes.put(RECORD_COUNT_ATTR, String.valueOf(writeResult.get().getRecordCount()));
            putFlowFile = session.putAllAttributes(putFlowFile, attributes);

            // Send a provenance event and transfer to success
            final Path qualifiedPath = destFile.makeQualified(fileSystem.getUri(), fileSystem.getWorkingDirectory());
            session.getProvenanceReporter().send(putFlowFile, qualifiedPath.toString());
            session.transfer(putFlowFile, REL_SUCCESS);

        } catch (IOException | FlowFileAccessException e) {
            deleteQuietly(fileSystem, tempDotCopyFile);
            getLogger().error("Failed to write due to {}", new Object[] { e });
            session.transfer(session.penalize(putFlowFile), REL_RETRY);
            context.yield();
        } catch (Throwable t) {
            deleteQuietly(fileSystem, tempDotCopyFile);
            getLogger().error("Failed to write due to {}", new Object[] { t });
            session.transfer(putFlowFile, REL_FAILURE);
        }

        return null;
    });
}
From source file:org.apache.nifi.processors.hadoop.DeleteHDFS.java
License:Apache License
@Override
public void onTrigger(ProcessContext context, ProcessSession session) throws ProcessException {
    final FlowFile originalFlowFile = session.get();

    // If this processor has an incoming connection, then do not run unless a
    // FlowFile is actually sent through
    if (originalFlowFile == null && context.hasIncomingConnection()) {
        context.yield();
        return;
    }

    // We need a FlowFile to report provenance correctly.
    FlowFile flowFile = originalFlowFile != null ? originalFlowFile : session.create();

    final String fileOrDirectoryName = context.getProperty(FILE_OR_DIRECTORY).evaluateAttributeExpressions(flowFile).getValue();

    final FileSystem fileSystem = getFileSystem();
    try {
        // Check if the user has supplied a file or directory pattern
        List<Path> pathList = Lists.newArrayList();
        if (GLOB_MATCHER.reset(fileOrDirectoryName).find()) {
            FileStatus[] fileStatuses = fileSystem.globStatus(new Path(fileOrDirectoryName));
            if (fileStatuses != null) {
                for (FileStatus fileStatus : fileStatuses) {
                    pathList.add(fileStatus.getPath());
                }
            }
        } else {
            pathList.add(new Path(fileOrDirectoryName));
        }

        int failedPath = 0;
        for (Path path : pathList) {
            if (fileSystem.exists(path)) {
                try {
                    Map<String, String> attributes = Maps.newHashMapWithExpectedSize(2);
                    attributes.put("hdfs.filename", path.getName());
                    attributes.put("hdfs.path", path.getParent().toString());
                    flowFile = session.putAllAttributes(flowFile, attributes);

                    fileSystem.delete(path, context.getProperty(RECURSIVE).asBoolean());
                    getLogger().debug("For flowfile {} Deleted file at path {} with name {}",
                            new Object[] { originalFlowFile, path.getParent().toString(), path.getName() });

                    final Path qualifiedPath = path.makeQualified(fileSystem.getUri(), fileSystem.getWorkingDirectory());
                    session.getProvenanceReporter().invokeRemoteProcess(flowFile, qualifiedPath.toString());
                } catch (IOException ioe) {
                    // One possible scenario is that the IOException is permissions based, however it would be impractical
                    // to check every possible external HDFS authorization tool (Ranger, Sentry, etc).
                    // Local ACLs could be checked but the operation would be expensive.
                    getLogger().warn("Failed to delete file or directory", ioe);

                    Map<String, String> attributes = Maps.newHashMapWithExpectedSize(1);
                    // The error message is helpful in understanding at a flowfile level what caused the IOException
                    // (which ACL is denying the operation, e.g.)
                    attributes.put("hdfs.error.message", ioe.getMessage());

                    session.transfer(session.putAllAttributes(session.clone(flowFile), attributes), REL_FAILURE);
                    failedPath++;
                }
            }
        }

        if (failedPath == 0) {
            session.transfer(flowFile, DeleteHDFS.REL_SUCCESS);
        } else {
            // If any path failed to be deleted, remove the FlowFile as it's been cloned and sent to failure.
            session.remove(flowFile);
        }
    } catch (IOException e) {
        getLogger().error("Error processing delete for flowfile {} due to {}",
                new Object[] { flowFile, e.getMessage() }, e);
        session.transfer(flowFile, DeleteHDFS.REL_FAILURE);
    }
}
From source file:org.apache.nifi.processors.hadoop.FetchHDFS.java
License:Apache License
@Override
public void onTrigger(final ProcessContext context, final ProcessSession session) throws ProcessException {
    FlowFile flowFile = session.get();
    if (flowFile == null) {
        return;
    }

    final FileSystem hdfs = getFileSystem();
    final UserGroupInformation ugi = getUserGroupInformation();
    final String filenameValue = context.getProperty(FILENAME).evaluateAttributeExpressions(flowFile).getValue();

    final Path path;
    try {
        path = new Path(filenameValue);
    } catch (IllegalArgumentException e) {
        getLogger().error("Failed to retrieve content from {} for {} due to {}; routing to failure",
                new Object[] { filenameValue, flowFile, e });
        flowFile = session.putAttribute(flowFile, "hdfs.failure.reason", e.getMessage());
        flowFile = session.penalize(flowFile);
        session.transfer(flowFile, REL_FAILURE);
        return;
    }

    final StopWatch stopWatch = new StopWatch(true);
    final FlowFile finalFlowFile = flowFile;

    ugi.doAs(new PrivilegedAction<Object>() {
        @Override
        public Object run() {
            InputStream stream = null;
            CompressionCodec codec = null;
            Configuration conf = getConfiguration();
            final CompressionCodecFactory compressionCodecFactory = new CompressionCodecFactory(conf);
            final CompressionType compressionType = CompressionType.valueOf(context.getProperty(COMPRESSION_CODEC).toString());
            final boolean inferCompressionCodec = compressionType == CompressionType.AUTOMATIC;

            if (inferCompressionCodec) {
                codec = compressionCodecFactory.getCodec(path);
            } else if (compressionType != CompressionType.NONE) {
                codec = getCompressionCodec(context, getConfiguration());
            }

            FlowFile flowFile = finalFlowFile;
            final Path qualifiedPath = path.makeQualified(hdfs.getUri(), hdfs.getWorkingDirectory());
            try {
                final String outputFilename;
                final String originalFilename = path.getName();
                stream = hdfs.open(path, 16384);

                // Check if compression codec is defined (inferred or otherwise)
                if (codec != null) {
                    stream = codec.createInputStream(stream);
                    outputFilename = StringUtils.removeEnd(originalFilename, codec.getDefaultExtension());
                } else {
                    outputFilename = originalFilename;
                }

                flowFile = session.importFrom(stream, finalFlowFile);
                flowFile = session.putAttribute(flowFile, CoreAttributes.FILENAME.key(), outputFilename);

                stopWatch.stop();
                getLogger().info("Successfully received content from {} for {} in {}",
                        new Object[] { qualifiedPath, flowFile, stopWatch.getDuration() });
                session.getProvenanceReporter().fetch(flowFile, qualifiedPath.toString(),
                        stopWatch.getDuration(TimeUnit.MILLISECONDS));
                session.transfer(flowFile, REL_SUCCESS);
            } catch (final FileNotFoundException | AccessControlException e) {
                getLogger().error("Failed to retrieve content from {} for {} due to {}; routing to failure",
                        new Object[] { qualifiedPath, flowFile, e });
                flowFile = session.putAttribute(flowFile, "hdfs.failure.reason", e.getMessage());
                flowFile = session.penalize(flowFile);
                session.transfer(flowFile, REL_FAILURE);
            } catch (final IOException e) {
                getLogger().error("Failed to retrieve content from {} for {} due to {}; routing to comms.failure",
                        new Object[] { qualifiedPath, flowFile, e });
                flowFile = session.penalize(flowFile);
                session.transfer(flowFile, REL_COMMS_FAILURE);
            } finally {
                IOUtils.closeQuietly(stream);
            }

            return null;
        }
    });
}
From source file:org.apache.nifi.processors.hadoop.PutHDFS.java
License:Apache License
@Override
public void onTrigger(ProcessContext context, ProcessSession session) throws ProcessException {
    final FlowFile flowFile = session.get();
    if (flowFile == null) {
        return;
    }

    final FileSystem hdfs = getFileSystem();
    final Configuration configuration = getConfiguration();
    final UserGroupInformation ugi = getUserGroupInformation();

    if (configuration == null || hdfs == null || ugi == null) {
        getLogger().error("HDFS not configured properly");
        session.transfer(flowFile, REL_FAILURE);
        context.yield();
        return;
    }

    ugi.doAs(new PrivilegedAction<Object>() {
        @Override
        public Object run() {
            Path tempDotCopyFile = null;
            FlowFile putFlowFile = flowFile;
            try {
                final String dirValue = context.getProperty(DIRECTORY).evaluateAttributeExpressions(putFlowFile).getValue();
                final Path configuredRootDirPath = new Path(dirValue);

                final String conflictResponse = context.getProperty(CONFLICT_RESOLUTION).getValue();

                final Double blockSizeProp = context.getProperty(BLOCK_SIZE).asDataSize(DataUnit.B);
                final long blockSize = blockSizeProp != null ? blockSizeProp.longValue()
                        : hdfs.getDefaultBlockSize(configuredRootDirPath);

                final Double bufferSizeProp = context.getProperty(BUFFER_SIZE).asDataSize(DataUnit.B);
                final int bufferSize = bufferSizeProp != null ? bufferSizeProp.intValue()
                        : configuration.getInt(BUFFER_SIZE_KEY, BUFFER_SIZE_DEFAULT);

                final Integer replicationProp = context.getProperty(REPLICATION_FACTOR).asInteger();
                final short replication = replicationProp != null ? replicationProp.shortValue()
                        : hdfs.getDefaultReplication(configuredRootDirPath);

                final CompressionCodec codec = getCompressionCodec(context, configuration);

                final String filename = codec != null
                        ? putFlowFile.getAttribute(CoreAttributes.FILENAME.key()) + codec.getDefaultExtension()
                        : putFlowFile.getAttribute(CoreAttributes.FILENAME.key());

                final Path tempCopyFile = new Path(configuredRootDirPath, "." + filename);
                final Path copyFile = new Path(configuredRootDirPath, filename);

                // Create destination directory if it does not exist
                try {
                    if (!hdfs.getFileStatus(configuredRootDirPath).isDirectory()) {
                        throw new IOException(configuredRootDirPath.toString() + " already exists and is not a directory");
                    }
                } catch (FileNotFoundException fe) {
                    if (!hdfs.mkdirs(configuredRootDirPath)) {
                        throw new IOException(configuredRootDirPath.toString() + " could not be created");
                    }
                    changeOwner(context, hdfs, configuredRootDirPath, flowFile);
                }

                final boolean destinationExists = hdfs.exists(copyFile);

                // If destination file already exists, resolve that based on processor configuration
                if (destinationExists) {
                    switch (conflictResponse) {
                    case REPLACE_RESOLUTION:
                        if (hdfs.delete(copyFile, false)) {
                            getLogger().info("deleted {} in order to replace with the contents of {}",
                                    new Object[] { copyFile, putFlowFile });
                        }
                        break;
                    case IGNORE_RESOLUTION:
                        session.transfer(putFlowFile, REL_SUCCESS);
                        getLogger().info("transferring {} to success because file with same name already exists",
                                new Object[] { putFlowFile });
                        return null;
                    case FAIL_RESOLUTION:
                        session.transfer(session.penalize(putFlowFile), REL_FAILURE);
                        getLogger().warn("penalizing {} and routing to failure because file with same name already exists",
                                new Object[] { putFlowFile });
                        return null;
                    default:
                        break;
                    }
                }

                // Write FlowFile to temp file on HDFS
                final StopWatch stopWatch = new StopWatch(true);
                session.read(putFlowFile, new InputStreamCallback() {
                    @Override
                    public void process(InputStream in) throws IOException {
                        OutputStream fos = null;
                        Path createdFile = null;
                        try {
                            if (conflictResponse.equals(APPEND_RESOLUTION_AV.getValue()) && destinationExists) {
                                fos = hdfs.append(copyFile, bufferSize);
                            } else {
                                fos = hdfs.create(tempCopyFile, true, bufferSize, replication, blockSize);
                            }
                            if (codec != null) {
                                fos = codec.createOutputStream(fos);
                            }
                            createdFile = tempCopyFile;
                            BufferedInputStream bis = new BufferedInputStream(in);
                            StreamUtils.copy(bis, fos);
                            bis = null;
                            fos.flush();
                        } finally {
                            try {
                                if (fos != null) {
                                    fos.close();
                                }
                            } catch (RemoteException re) {
                                // when talking to remote HDFS clusters, we don't notice problems until fos.close()
                                if (createdFile != null) {
                                    try {
                                        hdfs.delete(createdFile, false);
                                    } catch (Throwable ignore) {
                                    }
                                }
                                throw re;
                            } catch (Throwable ignore) {
                            }
                            fos = null;
                        }
                    }
                });
                stopWatch.stop();

                final String dataRate = stopWatch.calculateDataRate(putFlowFile.getSize());
                final long millis = stopWatch.getDuration(TimeUnit.MILLISECONDS);
                tempDotCopyFile = tempCopyFile;

                if (!conflictResponse.equals(APPEND_RESOLUTION_AV.getValue())
                        || (conflictResponse.equals(APPEND_RESOLUTION_AV.getValue()) && !destinationExists)) {
                    boolean renamed = false;
                    for (int i = 0; i < 10; i++) { // try to rename multiple times.
                        if (hdfs.rename(tempCopyFile, copyFile)) {
                            renamed = true;
                            break; // rename was successful
                        }
                        Thread.sleep(200L); // try waiting to let whatever might cause rename failure to resolve
                    }
                    if (!renamed) {
                        hdfs.delete(tempCopyFile, false);
                        throw new ProcessException("Copied file to HDFS but could not rename dot file " + tempCopyFile
                                + " to its final filename");
                    }
                    changeOwner(context, hdfs, copyFile, flowFile);
                }

                getLogger().info("copied {} to HDFS at {} in {} milliseconds at a rate of {}",
                        new Object[] { putFlowFile, copyFile, millis, dataRate });

                final String newFilename = copyFile.getName();
                final String hdfsPath = copyFile.getParent().toString();
                putFlowFile = session.putAttribute(putFlowFile, CoreAttributes.FILENAME.key(), newFilename);
                putFlowFile = session.putAttribute(putFlowFile, ABSOLUTE_HDFS_PATH_ATTRIBUTE, hdfsPath);
                final Path qualifiedPath = copyFile.makeQualified(hdfs.getUri(), hdfs.getWorkingDirectory());
                session.getProvenanceReporter().send(putFlowFile, qualifiedPath.toString());

                session.transfer(putFlowFile, REL_SUCCESS);

            } catch (final Throwable t) {
                if (tempDotCopyFile != null) {
                    try {
                        hdfs.delete(tempDotCopyFile, false);
                    } catch (Exception e) {
                        getLogger().error("Unable to remove temporary file {} due to {}", new Object[] { tempDotCopyFile, e });
                    }
                }
                getLogger().error("Failed to write to HDFS due to {}", new Object[] { t });
                session.transfer(session.penalize(putFlowFile), REL_FAILURE);
                context.yield();
            }

            return null;
        }
    });
}
From source file:org.apache.nifi.processors.hadoop.PutHDFSTest.java
License:Apache License
@Test
public void testPutFileWithException() throws IOException {
    // Refer to comment in the BeforeClass method for an explanation
    assumeTrue(isNotWindows());

    String dirName = "target/testPutFileWrongPermissions";
    File file = new File(dirName);
    file.mkdirs();
    Configuration config = new Configuration();
    FileSystem fs = FileSystem.get(config);
    Path p = new Path(dirName).makeQualified(fs.getUri(), fs.getWorkingDirectory());

    final KerberosProperties testKerberosProperties = kerberosProperties;
    TestRunner runner = TestRunners.newTestRunner(new PutHDFS() {
        @Override
        protected void changeOwner(ProcessContext context, FileSystem hdfs, Path name, FlowFile flowFile) {
            throw new ProcessException("Forcing Exception to get thrown in order to verify proper handling");
        }

        @Override
        protected KerberosProperties getKerberosProperties(File kerberosConfigFile) {
            return testKerberosProperties;
        }
    });
    runner.setProperty(PutHDFS.DIRECTORY, dirName);
    runner.setProperty(PutHDFS.CONFLICT_RESOLUTION, "replace");

    try (FileInputStream fis = new FileInputStream("src/test/resources/testdata/randombytes-1")) {
        Map<String, String> attributes = new HashMap<String, String>();
        attributes.put(CoreAttributes.FILENAME.key(), "randombytes-1");
        runner.enqueue(fis, attributes);
        runner.run();
    }

    List<MockFlowFile> failedFlowFiles = runner
            .getFlowFilesForRelationship(new Relationship.Builder().name("failure").build());
    assertFalse(failedFlowFiles.isEmpty());
    assertTrue(failedFlowFiles.get(0).isPenalized());

    fs.delete(p, true);
}
From source file:org.apache.oozie.action.hadoop.HadoopUriFinder.java
License:Apache License
/**
 * Spark compares URIs based on scheme, host and port. Here we convert URIs
 * into the default format so that Spark won't think they belong to
 * different file systems. This avoids an extra copy of files that
 * already exist on the same HDFS.
 *
 * @param fs
 * @param fileUri
 * @return fixed uri
 * @throws URISyntaxException
 */
static URI getFixedUri(final FileSystem fs, final URI fileUri) throws URISyntaxException {
    if (fs.getUri().getScheme().equals(fileUri.getScheme())
            && (fs.getUri().getHost().equals(fileUri.getHost()) || fileUri.getHost() == null)
            && (fs.getUri().getPort() == -1 || fileUri.getPort() == -1
                    || fs.getUri().getPort() == fileUri.getPort())) {
        return new URI(fs.getUri().getScheme(), fileUri.getUserInfo(), fs.getUri().getHost(),
                fs.getUri().getPort(), fileUri.getPath(), fileUri.getQuery(), fileUri.getFragment());
    }
    return fileUri;
}
From source file:org.apache.oozie.service.TestShareLibService.java
License:Apache License
@Test
public void testShareLibLoadFile() throws Exception {
    services = new Services();
    FileSystem fs = getFileSystem();
    setSystemProps();
    createTestShareLibMetaFile(fs);
    Configuration conf = services.get(ConfigurationService.class).getConf();
    conf.set(ShareLibService.SHARELIB_MAPPING_FILE, fs.getUri() + "/user/test/config.properties");
    conf.set(ShareLibService.SHIP_LAUNCHER_JAR, "true");
    try {
        services.init();
        ShareLibService shareLibService = Services.get().get(ShareLibService.class);
        assertTrue(shareLibService.getShareLibJars("something_new").get(0).getName().endsWith("somethingNew.jar"));
        assertTrue(shareLibService.getShareLibJars("pig").get(0).getName().endsWith("pig.jar"));
        fs.delete(new Path("shareLibPath/"), true);
    } finally {
        services.destroy();
    }
}