List of usage examples for org.apache.hadoop.fs.FileUtil.copy
public static boolean copy(FileSystem srcFS, Path src, FileSystem dstFS, Path dst, boolean deleteSource, Configuration conf) throws IOException
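Before the project-specific examples below, here is a minimal, self-contained sketch of the call itself. The class name and paths are hypothetical, chosen only for illustration; note that copy returns true on success and false otherwise, and that deleteSource=false leaves the source in place.

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.FileUtil;
import org.apache.hadoop.fs.Path;

public class FileUtilCopyExample {
    public static void main(String[] args) throws IOException {
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(conf);

        Path src = new Path("/tmp/example/input.txt");   // hypothetical source
        Path dst = new Path("/tmp/example/output.txt");  // hypothetical destination

        // Copy within one filesystem; deleteSource=false keeps the original.
        boolean copied = FileUtil.copy(fs, src, fs, dst, false, conf);
        if (!copied) {
            throw new IOException("Copy failed from " + src + " to " + dst);
        }
    }
}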
From source file:org.apache.mahout.graph.components.FindComponentsJob.java
License:Apache License
@Override
public int run(String[] args) throws Exception {
    addInputOption();
    addOutputOption();

    Map<String, String> parsedArgs = parseArguments(args);
    if (parsedArgs == null) {
        return -1;
    }

    Path tempDirPath = new Path(parsedArgs.get("--tempDir"));
    Path inputPath = getInputPath();
    Path outputPath = getOutputPath();

    AtomicInteger currentPhase = new AtomicInteger();

    Path edgesPath = inputPath;
    Path zoneAssignmentsPath = new Path(tempDirPath, String.valueOf(System.currentTimeMillis()));

    if (shouldRunNextPhase(parsedArgs, currentPhase)) {
        /*
         * Prepare input
         */
        Job prepareAssignments = prepareJob(edgesPath, zoneAssignmentsPath, SequenceFileInputFormat.class,
                PrepareAssignmentsFileMapper.class, Vertex.class, Vertex.class,
                PrepareAssignmentsFileReducer.class, Vertex.class, FlaggedVertex.class,
                SequenceFileOutputFormat.class);
        prepareAssignments.waitForCompletion(true);
    }

    if (shouldRunNextPhase(parsedArgs, currentPhase)) {
        /*
         * Iterate as long as there may be zones left to connect
         */
        while (true) {
            Path scatterEdgesAndAssignZoneOutputPath = new Path(tempDirPath,
                    String.valueOf(System.currentTimeMillis()));

            /*
             * Scatter edges and forward zone assignments,
             * assign one zone to edges
             */
            Job scatterEdgesAndAssignZone = prepareJob(
                    new Path(zoneAssignmentsPath.toString() + "," + edgesPath.toString()),
                    scatterEdgesAndAssignZoneOutputPath, SequenceFileInputFormat.class,
                    ScatterEdgesAndForwardZoneAssignmentsMapper.class, JoinableVertex.class,
                    FlaggedVertex.class, AssignOneZoneToEdgesReducer.class, UndirectedEdge.class,
                    Vertex.class, SequenceFileOutputFormat.class);
            scatterEdgesAndAssignZone.setGroupingComparatorClass(JoinableVertex.GroupingComparator.class);
            scatterEdgesAndAssignZone.waitForCompletion(true);

            Path findInterzoneEdgesOutputPath = new Path(tempDirPath,
                    String.valueOf(System.currentTimeMillis()));

            /*
             * Find interzone edges
             */
            Job findInterzoneEdges = prepareJob(scatterEdgesAndAssignZoneOutputPath,
                    findInterzoneEdgesOutputPath, SequenceFileInputFormat.class, Mapper.class,
                    UndirectedEdge.class, Vertex.class, FindInterzoneEdgesReducer.class, Vertex.class,
                    FlaggedVertex.class, SequenceFileOutputFormat.class);
            findInterzoneEdges.waitForCompletion(true);

            /*
             * Break if there are no new interzone edges
             */
            if (findInterzoneEdges.getCounters().findCounter(Counter.ZONES_CONNECTED).getValue() == 0L) {
                break;
            }

            Path assignNewZonesOutputPath = new Path(tempDirPath,
                    String.valueOf(System.currentTimeMillis()));

            /*
             * Assign new zones
             */
            Job assignNewZones = prepareJob(
                    new Path(zoneAssignmentsPath.toString() + "," + findInterzoneEdgesOutputPath.toString()),
                    assignNewZonesOutputPath, SequenceFileInputFormat.class,
                    BinZoneAssignmentsAndInterzoneEdgesMapper.class, JoinableVertex.class,
                    FlaggedVertex.class, AssignNewZonesToVerticesReducer.class, Vertex.class,
                    FlaggedVertex.class, SequenceFileOutputFormat.class);
            assignNewZones.setGroupingComparatorClass(JoinableVertex.GroupingComparator.class);
            assignNewZones.waitForCompletion(true);

            zoneAssignmentsPath = assignNewZonesOutputPath;
        }
    }

    // Copy the final zone assignments from the temp directory to the job output path,
    // keeping the temporary copy (deleteSource=false).
    FileSystem system = FileSystem.get(getConf());
    FileUtil.copy(system, zoneAssignmentsPath, system, outputPath, false, getConf());
    return 0;
}
From source file:org.apache.nifi.processors.hadoop.MoveHDFS.java
License:Apache License
protected void processBatchOfFiles(final List<Path> files, final ProcessContext context,
        final ProcessSession session, FlowFile parentFlowFile) {
    Preconditions.checkState(parentFlowFile != null, "No parent flowfile for this batch was provided");

    // Process the batch of files
    final Configuration conf = getConfiguration();
    final FileSystem hdfs = getFileSystem();
    final UserGroupInformation ugi = getUserGroupInformation();

    if (conf == null || ugi == null) {
        getLogger().error("Configuration or UserGroupInformation not configured properly");
        session.transfer(parentFlowFile, REL_FAILURE);
        context.yield();
        return;
    }

    for (final Path file : files) {
        ugi.doAs(new PrivilegedAction<Object>() {
            @Override
            public Object run() {
                FlowFile flowFile = session.create(parentFlowFile);
                try {
                    final String originalFilename = file.getName();
                    final Path configuredRootOutputDirPath = processorConfig.getOutputDirectory();
                    final Path newFile = new Path(configuredRootOutputDirPath, originalFilename);
                    final boolean destinationExists = hdfs.exists(newFile);

                    // If the destination file already exists, resolve that
                    // based on processor configuration
                    if (destinationExists) {
                        switch (processorConfig.getConflictResolution()) {
                        case REPLACE_RESOLUTION:
                            // Remove the existing destination file so it can be replaced
                            // (the original snippet deleted the source file here, which
                            // would make the subsequent rename/copy fail)
                            if (hdfs.delete(newFile, false)) {
                                getLogger().info("deleted {} in order to replace with the contents of {}",
                                        new Object[] { newFile, flowFile });
                            }
                            break;
                        case IGNORE_RESOLUTION:
                            session.transfer(flowFile, REL_SUCCESS);
                            getLogger().info("transferring {} to success because file with same name already exists",
                                    new Object[] { flowFile });
                            return null;
                        case FAIL_RESOLUTION:
                            session.transfer(session.penalize(flowFile), REL_FAILURE);
                            getLogger().warn("penalizing {} and routing to failure because file with same name already exists",
                                    new Object[] { flowFile });
                            return null;
                        default:
                            break;
                        }
                    }

                    // Create the destination directory if it does not exist
                    try {
                        if (!hdfs.getFileStatus(configuredRootOutputDirPath).isDirectory()) {
                            throw new IOException(configuredRootOutputDirPath.toString()
                                    + " already exists and is not a directory");
                        }
                    } catch (FileNotFoundException fe) {
                        if (!hdfs.mkdirs(configuredRootOutputDirPath)) {
                            throw new IOException(configuredRootOutputDirPath.toString() + " could not be created");
                        }
                        changeOwner(context, hdfs, configuredRootOutputDirPath);
                    }

                    boolean moved = false;
                    for (int i = 0; i < 10; i++) { // try to rename multiple times
                        if (processorConfig.getOperation().equals("move")) {
                            if (hdfs.rename(file, newFile)) {
                                moved = true;
                                break; // rename was successful
                            }
                        } else {
                            if (FileUtil.copy(hdfs, file, hdfs, newFile, false, conf)) {
                                moved = true;
                                break; // copy was successful
                            }
                        }
                        Thread.sleep(200L); // try waiting to let whatever might cause rename failure to resolve
                    }
                    if (!moved) {
                        throw new ProcessException("Could not move file " + file + " to its final filename");
                    }

                    changeOwner(context, hdfs, newFile);
                    final String outputPath = newFile.toString();
                    final String newFilename = newFile.getName();
                    final String hdfsPath = newFile.getParent().toString();
                    flowFile = session.putAttribute(flowFile, CoreAttributes.FILENAME.key(), newFilename);
                    flowFile = session.putAttribute(flowFile, ABSOLUTE_HDFS_PATH_ATTRIBUTE, hdfsPath);
                    final String transitUri = (outputPath.startsWith("/")) ? "hdfs:/" + outputPath
                            : "hdfs://" + outputPath;
                    session.getProvenanceReporter().send(flowFile, transitUri);
                    session.transfer(flowFile, REL_SUCCESS);
                } catch (final Throwable t) {
                    getLogger().error("Failed to rename on HDFS due to {}", new Object[] { t });
                    session.transfer(session.penalize(flowFile), REL_FAILURE);
                    context.yield();
                }
                return null;
            }
        });
    }
}
From source file:org.apache.phoenix.end2end.index.LocalIndexIT.java
License:Apache License
private void copyLocalIndexHFiles(Configuration conf, HRegionInfo fromRegion, HRegionInfo toRegion,
        boolean move) throws IOException {
    Path root = FSUtils.getRootDir(conf);

    Path secondRegion = new Path(HTableDescriptor.getTableDir(root, fromRegion.getTableName())
            + Path.SEPARATOR + fromRegion.getEncodedName() + Path.SEPARATOR + "L#0/");
    FileSystem currentFileSystem = FSUtils.getCurrentFileSystem(conf);
    Path hfilePath = currentFileSystem.listFiles(secondRegion, true).next().getPath();
    Path firstRegionPath = new Path(HTableDescriptor.getTableDir(root, toRegion.getTableName())
            + Path.SEPARATOR + toRegion.getEncodedName() + Path.SEPARATOR + "L#0/");
    // The move flag maps directly onto FileUtil.copy's deleteSource parameter.
    assertTrue(FileUtil.copy(currentFileSystem, hfilePath, currentFileSystem, firstRegionPath, move, conf));
}
From source file:org.apache.pig.backend.hadoop.datastorage.HPath.java
License:Apache License
public void copy(ElementDescriptor dstName, Properties dstConfiguration, boolean removeSrc)
        throws IOException {
    FileSystem srcFS = this.fs.getHFS();
    FileSystem dstFS = ((HPath) dstName).fs.getHFS();

    Path srcPath = this.path;
    Path dstPath = ((HPath) dstName).path;

    boolean result = FileUtil.copy(srcFS, srcPath, dstFS, dstPath, false, new Configuration());

    if (!result) {
        int errCode = 2097;
        String msg = "Failed to copy from: " + this.toString() + " to: " + dstName.toString();
        throw new ExecException(msg, errCode, PigException.BUG);
    }
}
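The Pig wrapper above illustrates a common pattern with this API: FileUtil.copy signals failure through its boolean return value, so callers that want an exception on failure must check the result and throw it themselves, as HPath.copy does with ExecException.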
From source file:org.apache.tez.client.LocalClient.java
License:Apache License
protected Thread createDAGAppMaster(final ApplicationSubmissionContext appContext) {
    Thread thread = new Thread(new Runnable() {
        @Override
        public void run() {
            try {
                ApplicationId appId = appContext.getApplicationId();

                // Set up the working directory for the DAGAppMaster
                Path staging = TezCommonUtils.getTezSystemStagingPath(conf, appId.toString());
                Path userDir = TezCommonUtils.getTezSystemStagingPath(conf, appId.toString() + "_wd");
                LOG.info("Using working directory: " + userDir.toUri().getPath());

                FileSystem fs = FileSystem.get(conf);
                // Copy data from the staging directory to the working directory
                // to simulate resource localization
                FileUtil.copy(fs, staging, fs, userDir, false, conf);

                // Prepare the environment
                Path logDir = new Path(userDir, "localmode-log-dir");
                Path localDir = new Path(userDir, "localmode-local-dir");
                fs.mkdirs(logDir);
                fs.mkdirs(localDir);

                UserGroupInformation.setConfiguration(conf);

                // Add session-specific credentials to the AM credentials.
                ByteBuffer tokens = appContext.getAMContainerSpec().getTokens();

                Credentials amCredentials;
                if (tokens != null) {
                    amCredentials = TezCommonUtils.parseCredentialsBytes(tokens.array());
                } else {
                    amCredentials = new Credentials();
                }

                // Construct, initialize, and start the DAGAppMaster
                ApplicationAttemptId applicationAttemptId = ApplicationAttemptId.newInstance(appId, 0);
                ContainerId cId = ContainerId.newInstance(applicationAttemptId, 1);
                String currentHost = InetAddress.getLocalHost().getHostName();
                int nmPort = YarnConfiguration.DEFAULT_NM_PORT;
                int nmHttpPort = YarnConfiguration.DEFAULT_NM_WEBAPP_PORT;
                long appSubmitTime = System.currentTimeMillis();

                dagAppMaster = createDAGAppMaster(applicationAttemptId, cId, currentHost, nmPort,
                        nmHttpPort, new SystemClock(), appSubmitTime, isSession,
                        userDir.toUri().getPath(),
                        new String[] { localDir.toUri().getPath() },
                        new String[] { logDir.toUri().getPath() }, amCredentials,
                        UserGroupInformation.getCurrentUser().getShortUserName());
                clientHandler = new DAGClientHandler(dagAppMaster);
                DAGAppMaster.initAndStartAppMaster(dagAppMaster, conf);
            } catch (Throwable t) {
                LOG.fatal("Error starting DAGAppMaster", t);
                if (dagAppMaster != null) {
                    dagAppMaster.stop();
                }
                amFailException = t;
            }
        }
    });

    thread.setName("DAGAppMaster Thread");
    LOG.info("DAGAppMaster thread has been created");

    return thread;
}
From source file:org.apache.tinkerpop.gremlin.hadoop.structure.io.FileSystemStorage.java
License:Apache License
@Override
public boolean cp(final String sourceLocation, final String targetLocation) {
    try {
        return FileUtil.copy(this.fs, new Path(sourceLocation), this.fs, new Path(targetLocation),
                false, new Configuration());
    } catch (final IOException e) {
        throw new IllegalStateException(e.getMessage(), e);
    }
}
From source file:org.cgc.wfx.impl.FileSystemProxy.java
License:Open Source License
@Override
public boolean copyPath(String srcPath, String destPath) {
    log.debug("Try to copy path " + srcPath + " to new path " + destPath);
    Path fsrcPath = new Path(srcPath);
    Path fdestPath = new Path(destPath);
    try {
        return FileUtil.copy(this.fileSystem, fsrcPath, this.fileSystem, fdestPath, false,
                fileSystem.getConf());
    } catch (IOException ioEx) {
        log.error("FAIL on copying path " + fsrcPath + " to new path " + fdestPath, ioEx);
        throw new WfxHdfsException(ioEx);
    }
}
From source file:org.dkpro.bigdata.hadoop.UIMAMapReduceBase.java
License:Open Source License
/**
 * Copy a whole directory tree from the local directory on the node back to
 * a directory on HDFS.
 *
 * @param results_dir
 * @param dest
 * @throws IOException
 */
private void copyDir(Path results_dir, Path dest) throws IOException {
    // Copy output only if it is not empty
    if (this.localFS.exists(results_dir) && this.localFS.listStatus(results_dir).length > 0) {
        FileSystem.get(this.job).mkdirs(dest);
        // Copy the whole directory tree, deleting the local source afterwards
        FileUtil.copy(this.localFS, results_dir, FileSystem.get(this.job), dest, true, this.job);
    }
}
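Unlike most of the examples on this page, this one passes deleteSource=true, so FileUtil.copy removes the node-local source tree after the copy succeeds and the call effectively moves the results into HDFS.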
From source file:org.gridgain.grid.kernal.processors.hadoop.v2.GridHadoopV2JobResourceManager.java
License:Open Source License
/**
 * Process list of resources.
 *
 * @param jobLocDir Job working directory.
 * @param files Array of {@link java.net.URI} or {@link org.apache.hadoop.fs.Path} to process resources.
 * @param download {@code true} if the resources need to be downloaded; otherwise only the class path is processed.
 * @param extract {@code true} if archives need to be extracted.
 * @param clsPathUrls Collection to add resource as classpath resource.
 * @param rsrcNameProp Job configuration property under which to store the array of resource names.
 * @throws IOException If failed.
 */
private void processFiles(File jobLocDir, @Nullable Object[] files, boolean download, boolean extract,
    @Nullable Collection<URL> clsPathUrls, @Nullable String rsrcNameProp) throws IOException {
    if (F.isEmptyOrNulls(files))
        return;

    Collection<String> res = new ArrayList<>();

    for (Object pathObj : files) {
        String locName = null;
        Path srcPath;

        if (pathObj instanceof URI) {
            URI uri = (URI) pathObj;

            locName = uri.getFragment();

            srcPath = new Path(uri);
        }
        else
            srcPath = (Path) pathObj;

        if (locName == null)
            locName = srcPath.getName();

        File dstPath = new File(jobLocDir.getAbsolutePath(), locName);

        res.add(locName);

        rsrcList.add(dstPath);

        if (clsPathUrls != null)
            clsPathUrls.add(dstPath.toURI().toURL());

        if (!download)
            continue;

        JobConf cfg = ctx.getJobConf();

        FileSystem dstFs = FileSystem.getLocal(cfg);

        FileSystem srcFs = srcPath.getFileSystem(cfg);

        if (extract) {
            File archivesPath = new File(jobLocDir.getAbsolutePath(), ".cached-archives");

            if (!archivesPath.exists() && !archivesPath.mkdir())
                throw new IOException("Failed to create directory [path=" + archivesPath
                    + ", jobId=" + jobId + ']');

            File archiveFile = new File(archivesPath, locName);

            FileUtil.copy(srcFs, srcPath, dstFs, new Path(archiveFile.toString()), false, cfg);

            String archiveNameLC = archiveFile.getName().toLowerCase();

            if (archiveNameLC.endsWith(".jar"))
                RunJar.unJar(archiveFile, dstPath);
            else if (archiveNameLC.endsWith(".zip"))
                FileUtil.unZip(archiveFile, dstPath);
            else if (archiveNameLC.endsWith(".tar.gz") || archiveNameLC.endsWith(".tgz")
                || archiveNameLC.endsWith(".tar"))
                FileUtil.unTar(archiveFile, dstPath);
            else
                throw new IOException("Cannot unpack archive [path=" + srcPath + ", jobId=" + jobId + ']');
        }
        else
            FileUtil.copy(srcFs, srcPath, dstFs, new Path(dstPath.toString()), false, cfg);
    }

    if (!res.isEmpty() && rsrcNameProp != null)
        ctx.getJobConf().setStrings(rsrcNameProp, res.toArray(new String[res.size()]));
}
From source file:org.openflamingo.fs.hdfs.HdfsFileSystemProvider.java
License:Apache License
@Override
public boolean copy(String from, String to) {
    // Build the target path from the destination directory and the source file name.
    String target = null;
    if ("/".equals(to)) {
        target = to + FileUtils.getFilename(from);
    } else {
        target = to + SystemUtils.FILE_SEPARATOR + FileUtils.getFilename(from);
    }

    if (exists(target))
        throw new FileSystemException(bundle.message("S_FS", "ALREADY_NOT_COPY", target));

    try {
        if (fs.isFile(new Path(from))) {
            // Copy a single file by streaming it to the new location
            FSDataInputStream fis = fs.open(new Path(from));
            FSDataOutputStream fos = fs.create(new Path(target));
            org.springframework.util.FileCopyUtils.copy(fis, fos);
            IOUtils.closeQuietly(fos);
            IOUtils.closeQuietly(fis);
        } else {
            // Copy a directory tree with FileUtil.copy
            FileUtil.copy(fs, new Path(from), fs, new Path(to), false, new Configuration());
        }
        return true;
    } catch (Exception ex) {
        throw new FileSystemException(bundle.message("S_FS", "CANNOT_COPY", from, to), ex);
    }
}