Example usage for org.apache.hadoop.fs FileUtil copy

Introduction

On this page you can find example usages of org.apache.hadoop.fs.FileUtil.copy.

Prototype

public static boolean copy(FileSystem srcFS, Path src, FileSystem dstFS, Path dst, boolean deleteSource,
        Configuration conf) throws IOException 

Document

Copy files between FileSystems.
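
The call copies the file or directory tree at src on srcFS to dst on dstFS, deletes the source afterwards when deleteSource is true, and returns whether the operation succeeded. Below is a minimal, self-contained sketch of the common single-FileSystem case; the class name and both paths are illustrative placeholders, not part of the Hadoop API.

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.FileUtil;
import org.apache.hadoop.fs.Path;

public class FileUtilCopyExample {

    public static void main(String[] args) throws IOException {
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(conf);

        // Placeholder paths; pass two different FileSystem instances
        // to copy between clusters.
        Path src = new Path("/tmp/source.txt");
        Path dst = new Path("/tmp/backup/source.txt");

        // deleteSource = false leaves the original in place;
        // true turns the copy into a move.
        boolean copied = FileUtil.copy(fs, src, fs, dst, false, conf);
        if (!copied) {
            throw new IOException("Copy from " + src + " to " + dst + " failed");
        }
    }
}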

Usage

From source file:org.apache.mahout.graph.components.FindComponentsJob.java

License:Apache License

@Override
public int run(String[] args) throws Exception {

    addInputOption();
    addOutputOption();

    Map<String, String> parsedArgs = parseArguments(args);
    if (parsedArgs == null) {
        return -1;
    }

    Path tempDirPath = new Path(parsedArgs.get("--tempDir"));

    Path inputPath = getInputPath();
    Path outputPath = getOutputPath();

    AtomicInteger currentPhase = new AtomicInteger();

    Path edgesPath = inputPath;
    Path zoneAssignmentsPath = new Path(tempDirPath, String.valueOf(System.currentTimeMillis()));

    if (shouldRunNextPhase(parsedArgs, currentPhase)) {
        /*
         * Prepare Input
         */
        Job prepareAssignments = prepareJob(edgesPath, zoneAssignmentsPath, SequenceFileInputFormat.class,
                PrepareAssignmentsFileMapper.class, Vertex.class, Vertex.class,
                PrepareAssignmentsFileReducer.class, Vertex.class, FlaggedVertex.class,
                SequenceFileOutputFormat.class);

        prepareAssignments.waitForCompletion(true);
    }

    if (shouldRunNextPhase(parsedArgs, currentPhase)) {

        /*
         * As long as there may be zones connected
         */
        while (true) {

            Path scatterEdgesAndAssignZoneOutputPath = new Path(tempDirPath,
                    String.valueOf(System.currentTimeMillis()));

            /*
             * Scatter edges and forward zone assignments,
             * assign one zone to edges
             */
            Job scatterEdgesAndAssignZone = prepareJob(
                    new Path(zoneAssignmentsPath.toString() + "," + edgesPath.toString()),
                    scatterEdgesAndAssignZoneOutputPath, SequenceFileInputFormat.class,
                    ScatterEdgesAndForwardZoneAssignmentsMapper.class, JoinableVertex.class,
                    FlaggedVertex.class, AssignOneZoneToEdgesReducer.class, UndirectedEdge.class, Vertex.class,
                    SequenceFileOutputFormat.class);
            scatterEdgesAndAssignZone.setGroupingComparatorClass(JoinableVertex.GroupingComparator.class);
            scatterEdgesAndAssignZone.waitForCompletion(true);

            Path findInterzoneEdgesOutputPath = new Path(tempDirPath,
                    String.valueOf(System.currentTimeMillis()));

            /*
             * Find interzone edges
             */
            Job findInterzoneEdges = prepareJob(scatterEdgesAndAssignZoneOutputPath,
                    findInterzoneEdgesOutputPath, SequenceFileInputFormat.class, Mapper.class,
                    UndirectedEdge.class, Vertex.class, FindInterzoneEdgesReducer.class, Vertex.class,
                    FlaggedVertex.class, SequenceFileOutputFormat.class);

            findInterzoneEdges.waitForCompletion(true);

            /*
             * Break if there are no new interzone edges
             */
            if (findInterzoneEdges.getCounters().findCounter(Counter.ZONES_CONNECTED).getValue() == 0L) {
                break;
            }

            Path assignNewZonesOutputPath = new Path(tempDirPath, String.valueOf(System.currentTimeMillis()));

            /*
             * Assign new zones
             */
            Job assignNewZones = prepareJob(
                    new Path(zoneAssignmentsPath.toString() + "," + findInterzoneEdgesOutputPath.toString()),
                    assignNewZonesOutputPath, SequenceFileInputFormat.class,
                    BinZoneAssignmentsAndInterzoneEdgesMapper.class, JoinableVertex.class, FlaggedVertex.class,
                    AssignNewZonesToVerticesReducer.class, Vertex.class, FlaggedVertex.class,
                    SequenceFileOutputFormat.class);

            assignNewZones.setGroupingComparatorClass(JoinableVertex.GroupingComparator.class);
            assignNewZones.waitForCompletion(true);

            zoneAssignmentsPath = assignNewZonesOutputPath;
        }
    }
    FileSystem system = FileSystem.get(getConf());
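    // Copy the final zone assignments to the output path; deleteSource is false, so the temp data is left in place.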
    FileUtil.copy(system, zoneAssignmentsPath, system, outputPath, false, getConf());
    return 0;
}

From source file:org.apache.nifi.processors.hadoop.MoveHDFS.java

License:Apache License

protected void processBatchOfFiles(final List<Path> files, final ProcessContext context,
        final ProcessSession session, FlowFile parentFlowFile) {
    Preconditions.checkState(parentFlowFile != null, "No parent flowfile for this batch was provided");

    // process the batch of files
    final Configuration conf = getConfiguration();
    final FileSystem hdfs = getFileSystem();
    final UserGroupInformation ugi = getUserGroupInformation();

    if (conf == null || ugi == null) {
        getLogger().error("Configuration or UserGroupInformation not configured properly");
        session.transfer(parentFlowFile, REL_FAILURE);
        context.yield();
        return;
    }

    for (final Path file : files) {

        ugi.doAs(new PrivilegedAction<Object>() {
            @Override
            public Object run() {
                FlowFile flowFile = session.create(parentFlowFile);
                try {
                    final String originalFilename = file.getName();
                    final Path configuredRootOutputDirPath = processorConfig.getOutputDirectory();
                    final Path newFile = new Path(configuredRootOutputDirPath, originalFilename);
                    final boolean destinationExists = hdfs.exists(newFile);
                    // If destination file already exists, resolve that
                    // based on processor configuration
                    if (destinationExists) {
                        switch (processorConfig.getConflictResolution()) {
                        case REPLACE_RESOLUTION:
                            if (hdfs.delete(file, false)) {
                                getLogger().info("deleted {} in order to replace with the contents of {}",
                                        new Object[] { file, flowFile });
                            }
                            break;
                        case IGNORE_RESOLUTION:
                            session.transfer(flowFile, REL_SUCCESS);
                            getLogger().info(
                                    "transferring {} to success because file with same name already exists",
                                    new Object[] { flowFile });
                            return null;
                        case FAIL_RESOLUTION:
                            session.transfer(session.penalize(flowFile), REL_FAILURE);
                            getLogger().warn(
                                    "penalizing {} and routing to failure because file with same name already exists",
                                    new Object[] { flowFile });
                            return null;
                        default:
                            break;
                        }
                    }

                    // Create destination directory if it does not exist
                    try {
                        if (!hdfs.getFileStatus(configuredRootOutputDirPath).isDirectory()) {
                            throw new IOException(configuredRootOutputDirPath.toString()
                                    + " already exists and is not a directory");
                        }
                    } catch (FileNotFoundException fe) {
                        if (!hdfs.mkdirs(configuredRootOutputDirPath)) {
                            throw new IOException(
                                    configuredRootOutputDirPath.toString() + " could not be created");
                        }
                        changeOwner(context, hdfs, configuredRootOutputDirPath);
                    }

                    boolean moved = false;
                    for (int i = 0; i < 10; i++) { // try to rename multiple times
                        if (processorConfig.getOperation().equals("move")) {
                            if (hdfs.rename(file, newFile)) {
                                moved = true;
                                break;// rename was successful
                            }
                        } else {
                            if (FileUtil.copy(hdfs, file, hdfs, newFile, false, conf)) {
                                moved = true;
                                break;// copy was successful
                            }
                        }
                        Thread.sleep(200L);// try waiting to let whatever might cause rename failure to resolve
                    }
                    if (!moved) {
                        throw new ProcessException("Could not move file " + file + " to its final filename");
                    }

                    changeOwner(context, hdfs, newFile);
                    final String outputPath = newFile.toString();
                    final String newFilename = newFile.getName();
                    final String hdfsPath = newFile.getParent().toString();
                    flowFile = session.putAttribute(flowFile, CoreAttributes.FILENAME.key(), newFilename);
                    flowFile = session.putAttribute(flowFile, ABSOLUTE_HDFS_PATH_ATTRIBUTE, hdfsPath);
                    final String transitUri = (outputPath.startsWith("/")) ? "hdfs:/" + outputPath
                            : "hdfs://" + outputPath;
                    session.getProvenanceReporter().send(flowFile, transitUri);
                    session.transfer(flowFile, REL_SUCCESS);

                } catch (final Throwable t) {
                    getLogger().error("Failed to rename on HDFS due to {}", new Object[] { t });
                    session.transfer(session.penalize(flowFile), REL_FAILURE);
                    context.yield();
                }
                return null;
            }
        });
    }
}

From source file:org.apache.phoenix.end2end.index.LocalIndexIT.java

License:Apache License

private void copyLocalIndexHFiles(Configuration conf, HRegionInfo fromRegion, HRegionInfo toRegion,
        boolean move) throws IOException {
    Path root = FSUtils.getRootDir(conf);

    Path secondRegion = new Path(HTableDescriptor.getTableDir(root, fromRegion.getTableName()) + Path.SEPARATOR
            + fromRegion.getEncodedName() + Path.SEPARATOR + "L#0/");
    Path hfilePath = FSUtils.getCurrentFileSystem(conf).listFiles(secondRegion, true).next().getPath();
    Path firstRegionPath = new Path(HTableDescriptor.getTableDir(root, toRegion.getTableName()) + Path.SEPARATOR
            + toRegion.getEncodedName() + Path.SEPARATOR + "L#0/");
    FileSystem currentFileSystem = FSUtils.getCurrentFileSystem(conf);
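    // The 'move' flag is passed as deleteSource, so this call either copies or moves the HFile into the target region directory.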
    assertTrue(FileUtil.copy(currentFileSystem, hfilePath, currentFileSystem, firstRegionPath, move, conf));
}

From source file:org.apache.pig.backend.hadoop.datastorage.HPath.java

License:Apache License

public void copy(ElementDescriptor dstName, Properties dstConfiguration, boolean removeSrc) throws IOException {
    FileSystem srcFS = this.fs.getHFS();
    FileSystem dstFS = ((HPath) dstName).fs.getHFS();

    Path srcPath = this.path;
    Path dstPath = ((HPath) dstName).path;

    boolean result = FileUtil.copy(srcFS, srcPath, dstFS, dstPath, false, new Configuration());

    if (!result) {
        int errCode = 2097;
        String msg = "Failed to copy from: " + this.toString() + " to: " + dstName.toString();
        throw new ExecException(msg, errCode, PigException.BUG);
    }
}

From source file:org.apache.tez.client.LocalClient.java

License:Apache License

protected Thread createDAGAppMaster(final ApplicationSubmissionContext appContext) {
    Thread thread = new Thread(new Runnable() {
        @Override
        public void run() {
            try {
                ApplicationId appId = appContext.getApplicationId();

                // Set up working directory for DAGAppMaster
                Path staging = TezCommonUtils.getTezSystemStagingPath(conf, appId.toString());
                Path userDir = TezCommonUtils.getTezSystemStagingPath(conf, appId.toString() + "_wd");
                LOG.info("Using working directory: " + userDir.toUri().getPath());

                FileSystem fs = FileSystem.get(conf);
                // copy data from staging directory to working directory to simulate the resource localizing
                FileUtil.copy(fs, staging, fs, userDir, false, conf);
                // Prepare Environment
                Path logDir = new Path(userDir, "localmode-log-dir");
                Path localDir = new Path(userDir, "localmode-local-dir");
                fs.mkdirs(logDir);
                fs.mkdirs(localDir);

                UserGroupInformation.setConfiguration(conf);
                // Add session specific credentials to the AM credentials.
                ByteBuffer tokens = appContext.getAMContainerSpec().getTokens();

                Credentials amCredentials;
                if (tokens != null) {
                    amCredentials = TezCommonUtils.parseCredentialsBytes(tokens.array());
                } else {
                    amCredentials = new Credentials();
                }

                // Construct, initialize, and start the DAGAppMaster
                ApplicationAttemptId applicationAttemptId = ApplicationAttemptId.newInstance(appId, 0);
                ContainerId cId = ContainerId.newInstance(applicationAttemptId, 1);
                String currentHost = InetAddress.getLocalHost().getHostName();
                int nmPort = YarnConfiguration.DEFAULT_NM_PORT;
                int nmHttpPort = YarnConfiguration.DEFAULT_NM_WEBAPP_PORT;
                long appSubmitTime = System.currentTimeMillis();

                dagAppMaster = createDAGAppMaster(applicationAttemptId, cId, currentHost, nmPort, nmHttpPort,
                        new SystemClock(), appSubmitTime, isSession, userDir.toUri().getPath(),
                        new String[] { localDir.toUri().getPath() }, new String[] { logDir.toUri().getPath() },
                        amCredentials, UserGroupInformation.getCurrentUser().getShortUserName());
                clientHandler = new DAGClientHandler(dagAppMaster);
                DAGAppMaster.initAndStartAppMaster(dagAppMaster, conf);

            } catch (Throwable t) {
                LOG.fatal("Error starting DAGAppMaster", t);
                if (dagAppMaster != null) {
                    dagAppMaster.stop();
                }
                amFailException = t;
            }
        }
    });

    thread.setName("DAGAppMaster Thread");
    LOG.info("DAGAppMaster thread has been created");

    return thread;
}

From source file:org.apache.tinkerpop.gremlin.hadoop.structure.io.FileSystemStorage.java

License:Apache License

@Override
public boolean cp(final String sourceLocation, final String targetLocation) {
    try {
        return FileUtil.copy(this.fs, new Path(sourceLocation), this.fs, new Path(targetLocation), false,
                new Configuration());
    } catch (final IOException e) {
        throw new IllegalStateException(e.getMessage(), e);
    }
}

From source file:org.cgc.wfx.impl.FileSystemProxy.java

License:Open Source License

@Override
public boolean copyPath(String srcPath, String destPath) {
    log.debug("Try to copy path " + srcPath + " to new path " + destPath);
    Path fsrcPath = new Path(srcPath);
    Path fdestPath = new Path(destPath);
    try {
        return FileUtil.copy(this.fileSystem, fsrcPath, this.fileSystem, fdestPath, false,
                fileSystem.getConf());
    } catch (IOException ioEx) {
        log.error("FAIL on copying path " + fsrcPath + " to new path " + fdestPath, ioEx);
        throw new WfxHdfsException(ioEx);
    }
}

From source file:org.dkpro.bigdata.hadoop.UIMAMapReduceBase.java

License:Open Source License

/**
 * Copy a whole directory tree from the local directory on the node back to
 * a directory on HDFS.
 *
 * @param results_dir the local directory holding the results to copy
 * @param dest the destination directory on HDFS
 * @throws IOException if the copy fails
 */
private void copyDir(Path results_dir, Path dest) throws IOException {
    //      System.out.println("Copying stuff from " + results_dir + " to " + dest);
    // Copy output only if not empty
    if (this.localFS.exists(results_dir) && this.localFS.listStatus(results_dir).length > 0) {
        FileSystem.get(this.job).mkdirs(dest);
        // copy the whole directory tree
        FileUtil.copy(this.localFS, results_dir, FileSystem.get(this.job), dest, true, this.job);
    }
}

From source file:org.gridgain.grid.kernal.processors.hadoop.v2.GridHadoopV2JobResourceManager.java

License:Open Source License

/**
 * Processes a list of resources.
 *
 * @param jobLocDir Job working directory.
 * @param files Array of {@link java.net.URI} or {@link org.apache.hadoop.fs.Path} resources to process.
 * @param download {@code true} if the resources need to be downloaded; otherwise only the class path is processed.
 * @param extract {@code true} if archives need to be extracted.
 * @param clsPathUrls Collection to which the resources' class path URLs are added.
 * @param rsrcNameProp Property under which the resource name array is stored.
 * @throws IOException If failed.
 */
private void processFiles(File jobLocDir, @Nullable Object[] files, boolean download, boolean extract,
        @Nullable Collection<URL> clsPathUrls, @Nullable String rsrcNameProp) throws IOException {
    if (F.isEmptyOrNulls(files))
        return;

    Collection<String> res = new ArrayList<>();

    for (Object pathObj : files) {
        String locName = null;
        Path srcPath;

        if (pathObj instanceof URI) {
            URI uri = (URI) pathObj;

            locName = uri.getFragment();

            srcPath = new Path(uri);
        } else
            srcPath = (Path) pathObj;

        if (locName == null)
            locName = srcPath.getName();

        File dstPath = new File(jobLocDir.getAbsolutePath(), locName);

        res.add(locName);

        rsrcList.add(dstPath);

        if (clsPathUrls != null)
            clsPathUrls.add(dstPath.toURI().toURL());

        if (!download)
            continue;

        JobConf cfg = ctx.getJobConf();

        FileSystem dstFs = FileSystem.getLocal(cfg);

        FileSystem srcFs = srcPath.getFileSystem(cfg);

        if (extract) {
            File archivesPath = new File(jobLocDir.getAbsolutePath(), ".cached-archives");

            if (!archivesPath.exists() && !archivesPath.mkdir())
                throw new IOException(
                        "Failed to create directory " + "[path=" + archivesPath + ", jobId=" + jobId + ']');

            File archiveFile = new File(archivesPath, locName);

            FileUtil.copy(srcFs, srcPath, dstFs, new Path(archiveFile.toString()), false, cfg);

            String archiveNameLC = archiveFile.getName().toLowerCase();

            if (archiveNameLC.endsWith(".jar"))
                RunJar.unJar(archiveFile, dstPath);
            else if (archiveNameLC.endsWith(".zip"))
                FileUtil.unZip(archiveFile, dstPath);
            else if (archiveNameLC.endsWith(".tar.gz") || archiveNameLC.endsWith(".tgz")
                    || archiveNameLC.endsWith(".tar"))
                FileUtil.unTar(archiveFile, dstPath);
            else
                throw new IOException("Cannot unpack archive [path=" + srcPath + ", jobId=" + jobId + ']');
        } else
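            // Plain (non-archive) resource: copy it into the local job directory as-is.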
            FileUtil.copy(srcFs, srcPath, dstFs, new Path(dstPath.toString()), false, cfg);
    }

    if (!res.isEmpty() && rsrcNameProp != null)
        ctx.getJobConf().setStrings(rsrcNameProp, res.toArray(new String[res.size()]));
}

From source file:org.openflamingo.fs.hdfs.HdfsFileSystemProvider.java

License:Apache License

@Override
public boolean copy(String from, String to) {
    // Build the target path by appending the source file name to the destination directory.
    String target = null;
    if ("/".equals(to)) {
        target = to + FileUtils.getFilename(from);
    } else {
        target = to + SystemUtils.FILE_SEPARATOR + FileUtils.getFilename(from);
    }

    if (exists(target))
        throw new FileSystemException(bundle.message("S_FS", "ALREADY_NOT_COPY", target));

    try {
        if (fs.isFile(new Path(from))) {
            FSDataInputStream fis = fs.open(new Path(from));
            FSDataOutputStream fos = fs.create(new Path(target));

            org.springframework.util.FileCopyUtils.copy(fis, fos);

            IOUtils.closeQuietly(fos);
            IOUtils.closeQuietly(fis);
        } else {
            FileUtil.copy(fs, new Path(from), fs, new Path(to), false, new Configuration());
        }

        return true;
    } catch (Exception ex) {
        throw new FileSystemException(bundle.message("S_FS", "CANNOT_COPY", from, to), ex);
    }
}