Example usage for org.apache.hadoop.fs FileUtil copy

Introduction

On this page you can find usage examples for org.apache.hadoop.fs.FileUtil.copy, collected from open-source projects.

Prototype

public static boolean copy(FileSystem srcFS, FileStatus srcStatus, FileSystem dstFS, Path dst,
        boolean deleteSource, boolean overwrite, Configuration conf) throws IOException 

Document

Copy files between FileSystems.
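
All of the examples below call the convenience overload that takes the source as a Path rather than a FileStatus; it looks up the source's FileStatus itself and delegates to the signature above. As a minimal, self-contained sketch of a direct call (the class name and both paths are hypothetical; adjust them to your cluster):

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.FileUtil;
import org.apache.hadoop.fs.Path;

public class FileUtilCopyExample {

    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();

        // Hypothetical paths; without an explicit scheme (hdfs://, file://, ...)
        // they resolve against the default filesystem in conf.
        Path src = new Path("/tmp/example/source.txt");
        Path dst = new Path("/tmp/example/destination.txt");

        FileSystem srcFs = src.getFileSystem(conf);
        FileSystem dstFs = dst.getFileSystem(conf);

        // deleteSource=false performs a plain copy; overwrite=true replaces an
        // existing destination instead of failing.
        boolean ok = FileUtil.copy(srcFs, src, dstFs, dst, false, true, conf);
        if (!ok) {
            throw new RuntimeException("Copy failed: " + src + " -> " + dst);
        }
    }
}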

Usage

From source file:fr.ens.biologie.genomique.eoulsan.util.hadoop.PathUtils.java

License:LGPL

/**
 * Copy file from a path to another path.
 * @param srcPath source path
 * @param destPath destination path
 * @param overwrite true if existing files must be overwritten
 * @param conf Configuration
 * @return true if the copy is successful
 * @throws IOException if an error occurs while copying
 */
public static final boolean copy(final Path srcPath, final Path destPath, final boolean overwrite,
        final Configuration conf) throws IOException {

    if (srcPath == null) {
        throw new NullPointerException("The source path is null.");
    }

    if (destPath == null) {
        throw new NullPointerException("The destination path is null.");
    }

    if (conf == null) {
        throw new NullPointerException("The configuration is null.");
    }

    final FileSystem srcFs = srcPath.getFileSystem(conf);
    final FileSystem destFs = destPath.getFileSystem(conf);

    return FileUtil.copy(srcFs, srcPath, destFs, destPath, false, overwrite, conf);
}

From source file:fr.ens.biologie.genomique.eoulsan.util.hadoop.PathUtils.java

License:LGPL

/**
 * Move file from a path to another path.
 * @param srcPath source path
 * @param destPath destination path
 * @param overwrite true if existing files must be overwritten
 * @param conf Configuration
 * @return true if the move is successful
 * @throws IOException if an error occurs while moving
 */
public static final boolean move(final Path srcPath, final Path destPath, final boolean overwrite,
        final Configuration conf) throws IOException {

    if (srcPath == null) {
        throw new NullPointerException("The source path is null.");
    }

    if (destPath == null) {
        throw new NullPointerException("The destination path is null.");
    }

    if (conf == null) {
        throw new NullPointerException("The configuration is null.");
    }

    final FileSystem srcFs = srcPath.getFileSystem(conf);
    final FileSystem destFs = destPath.getFileSystem(conf);

    return FileUtil.copy(srcFs, srcPath, destFs, destPath, true, overwrite, conf);
}
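
The copy() and move() helpers above differ only in the deleteSource argument passed to FileUtil.copy: false leaves the source file in place, while true removes it once the copy has succeeded.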

From source file:gobblin.compaction.HdfsWriter.java

License:Open Source License

public static void moveSelectFiles(String extension, String source, String destination) throws IOException {
    FileSystem fs = getFileSystem();
    fs.mkdirs(new Path(destination));
    //RemoteIterator<LocatedFileStatus> iterator = fs.listFiles(new Path(source), false);
    FileStatus[] fileStatuses = fs.listStatus(new Path(source));
    for (FileStatus fileStatus : fileStatuses) {
        Path path = fileStatus.getPath();
        if (!fileStatus.isDirectory() && path.toString().toLowerCase().endsWith(extension.toLowerCase())) {
            FileUtil.copy(fs, path, fs, new Path(destination), false, true, getConfiguration());
        }
    }
}
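
Note that despite the method name, deleteSource is false in the FileUtil.copy call, so the matching files are copied and the originals are left in the source directory.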

From source file:gobblin.util.HadoopUtils.java

License:Apache License

private static void copyPath(FileSystem srcFs, Path src, FileSystem dstFs, Path dst, boolean deleteSource,
        boolean overwrite, Configuration conf) throws IOException {

    Preconditions.checkArgument(srcFs.exists(src),
            String.format("Cannot copy from %s to %s because src does not exist", src, dst));
    Preconditions.checkArgument(overwrite || !dstFs.exists(dst),
            String.format("Cannot copy from %s to %s because dst exists", src, dst));

    try {
        boolean isSourceFileSystemLocal = srcFs instanceof LocalFileSystem
                || srcFs instanceof RawLocalFileSystem;
        if (isSourceFileSystemLocal) {
            try {
                dstFs.copyFromLocalFile(deleteSource, overwrite, src, dst);
            } catch (IOException e) {
                throw new IOException(String.format("Failed to copy %s to %s", src, dst), e);
            }
        } else if (!FileUtil.copy(srcFs, src, dstFs, dst, deleteSource, overwrite, conf)) {
            throw new IOException(String.format("Failed to copy %s to %s", src, dst));
        }
    } catch (Throwable t1) {
        try {
            deleteIfExists(dstFs, dst, true);
        } catch (Throwable t2) {
            // Do nothing
        }
        throw t1;
    }
}
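
Two details are worth noting here: when the source is a local filesystem, the copy goes through copyFromLocalFile() instead of FileUtil.copy, and on any failure the partially written destination is deleted before the original exception is rethrown.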

From source file:org.apache.accumulo.server.tabletserver.BulkFailedCopyProcessor.java

License:Apache License

@Override
public void process(String workID, byte[] data) {

    String paths[] = new String(data).split(",");

    Path orig = new Path(paths[0]);
    Path dest = new Path(paths[1]);
    Path tmp = new Path(dest.getParent(), dest.getName() + ".tmp");

    try {
        FileSystem fs = TraceFileSystem.wrap(org.apache.accumulo.core.file.FileUtil
                .getFileSystem(CachedConfiguration.getInstance(), ServerConfiguration.getSiteConfiguration()));

        FileUtil.copy(fs, orig, fs, tmp, false, true, CachedConfiguration.getInstance());
        fs.rename(tmp, dest);
        log.debug("copied " + orig + " to " + dest);
    } catch (IOException ex) {
        try {
            FileSystem fs = TraceFileSystem.wrap(org.apache.accumulo.core.file.FileUtil.getFileSystem(
                    CachedConfiguration.getInstance(), ServerConfiguration.getSiteConfiguration()));

            fs.create(dest).close();
            log.warn(" marked " + dest + " failed", ex);
        } catch (IOException e) {
            log.error("Unable to create failure flag file " + dest, e);
        }
    }

}
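
The copy is written to a ".tmp" sibling and then renamed into place, so a reader never observes a partially copied file; if the copy fails, an empty file is created at the destination to mark the failure.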

From source file:org.apache.accumulo.test.randomwalk.shard.ExportIndex.java

License:Apache License

@Override
public void visit(State state, Environment env, Properties props) throws Exception {

    String indexTableName = (String) state.get("indexTableName");
    String tmpIndexTableName = indexTableName + "_tmp";

    String exportDir = "/tmp/shard_export/" + indexTableName;
    String copyDir = "/tmp/shard_export/" + tmpIndexTableName;

    FileSystem fs = FileSystem.get(CachedConfiguration.getInstance());

    fs.delete(new Path("/tmp/shard_export/" + indexTableName), true);
    fs.delete(new Path("/tmp/shard_export/" + tmpIndexTableName), true);

    // disable splits, so that splits can be compared later without worrying about one table splitting and the other not
    env.getConnector().tableOperations().setProperty(indexTableName, Property.TABLE_SPLIT_THRESHOLD.getKey(),
            "20G");

    long t1 = System.currentTimeMillis();

    env.getConnector().tableOperations().flush(indexTableName, null, null, true);
    env.getConnector().tableOperations().offline(indexTableName);

    long t2 = System.currentTimeMillis();

    env.getConnector().tableOperations().exportTable(indexTableName, exportDir);

    long t3 = System.currentTimeMillis();

    // copy files
    BufferedReader reader = new BufferedReader(
            new InputStreamReader(fs.open(new Path(exportDir, "distcp.txt")), UTF_8));
    String file = null;
    while ((file = reader.readLine()) != null) {
        Path src = new Path(file);
        Path dest = new Path(new Path(copyDir), src.getName());
        FileUtil.copy(fs, src, fs, dest, false, true, CachedConfiguration.getInstance());
    }

    reader.close();

    long t4 = System.currentTimeMillis();

    env.getConnector().tableOperations().online(indexTableName);
    env.getConnector().tableOperations().importTable(tmpIndexTableName, copyDir);

    long t5 = System.currentTimeMillis();

    fs.delete(new Path(exportDir), true);
    fs.delete(new Path(copyDir), true);

    HashSet<Text> splits1 = new HashSet<>(env.getConnector().tableOperations().listSplits(indexTableName));
    HashSet<Text> splits2 = new HashSet<>(env.getConnector().tableOperations().listSplits(tmpIndexTableName));

    if (!splits1.equals(splits2))
        throw new Exception("Splits not equals " + indexTableName + " " + tmpIndexTableName);

    HashMap<String, String> props1 = new HashMap<>();
    for (Entry<String, String> entry : env.getConnector().tableOperations().getProperties(indexTableName))
        props1.put(entry.getKey(), entry.getValue());

    HashMap<String, String> props2 = new HashMap<>();
    for (Entry<String, String> entry : env.getConnector().tableOperations().getProperties(tmpIndexTableName))
        props2.put(entry.getKey(), entry.getValue());

    if (!props1.equals(props2))
        throw new Exception("Props not equals " + indexTableName + " " + tmpIndexTableName);

    // unset the split threshold
    env.getConnector().tableOperations().removeProperty(indexTableName,
            Property.TABLE_SPLIT_THRESHOLD.getKey());
    env.getConnector().tableOperations().removeProperty(tmpIndexTableName,
            Property.TABLE_SPLIT_THRESHOLD.getKey());

    log.debug("Imported " + tmpIndexTableName + " from " + indexTableName + " flush: " + (t2 - t1)
            + "ms export: " + (t3 - t2) + "ms copy:" + (t4 - t3) + "ms import:" + (t5 - t4) + "ms");

}
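
The distcp.txt file written by exportTable lists every file the exported table depends on; the loop above replays that manifest with FileUtil.copy instead of running a DistCp job, which keeps this test self-contained.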

From source file:org.apache.accumulo.testing.core.randomwalk.shard.ExportIndex.java

License:Apache License

@Override
public void visit(State state, RandWalkEnv env, Properties props) throws Exception {

    String indexTableName = (String) state.get("indexTableName");
    String tmpIndexTableName = indexTableName + "_tmp";

    String exportDir = "/tmp/shard_export/" + indexTableName;
    String copyDir = "/tmp/shard_export/" + tmpIndexTableName;

    FileSystem fs = FileSystem.get(env.getHadoopConfiguration());

    fs.delete(new Path("/tmp/shard_export/" + indexTableName), true);
    fs.delete(new Path("/tmp/shard_export/" + tmpIndexTableName), true);

    // disable splits, so that splits can be compared later without worrying
    // about one table splitting and the other not
    env.getAccumuloConnector().tableOperations().setProperty(indexTableName,
            Property.TABLE_SPLIT_THRESHOLD.getKey(), "20G");

    long t1 = System.currentTimeMillis();

    env.getAccumuloConnector().tableOperations().flush(indexTableName, null, null, true);
    env.getAccumuloConnector().tableOperations().offline(indexTableName);

    long t2 = System.currentTimeMillis();

    env.getAccumuloConnector().tableOperations().exportTable(indexTableName, exportDir);

    long t3 = System.currentTimeMillis();

    // copy files
    BufferedReader reader = new BufferedReader(
            new InputStreamReader(fs.open(new Path(exportDir, "distcp.txt")), UTF_8));
    String file = null;
    while ((file = reader.readLine()) != null) {
        Path src = new Path(file);
        Path dest = new Path(new Path(copyDir), src.getName());
        FileUtil.copy(fs, src, fs, dest, false, true, env.getHadoopConfiguration());
    }

    reader.close();

    long t4 = System.currentTimeMillis();

    env.getAccumuloConnector().tableOperations().online(indexTableName);
    env.getAccumuloConnector().tableOperations().importTable(tmpIndexTableName, copyDir);

    long t5 = System.currentTimeMillis();

    fs.delete(new Path(exportDir), true);
    fs.delete(new Path(copyDir), true);

    HashSet<Text> splits1 = new HashSet<>(
            env.getAccumuloConnector().tableOperations().listSplits(indexTableName));
    HashSet<Text> splits2 = new HashSet<>(
            env.getAccumuloConnector().tableOperations().listSplits(tmpIndexTableName));

    if (!splits1.equals(splits2))
        throw new Exception("Splits not equals " + indexTableName + " " + tmpIndexTableName);

    HashMap<String, String> props1 = new HashMap<>();
    for (Entry<String, String> entry : env.getAccumuloConnector().tableOperations()
            .getProperties(indexTableName))
        props1.put(entry.getKey(), entry.getValue());

    HashMap<String, String> props2 = new HashMap<>();
    for (Entry<String, String> entry : env.getAccumuloConnector().tableOperations()
            .getProperties(tmpIndexTableName))
        props2.put(entry.getKey(), entry.getValue());

    if (!props1.equals(props2))
        throw new Exception("Props not equals " + indexTableName + " " + tmpIndexTableName);

    // unset the split threshold
    env.getAccumuloConnector().tableOperations().removeProperty(indexTableName,
            Property.TABLE_SPLIT_THRESHOLD.getKey());
    env.getAccumuloConnector().tableOperations().removeProperty(tmpIndexTableName,
            Property.TABLE_SPLIT_THRESHOLD.getKey());

    log.debug("Imported " + tmpIndexTableName + " from " + indexTableName + " flush: " + (t2 - t1)
            + "ms export: " + (t3 - t2) + "ms copy:" + (t4 - t3) + "ms import:" + (t5 - t4) + "ms");

}
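
This is the same export/copy/import procedure as the previous example, ported to the newer accumulo-testing API: the Hadoop configuration comes from the environment instead of CachedConfiguration, and table operations go through getAccumuloConnector().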

From source file:org.apache.accumulo.tserver.BulkFailedCopyProcessor.java

License:Apache License

@Override
public void process(String workID, byte[] data) {

    String paths[] = new String(data, UTF_8).split(",");

    Path orig = new Path(paths[0]);
    Path dest = new Path(paths[1]);
    Path tmp = new Path(dest.getParent(), dest.getName() + ".tmp");

    try {
        VolumeManager vm = VolumeManagerImpl.get(SiteConfiguration.getInstance());
        FileSystem origFs = vm.getVolumeByPath(orig).getFileSystem();
        FileSystem destFs = vm.getVolumeByPath(dest).getFileSystem();

        FileUtil.copy(origFs, orig, destFs, tmp, false, true, CachedConfiguration.getInstance());
        destFs.rename(tmp, dest);
        log.debug("copied " + orig + " to " + dest);
    } catch (IOException ex) {
        try {
            VolumeManager vm = VolumeManagerImpl.get(SiteConfiguration.getInstance());
            FileSystem destFs = vm.getVolumeByPath(dest).getFileSystem();
            destFs.create(dest).close();
            log.warn(" marked " + dest + " failed", ex);
        } catch (IOException e) {
            log.error("Unable to create failure flag file " + dest, e);
        }
    }

}
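
Compared with the older tabletserver version above, this variant decodes the work entry as UTF-8 and resolves the source and destination through the VolumeManager, so the two paths may live on different filesystems and FileUtil.copy is given a distinct FileSystem for each side.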

From source file:org.apache.beam.sdk.io.hdfs.HadoopFileSystem.java

License:Apache License

@Override
protected void copy(List<HadoopResourceId> srcResourceIds, List<HadoopResourceId> destResourceIds)
        throws IOException {
    for (int i = 0; i < srcResourceIds.size(); ++i) {
        // this enforces src and dest file systems to match
        final org.apache.hadoop.fs.FileSystem fs = srcResourceIds.get(i).toPath().getFileSystem(configuration);
        // Unfortunately HDFS FileSystems don't support a native copy operation so we are forced
        // to use the inefficient implementation found in FileUtil which copies all the bytes through
        // the local machine.
        // The HDFS FileSystem does define a concat method, but we could only find
        // DFSFileSystem implementing it, and it implements concat by deleting the
        // srcs afterwards, which is not what we want. Also, all the other FileSystem
        // implementations we saw threw UnsupportedOperationException within concat.
        final boolean success = FileUtil.copy(fs, srcResourceIds.get(i).toPath(), fs,
                destResourceIds.get(i).toPath(), false, true, fs.getConf());
        if (!success) {
            // Defensive coding as this should not happen in practice
            throw new IOException(String.format(
                    "Unable to copy resource %s to %s. No further information provided by underlying filesystem.",
                    srcResourceIds.get(i).toPath(), destResourceIds.get(i).toPath()));
        }
    }
}

From source file:org.apache.crunch.io.avro.AvroPathPerKeyTarget.java

License:Apache License

@Override
public void handleOutputs(Configuration conf, Path workingPath, int index) throws IOException {
    FileSystem srcFs = workingPath.getFileSystem(conf);
    Path base = new Path(workingPath, PlanningParameters.MULTI_OUTPUT_PREFIX + index);
    Path[] keys = FileUtil.stat2Paths(srcFs.listStatus(base), base);
    FileSystem dstFs = path.getFileSystem(conf);
    if (!dstFs.exists(path)) {
        dstFs.mkdirs(path);
    }
    boolean sameFs = isCompatible(srcFs, path);
    for (Path key : keys) {
        Path[] srcs = FileUtil.stat2Paths(srcFs.listStatus(key), key);
        Path targetPath = new Path(path, key.getName());
        dstFs.mkdirs(targetPath);
        for (Path s : srcs) {
            Path d = getDestFile(conf, s, targetPath, s.getName().contains("-m-"));
            if (sameFs) {
                srcFs.rename(s, d);
            } else {
                FileUtil.copy(srcFs, s, dstFs, d, true, true, conf);
            }
        }
    }
    dstFs.create(getSuccessIndicator(), true).close();
}