Example usage for org.apache.hadoop.fs FileUtil copy

Introduction

This page collects usage examples for org.apache.hadoop.fs.FileUtil.copy.

Prototype

public static boolean copy(FileSystem srcFS, Path src, FileSystem dstFS, Path dst, boolean deleteSource,
        Configuration conf) throws IOException 

Document

Copy files between FileSystems.
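
The method returns a boolean indicating whether the copy (and, when deleteSource is true, the removal of the source) succeeded, so callers should check the result. As a quick orientation before the examples below, here is a minimal, self-contained sketch that copies a file from the local file system to HDFS; the paths and the fs.defaultFS value are hypothetical placeholders.

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.FileUtil;
import org.apache.hadoop.fs.Path;

public class FileUtilCopyExample {
    public static void main(String[] args) throws IOException {
        Configuration conf = new Configuration();
        // hypothetical cluster address; adjust to your environment
        conf.set("fs.defaultFS", "hdfs://namenode:8020");

        FileSystem localFs = FileSystem.getLocal(conf);
        FileSystem hdfs = FileSystem.get(conf);

        // copy /tmp/report.csv from the local disk to /data/report.csv on HDFS,
        // keeping the source file (deleteSource = false)
        boolean copied = FileUtil.copy(localFs, new Path("/tmp/report.csv"),
                hdfs, new Path("/data/report.csv"), false, conf);
        if (!copied) {
            throw new IOException("copy failed");
        }
    }
}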

Usage

From source file:de.tudarmstadt.ukp.dkpro.bigdata.hadoop.UIMAMapReduceBase.java

License:Open Source License

/**
 * Copy a whole directory tree from the local file system on the node back to a directory on HDFS.
 *
 * @param results_dir the local results directory to copy
 * @param dest        the destination directory on HDFS
 * @throws IOException if the copy fails
 */
private void copyDir(Path results_dir, Path dest) throws IOException {
    // Copy output only if not empty
    if (this.localFS.exists(results_dir) && this.localFS.listFiles(results_dir, false).hasNext()) {
        FileSystem.get(this.job).mkdirs(dest);
        // copy the whole directory tree
        FileUtil.copy(this.localFS, results_dir, FileSystem.get(this.job), dest, true, this.job);
    }
}

From source file:edu.purdue.cybercenter.dm.storage.HdfsStorageFileManager.java

private void transferFile(FileSystem sourceType, String source, FileSystem destFS, String dest, FileId fileId,
        boolean makeDir) throws IOException {
    if (makeDir) {
        destFS.mkdirs(new Path(dest.substring(0, dest.lastIndexOf("/"))));
    }
    boolean success = false;
    Exception e1 = null;
    try {
        FileUtil.copy(sourceType, new Path(source), destFS, new Path(dest), false, configuration);
        success = true;
        System.out.println("file: " + source + " transfered");
    } catch (Exception e) {
        success = false;
        e.printStackTrace();
        e1 = e;
    } finally {
        if (!success) {
            if (fileId != null) {
                removeStorageFileEntry(fileId.getFileId());
            }
            if (e1 != null) {
                throw new IOException(e1);
            } else {
                throw new IOException("File not transfered");
            }
        }
    }

}

From source file:edu.purdue.cybercenter.dm.storage.HdfsStorageFileManager.java

private StorageFile moveFile(StorageFile file, Storage storage) throws Exception {
    FileSystem destFS = getFileSystemType(storage.getType());
    String destPath = storage.getLocation() + file.getLocation();
    String sourcePath = file.getStorageId().getLocation() + file.getLocation();
    FileSystem sourceFs = getFileSystemType(file.getStorageId().getType());
    destFS.mkdirs(new Path(destPath.substring(0, destPath.lastIndexOf("/"))));
    FileUtil.copy(sourceFs, new Path(sourcePath), destFS,
            new Path(destPath), false, configuration);
    FileUtil.fullyDelete(sourceFs, new Path(sourcePath));
    file.setStorageId(storage);
    file.persist();
    System.out.println("move from ");
    System.out.println(sourcePath);
    System.out.println("To ");
    System.out.println(destPath);
    return file;
}

From source file:edu.purdue.cybercenter.dm.storage.HdfsStorageFileManager.java

private StorageFile updateFile(String source, StorageFile target) throws IOException {
    String fileSysType = getStorageTypeFromFilePath(source);
    source = getAbsoultuePath(fileSysType, source);
    // remove the existing target file, then copy the new source over it
    FileUtil.fullyDelete(getFileSystemType(target.getStorageId().getType()),
            new Path(target.getStorageId().getLocation() + target.getLocation()));
    FileUtil.copy(getFileSystemType(fileSysType), new Path(source),
            getFileSystemType(target.getStorageId().getType()),
            new Path(target.getStorageId().getLocation() + target.getLocation()), false, configuration);
    return target;
}

From source file:eml.studio.server.util.HDFSIO.java

License:Open Source License

/**
 * Copy a file from src_uri to dst_uri within the same FileSystem.
 *
 * @param src_uri source file URI
 * @param dst_uri destination file URI
 * @throws Exception if the copy fails
 */
public static void copy(String src_uri, String dst_uri) throws Exception {
    FileUtil.copy(fs, new Path(src_uri), fs, new Path(dst_uri), false, conf);
}
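
A call to this wrapper might then look like the line below; the paths are hypothetical, and the static fs and conf fields are assumed to be initialized elsewhere in HDFSIO.

HDFSIO.copy("/user/alice/input/data.txt", "/user/alice/backup/data.txt");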

From source file:hdfs.jsr203.HadoopFileSystem.java

License:Apache License

void copyFile(boolean deletesrc, byte[] src, byte[] dst, CopyOption... options) throws IOException {
    checkWritable();
    if (Arrays.equals(src, dst))
        return; // do nothing, src and dst are the same

    beginWrite();
    try {
        ensureOpen();
        org.apache.hadoop.fs.Path eSrc_path = new HadoopPath(this, src).getRawResolvedPath();
        FileStatus eSrc = this.fs.getFileStatus(eSrc_path);
        if (!this.fs.exists(eSrc_path))
            throw new NoSuchFileException(getString(src));
        if (eSrc.isDirectory()) { // specification says to create dst directory
            createDirectory(dst);
            return;
        }
        boolean hasReplace = false;
        boolean hasCopyAttrs = false;
        for (CopyOption opt : options) {
            if (opt == REPLACE_EXISTING)
                hasReplace = true;
            else if (opt == COPY_ATTRIBUTES)
                hasCopyAttrs = true;
        }
        org.apache.hadoop.fs.Path eDst_path = new HadoopPath(this, dst).getRawResolvedPath();
        // this.fs.getFileStatus(eDst_path) would throw if eDst_path did not exist, so check exists() instead

        if (fs.exists(eDst_path)) {
            if (!hasReplace)
                throw new FileAlreadyExistsException(getString(dst));

            if (!fs.delete(eDst_path, false)) {
                throw new AccessDeniedException("cannot delete hdfs file " + getString(dst));
            }
        }

        // Simply use FileUtil.copy here; DistCp might be worth considering for very large files.
        boolean isCanDeleteSourceFile = FileUtil.copy(fs, eSrc_path, fs, eDst_path, deletesrc, fs.getConf());
        if (!isCanDeleteSourceFile) {
            throw new AccessDeniedException("cannot delete source file " + eSrc_path.toString());
        }

    } finally {
        endWrite();
    }
}

From source file:io.spring.batch.workflow.configuration.WorkflowConfiguration.java

License:Apache License

@Bean
@StepScope
public Tasklet archiveTasklet(FileSystem fileSystem, @Value("#{jobParameters['inputDir']}") String inputDir,
        @Value("#{jobParameters['archiveDir']}") String archiveDir) {
    return new Tasklet() {
        @Override
        public RepeatStatus execute(StepContribution contribution, ChunkContext chunkContext) throws Exception {
            FileUtil.copy(fileSystem, new Path(inputDir), fileSystem, new Path("/probe/archive/" + archiveDir),
                    true, fileSystem.getConf());
            return RepeatStatus.FINISHED;
        }
    };
}

From source file:ml.shifu.shifu.fs.ShifuFileUtils.java

License:Apache License

/**
 * Copy src file to dst file in the same FileSystem, e.g. local source to local destination,
 * or HDFS source to HDFS destination.
 *
 * @param srcPath    source file to copy
 * @param destPath   destination file
 * @param sourceType local/hdfs
 * @throws IOException if any I/O exception occurs in processing
 */
public static void copy(String srcPath, String destPath, SourceType sourceType) throws IOException {
    if (StringUtils.isEmpty(srcPath) || StringUtils.isEmpty(destPath) || sourceType == null) {
        throw new IllegalArgumentException(
                String.format("Null or empty parameters srcPath:%s, destPath:%s, sourceType:%s", srcPath,
                        destPath, sourceType));
    }

    FileSystem fs = getFileSystemBySourceType(sourceType);
    // remove any existing destination first so the copy does not merge into an existing folder;
    // a failed delete (e.g. the path does not exist) is safe to ignore
    fs.delete(new Path(destPath), true);

    FileUtil.copy(fs, new Path(srcPath), fs, new Path(destPath), false, new Configuration());
}

From source file:org.apache.accumulo.server.fs.VolumeUtil.java

License:Apache License

private static String decommisionedTabletDir(AccumuloServerContext context, ZooLock zooLock, VolumeManager vm,
        KeyExtent extent, String metaDir) throws IOException {
    Path dir = new Path(metaDir);
    if (isActiveVolume(dir))
        return metaDir;

    if (!dir.getParent().getParent().getName().equals(ServerConstants.TABLE_DIR)) {
        throw new IllegalArgumentException("Unexpected table dir " + dir);
    }

    Path newDir = new Path(vm.choose(Optional.of(extent.getTableId()), ServerConstants.getBaseUris())
            + Path.SEPARATOR + ServerConstants.TABLE_DIR + Path.SEPARATOR + dir.getParent().getName()
            + Path.SEPARATOR + dir.getName());

    log.info("Updating directory for " + extent + " from " + dir + " to " + newDir);
    if (extent.isRootTablet()) {
        // the root tablet is a special case; its files need to be copied if its dir is changed

        // this code needs to be idempotent

        FileSystem fs1 = vm.getVolumeByPath(dir).getFileSystem();
        FileSystem fs2 = vm.getVolumeByPath(newDir).getFileSystem();

        if (!same(fs1, dir, fs2, newDir)) {
            if (fs2.exists(newDir)) {
                Path newDirBackup = getBackupName(fs2, newDir);
                // never delete anything because we're dealing with the root tablet;
                // one reason this dir may exist is because this method failed previously
                log.info("renaming " + newDir + " to " + newDirBackup);
                if (!fs2.rename(newDir, newDirBackup)) {
                    throw new IOException("Failed to rename " + newDir + " to " + newDirBackup);
                }
            }

            // do a lot of logging since this is the root tablet
            log.info("copying " + dir + " to " + newDir);
            if (!FileUtil.copy(fs1, dir, fs2, newDir, false, CachedConfiguration.getInstance())) {
                throw new IOException("Failed to copy " + dir + " to " + newDir);
            }

            // only set the new location in zookeeper after a successful copy
            log.info("setting root tablet location to " + newDir);
            MetadataTableUtil.setRootTabletDir(newDir.toString());

            // rename the old dir to avoid confusion when someone looks at the filesystem; it's OK if we fail
            // here and this does not happen, because the location in zookeeper is the authority
            Path dirBackup = getBackupName(fs1, dir);
            log.info("renaming " + dir + " to " + dirBackup);
            fs1.rename(dir, dirBackup);

        } else {
            log.info("setting root tablet location to " + newDir);
            MetadataTableUtil.setRootTabletDir(newDir.toString());
        }

        return newDir.toString();
    } else {
        MetadataTableUtil.updateTabletDir(extent, newDir.toString(), context, zooLock);
        return newDir.toString();
    }
}

From source file:org.apache.accumulo.test.ImportExportIT.java

License:Apache License

@Test
public void testExportImportThenScan() throws Exception {
    Connector conn = getConnector();

    String[] tableNames = getUniqueNames(2);
    String srcTable = tableNames[0], destTable = tableNames[1];
    conn.tableOperations().create(srcTable);

    BatchWriter bw = conn.createBatchWriter(srcTable, new BatchWriterConfig());
    for (int row = 0; row < 1000; row++) {
        Mutation m = new Mutation(Integer.toString(row));
        for (int col = 0; col < 100; col++) {
            m.put(Integer.toString(col), "", Integer.toString(col * 2));
        }
        bw.addMutation(m);
    }

    bw.close();

    conn.tableOperations().compact(srcTable, null, null, true, true);

    // Make a directory we can use to hold the export and import directories.
    // It must exist on the filesystem the cluster is running on.
    FileSystem fs = cluster.getFileSystem();
    Path tmp = cluster.getTemporaryPath();
    log.info("Using FileSystem: " + fs);
    Path baseDir = new Path(tmp, getClass().getName());
    if (fs.exists(baseDir)) {
        log.info("{} exists on filesystem, deleting", baseDir);
        assertTrue("Failed to deleted " + baseDir, fs.delete(baseDir, true));
    }
    log.info("Creating {}", baseDir);
    assertTrue("Failed to create " + baseDir, fs.mkdirs(baseDir));
    Path exportDir = new Path(baseDir, "export");
    Path importDir = new Path(baseDir, "import");
    for (Path p : new Path[] { exportDir, importDir }) {
        assertTrue("Failed to create " + baseDir, fs.mkdirs(p));
    }

    log.info("Exporting table to {}", exportDir);
    log.info("Importing table from {}", importDir);

    // Offline the table
    conn.tableOperations().offline(srcTable, true);
    // Then export it
    conn.tableOperations().exportTable(srcTable, exportDir.toString());

    // Make sure the distcp.txt file that exporttable creates is available
    Path distcp = new Path(exportDir, "distcp.txt");
    Assert.assertTrue("Distcp file doesn't exist", fs.exists(distcp));
    FSDataInputStream is = fs.open(distcp);
    BufferedReader reader = new BufferedReader(new InputStreamReader(is));

    // Copy each file that was exported to the import directory
    String line;
    while (null != (line = reader.readLine())) {
        Path p = new Path(line.substring(5));
        Assert.assertTrue("File doesn't exist: " + p, fs.exists(p));

        Path dest = new Path(importDir, p.getName());
        Assert.assertFalse("Did not expect " + dest + " to exist", fs.exists(dest));
        FileUtil.copy(fs, p, fs, dest, false, fs.getConf());
    }

    reader.close();

    log.info("Import dir: {}", Arrays.toString(fs.listStatus(importDir)));

    // Import the exported data into a new table
    conn.tableOperations().importTable(destTable, importDir.toString());

    // Get the table ID for the table that the importtable command created
    final String tableId = conn.tableOperations().tableIdMap().get(destTable);
    Assert.assertNotNull(tableId);

    // Get all `file` colfams from the metadata table for the new table
    log.info("Imported into table with ID: {}", tableId);
    Scanner s = conn.createScanner(MetadataTable.NAME, Authorizations.EMPTY);
    s.setRange(MetadataSchema.TabletsSection.getRange(tableId));
    s.fetchColumnFamily(MetadataSchema.TabletsSection.DataFileColumnFamily.NAME);
    MetadataSchema.TabletsSection.ServerColumnFamily.DIRECTORY_COLUMN.fetch(s);

    // Should find a single entry
    for (Entry<Key, Value> fileEntry : s) {
        Key k = fileEntry.getKey();
        String value = fileEntry.getValue().toString();
        if (k.getColumnFamily().equals(MetadataSchema.TabletsSection.DataFileColumnFamily.NAME)) {
            // The file should be an absolute URI (file:///...), not a relative path (/b-000.../I000001.rf)
            String fileUri = k.getColumnQualifier().toString();
            Assert.assertFalse("Imported files should have absolute URIs, not relative: " + fileUri,
                    looksLikeRelativePath(fileUri));
        } else if (k.getColumnFamily().equals(MetadataSchema.TabletsSection.ServerColumnFamily.NAME)) {
            Assert.assertFalse("Server directory should have absolute URI, not relative: " + value,
                    looksLikeRelativePath(value));
        } else {
            Assert.fail("Got expected pair: " + k + "=" + fileEntry.getValue());
        }
    }

    // Online the original table before we verify equivalence
    conn.tableOperations().online(srcTable, true);

    verifyTableEquality(conn, srcTable, destTable);
}