List of usage examples for org.apache.hadoop.fs FileUtil copy
public static boolean copy(FileSystem srcFS, Path src, FileSystem dstFS, Path dst, boolean deleteSource, Configuration conf) throws IOException
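Before the project examples below, a minimal sketch of a typical invocation: copying a file from the local file system to HDFS without deleting the source. The paths and class name here are assumptions for illustration, not taken from any of the projects listed.

import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.FileUtil;
import org.apache.hadoop.fs.Path;

public class FileUtilCopyExample {
    public static void main(String[] args) throws IOException {
        Configuration conf = new Configuration();
        FileSystem localFs = FileSystem.getLocal(conf);
        FileSystem hdfs = FileSystem.get(conf);
        // deleteSource = false: the local file is kept after the copy.
        // copy() returns true on success.
        boolean ok = FileUtil.copy(localFs, new Path("/tmp/input.txt"),
                hdfs, new Path("/user/example/input.txt"), false, conf);
        if (!ok) {
            throw new IOException("copy failed");
        }
    }
}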
From source file:audr.text.utils.FileUtils.java
License:Open Source License
/**
 * Upload a local file to HDFS.
 *
 * @param localFile  path of the local source file
 * @param hadoopFile destination path on HDFS
 */
public static void uploadFile2HDFS(String localFile, String hadoopFile) {
    try {
        Configuration conf = new Configuration();
        FileSystem src = FileSystem.getLocal(conf);
        FileSystem dst = FileSystem.get(conf);
        Path srcpath = new Path(localFile);
        Path dstpath = new Path(hadoopFile);
        FileUtil.copy(src, srcpath, dst, dstpath, false, conf);
    } catch (Exception e) {
        e.printStackTrace();
    }
}
From source file:com.asakusafw.thundergate.runtime.cache.mapreduce.CacheBuildClient.java
License:Apache License
private void putMeta() throws IOException {
    LOG.info(MessageFormat.format("setting patched properties: {0} -> {1}",
            storage.getPatchProperties(), getNextDirectory()));
    FileUtil.copy(storage.getFileSystem(), storage.getPatchProperties(),
            storage.getFileSystem(), getNextProperties(), false, storage.getConfiguration());
}
From source file:com.cloudera.flume.collector.CopyToHdfs.java
License:Apache License
public static void main(String[] argv) throws IOException {
    if (argv.length < 2) {
        System.out.println("Need to specify arguments <src> <dst>");
        System.exit(-1);
    }
    Configuration conf = new Configuration();
    Path srcPath = new Path(argv[0]);
    FileSystem srcFs = srcPath.getFileSystem(conf);
    Path dstPath = new Path(argv[1]);
    FileSystem dstFs = dstPath.getFileSystem(conf);
    // dfs.copyFromLocalFile(false, psrc, pdst);
    FileUtil.copy(srcFs, srcPath, dstFs, dstPath, false, conf);
}
From source file:com.conversantmedia.mapreduce.example.PrepareInputsExample.java
License:Apache License
@DriverInit
public void copyFilesToWorking() throws IOException {
    // Copy the input files into the 'workingDir'
    FileSystem fs = FileSystem.get(getConf());
    this.workingDirectory = new Path("/tmp/" + UUID.randomUUID().toString());
    fs.mkdirs(workingDirectory);
    FileStatus[] files = fs.globStatus(new Path(context.getInput()));
    for (FileStatus file : files) {
        Path dest = new Path(workingDirectory, file.getPath().getName());
        FileUtil.copy(fs, file.getPath(), fs, dest, false, getConf());
    }
}
From source file:com.datasalt.pangool.utils.DCUtils.java
License:Apache License
/**
 * Utility method for serializing an object and saving it in the Distributed Cache.
 * <p>
 * The file where it has been serialized will be saved into a Hadoop Configuration property so that you can call
 * {@link DCUtils#loadSerializedObjectInDC(Configuration, Class, String, boolean)} to re-instantiate the
 * serialized instance.
 *
 * @param obj The object instance to serialize using Java serialization.
 * @param serializeToLocalFile The local file where the instance will be serialized. It will be copied to the HDFS and removed.
 * @param conf The Hadoop Configuration.
 * @throws FileNotFoundException
 * @throws IOException
 * @throws URISyntaxException
 */
public static void serializeToDC(Object obj, String serializeToLocalFile, Configuration conf)
        throws FileNotFoundException, IOException, URISyntaxException {
    File hadoopTmpDir = new File(conf.get("hadoop.tmp.dir"));
    if (!hadoopTmpDir.exists()) {
        hadoopTmpDir.mkdir();
    }
    File file = new File(hadoopTmpDir, serializeToLocalFile);
    FileSystem fS = FileSystem.get(conf);
    ObjectOutput out = new ObjectOutputStream(new FileOutputStream(file));
    out.writeObject(obj);
    out.close();
    if (fS.equals(FileSystem.getLocal(conf))) {
        return;
    }
    String tmpHdfsFolder = conf.get(HDFS_TMP_FOLDER_CONF);
    if (tmpHdfsFolder == null) {
        // Set the temporary folder for Pangool instances to the temporary folder of the user
        // that is running the Job. This folder will be used across the cluster for locating
        // the instances. This way, task trackers that are run as a different user will still
        // be able to locate this folder.
        tmpHdfsFolder = conf.get("hadoop.tmp.dir");
        conf.set(HDFS_TMP_FOLDER_CONF, tmpHdfsFolder);
    }
    Path toHdfs = new Path(tmpHdfsFolder, serializeToLocalFile);
    if (fS.exists(toHdfs)) { // remove any previous copy before writing the new one
        fS.delete(toHdfs, false);
    }
    FileUtil.copy(FileSystem.getLocal(conf), new Path(file + ""), FileSystem.get(conf), toHdfs, true, conf);
    DistributedCache.addCacheFile(toHdfs.toUri(), conf);
}
From source file:com.datatorrent.stram.StramClient.java
License:Apache License
public void copyInitialState(Path origAppDir) throws IOException {
    // locate previous snapshot
    String newAppDir = this.dag.assertAppPath();
    FSRecoveryHandler recoveryHandler = new FSRecoveryHandler(origAppDir.toString(), conf);
    // read snapshot against new dependencies
    Object snapshot = recoveryHandler.restore();
    if (snapshot == null) {
        throw new IllegalArgumentException("No previous application state found in " + origAppDir);
    }
    InputStream logIs = recoveryHandler.getLog();
    // modify snapshot state to switch app id
    ((StreamingContainerManager.CheckpointState) snapshot).setApplicationId(this.dag, conf);
    Path checkpointPath = new Path(newAppDir, LogicalPlan.SUBDIR_CHECKPOINTS);
    FileSystem fs = FileSystem.newInstance(origAppDir.toUri(), conf);
    // remove the path that was created by the storage agent during deserialization and replacement
    fs.delete(checkpointPath, true);
    // write snapshot to new location
    recoveryHandler = new FSRecoveryHandler(newAppDir, conf);
    recoveryHandler.save(snapshot);
    OutputStream logOs = recoveryHandler.rotateLog();
    IOUtils.copy(logIs, logOs);
    logOs.flush();
    logOs.close();
    logIs.close();
    // copy sub directories that are not present in target
    FileStatus[] lFiles = fs.listStatus(origAppDir);
    for (FileStatus f : lFiles) {
        if (f.isDirectory()) {
            String targetPath = f.getPath().toString().replace(origAppDir.toString(), newAppDir);
            if (!fs.exists(new Path(targetPath))) {
                LOG.debug("Copying {} to {}", f.getPath(), targetPath);
                FileUtil.copy(fs, f.getPath(), fs, new Path(targetPath), false, conf);
                //FSUtil.copy(fs, f, fs, new Path(targetPath), false, false, conf);
            } else {
                LOG.debug("Ignoring {} as it already exists under {}", f.getPath(), targetPath);
                //FSUtil.setPermission(fs, new Path(targetPath), new FsPermission((short)0777));
            }
        }
    }
}
From source file:com.linkedin.pinot.filesystem.HadoopPinotFS.java
License:Apache License
/**
 * Note that this method copies within a cluster. If you want to copy outside the cluster, you will
 * need to create a new configuration and filesystem. Keeps files if copy/move is partial.
 */
@Override
public boolean copy(URI srcUri, URI dstUri) throws IOException {
    Path source = new Path(srcUri);
    Path target = new Path(dstUri);
    RemoteIterator<LocatedFileStatus> sourceFiles = hadoopFS.listFiles(source, true);
    if (sourceFiles != null) {
        while (sourceFiles.hasNext()) {
            // deleteSource is true here, so each source file is removed after it is copied
            boolean succeeded = FileUtil.copy(hadoopFS, sourceFiles.next().getPath(), hadoopFS, target,
                    true, hadoopConf);
            if (!succeeded) {
                return false;
            }
        }
    }
    return true;
}
From source file:com.mvdb.etl.actions.ActionUtils.java
License:Apache License
public static void copyLocalDirectoryToHdfsDirectory(String localDirectory, String hdfsDirectory)
        throws Throwable {
    String hdfsHome = getConfigurationValue(ConfigurationKeys.GLOBAL_CUSTOMER,
            ConfigurationKeys.GLOBAL_HADOOP_HOME);
    org.apache.hadoop.conf.Configuration conf = new org.apache.hadoop.conf.Configuration();
    conf.addResource(new Path(hdfsHome + "/conf/core-site.xml"));
    FileSystem hdfsFileSystem = FileSystem.get(conf);
    FileSystem localFileSystem = FileSystem.get(new org.apache.hadoop.conf.Configuration());
    Path localDirectoryPath = new Path(localDirectory);
    Path hdfsDirectoryPath = new Path(hdfsDirectory);
    if (hdfsFileSystem.exists(hdfsDirectoryPath)) {
        boolean deleteSuccess = hdfsFileSystem.delete(hdfsDirectoryPath, true);
        if (deleteSuccess == false) {
            throw new RuntimeException("Unable to delete " + hdfsDirectoryPath.toString());
        }
    }
    if (!localFileSystem.exists(localDirectoryPath)) {
        throw new RuntimeException("Input directory " + localDirectoryPath + " not found");
    }
    FileStatus fileStatus1 = localFileSystem.getFileStatus(localDirectoryPath);
    if (!fileStatus1.isDir()) {
        throw new RuntimeException("Input " + localDirectoryPath + " should be a directory");
    }
    if (hdfsFileSystem.exists(hdfsDirectoryPath)) {
        throw new RuntimeException("Output " + hdfsDirectoryPath + " already exists");
    }
    logger.info("Attempting Copy " + localDirectoryPath.toString() + " to " + hdfsDirectoryPath.toString());
    FileUtil.copy(localFileSystem, localDirectoryPath, hdfsFileSystem, hdfsDirectoryPath, false, conf);
    logger.info("Completed Copy " + localDirectoryPath.toString() + " to " + hdfsDirectoryPath.toString());
}
From source file:com.pegasus.ResultInfo.java
License:Apache License
public int run(final String[] args) throws Exception {
    Configuration conf = getConf();
    final FileSystem fs = FileSystem.get(conf);
    edge_path = new Path(conf.get("edge_path"));
    all_vertices = new Path(conf.get("all_vertices"));
    curbm_path = new Path(conf.get("iteration_state"));
    tempbm_path = new Path(conf.get("stage1out"));
    nextbm_path = new Path(conf.get("stage2out"));
    output_path = new Path(conf.get("stage3out"));
    grapherOut_path = new Path(conf.get("grapherout"));
    nreducers = Integer.parseInt(conf.get("num_reducers"));
    local_output_path = conf.get("local_output");

    // initial cleanup
    fs.delete(tempbm_path, true);
    fs.delete(nextbm_path, true);
    fs.delete(output_path, true);
    fs.delete(curbm_path, true);
    fs.delete(grapherOut_path, true);
    FileUtil.fullyDelete(new File(local_output_path));
    fs.mkdirs(curbm_path);
    //fs.mkdirs(grapherOut_path);

    FileStatus[] statusArray = fs.listStatus(all_vertices);
    for (int index = 0; index < statusArray.length; index++) {
        Path temp = statusArray[index].getPath();
        FileUtil.copy(fs, temp, fs, curbm_path, false, conf);
    }

    make_symmetric = 1;

    System.out.println("\n-----===[PEGASUS: A Peta-Scale Graph Mining System]===-----\n");

    // Iteratively calculate the neighborhood function, rotating directories between iterations.
    for (int i = cur_iter; i < MAX_ITERATIONS; i++) {
        cur_iter++;

        System.out.println("configStage1");
        JobClient.runJob(configStage1());
        System.out.println("configStage2");
        JobClient.runJob(configStage2());
        System.out.println("configStage3");
        JobClient.runJob(configStage3());

        FileUtil.fullyDelete(FileSystem.getLocal(getConf()), new Path(local_output_path));

        // copy neighborhood information from HDFS to local disk, and read it
        String new_path = local_output_path + "/" + i;
        fs.copyToLocalFile(output_path, new Path(new_path));
        ResultInfo ri = readIterationOutput(new_path);

        changed_nodes[iter_counter] = ri.changed;
        unchanged_nodes[iter_counter] = ri.unchanged;
        iter_counter++;

        System.out.println("Hop " + i + " : changed = " + ri.changed + ", unchanged = " + ri.unchanged);

        fs.delete(curbm_path);
        fs.delete(tempbm_path);
        fs.delete(output_path);
        fs.rename(nextbm_path, curbm_path);

        // Stop when the minimum neighborhood doesn't change
        if (ri.changed == 0) {
            System.out.println("All the component ids converged. Finishing...");
            fs.rename(curbm_path, grapherOut_path);
            break;
        }
    }
    FileUtil.fullyDelete(FileSystem.getLocal(getConf()), new Path(local_output_path));

    // finishing
    System.out.println("\n[PEGASUS] Connected component computed.");
    System.out.println("[PEGASUS] Total Iteration = " + iter_counter);
    return 0;
}
From source file:com.quantcast.qfs.hadoop.QuantcastFileSystem.java
License:Apache License
public void copyFromLocalFile(boolean delSrc, Path src, Path dst) throws IOException {
    FileUtil.copy(localFs, src, this, dst, delSrc, getConf());
}
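As the delSrc parameter above suggests, the deleteSource flag is what distinguishes a copy from a move: when it is true, FileUtil.copy removes the source once the copy succeeds. A minimal sketch of this behavior; the paths and class name are assumptions for illustration, not from any project above.

import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.FileUtil;
import org.apache.hadoop.fs.Path;

public class FileUtilMoveExample {
    public static void main(String[] args) throws IOException {
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(conf);
        // deleteSource = true: the source path is deleted after a successful copy,
        // so this behaves as a move within the same filesystem.
        boolean ok = FileUtil.copy(fs, new Path("/data/staging/part-00000"),
                fs, new Path("/data/final/part-00000"), true, conf);
        System.out.println(ok ? "moved" : "move failed");
    }
}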