List of usage examples for org.apache.hadoop.fs FileSystem copyFromLocalFile
public void copyFromLocalFile(Path src, Path dst) throws IOException
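For orientation before the project examples below, here is a minimal sketch of the basic call pattern. The paths and configuration here are hypothetical, chosen only for illustration; note that the two-argument overload copies the file and leaves the local source in place.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class CopyFromLocalExample {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        // Obtain the default filesystem (typically HDFS) from the configuration.
        FileSystem fs = FileSystem.get(conf);
        // Hypothetical source and destination paths for illustration only.
        Path src = new Path("file:///tmp/input.txt");
        Path dst = new Path("/user/example/input.txt");
        // Copies the local file to the destination filesystem; the source file is kept.
        fs.copyFromLocalFile(src, dst);
        fs.close();
    }
}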
From source file:org.dutir.lucene.io.HadoopUtility.java
License:Mozilla Public License
protected static void saveClassPathToJob(JobConf jobConf) throws IOException {
    String[] jars = findJarFiles(
            new String[] { System.getenv().get("CLASSPATH"), System.getProperty("java.class.path") });
    for (String jarFile : jars) {
        Path srcJarFilePath = new Path("file:///" + jarFile);
        String filename = srcJarFilePath.getName();
        Path tmpJarFilePath = makeTemporaryFile(jobConf, filename);
        FileSystem defFS = FileSystem.get(jobConf);
        defFS.copyFromLocalFile(srcJarFilePath, tmpJarFilePath);
        DistributedCache.addFileToClassPath(tmpJarFilePath, jobConf);
    }
    DistributedCache.createSymlink(jobConf);
}
From source file:org.gbif.ocurrence.index.solr.SolrOutputFormat.java
License:Apache License
public static void setupSolrHomeCache(File solrHome, Configuration jobConf) throws IOException {
    if (solrHome == null || !(solrHome.exists() && solrHome.isDirectory())) {
        throw new IOException("Invalid solr.home: " + solrHome);
    }
    File tmpZip = File.createTempFile("solr", "zip");
    createZip(solrHome, tmpZip);
    // Make a reasonably unique name for the zip file in the distributed cache
    // to avoid collisions if multiple jobs are running.
    String hdfsZipName = UUID.randomUUID().toString() + '.' + ZIP_FILE_BASE_NAME;
    jobConf.set(ZIP_NAME, hdfsZipName);
    Path zipPath = new Path("/tmp", getZipName(jobConf));
    FileSystem fs = FileSystem.get(jobConf);
    fs.copyFromLocalFile(new Path(tmpZip.toString()), zipPath);
    final URI baseZipUrl = fs.getUri().resolve(zipPath.toString() + '#' + getZipName(jobConf));
    DistributedCache.addCacheArchive(baseZipUrl, jobConf);
    LOG.info("Set Solr cache: " + Arrays.asList(DistributedCache.getCacheArchives(jobConf)));
    // Actually send the path for the configuration zip file
    jobConf.set(SETUP_OK, zipPath.toString());
}
From source file:org.hdl.caffe.yarn.app.Client.java
License:Apache License
private String copyLocalFileToDfs(FileSystem fs, String appId, String srcFilePath, String dstFileName)
        throws IOException {
    String suffix = CaffeYarnConstants.APP_NAME + "/" + appId + "/" + dstFileName;
    Path dst = new Path(fs.getHomeDirectory(), suffix);
    if (srcFilePath != null) {
        fs.copyFromLocalFile(new Path(srcFilePath), dst);
    }
    LOG.info("Copy " + srcFilePath + " to " + dst.toString());
    return dst.toString();
}
From source file:org.hdl.tensorflow.yarn.util.Utils.java
License:Apache License
public static Path copyLocalFileToDfs(FileSystem fs, String appId, Path srcPath, String dstFileName)
        throws IOException {
    Path dstPath = new Path(fs.getHomeDirectory(),
            Constants.DEFAULT_APP_NAME + Path.SEPARATOR + appId + Path.SEPARATOR + dstFileName);
    LOG.info("Copying " + srcPath + " to " + dstPath);
    fs.copyFromLocalFile(srcPath, dstPath);
    return dstPath;
}
From source file:org.icgc.dcc.submission.validation.key.KVTestUtils.java
License:Open Source License
public static void copyDirectory(FileSystem fileSystem, File sourceDir, Path targetDir) throws IOException {
    for (val file : sourceDir.listFiles()) {
        val source = new Path(file.toURI());
        val target = new Path(targetDir, file.getName());
        log.info("Copying file: from '{}' to '{}'", source, target);
        fileSystem.copyFromLocalFile(source, target);
    }
}
From source file:org.janusgraph.hadoop.config.job.AbstractDistCacheConfigurer.java
License:Apache License
protected Path uploadFileIfNecessary(FileSystem localFS, Path localPath, FileSystem destFS) throws IOException {
    // Fast path for local FS -- DistributedCache + local JobRunner seems to copy/link files automatically
    if (destFS.equals(localFS)) {
        log.debug("Skipping file upload for {} (destination filesystem {} equals local filesystem)", localPath,
                destFS);
        return localPath;
    }
    Path destPath = new Path(destFS.getHomeDirectory() + "/" + HDFS_TMP_LIB_DIR + "/" + localPath.getName());
    Stats fileStats = null;
    try {
        fileStats = compareModtimes(localFS, localPath, destFS, destPath);
    } catch (IOException e) {
        log.warn("Unable to read or stat file: localPath={}, destPath={}, destFS={}", localPath, destPath,
                destFS);
    }
    if (fileStats != null && !fileStats.isRemoteCopyCurrent()) {
        log.debug("Copying {} to {}", localPath, destPath);
        destFS.copyFromLocalFile(localPath, destPath);
        if (null != fileStats.local) {
            final long mtime = fileStats.local.getModificationTime();
            log.debug("Setting modtime on {} to {}", destPath, mtime);
            destFS.setTimes(destPath, mtime, -1); // -1 means leave atime alone
        }
    }
    return destPath;
}
From source file:org.kitesdk.cli.commands.TestCreateDatasetWithExistingData.java
License:Apache License
@BeforeClass
public static void createDatasetFromCSV() throws Exception {
    String sample = "target/users.csv";
    String avsc = "target/user.avsc";
    BufferedWriter writer = Files.newWriter(new File(sample), CSVSchemaCommand.SCHEMA_CHARSET);
    writer.append("id,username,email\n");
    writer.append("1,test,test@example.com\n");
    writer.append("2,user,user@example.com\n");
    writer.close();

    TestUtil.run("delete", "dataset:file:target/data/users");
    TestUtil.run("-v", "csv-schema", sample, "-o", avsc, "--class", "User");
    TestUtil.run("-v", "create", "dataset:file:target/data/users", "-s", avsc, "-f", "parquet");
    TestUtil.run("-v", "csv-import", sample, "dataset:file:target/data/users");

    USER_SCHEMA = Schemas.fromAvsc(new File(avsc));

    FileSystem fs = LocalFileSystem.getInstance();
    FileStatus[] stats = fs.listStatus(new Path("target/data/users"));
    Path parquetFile = null;
    for (FileStatus stat : stats) {
        if (stat.getPath().toString().endsWith(".parquet")) {
            parquetFile = stat.getPath();
            break;
        }
    }

    // make a directory with the Parquet file
    fs.mkdirs(existingDataPath);
    fs.copyFromLocalFile(parquetFile, existingDataPath);
    fs.mkdirs(existingPartitionedPathWithPartition);
    fs.copyFromLocalFile(parquetFile, existingPartitionedPathWithPartition);
}
From source file:org.kitesdk.tools.JobClasspathHelper.java
License:Apache License
/**
 * @param fs
 *            File system where to upload the jar.
 * @param localJarPath
 *            The local path where we find the jar.
 * @param md5sum
 *            The MD5 sum of the local jar.
 * @param remoteJarPath
 *            The remote path where to upload the jar.
 * @param remoteMd5Path
 *            The remote path where to create the MD5 file.
 * @throws IOException
 */
private void copyJarToHDFS(FileSystem fs, Path localJarPath, String md5sum, Path remoteJarPath,
        Path remoteMd5Path) throws IOException {
    LOG.info("Copying {} to {}", localJarPath.toUri().toASCIIString(), remoteJarPath.toUri().toASCIIString());
    fs.copyFromLocalFile(localJarPath, remoteJarPath);
    // create the MD5 file for this jar.
    createMd5SumFile(fs, md5sum, remoteMd5Path);
    // we need to clean the tmp files that are created by JarFinder after the JVM exits.
    if (remoteJarPath.getName().startsWith(JarFinder.TMP_HADOOP)) {
        fs.deleteOnExit(remoteJarPath);
    }
    // same for the MD5 file.
    if (remoteMd5Path.getName().startsWith(JarFinder.TMP_HADOOP)) {
        fs.deleteOnExit(remoteMd5Path);
    }
}
From source file:org.mrgeo.utils.DependencyLoader.java
License:Apache License
private static void addFileToClasspath(Configuration conf, Set<String> existing, FileSystem fs, Path hdfsBase,
        File file) throws IOException {
    Path hdfsPath = new Path(hdfsBase, file.getName());
    if (!existing.contains(hdfsPath.toString())) {
        if (fs.exists(hdfsPath)) {
            // check the timestamp and exit if the one in hdfs is "newer"
            FileStatus status = fs.getFileStatus(hdfsPath);
            if (file.lastModified() <= status.getModificationTime()) {
                log.debug(file.getPath() + " up to date");
                DistributedCache.addFileToClassPath(hdfsPath, conf, fs);
                existing.add(hdfsPath.toString());
                return;
            }
        }
        // copy the file...
        log.debug("Copying " + file.getPath() + " to HDFS for distribution");
        fs.copyFromLocalFile(new Path(file.getCanonicalFile().toURI()), hdfsPath);
        DistributedCache.addFileToClassPath(hdfsPath, conf, fs);
        existing.add(hdfsPath.toString());
    }
}
From source file:org.notmysock.tez.BroadcastTest.java
License:Apache License
public boolean run(Configuration conf, boolean doLocalityCheck) throws Exception {
    System.out.println("Running BroadcastTest");
    // conf and UGI
    TezConfiguration tezConf;
    if (conf != null) {
        tezConf = new TezConfiguration(conf);
    } else {
        tezConf = new TezConfiguration();
    }
    tezConf.setBoolean(TezConfiguration.TEZ_AM_CONTAINER_REUSE_ENABLED, true);
    UserGroupInformation.setConfiguration(tezConf);
    String user = UserGroupInformation.getCurrentUser().getShortUserName();
    // staging dir
    FileSystem fs = FileSystem.get(tezConf);
    String stagingDirStr = Path.SEPARATOR + "user" + Path.SEPARATOR + user + Path.SEPARATOR + ".staging"
            + Path.SEPARATOR + Long.toString(System.currentTimeMillis());
    Path stagingDir = new Path(stagingDirStr);
    tezConf.set(TezConfiguration.TEZ_AM_STAGING_DIR, stagingDirStr);
    stagingDir = fs.makeQualified(stagingDir);
    Path jobJar = new Path(stagingDir, "job.jar");
    fs.copyFromLocalFile(getCurrentJarURL(), jobJar);
    Map<String, LocalResource> localResources = new HashMap<String, LocalResource>();
    localResources.put("job.jar", createLocalResource(fs, jobJar));
    // needs a session, or else the TaskScheduler does not hold onto containers
    TezClient tezSession = TezClient.create("BroadcastTest", tezConf);
    tezSession.addAppMasterLocalFiles(localResources);
    tezSession.start();
    DAGClient dagClient = null;
    try {
        DAG dag = createDAG(fs, tezConf, stagingDir, localResources);
        dag.addTaskLocalFiles(localResources);
        tezSession.waitTillReady();
        dagClient = tezSession.submitDAG(dag);
        // monitoring
        DAGStatus dagStatus = dagClient.waitForCompletionWithStatusUpdates(null);
        if (dagStatus.getState() != DAGStatus.State.SUCCEEDED) {
            System.out.println("DAG diagnostics: " + dagStatus.getDiagnostics());
            return false;
        }
        return true;
    } finally {
        fs.delete(stagingDir, true);
        tezSession.stop();
    }
}