List of usage examples for org.apache.hadoop.fs FileSystem copyFromLocalFile
public void copyFromLocalFile(Path src, Path dst) throws IOException
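Before the collected examples, a minimal sketch of the call itself (the paths and configuration here are illustrative assumptions, not taken from any source file below). copyFromLocalFile(src, dst) copies a file from the local filesystem to the FileSystem instance the method is invoked on; this two-argument overload leaves the local source in place.

import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public void copyExample() throws IOException {
    Configuration conf = new Configuration();
    FileSystem fs = FileSystem.get(conf);
    // Copy a local file into the target filesystem; the source is left untouched.
    fs.copyFromLocalFile(new Path("/tmp/input.txt"), new Path("/user/example/input.txt"));
}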
From source file:com.cloudera.cdk.tools.JobClasspathHelper.java
License:Apache License
/**
 * @param fs
 *            File system where to upload the jar.
 * @param localJarPath
 *            The local path where we find the jar.
 * @param md5sum
 *            The MD5 sum of the local jar.
 * @param remoteJarPath
 *            The remote path where to upload the jar.
 * @param remoteMd5Path
 *            The remote path where to create the MD5 file.
 *
 * @throws IOException
 */
private void copyJarToHDFS(FileSystem fs, Path localJarPath, String md5sum, Path remoteJarPath,
        Path remoteMd5Path) throws IOException {
    logger.info("Copying {} to {}", localJarPath.toUri().toASCIIString(),
            remoteJarPath.toUri().toASCIIString());
    fs.copyFromLocalFile(localJarPath, remoteJarPath);
    // Create the MD5 file for this jar.
    createMd5SumFile(fs, md5sum, remoteMd5Path);

    // We need to clean the tmp files that are created by JarFinder after the JVM exits.
    if (remoteJarPath.getName().startsWith(JarFinder.TMP_HADOOP)) {
        fs.deleteOnExit(remoteJarPath);
    }
    // Same for the MD5 file.
    if (remoteMd5Path.getName().startsWith(JarFinder.TMP_HADOOP)) {
        fs.deleteOnExit(remoteMd5Path);
    }
}
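The createMd5SumFile helper invoked above is not shown in this listing; a plausible reconstruction (an assumption, not the original Cloudera code) would simply write the checksum string to the remote path:

private void createMd5SumFile(FileSystem fs, String md5sum, Path remoteMd5Path) throws IOException {
    // Overwrite any stale MD5 file next to the uploaded jar.
    try (FSDataOutputStream out = fs.create(remoteMd5Path, true)) {
        out.writeBytes(md5sum);
    }
}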
From source file:com.cloudera.flume.PerfHdfsIO.java
License:Apache License
@Test
public void testCopy() throws IOException, InterruptedException {
    Benchmark b = new Benchmark("hdfs seqfile copy");
    b.mark("begin");

    MemorySinkSource mem = FlumeBenchmarkHarness.synthInMem();
    b.mark("disk_loaded");

    File tmp = File.createTempFile("test", "tmp");
    tmp.deleteOnExit();
    SeqfileEventSink sink = new SeqfileEventSink(tmp);
    sink.open();
    b.mark("localdisk_write_started");

    EventUtil.dumpAll(mem, sink);
    b.mark("local_disk_write done");
    sink.close();

    FlumeConfiguration conf = FlumeConfiguration.get();
    Path src = new Path(tmp.getAbsolutePath());
    Path dst = new Path("hdfs://localhost/testfile");
    FileSystem hdfs = dst.getFileSystem(conf);
    hdfs.deleteOnExit(dst);
    b.mark("hdfs_copy_started");

    hdfs.copyFromLocalFile(src, dst);
    b.mark("hdfs_copy_done");
    hdfs.close();
    b.done();
}
From source file:com.cloudera.livy.test.jobs.SQLGetTweets.java
License:Apache License
@Override
public List<String> call(JobContext jc) throws Exception {
    InputStream source = getClass().getResourceAsStream("/testweet.json");

    // Save the resource as a file in HDFS (or the local tmp dir when using a local filesystem).
    URI input;
    File local = File.createTempFile("tweets", ".json", jc.getLocalTmpDir());
    Files.copy(source, local.toPath(), StandardCopyOption.REPLACE_EXISTING);
    FileSystem fs = FileSystem.get(jc.sc().sc().hadoopConfiguration());
    if ("file".equals(fs.getUri().getScheme())) {
        input = local.toURI();
    } else {
        String uuid = UUID.randomUUID().toString();
        Path target = new Path("/tmp/" + uuid + "-tweets.json");
        fs.copyFromLocalFile(new Path(local.toURI()), target);
        input = target.toUri();
    }

    SQLContext sqlctx = useHiveContext ? jc.hivectx() : jc.sqlctx();
    DataFrame df = sqlctx.jsonFile(input.toString());
    df.registerTempTable("tweets");

    DataFrame topTweets = sqlctx.sql("SELECT text, retweetCount FROM tweets ORDER BY retweetCount LIMIT 10");
    List<String> tweetList = new ArrayList<>();
    for (Row r : topTweets.collect()) {
        tweetList.add(r.toString());
    }
    return tweetList;
}
From source file:com.collective.celos.ci.deploy.HdfsDeployer.java
License:Apache License
public void deploy() throws Exception {
    FileSystem fs = context.getFileSystem();
    final String hdfsDirLocalPath = String.format(LOCAL_HDFS_PATTERN, context.getDeployDir());
    final File hdfsDirLocal = new File(hdfsDirLocalPath);
    if (!hdfsDirLocal.exists()) {
        throw new IllegalStateException(hdfsDirLocalPath + " not found on local FS");
    }
    undeploy();
    Path dst = getDestinationHdfsPath();
    fs.mkdirs(dst);
    String[] childFiles = hdfsDirLocal.list();
    for (String child : childFiles) {
        fs.copyFromLocalFile(new Path(hdfsDirLocalPath, child), dst);
    }
}
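As an aside, FileSystem also offers a multi-source overload of copyFromLocalFile, so the loop above could be collapsed into one call. A sketch under the same variable names (delSrc=false keeps the local files, overwrite=true replaces existing targets):

Path[] srcs = new Path[childFiles.length];
for (int i = 0; i < childFiles.length; i++) {
    srcs[i] = new Path(hdfsDirLocalPath, childFiles[i]);
}
// Copy all children into dst in a single FileSystem call.
fs.copyFromLocalFile(false, true, srcs, dst);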
From source file:com.datasalt.pangool.solr.TupleSolrOutputFormat.java
License:Apache License
private void setupSolrHomeCache(File solrHome, Configuration conf) throws IOException {
    if (solrHome == null || !(solrHome.exists() && solrHome.isDirectory())) {
        throw new IOException("Invalid solr.home: " + solrHome);
    }
    localSolrHome = solrHome.getAbsolutePath();
    File tmpZip = File.createTempFile("solr", "zip");
    createZip(solrHome, tmpZip);
    // Make a reasonably unique name for the zip file in the distributed cache
    // to avoid collisions if multiple jobs are running.
    String hdfsZipName = UUID.randomUUID().toString() + '.' + ZIP_FILE_BASE_NAME;
    zipName = hdfsZipName;

    Path zipPath = new Path("/tmp", zipName);
    FileSystem fs = FileSystem.get(conf);
    fs.copyFromLocalFile(new Path(tmpZip.toString()), zipPath);
    // The '#' fragment tells the DistributedCache to expose the archive under a symlink of that name.
    final URI baseZipUrl = fs.getUri().resolve(zipPath.toString() + '#' + zipName);

    DistributedCache.addCacheArchive(baseZipUrl, conf);
    LOG.info("Set Solr cache: " + Arrays.asList(DistributedCache.getCacheArchives(conf)));
}
From source file:com.datatorrent.stram.client.StramClientUtils.java
License:Apache License
public static void copyFromLocalFileNoChecksum(FileSystem fs, File fromLocal, Path toDFS) throws IOException {
    // This is to avoid having the hadoop FileSystem API perform a checksum on the local file.
    // This "feature" has caused a lot of headache because the local file can be copied from HDFS
    // and modified, and the checksum will fail if the file is again copied to HDFS.
    try {
        new File(fromLocal.getParentFile(), "." + fromLocal.getName() + ".crc").delete();
    } catch (Exception ex) {
        // ignore
    }
    fs.copyFromLocalFile(new Path(fromLocal.toURI()), toDFS);
}
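An alternative to deleting the .crc sidecar, assuming the failure happens when the stale checksum is verified on read, is to turn off verification on the cached local filesystem instance before copying. This is a sketch, not part of the original utility:

// Assumption: FileSystem.getLocal() returns the same cached LocalFileSystem
// that copyFromLocalFile uses internally to read the source.
LocalFileSystem localFs = FileSystem.getLocal(fs.getConf());
// Skip verification against any stale .crc sidecar files on the local side.
localFs.setVerifyChecksum(false);
fs.copyFromLocalFile(new Path(fromLocal.toURI()), toDFS);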
From source file:com.epam.hadoop.nv.yarn.Client.java
License:Apache License
private void addToLocalResources(FileSystem fs, String fileSrcPath, String fileDstPath, String appId,
        Map<String, LocalResource> localResources, String resources) throws IOException {
    String suffix = appName + "/" + appId + "/" + fileDstPath;
    Path dst = new Path(fs.getHomeDirectory(), suffix);
    if (fileSrcPath == null) {
        FSDataOutputStream ostream = null;
        try {
            ostream = FileSystem.create(fs, dst, new FsPermission((short) 0710));
            ostream.writeUTF(resources);
        } finally {
            IOUtils.closeQuietly(ostream);
        }
    } else {
        fs.copyFromLocalFile(new Path(fileSrcPath), dst);
    }
    FileStatus scFileStatus = fs.getFileStatus(dst);
    LocalResource scRsrc = LocalResource.newInstance(ConverterUtils.getYarnUrlFromURI(dst.toUri()),
            LocalResourceType.FILE, LocalResourceVisibility.APPLICATION, scFileStatus.getLen(),
            scFileStatus.getModificationTime());
    localResources.put(fileDstPath, scRsrc);
}
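Once a map has been populated this way, it is typically handed to the container launch context. A minimal hedged sketch (env and commands are placeholders, not from the source above):

// env: Map<String, String> of environment variables; commands: List<String> to run.
ContainerLaunchContext ctx = ContainerLaunchContext.newInstance(
        localResources, env, commands, null, null, null);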
From source file:com.github.hdl.tensorflow.yarn.app.Client.java
License:Apache License
private String copyLocalFileToDfs(FileSystem fs, String appId, String srcFilePath, String dstFileName)
        throws IOException {
    String suffix = TFYarnConstants.APP_NAME + "/" + appId + "/" + dstFileName;
    Path dst = new Path(fs.getHomeDirectory(), suffix);
    if (srcFilePath != null) {
        fs.copyFromLocalFile(new Path(srcFilePath), dst);
    }
    LOG.info("Copy " + srcFilePath + " to " + dst.toString());
    return dst.toString();
}
From source file:com.github.hdl.tensorflow.yarn.app.TFContainer.java
License:Apache License
public void addToLocalResources(FileSystem fs, String fileSrcPath, String fileDstPath, String appId,
        Map<String, LocalResource> localResources, String resources) throws IOException {
    execCmd("pwd");
    execCmd("ls -l");
    String suffix = appName + "/" + appId + "/" + fileDstPath;
    Path dst = new Path(fs.getHomeDirectory(), suffix);
    LOG.info("copy: " + fileSrcPath + " ===> " + dst.toString());
    if (fileSrcPath == null) {
        FSDataOutputStream ostream = null;
        try {
            ostream = FileSystem.create(fs, dst, new FsPermission((short) 0710));
            ostream.writeUTF(resources);
        } finally {
            IOUtils.closeQuietly(ostream);
        }
    } else {
        fs.copyFromLocalFile(new Path(fileSrcPath), dst);
    }
    FileStatus scFileStatus = fs.getFileStatus(dst);
    LocalResource scRsrc = LocalResource.newInstance(URL.fromURI(dst.toUri()), LocalResourceType.FILE,
            LocalResourceVisibility.APPLICATION, scFileStatus.getLen(), scFileStatus.getModificationTime());
    localResources.put(fileDstPath, scRsrc);
}
From source file:com.inmobi.conduit.Conduit.java
License:Apache License
private void copyInputFormatJarToClusterFS(Cluster cluster, String inputFormatSrcJar) throws IOException {
    FileSystem clusterFS = FileSystem.get(cluster.getHadoopConf());
    // Create the jars path inside the /conduit/system/tmp path.
    Path jarsPath = new Path(cluster.getTmpPath(), "jars");
    if (!clusterFS.exists(jarsPath)) {
        clusterFS.mkdirs(jarsPath);
    }
    // Copy the inputFormat source jar into the /conduit/system/tmp/jars path.
    Path inputFormatJarDestPath = new Path(jarsPath, "conduit-distcp-current.jar");
    if (clusterFS.exists(inputFormatJarDestPath)) {
        clusterFS.delete(inputFormatJarDestPath, true);
    }
    clusterFS.copyFromLocalFile(new Path(inputFormatSrcJar), inputFormatJarDestPath);
}