List of usage examples for org.apache.hadoop.fs FileSystem copyFromLocalFile
public void copyFromLocalFile(Path src, Path dst) throws IOException
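Before the examples, a minimal sketch of the call in isolation. The paths and class name below are hypothetical, chosen only to illustrate the two-argument overload; real code should use paths that exist on the local machine and a FileSystem matching its cluster configuration.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class CopyFromLocalFileExample {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration(); // picks up core-site.xml etc. from the classpath
        FileSystem fs = FileSystem.get(conf);

        // Hypothetical paths for illustration: copy a local file to the target FileSystem.
        Path src = new Path("/tmp/local-data.txt");    // file on the local filesystem
        Path dst = new Path("/user/example/data.txt"); // destination path on the FileSystem

        // Copies src to dst; the source file is kept. FileSystem also provides a
        // copyFromLocalFile(boolean delSrc, Path src, Path dst) overload that can
        // delete the source after a successful copy.
        fs.copyFromLocalFile(src, dst);
    }
}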
From source file: io.dstream.tez.utils.HadoopUtils.java
License: Apache License
private static synchronized void provisioinResourceToFs(FileSystem fs, Path sourcePath, Path destPath)
        throws Exception {
    if (logger.isDebugEnabled()) {
        logger.debug("Provisioning '" + sourcePath + "' to " + destPath);
    }
    if (!fs.exists(destPath)) {
        fs.copyFromLocalFile(sourcePath, destPath);
    } else {
        logger.debug("Skipping provisioning of " + destPath + " since it already exists.");
    }
}
From source file: io.gzinga.hadoop.TestHadoopGZipRandomAccess.java
License: Apache License
@Test
public void testGZipOutputStream() {
    try {
        Configuration conf = new Configuration();
        conf.set("fs.defaultFS", "file:///");
        FileSystem fs = FileSystem.get(conf);
        fs.mkdirs(new Path("target/test"));
        GZipOutputStreamRandomAccess gzip = new GZipOutputStreamRandomAccess(
                fs.create(new Path("target/test/testfile")));
        byte[] str = "This is line\n".getBytes();
        for (int i = 1; i <= 10000; i++) {
            if (i % 100 == 0) {
                gzip.addOffset(i / 100l);
            }
            gzip.write(str);
        }
        Assert.assertEquals(gzip.getOffsetMap().size(), 100);
        gzip.close();
        fs.copyFromLocalFile(new Path(fs.getWorkingDirectory().toString() + "/target/test-classes/testfile1"),
                new Path("target/test/testfile1"));
        FSDataInputStream fin = fs.open(new Path("target/test/testfile"));
        long len = fs.getFileStatus(new Path("target/test/testfile")).getLen();
        SeekableGZipDataInputStream sin = new SeekableGZipDataInputStream(fin, len);
        Assert.assertTrue(GZipInputStreamRandomAccess.isGzipRandomOutputFile(sin));
        fin = fs.open(new Path("target/test/testfile1"));
        sin = new SeekableGZipDataInputStream(fin, len);
        Assert.assertFalse(GZipInputStreamRandomAccess.isGzipRandomOutputFile(sin));
        fin = fs.open(new Path("target/test/testfile"));
        sin = new SeekableGZipDataInputStream(fin, len);
        GZipInputStreamRandomAccess gzin = new GZipInputStreamRandomAccess(sin);
        Assert.assertEquals(gzin.getMetadata().size(), 100);
        Assert.assertTrue(gzin.getMetadata().containsKey(1l));
        Assert.assertTrue(gzin.getMetadata().containsKey(100l));
        Assert.assertFalse(gzin.getMetadata().containsKey(200l));
        gzin.jumpToIndex(50l);
        int count1 = 0;
        while (true) {
            int l = gzin.read();
            if (l == -1) {
                break;
            }
            count1++;
        }
        gzin.jumpToIndex(60l);
        int count2 = 0;
        while (true) {
            int l = gzin.read();
            if (l == -1) {
                break;
            }
            count2++;
        }
        Assert.assertTrue(count1 > count2);
        gzin.close();
    } catch (Exception e) {
        e.printStackTrace();
        Assert.fail();
    }
}
From source file: io.hops.tensorflow.Client.java
License: Apache License
private String addResource(FileSystem fs, ApplicationId appId, String srcPath, String dstDir, String dstName,
        DistributedCacheList distCache, Map<String, LocalResource> localResources, StringBuilder pythonPath)
        throws IOException {
    Path src = new Path(srcPath);
    if (dstDir == null) {
        dstDir = ".";
    }
    if (dstName == null) {
        dstName = src.getName();
    }

    Path baseDir = new Path(fs.getHomeDirectory(), Constants.YARNTF_STAGING + "/" + appId.toString());
    String dstPath;
    if (dstDir.startsWith(".")) {
        dstPath = dstName;
    } else {
        dstPath = dstDir + "/" + dstName;
    }
    Path dst = new Path(baseDir, dstPath);

    LOG.info("Copying from local filesystem: " + src + " -> " + dst);
    fs.copyFromLocalFile(src, dst);
    FileStatus dstStatus = fs.getFileStatus(dst);

    if (distCache != null) {
        LOG.info("Adding to distributed cache: " + srcPath + " -> " + dstPath);
        distCache.add(new DistributedCacheList.Entry(dstPath, dst.toUri(), dstStatus.getLen(),
                dstStatus.getModificationTime()));
    }

    if (localResources != null) {
        LOG.info("Adding to local environment: " + srcPath + " -> " + dstPath);
        LocalResource resource = LocalResource.newInstance(ConverterUtils.getYarnUrlFromURI(dst.toUri()),
                LocalResourceType.FILE, LocalResourceVisibility.APPLICATION, dstStatus.getLen(),
                dstStatus.getModificationTime());
        localResources.put(dstPath, resource);
    }

    if (pythonPath != null) {
        pythonPath.append(File.pathSeparator).append(dstPath);
    }

    return dstName;
}
From source file: jadoop.HadoopGridJob.java
License: Open Source License
/**
 * Copies the file(s) on the local machine into the temporary HDFS working
 * directory and makes them available in the hadoop distributed cache so that
 * they appear in the working directory of the HadoopGridTask(s) when they
 * are running.
 *
 * @param fs
 *            the hadoop HDFS filesystem
 * @param hdfsDirectory
 *            the path to the temporary working directory on the HDFS to
 *            which the files are to be copied.
 * @throws IOException
 *             if there is a problem copying the files to the HDFS or adding
 *             them to the hadoop distributed cache.
 * @throws URISyntaxException
 *             if there is a problem generating the URI used to add the file
 *             to the hadoop distributed cache.
 */
private void copyLocalFileToHDFS(FileSystem fs, Path hdfsDirectory) throws IOException, URISyntaxException {
    for (File localFile : files) {
        // get the path to the file on the local file system.
        Path fileRelativePath = new Path(localFile.getPath());

        /*
         * copy the file from the local file system to the temporary working
         * directory on the HDFS.
         */
        fs.copyFromLocalFile(fileRelativePath, hdfsDirectory);

        /*
         * Build a URI to the file on the HDFS so we can add it to the
         * distributed cache.
         *
         * The value before the # gives the name of the file on the HDFS;
         * the value after the # gives the name that the file will have in
         * the cache (i.e. the working directory of the tasks).
         */
        URI uri = new URI(hdfsDirectory + "/" + localFile.getName() + "#" + localFile.getName());
        job.addCacheFile(uri);
    }
}
From source file: jadoop.HadoopGridJob.java
License: Open Source License
/**
 * Copies the archive file(s) on the local machine into the temporary
 * working directory on the hadoop HDFS, and also makes them available in
 * the distributed cache so the HadoopGridTask(s) can access them in
 * their working directory. Once the archive file(s) have been copied onto
 * the HDFS, a directory with each archive file's name will be created and
 * the contents of that archive file will be unpacked into that directory.
 *
 * @param fs
 *            the hadoop HDFS file system
 * @param hdfsDirectory
 *            path to the temporary working directory on the HDFS to which
 *            the archives are to be copied.
 * @throws IOException
 *             if there is a problem copying the archives to the HDFS or
 *             adding them to the hadoop distributed cache.
 * @throws URISyntaxException
 *             if there is a problem generating the URI used to add the
 *             archive to the hadoop distributed cache.
 */
private void copyLocalArchiveToHDFS(FileSystem fs, Path hdfsDirectory) throws IOException, URISyntaxException {
    for (File localArchive : archives) {
        Path archiveRelativePath = new Path(localArchive.getPath());
        fs.copyFromLocalFile(archiveRelativePath, hdfsDirectory);
        URI uri = new URI(hdfsDirectory + "/" + localArchive.getName() + "#" + localArchive.getName());
        job.addCacheArchive(uri);
    }
}
From source file: ml.shifu.shifu.util.CommonUtils.java
License: Apache License
/**
 * Sync up all local configuration files to HDFS.
 *
 * @throws IOException if any exception occurs on HDFS or local IO.
 * @throws NullPointerException if parameter {@code modelConfig} is null.
 */
public static boolean copyConfFromLocalToHDFS(ModelConfig modelConfig) throws IOException {
    FileSystem hdfs = HDFSUtils.getFS();
    FileSystem localFs = HDFSUtils.getLocalFS();
    PathFinder pathFinder = new PathFinder(modelConfig);

    Path pathModelSet = new Path(pathFinder.getModelSetPath(SourceType.HDFS));
    // don't check whether pathModelSet exists; it should have been removed by the user.
    hdfs.mkdirs(pathModelSet);

    // Copy ModelConfig
    Path srcModelConfig = new Path(pathFinder.getModelConfigPath(SourceType.LOCAL));
    Path dstModelConfig = new Path(pathFinder.getModelSetPath(SourceType.HDFS));
    hdfs.copyFromLocalFile(srcModelConfig, dstModelConfig);

    // Copy ColumnConfig
    Path srcColumnConfig = new Path(pathFinder.getColumnConfigPath(SourceType.LOCAL));
    Path dstColumnConfig = new Path(pathFinder.getColumnConfigPath(SourceType.HDFS));
    hdfs.copyFromLocalFile(srcColumnConfig, dstColumnConfig);

    // Copy the model version file
    Path srcVersion = new Path(pathFinder.getModelVersion(SourceType.LOCAL));
    if (localFs.exists(srcVersion)) {
        Path dstVersion = new Path(pathFinder.getModelVersion(SourceType.HDFS));
        hdfs.delete(dstVersion, true);
        hdfs.copyFromLocalFile(srcVersion, pathModelSet);
    }

    // Copy Models
    Path srcModels = new Path(pathFinder.getModelsPath(SourceType.LOCAL));
    if (localFs.exists(srcModels)) {
        Path dstModels = new Path(pathFinder.getModelsPath(SourceType.HDFS));
        hdfs.delete(dstModels, true);
        hdfs.copyFromLocalFile(srcModels, pathModelSet);
    }

    // Copy EvalSets
    Path evalsPath = new Path(pathFinder.getEvalsPath(SourceType.LOCAL));
    if (localFs.exists(evalsPath)) {
        for (FileStatus evalset : localFs.listStatus(evalsPath)) {
            EvalConfig evalConfig = modelConfig.getEvalConfigByName(evalset.getPath().getName());
            if (evalConfig != null) {
                copyEvalDataFromLocalToHDFS(modelConfig, evalConfig.getName());
            }
        }
    }

    return true;
}
From source file: ml.shifu.shifu.util.CommonUtils.java
License: Apache License
/**
 * Sync up the evaluation data to HDFS.
 *
 * @param modelConfig the model configuration
 * @param evalName the name of the evaluation set to copy
 * @throws IOException if any exception occurs on HDFS or local IO.
 */
public static void copyEvalDataFromLocalToHDFS(ModelConfig modelConfig, String evalName) throws IOException {
    EvalConfig evalConfig = modelConfig.getEvalConfigByName(evalName);
    if (evalConfig != null) {
        FileSystem hdfs = HDFSUtils.getFS();
        FileSystem localFs = HDFSUtils.getLocalFS();
        PathFinder pathFinder = new PathFinder(modelConfig);

        Path evalDir = new Path(pathFinder.getEvalSetPath(evalConfig, SourceType.LOCAL));
        Path dst = new Path(pathFinder.getEvalSetPath(evalConfig, SourceType.HDFS));
        if (localFs.exists(evalDir) // local evaluation folder exists
                && localFs.getFileStatus(evalDir).isDir() // is directory
                && !hdfs.exists(dst)) {
            hdfs.copyFromLocalFile(evalDir, dst);
        }

        if (StringUtils.isNotBlank(evalConfig.getScoreMetaColumnNameFile())) {
            hdfs.copyFromLocalFile(new Path(evalConfig.getScoreMetaColumnNameFile()),
                    new Path(pathFinder.getEvalSetPath(evalConfig)));
        }
    }
}
From source file: name.abhijitsarkar.hadoop.distributedcache.CitationRetriever.java
License: Open Source License
@Override
public int run(String[] args) throws Exception {
    Configuration conf = getConf();

    /*
     * Create an archive using the supplied codec name. The codec name is currently unused and the archive
     * returned is always Gzip. See the method for details.
     */
    final Path compressedFilePath = new Path(compressFile(new Path(args[2]).toUri(), "gzip", conf));

    final FileSystem fs = FileSystem.get(conf);
    final String remoteLocation = File.separator + compressedFilePath.getName();
    LOGGER.debug("Copying from: {} to {}.", compressedFilePath.toUri(), remoteLocation);

    /*
     * Copy it to the HDFS, from where it is distributed to all task nodes. GenericOptionsParser, if used,
     * does this behind the scenes.
     */
    fs.copyFromLocalFile(compressedFilePath, new Path(remoteLocation));

    /*
     * There are easier ways to put files in the Distributed Cache, like using the GenericOptionsParser command
     * line argument '-archives', but that would be too easy, wouldn't it?
     */

    /* Create a symlink */
    final String[] symlinks = new String[] { removeExtension(compressedFilePath.getName()) };
    conf.set("symlinks", StringUtils.arrayToString(symlinks));
    DistributedCache.addCacheArchive(new URI(remoteLocation + "#" + symlinks[0]), conf);
    DistributedCache.addCacheArchive(new URI(remoteLocation), conf);
    DistributedCache.createSymlink(conf);

    /*
     * Set the citation number in the configuration, to be used later by the Mapper.
     */
    conf.set("citationNum", args[3]);

    Job job = new Job(conf, "distributed-cache");
    job.setMapperClass(CitationMapper.class);
    job.setReducerClass(CitationReducer.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(Text.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);
    job.setInputFormatClass(TextInputFormat.class);
    job.setOutputFormatClass(TextOutputFormat.class);

    FileInputFormat.setInputPaths(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));

    job.setJarByClass(getClass());

    return job.waitForCompletion(true) ? 0 : 1;
}
From source file: nl.gridline.zieook.tasks.DataFileImportTaskTest.java
License: Apache License
@Test
@Ignore
public void executeMovielensImport() throws Exception {
    HBaseManager manager = HBaseManager.getInstance(Config.getInstance().getZooKeeperHost());

    // drop and re-create the HBase tables:
    new HBaseCollectionTable(manager).drop(CP);
    new HBaseCollectionTable(manager).create(CP);
    new HBaseUserTable(manager).drop(CP);
    new HBaseUserTable(manager).create(CP);

    // put the data in HDFS:
    try {
        LOG.info("copy data from local to HDFS");
        FileSystem hdfs = FileSystem.get(hdfsConfig);
        // cleanup:
        hdfs.delete(input, true);
        hdfs.delete(output, true);
        // import:
        hdfs.copyFromLocalFile(new Path("test-data-big/movies.dat"), new Path(input, "movies.dat"));
        hdfs.copyFromLocalFile(new Path("test-data-big/ratings.dat"), new Path(input, "ratings.dat"));
        hdfs.copyFromLocalFile(new Path("test-data-big/users.dat"), new Path(input, "users.dat"));
    } catch (IOException e) {
        LOG.error("", e);
        fail(e.getMessage());
    }

    // import data into HBase:
    DataFileImportTask imp = new DataFileImportTask();
    imp.setConfig(importMovielensConfig);
    LOG.info("Create data import tasks");
    try {
        importMovielensConfig.setProperty(TaskConfig.DATA_PART, TaskConfig.DATA_PART_COLLECTION);
        importMovielensConfig.setProperty(TaskConfig.INPUT_PATH, new Path(input, "movies.dat").toString());
        imp.call();
        LOG.info("COLLECTION data imported");

        importMovielensConfig.setProperty(TaskConfig.DATA_PART, TaskConfig.DATA_PART_RATINGS);
        importMovielensConfig.setProperty(TaskConfig.INPUT_PATH, new Path(input, "ratings.dat").toString());
        imp.call();
        LOG.info("RATINGS data imported");

        importMovielensConfig.setProperty(TaskConfig.DATA_PART, TaskConfig.DATA_PART_USERS);
        importMovielensConfig.setProperty(TaskConfig.INPUT_PATH, new Path(input, "users.dat").toString());
        imp.call();
        LOG.info("USER data imported");
    } catch (Exception e) {
        LOG.error("", e);
        fail(e.getMessage());
    }

    // DataPrepareTool prepare = new DataPrepareTool();
    // prepare.set(TaskConfig.COLLECTION, COLLECTION);
    // prepare.set(TaskConfig.CP, CP);
    // prepare.configure(HBaseTableConstants.USERTABLE_NAME + CP, output.toString());
    // boolean result = prepare.execute();
    // assertTrue(result);
    // test output...
}
From source file: nl.gridline.zieook.tasks.DataImportExportTest.java
License: Apache License
@Test
@Ignore
public void recommenderTool() throws Exception {
    try {
        LOG.info("copy data from local to HDFS");
        FileSystem hdfs = FileSystem.get(hdfsConfig);
        // import:
        hdfs.copyFromLocalFile(new Path("test-data-small/movies.dat"), new Path(input, "movies.dat"));
        hdfs.copyFromLocalFile(new Path("test-data-small/ratings.dat"), new Path(input, "ratings.dat"));
        hdfs.copyFromLocalFile(new Path("test-data-small/users.dat"), new Path(input, "users.dat"));
    } catch (IOException e) {
        LOG.error("", e);
        fail(e.getMessage());
    }

    // import data into HBase:
    DataFileImportTask imp = new DataFileImportTask();
    imp.setConfig(inxConfig);
    LOG.info("Create data import tasks");

    inxConfig.setProperty(TaskConfig.DATA_PART, TaskConfig.DATA_PART_COLLECTION);
    inxConfig.setProperty(TaskConfig.INPUT_PATH, new Path(input, "movies.dat").toString());
    imp.call();
    LOG.info("COLLECTION data imported");

    inxConfig.setProperty(TaskConfig.DATA_PART, TaskConfig.DATA_PART_RATINGS);
    inxConfig.setProperty(TaskConfig.INPUT_PATH, new Path(input, "ratings.dat").toString());
    imp.call();
    LOG.info("RATINGS data imported");

    inxConfig.setProperty(TaskConfig.DATA_PART, TaskConfig.DATA_PART_USERS);
    inxConfig.setProperty(TaskConfig.INPUT_PATH, new Path(input, "users.dat").toString());
    imp.call();
    LOG.info("USER data imported");
}