List of usage examples for org.apache.hadoop.fs FileSystem copyFromLocalFile
public void copyFromLocalFile(Path src, Path dst) throws IOException
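For orientation, a minimal, self-contained sketch of the two-argument call (not taken from any of the source files below; the paths and the fs.defaultFS setting are hypothetical placeholders). copyFromLocalFile copies the source from the local file system to the target FileSystem and leaves the local copy in place; overloads that take a boolean delSrc (and optionally a boolean overwrite) can delete the source or control overwrite behavior.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class CopyFromLocalExample {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        conf.set("fs.defaultFS", "hdfs://localhost:9000"); // hypothetical cluster address

        try (FileSystem fs = FileSystem.get(conf)) {
            Path src = new Path("/tmp/local-input.txt");  // hypothetical local source file
            Path dst = new Path("/user/demo/input.txt");  // hypothetical HDFS destination

            // Copies src from the local file system to dst on fs; the local file is kept.
            fs.copyFromLocalFile(src, dst);
        }
    }
}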
From source file:org.pentaho.di.job.entries.hadooptransjobexecutor.DistributedCacheUtil.java
License:Apache License
/**
 * Stages the source file or folder to a Hadoop file system and sets its permission and replication
 * values appropriately for use with the Distributed Cache. WARNING: This will delete the contents
 * of dest before staging the archive.
 *
 * @param source    File or folder to copy to the file system. If it is a folder all contents will be
 *                  copied into dest.
 * @param fs        Hadoop file system to store the contents of the archive in
 * @param dest      Destination to copy source into. If source is a file, the new file name will be
 *                  exactly dest. If source is a folder its contents will be copied into dest. For more
 *                  info see {@link FileSystem#copyFromLocalFile(org.apache.hadoop.fs.Path, org.apache.hadoop.fs.Path)}.
 * @param overwrite Should an existing file or folder be overwritten? If not, an exception will be thrown.
 * @throws IOException         Destination exists and is not a directory
 * @throws KettleFileException Source does not exist, or destination exists and overwrite is false.
 */
public void stageForCache(FileObject source, FileSystem fs, Path dest, boolean overwrite)
        throws IOException, KettleFileException {
    if (!source.exists()) {
        throw new KettleFileException(BaseMessages.getString(DistributedCacheUtil.class,
                "DistributedCacheUtil.SourceDoesNotExist", source));
    }

    if (fs.exists(dest)) {
        if (overwrite) {
            // It is a directory, clear it out
            fs.delete(dest, true);
        } else {
            throw new KettleFileException(BaseMessages.getString(DistributedCacheUtil.class,
                    "DistributedCacheUtil.DestinationExists", dest.toUri().getPath()));
        }
    }

    // Use the same replication we'd use for submitting jobs
    short replication = (short) fs.getConf().getInt("mapred.submit.replication", 10);

    Path local = new Path(source.getURL().getPath());
    fs.copyFromLocalFile(local, dest);
    fs.setPermission(dest, CACHED_FILE_PERMISSION);
    fs.setReplication(dest, replication);
}
From source file:org.pentaho.hadoop.shim.common.DistributedCacheUtilImpl.java
License:Apache License
/**
 * Stages the source file or folder to a Hadoop file system and sets its permission and replication
 * values appropriately for use with the Distributed Cache. WARNING: This will delete the contents
 * of dest before staging the archive.
 *
 * @param source    File or folder to copy to the file system. If it is a folder all contents will be
 *                  copied into dest.
 * @param fs        Hadoop file system to store the contents of the archive in
 * @param dest      Destination to copy source into. If source is a file, the new file name will be
 *                  exactly dest. If source is a folder its contents will be copied into dest. For more
 *                  info see {@link FileSystem#copyFromLocalFile(org.apache.hadoop.fs.Path, org.apache.hadoop.fs.Path)}.
 * @param overwrite Should an existing file or folder be overwritten? If not, an exception will be thrown.
 * @throws IOException         Destination exists and is not a directory
 * @throws KettleFileException Source does not exist, or destination exists and overwrite is false.
 */
public void stageForCache(FileObject source, FileSystem fs, Path dest, boolean overwrite, boolean isPublic)
        throws IOException, KettleFileException {
    if (!source.exists()) {
        throw new KettleFileException(BaseMessages.getString(DistributedCacheUtilImpl.class,
                "DistributedCacheUtil.SourceDoesNotExist", source));
    }

    if (fs.exists(dest)) {
        if (overwrite) {
            // It is a directory, clear it out
            fs.delete(dest, true);
        } else {
            throw new KettleFileException(BaseMessages.getString(DistributedCacheUtilImpl.class,
                    "DistributedCacheUtil.DestinationExists", dest.toUri().getPath()));
        }
    }

    // Use the same replication we'd use for submitting jobs
    short replication = (short) fs.getConf().getInt("mapred.submit.replication", 10);

    if (source.getURL().toString().endsWith(CONFIG_PROPERTIES)) {
        copyConfigProperties(source, fs, dest);
    } else {
        Path local = new Path(source.getURL().getPath());
        fs.copyFromLocalFile(local, dest);
    }

    if (isPublic) {
        fs.setPermission(dest, PUBLIC_CACHED_FILE_PERMISSION);
    } else {
        fs.setPermission(dest, CACHED_FILE_PERMISSION);
    }
    fs.setReplication(dest, replication);
}
From source file:org.springframework.data.hadoop.mapreduce.StreamingTest.java
License:Apache License
private void cleanOutput(ApplicationContext ctx) throws Exception {
    FileSystem fs = FileSystem.get(ctx.getBean(Configuration.class));
    fs.copyFromLocalFile(new Path("../build.gradle"), new Path("test/"));
    fs.delete(new Path("output"), true);
}
From source file:org.starschema.hadoop.yarn.applications.distributedshell.Client.java
License:Apache License
private void addToLocalResourcesCompressed(FileSystem fs, String fileSrcPath, String fileDstPath, String appId,
        Map<String, LocalResource> localResources, String resources) throws IOException {
    String suffix = appName + "/" + appId + "/" + fileDstPath;
    Path dst = new Path(fs.getHomeDirectory(), suffix);
    if (fileSrcPath == null) {
        FSDataOutputStream ostream = null;
        try {
            ostream = FileSystem.create(fs, dst, new FsPermission((short) 0710));
            ostream.writeUTF(resources);
        } finally {
            IOUtils.closeQuietly(ostream);
        }
    } else {
        fs.copyFromLocalFile(new Path(fileSrcPath), dst);
    }
    FileStatus scFileStatus = fs.getFileStatus(dst);
    LocalResource scRsrc = LocalResource.newInstance(ConverterUtils.getYarnUrlFromURI(dst.toUri()),
            LocalResourceType.ARCHIVE, LocalResourceVisibility.APPLICATION, scFileStatus.getLen(),
            scFileStatus.getModificationTime());
    localResources.put(fileDstPath, scRsrc);
}
From source file:org.terrier.utility.io.HadoopUtility.java
License:Mozilla Public License
protected static void saveClassPathToJob(JobConf jobConf) throws IOException {
    logger.info("Copying classpath to job");
    if (jobConf.getBoolean("terrier.classpath.copied", false)) {
        return;
    }
    jobConf.setBoolean("terrier.classpath.copied", true);
    final String[] jars = findJarFiles(
            new String[] { System.getenv().get("CLASSPATH"), System.getProperty("java.class.path") });
    final FileSystem defFS = FileSystem.get(jobConf);
    for (String jarFile : jars) {
        //logger.debug("Adding " + jarFile + " to job class path");
        Path srcJarFilePath = new Path("file:///" + jarFile);
        String filename = srcJarFilePath.getName();
        Path tmpJarFilePath = makeTemporaryFile(jobConf, filename);
        defFS.copyFromLocalFile(srcJarFilePath, tmpJarFilePath);
        DistributedCache.addFileToClassPath(tmpJarFilePath, jobConf);
    }
    DistributedCache.createSymlink(jobConf);
}
From source file:org.trommel.trommel.mapreduce.TrommelDriver.java
License:Apache License
private static int processScript(Level logLevel, int numOfReducers, String trommelScriptFilePath)
        throws Exception {
    int exitCode = 0;
    FrontEndInterpreter frontEndInterpreter = null;
    FileSystem fileSystem = null;
    Path cachedScript = null;

    logger.setLevel(logLevel);

    try {
        logger.info(String.format("Loading and parsing TrommelScript file %1$s ...", trommelScriptFilePath));
        Lexer lexer = new Lexer(
                new PushbackReader(new BufferedReader(new FileReader(trommelScriptFilePath)), 4096));
        Parser parser = new Parser(lexer);
        Start ast = parser.parse();
        ValidationInterpreter validationInterpreter = new ValidationInterpreter();

        logger.info("Validating TrommelScript...");
        ast.apply(validationInterpreter);

        if (validationInterpreter.getSemanticErrors().size() != 0) {
            // Validation of script failed
            logger.info("TrommelScript failed validation with the following errors:");
            for (String errorMessage : validationInterpreter.getSemanticErrors()) {
                logger.info(errorMessage);
            }
            return exitCode;
        }

        logger.info("Interpreting TrommelScript...");
        frontEndInterpreter = new FrontEndInterpreter(logger, DEFAULT_HDFS_PATH);
        ast.apply(frontEndInterpreter);

        logger.debug("Creating Job object");
        Job job = new Job();
        job.setJarByClass(TrommelDriver.class);

        // Copy TrommelScript file from the local file system to HDFS and add it to the distributed cache
        fileSystem = FileSystem.get(job.getConfiguration());
        Path src = new Path(trommelScriptFilePath);
        cachedScript = new Path(String.format("/tmp/%1$s_%2$s", src.getName(), UUID.randomUUID().toString()));
        fileSystem.copyFromLocalFile(src, cachedScript);
        logger.debug(String.format("Moved TrommelScript file to HDFS as %1$s.", cachedScript.toString()));

        logger.debug("Adding TrommelScript file to DistributedCache.");
        DistributedCache.addCacheFile(new URI(cachedScript.toString()), job.getConfiguration());

        logger.debug(String.format("Setting LOGGING_LEVEL_CONFIG_PROP to %1$s", logLevel.toString()));
        job.getConfiguration().set(LOGGING_LEVEL_CONFIG_PROP, logLevel.toString());

        // Specify HDFS input/output locations
        logger.debug(String.format("Calling FileInputFormat.addInputPath() with %1$s.",
                frontEndInterpreter.getHdfsInputFilePath()));
        FileInputFormat.addInputPath(job, new Path(frontEndInterpreter.getHdfsInputFilePath()));

        logger.debug(String.format("Calling FileOutputFormat.setOutputPath() with %1$s.",
                frontEndInterpreter.getHdfsOutputFilePath()));
        FileOutputFormat.setOutputPath(job, new Path(frontEndInterpreter.getHdfsOutputFilePath()));

        // Hadoop setup
        job.setMapperClass(TrommelMapper.class);

        if (frontEndInterpreter.samplingData()) {
            logger.debug("Trommel is sampling data, 0 Reducers set.");
            job.setNumReduceTasks(0);
        } else {
            logger.debug(String.format("Setting number of Reducers to %1$s.", numOfReducers));
            job.setReducerClass(TrommelReducer.class);
            job.setNumReduceTasks(numOfReducers);
        }

        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Text.class);

        logger.debug("Running job");
        if (!job.waitForCompletion(true)) {
            exitCode = 1;
        } else if (frontEndInterpreter.getLocalFilePath() != null) {
            // User would like data exported to the local file system
            logger.debug(String.format("Exporting Trommel output from %1$s to %2$s.",
                    frontEndInterpreter.getHdfsOutputFilePath(), frontEndInterpreter.getLocalFilePath()));
            Path mergeFilePath = new Path(String.format("/tmp/%1$s", UUID.randomUUID()));
            FSDataOutputStream mergeFileStream = fileSystem.create(mergeFilePath);
            Path localFilePath = new Path(frontEndInterpreter.getLocalFilePath());
            FileStatus[] outputFileStatuses = fileSystem
                    .listStatus(new Path(frontEndInterpreter.getHdfsOutputFilePath()));
            FSDataInputStream outputFileStream = null;
            String fileNameFilter = (frontEndInterpreter.samplingData() ? "part-m" : "part-r");

            try {
                // Loop through the output, merging any reducer output files for export to the local file system
                for (FileStatus outputFileStatus : outputFileStatuses) {
                    if (!outputFileStatus.isDir()
                            && outputFileStatus.getPath().getName().contains(fileNameFilter)) {
                        logger.debug(String.format("Merging file %1$s into local file system output.",
                                outputFileStatus.getPath().toString()));
                        outputFileStream = fileSystem.open(outputFileStatus.getPath());
                        byte[] buffer = new byte[(int) outputFileStatus.getLen()];
                        // readFully (rather than read) guarantees the whole file lands in the buffer
                        outputFileStream.readFully(buffer);
                        mergeFileStream.write(buffer);
                        outputFileStream.close();
                    }
                }
            } finally {
                if (mergeFileStream != null) {
                    mergeFileStream.close();
                    fileSystem.copyToLocalFile(mergeFilePath, localFilePath);
                    fileSystem.delete(mergeFilePath, true);
                }
            }
        }
    } finally {
        try {
            if (fileSystem != null) {
                // Clean up any temp files if needed
                if (frontEndInterpreter.getHdfsOutputFilePath().equals(DEFAULT_HDFS_PATH)) {
                    logger.debug("Deleting temp files from /tmp/Trommel");
                    fileSystem.delete(new Path(DEFAULT_HDFS_PATH), true);
                }

                // Clean up the cached file
                logger.debug(String.format("Deleting cached TrommelScript file %1$s", cachedScript.toString()));
                fileSystem.delete(cachedScript, true);
            }
        } catch (IOException ioe) {
            // Couldn't delete file for some reason, alert user
            logger.error(String.format(
                    "Exception encountered deleting cached TrommelScript file %1$s. Error message: %2$s",
                    cachedScript.toString(), ioe.getMessage()));
        }
    }

    return exitCode;
}
From source file:org.unigram.likelike.lsh.LSHRecommendations.java
License:Apache License
/**
 * Save keys.
 *
 * @param keys      hash keys
 * @param inputFile input file
 * @param conf      configuration
 * @throws IOException -
 */
private void saveKeys(final String keys, final String inputFile, final Configuration conf)
        throws IOException {
    /* save to local fs */
    String tempKeyFile = "keys.tmp";
    try {
        FileOutputStream fos = new FileOutputStream(tempKeyFile);
        OutputStreamWriter osw = new OutputStreamWriter(fos, "UTF-8");
        BufferedWriter bw = new BufferedWriter(osw);
        bw.write(keys + "\n");
        bw.close();
        osw.close();
        fos.close();
    } catch (Exception e) {
        e.printStackTrace();
    }

    /* put the local file into HDFS */
    FileSystem fs = FileSystem.get(conf);
    Path localKeyFilePath = new Path(tempKeyFile);
    Path hdfsKeyFilePath = new Path(inputFile + ".keys");
    fs.copyFromLocalFile(localKeyFilePath, hdfsKeyFilePath);

    /* remove the local temporary file; use the local file system, since deleting
       through the HDFS FileSystem would resolve the path against HDFS instead */
    FileSystem.getLocal(conf).delete(localKeyFilePath, true);

    return;
}
From source file:org.warcbase.index.IndexerRunner.java
License:Apache License
private void cacheSolrHome(JobConf conf, String solrHomeZipName) throws IOException {
    File tmpSolrHomeDir = new File("src/main/solr").getAbsoluteFile();

    // Create a ZIP file.
    File solrHomeLocalZip = File.createTempFile("tmp-", solrHomeZipName);
    Zipper.zipDir(tmpSolrHomeDir, solrHomeLocalZip);

    // Add to HDFS.
    FileSystem fs = FileSystem.get(conf);
    String hdfsSolrHomeDir = fs.getHomeDirectory() + "/solr/tempHome/" + solrHomeZipName;
    fs.copyFromLocalFile(new Path(solrHomeLocalZip.toString()), new Path(hdfsSolrHomeDir));
    final URI baseZipUrl = fs.getUri().resolve(hdfsSolrHomeDir + '#' + solrHomeZipName);

    // Cache it.
    DistributedCache.addCacheArchive(baseZipUrl, conf);
}
From source file:oz.hadoop.yarn.api.core.ApplicationMasterLauncherImpl.java
License:Apache License
private void addToLocalResources(FileSystem fs, String fileSrcPath, String fileDstPath, int appId,
        Map<String, LocalResource> localResources) {
    String suffix = this.applicationName + "_master/" + appId + "/" + fileDstPath;
    Path dst = new Path(fs.getHomeDirectory(), suffix);
    try {
        Path sourcePath = new Path(fileSrcPath);
        if (logger.isDebugEnabled()) {
            logger.debug("Copying '" + sourcePath + "' to " + dst);
        }
        fs.copyFromLocalFile(sourcePath, dst);
        FileStatus scFileStatus = fs.getFileStatus(dst);
        LocalResource scRsrc = LocalResource.newInstance(ConverterUtils.getYarnUrlFromURI(dst.toUri()),
                LocalResourceType.FILE, LocalResourceVisibility.APPLICATION, scFileStatus.getLen(),
                scFileStatus.getModificationTime());
        localResources.put(fileDstPath, scRsrc);
    } catch (Exception e) {
        throw new IllegalStateException("Failed to communicate with FileSystem: " + fs, e);
    }
}
From source file:pl.edu.icm.coansys.heeut.TestMapReduce.java
License:Apache License
@Test(timeout = 1800000)
public void testWordCount() throws Exception {
    String prefix = getCurrentDateAppended("wordcount");
    String inputDirName = prefix + "-input";
    String outputDirName = prefix + "-output";

    FileSystem dfs = UTIL.getDFSCluster().getFileSystem();
    Path inputDir = new Path(inputDirName);
    Path qualifiedInputDir = dfs.makeQualified(inputDir);

    dfs.copyFromLocalFile(new Path("src/test/resource/input/wordcount/apache_projects.dat"),
            qualifiedInputDir);

    ToolRunner.run(UTIL.getConfiguration(), new WordCount(), new String[] { inputDirName, outputDirName });

    InputStream contentStream = dfs.open(new Path(outputDirName + "/part-00000"));
    BufferedReader contentReader = new BufferedReader(new InputStreamReader(contentStream));
    Assert.assertEquals("Apache\t3", contentReader.readLine());
    Assert.assertEquals("HBase\t1", contentReader.readLine());
    Assert.assertEquals("Hadoop\t1", contentReader.readLine());
    Assert.assertEquals("Pig\t1", contentReader.readLine());
    Assert.assertNull(contentReader.readLine());
    contentReader.close();
}