List of usage examples for org.apache.hadoop.fs FileSystem copyFromLocalFile
public void copyFromLocalFile(Path src, Path dst) throws IOException
From source file:edu.uci.ics.pregelix.example.util.TestCluster.java
License:Apache License
/**
 * Starts the HDFS mini cluster used by the tests and stages the test data on it.
 *
 * <p>Steps, in order: load {@code core-site.xml}, {@code mapred-site.xml} and
 * {@code hdfs-site.xml} from {@code PATH_TO_HADOOP_CONF} into {@code conf}; recursively delete
 * the local {@code build} directory; create the {@link MiniDFSCluster}; copy the five local
 * data sets to their HDFS destinations; finally write the resulting configuration as XML to
 * {@code HADOOP_CONF_PATH}.
 *
 * @throws IOException if any local or HDFS file operation fails
 */
private void startHDFS() throws IOException {
    conf.addResource(new Path(PATH_TO_HADOOP_CONF + "/core-site.xml"));
    conf.addResource(new Path(PATH_TO_HADOOP_CONF + "/mapred-site.xml"));
    conf.addResource(new Path(PATH_TO_HADOOP_CONF + "/hdfs-site.xml"));

    FileSystem lfs = FileSystem.getLocal(new Configuration());
    lfs.delete(new Path("build"), true);
    System.setProperty("hadoop.log.dir", "logs");
    dfsCluster = new MiniDFSCluster(conf, numberOfNC, true, null);
    FileSystem dfs = FileSystem.get(conf);

    stageOnHdfs(dfs, new Path(DATA_PATH), new Path(HDFS_PATH));
    stageOnHdfs(dfs, new Path(DATA_PATH2), new Path(HDFS_PATH2));
    stageOnHdfs(dfs, new Path(DATA_PATH3), new Path(HDFS_PATH3));
    stageOnHdfs(dfs, new Path(DATA_PATH4), new Path(HDFS_PATH4));
    stageOnHdfs(dfs, new Path(DATA_PATH5), new Path(HDFS_PATH5));

    // try-with-resources: the stream is closed even when writeXml fails.
    try (DataOutputStream confOutput = new DataOutputStream(
            new FileOutputStream(new File(HADOOP_CONF_PATH)))) {
        conf.writeXml(confOutput);
        confOutput.flush();
    }
}

/** Creates {@code dest} on {@code dfs} and copies the local {@code src} into it. */
private static void stageOnHdfs(FileSystem dfs, Path src, Path dest) throws IOException {
    dfs.mkdirs(dest);
    dfs.copyFromLocalFile(src, dest);
}
From source file:edu.umn.cs.spatialHadoop.core.SpatialSite.java
License:Open Source License
/** * Ensures that the given class is in the class path of running jobs. * If the jar is not already in the class path, it is added to the * DisributedCache of the given job to ensure the associated job will work * fine./*from w w w . j a va 2 s.c o m*/ * @param conf * @param klass */ public static void addClassToPath(Configuration conf, Class<?> klass) { // Check if we need to add the containing jar to class path String klassJar = findContainingJar(klass); String shadoopJar = findContainingJar(SpatialSite.class); if (klassJar == null || (shadoopJar != null && klassJar.equals(shadoopJar))) return; Path containingJar = new Path(findContainingJar(klass)); Path[] existingClassPaths = DistributedCache.getArchiveClassPaths(conf); if (existingClassPaths != null) { for (Path existingClassPath : existingClassPaths) { if (containingJar.getName().equals(existingClassPath.getName())) return; } } // The containing jar is a new one and needs to be copied to class path try { LOG.info("Adding JAR '" + containingJar.getName() + "' to job class path"); FileSystem defaultFS = FileSystem.get(conf); Path libFolder; if (existingClassPaths != null && existingClassPaths.length > 0) { libFolder = existingClassPaths[0].getParent(); } else { // First jar to be added like this. Create a new lib folder do { libFolder = new Path("lib_" + (int) (Math.random() * 100000)); } while (defaultFS.exists(libFolder)); defaultFS.mkdirs(libFolder); defaultFS.deleteOnExit(libFolder); } defaultFS.copyFromLocalFile(containingJar, libFolder); Path jarFullPath = new Path(libFolder, containingJar.getName()).makeQualified(defaultFS); jarFullPath = jarFullPath.makeQualified(defaultFS); DistributedCache.addArchiveToClassPath(jarFullPath, conf); } catch (IOException e) { e.printStackTrace(); } }
From source file:eu.edisonproject.classification.tfidf.mapreduce.CompetencesDistanceDriver.java
License:Apache License
@Override public int run(String[] args) { try {/*from w w w . ja va 2 s.c o m*/ Configuration conf = HBaseConfiguration.create(); //additional output using TextOutputFormat. conf.set("file.names", args[3]); Job job = Job.getInstance(conf); //TableMapReduceUtil.addDependencyJars(job); job.setJarByClass(CompetencesDistanceDriver.class); //This row must be changed job.setJobName("Words Group By Title Driver"); Path inPath = new Path(args[0]); Path outPath = new Path(args[1]); Path competencesPath = new Path(args[2]); Path competencesPathHDFS = competencesPath; FileSystem fs = FileSystem.get(conf); if (!conf.get(FileSystem.FS_DEFAULT_NAME_KEY).startsWith("file")) { competencesPathHDFS = new Path(competencesPath.getName()); if (!fs.exists(competencesPathHDFS)) { fs.mkdirs(competencesPathHDFS); File[] stats = new File(competencesPath.toString()).listFiles(); for (File stat : stats) { Path filePath = new Path(stat.getAbsolutePath()); if (FilenameUtils.getExtension(filePath.getName()).endsWith("csv")) { Path dest = new Path(competencesPathHDFS.toUri() + "/" + filePath.getName()); fs.copyFromLocalFile(filePath, dest); } } } } job.addCacheFile(competencesPathHDFS.toUri()); FileInputFormat.setInputPaths(job, inPath); FileOutputFormat.setOutputPath(job, outPath); fs.delete(outPath, true); job.setMapperClass(CompetencesDistanceMapper.class); job.setMapOutputKeyClass(Text.class); job.setMapOutputValueClass(Text.class); job.setReducerClass(CompetencesDistanceReducer.class); // job.setOutputFormatClass(TableOutputFormat.class); // job.getConfiguration().set(TableOutputFormat.OUTPUT_TABLE, "jobpostcompetence"); job.setOutputKeyClass(Text.class); job.setOutputValueClass(Text.class); String[] fileNames = args[3].split(","); for (String n : fileNames) { MultipleOutputs.addNamedOutput(job, n, TextOutputFormat.class, Text.class, Text.class); } return (job.waitForCompletion(true) ? 
0 : 1); } catch (IOException | IllegalStateException | IllegalArgumentException | InterruptedException | ClassNotFoundException ex) { Logger.getLogger(CompetencesDistanceDriver.class.getName()).log(Level.SEVERE, null, ex); } return 0; }
From source file:eu.edisonproject.classification.tfidf.mapreduce.TermWordFrequency.java
License:Apache License
/**
 * Configures and runs the "Word Frequency Term Driver" job.
 *
 * <p>Arguments: {@code args[0]} dictionary file (job input), {@code args[1]} output path
 * (deleted before the job starts), {@code args[2]} local documents directory,
 * {@code args[3]} stop-words file, {@code args[4]} number of lines per input split.
 *
 * <p>When the default file system is not a {@code file} one, the dictionary and stop-words
 * files are uploaded if not yet present, and the {@code txt} files of the documents directory
 * are uploaded to a directory of the same name that is deleted on exit.
 *
 * @param args the command line arguments described above
 * @return 0 if the job completed successfully, 1 otherwise
 * @throws Exception if setting up or running the job fails
 */
@Override
public int run(String[] args) throws Exception {
    Configuration jobconf = getConf();
    Job job = Job.getInstance(jobconf);
    FileSystem fs = FileSystem.get(jobconf);
    fs.delete(new Path(args[1]), true);

    Path dictionary = new Path(args[0]);
    Path dictionaryHdfs = dictionary;

    Path localDocs = new Path(args[2]);
    Path hdfsDocs = localDocs;

    Path stopwordsLocal = new Path(args[3]);
    Path stopwordsHDFS = stopwordsLocal;

    if (!jobconf.get(FileSystem.FS_DEFAULT_NAME_KEY).startsWith("file")) {
        dictionaryHdfs = new Path(dictionary.getName());
        if (!fs.exists(dictionaryHdfs)) {
            fs.copyFromLocalFile(dictionary, dictionaryHdfs);
        }

        hdfsDocs = new Path(localDocs.getName());
        fs.mkdirs(hdfsDocs);
        fs.deleteOnExit(hdfsDocs);

        File[] stats = new File(localDocs.toString()).listFiles();
        if (stats == null) {
            // listFiles() yields null for a missing or unreadable directory.
            throw new IOException("Cannot list documents directory: " + localDocs);
        }
        for (File stat : stats) {
            Path filePath = new Path(stat.getAbsolutePath());
            if (FilenameUtils.getExtension(filePath.getName()).endsWith("txt")) {
                Path dest = new Path(hdfsDocs.toUri() + "/" + filePath.getName());
                fs.copyFromLocalFile(filePath, dest);
            }
        }

        stopwordsHDFS = new Path(stopwordsLocal.getName());
        if (!fs.exists(stopwordsHDFS)) {
            fs.copyFromLocalFile(stopwordsLocal, stopwordsHDFS);
        }
    }

    // Register the path reported by the file system rather than the one passed in.
    FileStatus stopwordsStatus = fs.getFileStatus(stopwordsHDFS);
    stopwordsHDFS = stopwordsStatus.getPath();
    job.addCacheFile(stopwordsHDFS.toUri());
    job.addCacheFile(hdfsDocs.toUri());

    job.setJarByClass(TermWordFrequency.class);
    job.setJobName("Word Frequency Term Driver");

    FileInputFormat.setInputPaths(job, dictionaryHdfs);
    FileOutputFormat.setOutputPath(job, new Path(args[1]));

    job.setInputFormatClass(NLineInputFormat.class);
    // NOTE(review): the same path was already registered by setInputPaths above; this looks
    // like it lists the input twice - confirm whether that is intended.
    NLineInputFormat.addInputPath(job, dictionaryHdfs);
    NLineInputFormat.setNumLinesPerSplit(job, Integer.parseInt(args[4]));
    NLineInputFormat.setMaxInputSplitSize(job, 500);

    job.setMapperClass(TermWordFrequencyMapper.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(IntWritable.class);

    job.setOutputKeyClass(Text.class);
    // NOTE(review): Integer is not a Hadoop Writable; presumably IntWritable was meant -
    // verify against what TermWordFrequencyReducer emits.
    job.setOutputValueClass(Integer.class);
    job.setReducerClass(TermWordFrequencyReducer.class);

    return job.waitForCompletion(true) ? 0 : 1;
}
From source file:eu.edisonproject.classification.tfidf.mapreduce.WordFrequencyInDocDriver.java
License:Apache License
/**
 * Sets up and launches the "Word Frequency In Doc Driver" job.
 *
 * <p>Arguments: {@code args[0]} job input, {@code args[1]} output path (removed before the
 * job starts), {@code args[2]} dictionary file, {@code args[3]} stop-words file.
 *
 * <p>If the default file system is not a {@code file} one, the input is re-uploaded under its
 * own file name (and marked delete-on-exit), while the dictionary and stop-words files are
 * uploaded only when absent. Both of the latter are registered as cache files.
 *
 * @param args the command line arguments described above
 * @return 0 when the job succeeds, 1 otherwise
 * @throws Exception if preparing or running the job fails
 */
@Override
public int run(String[] args) throws Exception {
    final Configuration configuration = getConf();
    final Job wordFrequencyJob = Job.getInstance(configuration);
    wordFrequencyJob.setJarByClass(WordFrequencyInDocDriver.class);
    wordFrequencyJob.setJobName("Word Frequency In Doc Driver");

    final FileSystem fileSystem = FileSystem.get(configuration);
    final Path outputDir = new Path(args[1]);
    fileSystem.delete(outputDir, true);

    Path input = new Path(args[0]);
    Path dictionaryFile = new Path(args[2]);
    Path stopwordsFile = new Path(args[3]);

    final boolean onLocalFileSystem =
            configuration.get(FileSystem.FS_DEFAULT_NAME_KEY).startsWith("file");
    if (!onLocalFileSystem) {
        // The input is always refreshed and cleaned up when the file system is closed.
        final Path localInput = input;
        input = new Path(localInput.getName());
        fileSystem.delete(input, true);
        fileSystem.copyFromLocalFile(localInput, input);
        fileSystem.deleteOnExit(input);

        // Dictionary and stop words are only uploaded when missing.
        final Path localDictionary = dictionaryFile;
        dictionaryFile = new Path(localDictionary.getName());
        if (!fileSystem.exists(dictionaryFile)) {
            fileSystem.copyFromLocalFile(localDictionary, dictionaryFile);
        }

        final Path localStopwords = stopwordsFile;
        stopwordsFile = new Path(localStopwords.getName());
        if (!fileSystem.exists(stopwordsFile)) {
            fileSystem.copyFromLocalFile(localStopwords, stopwordsFile);
        }
    }

    // Cache files are registered with the path reported by the file system.
    wordFrequencyJob.addCacheFile(fileSystem.getFileStatus(dictionaryFile).getPath().toUri());
    wordFrequencyJob.addCacheFile(fileSystem.getFileStatus(stopwordsFile).getPath().toUri());

    FileInputFormat.setInputPaths(wordFrequencyJob, input);
    FileOutputFormat.setOutputPath(wordFrequencyJob, outputDir);

    wordFrequencyJob.setInputFormatClass(AvroKeyInputFormat.class);
    wordFrequencyJob.setMapperClass(WordFrequencyInDocMapper.class);
    AvroJob.setInputKeySchema(wordFrequencyJob, Document.getClassSchema());

    wordFrequencyJob.setMapOutputKeyClass(Text.class);
    wordFrequencyJob.setMapOutputValueClass(IntWritable.class);

    wordFrequencyJob.setOutputKeyClass(Text.class);
    wordFrequencyJob.setOutputValueClass(Integer.class);
    wordFrequencyJob.setReducerClass(WordFrequencyInDocReducer.class);

    if (wordFrequencyJob.waitForCompletion(true)) {
        return 0;
    }
    return 1;
}
From source file:eu.edisonproject.training.tfidf.mapreduce.TermWordFrequency.java
License:Apache License
@Override public int run(String[] args) throws Exception { Configuration jobconf = getConf(); FileSystem fs = FileSystem.get(jobconf); fs.delete(new Path(args[1]), true); Path in = new Path(args[0]); Path inHdfs = in;/*w ww . j a v a 2s . c om*/ if (!jobconf.get(FileSystem.FS_DEFAULT_NAME_KEY).startsWith("file")) { inHdfs = new Path(in.getName()); fs.delete(inHdfs, true); fs.copyFromLocalFile(in, inHdfs); fs.deleteOnExit(inHdfs); FileStatus inHdfsStatus = fs.getFileStatus(inHdfs); // Logger.getLogger(TermWordFrequency.class.getName()).log(Level.INFO, "Copied: {0} to: {1}", new Object[]{in.toUri(), inHdfsStatus.getPath().toUri()}); } Job job = Job.getInstance(jobconf); Path stopwordsLocal = new Path(args[3]); stopwords = new Path(stopwordsLocal.getName()); fs.delete(stopwords, true); fs.copyFromLocalFile(stopwordsLocal, stopwords); fs.deleteOnExit(stopwords); FileStatus stopwordsStatus = fs.getFileStatus(stopwords); stopwords = stopwordsStatus.getPath(); job.addCacheFile(stopwords.toUri()); Path localDocs = new Path(args[2]); Path hdfsDocs = new Path(localDocs.getName()); fs.mkdirs(hdfsDocs); hdfsDocs = fs.getFileStatus(hdfsDocs).getPath(); fs.delete(hdfsDocs, true); // FileStatus[] stats = fs.listStatus(localDocs); File[] stats = new File(localDocs.toString()).listFiles(); for (File stat : stats) { // for (FileStatus stat : stats) { Path filePath = new Path(stat.getAbsolutePath()); if (FilenameUtils.getExtension(filePath.getName()).endsWith("txt")) { Path dest = new Path(hdfsDocs.toUri() + "/" + filePath.getName()); fs.copyFromLocalFile(filePath, dest); } } job.addCacheFile(hdfsDocs.toUri()); job.setJarByClass(TermWordFrequency.class); job.setJobName("Word Frequency Term Driver"); FileInputFormat.setInputPaths(job, inHdfs); FileOutputFormat.setOutputPath(job, new Path(args[1])); // job.setInputFormatClass(TextInputFormat.class); job.setInputFormatClass(NLineInputFormat.class); NLineInputFormat.addInputPath(job, inHdfs); NLineInputFormat.setNumLinesPerSplit(job, 
Integer.valueOf(args[4])); NLineInputFormat.setMaxInputSplitSize(job, 500); Logger.getLogger(TermWordFrequency.class.getName()).log(Level.INFO, "Num. of lines: {0}", NLineInputFormat.getNumLinesPerSplit(job)); job.setMapperClass(TermWordFrequencyMapper.class); job.setMapOutputKeyClass(Text.class); job.setMapOutputValueClass(IntWritable.class); job.setOutputKeyClass(Text.class); job.setOutputValueClass(Integer.class); job.setReducerClass(TermWordFrequencyReducer.class); return (job.waitForCompletion(true) ? 0 : 1); }
From source file:eu.larkc.iris.imports.Importer.java
License:Apache License
public void processNTriple(String inPath, String importName) throws IOException { //FIXME//ww w . j a v a2 s . co m //configuration.jobConf.set("mapreduce.input.fileinputformat.split.minsize", "268435456"); FileSystem fs = FileSystem.get(configuration.hadoopConfiguration); String fileName = configuration.project + "/" + DistributedFileSystemManager.TMP_FOLDER + "/" + inPath.substring(inPath.lastIndexOf("/") + 1); logger.info("copy start: " + inPath + " to hdfs : " + fileName); fs.copyFromLocalFile(new Path(inPath), new Path(fileName)); logger.info("copy done : " + inPath + " to hdfs : " + fileName); //Tap source = new Lfs(new TextLine(), inPath); Tap source = new Hfs(new TextLine(), fileName); SequenceFile sinkScheme = new SequenceFile(new Fields(0, 1, 2)); //sinkScheme.setNumSinkParts(1); String importPath = distributedFileSystemManager.getImportPath(importName); Tap sink = new Hfs(sinkScheme, importPath, true); int[] groups = { 2, 1, 3 }; RegexParser parser = new RegexParser(Fields.UNKNOWN, "^(<[^\\s]+>|_:node\\w+)\\s*(<[^\\s]+>|_:node\\w+)\\s*([<\"].*[^\\s]|_:node\\w+)\\s*.\\s*$", groups); //_ is for generated nodes like _:node15n67q1f2x14 Pipe sourcePipe = new Each("sourcePipe", new Fields("line"), parser); sourcePipe = new Each(sourcePipe, Fields.ALL, new TextImporterFunction()); Flow aFlow = new FlowConnector(configuration.flowProperties).connect(source, sink, sourcePipe); aFlow.complete(); fs.delete(new Path(fileName), false); if (configuration.doPredicateIndexing) { processIndexing(importName); //FileSystem fs = FileSystem.get(configuration.hadoopConfiguration); fs.delete(new Path(importPath), true); } }
From source file:eu.qualimaster.dataManagement.storage.hdfs.HdfsUtils.java
License:Apache License
/**
 * Copies a local data file into the configured Dfs path on HDFS.
 *
 * <p>Nothing is copied when the configured HDFS URL is empty.
 *
 * @param dataFile the local file to store
 * @return the target path as a string if the file was copied, {@code null} if no HDFS URL
 *     is configured
 * @throws IOException in case that I/O fails
 */
public static String storeToHdfs(File dataFile) throws IOException {
    if (DataManagementConfiguration.isEmpty(DataManagementConfiguration.getHdfsUrl())) {
        return null;
    }
    final String prefix = DataManagementConfiguration.getDfsPath() + "/";
    final FileSystem hdfs = HdfsUtils.getFilesystem();
    final Path destination = new Path(prefix, dataFile.getName());
    final Path localSource = new Path(dataFile.getAbsolutePath());
    hdfs.copyFromLocalFile(localSource, destination);
    return destination.toString();
}
From source file:eu.qualimaster.dataManagement.storage.hdfs.HdfsUtils.java
License:Apache License
/**
 * Copies {@code source} to {@code fs} below {@code basePath}.
 *
 * <p>A directory is recreated under {@code basePath} with its own name and its entries are
 * copied recursively; a plain file is copied to {@code basePath} under its own name.
 *
 * @param fs the file system
 * @param basePath the actual base path
 * @param source the source file/directory
 * @throws IOException in case that copying fails or a target directory cannot be created
 */
private static void copy(FileSystem fs, String basePath, File source) throws IOException {
    if (source.isDirectory()) {
        String bp = basePath + "/" + source.getName();
        Path targetDir = new Path(bp);
        // mkdirs, not create: create() would make a file and hand back a stream to close.
        if (!fs.mkdirs(targetDir)) {
            throw new IOException("Cannot create directory " + targetDir);
        }
        File[] files = source.listFiles();
        if (null != files) {
            for (File f : files) {
                copy(fs, bp, f);
            }
        }
    } else {
        Path target = new Path(basePath, source.getName());
        fs.copyFromLocalFile(new Path(source.getAbsolutePath()), target);
    }
}
From source file:eu.stratosphere.yarn.Utils.java
License:Apache License
/** * //ww w. j a v a 2 s . co m * @return Path to remote file (usually hdfs) * @throws IOException */ public static Path setupLocalResource(Configuration conf, FileSystem fs, String appId, Path localRsrcPath, LocalResource appMasterJar, Path homedir) throws IOException { // copy to HDFS String suffix = ".stratosphere/" + appId + "/" + localRsrcPath.getName(); Path dst = new Path(homedir, suffix); LOG.info("Copying from " + localRsrcPath + " to " + dst); fs.copyFromLocalFile(localRsrcPath, dst); registerLocalResource(fs, dst, appMasterJar); return dst; }