Usage examples for org.apache.hadoop.fs.FileSystem#exists
public boolean exists(Path f) throws IOException
From source file:br.com.lassal.nqueens.grid.job.GenerateSolutions.java
/** * NQueens working folder structure /nqueens/board-{x}/partial/solution_X-4 * * @param queensSize//from ww w . j a va 2 s.co m * @throws IOException */ private void setWorkingFolder(int queensSize, Job job) throws IOException { Configuration conf = getConf(); FileSystem fs = FileSystem.get(conf); if (fs.isDirectory(new Path("/nqueens/board-" + queensSize + "/final"))) { System.exit(0); // ja foi processado anteriormente nao processa de novo } String lastSolution = null; Path partialSolDir = new Path("/nqueens/board-" + queensSize + "/partial/"); Path inputPath = null; Path outputPath = null; if (fs.exists(partialSolDir)) { RemoteIterator<LocatedFileStatus> dirsFound = fs.listLocatedStatus(partialSolDir); while (dirsFound.hasNext()) { LocatedFileStatus path = dirsFound.next(); if (lastSolution == null) { lastSolution = path.getPath().getName(); inputPath = path.getPath(); } else { String currentDir = path.getPath().getName(); if (lastSolution.compareToIgnoreCase(currentDir) < 0) { lastSolution = currentDir; inputPath = path.getPath(); } } } } int currentSolutionSet = 0; if (inputPath == null) { inputPath = new Path("/nqueens/board-" + queensSize + "/seed"); if (!fs.exists(inputPath)) { FSDataOutputStream seedFile = fs.create(inputPath, true); seedFile.writeBytes(queensSize + "#"); seedFile.close(); } } // Input FileInputFormat.addInputPath(job, inputPath); job.setInputFormatClass(TextInputFormat.class); if (lastSolution != null) { String[] solution = lastSolution.split("-"); if (solution[0].equalsIgnoreCase("solution_" + queensSize)) { currentSolutionSet = Integer.parseInt(solution[1]) + 4; if (currentSolutionSet >= queensSize) { outputPath = new Path("/nqueens/board-" + queensSize + "/final"); } else { outputPath = new Path("/nqueens/board-" + queensSize + "/partial/solution_" + queensSize + "-" + currentSolutionSet); } } } else { outputPath = new Path("/nqueens/board-" + queensSize + "/partial/solution_" + queensSize + "-4"); } // Output 
FileOutputFormat.setOutputPath(job, outputPath); job.setOutputFormatClass(TextOutputFormat.class); }
From source file:br.com.lassal.nqueens.grid.job.NQueenCounter.java
private Path setWorkingFolder(int queensSize, String workingFolder, boolean isFinal, Job job) throws IOException { Configuration conf = getConf(); FileSystem fs = FileSystem.get(conf); Path returnPath = null;//from w w w.ja va 2 s. co m if (workingFolder == null) { workingFolder = ""; } Path partialSolDir = new Path(workingFolder + "/nqueens/board-" + queensSize + "/partial/"); Path inputPath = null; Path outputPath = null; String nextRunPath = "run_1"; if (fs.exists(partialSolDir)) { RemoteIterator<LocatedFileStatus> dirsFound = fs.listLocatedStatus(partialSolDir); String lastRunPath = null; Path lastPath = null; while (dirsFound.hasNext()) { LocatedFileStatus dir = dirsFound.next(); if (dir.isDirectory()) { if (lastRunPath == null || dir.getPath().getName().compareTo(lastRunPath) > 0) { lastPath = dir.getPath(); lastRunPath = lastPath.getName(); } } } if (lastRunPath != null) { String[] runParts = lastRunPath.split("_"); int lastRun = Integer.parseInt(runParts[1]); nextRunPath = runParts[0] + "_" + (++lastRun); inputPath = lastPath; } } if (inputPath == null) { inputPath = new Path(workingFolder + "/nqueens/board-" + queensSize + "/seed"); if (!fs.exists(inputPath)) { FSDataOutputStream seedFile = fs.create(inputPath, true); seedFile.writeBytes(queensSize + ":"); seedFile.close(); } } else { returnPath = inputPath; } // Input FileInputFormat.addInputPath(job, inputPath); job.setInputFormatClass(TextInputFormat.class); if (isFinal) { outputPath = new Path(workingFolder + "/nqueens/board-" + queensSize + "/final"); } else { outputPath = new Path(workingFolder + "/nqueens/board-" + queensSize + "/partial/" + nextRunPath); } // Output FileOutputFormat.setOutputPath(job, outputPath); job.setOutputFormatClass(TextOutputFormat.class); return returnPath; }
From source file:br.ufpr.inf.hpath.HPath.java
License:Apache License
/** * Execute the XPath query as a Hadoop job * @param xpath_query XPath query submitted by the user via cli. * @param inputFile XML file which has all data. * @param outputFile Query's result is stored in this file. * @throws Exception//from ww w. j a va 2 s. co m */ public static void main(String[] args) throws Exception { if (args.length < 1) { System.out.println("USAGE: hpath [xpath_query] [input_file] [<output_dir>]"); System.exit(-1); } System.out.println("***************"); System.out.println(" Query -> " + args[2]); System.out.println(" Input -> " + args[0]); System.out.println(" Output -> " + args[1]); System.out.println("***************"); String xpath_query = args[2]; String inputFile = args[0]; String outputFile = args[1]; String tag = ""; // tag = getFisrtQueryTag(xpath_query); tag = getLastQueryTag(xpath_query); Configuration conf = new Configuration(); conf.set("xmlinput.start", "<" + tag); conf.set("xmlinput.end", "</" + tag + ">"); conf.set("xpath.query", xpath_query); @SuppressWarnings("deprecation") Job job = new Job(conf, "HPath"); FileSystem fs = FileSystem.get(conf); Path inFile = new Path(inputFile); Path outFile = new Path(outputFile); if (!fs.exists(inFile)) { System.out.println("error: Input file not found."); System.exit(-1); } if (!fs.isFile(inFile)) { System.out.println("error: Input should be a file."); System.exit(-1); } if (fs.exists(outFile)) { System.out.println("error: Output already exists."); System.exit(-1); } job.setJarByClass(HPath.class); job.setMapperClass(Map.class); job.setReducerClass(Reduce.class); job.setInputFormatClass(XmlItemInputFormat.class); job.setOutputKeyClass(Text.class); job.setOutputValueClass(IntWritable.class); job.setOutputFormatClass(TextOutputFormat.class); FileInputFormat.addInputPath(job, inFile); FileOutputFormat.setOutputPath(job, outFile); job.waitForCompletion(true); }
From source file:br.ufrj.nce.recureco.distributedindex.search.controller.DocumentViewerServlet.java
License:Open Source License
protected void doGet(javax.servlet.http.HttpServletRequest request, javax.servlet.http.HttpServletResponse response) throws javax.servlet.ServletException, IOException { String doc = request.getParameter("doc"); if (doc != null && doc.trim().length() > 0) { try {/*from w ww.ja v a2s.c o m*/ String filePath = DIR_DOWNLOAD + doc; Configuration conf = new Configuration(); conf.addResource(new Path(DIR_HADOOP_CONF + "core-site.xml")); conf.addResource(new Path(DIR_HADOOP_CONF + "hdfs-site.xml")); conf.addResource(new Path(DIR_HADOOP_CONF + "mapred-site.xml")); FileSystem fileSystem = FileSystem.get(conf); Path path = new Path(filePath); if (!fileSystem.exists(path)) { response.getWriter().print("File not found."); return; } FSDataInputStream in = fileSystem.open(path); response.setContentType("text/plain"); int read = 0; byte[] bytes = new byte[BYTES_DOWNLOAD]; OutputStream os = response.getOutputStream(); while ((read = in.read(bytes)) != -1) { os.write(bytes, 0, read); } os.flush(); os.close(); } catch (FileNotFoundException e) { response.getWriter().print("File not found."); } } else { //print invalid document response.getWriter().print("File not informed."); } }
From source file:bulkload.ImportTsv.java
License:Apache License
/** * Sets up the actual job.//from www. j av a2 s.co m * * @param conf * The current configuration. * @param args * The command line parameters. * @return The newly created job. * @throws IOException * When setting up the job fails. */ public static Job createSubmittableJob(Configuration conf, String[] args) throws IOException { Job job = null; try (Connection connection = ConnectionFactory.createConnection(conf)) { try (Admin admin = connection.getAdmin()) { // Support non-XML supported characters // by re-encoding the passed separator as a Base64 string. String actualSeparator = conf.get(SEPARATOR_CONF_KEY); if (actualSeparator != null) { conf.set(SEPARATOR_CONF_KEY, Base64.encodeBytes(actualSeparator.getBytes())); } TableName tableName = TableName.valueOf(args[0]); if (!admin.tableExists(tableName)) { String errorMsg = format("Table '%s' does not exist.", tableName); LOG.error(errorMsg); throw new TableNotFoundException(errorMsg); } Path inputDir = new Path(args[1]); String jobName = conf.get(JOB_NAME_CONF_KEY, NAME + "_" + tableName.getNameAsString()); job = Job.getInstance(conf, jobName); job.setJarByClass(TsvImporter.class); FileInputFormat.setInputPaths(job, inputDir); job.setInputFormatClass(TextInputFormat.class); job.setMapperClass(TsvImporter.class); String hfileOutPath = conf.get(BULK_OUTPUT_CONF_KEY); if (hfileOutPath != null) { try (HTable table = (HTable) connection.getTable(tableName)) { Path outputDir = new Path(hfileOutPath); FileSystem fs = FileSystem.get(conf); if (fs.exists(outputDir)) { if (!fs.delete(outputDir, true)) { throw new IllegalStateException("delete path:" + outputDir + " failed"); } } FileOutputFormat.setOutputPath(job, outputDir); job.setMapOutputKeyClass(ImmutableBytesWritable.class); job.setMapOutputValueClass(Put.class); job.setReducerClass(PutSortReducer.class); HFileOutputFormat2.configureIncrementalLoad(job, table, table); } } else { // No reducers. Just write straight to table. 
Call // initTableReducerJob // to set up the TableOutputFormat. TableMapReduceUtil.initTableReducerJob(tableName.getNameAsString(), null, job); job.setNumReduceTasks(0); // TableMapReduceUtil.addDependencyJars(job); // TableMapReduceUtil.addDependencyJars(job.getConfiguration(), // com.google.common.base.Function.class /* Guava used by TsvParser */); } // Workaround to remove unnecessary hadoop dependencies String[] jars = job.getConfiguration().get("tmpjars").split(",", -1); StringBuilder filteredJars = new StringBuilder(); for (String j : jars) { String[] parts = j.split("/", -1); String fileName = parts[parts.length - 1]; if (fileName.indexOf("hadoop-") != 0) { filteredJars.append(j); filteredJars.append(","); } } job.getConfiguration().set("tmpjars", filteredJars.toString()); } } return job; }
From source file:byte_import.HexastoreBulkImport.java
License:Open Source License
public Job createSubmittableJob(String[] args) { TABLE_NAME = args[1];/* www.j ava 2 s . c o m*/ Job job = null; try { job = new Job(new Configuration(), NAME); job.setJarByClass(HexastoreBulkImport.class); job.setMapperClass(sampler.TotalOrderPrep.Map.class); job.setReducerClass(Reduce.class); job.setCombinerClass(Combiner.class); job.setMapOutputKeyClass(ImmutableBytesWritable.class); job.setMapOutputValueClass(ImmutableBytesWritable.class); job.setPartitionerClass(TotalOrderPartitioner.class); //TotalOrderPartitioner.setPartitionFile(job.getConfiguration(), new Path("/user/npapa/"+regions+"partitions/part-r-00000")); TotalOrderPartitioner.setPartitionFile(job.getConfiguration(), new Path("partitions/part-r-00000")); job.setInputFormatClass(TextInputFormat.class); job.setOutputFormatClass(HFileOutputFormat.class); Path out = new Path("out"); FileOutputFormat.setOutputPath(job, out); Configuration conf = new Configuration(); FileSystem fs; try { fs = FileSystem.get(conf); if (fs.exists(out)) { fs.delete(out, true); } } catch (IOException e) { e.printStackTrace(); } HBaseAdmin hadmin = new HBaseAdmin(conf); HTableDescriptor desc = new HTableDescriptor(TABLE_NAME + "_stats"); HColumnDescriptor family = new HColumnDescriptor("size"); desc.addFamily(family); conf.setInt("zookeeper.session.timeout", 600000); if (hadmin.tableExists(TABLE_NAME + "_stats")) { //hadmin.disableTable(TABLE_NAME+"_stats"); //hadmin.deleteTable(TABLE_NAME+"_stats"); } else { hadmin.createTable(desc); } FileInputFormat.setInputPaths(job, new Path(args[0])); //job.getConfiguration().setInt("mapred.map.tasks", 18); job.getConfiguration().set("h2rdf.tableName", TABLE_NAME); job.getConfiguration().setInt("mapred.reduce.tasks", (int) TotalOrderPrep.regions); job.getConfiguration().setBoolean("mapred.map.tasks.speculative.execution", false); job.getConfiguration().setBoolean("mapred.reduce.tasks.speculative.execution", false); job.getConfiguration().setInt("io.sort.mb", 100); 
job.getConfiguration().setInt("io.file.buffer.size", 131072); job.getConfiguration().setInt("mapred.job.reuse.jvm.num.tasks", -1); //job.getConfiguration().setInt("hbase.hregion.max.filesize", 67108864); job.getConfiguration().setInt("hbase.hregion.max.filesize", 33554432); job.getConfiguration().setInt("mapred.tasktracker.map.tasks.maximum", 5); job.getConfiguration().setInt("mapred.tasktracker.reduce.tasks.maximum", 5); //job.getConfiguration().setInt("io.sort.mb", 100); } catch (IOException e2) { e2.printStackTrace(); } return job; }
From source file:ca.uwaterloo.iss4e.spark.pointperrow.CosineMain.java
License:Open Source License
public void fetch(JavaSparkContext sc, String source) { try {//from w w w.j a v a 2 s . c o m FileSystem fs = FileSystem.get(new URI(source), new Configuration()); Path src = new Path(source); if (fs.exists(src)) { FileStatus[] lists = fs.listStatus(src); readFiles(sc, fs, lists); } } catch (IOException e) { e.printStackTrace(); } catch (URISyntaxException e) { e.printStackTrace(); } }
From source file:cascading.flow.hadoop.util.HadoopUtil.java
License:Open Source License
private static Map<Path, Path> getCopyPaths(Configuration config, Map<Path, Path> commonPaths) { Map<Path, Path> copyPaths = new HashMap<Path, Path>(); FileSystem remoteFS = getDefaultFS(config); FileSystem localFS = getLocalFS(config); for (Map.Entry<Path, Path> entry : commonPaths.entrySet()) { Path localPath = entry.getKey(); Path remotePath = entry.getValue(); try {// ww w.ja va 2 s. c o m boolean localExists = localFS.exists(localPath); boolean remoteExist = remoteFS.exists(remotePath); if (localExists && !remoteExist) { copyPaths.put(localPath, remotePath); } else if (localExists) { long localModTime = localFS.getFileStatus(localPath).getModificationTime(); long remoteModTime = remoteFS.getFileStatus(remotePath).getModificationTime(); if (localModTime > remoteModTime) copyPaths.put(localPath, remotePath); } } catch (IOException exception) { throw new FlowException("unable to get handle to underlying filesystem", exception); } } return copyPaths; }
From source file:cascading.flow.hadoop.util.HadoopUtil.java
License:Open Source License
/**
 * Tests whether {@code path} exists on {@code fileSystem}, converting the checked
 * {@code IOException} into a {@code FlowException}.
 *
 * @param fileSystem file system to query
 * @param path path to test
 * @return {@code true} when the path exists
 * @throws FlowException if the file system cannot be queried; the original
 *         {@code IOException} is kept as the cause
 */
private static boolean exists(FileSystem fileSystem, Path path) {
    try {
        return fileSystem.exists(path);
    } catch (IOException exception) {
        // Keep the cause so the underlying I/O failure stays visible in the stack trace.
        throw new FlowException("could not test file exists: " + path, exception);
    }
}
From source file:cascading.tap.hadoop.Hadoop18TapUtil.java
License:Open Source License
public static boolean needsTaskCommit(JobConf conf) throws IOException { String workpath = conf.get("mapred.work.output.dir"); if (workpath == null) return false; Path taskOutputPath = new Path(workpath); if (taskOutputPath != null) { FileSystem fs = getFSSafe(conf, taskOutputPath); if (fs == null) return false; if (fs.exists(taskOutputPath)) return true; }//from w w w. ja v a 2 s .c om return false; }