List of usage examples for org.apache.hadoop.fs FileSystem exists
public boolean exists(Path f) throws IOException
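Before the project examples below, here is a minimal sketch of the typical check-then-act pattern around exists(); the path "/tmp/example.txt" and the default Configuration are placeholders, not taken from any of the projects listed.

import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class ExistsExample {
    public static void main(String[] args) throws IOException {
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(conf);
        Path path = new Path("/tmp/example.txt"); // placeholder path
        if (fs.exists(path)) {
            System.out.println(path + " exists");
        } else {
            System.out.println(path + " does not exist");
        }
        // Note: exists() followed by another operation is not atomic; the
        // file can appear or disappear between the check and the action,
        // which is why several examples below tolerate a failed delete.
    }
}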
From source file:com.google.mr4c.hadoop.HadoopTestUtils.java
License:Open Source License
private static void startDFSCluster() throws IOException {
    // wipe out the space if it exists
    // (the original checked exists() twice; once is enough)
    FileSystem localFS = FileSystem.get(new Configuration());
    Path dfsRoot = new Path(DFS_DIR);
    if (localFS.exists(dfsRoot) && !localFS.delete(dfsRoot, true)) {
        throw new IOException(String.format("Couldn't delete DFS root [%s]", dfsRoot));
    }

    // create the name and data directories
    File nameDir = createDFSDir("name");
    File dataDir = createDFSDir("data");
    File tmpDir = createDFSDir("tmp");
    Configuration conf = new Configuration();
    conf.set("dfs.name.dir", nameDir.getAbsolutePath());
    conf.set("dfs.data.dir", dataDir.getAbsolutePath());
    conf.set("hadoop.tmp.dir", tmpDir.getAbsolutePath());

    // Namespace ID is stored in the "tmp" directory.
    // If we don't keep it inside our DFS, we'll get an error on restart when the ID doesn't match.
    // See http://www.michael-noll.com/tutorials/running-hadoop-on-ubuntu-linux-multi-node-cluster/#caveats
    if (System.getProperty("hadoop.log.dir") == null) {
        System.setProperty("hadoop.log.dir", "/tmp/mr4c/hadoop/log");
    }

    MiniDFSCluster.Builder builder = new MiniDFSCluster.Builder(conf);
    builder.manageNameDfsDirs(false); // we created the name node directory
    builder.manageDataDfsDirs(false); // we created the data node directory
    builder.startupOption(StartupOption.FORMAT); // might not matter, just in case
    // take defaults on everything else
    s_dfsCluster = builder.build();
}
From source file:com.hadoop.compression.lzo.LzoIndexer.java
License:Open Source License
/**
 * Lzo index a given path, calling recursively to index directories when encountered.
 * Files are only indexed if they end in .lzo and have no existing .lzo.index file.
 *
 * @param lzoPath      The base path to index.
 * @param nestingLevel For pretty printing, the nesting level.
 * @throws IOException
 */
private void indexInternal(Path lzoPath, int nestingLevel) throws IOException {
    FileSystem fs = FileSystem.get(URI.create(lzoPath.toString()), conf_);
    FileStatus fileStatus = fs.getFileStatus(lzoPath);

    // Recursively walk
    if (fileStatus.isDir()) {
        LOG.info(getNesting(nestingLevel) + "LZO Indexing directory " + lzoPath + "...");
        FileStatus[] statuses = fs.listStatus(lzoPath);
        for (FileStatus childStatus : statuses) {
            indexInternal(childStatus.getPath(), nestingLevel + 1);
        }
    } else if (lzoPath.toString().endsWith(LZO_EXTENSION)) {
        Path lzoIndexPath = new Path(lzoPath.toString() + LzoIndex.LZO_INDEX_SUFFIX);
        if (fs.exists(lzoIndexPath)) {
            LOG.info(getNesting(nestingLevel) + "[SKIP] LZO index file already exists for " + lzoPath + "\n");
        } else {
            long startTime = System.currentTimeMillis();
            long fileSize = fileStatus.getLen();
            LOG.info(getNesting(nestingLevel) + "[INDEX] LZO Indexing file " + lzoPath + ", size "
                    + df_.format(fileSize / (1024.0 * 1024.0 * 1024.0)) + " GB...");
            if (indexSingleFile(fs, lzoPath)) {
                long indexSize = fs.getFileStatus(lzoIndexPath).getLen();
                double elapsed = (System.currentTimeMillis() - startTime) / 1000.0;
                LOG.info(getNesting(nestingLevel) + "Completed LZO Indexing in " + df_.format(elapsed)
                        + " seconds (" + df_.format(fileSize / (1024.0 * 1024.0 * elapsed))
                        + " MB/s). Index size is " + df_.format(indexSize / 1024.0) + " KB.\n");
            }
        }
    }
}
From source file:com.hadoop.mapreduce.LzoTextInputFormat.java
License:Open Source License
/**
 * Read the index of the lzo file.
 *
 * @param file Read the index of this file.
 * @param fs   The index file is on this file system.
 * @throws IOException
 */
private LzoIndex readIndex(Path file, FileSystem fs) throws IOException {
    FSDataInputStream indexIn = null;
    try {
        Path indexFile = new Path(file.toString() + LZO_INDEX_SUFFIX);
        if (!fs.exists(indexFile)) {
            // return empty index, fall back to the unsplittable mode
            return new LzoIndex();
        }

        long indexLen = fs.getFileStatus(indexFile).getLen();
        int blocks = (int) (indexLen / 8); // each index entry is one long (8 bytes)
        LzoIndex index = new LzoIndex(blocks);
        indexIn = fs.open(indexFile);
        for (int i = 0; i < blocks; i++) {
            index.set(i, indexIn.readLong());
        }
        return index;
    } finally {
        if (indexIn != null) {
            indexIn.close();
        }
    }
}
From source file:com.hadoop.secondarysort.SecondarySortDESC.java
License:Apache License
public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    // if (otherArgs.length != 2) {
    //     System.err.println("Usage: secondarysort <in> <out>");
    //     System.exit(2);
    // }
    Job job = new Job(conf, "secondary sort");
    job.setJarByClass(SecondarySortDESC.class);
    job.setMapperClass(MapClass.class);
    job.setReducerClass(Reduce.class);

    // group and partition by the first int in the pair
    job.setPartitionerClass(FirstPartitioner.class);
    job.setGroupingComparatorClass(FirstGroupingComparator.class);

    // the map output is IntPair, IntWritable
    job.setMapOutputKeyClass(IntPair.class);
    job.setMapOutputValueClass(IntWritable.class);

    // the reduce output is Text, IntWritable
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);

    FileInputFormat.addInputPath(job, new Path(inPath));
    FileOutputFormat.setOutputPath(job, new Path(outPath));

    // remove any previous output so the job can write it fresh
    FileSystem fileSystem = FileSystem.get(conf);
    Path out = new Path(outPath);
    if (fileSystem.exists(out)) {
        // recursive delete; the single-argument delete(Path) is deprecated
        fileSystem.delete(out, true);
    }
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}
From source file:com.hdfs.concat.clean.TestClean.java
License:Apache License
@Test
public void testAge() throws Exception {
    Configuration conf = createJobConf();
    FileSystem fs = FileSystem.get(conf);
    fs.mkdirs(new Path(ROOT_DIR, "a"));
    fs.mkdirs(new Path(new Path(ROOT_DIR, "a"), "1"));
    fs.mkdirs(new Path(ROOT_DIR, "b"));
    fs.mkdirs(new Path(ROOT_DIR, "c"));
    fs.mkdirs(new Path(new Path(ROOT_DIR, "c"), "2"));

    Path oldFile = new Path(new Path(new Path(ROOT_DIR, "a"), "1"), "oldfile");
    FSDataOutputStream out = fs.create(oldFile);
    out.write("bla".getBytes());
    out.close();

    Path cFile = new Path(new Path(new Path(ROOT_DIR, "c"), "1"), "cfile");
    FSDataOutputStream out2 = fs.create(cFile);
    out2.write("wah".getBytes());
    out2.close();

    assertEquals(true, fs.exists(cFile));
    assertEquals(true, fs.exists(oldFile));

    // warn mode: matching files are reported but not deleted
    Clean cleanWarn = new Clean();
    Configuration warnConf = createJobConf();
    warnConf.set(Clean.TARGET_DIR, ROOT_DIR.toString());
    warnConf.set(Clean.TARGET_EXPR, "cfile");
    warnConf.set(Clean.WARN_MODE, "true");
    ToolRunner.run(warnConf, cleanWarn, new String[] {});
    assertEquals(true, fs.exists(cFile));
    assertEquals(true, fs.exists(oldFile));

    // regular mode: files matching the expression are deleted
    Clean cleanReg = new Clean();
    Configuration regConf = createJobConf();
    regConf.set(Clean.TARGET_DIR, ROOT_DIR.toString());
    regConf.set(Clean.TARGET_EXPR, "cfile");
    ToolRunner.run(regConf, cleanReg, new String[] {});
    assertEquals(false, fs.exists(cFile));
    assertEquals(true, fs.exists(oldFile));

    // a generous age cutoff keeps the remaining file
    Clean clean = new Clean();
    Configuration cleanConf = createJobConf();
    cleanConf.setLong(Clean.CUTTOFF_MILLIS, 20000);
    cleanConf.set(Clean.TARGET_DIR, ROOT_DIR.toString());
    ToolRunner.run(cleanConf, clean, new String[] {});
    assertEquals(true, fs.exists(oldFile));

    // after sleeping past a 1 ms cutoff, the file is old enough to be removed
    Thread.sleep(3);
    Clean clean2 = new Clean();
    Configuration cleanConf2 = createJobConf();
    cleanConf2.setLong(Clean.CUTTOFF_MILLIS, 1);
    cleanConf2.set(Clean.TARGET_DIR, ROOT_DIR.toString());
    ToolRunner.run(cleanConf2, clean2, new String[] {});
    assertEquals(false, fs.exists(oldFile));
}
From source file:com.hotels.plunger.TapDataWriter.java
License:Apache License
private void deleteTemporaryPath(Path outputPath, FileSystem fileSystem) throws IOException {
    if (fileSystem.exists(outputPath)) {
        Path tmpDir = new Path(outputPath, Hadoop18TapUtil.TEMPORARY_PATH);
        if (fileSystem.exists(tmpDir)) {
            fileSystem.delete(tmpDir, true);
        }
    }
}
From source file:com.hp.hpl.jena.sparql.algebra.MyOpVisitor.java
License:Open Source License
public void execute() {
    Configuration conf = new Configuration();
    FileSystem fs = null;
    try {
        fs = FileSystem.get(conf);
        Path out = new Path("output");
        // clear any previous output directory, then recreate it
        // (the original checked !fs.exists(out), which would never delete anything)
        if (fs.exists(out)) {
            fs.delete(out, true);
        }
        fs.mkdirs(out);
    } catch (IOException e) {
        e.printStackTrace();
    }

    Triple[] Q = new Triple[0];
    Q = opBGP.getPattern().getList().toArray(Q);
    Set<Var> vars = PatternVars.vars(query.getQueryPattern());
    JoinPlaner.setid(id);
    JoinPlaner.newVaRS(vars);
    try {
        JoinPlaner.form(Q);
        JoinPlaner.removeNonJoiningVaribles(Q);
        int i = 0;
        while (!JoinPlaner.isEmpty()) {
            String v = JoinPlaner.getNextJoin();
            System.out.println(v);
            i++;
        }
        if (i == 0) {
            // no joins to plan: execute the single pattern directly
            Path outFile = new Path("output/Join_" + id + "_" + 0);
            OutputBuffer out = new OutputBuffer(outFile, fs);
            QueryProcessor.executeSelect(Q[0], out, "P0");
        }
    } catch (Exception e) {
        e.printStackTrace();
    }
}
From source file:com.huayu.metis.flume.sink.hdfs.HDFSSequenceFile.java
License:Apache License
@Override
public void open(String filePath) throws IOException {
    Configuration conf = new Configuration();
    Path dstPath = new Path(filePath);
    FileSystem fileSystem = dstPath.getFileSystem(conf);
    // Append to an existing file when possible; on Hadoop 2.2 this requires
    // the file system to support append (dfs.support.append).
    if (fileSystem.exists(dstPath) && fileSystem.isFile(dstPath)) {
        outStream = fileSystem.append(dstPath);
    } else {
        outStream = fileSystem.create(dstPath);
    }
    writer = SequenceFile.createWriter(conf, SequenceFile.Writer.stream(outStream),
            SequenceFile.Writer.keyClass(serializer.getKeyClass()),
            SequenceFile.Writer.valueClass(serializer.getValueClass()));
    registerCurrentStream(outStream, fileSystem, dstPath);
}
From source file:com.ibm.bi.dml.hops.recompile.Recompiler.java
License:Open Source License
/**
 * Reads matrix characteristics (data type, value type, dimensions) from the
 * metadata (MTD) file of the given DataOp, if that file exists.
 *
 * @param dop the DataOp whose metadata file should be read
 * @throws DMLRuntimeException
 */
private static void tryReadMetaDataFileMatrixCharacteristics(DataOp dop) throws DMLRuntimeException {
    try {
        // get meta data filename
        String mtdname = DataExpression.getMTDFileName(dop.getFileName());
        JobConf job = ConfigurationManager.getCachedJobConf();
        FileSystem fs = FileSystem.get(job);
        Path path = new Path(mtdname);
        if (fs.exists(path)) {
            BufferedReader br = null;
            try {
                br = new BufferedReader(new InputStreamReader(fs.open(path)));
                JSONObject mtd = JSONHelper.parse(br);
                DataType dt = DataType
                        .valueOf(String.valueOf(mtd.get(DataExpression.DATATYPEPARAM)).toUpperCase());
                dop.setDataType(dt);
                dop.setValueType(ValueType
                        .valueOf(String.valueOf(mtd.get(DataExpression.VALUETYPEPARAM)).toUpperCase()));
                dop.setDim1((dt == DataType.MATRIX)
                        ? Long.parseLong(mtd.get(DataExpression.READROWPARAM).toString()) : 0);
                dop.setDim2((dt == DataType.MATRIX)
                        ? Long.parseLong(mtd.get(DataExpression.READCOLPARAM).toString()) : 0);
            } finally {
                if (br != null)
                    br.close();
            }
        }
    } catch (Exception ex) {
        throw new DMLRuntimeException(ex);
    }
}
From source file:com.ibm.bi.dml.parser.DataExpression.java
License:Open Source License
/**
 * Reads the metadata (MTD) file for the given filename, handling both a single
 * MTD file and a directory of part files.
 *
 * @param filename path of the MTD file (or directory) to read
 * @param conditional whether validation errors are raised conditionally
 * @return the parsed metadata as a JSONObject, or null if the path does not exist
 * @throws LanguageException
 */
public JSONObject readMetadataFile(String filename, boolean conditional) throws LanguageException {
    JSONObject retVal = null;
    boolean exists = false;
    FileSystem fs = null;

    try {
        fs = FileSystem.get(ConfigurationManager.getCachedJobConf());
    } catch (Exception e) {
        raiseValidateError("could not read the configuration file: " + e.getMessage(), false);
    }

    Path pt = new Path(filename);
    try {
        if (fs.exists(pt)) {
            exists = true;
        }
    } catch (Exception e) {
        exists = false;
    }

    boolean isDirBoolean = false;
    try {
        if (exists && fs.getFileStatus(pt).isDirectory())
            isDirBoolean = true;
        else
            isDirBoolean = false;
    } catch (Exception e) {
        raiseValidateError("error validating whether path " + pt.toString() + " is directory or not: "
                + e.getMessage(), conditional);
    }

    // CASE: filename is a directory -- process as a directory
    if (exists && isDirBoolean) {
        // read directory contents
        retVal = new JSONObject();
        FileStatus[] stats = null;
        try {
            stats = fs.listStatus(pt);
        } catch (Exception e) {
            raiseValidateError("for MTD file in directory, error reading directory with MTD file "
                    + pt.toString() + ": " + e.getMessage(), conditional);
        }
        for (FileStatus stat : stats) {
            Path childPath = stat.getPath(); // path of the part file
            if (childPath.getName().startsWith("part")) {
                BufferedReader br = null;
                try {
                    br = new BufferedReader(new InputStreamReader(fs.open(childPath)));
                } catch (Exception e) {
                    raiseValidateError("for MTD file in directory, error reading part of MTD file with path "
                            + childPath.toString() + ": " + e.getMessage(), conditional);
                }
                JSONObject childObj = null;
                try {
                    childObj = JSONHelper.parse(br);
                } catch (Exception e) {
                    raiseValidateError("for MTD file in directory, error parsing part of MTD file with path "
                            + childPath.toString() + ": " + e.getMessage(), conditional);
                }
                // merge the part file's entries into the combined metadata
                for (Object obj : childObj.entrySet()) {
                    @SuppressWarnings("unchecked")
                    Entry<Object, Object> e = (Entry<Object, Object>) obj;
                    retVal.put(e.getKey(), e.getValue());
                }
            }
        } // end for
    }
    // CASE: filename points to a file
    else if (exists) {
        BufferedReader br = null;
        // try reading MTD file
        try {
            br = new BufferedReader(new InputStreamReader(fs.open(pt)));
        } catch (Exception e) {
            raiseValidateError("error reading MTD file with path " + pt.toString() + ": " + e.getMessage(),
                    conditional);
        }
        // try parsing MTD file
        try {
            retVal = JSONHelper.parse(br);
        } catch (Exception e) {
            raiseValidateError("error parsing MTD file with path " + pt.toString() + ": " + e.getMessage(),
                    conditional);
        }
    }
    return retVal;
}