List of usage examples for org.apache.hadoop.fs FileSystem exists
public boolean exists(Path f) throws IOException
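A minimal, self-contained sketch of the typical check-before-use pattern. The default Configuration and the path below are hypothetical placeholders, not taken from any of the sources listed further down.

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class ExistsExample {
    public static void main(String[] args) throws IOException {
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(conf);
        Path p = new Path("/tmp/example.txt"); // hypothetical path
        if (fs.exists(p)) {
            System.out.println(p + " exists");
        } else {
            System.out.println(p + " does not exist");
        }
    }
}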
From source file:com.ibm.bi.dml.yarn.DMLYarnClient.java
License:Open Source License
/**
 * Reads the stop message from the HDFS working directory.
 *
 * @param conf DML configuration
 * @param yconf YARN configuration
 * @param appId application id
 * @return the stop message, or null if none was found
 */
private String readMessageToHDFSWorkingDir(DMLConfig conf, YarnConfiguration yconf, ApplicationId appId) {
    String ret = null;

    //construct working directory (consistent with client)
    String hdfsWD = DMLAppMasterUtils.constructHDFSWorkingDir(conf, appId);
    Path msgPath = new Path(hdfsWD, DMLYarnClient.DML_STOPMSG_NAME);

    //read the stop message from hdfs, if it exists
    try {
        FileSystem fs = FileSystem.get(yconf);
        if (fs.exists(msgPath)) {
            FSDataInputStream fin = fs.open(msgPath);
            BufferedReader br = new BufferedReader(new InputStreamReader(fin));
            ret = br.readLine();
            fin.close();
            LOG.debug("Stop message read from HDFS file " + msgPath + ": " + ret);
        }
    } catch (Exception ex) {
        LOG.error("Failed to read stop message from HDFS file: " + msgPath, ex);
    }

    return ret;
}
From source file:com.ibm.jaql.io.hadoop.CompositeOutputAdapter.java
License:Apache License
@Override
public void checkOutputSpecs(FileSystem ignored, JobConf conf) throws IOException {
    for (int i = 0; i < outputs.length; i++) {
        outputs[i].checkOutputSpecs(ignored, subconfs[i]);

        // HACK: Hadoop 0.18 has hacks that specialize FileOutputFormat handling. In particular,
        // the temporary directory is created by the Task or LocalJobRunner; they also promote
        // the temporary files to the parent upon completion. We create the temporary directory
        // here, if it doesn't already exist.
        Path outputPath = FileOutputFormat.getOutputPath(subconfs[i]);
        if (outputPath != null) {
            final String TEMP_DIR_NAME = "_temporary"; // MRConstants.TEMP_DIR_NAME isn't public...
            Path jobTmpDir = new Path(outputPath, TEMP_DIR_NAME);
            FileSystem fs = jobTmpDir.getFileSystem(subconfs[i]);
            if (!fs.exists(jobTmpDir)) {
                fs.mkdirs(jobTmpDir);
            }
        }
    }
}
From source file:com.ibm.jaql.io.hadoop.FileOutputConfigurator.java
License:Apache License
public void setSequential(JobConf conf) throws Exception {
    registerSerializers(conf);

    // For an expression, the location is the final file name
    Path outPath = new Path(location);
    FileSystem fs = outPath.getFileSystem(conf);
    outPath = outPath.makeQualified(fs);
    if (fs.exists(outPath)) {
        // TODO: Jaql currently has overwrite semantics; add flag to control this
        if (fs.isFile(outPath)) {
            fs.delete(outPath, false);
        } else {
            // Look for a map-reduce output directory
            FileStatus[] nonMR = fs.listStatus(outPath, new PathFilter() {
                boolean onlyOne = true;

                public boolean accept(Path path) {
                    String name = path.getName();
                    if (name.matches("([.][.]?)|([.]part-[0-9]+.crc)|(part-[0-9]+)")) {
                        return false;
                    }
                    if (onlyOne) {
                        onlyOne = false;
                        return true;
                    }
                    return false;
                }
            });
            if (nonMR.length > 0) {
                throw new IOException(
                        "directory exists and is not a map-reduce output directory: " + nonMR[0].getPath());
            }
            fs.delete(outPath, true);
        }
    }

    // In sequential mode, we will write directly to the output file
    // and bypass the _temporary directory and rename of the standard
    // FileOutputCommitter by using our own DirectFileOutputCommitter.
    FileOutputFormat.setOutputPath(conf, outPath.getParent());
    conf.setClass("mapred.output.committer.class", DirectFileOutputCommiter.class, OutputCommitter.class);
}
From source file:com.ibm.jaql.lang.expr.system.RUtil.java
License:Apache License
/**
 * Function that puts a local file into HDFS.
 *
 * @param localPath path of the local source file
 * @param hdfsPath destination path in HDFS
 * @return true if the copy succeeded, false otherwise
 */
public static boolean saveToHDFS(String localPath, String hdfsPath) {
    try {
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(conf);
        int bufferSize = 4 * 1024;
        byte[] buffer = new byte[bufferSize];
        InputStream input = new BufferedInputStream(new FileInputStream(localPath), bufferSize);
        Path outputPath = new Path(hdfsPath);
        if (fs.exists(outputPath)) {
            if (!fs.isFile(outputPath)) {
                throw new IOException("Output path is a directory that already exists.");
            }
            LOG.info("Output path " + outputPath + " already exists. Overwriting it.");
        }
        FSDataOutputStream output = fs.create(outputPath, true);
        int numBytesRead;
        while ((numBytesRead = input.read(buffer)) > 0) {
            output.write(buffer, 0, numBytesRead);
        }
        input.close();
        output.close();
        return true;
    } catch (IOException e) {
        LOG.info("Error in writing file to HDFS.", e);
        return false;
    }
}
From source file:com.ibm.jaql.UtilForTest.java
License:Apache License
/**
 * Deletes the given HDFS directory if test cleanup is enabled.
 *
 * @param dir directory to remove
 * @throws IOException
 */
public static void cleanUpHDFS(String dir) throws IOException {
    if ("true".equals(System.getProperty("test.cleanup"))) {
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(conf);
        Path p = new Path(dir);
        if (fs.exists(p)) {
            fs.delete(p, true);
        }
    }
}
From source file:com.ibm.stocator.fs.swift2d.systemtests.SwiftTestUtils.java
License:Open Source License
/**
 * Assert that a path exists, but make no assertions as to the
 * type of that entry.
 *
 * @param fileSystem filesystem to examine
 * @param message message to include in the assertion failure message
 * @param path path in the filesystem
 * @throws IOException IO problems
 */
public static void assertPathExists(FileSystem fileSystem, String message, Path path) throws IOException {
    if (!fileSystem.exists(path)) {
        // failure, report it
        fail(message + ": not found " + path + " in " + path.getParent());
    }
}
From source file:com.idvp.platform.hdfs.BucketWriter.java
License:Apache License
/**
 * Rename bucketPath file from .tmp to permanent location.
 */
// When this bucket writer is rolled based on rollCount or
// rollSize, the same instance is reused for the new file. But if
// the previous file was not closed/renamed,
// the bucket writer fields no longer point to it and hence need
// to be passed in from the thread attempting to close it. Even
// when the bucket writer is closed due to close timeout,
// this method can get called from the scheduled thread so the
// file gets closed later - so an implicit reference to this
// bucket writer would still be alive in the Callable instance.
private void renameBucket(String bucketPath, String targetPath, final FileSystem fs)
        throws IOException, InterruptedException {
    if (bucketPath.equals(targetPath)) {
        return;
    }

    final Path srcPath = new Path(bucketPath);
    final Path dstPath = new Path(targetPath);

    callWithTimeout((CallRunner<Void>) () -> {
        if (fs.exists(srcPath)) { // could block
            LOG.info("Renaming " + srcPath + " to " + dstPath);
            renameTries.incrementAndGet();
            fs.rename(srcPath, dstPath); // could block
        }
        return null;
    });
}
From source file:com.iflytek.spider.crawl.CrawlDb.java
License:Apache License
public void update(Path crawlDb, Path[] segments, boolean additionsAllowed, boolean force)
        throws IOException, InterruptedException, ClassNotFoundException {
    FileSystem fs = FileSystem.get(getConf());
    Path lock = new Path(crawlDb, LOCK_NAME);
    LockUtil.createLockFile(fs, lock, force);
    if (LOG.isInfoEnabled()) {
        LOG.info("CrawlDb update: starting");
        LOG.info("CrawlDb update: db: " + crawlDb);
        LOG.info("CrawlDb update: segments: " + Arrays.asList(segments));
        LOG.info("CrawlDb update: additions allowed: " + additionsAllowed);
    }

    Job job = CrawlDb.createJob(getConf(), crawlDb);
    job.getConfiguration().setBoolean(CRAWLDB_ADDITIONS_ALLOWED, additionsAllowed);
    for (int i = 0; i < segments.length; i++) {
        Path fetch = new Path(segments[i], CrawlDatum.FETCH_DIR_NAME);
        Path parse = new Path(segments[i], CrawlDatum.PARSE_DIR_NAME);
        if (fs.exists(fetch)) {
            FileInputFormat.addInputPath(job, fetch);
        }
        if (fs.exists(parse)) {
            FileInputFormat.addInputPath(job, parse);
        } else {
            LOG.info(" - skipping invalid segment " + segments[i]);
        }
    }

    if (LOG.isInfoEnabled()) {
        LOG.info("CrawlDb update: Merging segment data into db.");
    }
    try {
        job.waitForCompletion(true);
    } catch (IOException | InterruptedException | ClassNotFoundException e) {
        // on failure, release the lock, remove any partial output, and rethrow
        LockUtil.removeLockFile(fs, lock);
        Path outPath = FileOutputFormat.getOutputPath(job);
        if (fs.exists(outPath))
            fs.delete(outPath, true);
        throw e;
    }

    CrawlDb.install(job, crawlDb);
    if (LOG.isInfoEnabled()) {
        LOG.info("CrawlDb update: done");
    }
}
From source file:com.iflytek.spider.crawl.CrawlDb.java
License:Apache License
public static void install(Job job, Path crawlDb) throws IOException {
    Path newCrawlDb = FileOutputFormat.getOutputPath(job);
    FileSystem fs = FileSystem.get(job.getConfiguration());
    Path old = new Path(crawlDb, "old");
    Path current = new Path(crawlDb, CURRENT_NAME);
    if (fs.exists(current)) {
        if (fs.exists(old))
            fs.delete(old, true);
        fs.rename(current, old);
    }
    fs.mkdirs(crawlDb);
    fs.rename(newCrawlDb, current);
    if (fs.exists(old))
        fs.delete(old, true);
    Path lock = new Path(crawlDb, LOCK_NAME);
    LockUtil.removeLockFile(fs, lock);
}
From source file:com.iflytek.spider.parse.ParseSegment.java
License:Apache License
public int run(String[] args) throws Exception {
    String usage = "Usage: ParseSegment segments";

    if (args.length == 0) {
        System.err.println(usage);
        System.exit(-1);
    }

    FileSystem fs = FileSystem.get(getConf());
    for (FileStatus p : fs.listStatus(new Path(args[0]))) {
        if (fs.exists(new Path(p.getPath(), "crawl_parse")))
            fs.delete(new Path(p.getPath(), "crawl_parse"), true);
        if (fs.exists(new Path(p.getPath(), "parse_data")))
            fs.delete(new Path(p.getPath(), "parse_data"), true);
        parse(p.getPath());
    }
    return 0;
}