List of usage examples for org.apache.hadoop.fs.FileSystem.exists
public boolean exists(Path f) throws IOException
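Before the per-project examples below, a minimal sketch of the call itself, assuming a default Configuration (the location string is hypothetical). exists() reports whether a path is currently present, returning false instead of throwing for a missing path:

import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

// Minimal sketch: query a path with exists().
public static boolean pathExists(Configuration conf, String location) throws IOException {
    FileSystem fs = FileSystem.get(conf); // shared, cached instance; do not close it here
    Path path = new Path(location); // hypothetical, e.g. "/tmp/data.txt"
    return fs.exists(path); // false for a missing path, no exception
}

Keep in mind that any exists-then-act sequence is not atomic: another client can create or delete the path between the check and the action, which is why several of the examples below also handle the failure case directly.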
From source file: com.datatorrent.stram.client.StramAppLauncher.java
License: Apache License

/**
 * Submit application to the cluster and return the app id.
 * Sets the context class loader for application dependencies.
 *
 * @param appConfig
 * @return ApplicationId
 * @throws Exception
 */
public ApplicationId launchApp(AppFactory appConfig) throws Exception {
    loadDependencies();
    Configuration conf = propertiesBuilder.conf;
    conf.setEnum(StreamingApplication.ENVIRONMENT, StreamingApplication.Environment.CLUSTER);
    LogicalPlan dag = appConfig.createApp(propertiesBuilder);
    String hdfsTokenMaxLifeTime = conf.get(StramClientUtils.HDFS_TOKEN_MAX_LIFE_TIME);
    if (hdfsTokenMaxLifeTime != null && hdfsTokenMaxLifeTime.trim().length() > 0) {
        dag.setAttribute(LogicalPlan.HDFS_TOKEN_LIFE_TIME, Long.parseLong(hdfsTokenMaxLifeTime));
    }
    String rmTokenMaxLifeTime = conf.get(StramClientUtils.RM_TOKEN_MAX_LIFE_TIME);
    if (rmTokenMaxLifeTime != null && rmTokenMaxLifeTime.trim().length() > 0) {
        dag.setAttribute(LogicalPlan.RM_TOKEN_LIFE_TIME, Long.parseLong(rmTokenMaxLifeTime));
    }
    if (conf.get(StramClientUtils.KEY_TAB_FILE) != null) {
        dag.setAttribute(LogicalPlan.KEY_TAB_FILE, conf.get(StramClientUtils.KEY_TAB_FILE));
    } else if (conf.get(StramUserLogin.DT_AUTH_KEYTAB) != null) {
        Path localKeyTabPath = new Path(conf.get(StramUserLogin.DT_AUTH_KEYTAB));
        FileSystem fs = StramClientUtils.newFileSystemInstance(conf);
        try {
            Path destPath = new Path(StramClientUtils.getDTDFSRootDir(fs, conf), localKeyTabPath.getName());
            // upload the keytab only if it is not already on DFS
            if (!fs.exists(destPath)) {
                fs.copyFromLocalFile(false, false, localKeyTabPath, destPath);
            }
            dag.setAttribute(LogicalPlan.KEY_TAB_FILE, destPath.toString());
        } finally {
            fs.close();
        }
    }
    String tokenRefreshFactor = conf.get(StramClientUtils.TOKEN_ANTICIPATORY_REFRESH_FACTOR);
    if (tokenRefreshFactor != null && tokenRefreshFactor.trim().length() > 0) {
        dag.setAttribute(LogicalPlan.TOKEN_REFRESH_ANTICIPATORY_FACTOR, Double.parseDouble(tokenRefreshFactor));
    }
    StramClient client = new StramClient(conf, dag);
    try {
        client.start();
        LinkedHashSet<String> libjars = Sets.newLinkedHashSet();
        String libjarsCsv = conf.get(LIBJARS_CONF_KEY_NAME);
        if (libjarsCsv != null) {
            String[] jars = StringUtils.splitByWholeSeparator(libjarsCsv, StramClient.LIB_JARS_SEP);
            libjars.addAll(Arrays.asList(jars));
        }
        if (deployJars != null) {
            for (File deployJar : deployJars) {
                libjars.add(deployJar.getAbsolutePath());
            }
        }
        client.setResources(libjars);
        client.setFiles(conf.get(FILES_CONF_KEY_NAME));
        client.setArchives(conf.get(ARCHIVES_CONF_KEY_NAME));
        client.setOriginalAppId(conf.get(ORIGINAL_APP_ID));
        client.setQueueName(conf.get(QUEUE_NAME));
        client.startApplication();
        return client.getApplicationReport().getApplicationId();
    } finally {
        client.stop();
    }
}
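The keytab branch above is a reusable shape: upload a local file to DFS only when it is not already there. A trimmed, hedged sketch of just that guard (method and parameter names are ours, not part of StramAppLauncher):

// Sketch: copy localFile into destDir only if the target is absent.
public static Path ensureCopied(FileSystem fs, Path localFile, Path destDir) throws IOException {
    Path dest = new Path(destDir, localFile.getName());
    if (!fs.exists(dest)) {
        // delSrc=false (keep the local copy), overwrite=false
        fs.copyFromLocalFile(false, false, localFile, dest);
    }
    return dest;
}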
From source file: com.datatorrent.stram.StramClient.java
License: Apache License

public void copyInitialState(Path origAppDir) throws IOException {
    // locate previous snapshot
    String newAppDir = this.dag.assertAppPath();
    FSRecoveryHandler recoveryHandler = new FSRecoveryHandler(origAppDir.toString(), conf);
    // read snapshot against new dependencies
    Object snapshot = recoveryHandler.restore();
    if (snapshot == null) {
        throw new IllegalArgumentException("No previous application state found in " + origAppDir);
    }
    InputStream logIs = recoveryHandler.getLog();

    // modify snapshot state to switch app id
    ((StreamingContainerManager.CheckpointState) snapshot).setApplicationId(this.dag, conf);
    Path checkpointPath = new Path(newAppDir, LogicalPlan.SUBDIR_CHECKPOINTS);

    FileSystem fs = FileSystem.newInstance(origAppDir.toUri(), conf);
    // remove the path that was created by the storage agent during deserialization and replacement
    fs.delete(checkpointPath, true);

    // write snapshot to new location
    recoveryHandler = new FSRecoveryHandler(newAppDir, conf);
    recoveryHandler.save(snapshot);
    OutputStream logOs = recoveryHandler.rotateLog();
    IOUtils.copy(logIs, logOs);
    logOs.flush();
    logOs.close();
    logIs.close();

    // copy sub directories that are not present in target
    FileStatus[] lFiles = fs.listStatus(origAppDir);
    for (FileStatus f : lFiles) {
        if (f.isDirectory()) {
            String targetPath = f.getPath().toString().replace(origAppDir.toString(), newAppDir);
            if (!fs.exists(new Path(targetPath))) {
                LOG.debug("Copying {} to {}", f.getPath(), targetPath);
                FileUtil.copy(fs, f.getPath(), fs, new Path(targetPath), false, conf);
            } else {
                LOG.debug("Ignoring {} as it already exists under {}", f.getPath(), targetPath);
            }
        }
    }
}
From source file: com.datatorrent.stram.util.FSUtil.java
License: Apache License

public static boolean mkdirs(FileSystem fs, Path dest) throws IOException {
    try {
        return fs.mkdirs(dest);
    } catch (IOException e) {
        // some file systems (e.g. MapR) throw an exception if the folder already exists
        if (!fs.exists(dest)) {
            throw e;
        } else {
            return false;
        }
    }
}
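For comparison with the workaround above: the pre-check can also be avoided entirely by relying on getFileStatus(), which throws FileNotFoundException for an absent path; this is essentially how the default FileSystem.exists() is implemented in Hadoop. A sketch (the helper name is ours):

import java.io.FileNotFoundException;
import java.io.IOException;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

// Sketch: an exists() equivalent built on getFileStatus().
public static boolean existsViaStatus(FileSystem fs, Path path) throws IOException {
    try {
        fs.getFileStatus(path); // throws FileNotFoundException when the path is absent
        return true;
    } catch (FileNotFoundException e) {
        return false;
    }
}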
From source file: com.davidgildeh.hadoop.utils.FileUtils.java
License: Apache License

/**
 * Check if a file exists; if not, throws a FileNotFoundException.
 *
 * @param fileSystem the file system to check against
 * @param path the path of the file to check
 * @throws IOException
 */
private static void checkFileExists(FileSystem fileSystem, Path path) throws IOException {
    // Check file exists
    if (!fileSystem.exists(path)) {
        LOG.error("Path '" + path.toString() + "' does not exist.");
        fileSystem.close();
        throw new FileNotFoundException("Path '" + path.toString() + "' does not exist.");
    }
}
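One caution when adapting the helper above: FileSystem.get() serves a cached instance that may be shared across the whole process, so the close() call here can break unrelated users of the same file system. A hedged sketch of a variant that owns its instance via FileSystem.newInstance, as the StramAppLauncher example does (the method name is ours):

import java.io.FileNotFoundException;
import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

// Sketch only: the check owns its FileSystem instance, so close() cannot
// disturb the process-wide cache that FileSystem.get() serves from.
public static void requireExists(Configuration conf, Path path) throws IOException {
    FileSystem fs = FileSystem.newInstance(path.toUri(), conf); // private instance
    try {
        if (!fs.exists(path)) {
            throw new FileNotFoundException("Path '" + path + "' does not exist.");
        }
    } finally {
        fs.close(); // closes only our private instance
    }
}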
From source file: com.digitalpebble.behemoth.gate.GATEDriver.java
License: Apache License

public int run(String[] args) throws Exception {
    final FileSystem fs = FileSystem.get(getConf());

    if (args.length < 3 || args.length > 4) {
        String syntax = "com.digitalpebble.behemoth.gate.GATEDriver in out path_gate_file [-XML]";
        System.err.println(syntax);
        return -1;
    }

    boolean dumpGATEXML = false;
    for (String arg : args) {
        if (arg.equalsIgnoreCase("-xml"))
            dumpGATEXML = true;
    }

    Path inputPath = new Path(args[0]);
    Path outputPath = new Path(args[1]);
    String zip_application_path = args[2];

    // check that the GATE application has been stored on HDFS
    Path zap = new Path(zip_application_path);
    if (fs.exists(zap) == false) {
        System.err.println("The GATE application " + zip_application_path + " can't be found on HDFS - aborting");
        return -1;
    }

    JobConf job = new JobConf(getConf());
    // MUST not forget the line below
    job.setJarByClass(this.getClass());

    job.setJobName("Processing " + args[0] + " with GATE application from " + zip_application_path);

    job.setInputFormat(SequenceFileInputFormat.class);
    job.setOutputFormat(SequenceFileOutputFormat.class);

    job.setOutputKeyClass(Text.class);

    if (dumpGATEXML) {
        job.setOutputValueClass(Text.class);
        job.setMapperClass(GATEXMLMapper.class);
    } else {
        job.setOutputValueClass(BehemothDocument.class);
        job.setMapperClass(GATEMapper.class);
    }

    // detect if any filters have been defined
    // and activate the reducer accordingly
    boolean isFilterRequired = BehemothReducer.isRequired(job);
    if (isFilterRequired)
        job.setReducerClass(BehemothReducer.class);
    else {
        job.setNumReduceTasks(0);
    }

    FileInputFormat.addInputPath(job, inputPath);
    FileOutputFormat.setOutputPath(job, outputPath);

    // push the zipped GATE application onto the DistributedCache
    DistributedCache.addCacheArchive(new URI(zip_application_path), job);

    job.set("gate.application.path", zip_application_path);

    try {
        long start = System.currentTimeMillis();
        JobClient.runJob(job);
        long finish = System.currentTimeMillis();
        if (LOG.isInfoEnabled()) {
            LOG.info("GATEDriver completed. Timing: " + (finish - start) + " ms");
        }
    } catch (Exception e) {
        LOG.error("Exception caught", e);
        // leave even partial output
        // fs.delete(outputPath, true);
    }
    return 0;
}
From source file: com.digitalpebble.behemoth.uima.UIMADriver.java
License: Apache License

public int run(String[] args) throws Exception {
    final FileSystem fs = FileSystem.get(getConf());

    if (args.length != 3) {
        String syntax = "com.digitalpebble.behemoth.uima.UIMADriver in out path_pear_file";
        System.err.println(syntax);
        return -1;
    }

    Path inputPath = new Path(args[0]);
    Path outputPath = new Path(args[1]);
    String pearPath = args[2];

    // check that the UIMA application has been stored on HDFS
    Path zap = new Path(pearPath);
    if (fs.exists(zap) == false) {
        System.err.println("The UIMA application " + pearPath + " can't be found on HDFS - aborting");
        return -1;
    }

    JobConf job = new JobConf(getConf());
    job.setJarByClass(this.getClass());
    job.setJobName("Processing with UIMA application : " + pearPath);

    job.setInputFormat(SequenceFileInputFormat.class);
    job.setOutputFormat(SequenceFileOutputFormat.class);

    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(BehemothDocument.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(BehemothDocument.class);

    job.setMapperClass(UIMAMapper.class);
    job.setNumReduceTasks(0);

    FileInputFormat.addInputPath(job, inputPath);
    FileOutputFormat.setOutputPath(job, outputPath);

    // push the UIMA pear onto the DistributedCache
    DistributedCache.addCacheFile(new URI(pearPath), job);

    job.set("uima.pear.path", pearPath);

    try {
        long start = System.currentTimeMillis();
        JobClient.runJob(job);
        long finish = System.currentTimeMillis();
        if (LOG.isInfoEnabled()) {
            LOG.info("UIMADriver completed. Timing: " + (finish - start) + " ms");
        }
    } catch (Exception e) {
        LOG.error("Exception", e);
        fs.delete(outputPath, true);
    }
    return 0;
}
From source file: com.digitalpebble.behemoth.util.ContentExtractor.java
License: Apache License

private int generateDocs(String inputf, String outputf) throws IOException, ArchiveException {
    Path input = new Path(inputf);
    Path dirPath = new Path(outputf);

    FileSystem fsout = FileSystem.get(dirPath.toUri(), getConf());

    if (fsout.exists(dirPath) == false) {
        fsout.mkdirs(dirPath);
    } else {
        System.err.println("Output " + outputf + " already exists");
        return -1;
    }

    // index file
    Path indexPath = new Path(dirPath, "index");
    if (fsout.exists(indexPath) == false) {
        fsout.createNewFile(indexPath);
    }

    maxNumEntriesInArchive = getConf().getInt(numEntriesPerArchiveParamName, 10000);

    index = fsout.create(indexPath);

    createArchive(dirPath);

    FileSystem fs = input.getFileSystem(getConf());
    FileStatus[] statuses = fs.listStatus(input);
    int[] count = { 0 };
    for (int i = 0; i < statuses.length; i++) {
        FileStatus status = statuses[i];
        Path suPath = status.getPath();
        if (suPath.getName().equals("_SUCCESS"))
            continue;
        generateDocs(suPath, dirPath, count);
    }

    if (index != null)
        index.close();

    if (currentArchive != null) {
        currentArchive.finish();
        currentArchive.close();
    }

    return 0;
}
From source file: com.ebay.erl.mobius.core.criterion.TupleRestrictions.java
License: Apache License

/**
 * Create a tuple criterion that only accepts tuples when the value
 * of the <code>column</code> is present in the given <code>file</code>.
 * <p>
 *
 * The assumption about the file is that it is a single-column, one-to-many-line
 * text file. Each line is read into a case-insensitive set, which is then used
 * to check whether the value of the <code>column</code> is in the set or not.
 *
 * @param column the name of the column whose value is tested for membership
 *               in the given <code>file</code>
 *
 * @param file a single-column, multi-line file of strings/numbers;
 *             each line is treated as a single unit.
 *
 * @return an instance of {@link TupleCriterion} that keeps only the records
 *         whose <code>column</code> value is present in the given
 *         <code>file</code>.
 *
 * @throws FileNotFoundException if the given file cannot be found.
 */
public static TupleCriterion within(final String column, File file) throws FileNotFoundException {
    final File f = TupleRestrictions.checkFileExist(file);

    return new TupleCriterion() {

        private static final long serialVersionUID = -1121221619118915652L;
        private Set<String> set;

        @Override
        public void setConf(Configuration conf) {
            try {
                if (conf.get("tmpfiles") == null || conf.get("tmpfiles").trim().length() == 0) {
                    conf.set("tmpfiles", validateFiles(f.getAbsolutePath(), conf));
                } else {
                    conf.set("tmpfiles", validateFiles(f.getAbsolutePath(), conf) + "," + conf.get("tmpfiles"));
                }
            } catch (IOException e) {
                throw new IllegalArgumentException(e);
            }
        }

        /**
         * COPIED FROM org.apache.hadoop.util.GenericOptionsParser
         */
        private String validateFiles(String files, Configuration conf) throws IOException {
            if (files == null)
                return null;
            String[] fileArr = files.split(",");
            String[] finalArr = new String[fileArr.length];
            for (int i = 0; i < fileArr.length; i++) {
                String tmp = fileArr[i];
                String finalPath;
                Path path = new Path(tmp);
                URI pathURI = path.toUri();
                FileSystem localFs = FileSystem.getLocal(conf);
                if (pathURI.getScheme() == null) {
                    // default to the local file system
                    // check if the file exists or not first
                    if (!localFs.exists(path)) {
                        throw new FileNotFoundException("File " + tmp + " does not exist.");
                    }
                    finalPath = path.makeQualified(localFs).toString();
                } else {
                    // check if the file exists in this file system
                    // we need to recreate this filesystem object to copy
                    // these files to the file system jobtracker is running
                    // on.
                    FileSystem fs = path.getFileSystem(conf);
                    if (!fs.exists(path)) {
                        throw new FileNotFoundException("File " + tmp + " does not exist.");
                    }
                    finalPath = path.makeQualified(fs).toString();
                    try {
                        fs.close();
                    } catch (IOException e) {
                    }
                }
                finalArr[i] = finalPath;
            }
            return StringUtils.arrayToString(finalArr);
        }

        @Override
        protected boolean evaluate(Tuple tuple, Configuration configuration) {
            if (set == null) {
                set = new CaseInsensitiveTreeSet();
                BufferedReader br = null;
                try {
                    br = new BufferedReader(new FileReader(new File(f.getName())));
                    String newLine = null;
                    while ((newLine = br.readLine()) != null) {
                        this.set.add(newLine);
                    }
                } catch (IOException e) {
                    throw new RuntimeException(e);
                } finally {
                    try {
                        br.close();
                    } catch (Throwable e) {
                    }
                }
            }

            String value = tuple.getString(column);
            if (value != null) {
                return this.set.contains(value);
            } else {
                return false;
            }
        }

        @Override
        public String[] getInvolvedColumns() {
            return new String[] { column };
        }
    };
}
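A hypothetical call site for the criterion above (the column name and whitelist file are made up for illustration):

// Hypothetical usage: keep only tuples whose "SELLER_ID" value appears in
// a local, one-value-per-line whitelist file.
public static TupleCriterion sellerWhitelist() throws FileNotFoundException {
    return TupleRestrictions.within("SELLER_ID", new File("/tmp/seller_whitelist.txt"));
}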
From source file: com.ebay.erl.mobius.core.JobSetup.java
License: Apache License

private static void ensureOutputDelete(Path outputFolder, Configuration conf) throws IOException {
    FileSystem fs = FileSystem.get(conf);
    outputFolder = fs.makeQualified(outputFolder);
    if (fs.exists(outputFolder)) {
        LOGGER.info("Deleting " + outputFolder.toString());
        fs.delete(outputFolder, true);
    }
}
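Worth noting when reusing this pattern: in the Hadoop implementations we are aware of, FileSystem.delete(path, recursive) returns false rather than throwing when the path is absent, so the exists() check above mainly serves the log message. A minimal sketch of the unconditional variant (the method name is ours; LOGGER stands in for whatever logger is available):

// Sketch: delete() tolerates a missing path, so exists() is optional here.
public static void clearOutput(FileSystem fs, Path outputFolder) throws IOException {
    // typically returns false, without throwing, when the path is absent
    if (fs.delete(fs.makeQualified(outputFolder), true)) {
        LOGGER.info("Deleted " + outputFolder);
    }
}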
From source file: com.ebay.erl.mobius.core.mapred.ConfigurableJob.java
License: Apache License

private static void writePartitionFile(JobConf job, Sampler sampler) {
    try {
        ////////////////////////////////////////////////
        // first, getting samples from the data sources
        ////////////////////////////////////////////////
        LOGGER.info("Running local sampling for job [" + job.getJobName() + "]");
        InputFormat inf = job.getInputFormat();
        Object[] samples = sampler.getSample(inf, job);
        LOGGER.info("Samples retrieved, sorting...");

        ////////////////////////////////////////////////
        // sort the samples
        ////////////////////////////////////////////////
        RawComparator comparator = job.getOutputKeyComparator();
        Arrays.sort(samples, comparator);

        if (job.getBoolean("mobius.print.sample", false)) {
            PrintWriter pw = new PrintWriter(new OutputStreamWriter(new GZIPOutputStream(
                    new BufferedOutputStream(new FileOutputStream(
                            new File(job.get("mobius.sample.file", "./samples.txt.gz")))))));
            for (Object obj : samples) {
                pw.println(obj);
            }
            pw.flush();
            pw.close();
        }

        ////////////////////////////////////////////////
        // start to write partition files
        ////////////////////////////////////////////////
        FileSystem fs = FileSystem.get(job);
        Path partitionFile = fs.makeQualified(new Path(TotalOrderPartitioner.getPartitionFile(job)));
        while (fs.exists(partitionFile)) {
            partitionFile = new Path(partitionFile.toString() + "." + System.currentTimeMillis());
        }
        fs.deleteOnExit(partitionFile);
        TotalOrderPartitioner.setPartitionFile(job, partitionFile);
        LOGGER.info("write partition file to:" + partitionFile.toString());

        int reducersNbr = job.getNumReduceTasks();
        Set<Object> wroteSamples = new HashSet<Object>();

        SequenceFile.Writer writer = SequenceFile.createWriter(fs, job, partitionFile, Tuple.class,
                NullWritable.class);

        // cast to float so the average is not truncated by integer division
        float avgReduceSize = (float) samples.length / reducersNbr;

        int lastBegin = 0;
        for (int i = 0; i < samples.length;) {
            // trying to distribute the load for every reducer evenly,
            // dividing the <code>samples</code> into a set of blocks
            // separated by boundaries, objects selected from the
            // <code>samples</code> array, and each block should have
            // about the same size.

            // find the last index of the element that equals samples[i], as
            // such an element might appear multiple times in the samples.
            int upperBound = Util.findUpperBound(samples, samples[i], comparator);

            int lowerBound = i; // Util.findLowerBound(samples, samples[i], comparator);

            // the repeat count of samples[i]; if the key itself is too big,
            // select it as a boundary
            int currentElemSize = upperBound - lowerBound + 1;

            if (currentElemSize > avgReduceSize * 2) // greater than two times the average reducer size
            {
                // the current element is too big, greater than
                // two times the <code>avgReduceSize</code>,
                // put itself as a boundary
                writer.append(((DataJoinKey) samples[i]).getKey(), NullWritable.get());
                wroteSamples.add(((DataJoinKey) samples[i]).getKey());

                // immediately put the next element on the boundary;
                // the next element starts at <code>upperBound + 1</code>,
                // to prevent the current one consuming even more.
                if (upperBound + 1 < samples.length) {
                    writer.append(((DataJoinKey) samples[upperBound + 1]).getKey(), NullWritable.get());
                    wroteSamples.add(((DataJoinKey) samples[upperBound + 1]).getKey());

                    // move on to the next element after <code>samples[upperBound + 1]</code>
                    lastBegin = Util.findUpperBound(samples, samples[upperBound + 1], comparator) + 1;
                    i = lastBegin;
                } else {
                    break;
                }
            } else {
                // the current element is small enough to be considered
                // together with the previous group
                int size = upperBound - lastBegin;
                if (size > avgReduceSize) {
                    // by including the current elements, we have
                    // found a block that's big enough; select it
                    // as a boundary
                    writer.append(((DataJoinKey) samples[i]).getKey(), NullWritable.get());
                    wroteSamples.add(((DataJoinKey) samples[i]).getKey());

                    i = upperBound + 1;
                    lastBegin = i;
                } else {
                    i = upperBound + 1;
                }
            }
        }

        writer.close();

        // if the number of written samples doesn't equal the number of
        // reducers minus one, then the key space is too small and
        // TotalOrderPartitioner won't work; it works only if
        // the partition boundaries are distinct.
        //
        // we need to change the number of reducers
        if (wroteSamples.size() + 1 != reducersNbr) {
            LOGGER.info("Write complete, but key space is too small, sample size=" + wroteSamples.size()
                    + ", reducer size:" + reducersNbr);
            LOGGER.info("Set the reducer size to:" + (wroteSamples.size() + 1));

            // add 1 because the written samples define boundaries; for example, if
            // the sample size is two with elements [300, 1000], then
            // there should be 3 reducers: one handling i < 300, one
            // for 300 <= i < 1000, and another one for 1000 <= i
            job.setNumReduceTasks(wroteSamples.size() + 1);
        }

        samples = null;
    } catch (IOException e) {
        LOGGER.error(e.getMessage(), e);
        throw new RuntimeException(e);
    }
}
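The while loop near the top of this example is a small exists() idiom of its own: probing until an unused name is found. A condensed, hedged sketch of just that idiom (the helper name is ours); note the check-then-create gap is still racy if several clients pick names concurrently:

import java.io.IOException;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

// Sketch: derive a not-yet-existing path by appending a timestamp suffix.
public static Path unusedPath(FileSystem fs, Path base) throws IOException {
    Path candidate = base;
    while (fs.exists(candidate)) {
        candidate = new Path(base.toString() + "." + System.currentTimeMillis());
    }
    return candidate;
}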