List of usage examples for org.apache.hadoop.fs.FileStatus.isDirectory()
public boolean isDirectory()
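FileStatus.isDirectory() returns true when the status describes a directory rather than a regular file; it replaces the deprecated isDir(). A minimal, self-contained sketch of the basic pattern (the listed path is a placeholder):

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class IsDirectoryExample {
    public static void main(String[] args) throws Exception {
        FileSystem fs = FileSystem.get(new Configuration());
        // Report whether each entry under the given directory is a file or a directory.
        for (FileStatus status : fs.listStatus(new Path("/tmp"))) {
            System.out.println(status.getPath().getName() + " -> "
                    + (status.isDirectory() ? "directory" : "file"));
        }
    }
}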
From source file:edu.nyu.vida.data_polygamy.utils.MergeFiles.java
License:BSD License
public static <K, V> void merge(Path fromDirectory, Path toFile, Class<K> keyClass, Class<V> valueClass)
        throws IOException {
    Configuration conf = new Configuration();
    FileSystem fs = FileSystem.get(conf);
    SequenceFile.Writer writer = SequenceFile.createWriter(conf, SequenceFile.Writer.file(toFile),
            SequenceFile.Writer.keyClass(keyClass), SequenceFile.Writer.valueClass(valueClass));
    for (FileStatus status : fs.listStatus(fromDirectory)) {
        if (status.isDirectory()) {
            System.out.println("Skip directory '" + status.getPath().getName() + "'");
            continue;
        }
        Path file = status.getPath();
        if (file.getName().startsWith("_")) {
            // Jobs' output folders contain bookkeeping files such as "_SUCCESS".
            System.out.println("Skip \"_\"-file '" + file.getName() + "'");
            continue;
        }
        SequenceFile.Reader reader = new SequenceFile.Reader(conf, SequenceFile.Reader.file(file));
        Writable key = (Writable) ReflectionUtils.newInstance(reader.getKeyClass(), conf);
        Writable value = (Writable) ReflectionUtils.newInstance(reader.getValueClass(), conf);
        while (reader.next(key, value)) {
            writer.append(key, value);
        }
        reader.close();
    }
    writer.close();
}
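A hypothetical call site for the method above (the paths are illustrative, and the key/value classes assume the merged job output used org.apache.hadoop.io.Text for both):

Path jobOutput = new Path("/jobs/output");
Path mergedFile = new Path("/jobs/merged.seq");
MergeFiles.merge(jobOutput, mergedFile, Text.class, Text.class);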
From source file:edu.uci.ics.asterix.aoya.AsterixApplicationMaster.java
License:Apache License
/**
 * Here I am just pointing the Containers to the existing HDFS resources
 * given by the Client, on the nodes' filesystem.
 *
 * @throws IOException
 */
private void localizeDFSResources() throws IOException {
    // If performing an 'offline' task, skip a lot of resource distribution.
    if (obliterate || backup || restore) {
        if (appMasterJar == null || ("").equals(appMasterJar)) {
            // This can happen in a jUnit testing environment. We don't need to set it there.
            if (!conf.getBoolean(YarnConfiguration.IS_MINI_YARN_CLUSTER, false)) {
                throw new IllegalStateException("AM jar not provided in environment.");
            } else {
                return;
            }
        }
        FileSystem fs = FileSystem.get(conf);
        FileStatus appMasterJarStatus = fs.getFileStatus(appMasterJar);
        LocalResource obliteratorJar = Records.newRecord(LocalResource.class);
        obliteratorJar.setType(LocalResourceType.FILE);
        obliteratorJar.setVisibility(LocalResourceVisibility.PRIVATE);
        obliteratorJar.setResource(ConverterUtils.getYarnUrlFromPath(appMasterJar));
        obliteratorJar.setTimestamp(appMasterJarStatus.getModificationTime());
        obliteratorJar.setSize(appMasterJarStatus.getLen());
        localResources.put("asterix-yarn.jar", obliteratorJar);
        LOG.info(localResources.values());
        return;
    }
    // Otherwise, distribute everything needed to start up Asterix.
    LocalResource asterixZip = Records.newRecord(LocalResource.class);
    // This un-tars the Asterix distribution.
    asterixZip.setType(LocalResourceType.ARCHIVE);
    asterixZip.setVisibility(LocalResourceVisibility.PRIVATE);
    try {
        asterixZip.setResource(ConverterUtils.getYarnUrlFromURI(new URI(asterixZipPath)));
    } catch (URISyntaxException e) {
        LOG.error("Error locating Asterix zip" + " in env, path=" + asterixZipPath);
        throw new IOException(e);
    }
    asterixZip.setTimestamp(asterixZipTimestamp);
    asterixZip.setSize(asterixZipLen);
    localResources.put(ASTERIX_ZIP_NAME, asterixZip);
    // Now let's do the same for the cluster description XML.
    LocalResource asterixConf = Records.newRecord(LocalResource.class);
    asterixConf.setType(LocalResourceType.FILE);
    asterixConf.setVisibility(LocalResourceVisibility.PRIVATE);
    try {
        asterixConf.setResource(ConverterUtils.getYarnUrlFromURI(new URI(asterixConfPath)));
    } catch (URISyntaxException e) {
        LOG.error("Error locating Asterix config" + " in env, path=" + asterixConfPath);
        throw new IOException(e);
    }
    // TODO: I could avoid localizing this everywhere by only calling this block on the metadata node.
    asterixConf.setTimestamp(asterixConfTimestamp);
    asterixConf.setSize(asterixConfLen);
    localResources.put("cluster-config.xml", asterixConf);
    // Now add the libraries, if there are any.
    try {
        FileSystem fs = FileSystem.get(conf);
        Path p = new Path(dfsBasePath, instanceConfPath + File.separator + "library" + Path.SEPARATOR);
        if (fs.exists(p)) {
            FileStatus[] dataverses = fs.listStatus(p);
            for (FileStatus d : dataverses) {
                if (!d.isDirectory())
                    throw new IOException("Library configuration directory structure is incorrect");
                FileStatus[] libraries = fs.listStatus(d.getPath());
                for (FileStatus l : libraries) {
                    if (l.isDirectory())
                        throw new IOException("Library configuration directory structure is incorrect");
                    LocalResource lr = Records.newRecord(LocalResource.class);
                    lr.setResource(ConverterUtils.getYarnUrlFromURI(l.getPath().toUri()));
                    lr.setSize(l.getLen());
                    lr.setTimestamp(l.getModificationTime());
                    lr.setType(LocalResourceType.ARCHIVE);
                    lr.setVisibility(LocalResourceVisibility.PRIVATE);
                    localResources.put("library" + Path.SEPARATOR + d.getPath().getName() + Path.SEPARATOR
                            + l.getPath().getName().split("\\.")[0], lr);
                    LOG.info("Found library: " + l.getPath().toString());
                    LOG.info(l.getPath().getName());
                }
            }
        }
    } catch (FileNotFoundException e) {
        LOG.info("No external libraries present");
        // Do nothing; it just means there aren't libraries. That is possible and OK.
        // It should be handled by the fs.exists(p) check though.
    }
    LOG.info(localResources.values());
}
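The per-resource pattern repeated above can be condensed into a helper; this is a sketch under the same imports, with a hypothetical helper name:

// Hypothetical helper: describe one HDFS file as a private YARN LocalResource.
private static LocalResource toLocalResource(FileSystem fs, Path file) throws IOException {
    FileStatus status = fs.getFileStatus(file);
    LocalResource resource = Records.newRecord(LocalResource.class);
    resource.setType(LocalResourceType.FILE);
    resource.setVisibility(LocalResourceVisibility.PRIVATE);
    resource.setResource(ConverterUtils.getYarnUrlFromPath(file));
    // Timestamp and size must match the HDFS copy exactly, or NodeManager localization fails.
    resource.setTimestamp(status.getModificationTime());
    resource.setSize(status.getLen());
    return resource;
}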
From source file:edu.umd.cloud9.collection.trecweb.RepackTrecWebCollection.java
License:Apache License
/**
 * Runs this tool.
 */
@SuppressWarnings("static-access")
public int run(String[] args) throws Exception {
    Options options = new Options();
    options.addOption(OptionBuilder.withArgName("path").hasArg()
            .withDescription("(required) collection path").create(COLLECTION_OPTION));
    options.addOption(OptionBuilder.withArgName("path").hasArg()
            .withDescription("(required) output path").create(OUTPUT_OPTION));
    options.addOption(OptionBuilder.withArgName("type").hasArg()
            .withDescription("(required) compression type: 'block', 'record', or 'none'")
            .create(COMPRESSION_OPTION));

    CommandLine cmdline;
    CommandLineParser parser = new GnuParser();
    try {
        cmdline = parser.parse(options, args);
    } catch (ParseException exp) {
        System.err.println("Error parsing command line: " + exp.getMessage());
        return -1;
    }

    if (!cmdline.hasOption(COLLECTION_OPTION) || !cmdline.hasOption(OUTPUT_OPTION)
            || !cmdline.hasOption(COMPRESSION_OPTION)) {
        HelpFormatter formatter = new HelpFormatter();
        formatter.printHelp(this.getClass().getName(), options);
        ToolRunner.printGenericCommandUsage(System.out);
        return -1;
    }

    String collection = cmdline.getOptionValue(COLLECTION_OPTION);
    String output = cmdline.getOptionValue(OUTPUT_OPTION);
    String compressionType = cmdline.getOptionValue(COMPRESSION_OPTION);

    if (!compressionType.equals("block") && !compressionType.equals("record")
            && !compressionType.equals("none")) {
        System.err.println("Error: \"" + compressionType + "\" unknown compression type!");
        System.exit(-1);
    }

    // This is the default block size.
    int blocksize = 1000000;

    Job job = new Job(getConf(), RepackTrecWebCollection.class.getSimpleName() + ":" + collection);
    FileSystem fs = FileSystem.get(job.getConfiguration());

    job.setJarByClass(RepackTrecWebCollection.class);

    LOG.info("Tool name: " + RepackTrecWebCollection.class.getCanonicalName());
    LOG.info(" - collection path: " + collection);
    LOG.info(" - output path: " + output);
    LOG.info(" - compression type: " + compressionType);
    if (compressionType.equals("block")) {
        LOG.info(" - block size: " + blocksize);
    }

    Path collectionPath = new Path(collection);
    for (FileStatus status : fs.listStatus(collectionPath)) {
        if (status.isDirectory()) {
            for (FileStatus s : fs.listStatus(status.getPath())) {
                FileInputFormat.addInputPath(job, s.getPath());
            }
        } else {
            FileInputFormat.addInputPath(job, status.getPath());
        }
    }

    // Hack to figure out number of reducers.
    int numReducers = 100;
    if (collection.toLowerCase().contains("wt10g")) {
        numReducers = 50;
    } else if (collection.toLowerCase().contains("gov2")) {
        numReducers = 200;
    }
    LOG.info(" - number of reducers: " + numReducers);
    job.setNumReduceTasks(numReducers);

    FileOutputFormat.setOutputPath(job, new Path(output));

    if (compressionType.equals("none")) {
        SequenceFileOutputFormat.setCompressOutput(job, false);
    } else {
        SequenceFileOutputFormat.setCompressOutput(job, true);
        if (compressionType.equals("record")) {
            SequenceFileOutputFormat.setOutputCompressionType(job, SequenceFile.CompressionType.RECORD);
        } else {
            SequenceFileOutputFormat.setOutputCompressionType(job, SequenceFile.CompressionType.BLOCK);
            job.getConfiguration().setInt("io.seqfile.compress.blocksize", blocksize);
        }
    }

    job.setInputFormatClass(TrecWebDocumentInputFormat.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);
    job.setOutputKeyClass(LongWritable.class);
    job.setOutputValueClass(TrecWebDocument.class);
    job.setMapperClass(MyMapper.class);

    // Delete the output directory if it exists already.
    fs.delete(new Path(output), true);

    try {
        job.waitForCompletion(true);
    } catch (Exception e) {
        throw new RuntimeException(e);
    }

    return 0;
}
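The listStatus/isDirectory loop above is needed because FileInputFormat does not recurse into sub-directories by default; it expands exactly one level of nesting. The same pattern as a stand-alone sketch (the helper name is illustrative):

// Hypothetical helper: add files under 'root' as job inputs, descending one level into sub-directories.
private static void addInputsOneLevelDeep(Job job, FileSystem fs, Path root) throws IOException {
    for (FileStatus status : fs.listStatus(root)) {
        if (status.isDirectory()) {
            for (FileStatus child : fs.listStatus(status.getPath())) {
                FileInputFormat.addInputPath(job, child.getPath());
            }
        } else {
            FileInputFormat.addInputPath(job, status.getPath());
        }
    }
}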
From source file:edu.umd.cloud9.collection.trecweb.TrecWebDocnoMappingBuilder.java
License:Apache License
@Override
public int run(String[] args) throws IOException {
    DocnoMapping.DefaultBuilderOptions options = DocnoMapping.BuilderUtils.parseDefaultOptions(args);
    if (options == null) {
        return -1;
    }

    // Temp directory.
    String tmpDir = "tmp-" + TrecWebDocnoMappingBuilder.class.getSimpleName() + "-" + random.nextInt(10000);

    LOG.info("Tool name: " + TrecWebDocnoMappingBuilder.class.getCanonicalName());
    LOG.info(" - input path: " + options.collection);
    LOG.info(" - output file: " + options.docnoMapping);

    Job job = new Job(getConf(), TrecWebDocnoMappingBuilder.class.getSimpleName() + ":" + options.collection);
    FileSystem fs = FileSystem.get(job.getConfiguration());

    job.setJarByClass(TrecWebDocnoMappingBuilder.class);
    job.setNumReduceTasks(1);

    PathFilter filter = new PathFilter() {
        @Override
        public boolean accept(Path path) {
            return !path.getName().startsWith("_");
        }
    };

    // Note: Gov2 and Wt10g raw collections are organized into sub-directories.
    Path collectionPath = new Path(options.collection);
    for (FileStatus status : fs.listStatus(collectionPath, filter)) {
        if (status.isDirectory()) {
            for (FileStatus s : fs.listStatus(status.getPath(), filter)) {
                FileInputFormat.addInputPath(job, s.getPath());
            }
        } else {
            FileInputFormat.addInputPath(job, status.getPath());
        }
    }
    FileOutputFormat.setOutputPath(job, new Path(tmpDir));
    FileOutputFormat.setCompressOutput(job, false);

    job.setInputFormatClass(options.inputFormat);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    job.setOutputFormatClass(TextOutputFormat.class);

    job.setMapperClass(MyMapper.class);
    job.setReducerClass(MyReducer.class);

    // Delete the output directory if it exists already.
    fs.delete(new Path(tmpDir), true);

    try {
        job.waitForCompletion(true);
    } catch (Exception e) {
        throw new RuntimeException(e);
    }

    writeMappingData(new Path(tmpDir + "/part-r-00000"), new Path(options.docnoMapping), fs);
    fs.delete(new Path(tmpDir), true);

    return 0;
}
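The driver filters "_"-prefixed bookkeeping files (such as _SUCCESS and _logs) explicitly because FileSystem.listStatus applies no default filter. An alternative is to register the filter with the input format itself, which also applies its own built-in hidden-file filter for "_"- and "."-prefixed names; a sketch assuming the new-API FileInputFormat:

// A named filter class: setInputPathFilter requires an instantiable class, not an anonymous instance.
public static class NoUnderscoreFilter implements PathFilter {
    @Override
    public boolean accept(Path path) {
        return !path.getName().startsWith("_");
    }
}

// In the driver:
FileInputFormat.setInputPathFilter(job, NoUnderscoreFilter.class);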
From source file:edu.umn.cs.spatialHadoop.operations.LocalSampler.java
License:Open Source License
/**
 * Reads a random sample of up to ratioOrCount records from the input files.
 *
 * @param files
 * @param ratioOrCount
 * @param output
 * @param conf
 * @return
 * @throws IOException
 * @throws InterruptedException
 */
public static long sampleLocal(Path[] files, float ratioOrCount, ResultCollector<Text> output,
        Configuration conf) throws IOException, InterruptedException {
    Vector<FileSplit> splits = new Vector<FileSplit>();
    for (Path file : files) {
        FileSystem fs = file.getFileSystem(conf);
        if (fs.isFile(file)) {
            // A single file. Include it.
            splits.add(new FileSplit(file, 0, fs.getFileStatus(file).getLen(), new String[0]));
        } else {
            // A directory. Include all contents.
            FileStatus[] contents = fs.listStatus(file);
            for (FileStatus content : contents) {
                if (!content.isDirectory())
                    splits.add(new FileSplit(content.getPath(), 0, content.getLen(), new String[0]));
            }
        }
    }
    return sampleLocal(splits.toArray(new FileSplit[splits.size()]), ratioOrCount, output, conf);
}
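A hypothetical call site (the input path and sample size are illustrative; ResultCollector is assumed to be SpatialHadoop's single-method callback interface, and the parameter name suggests values above 1 are treated as a record count rather than a ratio):

Configuration conf = new Configuration();
Path[] inputs = { new Path("/data/points") };
// Presumably samples roughly 1000 records, per the ratioOrCount naming.
long sampled = LocalSampler.sampleLocal(inputs, 1000f, new ResultCollector<Text>() {
    @Override
    public void collect(Text line) {
        System.out.println(line);
    }
}, conf);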
From source file:edu.umn.cs.spatialHadoop.visualization.HadoopvizServer.java
License:Open Source License
/**
 * Lists the contents of a directory.
 * @param request
 * @param response
 */
private void handleListFiles(HttpServletRequest request, HttpServletResponse response) {
    try {
        String pathStr = request.getParameter("path");
        Path path = new Path(pathStr == null || pathStr.isEmpty() ? "/" : pathStr);
        FileSystem fs = path.getFileSystem(commonParams);
        FileStatus[] fileStatuses = fs.listStatus(path, SpatialSite.NonHiddenFileFilter);
        // Sort directories before files, then alphabetically by name.
        Arrays.sort(fileStatuses, new Comparator<FileStatus>() {
            @Override
            public int compare(FileStatus o1, FileStatus o2) {
                if (o1.isDirectory() && o2.isFile())
                    return -1;
                if (o1.isFile() && o2.isDirectory())
                    return 1;
                return o1.getPath().getName().toLowerCase().compareTo(o2.getPath().getName().toLowerCase());
            }
        });
        response.setContentType("application/json;charset=utf-8");
        response.setStatus(HttpServletResponse.SC_OK);
        PrintWriter out = response.getWriter();
        out.print("{\"FileStatuses\":{");
        if (pathStr.endsWith("/")) {
            pathStr = pathStr.substring(0, pathStr.length() - 1);
        }
        out.printf("\"BaseDir\":\"%s\",", pathStr);
        if (path.getParent() != null)
            out.printf("\"ParentDir\":\"%s\",", path.getParent());
        out.print("\"FileStatus\":[");
        for (int i = 0; i < fileStatuses.length; i++) {
            FileStatus fileStatus = fileStatuses[i];
            if (i != 0)
                out.print(',');
            String filename = fileStatus.getPath().getName();
            int idot = filename.lastIndexOf('.');
            String extension = idot == -1 ? "" : filename.substring(idot + 1);
            out.printf(
                    "{\"accessTime\":%d,\"blockSize\":%d,\"childrenNum\":%d,\"fileId\":%d,"
                            + "\"group\":\"%s\",\"length\":%d,\"modificationTime\":%d,"
                            + "\"owner\":\"%s\",\"pathSuffix\":\"%s\",\"permission\":\"%s\","
                            + "\"replication\":%d,\"storagePolicy\":%d,\"type\":\"%s\",\"extension\":\"%s\"}",
                    fileStatus.getAccessTime(), fileStatus.getBlockSize(), 0, 0, fileStatus.getGroup(),
                    fileStatus.getLen(), fileStatus.getModificationTime(), fileStatus.getOwner(),
                    fileStatus.getPath().getName(), fileStatus.getPermission(), fileStatus.getReplication(),
                    0, fileStatus.isDirectory() ? "DIRECTORY" : "FILE", extension.toLowerCase());
        }
        out.print("]}");
        // Check if there is an image or master file.
        FileStatus[] metaFiles = fs.listStatus(path, new PathFilter() {
            @Override
            public boolean accept(Path path) {
                return path.getName().startsWith("_master") || path.getName().equals("_data.png");
            }
        });
        for (FileStatus metaFile : metaFiles) {
            String metaFileName = metaFile.getPath().getName();
            if (metaFileName.startsWith("_master")) {
                out.printf(",\"MasterPath\":\"%s\"", metaFileName);
                String shape = OperationsParams.detectShape(fileStatuses[0].getPath(), commonParams);
                if (shape != null)
                    out.printf(",\"Shape\":\"%s\"", shape);
            } else if (metaFileName.equals("_data.png")) {
                out.printf(",\"ImagePath\":\"%s\"", metaFileName);
            }
        }
        out.print("}");
        out.close();
    } catch (Exception e) {
        System.out.println("error happened");
        e.printStackTrace();
        try {
            e.printStackTrace(response.getWriter());
        } catch (IOException ioe) {
            ioe.printStackTrace();
            e.printStackTrace();
        }
        response.setContentType("text/plain;charset=utf-8");
        response.setStatus(HttpServletResponse.SC_INTERNAL_SERVER_ERROR);
    }
}
From source file:eu.scape_project.pt.mapred.input.ControlFileInputFormat.java
License:Apache License
/**
 * Gets the rearranged splits for a control file.
 *
 * Rearranges the lines of a control file according to the locations of the
 * input files they reference, and logically splits the rearranged control
 * file into splits of about N lines.
 */
public static List<FileSplit> getSplitsForFile(FileStatus status, Configuration conf, int numLinesPerSplit)
        throws IOException {
    List<FileSplit> splits = new ArrayList<FileSplit>();
    Path controlFile = status.getPath();
    if (status.isDirectory()) {
        throw new IOException("Not a file: " + controlFile);
    }
    FileSystem fs = controlFile.getFileSystem(conf);
    CmdLineParser parser = new PipedArgsParser();
    String strRepo = conf.get(PropertyNames.REPO_LOCATION);
    Path fRepo = new Path(strRepo);
    Repository repo = new ToolRepository(fs, fRepo);

    LOG.info("Creating location-aware control file");
    Map<String, ArrayList<String>> locationMap = createLocationMap(controlFile, conf, repo, parser);
    Path newControlFile = new Path(controlFile + "-rearranged" + System.currentTimeMillis());
    splits = writeNewControlFileAndCreateSplits(newControlFile, fs, locationMap, numLinesPerSplit);
    LOG.info("Location-aware control file " + newControlFile.toString() + " created");
    return splits;
}
From source file:eu.scape_project.pt.mapred.input.ControlFileInputFormat.java
License:Apache License
/**
 * Recursively collects paths in a directory.
 *
 * @param fs Hadoop filesystem handle
 * @param path path, a directory
 * @return list of paths
 */
private static List<Path> getFilesInDir(FileSystem fs, Path path) throws FileNotFoundException, IOException {
    ArrayList<Path> inFiles = new ArrayList<Path>();
    for (FileStatus s : fs.listStatus(path)) {
        if (s.isDirectory()) {
            inFiles.addAll(getFilesInDir(fs, s.getPath()));
        } else {
            inFiles.add(s.getPath());
        }
    }
    return inFiles;
}
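For comparison, Hadoop also offers a built-in recursive listing that avoids the manual recursion; a sketch of the equivalent, which likewise yields only files and never directories:

import org.apache.hadoop.fs.LocatedFileStatus;
import org.apache.hadoop.fs.RemoteIterator;

List<Path> inFiles = new ArrayList<Path>();
RemoteIterator<LocatedFileStatus> it = fs.listFiles(path, true); // true = recurse
while (it.hasNext()) {
    inFiles.add(it.next().getPath());
}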
From source file:eu.scape_project.tb.chutney.FileTracker.java
License:Apache License
/**
 * Populates the class's list of files in HDFS.
 * @param pStorageDir directory in HDFS for the keyfile
 * @throws IOException
 */
private void generateFileList(Path pStorageDir) throws IOException {
    // Iterate through the files in the storage directory.
    FileStatus[] fileStatus = gFileSystem.listStatus(pStorageDir);
    if (null == fileStatus)
        return;
    for (FileStatus fs : fileStatus) {
        if (fs.isDirectory()) {
            generateFileList(fs.getPath());
        } else {
            // Not a directory: add the file to the list, unless it is the key file.
            if (!fs.getPath().getName().equals(KEYFILEFILE)) {
                gHdfsFiles.add(fs.getPath().toString().substring(gHdfsStorageDir.toString().length()));
            }
        }
    }
    return;
}
From source file:fr.ens.biologie.genomique.eoulsan.data.protocols.HDFSPathDataProtocol.java
License:LGPL
@Override
public InputStream getData(final DataFile src) throws IOException {
    final Path path = getPath(src);
    if (path == null) {
        throw new NullPointerException("Path to create is null");
    }
    if (this.conf == null) {
        throw new NullPointerException("The configuration object is null");
    }
    final FileSystem fs = path.getFileSystem(this.conf);
    if (fs == null) {
        throw new IOException("Unable to create InputStream, the FileSystem is null");
    }
    final FileStatus fStatus = fs.getFileStatus(path);
    if (fStatus.isDirectory()) {
        // The source is a directory: expose its contents as one concatenated stream.
        final List<Path> paths = getPathToConcat(fs, path);
        if (paths != null && paths.size() > 0) {
            return new PathConcatInputStream(paths, this.conf);
        }
    }
    return fs.open(path);
}