Example usage for org.apache.hadoop.fs FileStatus isDirectory

List of usage examples for org.apache.hadoop.fs FileStatus isDirectory

Introduction

On this page you can find usage examples for org.apache.hadoop.fs.FileStatus.isDirectory().

Prototype

public boolean isDirectory() 

Document

Is this a directory?
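
Before the full examples below, here is a minimal, self-contained sketch of the call (the "/tmp/input" path is a placeholder, not taken from any of the sources): list a directory and branch on isDirectory() for each entry. On current Hadoop versions this method replaces the deprecated FileStatus.isDir().

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class IsDirectoryExample {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(conf);
        // Print each child of the placeholder directory, tagged as file or directory.
        for (FileStatus status : fs.listStatus(new Path("/tmp/input"))) {
            System.out.println(status.getPath().getName()
                    + (status.isDirectory() ? " [dir]" : " [file]"));
        }
    }
}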

Usage

From source file:edu.nyu.vida.data_polygamy.utils.MergeFiles.java

License:BSD License

public static <K, V> void merge(Path fromDirectory, Path toFile, Class<K> keyClass, Class<V> valueClass)
        throws IOException {

    Configuration conf = new Configuration();

    FileSystem fs = FileSystem.get(conf);

    SequenceFile.Writer writer = SequenceFile.createWriter(conf, SequenceFile.Writer.file(toFile),
            SequenceFile.Writer.keyClass(keyClass), SequenceFile.Writer.valueClass(valueClass));

    for (FileStatus status : fs.listStatus(fromDirectory)) {
        if (status.isDirectory()) {
            System.out.println("Skip directory '" + status.getPath().getName() + "'");
            continue;
        }

        Path file = status.getPath();

        if (file.getName().startsWith("_")) {
            System.out.println("Skip \"_\"-file '" + file.getName() + "'"); // there are files such as "_SUCCESS" in job output folders
            continue;
        }

        SequenceFile.Reader reader = new SequenceFile.Reader(conf, SequenceFile.Reader.file(file));
        Writable key = (Writable) ReflectionUtils.newInstance(reader.getKeyClass(), conf);
        Writable value = (Writable) ReflectionUtils.newInstance(reader.getValueClass(), conf);

        while (reader.next(key, value)) {
            writer.append(key, value);
        }

        reader.close();
    }

    writer.close();
}
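
A hypothetical invocation of the helper above (the paths and the org.apache.hadoop.io key/value classes are illustrative, not taken from the source):

    MergeFiles.merge(new Path("/tmp/job-output"), new Path("/tmp/merged.seq"),
            Text.class, IntWritable.class);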

From source file:edu.uci.ics.asterix.aoya.AsterixApplicationMaster.java

License:Apache License

/**
 * Here I am just pointing the Containers to the existing HDFS resources given by the Client
 * on the filesystem of the nodes.
 *
 * @throws IOException
 */
private void localizeDFSResources() throws IOException {
    //if performing an 'offline' task, skip a lot of resource distribution
    if (obliterate || backup || restore) {
        if (appMasterJar == null || ("").equals(appMasterJar)) {
            //this can happen in a JUnit testing environment; we don't need to set it there.
            if (!conf.getBoolean(YarnConfiguration.IS_MINI_YARN_CLUSTER, false)) {
                throw new IllegalStateException("AM jar not provided in environment.");
            } else {
                return;
            }
        }
        FileSystem fs = FileSystem.get(conf);
        FileStatus appMasterJarStatus = fs.getFileStatus(appMasterJar);
        LocalResource obliteratorJar = Records.newRecord(LocalResource.class);
        obliteratorJar.setType(LocalResourceType.FILE);
        obliteratorJar.setVisibility(LocalResourceVisibility.PRIVATE);
        obliteratorJar.setResource(ConverterUtils.getYarnUrlFromPath(appMasterJar));
        obliteratorJar.setTimestamp(appMasterJarStatus.getModificationTime());
        obliteratorJar.setSize(appMasterJarStatus.getLen());
        localResources.put("asterix-yarn.jar", obliteratorJar);
        LOG.info(localResources.values());
        return;
    }
    //otherwise, distribute everything to start up asterix

    LocalResource asterixZip = Records.newRecord(LocalResource.class);

    //this un-tars the asterix distribution
    asterixZip.setType(LocalResourceType.ARCHIVE);

    asterixZip.setVisibility(LocalResourceVisibility.PRIVATE);
    try {
        asterixZip.setResource(ConverterUtils.getYarnUrlFromURI(new URI(asterixZipPath)));

    } catch (URISyntaxException e) {
        LOG.error("Error locating Asterix zip" + " in env, path=" + asterixZipPath);
        throw new IOException(e);
    }

    asterixZip.setTimestamp(asterixZipTimestamp);
    asterixZip.setSize(asterixZipLen);
    localResources.put(ASTERIX_ZIP_NAME, asterixZip);

    //now let's do the same for the cluster description XML
    LocalResource asterixConf = Records.newRecord(LocalResource.class);
    asterixConf.setType(LocalResourceType.FILE);

    asterixConf.setVisibility(LocalResourceVisibility.PRIVATE);
    try {
        asterixConf.setResource(ConverterUtils.getYarnUrlFromURI(new URI(asterixConfPath)));
    } catch (URISyntaxException e) {
        LOG.error("Error locating Asterix config" + " in env, path=" + asterixConfPath);
        throw new IOException(e);
    }
    //TODO: I could avoid localizing this everywhere by only calling this block on the metadata node. 
    asterixConf.setTimestamp(asterixConfTimestamp);
    asterixConf.setSize(asterixConfLen);
    localResources.put("cluster-config.xml", asterixConf);
    //now add the libraries if there are any
    try {
        FileSystem fs = FileSystem.get(conf);
        Path p = new Path(dfsBasePath, instanceConfPath + File.separator + "library" + Path.SEPARATOR);
        if (fs.exists(p)) {
            FileStatus[] dataverses = fs.listStatus(p);
            for (FileStatus d : dataverses) {
                if (!d.isDirectory())
                    throw new IOException("Library configuration directory structure is incorrect");
                FileStatus[] libraries = fs.listStatus(d.getPath());
                for (FileStatus l : libraries) {
                    if (l.isDirectory())
                        throw new IOException("Library configuration directory structure is incorrect");
                    LocalResource lr = Records.newRecord(LocalResource.class);
                    lr.setResource(ConverterUtils.getYarnUrlFromURI(l.getPath().toUri()));
                    lr.setSize(l.getLen());
                    lr.setTimestamp(l.getModificationTime());
                    lr.setType(LocalResourceType.ARCHIVE);
                    lr.setVisibility(LocalResourceVisibility.PRIVATE);
                    localResources.put("library" + Path.SEPARATOR + d.getPath().getName() + Path.SEPARATOR
                            + l.getPath().getName().split("\\.")[0], lr);
                    LOG.info("Found library: " + l.getPath().toString());
                    LOG.info(l.getPath().getName());
                }
            }
        }
    } catch (FileNotFoundException e) {
        LOG.info("No external libraries present");
        //do nothing, it just means there aren't libraries. that is possible and ok
        // it should be handled by the fs.exists(p) check though.
    }
    LOG.info(localResources.values());

}
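
One detail worth noting in the snippet above: each LocalResource is given the timestamp and size recorded for the file in HDFS (from FileStatus, or from values captured earlier), because YARN re-checks both at localization time and rejects the resource if they no longer match.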

From source file:edu.umd.cloud9.collection.trecweb.RepackTrecWebCollection.java

License:Apache License

/**
 * Runs this tool.
 */
@SuppressWarnings("static-access")
public int run(String[] args) throws Exception {
    Options options = new Options();
    options.addOption(OptionBuilder.withArgName("path").hasArg().withDescription("(required) collection path")
            .create(COLLECTION_OPTION));
    options.addOption(OptionBuilder.withArgName("path").hasArg().withDescription("(required) output path")
            .create(OUTPUT_OPTION));
    options.addOption(OptionBuilder.withArgName("type").hasArg()
            .withDescription("(required) compression type: 'block', 'record', or 'none'")
            .create(COMPRESSION_OPTION));

    CommandLine cmdline;
    CommandLineParser parser = new GnuParser();
    try {
        cmdline = parser.parse(options, args);
    } catch (ParseException exp) {
        System.err.println("Error parsing command line: " + exp.getMessage());
        return -1;
    }

    if (!cmdline.hasOption(COLLECTION_OPTION) || !cmdline.hasOption(OUTPUT_OPTION)
            || !cmdline.hasOption(COMPRESSION_OPTION)) {
        HelpFormatter formatter = new HelpFormatter();
        formatter.printHelp(this.getClass().getName(), options);
        ToolRunner.printGenericCommandUsage(System.out);
        return -1;
    }

    String collection = cmdline.getOptionValue(COLLECTION_OPTION);
    String output = cmdline.getOptionValue(OUTPUT_OPTION);
    String compressionType = cmdline.getOptionValue(COMPRESSION_OPTION);

    if (!compressionType.equals("block") && !compressionType.equals("record")
            && !compressionType.equals("none")) {
        System.err.println("Error: \"" + compressionType + "\" unknown compression type!");
        System.exit(-1);
    }

    // This is the default block size.
    int blocksize = 1000000;

    Job job = new Job(getConf(), RepackTrecWebCollection.class.getSimpleName() + ":" + collection);
    FileSystem fs = FileSystem.get(job.getConfiguration());

    job.setJarByClass(RepackTrecWebCollection.class);

    LOG.info("Tool name: " + RepackTrecWebCollection.class.getCanonicalName());
    LOG.info(" - collection path: " + collection);
    LOG.info(" - output path: " + output);
    LOG.info(" - compression type: " + compressionType);

    if (compressionType.equals("block")) {
        LOG.info(" - block size: " + blocksize);
    }

    Path collectionPath = new Path(collection);
    for (FileStatus status : fs.listStatus(collectionPath)) {
        if (status.isDirectory()) {
            for (FileStatus s : fs.listStatus(status.getPath())) {
                FileInputFormat.addInputPath(job, s.getPath());
            }
        } else {
            FileInputFormat.addInputPath(job, status.getPath());
        }
    }

    // Hack to figure out number of reducers.
    int numReducers = 100;
    if (collection.toLowerCase().contains("wt10g")) {
        numReducers = 50;
    } else if (collection.toLowerCase().contains("gov2")) {
        numReducers = 200;
    }
    LOG.info(" - number of reducers: " + numReducers);
    job.setNumReduceTasks(numReducers);

    FileOutputFormat.setOutputPath(job, new Path(output));

    if (compressionType.equals("none")) {
        SequenceFileOutputFormat.setCompressOutput(job, false);
    } else {
        SequenceFileOutputFormat.setCompressOutput(job, true);

        if (compressionType.equals("record")) {
            SequenceFileOutputFormat.setOutputCompressionType(job, SequenceFile.CompressionType.RECORD);
        } else {
            SequenceFileOutputFormat.setOutputCompressionType(job, SequenceFile.CompressionType.BLOCK);
            job.getConfiguration().setInt("io.seqfile.compress.blocksize", blocksize);
        }
    }

    job.setInputFormatClass(TrecWebDocumentInputFormat.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);
    job.setOutputKeyClass(LongWritable.class);
    job.setOutputValueClass(TrecWebDocument.class);

    job.setMapperClass(MyMapper.class);

    // delete the output directory if it exists already
    fs.delete(new Path(output), true);

    try {
        job.waitForCompletion(true);
    } catch (Exception e) {
        throw new RuntimeException(e);
    }

    return 0;
}

From source file:edu.umd.cloud9.collection.trecweb.TrecWebDocnoMappingBuilder.java

License:Apache License

@Override
public int run(String[] args) throws IOException {
    DocnoMapping.DefaultBuilderOptions options = DocnoMapping.BuilderUtils.parseDefaultOptions(args);
    if (options == null) {
        return -1;
    }

    // Temp directory.
    String tmpDir = "tmp-" + TrecWebDocnoMappingBuilder.class.getSimpleName() + "-" + random.nextInt(10000);

    LOG.info("Tool name: " + TrecWebDocnoMappingBuilder.class.getCanonicalName());
    LOG.info(" - input path: " + options.collection);
    LOG.info(" - output file: " + options.docnoMapping);

    Job job = new Job(getConf(), TrecWebDocnoMappingBuilder.class.getSimpleName() + ":" + options.collection);
    FileSystem fs = FileSystem.get(job.getConfiguration());

    job.setJarByClass(TrecWebDocnoMappingBuilder.class);

    job.setNumReduceTasks(1);

    PathFilter filter = new PathFilter() {
        @Override
        public boolean accept(Path path) {
            return !path.getName().startsWith("_");
        }
    };

    // Note: Gov2 and Wt10g raw collections are organized into sub-directories.
    Path collectionPath = new Path(options.collection);
    for (FileStatus status : fs.listStatus(collectionPath, filter)) {
        if (status.isDirectory()) {
            for (FileStatus s : fs.listStatus(status.getPath(), filter)) {
                FileInputFormat.addInputPath(job, s.getPath());
            }
        } else {
            FileInputFormat.addInputPath(job, status.getPath());
        }
    }
    FileOutputFormat.setOutputPath(job, new Path(tmpDir));
    FileOutputFormat.setCompressOutput(job, false);

    job.setInputFormatClass(options.inputFormat);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    job.setOutputFormatClass(TextOutputFormat.class);

    job.setMapperClass(MyMapper.class);
    job.setReducerClass(MyReducer.class);

    // Delete the output directory if it exists already.
    fs.delete(new Path(tmpDir), true);

    try {
        job.waitForCompletion(true);
    } catch (Exception e) {
        throw new RuntimeException(e);
    }

    writeMappingData(new Path(tmpDir + "/part-r-00000"), new Path(options.docnoMapping), fs);
    fs.delete(new Path(tmpDir), true);

    return 0;
}

From source file:edu.umn.cs.spatialHadoop.operations.LocalSampler.java

License:Open Source License

/**
 * Reads a random sample of up to the given count (or ratio) from the input files.
 * @param files
 * @param ratioOrCount
 * @param output
 * @param conf
 * @return
 * @throws IOException
 * @throws InterruptedException
 */
public static long sampleLocal(Path[] files, float ratioOrCount, ResultCollector<Text> output,
        Configuration conf) throws IOException, InterruptedException {
    Vector<FileSplit> splits = new Vector<FileSplit>();
    for (Path file : files) {
        FileSystem fs = file.getFileSystem(conf);
        if (fs.isFile(file)) {
            // A single file. Include it
            splits.add(new FileSplit(file, 0, fs.getFileStatus(file).getLen(), new String[0]));
        } else {
            // A directory. Include all contents
            FileStatus[] contents = fs.listStatus(file);
            for (FileStatus content : contents) {
                if (!content.isDirectory())
                    splits.add(new FileSplit(content.getPath(), 0, content.getLen(), new String[0]));
            }
        }
    }
    return sampleLocal(splits.toArray(new FileSplit[splits.size()]), ratioOrCount, output, conf);
}
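
One caveat: FileSystem.isFile(), used above, is deprecated on recent Hadoop releases in favor of calling getFileStatus() once and testing isFile()/isDirectory() on the result, which also saves the second namenode round trip this method makes when it calls getFileStatus() again for the length.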

From source file:edu.umn.cs.spatialHadoop.visualization.HadoopvizServer.java

License:Open Source License

/**
 * Lists the contents of a directory.
 * @param request
 * @param response
 */
private void handleListFiles(HttpServletRequest request, HttpServletResponse response) {
    try {
        String pathStr = request.getParameter("path");
        Path path = new Path(pathStr == null || pathStr.isEmpty() ? "/" : pathStr);
        pathStr = path.toString(); // normalize, and guard against a null "path" parameter below
        FileSystem fs = path.getFileSystem(commonParams);
        FileStatus[] fileStatuses = fs.listStatus(path, SpatialSite.NonHiddenFileFilter);
        Arrays.sort(fileStatuses, new Comparator<FileStatus>() {
            @Override
            public int compare(FileStatus o1, FileStatus o2) {
                if (o1.isDirectory() && o2.isFile())
                    return -1;
                if (o1.isFile() && o2.isDirectory())
                    return 1;
                return o1.getPath().getName().toLowerCase().compareTo(o2.getPath().getName().toLowerCase());
            }
        });
        response.setContentType("application/json;charset=utf-8");
        response.setStatus(HttpServletResponse.SC_OK);
        PrintWriter out = response.getWriter();
        out.print("{\"FileStatuses\":{");
        if (pathStr.endsWith("/")) {
            pathStr = pathStr.substring(0, pathStr.length() - 1);
        }
        out.printf("\"BaseDir\":\"%s\",", pathStr);
        if (path.getParent() != null)
            out.printf("\"ParentDir\":\"%s\",", path.getParent());
        out.print("\"FileStatus\":[");
        for (int i = 0; i < fileStatuses.length; i++) {
            FileStatus fileStatus = fileStatuses[i];
            if (i != 0)
                out.print(',');
            String filename = fileStatus.getPath().getName();
            int idot = filename.lastIndexOf('.');
            String extension = idot == -1 ? "" : filename.substring(idot + 1);
            out.printf(
                    "{\"accessTime\":%d,\"blockSize\":%d,\"childrenNum\":%d,\"fileId\":%d,"
                            + "\"group\":\"%s\",\"length\":%d,\"modificationTime\":%d,"
                            + "\"owner\":\"%s\",\"pathSuffix\":\"%s\",\"permission\":\"%s\","
                            + "\"replication\":%d,\"storagePolicy\":%d,\"type\":\"%s\",\"extension\":\"%s\"}",
                    fileStatus.getAccessTime(), fileStatus.getBlockSize(), 0, 0, fileStatus.getGroup(),
                    fileStatus.getLen(), fileStatus.getModificationTime(), fileStatus.getOwner(),
                    fileStatus.getPath().getName(), fileStatus.getPermission(), fileStatus.getReplication(), 0,
                    fileStatus.isDirectory() ? "DIRECTORY" : "FILE", extension.toLowerCase());
        }
        out.print("]}");
        // Check if there is an image or master file
        FileStatus[] metaFiles = fs.listStatus(path, new PathFilter() {
            @Override
            public boolean accept(Path path) {
                return path.getName().startsWith("_master") || path.getName().equals("_data.png");
            }
        });
        for (FileStatus metaFile : metaFiles) {
            String metaFileName = metaFile.getPath().getName();
            if (metaFileName.startsWith("_master")) {
                out.printf(",\"MasterPath\":\"%s\"", metaFileName);
                if (fileStatuses.length > 0) { // guard: the listing may contain only meta files
                    String shape = OperationsParams.detectShape(fileStatuses[0].getPath(), commonParams);
                    if (shape != null)
                        out.printf(",\"Shape\":\"%s\"", shape);
                }
            } else if (metaFileName.equals("_data.png"))
                out.printf(",\"ImagePath\":\"%s\"", metaFileName);
        }
        out.print("}");

        out.close();
    } catch (Exception e) {
        System.out.println("error happened");
        e.printStackTrace();
        try {
            e.printStackTrace(response.getWriter());
        } catch (IOException ioe) {
            ioe.printStackTrace();
            e.printStackTrace();
        }
        response.setContentType("text/plain;charset=utf-8");
        response.setStatus(HttpServletResponse.SC_INTERNAL_SERVER_ERROR);
    }
}
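
The Comparator in this handler sorts directories ahead of files and then orders entries case-insensitively by name, mirroring the listing order of a typical file browser.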

From source file:eu.scape_project.pt.mapred.input.ControlFileInputFormat.java

License:Apache License

/**
 * Gets the rearranged splits for a control file.
 *
 * Rearranges the lines of a control file according to the locations of the
 * input files they reference, and logically splits the rearranged control
 * file into splits of about N lines.
 */
public static List<FileSplit> getSplitsForFile(FileStatus status, Configuration conf, int numLinesPerSplit)
        throws IOException {
    List<FileSplit> splits = new ArrayList<FileSplit>();
    Path controlFile = status.getPath();
    if (status.isDirectory()) {
        throw new IOException("Not a file: " + controlFile);
    }
    FileSystem fs = controlFile.getFileSystem(conf);
    CmdLineParser parser = new PipedArgsParser();
    String strRepo = conf.get(PropertyNames.REPO_LOCATION);
    Path fRepo = new Path(strRepo);
    Repository repo = new ToolRepository(fs, fRepo);

    LOG.info("Creating location-aware control file");
    Map<String, ArrayList<String>> locationMap = createLocationMap(controlFile, conf, repo, parser);

    Path newControlFile = new Path(controlFile + "-rearranged" + System.currentTimeMillis());

    splits = writeNewControlFileAndCreateSplits(newControlFile, fs, locationMap, numLinesPerSplit);
    LOG.info("Location-aware control file " + newControlFile.toString() + " created");
    return splits;
}
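
Here isDirectory() is used as a precondition rather than a filter: a control file must be a plain file, so a directory argument is rejected immediately with an IOException.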

From source file:eu.scape_project.pt.mapred.input.ControlFileInputFormat.java

License:Apache License

/**
 * Recursively collects paths in a directory.
 *
 * @param fs Hadoop filesystem handle
 * @param path path, a directory
 * @return list of paths
 */
private static List<Path> getFilesInDir(FileSystem fs, Path path) throws FileNotFoundException, IOException {
    ArrayList<Path> inFiles = new ArrayList<Path>();
    for (FileStatus s : fs.listStatus(path)) {
        if (s.isDirectory()) {
            inFiles.addAll(getFilesInDir(fs, s.getPath()));
        } else {
            inFiles.add(s.getPath());
        }
    }
    return inFiles;
}
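
For comparison, here is a sketch of the same walk using Hadoop's built-in recursive listing, assuming a release that provides FileSystem.listFiles (2.x and later); it needs org.apache.hadoop.fs.RemoteIterator and org.apache.hadoop.fs.LocatedFileStatus:

    // Equivalent to getFilesInDir, but letting the FileSystem do the recursion.
    List<Path> inFiles = new ArrayList<Path>();
    RemoteIterator<LocatedFileStatus> it = fs.listFiles(path, true);
    while (it.hasNext()) {
        inFiles.add(it.next().getPath());
    }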

From source file:eu.scape_project.tb.chutney.FileTracker.java

License:Apache License

/**
 * Populates the class's list of files in HDFS.
 * @param pStorageDir directory in HDFS containing the key file
 * @throws IOException
 */
private void generateFileList(Path pStorageDir) throws IOException {

    //iterate through the files in the storage directory
    FileStatus[] fileStatus = gFileSystem.listStatus(pStorageDir);
    if (null == fileStatus)
        return;
    for (FileStatus fs : fileStatus) {
        if (fs.isDirectory()) {
            generateFileList(fs.getPath());
        } else { //i.e. not a directory
            //add the file to the list if it is not the key file
            if (!fs.getPath().getName().equals(KEYFILEFILE)) {
                gHdfsFiles.add(fs.getPath().toString().substring(gHdfsStorageDir.toString().length()));
            }
        }
    }
    return;
}

From source file:fr.ens.biologie.genomique.eoulsan.data.protocols.HDFSPathDataProtocol.java

License:LGPL

@Override
public InputStream getData(final DataFile src) throws IOException {

    final Path path = getPath(src);

    if (path == null) {
        throw new NullPointerException("Path to create is null");
    }
    if (this.conf == null) {
        throw new NullPointerException("The configuration object is null");
    }

    final FileSystem fs = path.getFileSystem(this.conf);

    if (fs == null) {
        throw new IOException("Unable to create InputStream, the FileSystem is null");
    }

    final FileStatus fStatus = fs.getFileStatus(path);

    if (fStatus.isDirectory()) {

        final List<Path> paths = getPathToConcat(fs, path);

        if (paths != null && paths.size() > 0) {
            return new PathConcatInputStream(paths, this.conf);
        }
    }

    return fs.open(path);
}
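
In this last example isDirectory() switches behavior instead of filtering: a directory is served as the concatenation of its contents (via the project's PathConcatInputStream), while a plain file is simply opened.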