Example usage for org.apache.hadoop.fs FileStatus isDirectory

List of usage examples for org.apache.hadoop.fs FileStatus isDirectory

Introduction

On this page you can find example usages of org.apache.hadoop.fs FileStatus isDirectory.

Prototype

public boolean isDirectory() 

Source Link

Document

Is this a directory?

Usage

From source file:com.yahoo.glimmer.util.MergeSortTool.java

License:Open Source License

/**
 * Parses the command line, resolves the input glob to a list of files and merges them
 * into the output path via {@code MergeSortTool.mergeSort}.
 * <p>
 * When the optional count argument is given, the number of merged lines is also written
 * (followed by a newline) to that file.
 *
 * @param args command line arguments; input glob and output filename are required,
 *             the counts filename is optional
 * @return 0 on success, 1 when the glob matches nothing or matches a directory
 * @throws Exception if argument parsing, file system access or the merge fails
 */
@Override
public int run(String[] args) throws Exception {

    SimpleJSAP jsap = new SimpleJSAP(MergeSortTool.class.getName(),
            "Merges alpha numerically sorted text files on HDFS",
            new Parameter[] {
                    new FlaggedOption(INPUT_ARG, JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.REQUIRED, 'i',
                            INPUT_ARG, "input filenames glob eg. .../part-r-?????/sortedlines.text"),
                    new FlaggedOption(OUTPUT_ARG, JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.REQUIRED, 'o',
                            OUTPUT_ARG, "output filename"),
                    new FlaggedOption(COUNT_ARG, JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.NOT_REQUIRED, 'c',
                            COUNT_ARG,
                            "optionally create a file containing a count of the number of lines merged in text"), });

    JSAPResult jsapResult = jsap.parse(args);
    if (!jsapResult.success()) {
        System.err.print(jsap.getUsage());
        System.exit(1);
    }

    // Maybe quicker to use a MR job with one reducer.. Currently
    // decompression, merge and compression are all done in this thread..

    Path inputGlobPath = new Path(jsapResult.getString(INPUT_ARG));

    Configuration config = getConf();
    FileSystem fs = FileSystem.get(config);

    FileStatus[] sources = fs.globStatus(inputGlobPath);

    // globStatus may return null (not just an empty array) when nothing matches.
    if (sources == null || sources.length == 0) {
        System.err.println("No files matching input glob:" + inputGlobPath.toString());
        return 1;
    }

    List<Path> sourcePaths = new ArrayList<Path>(sources.length);
    for (FileStatus source : sources) {
        if (source.isDirectory()) {
            System.err.println(source.getPath().toString() + " is a directory.");
            return 1;
        }
        sourcePaths.add(source.getPath());
    }

    Path outputPath = new Path(jsapResult.getString(OUTPUT_ARG));

    CompressionCodecFactory factory = new CompressionCodecFactory(config);

    // The counts file is optional, so the stream stays null when the option is absent.
    FSDataOutputStream countsOutputStream = null;
    if (jsapResult.contains(COUNT_ARG)) {
        countsOutputStream = fs.create(new Path(jsapResult.getString(COUNT_ARG)));
    }

    try {
        int lineCount = MergeSortTool.mergeSort(fs, sourcePaths, outputPath, factory);
        System.out.println("Merged " + lineCount + " lines into " + outputPath.toString());
        if (countsOutputStream != null) {
            countsOutputStream.writeBytes("" + lineCount + '\n');
            countsOutputStream.flush();
        }
    } finally {
        // Only touch the stream when it was actually opened; also releases it if the merge fails.
        if (countsOutputStream != null) {
            countsOutputStream.close();
        }
    }

    return 0;
}

From source file:com.yahoo.labs.samoa.streams.fs.HDFSFileStreamSource.java

License:Apache License

/**
 * Prepares this source for reading from {@code path}.
 * <p>
 * If {@code path} is a directory, every regular file directly inside it that matches
 * {@code *.<ext>} (or {@code *} when {@code ext} is null) is recorded; otherwise
 * {@code path} itself is recorded as the only file.
 *
 * @param config Hadoop configuration used to obtain the file system
 * @param path   file or directory to read from
 * @param ext    file extension to filter on, or null to accept every file
 * @throws RuntimeException wrapping the IOException if the file system cannot be queried
 */
public void init(Configuration config, String path, String ext) {
    this.config = config;
    this.filePaths = new ArrayList<String>();
    Path root = new Path(path);
    try {
        FileSystem fileSystem = FileSystem.get(config);
        if (!fileSystem.getFileStatus(root).isDirectory()) {
            this.filePaths.add(path);
        } else {
            String pattern = (ext == null) ? "*" : "*." + ext;
            for (FileStatus candidate : fileSystem.globStatus(new Path(path, pattern))) {
                // Nested directories matching the pattern are not descended into.
                if (candidate.isFile()) {
                    this.filePaths.add(candidate.getPath().toString());
                }
            }
        }
    } catch (IOException ioe) {
        throw new RuntimeException("Failed getting list of files at:" + path, ioe);
    }

    // Start positioned before the first recorded file.
    this.currentIndex = -1;
}

From source file:de.huberlin.wbi.hiway.scheduler.Scheduler.java

License:Apache License

/**
 * Walks every application directory below the configured sandbox directory, copies each
 * {@code *.log} file it finds to the local working directory and feeds every line of it,
 * parsed as a {@code JsonReportEntry}, to {@code addEntryToDB}.
 * <p>
 * Any I/O or JSON failure prints the stack trace and terminates the JVM with status -1.
 */
protected void parseLogs() {
    String baseName = conf.get(HiWayConfiguration.HIWAY_AM_DIRECTORY_BASE,
            HiWayConfiguration.HIWAY_AM_DIRECTORY_BASE_DEFAULT);
    String sandboxName = conf.get(HiWayConfiguration.HIWAY_AM_DIRECTORY_CACHE,
            HiWayConfiguration.HIWAY_AM_DIRECTORY_CACHE_DEFAULT);
    Path baseDir = new Path(new Path(hdfs.getUri()), baseName);
    Path sandboxDir = new Path(baseDir, sandboxName);
    try {
        for (FileStatus appDirStatus : hdfs.listStatus(sandboxDir)) {
            if (!appDirStatus.isDirectory()) {
                continue;
            }
            Path appDir = appDirStatus.getPath();
            for (FileStatus candidate : hdfs.listStatus(appDir)) {
                Path remoteLog = candidate.getPath();
                if (!remoteLog.getName().endsWith(".log")) {
                    continue;
                }

                // The local copy is named after the application directory, not the log file.
                Path localCopy = new Path(appDir.getName());
                System.out.println("Parsing log " + localCopy.toString());
                hdfs.copyToLocalFile(false, remoteLog, localCopy);

                File localFile = new File(localCopy.toString());
                try (BufferedReader reader = new BufferedReader(new FileReader(localFile))) {
                    for (String line = reader.readLine(); line != null; line = reader.readLine()) {
                        addEntryToDB(new JsonReportEntry(line));
                    }
                }
            }
        }
    } catch (IOException | JSONException e) {
        e.printStackTrace();
        System.exit(-1);
    }
}

From source file:de.l3s.streamcorpus.terrier.ThriftFileCollectionInputFormat.java

License:Apache License

/**
 * Cuts a file into splits that each cover {@code numLinesPerSplit} lines; a final,
 * shorter split covers any remaining lines.
 *
 * @param status           status of the file to split
 * @param conf             configuration used to resolve the file system and the line reader
 * @param numLinesPerSplit number of lines each full split covers
 * @return the splits in file order; empty when no line could be read
 * @throws IOException if {@code status} denotes a directory or reading the file fails
 */
public static List<FileSplit> getSplitsForFile(FileStatus status, Configuration conf, int numLinesPerSplit)
        throws IOException {
    Path fileName = status.getPath();
    if (status.isDirectory()) {
        throw new IOException("Not a file: " + fileName);
    }

    List<FileSplit> splits = new ArrayList<FileSplit>();
    FileSystem fs = fileName.getFileSystem(conf);
    LineReader lineReader = null;
    try {
        lineReader = new LineReader(fs.open(fileName), conf);
        Text buffer = new Text();
        int pendingLines = 0;
        long splitStart = 0;
        long splitLength = 0;
        for (int consumed = lineReader.readLine(buffer); consumed > 0; consumed = lineReader.readLine(buffer)) {
            pendingLines++;
            splitLength += consumed;
            if (pendingLines == numLinesPerSplit) {
                splits.add(createFileSplit(fileName, splitStart, splitLength));
                splitStart += splitLength;
                splitLength = 0;
                pendingLines = 0;
            }
        }
        // Emit whatever is left over after the last full split.
        if (pendingLines != 0) {
            splits.add(createFileSplit(fileName, splitStart, splitLength));
        }
    } finally {
        if (lineReader != null) {
            lineReader.close();
        }
    }
    return splits;
}

From source file:eagle.security.hdfs.entity.FileStatusEntity.java

License:Apache License

/**
 * Creates an entity by copying the attributes reported by the given file status.
 * The path is not copied; the symlink target is read only when the status is a symlink.
 *
 * @param status file status whose attributes are copied
 * @throws IOException declared for the {@code getSymlink()} call made on symlink entries
 */
public FileStatusEntity(FileStatus status) throws IOException {
    // Size and kind.
    length = status.getLen();
    isdir = status.isDirectory();

    // Storage layout.
    block_replication = status.getReplication();
    blocksize = status.getBlockSize();

    // Timestamps.
    modification_time = status.getModificationTime();
    access_time = status.getAccessTime();

    // Access control.
    permission = status.getPermission();
    owner = status.getOwner();
    group = status.getGroup();

    if (status.isSymlink()) {
        symlink = status.getSymlink();
    }
}

From source file:eagle.service.security.hdfs.HDFSFileSystem.java

License:Apache License

/**
 * Browse Resources of passed Path and its sub directories
 * Note :: Sub directories only to determine the Sensitivity Type of Root Directories
 * @param filePath/*from   w  w w  . j  a va2 s  .c o  m*/
 * @return fileSystemResponseObj{ with SubDirectoryMap and ListOf FileStatus}
 * @throws Exception
 */
public HDFSFileSystemResponseEntity browseResources(String filePath) throws Exception {
    LOG.info("HDFS File Path   :  " + filePath + "   and EndPoint  : " + hdfsEndPoint);
    FileSystem hdfsFileSystem = null;
    FileStatus[] listStatus;
    HDFSFileSystemResponseEntity response = new HDFSFileSystemResponseEntity();
    Map<String, List<String>> subdirectoriesMap = new HashMap<String, List<String>>();
    try {
        Configuration config = createConfig();
        hdfsFileSystem = getFileSystem(config);
        Path path = new Path(filePath);
        listStatus = hdfsFileSystem.listStatus(path);
        LOG.info(" Browsing Sub Directories .... ");
        // Browse Sub- directories
        for (FileStatus fileStatus : listStatus) {
            FileStatus[] fileStatusList = null;
            if (fileStatus.isDirectory())
                fileStatusList = hdfsFileSystem.listStatus(new Path(fileStatus.getPath().toUri().getPath()));

            if (fileStatusList != null && fileStatusList.length > 0)
                subdirectoriesMap.put(fileStatus.getPath().toUri().getPath(), /*  Key would be Parent */
                        getSubDirectories(fileStatusList) /*  Value Would be Child Paths */);
        }
        response.setFileList(Arrays.asList(listStatus));
        response.setSubDirectoriesMap(subdirectoriesMap);

    } catch (Exception ex) {
        LOG.error(" Exception when browsing files for the path " + filePath, ex.getMessage());
        throw new Exception(
                " Exception When browsing Files/Directories in HDFS .. Message :  " + ex.getMessage());
    } finally {
        //Close the file system
        if (hdfsFileSystem != null)
            hdfsFileSystem.close();
    }
    return response;
}

From source file:eagle.service.security.hdfs.HDFSFileSystem.java

License:Apache License

/**
 * Collects the paths of the directory entries in the given listing.
 *
 * @param listStatus entries to inspect
 * @return URI path component of every entry that is a directory, in listing order
 * @throws IOException declared by the signature; nothing in this method body raises it directly
 */
public List<String> getSubDirectories(FileStatus[] listStatus) throws IOException {
    List<String> directoryPaths = new ArrayList<>();
    for (int i = 0; i < listStatus.length; i++) {
        FileStatus entry = listStatus[i];
        if (!entry.isDirectory()) {
            continue;
        }
        directoryPaths.add(entry.getPath().toUri().getPath());
    }
    return directoryPaths;
}

From source file:eagle.service.security.hdfs.HDFSResourceUtils.java

License:Apache License

/**
 * Filter Only directories /*from  w  ww. j av a2s  . co m*/
 * @param fileStatuses
 * @return
 */
public static List<String> filterDirectories(List<FileStatus> fileStatuses) {
    List<String> directories = new ArrayList<>();
    for (FileStatus fileStatus : fileStatuses) {
        if (fileStatus.isDirectory())
            directories.add(fileStatus.getPath().toUri().getPath());

    }
    return directories;
}

From source file:edu.nyu.vida.data_polygamy.utils.FrameworkUtils.java

License:BSD License

/**
 * Appends the records of every sequence file directly under {@code fromDirectory} to a
 * single sequence file written at {@code toFile}.
 * <p>
 * Sub-directories and files whose name starts with "_" are skipped. The writer and each
 * reader are closed even when reading or appending fails.
 *
 * @param fromDirectory directory whose files are merged
 * @param toFile        sequence file to create
 * @param keyClass      key class declared for the output file
 * @param valueClass    value class declared for the output file
 * @throws IOException if listing, reading or writing fails
 */
public static <K, V> void merge(Path fromDirectory, Path toFile, Class<K> keyClass, Class<V> valueClass)
        throws IOException {

    Configuration conf = new Configuration();

    FileSystem fs = FileSystem.get(conf);

    try (SequenceFile.Writer writer = SequenceFile.createWriter(conf, SequenceFile.Writer.file(toFile),
            SequenceFile.Writer.keyClass(keyClass), SequenceFile.Writer.valueClass(valueClass))) {

        for (FileStatus status : fs.listStatus(fromDirectory)) {
            if (status.isDirectory()) {
                System.out.println("Skip directory '" + status.getPath().getName() + "'");
                continue;
            }

            Path file = status.getPath();

            // Job output folders contain marker files such as "_SUCCESS".
            if (file.getName().startsWith("_")) {
                System.out.println("Skip \"_\"-file '" + file.getName() + "'");
                continue;
            }

            try (SequenceFile.Reader reader = new SequenceFile.Reader(conf, SequenceFile.Reader.file(file))) {
                // Key and value holders are instantiated from the classes recorded in the input file.
                Writable key = (Writable) ReflectionUtils.newInstance(reader.getKeyClass(), conf);
                Writable value = (Writable) ReflectionUtils.newInstance(reader.getValueClass(), conf);

                while (reader.next(key, value)) {
                    writer.append(key, value);
                }
            }
        }
    }
}

From source file:edu.nyu.vida.data_polygamy.utils.GetMergeFiles.java

License:BSD License

/**
 * Copies relationship results from the file system's home directory to local files.
 * <p>
 * The first comma-separated field of every line in the metadata file is taken as a
 * dataset id. Each directory under {@code <home>/<fromDirectory>} whose name contains two
 * ids of the form {@code xxxx-xxxx} joined by "-" is processed when both ids are known
 * datasets: every sub-directory is merged into one local file, placed in the events
 * directory when its name contains "events" and in the outliers directory otherwise.
 *
 * @param args {@code fromDirectory toEventsDirectory toOutliersDirectory metadataFile}
 * @throws IllegalArgumentException if fewer than four arguments are supplied
 * @throws IOException              if the metadata file or a file system cannot be read
 * @throws URISyntaxException       declared by the signature
 */
public static void main(String[] args) throws IllegalArgumentException, IOException, URISyntaxException {
    if (args.length < 4) {
        throw new IllegalArgumentException(
                "Expected 4 arguments: <fromDirectory> <toEventsDirectory> <toOutliersDirectory> <metadataFile>");
    }
    String fromDirectory = args[0];
    String toEventsDirectory = args[1];
    String toOutliersDirectory = args[2];
    String metadataFile = args[3];

    // Detecting datasets.

    HashSet<String> datasets = new HashSet<String>();

    // try-with-resources closes the reader even if a read fails part-way through.
    try (BufferedReader bufferedReader = new BufferedReader(new FileReader(metadataFile))) {
        String line;
        while ((line = bufferedReader.readLine()) != null) {
            String[] parts = line.split(",");
            datasets.add(parts[0]);
        }
    }

    // Downloading relationships.

    String relationshipPatternStr = "([a-zA-Z0-9]{4}\\-[a-zA-Z0-9]{4})\\-([a-zA-Z0-9]{4}\\-[a-zA-Z0-9]{4})";
    Pattern relationshipPattern = Pattern.compile(relationshipPatternStr);

    Configuration conf = new Configuration();
    FileSystem fs = FileSystem.get(conf);
    FileSystem localFS = FileSystem.getLocal(conf);

    for (FileStatus status : fs.listStatus(new Path(fs.getHomeDirectory() + "/" + fromDirectory))) {
        if (!status.isDirectory()) {
            continue;
        }
        Path file = status.getPath();

        Matcher m = relationshipPattern.matcher(file.getName());
        if (!m.find()) {
            continue;
        }

        // Both sides of the relationship must be datasets listed in the metadata file.
        String ds1 = m.group(1);
        String ds2 = m.group(2);
        if (!datasets.contains(ds1) || !datasets.contains(ds2)) {
            continue;
        }

        for (FileStatus statusDir : fs.listStatus(file)) {
            if (!statusDir.isDirectory()) {
                continue;
            }

            Path fromPath = statusDir.getPath();
            String toPathStr;
            if (fromPath.getName().contains("events")) {
                toPathStr = toEventsDirectory + "/" + fromPath.getParent().getName() + "-" + fromPath.getName();
            } else {
                toPathStr = toOutliersDirectory + "/" + fromPath.getParent().getName() + "-"
                        + fromPath.getName();
            }
            Path toPath = new Path(toPathStr);

            System.out.println("Copying:");
            System.out.println("  From: " + fromPath.toString());
            System.out.println("  To: " + toPath.toString());

            FileUtil.copyMerge(fs, // HDFS File System
                    fromPath, // HDFS path
                    localFS, // Local File System
                    toPath, // Local Path
                    false, // Do not delete HDFS path
                    conf, // Configuration
                    null);
        }
    }
}