Usage examples for org.apache.hadoop.fs.FileStatus#isDirectory
public boolean isDirectory()
Returns true if this FileStatus represents a directory, false otherwise.
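Before the collected examples, a minimal self-contained sketch of the call in isolation; the default-Configuration filesystem and the /tmp/data path are illustrative assumptions, not taken from any of the examples below:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class IsDirectoryDemo {
    public static void main(String[] args) throws Exception {
        // Bind to whatever filesystem the default Configuration points at (local fs or HDFS).
        FileSystem fs = FileSystem.get(new Configuration());
        // /tmp/data is a hypothetical directory used only for this sketch.
        for (FileStatus status : fs.listStatus(new Path("/tmp/data"))) {
            // isDirectory() is the directory/file discriminator used throughout the examples below.
            System.out.println(status.getPath() + " -> "
                    + (status.isDirectory() ? "directory" : "file of " + status.getLen() + " bytes"));
        }
    }
}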
From source file: co.nubetech.hiho.merge.TestMergeJob.java
License: Apache License
@Test
public void testMergeByValueWithDelimitedTextInputFormat() throws Exception {
    // Seed two HDFS input directories; "Charles Wood" appears in both, so a
    // merge by value should collapse it and emit 5 of the 6 records.
    final String inputData1 = "Macon Kent,6269 Aenean St.,1-247-399-1051,08253"
            + "\nDale Zamora,521-7792 Mauris Rd.,1-214-625-6970,90510"
            + "\nCharles Wood,525-9709 In Rd.,1-370-528-4758,62714";
    final String inputData2 = "Macaulay Jackson,5435 Dui. Avenue,1-770-395-6446,31584"
            + "\nCharles Wood,525-9709 In Rd.,1-370-528-4758,62714"
            + "\nTimon Leonard,716 Ac Ave,1-857-935-3882,62240";
    createTextFileInHDFS(inputData1, "/input1", "testFile1.txt");
    createTextFileInHDFS(inputData2, "/input2", "testFile2.txt");
    String[] args = new String[] { "-newPath", "/input1", "-oldPath", "/input2",
            "-mergeBy", "value", "-outputPath", "output",
            "-inputFormat", "co.nubetech.hiho.dedup.DelimitedTextInputFormat",
            "-inputKeyClassName", "org.apache.hadoop.io.Text",
            "-inputValueClassName", "org.apache.hadoop.io.Text" };
    MergeJob job = runMergeJobs(args);
    assertEquals(3, job.getTotalRecordsNew());
    assertEquals(3, job.getTotalRecordsOld());
    assertEquals(0, job.getBadRecords());
    assertEquals(5, job.getOutput());
    FileSystem outputFS = getFileSystem();
    Path outputPath = new Path(outputFS.getHomeDirectory(), "output");
    FileStatus[] status = outputFS.listStatus(outputPath, getOutputPathFilter());
    assertTrue(outputFS.exists(outputPath));
    List<String> expectedOutput = new ArrayList<String>();
    expectedOutput.add("Macon Kent,6269 Aenean St.,1-247-399-1051,08253");
    expectedOutput.add("Dale Zamora,521-7792 Mauris Rd.,1-214-625-6970,90510");
    expectedOutput.add("Charles Wood,525-9709 In Rd.,1-370-528-4758,62714");
    expectedOutput.add("Timon Leonard,716 Ac Ave,1-857-935-3882,62240");
    expectedOutput.add("Macaulay Jackson,5435 Dui. Avenue,1-770-395-6446,31584");
    int count = 0;
    // Walk every entry in the output directory; isDirectory() tells files
    // apart from subdirectories before opening them.
    for (FileStatus fileStat : status) {
        logger.debug("File status is " + fileStat.getPath() + " and is it a dir? " + fileStat.isDirectory());
        FSDataInputStream in = outputFS.open(fileStat.getPath());
        String line = null;
        while ((line = in.readLine()) != null) {
            logger.debug("Output is " + line);
            assertTrue("Matched output " + line, expectedOutput.contains(line));
            expectedOutput.remove(line);
            count++;
        }
        in.close();
    }
    assertEquals(5, count);
}
From source file: co.nubetech.hiho.merge.TestMergeJob.java
License: Apache License
@Test
public void testMergeByValueWithTextInputFormat() throws Exception {
    final String inputData1 = "Macon Kent,6269 Aenean St.,1-247-399-1051,08253"
            + "\nDale Zamora,521-7792 Mauris Rd.,1-214-625-6970,90510"
            + "\nCharles Wood,525-9709 In Rd.,1-370-528-4758,62714";
    final String inputData2 = "Timon Leonard,716 Ac Ave,1-857-935-3882,62240"
            + "\nMacaulay Jackson,5435 Dui. Avenue,1-770-395-6446,31584"
            + "\nCharles Wood,525-9709 In Rd.,1-370-528-4758,62714";
    createTextFileInHDFS(inputData1, "/input1", "testFile1.txt");
    createTextFileInHDFS(inputData2, "/input2", "testFile2.txt");
    String[] args = new String[] { "-newPath", "/input1", "-oldPath", "/input2",
            "-mergeBy", "value", "-outputPath", "output",
            "-inputFormat", "org.apache.hadoop.mapreduce.lib.input.TextInputFormat",
            "-outputFormat", "co.nubetech.hiho.mapreduce.lib.output.NoKeyOnlyValueOutputFormat" };
    MergeJob job = runMergeJobs(args);
    assertEquals(3, job.getTotalRecordsNew());
    assertEquals(3, job.getTotalRecordsOld());
    assertEquals(0, job.getBadRecords());
    assertEquals(5, job.getOutput());
    FileSystem outputFS = getFileSystem();
    Path outputPath = new Path(outputFS.getHomeDirectory(), "output");
    FileStatus[] status = outputFS.listStatus(outputPath, getOutputPathFilter());
    assertTrue(outputFS.exists(outputPath));
    List<String> expectedOutput = new ArrayList<String>();
    expectedOutput.add("Macon Kent,6269 Aenean St.,1-247-399-1051,08253");
    expectedOutput.add("Dale Zamora,521-7792 Mauris Rd.,1-214-625-6970,90510");
    expectedOutput.add("Charles Wood,525-9709 In Rd.,1-370-528-4758,62714");
    expectedOutput.add("Timon Leonard,716 Ac Ave,1-857-935-3882,62240");
    expectedOutput.add("Macaulay Jackson,5435 Dui. Avenue,1-770-395-6446,31584");
    int count = 0;
    for (FileStatus fileStat : status) {
        logger.debug("File status is " + fileStat.getPath() + " and is it a dir? " + fileStat.isDirectory());
        FSDataInputStream in = outputFS.open(fileStat.getPath());
        String line = null;
        while ((line = in.readLine()) != null) {
            logger.debug("Output is " + line);
            assertTrue("Matched output " + line, expectedOutput.contains(line));
            expectedOutput.remove(line);
            count++;
        }
        in.close();
    }
    assertEquals(5, count);
}
From source file: co.nubetech.hiho.merge.TestMergeJob.java
License: Apache License
@Test
public void testMergeByKeyWithKeyValueTextInputFormat() throws Exception {
    final String inputData1 = "A\tMacon Kent,6269 Aenean St.,1-247-399-1051,08253"
            + "\nB\tDale Zamora,521-7792 Mauris Rd.,1-214-625-6970,90510"
            + "\nC\tCharles Wood,525-9709 In Rd.,1-370-528-4758,62714";
    final String inputData2 = "A\tTimon Leonard,716 Ac Ave,1-857-935-3882,62240"
            + "\nD\tMacaulay Jackson,5435 Dui. Avenue,1-770-395-6446,31584"
            + "\nB\tCharles Wood,525-9709 In Rd.,1-370-528-4758,62714";
    createTextFileInHDFS(inputData1, "/input1", "testFile1.txt");
    createTextFileInHDFS(inputData2, "/input2", "testFile2.txt");
    String[] args = new String[] { "-newPath", "/input1", "-oldPath", "/input2",
            "-mergeBy", "key", "-outputPath", "output",
            "-inputFormat", "org.apache.hadoop.mapreduce.lib.input.KeyValueTextInputFormat",
            "-inputKeyClassName", "org.apache.hadoop.io.Text",
            "-inputValueClassName", "org.apache.hadoop.io.Text",
            "-outputFormat", "co.nubetech.hiho.mapreduce.lib.output.NoKeyOnlyValueOutputFormat" };
    MergeJob job = runMergeJobs(args);
    assertEquals(3, job.getTotalRecordsNew());
    assertEquals(3, job.getTotalRecordsOld());
    assertEquals(0, job.getBadRecords());
    assertEquals(4, job.getOutput());
    FileSystem outputFS = getFileSystem();
    Path outputPath = new Path(outputFS.getHomeDirectory(), "output");
    FileStatus[] status = outputFS.listStatus(outputPath, getOutputPathFilter());
    assertTrue(outputFS.exists(outputPath));
    List<String> expectedOutput = new ArrayList<String>();
    expectedOutput.add("Macon Kent,6269 Aenean St.,1-247-399-1051,08253");
    expectedOutput.add("Dale Zamora,521-7792 Mauris Rd.,1-214-625-6970,90510");
    expectedOutput.add("Charles Wood,525-9709 In Rd.,1-370-528-4758,62714");
    expectedOutput.add("Macaulay Jackson,5435 Dui. Avenue,1-770-395-6446,31584");
    int count = 0;
    for (FileStatus fileStat : status) {
        logger.debug("File status is " + fileStat.getPath() + " and is it a dir? " + fileStat.isDirectory());
        FSDataInputStream in = outputFS.open(fileStat.getPath());
        String line = null;
        while ((line = in.readLine()) != null) {
            logger.debug("Output is " + line);
            assertTrue("Matched output " + line, expectedOutput.contains(line));
            expectedOutput.remove(line);
            count++;
        }
        in.close();
    }
    assertEquals(4, count);
}
From source file: co.nubetech.hiho.merge.TestMergeJob.java
License: Apache License
@Test
public void testMergeByValueWithSequenceFileAsTextInputFormat() throws Exception {
    HashMap<IntWritable, Text> inputData1 = new HashMap<IntWritable, Text>();
    inputData1.put(new IntWritable(1), new Text("Macon Kent,6269 Aenean St.,1-247-399-1051,08253"));
    inputData1.put(new IntWritable(2), new Text("Dale Zamora,521-7792 Mauris Rd.,1-214-625-6970,90510"));
    inputData1.put(new IntWritable(3), new Text("Charles Wood,525-9709 In Rd.,1-370-528-4758,62714"));
    createSequenceFileInHdfs(inputData1, "/input1", "testFile1.seq");
    HashMap<IntWritable, Text> inputData2 = new HashMap<IntWritable, Text>();
    inputData2.put(new IntWritable(1), new Text("Timon Leonard,716 Ac Ave,1-857-935-3882,62240"));
    inputData2.put(new IntWritable(2), new Text("Macaulay Jackson,5435 Dui. Avenue,1-770-395-6446,31584"));
    inputData2.put(new IntWritable(4), new Text("Charles Wood,525-9709 In Rd.,1-370-528-4758,62714"));
    createSequenceFileInHdfs(inputData2, "/input2", "testFile2.seq");
    String[] args = new String[] { "-newPath", "/input1", "-oldPath", "/input2",
            "-mergeBy", "value", "-outputPath", "output",
            "-inputFormat", "org.apache.hadoop.mapreduce.lib.input.SequenceFileAsTextInputFormat",
            "-inputKeyClassName", "org.apache.hadoop.io.Text",
            "-inputValueClassName", "org.apache.hadoop.io.Text",
            "-outputFormat", "co.nubetech.hiho.mapreduce.lib.output.NoKeyOnlyValueOutputFormat" };
    MergeJob job = runMergeJobs(args);
    assertEquals(3, job.getTotalRecordsNew());
    assertEquals(3, job.getTotalRecordsOld());
    assertEquals(0, job.getBadRecords());
    assertEquals(5, job.getOutput());
    FileSystem outputFS = getFileSystem();
    Path outputPath = new Path(outputFS.getHomeDirectory(), "output");
    FileStatus[] status = outputFS.listStatus(outputPath, getOutputPathFilter());
    assertTrue(outputFS.exists(outputPath));
    List<String> expectedOutput = new ArrayList<String>();
    expectedOutput.add("Macon Kent,6269 Aenean St.,1-247-399-1051,08253");
    expectedOutput.add("Dale Zamora,521-7792 Mauris Rd.,1-214-625-6970,90510");
    expectedOutput.add("Charles Wood,525-9709 In Rd.,1-370-528-4758,62714");
    expectedOutput.add("Timon Leonard,716 Ac Ave,1-857-935-3882,62240");
    expectedOutput.add("Macaulay Jackson,5435 Dui. Avenue,1-770-395-6446,31584");
    int count = 0;
    for (FileStatus fileStat : status) {
        logger.debug("File status is " + fileStat.getPath() + " and is it a dir? " + fileStat.isDirectory());
        FSDataInputStream in = outputFS.open(fileStat.getPath());
        String line = null;
        while ((line = in.readLine()) != null) {
            logger.debug("Output is " + line);
            assertTrue("Matched output " + line, expectedOutput.contains(line));
            expectedOutput.remove(line);
            count++;
        }
        in.close();
    }
    assertEquals(5, count);
}
From source file: com.aliyun.fs.oss.nat.NativeOssFileSystem.java
License: Apache License
@Override
public FSDataOutputStream create(Path f, FsPermission permission, boolean overwrite, int bufferSize,
        short replication, long blockSize, Progressable progress) throws IOException {
    FileStatus status = null;
    try {
        // get the status or throw an FNFE
        status = getFileStatus(f);
        // if the thread reaches here, there is something at the path
        if (status.isDirectory()) {
            // path references a directory: automatic error
            throw new FileAlreadyExistsException(f + " is a directory");
        }
        if (!overwrite) {
            // path references a file and overwrite is disabled
            throw new FileAlreadyExistsException(f + " already exists");
        }
        LOG.debug("Overwriting file " + f);
    } catch (FileNotFoundException e) {
        // this means the file is not found
    }
    Path absolutePath = makeAbsolute(f);
    String key = pathToKey(absolutePath);
    return new FSDataOutputStream(
            new NativeOssFsOutputStream(getConf(), store, key, false, progress, bufferSize), statistics);
}
From source file: com.aliyun.fs.utils.OssInputUtils.java
License: Apache License
public FileSplit[] getSplits(String file, int numSplits) throws IOException {
    Path path = new Path(file);
    this.fs = FileSystem.get(path.toUri(), conf);
    fs.initialize(path.toUri(), conf);
    FileStatus[] files = fs.listStatus(path);
    long totalSize = 0;
    // Splits are computed over plain files only; a directory entry is an error.
    for (FileStatus file1 : files) {
        if (file1.isDirectory()) {
            throw new IOException("Not a file: " + file1.getPath());
        }
        totalSize += file1.getLen();
    }
    // goalSize spreads the total input evenly across the requested number of splits,
    // bounded below by the configured minimum split size.
    long goalSize = totalSize / (numSplits == 0 ? 1 : numSplits);
    long minSize = Math.max(
            conf.getLong(org.apache.hadoop.mapreduce.lib.input.FileInputFormat.SPLIT_MINSIZE, 1), 1);
    ArrayList<FileSplit> splits = new ArrayList<FileSplit>(numSplits);
    for (FileStatus file2 : files) {
        Path fp = file2.getPath();
        long length = file2.getLen();
        if (length != 0) {
            long splitSize = Math.max(minSize, goalSize);
            long bytesRemaining = length;
            // Carve fixed-size splits while the remainder exceeds splitSize by more
            // than the slop factor; the final chunk absorbs the leftover bytes.
            while (((double) bytesRemaining) / splitSize > SPLIT_SLOP) {
                FileSplit split = new FileSplit(fp, length - bytesRemaining, splitSize, new String[0]);
                splits.add(split);
                bytesRemaining -= splitSize;
            }
            if (bytesRemaining != 0) {
                FileSplit split = new FileSplit(fp, length - bytesRemaining, bytesRemaining, new String[0]);
                splits.add(split);
            }
        }
    }
    LOG.info("Total # of splits: " + splits.size());
    return splits.toArray(new FileSplit[splits.size()]);
}
From source file: com.aliyun.odps.fs.VolumeFileSystem.java
License: Apache License
@Override
public FSDataInputStream open(Path f, int bufferSize) throws IOException {
    Path absF = fixRelativePart(f);
    String filePath = getPathName(absF);
    if (!exists(absF)) {
        throw new FileNotFoundException(filePath);
    }
    FileStatus fileStatus = getFileStatus(f);
    if (fileStatus.isDirectory()) {
        throw new FileNotFoundException(VolumeFSErrorMessageGenerator.isADirectory(filePath));
    }
    return new FSDataInputStream(
            new VolumeFSInputStream(filePath, volumeClient, fileStatus.getLen(), getConf()));
}
From source file: com.architecting.ch07.MapReduceIndexerTool.java
License: Apache License
/** API for Java clients; visible for testing; may become a public API eventually. */
int run(Options options) throws Exception {
    if (getConf().getBoolean("isMR1", false) && "local".equals(getConf().get("mapred.job.tracker"))) {
        throw new IllegalStateException(
                "Running with LocalJobRunner (i.e. all of Hadoop inside a single JVM) is not supported "
                        + "because LocalJobRunner does not (yet) implement the Hadoop Distributed Cache feature, "
                        + "which is required for passing files via --files and --libjars");
    }
    long programStartTime = System.nanoTime();
    getConf().setInt(SolrOutputFormat.SOLR_RECORD_WRITER_MAX_SEGMENTS, options.maxSegments);

    // switch off a false warning about allegedly not implementing Tool,
    // see http://hadoop.6.n7.nabble.com/GenericOptionsParser-warning-td8103.html
    // and https://issues.apache.org/jira/browse/HADOOP-8183
    getConf().setBoolean("mapred.used.genericoptionsparser", true);

    if (options.log4jConfigFile != null) {
        Utils.setLogConfigFile(options.log4jConfigFile, getConf());
        addDistributedCacheFile(options.log4jConfigFile, getConf());
    }

    Configuration config = HBaseConfiguration.create();
    Job job = Job.getInstance(config);
    job.setJarByClass(getClass());

    // To be able to run this example from eclipse, we need to make sure
    // the built jar is distributed to the map-reduce tasks from the
    // local file system.
    job.addCacheArchive(new URI("file:///home/cloudera/ahae/target/ahae.jar"));

    FileSystem fs = options.outputDir.getFileSystem(job.getConfiguration());
    if (fs.exists(options.outputDir) && !delete(options.outputDir, true, fs)) {
        return -1;
    }
    Path outputResultsDir = new Path(options.outputDir, RESULTS_DIR);
    Path outputReduceDir = new Path(options.outputDir, "reducers");

    int reducers = 1;
    Scan scan = new Scan();
    scan.addFamily(CF);
    // tag::SETUP[]
    scan.setCaching(500); // <1>
    scan.setCacheBlocks(false); // <2>
    TableMapReduceUtil.initTableMapperJob( // <3>
            options.inputTable, // Input HBase table name
            scan, // Scan instance to control what to index
            HBaseAvroToSOLRMapper.class, // Mapper to parse cells content.
            Text.class, // Mapper output key
            SolrInputDocumentWritable.class, // Mapper output value
            job);

    FileOutputFormat.setOutputPath(job, outputReduceDir);
    job.setJobName(getClass().getName() + "/" + Utils.getShortClassName(HBaseAvroToSOLRMapper.class));
    job.setReducerClass(SolrReducer.class); // <4>
    job.setPartitionerClass(SolrCloudPartitioner.class); // <5>
    job.getConfiguration().set(SolrCloudPartitioner.ZKHOST, options.zkHost);
    job.getConfiguration().set(SolrCloudPartitioner.COLLECTION, options.collection);
    job.getConfiguration().setInt(SolrCloudPartitioner.SHARDS, options.shards);

    job.setOutputFormatClass(SolrOutputFormat.class);
    SolrOutputFormat.setupSolrHomeCache(options.solrHomeDir, job);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(SolrInputDocumentWritable.class);
    job.setSpeculativeExecution(false);
    // end::SETUP[]
    job.setNumReduceTasks(reducers); // Set the number of reducers based on the number of shards we have.

    if (!waitForCompletion(job, true)) {
        return -1; // job failed
    }

    assert reducers == options.shards;

    // normalize output shard dir prefix, i.e.
    // rename part-r-00000 to part-00000 (stems from zero tree merge iterations)
    // rename part-m-00000 to part-00000 (stems from > 0 tree merge iterations)
    for (FileStatus stats : fs.listStatus(outputReduceDir)) {
        String dirPrefix = SolrOutputFormat.getOutputName(job);
        Path srcPath = stats.getPath();
        if (stats.isDirectory() && srcPath.getName().startsWith(dirPrefix)) {
            String dstName = dirPrefix + srcPath.getName().substring(dirPrefix.length() + "-m".length());
            Path dstPath = new Path(srcPath.getParent(), dstName);
            if (!rename(srcPath, dstPath, fs)) {
                return -1;
            }
        }
    }

    // publish results dir
    if (!rename(outputReduceDir, outputResultsDir, fs)) {
        return -1;
    }

    if (options.goLive && !new GoLive().goLive(options, listSortedOutputShardDirs(job, outputResultsDir, fs))) {
        return -1;
    }

    goodbye(job, programStartTime);
    return 0;
}
From source file: com.architecting.ch07.MapReduceIndexerTool.java
License: Apache License
private FileStatus[] listSortedOutputShardDirs(Job job, Path outputReduceDir, FileSystem fs)
        throws FileNotFoundException, IOException {
    final String dirPrefix = SolrOutputFormat.getOutputName(job);
    FileStatus[] dirs = fs.listStatus(outputReduceDir, new PathFilter() {
        @Override
        public boolean accept(Path path) {
            return path.getName().startsWith(dirPrefix);
        }
    });
    for (FileStatus dir : dirs) {
        if (!dir.isDirectory()) {
            throw new IllegalStateException("Not a directory: " + dir.getPath());
        }
    }
    // use alphanumeric sort (rather than lexicographical sort) to properly
    // handle more than 99999 shards
    Arrays.sort(dirs, new Comparator<FileStatus>() {
        @Override
        public int compare(FileStatus f1, FileStatus f2) {
            return new AlphaNumericComparator().compare(f1.getPath().getName(), f2.getPath().getName());
        }
    });
    return dirs;
}
From source file: com.asakusafw.runtime.compatibility.hadoop2.FileSystemCompatibilityHadoop2.java
License: Apache License
@Override
public boolean isDirectory(FileStatus status) {
    if (status == null) {
        throw new IllegalArgumentException("status must not be null"); //$NON-NLS-1$
    }
    return status.isDirectory();
}