Example usage for org.apache.hadoop.fs FileStatus isDirectory

List of usage examples for org.apache.hadoop.fs FileStatus isDirectory

Introduction

On this page you can find example usages of org.apache.hadoop.fs.FileStatus.isDirectory().

Prototype

public boolean isDirectory() 

Document

Is this a directory?
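
Before diving into the full examples, here is a minimal, self-contained sketch of the call. The path /tmp/example is a placeholder, and the default FileSystem of the active Configuration is assumed:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class IsDirectoryExample {
    public static void main(String[] args) throws Exception {
        FileSystem fs = FileSystem.get(new Configuration());
        // Branch on isDirectory() for each entry under a placeholder path.
        for (FileStatus status : fs.listStatus(new Path("/tmp/example"))) {
            if (status.isDirectory()) {
                System.out.println("dir:  " + status.getPath());
            } else {
                System.out.println("file: " + status.getPath() + " (" + status.getLen() + " bytes)");
            }
        }
    }
}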

Usage

From source file: co.nubetech.hiho.merge.TestMergeJob.java

License: Apache License

@Test
public void testMergeByValueWithDelimitedTextInputFormat() throws Exception {

    final String inputData1 = "Macon Kent,6269 Aenean St.,1-247-399-1051,08253"
            + "\nDale Zamora,521-7792 Mauris Rd.,1-214-625-6970,90510"
            + "\nCharles Wood,525-9709 In Rd.,1-370-528-4758,62714";
    final String inputData2 = "Macaulay Jackson,5435 Dui. Avenue,1-770-395-6446,31584"
            + "\nCharles Wood,525-9709 In Rd.,1-370-528-4758,62714"
            + "\nTimon Leonard,716 Ac Ave,1-857-935-3882,62240";
    createTextFileInHDFS(inputData1, "/input1", "testFile1.txt");
    createTextFileInHDFS(inputData2, "/input2", "testFile2.txt");

    String[] args = new String[] { "-newPath", "/input1", "-oldPath", "/input2", "-mergeBy", "value",
            "-outputPath", "output", "-inputFormat", "co.nubetech.hiho.dedup.DelimitedTextInputFormat",
            "-inputKeyClassName", "org.apache.hadoop.io.Text", "-inputValueClassName",
            "org.apache.hadoop.io.Text", };
    MergeJob job = runMergeJobs(args);
    assertEquals(3, job.getTotalRecordsNew());
    assertEquals(3, job.getTotalRecordsOld());
    assertEquals(0, job.getBadRecords());
    assertEquals(5, job.getOutput());

    FileSystem outputFS = getFileSystem();
    Path outputPath = new Path(outputFS.getHomeDirectory(), "output");
    FileStatus[] status = outputFS.listStatus(outputPath, getOutputPathFilter());
    assertTrue(outputFS.exists(outputPath));
    List<String> expectedOutput = new ArrayList<String>();
    expectedOutput.add("Macon Kent,6269 Aenean St.,1-247-399-1051,08253");
    expectedOutput.add("Dale Zamora,521-7792 Mauris Rd.,1-214-625-6970,90510");
    expectedOutput.add("Charles Wood,525-9709 In Rd.,1-370-528-4758,62714");
    expectedOutput.add("Timon Leonard,716 Ac Ave,1-857-935-3882,62240");
    expectedOutput.add("Macaulay Jackson,5435 Dui. Avenue,1-770-395-6446,31584");
    int count = 0;
    for (FileStatus fileStat : status) {
        logger.debug("File status is " + fileStat.getPath() + " and is it a dir? " + fileStat.isDirectory());
        FSDataInputStream in = outputFS.open(fileStat.getPath());
        String line = null;
        while ((line = in.readLine()) != null) {
            logger.debug("Output is " + line);
            assertTrue("Matched output " + line, expectedOutput.contains(line));
            expectedOutput.remove(line);
            count++;
        }
        in.close();
    }
    assertEquals(5, count);

}
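
getOutputPathFilter() is defined elsewhere in this test class; a plausible sketch, assuming it merely skips Hadoop bookkeeping entries such as _SUCCESS and _logs (the real HIHO implementation may differ):

public static PathFilter getOutputPathFilter() {
    return new PathFilter() {
        @Override
        public boolean accept(Path path) {
            // Skip bookkeeping entries like _SUCCESS, _logs, and hidden files.
            String name = path.getName();
            return !name.startsWith("_") && !name.startsWith(".");
        }
    };
}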

From source file: co.nubetech.hiho.merge.TestMergeJob.java

License: Apache License

@Test
public void testMergeByValueWithTextInputFormat() throws Exception {

    final String inputData1 = "Macon Kent,6269 Aenean St.,1-247-399-1051,08253"
            + "\nDale Zamora,521-7792 Mauris Rd.,1-214-625-6970,90510"
            + "\nCharles Wood,525-9709 In Rd.,1-370-528-4758,62714";
    final String inputData2 = "Timon Leonard,716 Ac Ave,1-857-935-3882,62240"
            + "\nMacaulay Jackson,5435 Dui. Avenue,1-770-395-6446,31584"
            + "\nCharles Wood,525-9709 In Rd.,1-370-528-4758,62714";
    createTextFileInHDFS(inputData1, "/input1", "testFile1.txt");
    createTextFileInHDFS(inputData2, "/input2", "testFile2.txt");

    String[] args = new String[] { "-newPath", "/input1", "-oldPath", "/input2", "-mergeBy", "value",
            "-outputPath", "output", "-inputFormat", "org.apache.hadoop.mapreduce.lib.input.TextInputFormat",
            "-outputFormat", "co.nubetech.hiho.mapreduce.lib.output.NoKeyOnlyValueOutputFormat" };
    MergeJob job = runMergeJobs(args);
    assertEquals(3, job.getTotalRecordsNew());
    assertEquals(3, job.getTotalRecordsOld());
    assertEquals(0, job.getBadRecords());
    assertEquals(5, job.getOutput());

    FileSystem outputFS = getFileSystem();
    Path outputPath = new Path(outputFS.getHomeDirectory(), "output");
    FileStatus[] status = outputFS.listStatus(outputPath, getOutputPathFilter());
    assertTrue(outputFS.exists(outputPath));
    List<String> expectedOutput = new ArrayList<String>();
    expectedOutput.add("Macon Kent,6269 Aenean St.,1-247-399-1051,08253");
    expectedOutput.add("Dale Zamora,521-7792 Mauris Rd.,1-214-625-6970,90510");
    expectedOutput.add("Charles Wood,525-9709 In Rd.,1-370-528-4758,62714");
    expectedOutput.add("Timon Leonard,716 Ac Ave,1-857-935-3882,62240");
    expectedOutput.add("Macaulay Jackson,5435 Dui. Avenue,1-770-395-6446,31584");
    int count = 0;
    for (FileStatus fileStat : status) {
        logger.debug("File status is " + fileStat.getPath() + " and is it a dir? " + fileStat.isDirectory());
        FSDataInputStream in = outputFS.open(fileStat.getPath());
        String line = null;
        while ((line = in.readLine()) != null) {
            logger.debug("Output is " + line);
            assertTrue("Matched output " + line, expectedOutput.contains(line));
            expectedOutput.remove(line);
            count++;
        }
        in.close();
    }
    assertEquals(5, count);
}

From source file: co.nubetech.hiho.merge.TestMergeJob.java

License: Apache License

@Test
public void testMergeByKeyWithKeyValueTextInputFormat() throws Exception {

    final String inputData1 = "A\tMacon Kent,6269 Aenean St.,1-247-399-1051,08253"
            + "\nB\tDale Zamora,521-7792 Mauris Rd.,1-214-625-6970,90510"
            + "\nC\tCharles Wood,525-9709 In Rd.,1-370-528-4758,62714";
    final String inputData2 = "A\tTimon Leonard,716 Ac Ave,1-857-935-3882,62240"
            + "\nD\tMacaulay Jackson,5435 Dui. Avenue,1-770-395-6446,31584"
            + "\nB\tCharles Wood,525-9709 In Rd.,1-370-528-4758,62714";
    createTextFileInHDFS(inputData1, "/input1", "testFile1.txt");
    createTextFileInHDFS(inputData2, "/input2", "testFile2.txt");

    String[] args = new String[] { "-newPath", "/input1", "-oldPath", "/input2", "-mergeBy", "key",
            "-outputPath", "output", "-inputFormat",
            "org.apache.hadoop.mapreduce.lib.input.KeyValueTextInputFormat", "-inputKeyClassName",
            "org.apache.hadoop.io.Text", "-inputValueClassName", "org.apache.hadoop.io.Text", "-outputFormat",
            "co.nubetech.hiho.mapreduce.lib.output.NoKeyOnlyValueOutputFormat" };
    MergeJob job = runMergeJobs(args);
    assertEquals(3, job.getTotalRecordsNew());
    assertEquals(3, job.getTotalRecordsOld());
    assertEquals(0, job.getBadRecords());
    assertEquals(4, job.getOutput());

    FileSystem outputFS = getFileSystem();
    Path outputPath = new Path(outputFS.getHomeDirectory(), "output");
    FileStatus[] status = outputFS.listStatus(outputPath, getOutputPathFilter());
    assertTrue(outputFS.exists(outputPath));
    List<String> expectedOutput = new ArrayList<String>();
    expectedOutput.add("Macon Kent,6269 Aenean St.,1-247-399-1051,08253");
    expectedOutput.add("Dale Zamora,521-7792 Mauris Rd.,1-214-625-6970,90510");
    expectedOutput.add("Charles Wood,525-9709 In Rd.,1-370-528-4758,62714");
    expectedOutput.add("Macaulay Jackson,5435 Dui. Avenue,1-770-395-6446,31584");
    int count = 0;
    for (FileStatus fileStat : status) {
        logger.debug("File status is " + fileStat.getPath() + " and is it a dir? " + fileStat.isDirectory());
        FSDataInputStream in = outputFS.open(fileStat.getPath());
        String line = null;
        while ((line = in.readLine()) != null) {
            logger.debug("Output is " + line);
            assertTrue("Matched output " + line, expectedOutput.contains(line));
            expectedOutput.remove(line);
            count++;
        }
        in.close();
    }
    assertEquals(4, count);
}

From source file: co.nubetech.hiho.merge.TestMergeJob.java

License: Apache License

@Test
public void testMergeByValueWithSequenceFileAsTextInputFormat() throws Exception {
    HashMap<IntWritable, Text> inputData1 = new HashMap<IntWritable, Text>();
    inputData1.put(new IntWritable(1), new Text("Macon Kent,6269 Aenean St.,1-247-399-1051,08253"));
    inputData1.put(new IntWritable(2), new Text("Dale Zamora,521-7792 Mauris Rd.,1-214-625-6970,90510"));
    inputData1.put(new IntWritable(3), new Text("Charles Wood,525-9709 In Rd.,1-370-528-4758,62714"));
    createSequenceFileInHdfs(inputData1, "/input1", "testFile1.seq");

    HashMap<IntWritable, Text> inputData2 = new HashMap<IntWritable, Text>();
    inputData2.put(new IntWritable(1), new Text("Timon Leonard,716 Ac Ave,1-857-935-3882,62240"));
    inputData2.put(new IntWritable(2), new Text("Macaulay Jackson,5435 Dui. Avenue,1-770-395-6446,31584"));
    inputData2.put(new IntWritable(4), new Text("Charles Wood,525-9709 In Rd.,1-370-528-4758,62714"));
    createSequenceFileInHdfs(inputData2, "/input2", "testFile2.seq");

    String[] args = new String[] { "-newPath", "/input1", "-oldPath", "/input2", "-mergeBy", "value",
            "-outputPath", "output", "-inputFormat",
            "org.apache.hadoop.mapreduce.lib.input.SequenceFileAsTextInputFormat", "-inputKeyClassName",
            "org.apache.hadoop.io.Text", "-inputValueClassName", "org.apache.hadoop.io.Text", "-outputFormat",
            "co.nubetech.hiho.mapreduce.lib.output.NoKeyOnlyValueOutputFormat" };
    MergeJob job = runMergeJobs(args);
    assertEquals(3, job.getTotalRecordsNew());
    assertEquals(3, job.getTotalRecordsOld());
    assertEquals(0, job.getBadRecords());
    assertEquals(5, job.getOutput());

    FileSystem outputFS = getFileSystem();
    Path outputPath = new Path(outputFS.getHomeDirectory(), "output");
    FileStatus[] status = outputFS.listStatus(outputPath, getOutputPathFilter());
    assertTrue(outputFS.exists(outputPath));
    List<String> expectedOutput = new ArrayList<String>();
    expectedOutput.add("Macon Kent,6269 Aenean St.,1-247-399-1051,08253");
    expectedOutput.add("Dale Zamora,521-7792 Mauris Rd.,1-214-625-6970,90510");
    expectedOutput.add("Charles Wood,525-9709 In Rd.,1-370-528-4758,62714");
    expectedOutput.add("Timon Leonard,716 Ac Ave,1-857-935-3882,62240");
    expectedOutput.add("Macaulay Jackson,5435 Dui. Avenue,1-770-395-6446,31584");
    int count = 0;
    for (FileStatus fileStat : status) {
        logger.debug("File status is " + fileStat.getPath() + " and is it a dir? " + fileStat.isDirectory());
        FSDataInputStream in = outputFS.open(fileStat.getPath());
        String line = null;
        while ((line = in.readLine()) != null) {
            logger.debug("Output is " + line);
            assertTrue("Matched output " + line, expectedOutput.contains(line));
            expectedOutput.remove(line);
            count++;
        }
        in.close();
    }
    assertEquals(5, count);
}

From source file: com.aliyun.fs.oss.nat.NativeOssFileSystem.java

License: Apache License

@Override
public FSDataOutputStream create(Path f, FsPermission permission, boolean overwrite, int bufferSize,
        short replication, long blockSize, Progressable progress) throws IOException {
    FileStatus status = null;
    try {
        // get the status or throw an FNFE
        status = getFileStatus(f);

        // if the thread reaches here, there is something at the path
        if (status.isDirectory()) {
            // path references a directory: automatic error
            throw new FileAlreadyExistsException(f + " is a directory");
        }
        if (!overwrite) {
            // path references a file and overwrite is disabled
            throw new FileAlreadyExistsException(f + " already exists");
        }
        LOG.debug("Overwriting file " + f);
    } catch (FileNotFoundException e) {
        // the path does not exist yet, so fall through and create the file
    }

    Path absolutePath = makeAbsolute(f);
    String key = pathToKey(absolutePath);
    return new FSDataOutputStream(
            new NativeOssFsOutputStream(getConf(), store, key, false, progress, bufferSize), statistics);
}
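
Seen from the caller's side, the isDirectory() guard above means that even overwrite=true cannot replace a directory. A hedged sketch, with fs in scope and a placeholder path that is assumed to name an existing directory:

Path dir = new Path("/existing/dir"); // placeholder: a directory is assumed to exist here
try (FSDataOutputStream out = fs.create(dir, true)) {
    // not reached: a directory at the path fails regardless of the overwrite flag
} catch (FileAlreadyExistsException e) {
    System.err.println("Refused to overwrite a directory: " + e.getMessage());
}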

From source file: com.aliyun.fs.utils.OssInputUtils.java

License: Apache License

public FileSplit[] getSplits(String file, int numSplits) throws IOException {
    Path path = new Path(file);
    this.fs = FileSystem.get(path.toUri(), conf);
    fs.initialize(path.toUri(), conf);

    FileStatus[] files = fs.listStatus(path);
    long totalSize = 0;
    for (FileStatus file1 : files) {
        if (file1.isDirectory()) {
            throw new IOException("Not a file: " + file1.getPath());
        }
        totalSize += file1.getLen();
    }

    long goalSize = totalSize / (numSplits == 0 ? 1 : numSplits);
    long minSize = Math
            .max(conf.getLong(org.apache.hadoop.mapreduce.lib.input.FileInputFormat.SPLIT_MINSIZE, 1), 1);

    ArrayList<FileSplit> splits = new ArrayList<FileSplit>(numSplits);
    for (FileStatus file2 : files) {
        Path fp = file2.getPath();
        long length = file2.getLen();
        if (length != 0) {
            long splitSize = Math.max(minSize, goalSize);
            long bytesRemaining = length;
            while (((double) bytesRemaining) / splitSize > SPLIT_SLOP) {
                FileSplit split = new FileSplit(fp, length - bytesRemaining, splitSize, new String[0]);
                splits.add(split);
                bytesRemaining -= splitSize;
            }
            if (bytesRemaining != 0) {
                FileSplit split = new FileSplit(fp, length - bytesRemaining, bytesRemaining, new String[0]);
                splits.add(split);
            }
        }
    }
    LOG.info("Total # of splits: " + splits.size());
    return splits.toArray(new FileSplit[splits.size()]);
}
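
A hedged driver-side sketch of getSplits(); the OssInputUtils constructor signature and the oss:// path are assumptions:

OssInputUtils utils = new OssInputUtils(conf); // construction from a Configuration is assumed
FileSplit[] splits = utils.getSplits("oss://bucket/input", 4);
for (FileSplit split : splits) {
    // Each split covers [getStart(), getStart() + getLength()) of one file.
    System.out.println(split.getPath() + " @" + split.getStart() + " +" + split.getLength());
}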

From source file: com.aliyun.odps.fs.VolumeFileSystem.java

License: Apache License

@Override
public FSDataInputStream open(Path f, int bufferSize) throws IOException {
    Path absF = fixRelativePart(f);
    String filePath = getPathName(absF);
    if (!exists(absF)) {
        throw new FileNotFoundException(filePath);
    }
    FileStatus fileStatus = getFileStatus(f);
    if (fileStatus.isDirectory()) {
        throw new FileNotFoundException(VolumeFSErrorMessageGenerator.isADirectory(filePath));
    }
    return new FSDataInputStream(
            new VolumeFSInputStream(filePath, volumeClient, fileStatus.getLen(), getConf()));
}
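
Callers that prefer not to catch the FileNotFoundException above can test the status first. A minimal sketch, with fs in scope and a placeholder path:

Path p = new Path("/volume/data.txt"); // placeholder path
FileStatus st = fs.getFileStatus(p);
if (!st.isDirectory()) {
    // Safe to open: the path names a regular file.
    try (FSDataInputStream in = fs.open(p)) {
        // ... read from the stream ...
    }
}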

From source file: com.architecting.ch07.MapReduceIndexerTool.java

License: Apache License

/** API for Java clients; visible for testing; may become a public API eventually. */
int run(Options options) throws Exception {
    if (getConf().getBoolean("isMR1", false) && "local".equals(getConf().get("mapred.job.tracker"))) {
        throw new IllegalStateException(
                "Running with LocalJobRunner (i.e. all of Hadoop inside a single JVM) is not supported "
                        + "because LocalJobRunner does not (yet) implement the Hadoop Distributed Cache feature, "
                        + "which is required for passing files via --files and --libjars");
    }

    long programStartTime = System.nanoTime();
    getConf().setInt(SolrOutputFormat.SOLR_RECORD_WRITER_MAX_SEGMENTS, options.maxSegments);

    // switch off a false warning about allegedly not implementing Tool
    // also see http://hadoop.6.n7.nabble.com/GenericOptionsParser-warning-td8103.html
    // also see https://issues.apache.org/jira/browse/HADOOP-8183
    getConf().setBoolean("mapred.used.genericoptionsparser", true);

    if (options.log4jConfigFile != null) {
        Utils.setLogConfigFile(options.log4jConfigFile, getConf());
        addDistributedCacheFile(options.log4jConfigFile, getConf());
    }

    Configuration config = HBaseConfiguration.create();
    Job job = Job.getInstance(config);
    job.setJarByClass(getClass());

    // To be able to run this example from Eclipse, we need to make sure
    // the built jar is distributed to the map-reduce tasks from the
    // local file system.
    job.addCacheArchive(new URI("file:///home/cloudera/ahae/target/ahae.jar"));

    FileSystem fs = options.outputDir.getFileSystem(job.getConfiguration());
    if (fs.exists(options.outputDir) && !delete(options.outputDir, true, fs)) {
        return -1;
    }
    Path outputResultsDir = new Path(options.outputDir, RESULTS_DIR);
    Path outputReduceDir = new Path(options.outputDir, "reducers");

    int reducers = 1;

    Scan scan = new Scan();
    scan.addFamily(CF);
    // tag::SETUP[]
    scan.setCaching(500); // <1>
    scan.setCacheBlocks(false); // <2>

    TableMapReduceUtil.initTableMapperJob( // <3>
            options.inputTable, // Input HBase table name
            scan, // Scan instance to control what to index
            HBaseAvroToSOLRMapper.class, // Mapper to parse cells content.
            Text.class, // Mapper output key
            SolrInputDocumentWritable.class, // Mapper output value
            job);

    FileOutputFormat.setOutputPath(job, outputReduceDir);

    job.setJobName(getClass().getName() + "/" + Utils.getShortClassName(HBaseAvroToSOLRMapper.class));
    job.setReducerClass(SolrReducer.class); // <4>
    job.setPartitionerClass(SolrCloudPartitioner.class); // <5>
    job.getConfiguration().set(SolrCloudPartitioner.ZKHOST, options.zkHost);
    job.getConfiguration().set(SolrCloudPartitioner.COLLECTION, options.collection);
    job.getConfiguration().setInt(SolrCloudPartitioner.SHARDS, options.shards);

    job.setOutputFormatClass(SolrOutputFormat.class);
    SolrOutputFormat.setupSolrHomeCache(options.solrHomeDir, job);

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(SolrInputDocumentWritable.class);
    job.setSpeculativeExecution(false);
    // end::SETUP[]
    job.setNumReduceTasks(reducers); // Set the number of reducers based on the number of shards we have.
    if (!waitForCompletion(job, true)) {
        return -1; // job failed
    }

    // -------------------------------------------------------------------------------------------------------------------------------------

    assert reducers == options.shards;

    // normalize output shard dir prefix, i.e.
    // rename part-r-00000 to part-00000 (stems from zero tree merge iterations)
    // rename part-m-00000 to part-00000 (stems from > 0 tree merge iterations)
    for (FileStatus stats : fs.listStatus(outputReduceDir)) {
        String dirPrefix = SolrOutputFormat.getOutputName(job);
        Path srcPath = stats.getPath();
        if (stats.isDirectory() && srcPath.getName().startsWith(dirPrefix)) {
            String dstName = dirPrefix + srcPath.getName().substring(dirPrefix.length() + "-m".length());
            Path dstPath = new Path(srcPath.getParent(), dstName);
            if (!rename(srcPath, dstPath, fs)) {
                return -1;
            }
        }
    }

    // publish results dir
    if (!rename(outputReduceDir, outputResultsDir, fs)) {
        return -1;
    }

    if (options.goLive && !new GoLive().goLive(options, listSortedOutputShardDirs(job, outputResultsDir, fs))) {
        return -1;
    }

    goodbye(job, programStartTime);
    return 0;
}
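
delete(...) and rename(...) are small private helpers of this tool; a plausible sketch of the rename variant, assuming it only wraps FileSystem.rename with logging:

// Assumed helper shape; the real MapReduceIndexerTool version may log differently.
private boolean rename(Path src, Path dst, FileSystem fs) throws IOException {
    boolean success = fs.rename(src, dst);
    if (!success) {
        LOG.error("Cannot rename " + src + " to " + dst);
    }
    return success;
}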

From source file: com.architecting.ch07.MapReduceIndexerTool.java

License: Apache License

private FileStatus[] listSortedOutputShardDirs(Job job, Path outputReduceDir, FileSystem fs)
        throws FileNotFoundException, IOException {
    final String dirPrefix = SolrOutputFormat.getOutputName(job);
    FileStatus[] dirs = fs.listStatus(outputReduceDir, new PathFilter() {
        @Override
        public boolean accept(Path path) {
            return path.getName().startsWith(dirPrefix);
        }
    });
    for (FileStatus dir : dirs) {
        if (!dir.isDirectory()) {
            throw new IllegalStateException("Not a directory: " + dir.getPath());
        }
    }

    // use alphanumeric sort (rather than lexicographic sort) to properly handle more than 99999 shards
    Arrays.sort(dirs, new Comparator<FileStatus>() {
        @Override
        public int compare(FileStatus f1, FileStatus f2) {
            return new AlphaNumericComparator().compare(f1.getPath().getName(), f2.getPath().getName());
        }
    });

    return dirs;
}
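
The alphanumeric sort matters once shard numbers gain a digit: lexicographically, "part-100000" sorts before "part-99999". A small illustration, assuming AlphaNumericComparator compares embedded digit runs numerically:

String a = "part-99999";
String b = "part-100000";
// Lexicographic comparison: '1' < '9' at the first differing character, so b sorts before a.
System.out.println(a.compareTo(b) > 0); // true
// Numeric-aware comparison (assumed behavior): 99999 < 100000, so a sorts before b.
System.out.println(new AlphaNumericComparator().compare(a, b) < 0); // true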

From source file: com.asakusafw.runtime.compatibility.hadoop2.FileSystemCompatibilityHadoop2.java

License: Apache License

@Override
public boolean isDirectory(FileStatus status) {
    if (status == null) {
        throw new IllegalArgumentException("status must not be null"); //$NON-NLS-1$
    }
    return status.isDirectory();
}
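
This wrapper exists because isDirectory() superseded the deprecated FileStatus.isDir() of older Hadoop releases, so shared code funnels the check through one place. A minimal contrast, with fs in scope and a placeholder path:

FileStatus status = fs.getFileStatus(new Path("/some/path")); // placeholder path
boolean legacy = status.isDir();        // deprecated since Hadoop 0.21
boolean current = status.isDirectory(); // the replacement shown throughout this page
assert legacy == current;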