Example usage for org.apache.hadoop.fs FileUtil stat2Paths

List of usage examples for org.apache.hadoop.fs FileUtil stat2Paths

Introduction

On this page you can find example usage for org.apache.hadoop.fs FileUtil stat2Paths.

Prototype

public static Path[] stat2Paths(FileStatus[] stats) 

Document

Convert an array of FileStatus to an array of Path.
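
The following is a minimal sketch of the typical call pattern, assuming a configured FileSystem and an existing directory (the path /tmp/example is a placeholder):

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.FileUtil;
import org.apache.hadoop.fs.Path;

public class Stat2PathsExample {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(conf);

        // List the directory, then convert the FileStatus entries to Paths
        FileStatus[] stats = fs.listStatus(new Path("/tmp/example"));
        Path[] paths = FileUtil.stat2Paths(stats);

        for (Path p : paths) {
            System.out.println(p);
        }
    }
}

In several of the examples below the returned array is checked for null before iterating; this covers the case where fs.globStatus finds no match and returns null, which stat2Paths passes through unchanged.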

Usage

From source file: org.apache.oozie.action.hadoop.FSLauncherURIHandler.java

License: Apache License

@Override
public boolean delete(URI uri, Configuration conf) throws LauncherException {
    boolean status = false;
    try {
        FileSystem fs = FileSystem.get(uri, conf);
        Path[] pathArr = FileUtil.stat2Paths(fs.globStatus(getNormalizedPath(uri)));
        if (pathArr != null && pathArr.length > 0) {
            int fsGlobMax = conf.getInt(LauncherMapper.CONF_OOZIE_ACTION_FS_GLOB_MAX, 1000);
            if (pathArr.length > fsGlobMax) {
                throw new LauncherException(
                        "exceeds max number (" + fsGlobMax + ") of files/dirs to delete in <prepare>");
            }
            for (Path path : pathArr) {
                if (fs.exists(path)) {
                    status = fs.delete(path, true);
                    if (status) {
                        System.out.println("Deletion of path " + path + " succeeded.");
                    } else {
                        System.out.println("Deletion of path " + path + " failed.");
                    }
                }
            }
        }
    } catch (IOException e) {
        throw new LauncherException("Deletion of path " + uri + " failed.", e);
    }
    return status;
}

From source file: org.apache.oozie.command.XLogPurgeXCommand.java

License: Apache License

private void deleteJobLogs(FileSystem fs, String hdfsDir, List<String> jobIds) {
    Path[] paths = null;
    try {
        FileStatus[] fileStatuses = fs.listStatus(new Path(hdfsDir));
        paths = FileUtil.stat2Paths(fileStatuses);
    } catch (IOException ex) {
        LOG.error("file not found " + ex.getMessage());
    }

    for (Path path : paths) {
        for (String jobId : jobIds) {
            final Path p = new Path(path, jobId + ".log");
            try {
                if (fs.exists(p)) {
                    fs.delete(p, true);
                }
            } catch (IOException ex) {
                LOG.error("cannot delete job logs in hdfs",
                        new HadoopAccessorException(ErrorCode.E0902, "cannot delete file " + p));
            }
        }
    }
}

From source file: org.apache.pig.builtin.TestAvroStorage.java

License: Apache License

private void verifyResults(String outPath, String expectedOutpath, String expectedCodec) throws IOException {
    FileSystem fs = FileSystem.getLocal(new Configuration());

    /* read in expected results*/
    Set<GenericData.Record> expected = getExpected(expectedOutpath);

    /* read in output results and compare */
    Path output = new Path(outPath);
    assertTrue("Output dir does not exists!", fs.exists(output) && fs.getFileStatus(output).isDir());

    Path[] paths = FileUtil.stat2Paths(fs.listStatus(output, hiddenPathFilter));
    assertTrue("Split field dirs not found!", paths != null);

    for (Path path : paths) {
        Path[] files = FileUtil.stat2Paths(fs.listStatus(path, hiddenPathFilter));
        assertTrue("No files found for path: " + path.toUri().getPath(), files != null);
        for (Path filePath : files) {
            assertTrue("This shouldn't be a directory", fs.isFile(filePath));

            GenericDatumReader<GenericData.Record> reader = new GenericDatumReader<GenericData.Record>();

            DataFileStream<GenericData.Record> in = new DataFileStream<GenericData.Record>(fs.open(filePath),
                    reader);
            assertEquals("codec", expectedCodec, in.getMetaString("avro.codec"));
            int count = 0;
            while (in.hasNext()) {
                GenericData.Record obj = in.next();
                assertTrue(
                        "Avro result object found that's not expected: Found "
                                + (obj != null ? obj.getSchema() : "null") + ", " + obj.toString()
                                + "\nExpected " + (expected != null ? expected.toString() : "null") + "\n",
                        expected.contains(obj));
                count++;
            }
            in.close();
            assertEquals(expected.size(), count);
        }
    }
}

From source file: org.apache.pig.builtin.TestAvroStorage.java

License: Apache License

private Set<GenericData.Record> getExpected(String pathstr) throws IOException {

    Set<GenericData.Record> ret = new TreeSet<GenericData.Record>(new Comparator<GenericData.Record>() {
        @Override
        public int compare(Record o1, Record o2) {
            return o1.toString().compareTo(o2.toString());
        }
    });
    FileSystem fs = FileSystem.getLocal(new Configuration());

    /* read in output results and compare */
    Path output = new Path(pathstr);
    assertTrue("Expected output does not exists!", fs.exists(output));

    Path[] paths = FileUtil.stat2Paths(fs.listStatus(output, hiddenPathFilter));
    assertTrue("Split field dirs not found!", paths != null);

    for (Path path : paths) {
        Path[] files = FileUtil.stat2Paths(fs.listStatus(path, hiddenPathFilter));
        assertTrue("No files found for path: " + path.toUri().getPath(), files != null);
        for (Path filePath : files) {
            assertTrue("This shouldn't be a directory", fs.isFile(filePath));

            GenericDatumReader<GenericData.Record> reader = new GenericDatumReader<GenericData.Record>();

            DataFileStream<GenericData.Record> in = new DataFileStream<GenericData.Record>(fs.open(filePath),
                    reader);

            while (in.hasNext()) {
                GenericData.Record obj = in.next();
                ret.add(obj);
            }
            in.close();
        }
    }
    return ret;
}

From source file: org.apache.pig.piggybank.test.storage.avro.TestAvroStorage.java

License: Apache License

private void verifyResults(String outPath, String expectedOutpath, String expectedCodec) throws IOException {

    FileSystem fs = FileSystem.getLocal(new Configuration());

    /* read in expected results*/
    Set<Object> expected = getExpected(expectedOutpath);

    /* read in output results and compare */
    Path output = new Path(outPath);
    assertTrue("Output dir does not exists!", fs.exists(output) && fs.getFileStatus(output).isDir());

    Path[] paths = FileUtil.stat2Paths(fs.listStatus(output, hiddenPathFilter));
    assertTrue("Split field dirs not found!", paths != null);

    for (Path path : paths) {
        Path[] files = FileUtil.stat2Paths(fs.listStatus(path, hiddenPathFilter));
        assertTrue("No files found for path: " + path.toUri().getPath(), files != null);
        for (Path filePath : files) {
            assertTrue("This shouldn't be a directory", fs.isFile(filePath));

            GenericDatumReader<Object> reader = new GenericDatumReader<Object>();

            DataFileStream<Object> in = new DataFileStream<Object>(fs.open(filePath), reader);
            assertEquals("codec", expectedCodec, in.getMetaString("avro.codec"));
            int count = 0;
            while (in.hasNext()) {
                Object obj = in.next();
                //System.out.println("obj = " + (GenericData.Array<Float>)obj);
                assertTrue("Avro result object found that's not expected: " + obj, expected.contains(obj));
                count++;
            }
            in.close();
            assertEquals(expected.size(), count);
        }
    }
}

From source file: org.apache.pig.piggybank.test.storage.avro.TestAvroStorage.java

License: Apache License

private Set<Object> getExpected(String pathstr) throws IOException {

    Set<Object> ret = new HashSet<Object>();
    FileSystem fs = FileSystem.getLocal(new Configuration());

    /* read in output results and compare */
    Path output = new Path(pathstr);
    assertTrue("Expected output does not exists!", fs.exists(output));

    Path[] paths = FileUtil.stat2Paths(fs.listStatus(output, hiddenPathFilter));
    assertTrue("Split field dirs not found!", paths != null);

    for (Path path : paths) {
        Path[] files = FileUtil.stat2Paths(fs.listStatus(path, hiddenPathFilter));
        assertTrue("No files found for path: " + path.toUri().getPath(), files != null);
        for (Path filePath : files) {
            assertTrue("This shouldn't be a directory", fs.isFile(filePath));

            GenericDatumReader<Object> reader = new GenericDatumReader<Object>();

            DataFileStream<Object> in = new DataFileStream<Object>(fs.open(filePath), reader);

            while (in.hasNext()) {
                Object obj = in.next();
                ret.add(obj);
            }
            in.close();
        }
    }
    return ret;
}

From source file: org.apache.pig.piggybank.test.storage.TestMultiStorage.java

License: Apache License

/**
 * Test if records are split into directories corresponding to split field
 * values.
 * 
 * @param mode
 * @throws IOException
 */
private void verifyResults(Mode mode, String outPath) throws IOException {
    FileSystem fs = (Mode.local == mode ? FileSystem.getLocal(new Configuration()) : cluster.getFileSystem());
    Path output = new Path(outPath);
    Assert.assertTrue("Output dir does not exists!", fs.exists(output) && fs.getFileStatus(output).isDir());

    Path[] paths = FileUtil.stat2Paths(fs.listStatus(output, hiddenPathFilter));
    Assert.assertTrue("Split field dirs not found!", paths != null);

    for (Path path : paths) {
        String splitField = path.getName();
        Path[] files = FileUtil.stat2Paths(fs.listStatus(path, hiddenPathFilter));
        Assert.assertTrue("No files found for path: " + path.toUri().getPath(), files != null);
        for (Path filePath : files) {
            Assert.assertTrue("This shouldn't be a directory", fs.isFile(filePath));

            BufferedReader reader = new BufferedReader(new InputStreamReader(fs.open(filePath)));
            String line = "";
            int count = 0;
            while ((line = reader.readLine()) != null) {
                String[] fields = line.split("\\t");
                Assert.assertEquals(fields.length, 3);
                Assert.assertEquals("Unexpected field value in the output record", splitField, fields[1]);
                count++;
                System.out.println("field: " + fields[1]);
            }
            reader.close();
            Assert.assertEquals(count, 3);
        }
    }
}

From source file: org.apache.solr.hadoop.MorphlineBasicMiniMRTest.java

License: Apache License

@Test
public void mrRun() throws Exception {
    FileSystem fs = dfsCluster.getFileSystem();
    Path inDir = fs.makeQualified(new Path("/user/testing/testMapperReducer/input"));
    fs.delete(inDir, true);
    String DATADIR = "/user/testing/testMapperReducer/data";
    Path dataDir = fs.makeQualified(new Path(DATADIR));
    fs.delete(dataDir, true);
    Path outDir = fs.makeQualified(new Path("/user/testing/testMapperReducer/output"));
    fs.delete(outDir, true);

    assertTrue(fs.mkdirs(inDir));
    Path INPATH = new Path(inDir, "input.txt");
    OutputStream os = fs.create(INPATH);
    Writer wr = new OutputStreamWriter(os, "UTF-8");
    wr.write(DATADIR + "/" + inputAvroFile);
    wr.close();

    assertTrue(fs.mkdirs(dataDir));
    fs.copyFromLocalFile(new Path(DOCUMENTS_DIR, inputAvroFile), dataDir);

    JobConf jobConf = getJobConf();
    if (ENABLE_LOCAL_JOB_RUNNER) { // enable Hadoop LocalJobRunner; this enables to run in debugger and set breakpoints
        jobConf.set("mapred.job.tracker", "local");
    }
    jobConf.setMaxMapAttempts(1);
    jobConf.setMaxReduceAttempts(1);
    jobConf.setJar(SEARCH_ARCHIVES_JAR);
    jobConf.setBoolean(ExtractingParams.IGNORE_TIKA_EXCEPTION, false);

    int shards = 2;
    int maxReducers = Integer.MAX_VALUE;
    if (ENABLE_LOCAL_JOB_RUNNER) {
        // local job runner has a couple of limitations: only one reducer is supported and the DistributedCache doesn't work.
        // see http://blog.cloudera.com/blog/2009/07/advice-on-qa-testing-your-mapreduce-jobs/
        maxReducers = 1;
        shards = 1;
    }

    String[] args = new String[] {
            "--morphline-file=" + RESOURCES_DIR + "/test-morphlines/solrCellDocumentTypes.conf",
            "--morphline-id=morphline1", "--solr-home-dir=" + MINIMR_CONF_DIR.getAbsolutePath(),
            "--output-dir=" + outDir.toString(), "--shards=" + shards, "--verbose",
            numRuns % 2 == 0 ? "--input-list=" + INPATH.toString() : dataDir.toString(),
            numRuns % 3 == 0 ? "--reducers=" + shards
                    : (numRuns % 3 == 1 ? "--reducers=-1" : "--reducers=" + Math.min(8, maxReducers)) };
    if (numRuns % 3 == 2) {
        args = concat(args, new String[] { "--fanout=2" });
    }
    if (numRuns == 0) {
        // force (slow) MapReduce based randomization to get coverage for that as well
        args = concat(new String[] { "-D", MapReduceIndexerTool.MAIN_MEMORY_RANDOMIZATION_THRESHOLD + "=-1" },
                args);
    }
    MapReduceIndexerTool tool = createTool();
    int res = ToolRunner.run(jobConf, tool, args);
    assertEquals(0, res);
    Job job = tool.job;
    assertTrue(job.isComplete());
    assertTrue(job.isSuccessful());

    if (numRuns % 3 != 2) {
        // Only run this check if mtree merge is disabled.
        // With mtree merge enabled the BatchWriter counters aren't available anymore because 
        // variable "job" now refers to the merge job rather than the indexing job
        assertEquals(
                "Invalid counter " + SolrRecordWriter.class.getName() + "." + SolrCounters.DOCUMENTS_WRITTEN,
                count,
                job.getCounters()
                        .findCounter(SolrCounters.class.getName(), SolrCounters.DOCUMENTS_WRITTEN.toString())
                        .getValue());
    }

    // Check the output is as expected
    outDir = new Path(outDir, MapReduceIndexerTool.RESULTS_DIR);
    Path[] outputFiles = FileUtil.stat2Paths(fs.listStatus(outDir));

    System.out.println("outputfiles:" + Arrays.toString(outputFiles));

    TestUtils.validateSolrServerDocumentCount(MINIMR_CONF_DIR, fs, outDir, count, shards);

    // run again in --dry-run mode:
    tool = createTool();
    args = concat(args, new String[] { "--dry-run" });
    res = ToolRunner.run(jobConf, tool, args);
    assertEquals(0, res);

    numRuns++;
}

From source file: org.apache.sqoop.mapreduce.CombineFileInputFormat.java

License: Apache License

@Override
public List<InputSplit> getSplits(JobContext job) throws IOException {

    long minSizeNode = 0;
    long minSizeRack = 0;
    long maxSize = 0;
    Configuration conf = job.getConfiguration();

    // the values specified by setxxxSplitSize() take precedence over the
    // values that might have been specified in the config
    if (minSplitSizeNode != 0) {
        minSizeNode = minSplitSizeNode;
    } else {
        minSizeNode = conf.getLong(SPLIT_MINSIZE_PERNODE, 0);
    }
    if (minSplitSizeRack != 0) {
        minSizeRack = minSplitSizeRack;
    } else {
        minSizeRack = conf.getLong(SPLIT_MINSIZE_PERRACK, 0);
    }
    if (maxSplitSize != 0) {
        maxSize = maxSplitSize;
    } else {
        maxSize = conf.getLong("mapred.max.split.size", 0);
    }
    if (minSizeNode != 0 && maxSize != 0 && minSizeNode > maxSize) {
        throw new IOException("Minimum split size pernode " + minSizeNode
                + " cannot be larger than maximum split size " + maxSize);
    }
    if (minSizeRack != 0 && maxSize != 0 && minSizeRack > maxSize) {
        throw new IOException("Minimum split size per rack" + minSizeRack
                + " cannot be larger than maximum split size " + maxSize);
    }
    if (minSizeRack != 0 && minSizeNode > minSizeRack) {
        throw new IOException("Minimum split size per node" + minSizeNode
                + " cannot be smaller than minimum split " + "size per rack " + minSizeRack);
    }

    // all the files in input set
    Path[] paths = FileUtil.stat2Paths(listStatus(job).toArray(new FileStatus[0]));
    List<InputSplit> splits = new ArrayList<InputSplit>();
    if (paths.length == 0) {
        return splits;
    }

    // Convert them to Paths first. This is a costly operation and
    // we should do it first, otherwise we will incur doing it multiple
    // times, one time each for each pool in the next loop.
    List<Path> newpaths = new LinkedList<Path>();
    for (int i = 0; i < paths.length; i++) {
        FileSystem fs = paths[i].getFileSystem(conf);

        //the scheme and authority will be kept if the path is
        //a valid path for a non-default file system
        Path p = fs.makeQualified(paths[i]);
        newpaths.add(p);
    }
    paths = null;

    // In one single iteration, process all the paths in a single pool.
    // Processing one pool at a time ensures that a split contains paths
    // from a single pool only.
    for (MultiPathFilter onepool : pools) {
        ArrayList<Path> myPaths = new ArrayList<Path>();

        // pick one input path. If it matches all the filters in a pool,
        // add it to the output set
        for (Iterator<Path> iter = newpaths.iterator(); iter.hasNext();) {
            Path p = iter.next();
            if (onepool.accept(p)) {
                myPaths.add(p); // add it to my output set
                iter.remove();
            }
        }
        // create splits for all files in this pool.
        getMoreSplits(job, myPaths.toArray(new Path[myPaths.size()]), maxSize, minSizeNode, minSizeRack,
                splits);
    }

    // create splits for all files that are not in any pool.
    getMoreSplits(job, newpaths.toArray(new Path[newpaths.size()]), maxSize, minSizeNode, minSizeRack, splits);

    // free up rackToNodes map
    rackToNodes.clear();
    return splits;
}

From source file: org.apache.tinkerpop.gremlin.hadoop.structure.hdfs.HDFSTools.java

License: Apache License

public static void decompressPath(final FileSystem fs, final String in, final String out,
        final String compressedFileSuffix, final boolean deletePrevious) throws IOException {
    final Path inPath = new Path(in);

    if (fs.isFile(inPath))
        HDFSTools.decompressFile(fs, in, out, deletePrevious);
    else {
        final Path outPath = new Path(out);
        if (!fs.exists(outPath))
            fs.mkdirs(outPath);
        for (final Path path : FileUtil.stat2Paths(fs.globStatus(new Path(in + FORWARD_ASTERISK)))) {
            if (path.getName().endsWith(compressedFileSuffix))
                HDFSTools.decompressFile(fs, path.toString(),
                        outPath.toString() + FORWARD_SLASH + path.getName().split("\\.")[0], deletePrevious);
        }
    }
}