Example usage for org.apache.hadoop.fs FileUtil stat2Paths

List of usage examples for org.apache.hadoop.fs FileUtil stat2Paths

Introduction

On this page you can find example usage for org.apache.hadoop.fs FileUtil stat2Paths.

Prototype

public static Path[] stat2Paths(FileStatus[] stats) 

Document

Convert an array of FileStatus to an array of Path.
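
The following is a minimal sketch of the typical call pattern, assuming a configured FileSystem and an existing directory (the path /tmp/example is a placeholder):

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.FileUtil;
import org.apache.hadoop.fs.Path;

public class Stat2PathsExample {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(conf);

        // List the directory, then convert the FileStatus entries to Paths
        FileStatus[] stats = fs.listStatus(new Path("/tmp/example"));
        Path[] paths = FileUtil.stat2Paths(stats);

        for (Path p : paths) {
            System.out.println(p);
        }
    }
}

In several of the examples below the returned array is checked for null before iterating; this covers the case where fs.globStatus finds no match and returns null, which stat2Paths passes through unchanged.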

Usage

From source file: org.apache.oozie.action.hadoop.FSLauncherURIHandler.java

License: Apache License

@Override
public boolean delete(URI uri, Configuration conf) throws LauncherException {
    boolean status = false;
    try {
        FileSystem fs = FileSystem.get(uri, conf);
        Path[] pathArr = FileUtil.stat2Paths(fs.globStatus(getNormalizedPath(uri)));
        if (pathArr != null && pathArr.length > 0) {
            int fsGlobMax = conf.getInt(LauncherMapper.CONF_OOZIE_ACTION_FS_GLOB_MAX, 1000);
            if (pathArr.length > fsGlobMax) {
                throw new LauncherException(
                        "exceeds max number (" + fsGlobMax + ") of files/dirs to delete in <prepare>");
            }
            for (Path path : pathArr) {
                if (fs.exists(path)) {
                    status = fs.delete(path, true);
                    if (status) {
                        System.out.println("Deletion of path " + path + " succeeded.");
                    } else {
                        System.out.println("Deletion of path " + path + " failed.");
                    }
                }
            }
        }
    } catch (IOException e) {
        throw new LauncherException("Deletion of path " + uri + " failed.", e);
    }
    return status;
}

From source file: org.apache.oozie.command.XLogPurgeXCommand.java

License: Apache License

private void deleteJobLogs(FileSystem fs, String hdfsDir, List<String> jobIds) {
    Path[] paths = null;
    try {
        FileStatus[] fileStatuses = fs.listStatus(new Path(hdfsDir));
        paths = FileUtil.stat2Paths(fileStatuses);
    } catch (IOException ex) {
        LOG.error("file not found " + ex.getMessage());
    }

    for (Path path : paths) {
        for (String jobId : jobIds) {
            final Path p = new Path(path, jobId + ".log");
            try {
                if (fs.exists(p)) {
                    fs.delete(p, true);
                }
            } catch (IOException ex) {
                LOG.error("cannot delete job logs in hdfs",
                        new HadoopAccessorException(ErrorCode.E0902, "cannot delete file " + p));
            }
        }
    }
}

From source file: org.apache.pig.builtin.TestAvroStorage.java

License: Apache License

private void verifyResults(String outPath, String expectedOutpath, String expectedCodec) throws IOException {
    FileSystem fs = FileSystem.getLocal(new Configuration());

    /* read in expected results*/
    Set<GenericData.Record> expected = getExpected(expectedOutpath);

    /* read in output results and compare */
    Path output = new Path(outPath);
    assertTrue("Output dir does not exists!", fs.exists(output) && fs.getFileStatus(output).isDir());

    Path[] paths = FileUtil.stat2Paths(fs.listStatus(output, hiddenPathFilter));
    assertTrue("Split field dirs not found!", paths != null);

    for (Path path : paths) {
        Path[] files = FileUtil.stat2Paths(fs.listStatus(path, hiddenPathFilter));
        assertTrue("No files found for path: " + path.toUri().getPath(), files != null);
        for (Path filePath : files) {
            assertTrue("This shouldn't be a directory", fs.isFile(filePath));

            GenericDatumReader<GenericData.Record> reader = new GenericDatumReader<GenericData.Record>();

            DataFileStream<GenericData.Record> in = new DataFileStream<GenericData.Record>(fs.open(filePath),
                    reader);
            assertEquals("codec", expectedCodec, in.getMetaString("avro.codec"));
            int count = 0;
            while (in.hasNext()) {
                GenericData.Record obj = in.next();
                assertTrue(
                        "Avro result object found that's not expected: Found "
                                + (obj != null ? obj.getSchema() : "null") + ", " + obj.toString()
                                + "\nExpected " + (expected != null ? expected.toString() : "null") + "\n",
                        expected.contains(obj));
                count++;
            }
            in.close();
            assertEquals(expected.size(), count);
        }
    }
}

From source file: org.apache.pig.builtin.TestAvroStorage.java

License: Apache License

private Set<GenericData.Record> getExpected(String pathstr) throws IOException {

    Set<GenericData.Record> ret = new TreeSet<GenericData.Record>(new Comparator<GenericData.Record>() {
        @Override
        public int compare(Record o1, Record o2) {
            return o1.toString().compareTo(o2.toString());
        }
    });
    FileSystem fs = FileSystem.getLocal(new Configuration());

    /* read in output results and compare */
    Path output = new Path(pathstr);
    assertTrue("Expected output does not exists!", fs.exists(output));

    Path[] paths = FileUtil.stat2Paths(fs.listStatus(output, hiddenPathFilter));
    assertTrue("Split field dirs not found!", paths != null);

    for (Path path : paths) {
        Path[] files = FileUtil.stat2Paths(fs.listStatus(path, hiddenPathFilter));
        assertTrue("No files found for path: " + path.toUri().getPath(), files != null);
        for (Path filePath : files) {
            assertTrue("This shouldn't be a directory", fs.isFile(filePath));

            GenericDatumReader<GenericData.Record> reader = new GenericDatumReader<GenericData.Record>();

            DataFileStream<GenericData.Record> in = new DataFileStream<GenericData.Record>(fs.open(filePath),
                    reader);

            while (in.hasNext()) {
                GenericData.Record obj = in.next();
                ret.add(obj);
            }
            in.close();
        }
    }
    return ret;
}

From source file: org.apache.pig.piggybank.test.storage.avro.TestAvroStorage.java

License: Apache License

private void verifyResults(String outPath, String expectedOutpath, String expectedCodec) throws IOException {

    FileSystem fs = FileSystem.getLocal(new Configuration());

    /* read in expected results*/
    Set<Object> expected = getExpected(expectedOutpath);

    /* read in output results and compare */
    Path output = new Path(outPath);
    assertTrue("Output dir does not exists!", fs.exists(output) && fs.getFileStatus(output).isDir());

    Path[] paths = FileUtil.stat2Paths(fs.listStatus(output, hiddenPathFilter));
    assertTrue("Split field dirs not found!", paths != null);

    for (Path path : paths) {
        Path[] files = FileUtil.stat2Paths(fs.listStatus(path, hiddenPathFilter));
        assertTrue("No files found for path: " + path.toUri().getPath(), files != null);
        for (Path filePath : files) {
            assertTrue("This shouldn't be a directory", fs.isFile(filePath));

            GenericDatumReader<Object> reader = new GenericDatumReader<Object>();

            DataFileStream<Object> in = new DataFileStream<Object>(fs.open(filePath), reader);
            assertEquals("codec", expectedCodec, in.getMetaString("avro.codec"));
            int count = 0;
            while (in.hasNext()) {
                Object obj = in.next();
                //System.out.println("obj = " + (GenericData.Array<Float>)obj);
                assertTrue("Avro result object found that's not expected: " + obj, expected.contains(obj));
                count++;
            }
            in.close();
            assertEquals(expected.size(), count);
        }
    }
}

From source file: org.apache.pig.piggybank.test.storage.avro.TestAvroStorage.java

License: Apache License

private Set<Object> getExpected(String pathstr) throws IOException {

    Set<Object> ret = new HashSet<Object>();
    FileSystem fs = FileSystem.getLocal(new Configuration());

    /* read in output results and compare */
    Path output = new Path(pathstr);
    assertTrue("Expected output does not exists!", fs.exists(output));

    Path[] paths = FileUtil.stat2Paths(fs.listStatus(output, hiddenPathFilter));
    assertTrue("Split field dirs not found!", paths != null);

    for (Path path : paths) {
        Path[] files = FileUtil.stat2Paths(fs.listStatus(path, hiddenPathFilter));
        assertTrue("No files found for path: " + path.toUri().getPath(), files != null);
        for (Path filePath : files) {
            assertTrue("This shouldn't be a directory", fs.isFile(filePath));

            GenericDatumReader<Object> reader = new GenericDatumReader<Object>();

            DataFileStream<Object> in = new DataFileStream<Object>(fs.open(filePath), reader);

            while (in.hasNext()) {
                Object obj = in.next();
                ret.add(obj);
            }
            in.close();
        }
    }
    return ret;
}

From source file: org.apache.pig.piggybank.test.storage.TestMultiStorage.java

License: Apache License

/**
 * Test if records are split into directories corresponding to split field
 * values.
 * 
 * @param mode
 * @throws IOException
 */
private void verifyResults(Mode mode, String outPath) throws IOException {
    FileSystem fs = (Mode.local == mode ? FileSystem.getLocal(new Configuration()) : cluster.getFileSystem());
    Path output = new Path(outPath);
    Assert.assertTrue("Output dir does not exists!", fs.exists(output) && fs.getFileStatus(output).isDir());

    Path[] paths = FileUtil.stat2Paths(fs.listStatus(output, hiddenPathFilter));
    Assert.assertTrue("Split field dirs not found!", paths != null);

    for (Path path : paths) {
        String splitField = path.getName();
        Path[] files = FileUtil.stat2Paths(fs.listStatus(path, hiddenPathFilter));
        Assert.assertTrue("No files found for path: " + path.toUri().getPath(), files != null);
        for (Path filePath : files) {
            Assert.assertTrue("This shouldn't be a directory", fs.isFile(filePath));

            BufferedReader reader = new BufferedReader(new InputStreamReader(fs.open(filePath)));
            String line = "";
            int count = 0;
            while ((line = reader.readLine()) != null) {
                String[] fields = line.split("\\t");
                Assert.assertEquals(fields.length, 3);
                Assert.assertEquals("Unexpected field value in the output record", splitField, fields[1]);
                count++;
                System.out.println("field: " + fields[1]);
            }
            reader.close();
            Assert.assertEquals(count, 3);
        }
    }
}

From source file: org.apache.solr.hadoop.MorphlineBasicMiniMRTest.java

License: Apache License

@Test
public void mrRun() throws Exception {
    FileSystem fs = dfsCluster.getFileSystem();
    Path inDir = fs.makeQualified(new Path("/user/testing/testMapperReducer/input"));
    fs.delete(inDir, true);
    String DATADIR = "/user/testing/testMapperReducer/data";
    Path dataDir = fs.makeQualified(new Path(DATADIR));
    fs.delete(dataDir, true);
    Path outDir = fs.makeQualified(new Path("/user/testing/testMapperReducer/output"));
    fs.delete(outDir, true);

    assertTrue(fs.mkdirs(inDir));
    Path INPATH = new Path(inDir, "input.txt");
    OutputStream os = fs.create(INPATH);
    Writer wr = new OutputStreamWriter(os, "UTF-8");
    wr.write(DATADIR + "/" + inputAvroFile);
    wr.close();

    assertTrue(fs.mkdirs(dataDir));
    fs.copyFromLocalFile(new Path(DOCUMENTS_DIR, inputAvroFile), dataDir);

    JobConf jobConf = getJobConf();
    if (ENABLE_LOCAL_JOB_RUNNER) { // enable Hadoop LocalJobRunner; this enables to run in debugger and set breakpoints
        jobConf.set("mapred.job.tracker", "local");
    }
    jobConf.setMaxMapAttempts(1);
    jobConf.setMaxReduceAttempts(1);
    jobConf.setJar(SEARCH_ARCHIVES_JAR);
    jobConf.setBoolean(ExtractingParams.IGNORE_TIKA_EXCEPTION, false);

    int shards = 2;
    int maxReducers = Integer.MAX_VALUE;
    if (ENABLE_LOCAL_JOB_RUNNER) {
        // local job runner has a couple of limitations: only one reducer is supported and the DistributedCache doesn't work.
        // see http://blog.cloudera.com/blog/2009/07/advice-on-qa-testing-your-mapreduce-jobs/
        maxReducers = 1;
        shards = 1;
    }

    String[] args = new String[] {
            "--morphline-file=" + RESOURCES_DIR + "/test-morphlines/solrCellDocumentTypes.conf",
            "--morphline-id=morphline1", "--solr-home-dir=" + MINIMR_CONF_DIR.getAbsolutePath(),
            "--output-dir=" + outDir.toString(), "--shards=" + shards, "--verbose",
            numRuns % 2 == 0 ? "--input-list=" + INPATH.toString() : dataDir.toString(),
            numRuns % 3 == 0 ? "--reducers=" + shards
                    : (numRuns % 3 == 1 ? "--reducers=-1" : "--reducers=" + Math.min(8, maxReducers)) };
    if (numRuns % 3 == 2) {
        args = concat(args, new String[] { "--fanout=2" });
    }
    if (numRuns == 0) {
        // force (slow) MapReduce based randomization to get coverage for that as well
        args = concat(new String[] { "-D", MapReduceIndexerTool.MAIN_MEMORY_RANDOMIZATION_THRESHOLD + "=-1" },
                args);
    }
    MapReduceIndexerTool tool = createTool();
    int res = ToolRunner.run(jobConf, tool, args);
    assertEquals(0, res);
    Job job = tool.job;
    assertTrue(job.isComplete());
    assertTrue(job.isSuccessful());

    if (numRuns % 3 != 2) {
        // Only run this check if mtree merge is disabled.
        // With mtree merge enabled the BatchWriter counters aren't available anymore because 
        // variable "job" now refers to the merge job rather than the indexing job
        assertEquals(
                "Invalid counter " + SolrRecordWriter.class.getName() + "." + SolrCounters.DOCUMENTS_WRITTEN,
                count,
                job.getCounters()
                        .findCounter(SolrCounters.class.getName(), SolrCounters.DOCUMENTS_WRITTEN.toString())
                        .getValue());
    }

    // Check the output is as expected
    outDir = new Path(outDir, MapReduceIndexerTool.RESULTS_DIR);
    Path[] outputFiles = FileUtil.stat2Paths(fs.listStatus(outDir));

    System.out.println("outputfiles:" + Arrays.toString(outputFiles));

    TestUtils.validateSolrServerDocumentCount(MINIMR_CONF_DIR, fs, outDir, count, shards);

    // run again in --dry-run mode:
    tool = createTool();
    args = concat(args, new String[] { "--dry-run" });
    res = ToolRunner.run(jobConf, tool, args);
    assertEquals(0, res);

    numRuns++;
}

From source file: org.apache.sqoop.mapreduce.CombineFileInputFormat.java

License: Apache License

@Override
public List<InputSplit> getSplits(JobContext job) throws IOException {

    long minSizeNode = 0;
    long minSizeRack = 0;
    long maxSize = 0;
    Configuration conf = job.getConfiguration();

    // the values specified by setxxxSplitSize() take precedence over the
    // values that might have been specified in the config
    if (minSplitSizeNode != 0) {
        minSizeNode = minSplitSizeNode;
    } else {
        minSizeNode = conf.getLong(SPLIT_MINSIZE_PERNODE, 0);
    }
    if (minSplitSizeRack != 0) {
        minSizeRack = minSplitSizeRack;
    } else {
        minSizeRack = conf.getLong(SPLIT_MINSIZE_PERRACK, 0);
    }
    if (maxSplitSize != 0) {
        maxSize = maxSplitSize;
    } else {
        maxSize = conf.getLong("mapred.max.split.size", 0);
    }
    if (minSizeNode != 0 && maxSize != 0 && minSizeNode > maxSize) {
        throw new IOException("Minimum split size pernode " + minSizeNode
                + " cannot be larger than maximum split size " + maxSize);
    }
    if (minSizeRack != 0 && maxSize != 0 && minSizeRack > maxSize) {
        throw new IOException("Minimum split size per rack" + minSizeRack
                + " cannot be larger than maximum split size " + maxSize);
    }
    if (minSizeRack != 0 && minSizeNode > minSizeRack) {
        throw new IOException("Minimum split size per node" + minSizeNode
                + " cannot be smaller than minimum split " + "size per rack " + minSizeRack);
    }

    // all the files in input set
    Path[] paths = FileUtil.stat2Paths(listStatus(job).toArray(new FileStatus[0]));
    List<InputSplit> splits = new ArrayList<InputSplit>();
    if (paths.length == 0) {
        return splits;
    }

    // Convert them to Paths first. This is a costly operation and
    // we should do it first, otherwise we will incur doing it multiple
    // times, one time each for each pool in the next loop.
    List<Path> newpaths = new LinkedList<Path>();
    for (int i = 0; i < paths.length; i++) {
        FileSystem fs = paths[i].getFileSystem(conf);

        //the scheme and authority will be kept if the path is
        //a valid path for a non-default file system
        Path p = fs.makeQualified(paths[i]);
        newpaths.add(p);
    }
    paths = null;

    // In one single iteration, process all the paths in a single pool.
    // Processing one pool at a time ensures that a split contains paths
    // from a single pool only.
    for (MultiPathFilter onepool : pools) {
        ArrayList<Path> myPaths = new ArrayList<Path>();

        // pick one input path. If it matches all the filters in a pool,
        // add it to the output set
        for (Iterator<Path> iter = newpaths.iterator(); iter.hasNext();) {
            Path p = iter.next();
            if (onepool.accept(p)) {
                myPaths.add(p); // add it to my output set
                iter.remove();
            }
        }
        // create splits for all files in this pool.
        getMoreSplits(job, myPaths.toArray(new Path[myPaths.size()]), maxSize, minSizeNode, minSizeRack,
                splits);
    }

    // create splits for all files that are not in any pool.
    getMoreSplits(job, newpaths.toArray(new Path[newpaths.size()]), maxSize, minSizeNode, minSizeRack, splits);

    // free up rackToNodes map
    rackToNodes.clear();
    return splits;
}

From source file: org.apache.tinkerpop.gremlin.hadoop.structure.hdfs.HDFSTools.java

License: Apache License

public static void decompressPath(final FileSystem fs, final String in, final String out,
        final String compressedFileSuffix, final boolean deletePrevious) throws IOException {
    final Path inPath = new Path(in);

    if (fs.isFile(inPath))
        HDFSTools.decompressFile(fs, in, out, deletePrevious);
    else {
        final Path outPath = new Path(out);
        if (!fs.exists(outPath))
            fs.mkdirs(outPath);
        for (final Path path : FileUtil.stat2Paths(fs.globStatus(new Path(in + FORWARD_ASTERISK)))) {
            if (path.getName().endsWith(compressedFileSuffix))
                HDFSTools.decompressFile(fs, path.toString(),
                        outPath.toString() + FORWARD_SLASH + path.getName().split("\\.")[0], deletePrevious);
        }
    }
}