Example usage for org.apache.hadoop.fs FileUtil stat2Paths

Introduction

On this page you can find example usages of org.apache.hadoop.fs.FileUtil.stat2Paths.

Prototype

public static Path[] stat2Paths(FileStatus[] stats)

Source Link

Document

Converts an array of FileStatus objects to an array of Path objects.

Usage

From source file:crunch.MaxTemperature.java

License:Apache License

  /**
   * Lists every path given on the command line and prints each listed entry.
   *
   * @param args one or more paths; {@code args[0]} is also used as the URI
   *             from which the file system is obtained
   * @throws Exception if the file system cannot be obtained or listed
   */
  public static void main(String[] args) throws Exception {
    String fsUri = args[0];
    Configuration conf = new Configuration();
    FileSystem fs = FileSystem.get(URI.create(fsUri), conf);

    // Wrap each command-line argument in a Path, preserving argument order.
    Path[] paths = new Path[args.length];
    int next = 0;
    for (String arg : args) {
      paths[next++] = new Path(arg);
    }

    // listStatus(Path[]) yields FileStatus[]; stat2Paths converts that to Path[].
    for (Path listed : FileUtil.stat2Paths(fs.listStatus(paths))) {
      System.out.println(listed);
    }
  }

From source file:crunch.MaxTemperature.java

License:Apache License

  /**
   * Expands a glob pattern, appended to {@code BASE_PATH}, into the matching paths.
   *
   * @param pattern glob pattern that is appended to {@code BASE_PATH}
   * @return the matched statuses converted with {@code FileUtil.stat2Paths}
   * @throws IOException if the glob lookup fails
   */
  private Path[] glob(String pattern) throws IOException {
    Path globPath = new Path(BASE_PATH + pattern);
    return FileUtil.stat2Paths(fs.globStatus(globPath));
  }

From source file:crunch.MaxTemperature.java

License:Apache License

  /**
   * Expands a glob pattern, appended to {@code BASE_PATH}, into the matching
   * paths, passing {@code pathFilter} through to the glob lookup.
   *
   * @param pattern    glob pattern that is appended to {@code BASE_PATH}
   * @param pathFilter filter handed to {@code globStatus}
   * @return the matched statuses converted with {@code FileUtil.stat2Paths}
   * @throws IOException if the glob lookup fails
   */
  private Path[] glob(String pattern, PathFilter pathFilter) throws IOException {
    Path globPath = new Path(BASE_PATH + pattern);
    return FileUtil.stat2Paths(fs.globStatus(globPath, pathFilter));
  }

From source file:crunch.MaxTemperature.java

License:Apache License

/**
 * Runs {@code MaxTemperatureDriver} over the local micro data set copied into
 * the test file system and checks that exactly one output file is produced
 * with the expected two records.
 *
 * @throws Exception if the copy, the job run or reading the output fails
 */
public void test() throws Exception {
    Configuration conf = createJobConf();

    Path localInput = new Path("input/ncdc/micro");
    Path input = getInputDir();
    Path output = getOutputDir();

    // Copy input data into test HDFS
    getFileSystem().copyFromLocalFile(localInput, input);

    MaxTemperatureDriver driver = new MaxTemperatureDriver();
    driver.setConf(conf);

    int exitCode = driver.run(new String[] { input.toString(), output.toString() });
    assertThat(exitCode, is(0));

    // Check the output is as expected
    Path[] outputFiles = FileUtil.stat2Paths(getFileSystem().listStatus(output, new OutputLogFilter()));
    assertThat(outputFiles.length, is(1));

    InputStream in = getFileSystem().open(outputFiles[0]);
    BufferedReader reader = new BufferedReader(new InputStreamReader(in));
    try {
        assertThat(reader.readLine(), is("1949\t111"));
        assertThat(reader.readLine(), is("1950\t22"));
        assertThat(reader.readLine(), nullValue());
    } finally {
        // Close even when an assertion fails so the stream is not leaked.
        reader.close();
    }
}

From source file:crunch.MaxTemperature.java

License:Apache License

/**
 * Compares the single job output file under {@code output} (on the local file
 * system) line by line with the classpath resource {@code /expected.txt}.
 *
 * @param conf   configuration used to obtain the local file system
 * @param output directory holding the job output
 * @throws IOException if listing or reading either side fails
 */
private void checkOutput(Configuration conf, Path output) throws IOException {
    FileSystem fs = FileSystem.getLocal(conf);
    Path[] outputFiles = FileUtil.stat2Paths(fs.listStatus(output, new OutputLogFilter()));
    assertThat(outputFiles.length, is(1));

    BufferedReader actual = asBufferedReader(fs.open(outputFiles[0]));
    try {
        BufferedReader expected = asBufferedReader(getClass().getResourceAsStream("/expected.txt"));
        try {
            String expectedLine;
            while ((expectedLine = expected.readLine()) != null) {
                assertThat(actual.readLine(), is(expectedLine));
            }
            // The actual output must not contain any extra trailing lines.
            assertThat(actual.readLine(), nullValue());
        } finally {
            expected.close();
        }
    } finally {
        // Both readers are closed even when an assertion fails.
        actual.close();
    }
}

From source file:de.rwth.i9.palm.analytics.algorithm.lda.CustomVectorDumper.java

License:Apache License

@Override
public int run(String[] args) throws Exception {
    /**/*ww w. j av  a 2  s.co m*/
     * Option seqOpt =
     * obuilder.withLongName("seqFile").withRequired(false).withArgument(
     * abuilder.withName("seqFile").withMinimum(1).withMaximum(1).create()).
     * withDescription(
     * "The Sequence File containing the Vectors").withShortName
     * ("s").create(); Option dirOpt =
     * obuilder.withLongName("seqDirectory").
     * withRequired(false).withArgument(
     * abuilder.withName("seqDirectory").withMinimum
     * (1).withMaximum(1).create()) .withDescription(
     * "The directory containing Sequence File of Vectors")
     * .withShortName("d").create();
     */
    addInputOption();
    addOutputOption();
    addOption("useKey", "u", "If the Key is a vector than dump that instead");
    addOption("printKey", "p", "Print out the key as well, delimited by tab (or the value if useKey is true");
    addOption("dictionary", "d", "The dictionary file.", false);
    addOption("dictionaryType", "dt", "The dictionary file type (text|seqfile)", false);
    addOption("csv", "c",
            "Output the Vector as CSV.  Otherwise it substitutes in the terms for vector cell entries");
    addOption("namesAsComments", "n", "If using CSV output, optionally add a comment line for each NamedVector "
            + "(if the vector is one) printing out the name");
    addOption("nameOnly", "N", "Use the name as the value for each NamedVector (skip other vectors)");
    addOption("sortVectors", "sort",
            "Sort output key/value pairs of the vector entries in abs magnitude " + "descending order");
    addOption("quiet", "q", "Print only file contents");
    addOption("sizeOnly", "sz", "Dump only the size of the vector");
    addOption("numItems", "ni", "Output at most <n> vecors", false);
    addOption("vectorSize", "vs",
            "Truncate vectors to <vs> length when dumping (most useful when in" + " conjunction with -sort",
            false);
    addOption(buildOption("filter", "fi",
            "Only dump out those vectors whose name matches the filter."
                    + "  Multiple items may be specified by repeating the argument.",
            true, 1, Integer.MAX_VALUE, false, null));

    if (parseArguments(args, false, true) == null) {
        return -1;
    }

    Path[] pathArr;
    Configuration conf = new Configuration();
    FileSystem fs = FileSystem.get(conf);
    Path input = getInputPath();
    FileStatus fileStatus = fs.getFileStatus(input);
    if (fileStatus.isDir()) {
        pathArr = FileUtil.stat2Paths(fs.listStatus(input, new OutputFilesFilter()));
    } else {
        FileStatus[] inputPaths = fs.globStatus(input);
        pathArr = new Path[inputPaths.length];
        int i = 0;
        for (FileStatus fstatus : inputPaths) {
            pathArr[i++] = fstatus.getPath();
        }
    }

    String dictionaryType = getOption("dictionaryType", "text");

    boolean sortVectors = hasOption("sortVectors");
    boolean quiet = hasOption("quiet");
    if (!quiet) {
        log.info("Sort? {}", sortVectors);
    }

    String[] dictionary = null;
    if (hasOption("dictionary")) {
        String dictFile = getOption("dictionary");
        if ("text".equals(dictionaryType)) {
            dictionary = VectorHelper.loadTermDictionary(new File(dictFile));
        } else if ("sequencefile".equals(dictionaryType)) {
            dictionary = VectorHelper.loadTermDictionary(conf, dictFile);
        } else {
            // TODO: support Lucene's FST as a dictionary type
            throw new IOException("Invalid dictionary type: " + dictionaryType);
        }
    }

    Set<String> filters;
    if (hasOption("filter")) {
        filters = Sets.newHashSet(getOptions("filter"));
    } else {
        filters = null;
    }

    boolean useCSV = hasOption("csv");

    boolean sizeOnly = hasOption("sizeOnly");
    boolean nameOnly = hasOption("nameOnly");
    boolean namesAsComments = hasOption("namesAsComments");
    boolean transposeKeyValue = hasOption("vectorAsKey");
    Writer writer;
    boolean shouldClose;
    File output = getOutputFile();
    if (output != null) {
        shouldClose = true;
        log.info("Output file: {}", output);
        Files.createParentDirs(output);
        writer = Files.newWriter(output, Charsets.UTF_8);
    } else {
        shouldClose = false;
        writer = new OutputStreamWriter(System.out, Charsets.UTF_8);
    }
    try {
        boolean printKey = hasOption("printKey");
        if (useCSV && dictionary != null) {
            writer.write("#");
            for (int j = 0; j < dictionary.length; j++) {
                writer.write(dictionary[j]);
                if (j < dictionary.length - 1) {
                    writer.write(',');
                }
            }
            writer.write('\n');
        }
        Long numItems = null;
        if (hasOption("numItems")) {
            numItems = Long.parseLong(getOption("numItems"));
            if (quiet) {
                writer.append("#Max Items to dump: ").append(String.valueOf(numItems)).append('\n');
            }
        }
        int maxIndexesPerVector = hasOption("vectorSize") ? Integer.parseInt(getOption("vectorSize"))
                : Integer.MAX_VALUE;
        long itemCount = 0;
        int fileCount = 0;
        for (Path path : pathArr) {
            if (numItems != null && numItems <= itemCount) {
                break;
            }
            if (quiet) {
                log.info("Processing file '{}' ({}/{})", path, ++fileCount, pathArr.length);
            }
            SequenceFileIterable<Writable, Writable> iterable = new SequenceFileIterable<Writable, Writable>(
                    path, true, conf);
            Iterator<Pair<Writable, Writable>> iterator = iterable.iterator();
            long i = 0;
            while (iterator.hasNext() && (numItems == null || itemCount < numItems)) {
                Pair<Writable, Writable> record = iterator.next();
                Writable keyWritable = record.getFirst();
                Writable valueWritable = record.getSecond();
                if (printKey) {
                    Writable notTheVectorWritable = transposeKeyValue ? valueWritable : keyWritable;
                    writer.write(notTheVectorWritable.toString());
                    writer.write('\t');
                }
                Vector vector;
                try {
                    vector = ((VectorWritable) (transposeKeyValue ? keyWritable : valueWritable)).get();
                } catch (ClassCastException e) {
                    if ((transposeKeyValue ? keyWritable
                            : valueWritable) instanceof WeightedPropertyVectorWritable) {
                        vector = ((WeightedPropertyVectorWritable) (transposeKeyValue ? keyWritable
                                : valueWritable)).getVector();
                    } else {
                        throw e;
                    }
                }
                if (filters != null && vector instanceof NamedVector
                        && !filters.contains(((NamedVector) vector).getName())) {
                    // we are filtering out this item, skip
                    continue;
                }
                if (sizeOnly) {
                    if (vector instanceof NamedVector) {
                        writer.write(((NamedVector) vector).getName());
                        writer.write(":");
                    } else {
                        writer.write(String.valueOf(i++));
                        writer.write(":");
                    }
                    writer.write(String.valueOf(vector.size()));
                    writer.write('\n');
                } else if (nameOnly) {
                    if (vector instanceof NamedVector) {
                        writer.write(((NamedVector) vector).getName());
                        writer.write('\n');
                    }
                } else {
                    String fmtStr;
                    if (useCSV) {
                        fmtStr = VectorHelper.vectorToCSVString(vector, namesAsComments);
                    } else {
                        fmtStr = VectorHelper.vectorToJson(vector, dictionary, maxIndexesPerVector,
                                sortVectors);
                    }
                    writer.write(fmtStr);
                    writer.write('\n');
                }
                itemCount++;
            }
        }
        writer.flush();
    } finally {
        if (shouldClose) {
            Closeables.close(writer, false);
        }
    }

    return 0;
}

From source file:eml.studio.server.util.HDFSIO.java

License:Open Source License

/**
 * Lists the entries found under {@code uri}.
 *
 * @param uri location to list
 * @return the listed statuses converted with {@code FileUtil.stat2Paths}, or
 *         {@code null} when {@code uri} does not exist
 * @throws IOException if the existence check or the listing fails
 */
public static Path[] list(String uri) throws IOException {
    Path dir = new Path(uri);
    if (fs.exists(dir)) {
        return FileUtil.stat2Paths(fs.listStatus(dir));
    }
    return null;
}

From source file:gr.ntua.h2rdf.byteImport.HexastoreBulkImport.java

License:Open Source License

/**
 * Creates {@code TABLE_NAME} (if it does not exist yet), pre-split on the
 * first row key of every HFile found under the {@code out} directory, and
 * then bulk-loads that directory with {@code LoadIncrementalHFiles}.
 *
 * <p>Entries whose name starts with {@code _} are skipped at both the family
 * directory level and the HFile level.
 *
 * @throws Exception if the file system, HBase admin or bulk-load calls fail
 */
private void loadHFiles() throws Exception {
    conf = HBaseConfiguration.create();
    conf.addResource("hbase-default.xml");
    conf.addResource("hbase-site.xml");
    HBaseAdmin hadmin = new HBaseAdmin(conf);
    try {
        Path hfofDir = new Path("out");
        FileSystem fs = hfofDir.getFileSystem(conf);
        FileStatus[] familyDirStatuses = fs.listStatus(hfofDir);

        // Collected first-row keys; the buffer grows on demand, so the number of
        // HFiles is not capped by the initial capacity.
        int length = 0;
        byte[][] splits = new byte[18000][];
        for (FileStatus stat : familyDirStatuses) {
            if (!stat.isDir()) {
                continue;
            }
            Path familyDir = stat.getPath();
            // Skip _logs, etc
            if (familyDir.getName().startsWith("_")) {
                continue;
            }
            Path[] hfiles = FileUtil.stat2Paths(fs.listStatus(familyDir));
            for (Path hfile : hfiles) {
                if (hfile.getName().startsWith("_")) {
                    continue;
                }

                HFile.Reader hfr = HFile.createReader(fs, hfile, new CacheConfig(conf));
                final byte[] first;
                try {
                    hfr.loadFileInfo();
                    first = hfr.getFirstRowKey();
                } finally {
                    hfr.close();
                }
                if (first == null) {
                    // presumably an HFile without any rows - TODO confirm; it has
                    // no key to contribute as a split point.
                    continue;
                }
                if (length == splits.length) {
                    splits = Arrays.copyOf(splits, splits.length * 2);
                }
                splits[length++] = first.clone();
            }
        }

        // Trim to the used prefix and order the split keys.
        byte[][] splits1 = Arrays.copyOf(splits, length);
        Arrays.sort(splits1, Bytes.BYTES_COMPARATOR);

        HTableDescriptor desc = new HTableDescriptor(TABLE_NAME);
        HColumnDescriptor family = new HColumnDescriptor("A");
        desc.addFamily(family);

        conf.setInt("zookeeper.session.timeout", 600000);
        // An existing table is left untouched; only a missing table is created.
        if (!hadmin.tableExists(TABLE_NAME)) {
            hadmin.createTable(desc, splits1);
        }

        String[] args1 = new String[] { "out", TABLE_NAME };
        ToolRunner.run(new LoadIncrementalHFiles(HBaseConfiguration.create()), args1);
    } finally {
        // Release the admin only after the bulk load has finished.
        hadmin.close();
    }
}

From source file:gr.ntua.h2rdf.client.ResultSet.java

License:Open Source License

/**
 * Opens a result for reading.
 *
 * <p>When {@code out} starts with {@code "output/"} it is treated as a path on
 * the file system named by {@code fs.default.name}: a plain file is opened
 * directly, while for an existing non-file path the entries whose name starts
 * with {@code "part"} are collected and the first one is opened. Any other
 * value of {@code out} is treated as the result content itself and is read
 * from memory.
 *
 * <p>Failures are printed via {@code printStackTrace()} rather than
 * propagated, so the instance may be left partially initialised.
 *
 * @param out   an {@code "output/..."} path, or the literal result text
 * @param hconf supplies the configuration, table name and user
 */
public ResultSet(String out, H2RDFConf hconf) {
    Configuration conf = hconf.getConf();
    try {
        try {
            this.table = new HTable(conf, hconf.getTable());
            fs = FileSystem.get(new URI(conf.get("fs.default.name")), conf, hconf.getUser());
        } catch (InterruptedException e) {
            // Restore the interrupt flag so callers can still observe the interruption.
            Thread.currentThread().interrupt();
            e.printStackTrace();
        } catch (URISyntaxException e) {
            e.printStackTrace();
        }
        // NOTE(review): if the lookup above failed, fs keeps whatever value it had
        // before and is used unguarded below - confirm this is acceptable.
        if (out.startsWith("output/")) {
            Path p = new Path(out);
            o = p;
            if (fs.isFile(p)) {
                // Single result file.
                outfiles = new Path[1];
                outfiles[0] = p;
                filesNo = 1;
                nextfile = 1;
                FSDataInputStream in = fs.open(p);
                outfile = new BufferedReader(new InputStreamReader(in));
            } else if (fs.exists(p)) {
                // MapReduce output folder: keep only the "part*" entries.
                Path[] outf = FileUtil.stat2Paths(fs.listStatus(p));
                int paths = 0;
                outfiles = new Path[outf.length];
                for (Path f : outf) {
                    if (f.getName().startsWith("part")) {
                        outfiles[paths] = f;
                        paths++;
                    }
                }
                filesNo = paths;
                nextfile = 1;
                // NOTE(review): outfiles[0] is null when no "part*" entry exists;
                // that case is not handled here.
                FSDataInputStream in = fs.open(outfiles[0]);
                outfile = new BufferedReader(new InputStreamReader(in));
            }
        } else {
            // The argument is the result text itself; read it from memory.
            o = null;
            filesNo = 1;
            nextfile = 1;
            InputStream is = new ByteArrayInputStream(out.getBytes());
            outfile = new BufferedReader(new InputStreamReader(is));
        }
    } catch (IOException e) {
        e.printStackTrace();
    }
}

From source file:gr.ntua.h2rdf.client.ResultSetOpenRDF.java

License:Open Source License

/**
 * Opens a result for reading.
 *
 * <p>When {@code out} starts with {@code "output/"} it is treated as a path on
 * the file system named by {@code fs.default.name}: a plain file is opened
 * directly, while for an existing non-file path the entries whose name starts
 * with {@code "part"} are collected and the first one is opened. Any other
 * value of {@code out} is treated as the result content itself and is read
 * from memory. {@code lineFinished} is set to {@code true} once the source has
 * been set up without an {@code IOException}.
 *
 * <p>Failures are printed via {@code printStackTrace()} rather than
 * propagated, so the instance may be left partially initialised.
 *
 * @param out   an {@code "output/..."} path, or the literal result text
 * @param hconf supplies the configuration, table name and user
 */
public ResultSetOpenRDF(String out, H2RDFConf hconf) {
    Configuration conf = hconf.getConf();
    try {
        try {
            this.table = new HTable(conf, hconf.getTable());
            fs = FileSystem.get(new URI(conf.get("fs.default.name")), conf, hconf.getUser());
        } catch (InterruptedException e) {
            // Restore the interrupt flag so callers can still observe the interruption.
            Thread.currentThread().interrupt();
            e.printStackTrace();
        } catch (URISyntaxException e) {
            e.printStackTrace();
        }
        // NOTE(review): if the lookup above failed, fs keeps whatever value it had
        // before and is used unguarded below - confirm this is acceptable.
        if (out.startsWith("output/")) {
            Path p = new Path(out);
            o = p;
            if (fs.isFile(p)) {
                // Single result file.
                outfiles = new Path[1];
                outfiles[0] = p;
                filesNo = 1;
                nextfile = 1;
                FSDataInputStream in = fs.open(p);
                outfile = new BufferedReader(new InputStreamReader(in));
            } else if (fs.exists(p)) {
                // MapReduce output folder: keep only the "part*" entries.
                Path[] outf = FileUtil.stat2Paths(fs.listStatus(p));
                int paths = 0;
                outfiles = new Path[outf.length];
                for (Path f : outf) {
                    if (f.getName().startsWith("part")) {
                        outfiles[paths] = f;
                        paths++;
                    }
                }
                filesNo = paths;
                nextfile = 1;
                // NOTE(review): outfiles[0] is null when no "part*" entry exists;
                // that case is not handled here.
                FSDataInputStream in = fs.open(outfiles[0]);
                outfile = new BufferedReader(new InputStreamReader(in));
            }
        } else {
            // The argument is the result text itself; read it from memory.
            o = null;
            filesNo = 1;
            nextfile = 1;
            InputStream is = new ByteArrayInputStream(out.getBytes());
            outfile = new BufferedReader(new InputStreamReader(is));
        }
        lineFinished = true;
    } catch (IOException e) {
        e.printStackTrace();
    }
}