Example usage for org.apache.hadoop.fs FileUtil stat2Paths

Introduction

On this page you can find example usage of org.apache.hadoop.fs.FileUtil.stat2Paths.

Prototype

public static Path[] stat2Paths(FileStatus[] stats) 

Document

Convert an array of FileStatus objects to an array of Path objects.
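
Under the hood the method is a straightforward mapping from each FileStatus to its Path. A minimal equivalent sketch (the real implementation also passes a null input array through as null):

import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.Path;

public class Stat2PathsSketch {
    // Equivalent of FileUtil.stat2Paths: extract the Path from each FileStatus.
    public static Path[] stat2Paths(FileStatus[] stats) {
        if (stats == null) {
            return null; // mirror FileUtil's null pass-through
        }
        Path[] ret = new Path[stats.length];
        for (int i = 0; i < stats.length; i++) {
            ret[i] = stats[i].getPath();
        }
        return ret;
    }
}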

Usage

From source file: crunch.MaxTemperature.java

License: Apache License

  public static void main(String[] args) throws Exception {
      String uri = args[0];
      Configuration conf = new Configuration();
      FileSystem fs = FileSystem.get(URI.create(uri), conf);

      Path[] paths = new Path[args.length];
      for (int i = 0; i < paths.length; i++) {
          paths[i] = new Path(args[i]);
      }

      FileStatus[] status = fs.listStatus(paths); // FileSystem.listStatus(Path[]) returns FileStatus[]
      Path[] listedPaths = FileUtil.stat2Paths(status); // convert the FileStatus[] to Path[]
      for (Path p : listedPaths) {
          System.out.println(p);
      }
  }

From source file: crunch.MaxTemperature.java

License: Apache License

  private Path[] glob(String pattern) throws IOException {
      // expand the glob pattern into the matching paths under BASE_PATH
      return FileUtil.stat2Paths(fs.globStatus(new Path(BASE_PATH + pattern)));
  }
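
Note that globStatus returns null when the pattern contains no wildcard and the path does not exist, and stat2Paths passes that null through, so callers of this helper may receive null. A defensive variant (a sketch using the same fs and BASE_PATH fields as the method above) normalizes the result to an empty array:

  private Path[] globOrEmpty(String pattern) throws IOException {
      FileStatus[] stats = fs.globStatus(new Path(BASE_PATH + pattern));
      Path[] paths = FileUtil.stat2Paths(stats);
      // globStatus (and therefore stat2Paths) can yield null; return an
      // empty array instead so callers can iterate unconditionally.
      return paths != null ? paths : new Path[0];
  }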

From source file: crunch.MaxTemperature.java

License: Apache License

  private Path[] glob(String pattern, PathFilter pathFilter) throws IOException {
      // expand the glob pattern, keeping only the paths accepted by pathFilter
      return FileUtil.stat2Paths(fs.globStatus(new Path(BASE_PATH + pattern), pathFilter));
  }

From source file: crunch.MaxTemperature.java

License: Apache License

public void test() throws Exception {
        Configuration conf = createJobConf();

        Path localInput = new Path("input/ncdc/micro");
        Path input = getInputDir();
        Path output = getOutputDir();

        // Copy input data into test HDFS
        getFileSystem().copyFromLocalFile(localInput, input);

        MaxTemperatureDriver driver = new MaxTemperatureDriver();
        driver.setConf(conf);

        int exitCode = driver.run(new String[] { input.toString(), output.toString() });
        assertThat(exitCode, is(0));

        // Check the output is as expected
        Path[] outputFiles = FileUtil.stat2Paths(getFileSystem().listStatus(output, new OutputLogFilter()));
        assertThat(outputFiles.length, is(1));

        InputStream in = getFileSystem().open(outputFiles[0]);
        BufferedReader reader = new BufferedReader(new InputStreamReader(in));
        assertThat(reader.readLine(), is("1949\t111"));
        assertThat(reader.readLine(), is("1950\t22"));
        assertThat(reader.readLine(), nullValue());
        reader.close();
    }
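
OutputLogFilter here hides the _logs directory that MapReduce writes alongside the real output, so only the part file is counted. A hand-rolled PathFilter with similar intent is easy to sketch (this hypothetical stand-in, not the library class, also skips the _SUCCESS marker by rejecting every name that starts with an underscore):

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.PathFilter;

// Hypothetical stand-in for OutputLogFilter: reject _logs, _SUCCESS and
// any other bookkeeping entry whose name starts with an underscore.
public class UnderscoreFilter implements PathFilter {
    @Override
    public boolean accept(Path path) {
        return !path.getName().startsWith("_");
    }
}

It would be used the same way as above, e.g. FileUtil.stat2Paths(getFileSystem().listStatus(output, new UnderscoreFilter())).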

From source file: crunch.MaxTemperature.java

License: Apache License

private void checkOutput(Configuration conf, Path output) throws IOException {
        FileSystem fs = FileSystem.getLocal(conf);
        Path[] outputFiles = FileUtil.stat2Paths(fs.listStatus(output, new OutputLogFilter()));
        assertThat(outputFiles.length, is(1));

        BufferedReader actual = asBufferedReader(fs.open(outputFiles[0]));
        BufferedReader expected = asBufferedReader(getClass().getResourceAsStream("/expected.txt"));
        String expectedLine;
        while ((expectedLine = expected.readLine()) != null) {
            assertThat(actual.readLine(), is(expectedLine));
        }
        assertThat(actual.readLine(), nullValue());
        actual.close();
        expected.close();
    }

From source file: de.rwth.i9.palm.analytics.algorithm.lda.CustomVectorDumper.java

License: Apache License

@Override
public int run(String[] args) throws Exception {
    /*
     * Option seqOpt =
     * obuilder.withLongName("seqFile").withRequired(false).withArgument(
     * abuilder.withName("seqFile").withMinimum(1).withMaximum(1).create()).
     * withDescription(
     * "The Sequence File containing the Vectors").withShortName
     * ("s").create(); Option dirOpt =
     * obuilder.withLongName("seqDirectory").
     * withRequired(false).withArgument(
     * abuilder.withName("seqDirectory").withMinimum
     * (1).withMaximum(1).create()) .withDescription(
     * "The directory containing Sequence File of Vectors")
     * .withShortName("d").create();
     */
    addInputOption();
    addOutputOption();
    addOption("useKey", "u", "If the Key is a vector than dump that instead");
    addOption("printKey", "p", "Print out the key as well, delimited by tab (or the value if useKey is true");
    addOption("dictionary", "d", "The dictionary file.", false);
    addOption("dictionaryType", "dt", "The dictionary file type (text|seqfile)", false);
    addOption("csv", "c",
            "Output the Vector as CSV.  Otherwise it substitutes in the terms for vector cell entries");
    addOption("namesAsComments", "n", "If using CSV output, optionally add a comment line for each NamedVector "
            + "(if the vector is one) printing out the name");
    addOption("nameOnly", "N", "Use the name as the value for each NamedVector (skip other vectors)");
    addOption("sortVectors", "sort",
            "Sort output key/value pairs of the vector entries in abs magnitude " + "descending order");
    addOption("quiet", "q", "Print only file contents");
    addOption("sizeOnly", "sz", "Dump only the size of the vector");
    addOption("numItems", "ni", "Output at most <n> vecors", false);
    addOption("vectorSize", "vs",
            "Truncate vectors to <vs> length when dumping (most useful when in" + " conjunction with -sort",
            false);
    addOption(buildOption("filter", "fi",
            "Only dump out those vectors whose name matches the filter."
                    + "  Multiple items may be specified by repeating the argument.",
            true, 1, Integer.MAX_VALUE, false, null));

    if (parseArguments(args, false, true) == null) {
        return -1;
    }

    Path[] pathArr;
    Configuration conf = new Configuration();
    FileSystem fs = FileSystem.get(conf);
    Path input = getInputPath();
    FileStatus fileStatus = fs.getFileStatus(input);
    if (fileStatus.isDir()) {
        pathArr = FileUtil.stat2Paths(fs.listStatus(input, new OutputFilesFilter()));
    } else {
        FileStatus[] inputPaths = fs.globStatus(input);
        pathArr = new Path[inputPaths.length];
        int i = 0;
        for (FileStatus fstatus : inputPaths) {
            pathArr[i++] = fstatus.getPath();
        }
    }

    String dictionaryType = getOption("dictionaryType", "text");

    boolean sortVectors = hasOption("sortVectors");
    boolean quiet = hasOption("quiet");
    if (!quiet) {
        log.info("Sort? {}", sortVectors);
    }

    String[] dictionary = null;
    if (hasOption("dictionary")) {
        String dictFile = getOption("dictionary");
        if ("text".equals(dictionaryType)) {
            dictionary = VectorHelper.loadTermDictionary(new File(dictFile));
        } else if ("sequencefile".equals(dictionaryType)) {
            dictionary = VectorHelper.loadTermDictionary(conf, dictFile);
        } else {
            // TODO: support Lucene's FST as a dictionary type
            throw new IOException("Invalid dictionary type: " + dictionaryType);
        }
    }

    Set<String> filters;
    if (hasOption("filter")) {
        filters = Sets.newHashSet(getOptions("filter"));
    } else {
        filters = null;
    }

    boolean useCSV = hasOption("csv");

    boolean sizeOnly = hasOption("sizeOnly");
    boolean nameOnly = hasOption("nameOnly");
    boolean namesAsComments = hasOption("namesAsComments");
    boolean transposeKeyValue = hasOption("vectorAsKey");
    Writer writer;
    boolean shouldClose;
    File output = getOutputFile();
    if (output != null) {
        shouldClose = true;
        log.info("Output file: {}", output);
        Files.createParentDirs(output);
        writer = Files.newWriter(output, Charsets.UTF_8);
    } else {
        shouldClose = false;
        writer = new OutputStreamWriter(System.out, Charsets.UTF_8);
    }
    try {
        boolean printKey = hasOption("printKey");
        if (useCSV && dictionary != null) {
            writer.write("#");
            for (int j = 0; j < dictionary.length; j++) {
                writer.write(dictionary[j]);
                if (j < dictionary.length - 1) {
                    writer.write(',');
                }
            }
            writer.write('\n');
        }
        Long numItems = null;
        if (hasOption("numItems")) {
            numItems = Long.parseLong(getOption("numItems"));
            if (!quiet) { // suppress the banner when --quiet is set
                writer.append("#Max Items to dump: ").append(String.valueOf(numItems)).append('\n');
            }
        }
        int maxIndexesPerVector = hasOption("vectorSize") ? Integer.parseInt(getOption("vectorSize"))
                : Integer.MAX_VALUE;
        long itemCount = 0;
        int fileCount = 0;
        for (Path path : pathArr) {
            if (numItems != null && numItems <= itemCount) {
                break;
            }
            if (!quiet) { // log progress only when not in quiet mode
                log.info("Processing file '{}' ({}/{})", path, ++fileCount, pathArr.length);
            }
            SequenceFileIterable<Writable, Writable> iterable = new SequenceFileIterable<Writable, Writable>(
                    path, true, conf);
            Iterator<Pair<Writable, Writable>> iterator = iterable.iterator();
            long i = 0;
            while (iterator.hasNext() && (numItems == null || itemCount < numItems)) {
                Pair<Writable, Writable> record = iterator.next();
                Writable keyWritable = record.getFirst();
                Writable valueWritable = record.getSecond();
                if (printKey) {
                    Writable notTheVectorWritable = transposeKeyValue ? valueWritable : keyWritable;
                    writer.write(notTheVectorWritable.toString());
                    writer.write('\t');
                }
                Vector vector;
                try {
                    vector = ((VectorWritable) (transposeKeyValue ? keyWritable : valueWritable)).get();
                } catch (ClassCastException e) {
                    if ((transposeKeyValue ? keyWritable
                            : valueWritable) instanceof WeightedPropertyVectorWritable) {
                        vector = ((WeightedPropertyVectorWritable) (transposeKeyValue ? keyWritable
                                : valueWritable)).getVector();
                    } else {
                        throw e;
                    }
                }
                if (filters != null && vector instanceof NamedVector
                        && !filters.contains(((NamedVector) vector).getName())) {
                    // we are filtering out this item, skip
                    continue;
                }
                if (sizeOnly) {
                    if (vector instanceof NamedVector) {
                        writer.write(((NamedVector) vector).getName());
                        writer.write(":");
                    } else {
                        writer.write(String.valueOf(i++));
                        writer.write(":");
                    }
                    writer.write(String.valueOf(vector.size()));
                    writer.write('\n');
                } else if (nameOnly) {
                    if (vector instanceof NamedVector) {
                        writer.write(((NamedVector) vector).getName());
                        writer.write('\n');
                    }
                } else {
                    String fmtStr;
                    if (useCSV) {
                        fmtStr = VectorHelper.vectorToCSVString(vector, namesAsComments);
                    } else {
                        fmtStr = VectorHelper.vectorToJson(vector, dictionary, maxIndexesPerVector,
                                sortVectors);
                    }
                    writer.write(fmtStr);
                    writer.write('\n');
                }
                itemCount++;
            }
        }
        writer.flush();
    } finally {
        if (shouldClose) {
            Closeables.close(writer, false);
        }
    }

    return 0;
}
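
Note that the two input branches expand paths differently: a directory is listed through listStatus with an OutputFilesFilter and converted via stat2Paths, while the glob branch copies the paths out of the FileStatus array by hand. The manual loop could equally be written as FileUtil.stat2Paths(inputPaths), which performs exactly the same mapping.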

From source file: eml.studio.server.util.HDFSIO.java

License: Open Source License

/** Return the paths of files under directory uri */
public static Path[] list(String uri) throws IOException {

    Path path = new Path(uri);
    if (!fs.exists(path))
        return null;
    FileStatus[] status = fs.listStatus(path);
    return FileUtil.stat2Paths(status);
}
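
A hypothetical caller (the URI below is illustrative) must handle the null return for a missing directory:

Path[] entries = HDFSIO.list("hdfs://namenode/user/demo/output"); // illustrative URI
if (entries != null) { // list() returns null when the directory is absent
    for (Path entry : entries) {
        System.out.println(entry);
    }
}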

From source file: gr.ntua.h2rdf.byteImport.HexastoreBulkImport.java

License: Open Source License

private void loadHFiles() throws Exception {
    conf = HBaseConfiguration.create();
    conf.addResource("hbase-default.xml");
    conf.addResource("hbase-site.xml");
    HBaseAdmin hadmin = new HBaseAdmin(conf);
    Path hfofDir = new Path("out");
    FileSystem fs = hfofDir.getFileSystem(conf);
    //if (!fs.exists(hfofDir)) {
    //  throw new FileNotFoundException("HFileOutputFormat dir " +
    //      hfofDir + " not found");
    //}
    FileStatus[] familyDirStatuses = fs.listStatus(hfofDir);
    //if (familyDirStatuses == null) {
    //  throw new FileNotFoundException("No families found in " + hfofDir);
    //}
    int length = 0;
    byte[][] splits = new byte[18000][];
    for (FileStatus stat : familyDirStatuses) {
        if (!stat.isDir()) {
            continue;
        }
        Path familyDir = stat.getPath();
        // Skip _logs, etc
        if (familyDir.getName().startsWith("_"))
            continue;
        //byte[] family = familyDir.getName().getBytes();
        Path[] hfiles = FileUtil.stat2Paths(fs.listStatus(familyDir));
        for (Path hfile : hfiles) {
            if (hfile.getName().startsWith("_"))
                continue;

            HFile.Reader hfr = HFile.createReader(fs, hfile, new CacheConfig(conf));
            //HFile.Reader hfr =    new HFile.Reader(fs, hfile, null, false);
            final byte[] first;
            try {
                hfr.loadFileInfo();
                first = hfr.getFirstRowKey();
            } finally {
                hfr.close();
            }
            splits[length] = first.clone();
            length++;
        }
    }
    //System.out.println(length);

    byte[][] splits1 = new byte[length][];

    for (int i = 0; i < splits1.length; i++) {
        splits1[i] = splits[i];
    }
    Arrays.sort(splits1, Bytes.BYTES_COMPARATOR);
    //HTableDescriptor desc = new HTableDescriptor("H2RDF");

    HTableDescriptor desc = new HTableDescriptor(TABLE_NAME);

    HColumnDescriptor family = new HColumnDescriptor("A");
    desc.addFamily(family);
    //for (int i = 0; i < splits.length; i++) {
    //   System.out.println(Bytes.toStringBinary(splits[i]));
    //}
    conf.setInt("zookeeper.session.timeout", 600000);
    if (hadmin.tableExists(TABLE_NAME)) {
        //hadmin.disableTable(TABLE_NAME);
        //hadmin.deleteTable(TABLE_NAME);
    } else {
        hadmin.createTable(desc, splits1);
    }
    //hadmin.createTable(desc);
    String[] args1 = new String[2];
    args1[0] = "out";
    args1[1] = TABLE_NAME;
    //args1[1]="new2";

    ToolRunner.run(new LoadIncrementalHFiles(HBaseConfiguration.create()), args1);

}

From source file: gr.ntua.h2rdf.client.ResultSet.java

License: Open Source License

public ResultSet(String out, H2RDFConf hconf) {
    //System.out.println(out);
    Configuration conf = hconf.getConf();//new Configuration();
    //System.out.println(conf.get("fs.default.name"));
    try {
        try {
            //Configuration c = HBaseConfiguration.create();
            this.table = new HTable(conf, hconf.getTable());
            fs = FileSystem.get(new URI(conf.get("fs.default.name")), conf, hconf.getUser());
        } catch (InterruptedException e) {
            // TODO Auto-generated catch block
            e.printStackTrace();
        } catch (URISyntaxException e) {
            // TODO Auto-generated catch block
            e.printStackTrace();
        }
        //fs.setWorkingDirectory(new Path("/user/arcomem/"));
        if (out.startsWith("output/")) {
            Path p = new Path(out);
            o = p;
            if (fs.isFile(p)) {//file
                outfiles = new Path[1];
                outfiles[0] = p;
                filesNo = 1;
                nextfile = 1;
                FSDataInputStream o = fs.open(p);
                outfile = new BufferedReader(new InputStreamReader(o));
            } else if (fs.exists(p)) {//MapReduce folder
                Path[] outf = FileUtil.stat2Paths(fs.listStatus(p));
                int paths = 0;
                outfiles = new Path[outf.length];
                for (Path f : outf) {
                    if (f.getName().startsWith("part")) {
                        outfiles[paths] = f;
                        paths++;
                    }
                }
                filesNo = paths;
                nextfile = 1;
                FSDataInputStream o = fs.open(outfiles[0]);
                outfile = new BufferedReader(new InputStreamReader(o));
            }
        } else {
            o = null;
            filesNo = 1;
            nextfile = 1;
            InputStream is = new ByteArrayInputStream(out.getBytes());
            outfile = new BufferedReader(new InputStreamReader(is));
        }
    } catch (IOException e) {
        e.printStackTrace();
    }
}
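
The constructor selects the part-* files by filtering after the full listing. An equivalent sketch (using the same fs and p variables as the constructor) pushes the filter into the listing itself, since FileSystem.listStatus also accepts a PathFilter:

// Sketch: list only the part-* outputs directly, instead of copying
// matches out of the full array afterwards.
Path[] partFiles = FileUtil.stat2Paths(fs.listStatus(p, new PathFilter() {
    @Override
    public boolean accept(Path f) {
        return f.getName().startsWith("part");
    }
}));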

From source file: gr.ntua.h2rdf.client.ResultSetOpenRDF.java

License: Open Source License

public ResultSetOpenRDF(String out, H2RDFConf hconf) {
    //System.out.println(out);
    Configuration conf = hconf.getConf();// new Configuration();
    //System.out.println(conf.get("fs.default.name"));
    try {
        try {
            //Configuration c = HBaseConfiguration.create();
            this.table = new HTable(conf, hconf.getTable());
            fs = FileSystem.get(new URI(conf.get("fs.default.name")), conf, hconf.getUser());
        } catch (InterruptedException e) {
            // TODO Auto-generated catch block
            e.printStackTrace();
        } catch (URISyntaxException e) {
            // TODO Auto-generated catch block
            e.printStackTrace();
        }
        //fs.setWorkingDirectory(new Path("/user/arcomem/"));
        if (out.startsWith("output/")) {
            Path p = new Path(out);
            o = p;
            if (fs.isFile(p)) {//file
                outfiles = new Path[1];
                outfiles[0] = p;
                filesNo = 1;
                nextfile = 1;
                FSDataInputStream o = fs.open(p);
                outfile = new BufferedReader(new InputStreamReader(o));
            } else if (fs.exists(p)) {//MapReduce folder
                Path[] outf = FileUtil.stat2Paths(fs.listStatus(p));
                int paths = 0;
                outfiles = new Path[outf.length];
                for (Path f : outf) {
                    if (f.getName().startsWith("part")) {
                        outfiles[paths] = f;
                        paths++;
                    }
                }
                filesNo = paths;
                nextfile = 1;
                FSDataInputStream o = fs.open(outfiles[0]);
                outfile = new BufferedReader(new InputStreamReader(o));
            }
        } else {
            o = null;
            filesNo = 1;
            nextfile = 1;
            InputStream is = new ByteArrayInputStream(out.getBytes());
            outfile = new BufferedReader(new InputStreamReader(is));
        }
        lineFinished = true;
    } catch (IOException e) {
        e.printStackTrace();
    }
}