List of usage examples for org.apache.hadoop.fs FileUtil stat2Paths
public static Path[] stat2Paths(FileStatus[] stats)
From source file:crunch.MaxTemperature.java
License:Apache License
public static void main(String[] args) throws Exception { String uri = args[0];/*from ww w .ja va2s .c o m*/ Configuration conf = new Configuration(); FileSystem fs = FileSystem.get(URI.create(uri), conf); Path[] paths = new Path[args.length]; for (int i = 0; i < paths.length; i++) { paths[i] = new Path(args[i]); } FileStatus[] status = fs.listStatus(paths); // XXX Filesystem.listStatus(Path[]) -> FileStatus (see below) Path[] listedPaths = FileUtil.stat2Paths(status); // XXX FileStatus to Path[] for (Path p : listedPaths) { System.out.println(p); } }
From source file:crunch.MaxTemperature.java
License:Apache License
private Path[] glob(String pattern) throws IOException { return FileUtil.stat2Paths(fs.globStatus(new Path(BASE_PATH + pattern))); // XXX }
From source file:crunch.MaxTemperature.java
License:Apache License
private Path[] glob(String pattern, PathFilter pathFilter) throws IOException { return FileUtil.stat2Paths(fs.globStatus(new Path(BASE_PATH + pattern), pathFilter)); // XXX }
From source file:crunch.MaxTemperature.java
License:Apache License
public void test() throws Exception { Configuration conf = createJobConf(); Path localInput = new Path("input/ncdc/micro"); Path input = getInputDir(); Path output = getOutputDir(); // Copy input data into test HDFS getFileSystem().copyFromLocalFile(localInput, input); MaxTemperatureDriver driver = new MaxTemperatureDriver(); driver.setConf(conf);/*from w w w . j ava2 s . c om*/ int exitCode = driver.run(new String[] { input.toString(), output.toString() }); assertThat(exitCode, is(0)); // Check the output is as expected Path[] outputFiles = FileUtil.stat2Paths(getFileSystem().listStatus(output, new OutputLogFilter())); assertThat(outputFiles.length, is(1)); InputStream in = getFileSystem().open(outputFiles[0]); BufferedReader reader = new BufferedReader(new InputStreamReader(in)); assertThat(reader.readLine(), is("1949\t111")); assertThat(reader.readLine(), is("1950\t22")); assertThat(reader.readLine(), nullValue()); reader.close(); }
From source file:crunch.MaxTemperature.java
License:Apache License
private void checkOutput(Configuration conf, Path output) throws IOException { FileSystem fs = FileSystem.getLocal(conf); Path[] outputFiles = FileUtil.stat2Paths(fs.listStatus(output, new OutputLogFilter())); assertThat(outputFiles.length, is(1)); BufferedReader actual = asBufferedReader(fs.open(outputFiles[0])); BufferedReader expected = asBufferedReader(getClass().getResourceAsStream("/expected.txt")); String expectedLine;// w w w . j a v a2 s .com while ((expectedLine = expected.readLine()) != null) { assertThat(actual.readLine(), is(expectedLine)); } assertThat(actual.readLine(), nullValue()); actual.close(); expected.close(); }
From source file:de.rwth.i9.palm.analytics.algorithm.lda.CustomVectorDumper.java
License:Apache License
@Override public int run(String[] args) throws Exception { /**/*ww w. j av a 2 s.co m*/ * Option seqOpt = * obuilder.withLongName("seqFile").withRequired(false).withArgument( * abuilder.withName("seqFile").withMinimum(1).withMaximum(1).create()). * withDescription( * "The Sequence File containing the Vectors").withShortName * ("s").create(); Option dirOpt = * obuilder.withLongName("seqDirectory"). * withRequired(false).withArgument( * abuilder.withName("seqDirectory").withMinimum * (1).withMaximum(1).create()) .withDescription( * "The directory containing Sequence File of Vectors") * .withShortName("d").create(); */ addInputOption(); addOutputOption(); addOption("useKey", "u", "If the Key is a vector than dump that instead"); addOption("printKey", "p", "Print out the key as well, delimited by tab (or the value if useKey is true"); addOption("dictionary", "d", "The dictionary file.", false); addOption("dictionaryType", "dt", "The dictionary file type (text|seqfile)", false); addOption("csv", "c", "Output the Vector as CSV. Otherwise it substitutes in the terms for vector cell entries"); addOption("namesAsComments", "n", "If using CSV output, optionally add a comment line for each NamedVector " + "(if the vector is one) printing out the name"); addOption("nameOnly", "N", "Use the name as the value for each NamedVector (skip other vectors)"); addOption("sortVectors", "sort", "Sort output key/value pairs of the vector entries in abs magnitude " + "descending order"); addOption("quiet", "q", "Print only file contents"); addOption("sizeOnly", "sz", "Dump only the size of the vector"); addOption("numItems", "ni", "Output at most <n> vecors", false); addOption("vectorSize", "vs", "Truncate vectors to <vs> length when dumping (most useful when in" + " conjunction with -sort", false); addOption(buildOption("filter", "fi", "Only dump out those vectors whose name matches the filter." 
+ " Multiple items may be specified by repeating the argument.", true, 1, Integer.MAX_VALUE, false, null)); if (parseArguments(args, false, true) == null) { return -1; } Path[] pathArr; Configuration conf = new Configuration(); FileSystem fs = FileSystem.get(conf); Path input = getInputPath(); FileStatus fileStatus = fs.getFileStatus(input); if (fileStatus.isDir()) { pathArr = FileUtil.stat2Paths(fs.listStatus(input, new OutputFilesFilter())); } else { FileStatus[] inputPaths = fs.globStatus(input); pathArr = new Path[inputPaths.length]; int i = 0; for (FileStatus fstatus : inputPaths) { pathArr[i++] = fstatus.getPath(); } } String dictionaryType = getOption("dictionaryType", "text"); boolean sortVectors = hasOption("sortVectors"); boolean quiet = hasOption("quiet"); if (!quiet) { log.info("Sort? {}", sortVectors); } String[] dictionary = null; if (hasOption("dictionary")) { String dictFile = getOption("dictionary"); if ("text".equals(dictionaryType)) { dictionary = VectorHelper.loadTermDictionary(new File(dictFile)); } else if ("sequencefile".equals(dictionaryType)) { dictionary = VectorHelper.loadTermDictionary(conf, dictFile); } else { // TODO: support Lucene's FST as a dictionary type throw new IOException("Invalid dictionary type: " + dictionaryType); } } Set<String> filters; if (hasOption("filter")) { filters = Sets.newHashSet(getOptions("filter")); } else { filters = null; } boolean useCSV = hasOption("csv"); boolean sizeOnly = hasOption("sizeOnly"); boolean nameOnly = hasOption("nameOnly"); boolean namesAsComments = hasOption("namesAsComments"); boolean transposeKeyValue = hasOption("vectorAsKey"); Writer writer; boolean shouldClose; File output = getOutputFile(); if (output != null) { shouldClose = true; log.info("Output file: {}", output); Files.createParentDirs(output); writer = Files.newWriter(output, Charsets.UTF_8); } else { shouldClose = false; writer = new OutputStreamWriter(System.out, Charsets.UTF_8); } try { boolean printKey = 
hasOption("printKey"); if (useCSV && dictionary != null) { writer.write("#"); for (int j = 0; j < dictionary.length; j++) { writer.write(dictionary[j]); if (j < dictionary.length - 1) { writer.write(','); } } writer.write('\n'); } Long numItems = null; if (hasOption("numItems")) { numItems = Long.parseLong(getOption("numItems")); if (quiet) { writer.append("#Max Items to dump: ").append(String.valueOf(numItems)).append('\n'); } } int maxIndexesPerVector = hasOption("vectorSize") ? Integer.parseInt(getOption("vectorSize")) : Integer.MAX_VALUE; long itemCount = 0; int fileCount = 0; for (Path path : pathArr) { if (numItems != null && numItems <= itemCount) { break; } if (quiet) { log.info("Processing file '{}' ({}/{})", path, ++fileCount, pathArr.length); } SequenceFileIterable<Writable, Writable> iterable = new SequenceFileIterable<Writable, Writable>( path, true, conf); Iterator<Pair<Writable, Writable>> iterator = iterable.iterator(); long i = 0; while (iterator.hasNext() && (numItems == null || itemCount < numItems)) { Pair<Writable, Writable> record = iterator.next(); Writable keyWritable = record.getFirst(); Writable valueWritable = record.getSecond(); if (printKey) { Writable notTheVectorWritable = transposeKeyValue ? valueWritable : keyWritable; writer.write(notTheVectorWritable.toString()); writer.write('\t'); } Vector vector; try { vector = ((VectorWritable) (transposeKeyValue ? keyWritable : valueWritable)).get(); } catch (ClassCastException e) { if ((transposeKeyValue ? keyWritable : valueWritable) instanceof WeightedPropertyVectorWritable) { vector = ((WeightedPropertyVectorWritable) (transposeKeyValue ? 
keyWritable : valueWritable)).getVector(); } else { throw e; } } if (filters != null && vector instanceof NamedVector && !filters.contains(((NamedVector) vector).getName())) { // we are filtering out this item, skip continue; } if (sizeOnly) { if (vector instanceof NamedVector) { writer.write(((NamedVector) vector).getName()); writer.write(":"); } else { writer.write(String.valueOf(i++)); writer.write(":"); } writer.write(String.valueOf(vector.size())); writer.write('\n'); } else if (nameOnly) { if (vector instanceof NamedVector) { writer.write(((NamedVector) vector).getName()); writer.write('\n'); } } else { String fmtStr; if (useCSV) { fmtStr = VectorHelper.vectorToCSVString(vector, namesAsComments); } else { fmtStr = VectorHelper.vectorToJson(vector, dictionary, maxIndexesPerVector, sortVectors); } writer.write(fmtStr); writer.write('\n'); } itemCount++; } } writer.flush(); } finally { if (shouldClose) { Closeables.close(writer, false); } } return 0; }
From source file:eml.studio.server.util.HDFSIO.java
License:Open Source License
/**
 * Lists the entries directly under the given location.
 *
 * @param uri location to list
 * @return the listed paths, or {@code null} when {@code uri} does not exist
 * @throws IOException if the existence check or the listing fails
 */
public static Path[] list(String uri) throws IOException {
    Path directory = new Path(uri);
    if (fs.exists(directory)) {
        return FileUtil.stat2Paths(fs.listStatus(directory));
    }
    return null;
}
From source file:gr.ntua.h2rdf.byteImport.HexastoreBulkImport.java
License:Open Source License
private void loadHFiles() throws Exception { conf = HBaseConfiguration.create();//from ww w . j a v a 2 s. c om conf.addResource("hbase-default.xml"); conf.addResource("hbase-site.xml"); HBaseAdmin hadmin = new HBaseAdmin(conf); Path hfofDir = new Path("out"); FileSystem fs = hfofDir.getFileSystem(conf); //if (!fs.exists(hfofDir)) { // throw new FileNotFoundException("HFileOutputFormat dir " + // hfofDir + " not found"); //} FileStatus[] familyDirStatuses = fs.listStatus(hfofDir); //if (familyDirStatuses == null) { // throw new FileNotFoundException("No families found in " + hfofDir); //} int length = 0; byte[][] splits = new byte[18000][]; for (FileStatus stat : familyDirStatuses) { if (!stat.isDir()) { continue; } Path familyDir = stat.getPath(); // Skip _logs, etc if (familyDir.getName().startsWith("_")) continue; //byte[] family = familyDir.getName().getBytes(); Path[] hfiles = FileUtil.stat2Paths(fs.listStatus(familyDir)); for (Path hfile : hfiles) { if (hfile.getName().startsWith("_")) continue; HFile.Reader hfr = HFile.createReader(fs, hfile, new CacheConfig(conf)); //HFile.Reader hfr = new HFile.Reader(fs, hfile, null, false); final byte[] first; try { hfr.loadFileInfo(); first = hfr.getFirstRowKey(); } finally { hfr.close(); } splits[length] = first.clone(); length++; } } //System.out.println(length); byte[][] splits1 = new byte[length][]; for (int i = 0; i < splits1.length; i++) { splits1[i] = splits[i]; } Arrays.sort(splits1, Bytes.BYTES_COMPARATOR); //HTableDescriptor desc = new HTableDescriptor("H2RDF"); HTableDescriptor desc = new HTableDescriptor(TABLE_NAME); HColumnDescriptor family = new HColumnDescriptor("A"); desc.addFamily(family); //for (int i = 0; i < splits.length; i++) { // System.out.println(Bytes.toStringBinary(splits[i])); //} conf.setInt("zookeeper.session.timeout", 600000); if (hadmin.tableExists(TABLE_NAME)) { //hadmin.disableTable(TABLE_NAME); //hadmin.deleteTable(TABLE_NAME); } else { hadmin.createTable(desc, splits1); } 
//hadmin.createTable(desc); String[] args1 = new String[2]; args1[0] = "out"; args1[1] = TABLE_NAME; //args1[1]="new2"; ToolRunner.run(new LoadIncrementalHFiles(HBaseConfiguration.create()), args1); }
From source file:gr.ntua.h2rdf.client.ResultSet.java
License:Open Source License
public ResultSet(String out, H2RDFConf hconf) { //System.out.println(out); Configuration conf = hconf.getConf();//new Configuration(); //System.out.println(conf.get("fs.default.name")); try {// w ww. j a v a 2 s. c om try { //Configuration c = HBaseConfiguration.create(); this.table = new HTable(conf, hconf.getTable()); fs = FileSystem.get(new URI(conf.get("fs.default.name")), conf, hconf.getUser()); } catch (InterruptedException e) { // TODO Auto-generated catch block e.printStackTrace(); } catch (URISyntaxException e) { // TODO Auto-generated catch block e.printStackTrace(); } //fs.setWorkingDirectory(new Path("/user/arcomem/")); if (out.startsWith("output/")) { Path p = new Path(out); o = p; if (fs.isFile(p)) {//file outfiles = new Path[1]; outfiles[0] = p; filesNo = 1; nextfile = 1; FSDataInputStream o = fs.open(p); outfile = new BufferedReader(new InputStreamReader(o)); } else if (fs.exists(p)) {//MapReduce folder Path[] outf = FileUtil.stat2Paths(fs.listStatus(p)); int paths = 0; outfiles = new Path[outf.length]; for (Path f : outf) { if (f.getName().startsWith("part")) { outfiles[paths] = f; paths++; } } filesNo = paths; nextfile = 1; FSDataInputStream o = fs.open(outfiles[0]); outfile = new BufferedReader(new InputStreamReader(o)); } } else { o = null; filesNo = 1; nextfile = 1; InputStream is = new ByteArrayInputStream(out.getBytes()); outfile = new BufferedReader(new InputStreamReader(is)); } } catch (IOException e) { e.printStackTrace(); } }
From source file:gr.ntua.h2rdf.client.ResultSetOpenRDF.java
License:Open Source License
public ResultSetOpenRDF(String out, H2RDFConf hconf) { //System.out.println(out); Configuration conf = hconf.getConf();// new Configuration(); //System.out.println(conf.get("fs.default.name")); try {/*w w w. ja v a 2s. c o m*/ try { //Configuration c = HBaseConfiguration.create(); this.table = new HTable(conf, hconf.getTable()); fs = FileSystem.get(new URI(conf.get("fs.default.name")), conf, hconf.getUser()); } catch (InterruptedException e) { // TODO Auto-generated catch block e.printStackTrace(); } catch (URISyntaxException e) { // TODO Auto-generated catch block e.printStackTrace(); } //fs.setWorkingDirectory(new Path("/user/arcomem/")); if (out.startsWith("output/")) { Path p = new Path(out); o = p; if (fs.isFile(p)) {//file outfiles = new Path[1]; outfiles[0] = p; filesNo = 1; nextfile = 1; FSDataInputStream o = fs.open(p); outfile = new BufferedReader(new InputStreamReader(o)); } else if (fs.exists(p)) {//MapReduce folder Path[] outf = FileUtil.stat2Paths(fs.listStatus(p)); int paths = 0; outfiles = new Path[outf.length]; for (Path f : outf) { if (f.getName().startsWith("part")) { outfiles[paths] = f; paths++; } } filesNo = paths; nextfile = 1; FSDataInputStream o = fs.open(outfiles[0]); outfile = new BufferedReader(new InputStreamReader(o)); } } else { o = null; filesNo = 1; nextfile = 1; InputStream is = new ByteArrayInputStream(out.getBytes()); outfile = new BufferedReader(new InputStreamReader(is)); } lineFinished = true; } catch (IOException e) { e.printStackTrace(); } }