List of usage examples for org.apache.hadoop.fs.FileSystem#globStatus
public FileStatus[] globStatus(Path pathPattern) throws IOException
Returns all the files that match pathPattern and are not checksum files.
From source file:com.gruter.hadoop.customShell.CustomShell.java
License:Apache License
/**
 * Lists every file that matches the file pattern <i>srcf</i>.
 *
 * @param srcf a file pattern specifying source files
 * @param recursive whether files in sub-directories should be listed too
 * @return 0 when no per-entry listing reported an error, -1 otherwise
 * @throws IOException if the file system cannot be queried; a
 *         {@link FileNotFoundException} is raised when nothing matches
 * @see org.apache.hadoop.fs.FileSystem#globStatus(Path)
 */
private int ls(String srcf, boolean recursive) throws IOException {
    Path pattern = new Path(srcf);
    FileSystem fileSystem = pattern.getFileSystem(this.getConf());
    FileStatus[] matches = fileSystem.globStatus(pattern);
    if (matches == null || matches.length == 0) {
        throw new FileNotFoundException("Cannot access " + srcf + ": No such file or directory.");
    }

    // The header flag is only set when the pattern resolved to exactly one entry.
    boolean printHeader = matches.length == 1;

    int errorCount = 0;
    for (FileStatus match : matches) {
        errorCount += ls(match, fileSystem, recursive, printHeader);
    }
    if (errorCount != 0) {
        return -1;
    }
    return 0;
}
From source file:com.gruter.hadoop.customShell.CustomShell.java
License:Apache License
int runCmdHandler(CmdHandler handler, String[] args, int startIndex, boolean recursive) throws IOException { int errors = 0; for (int i = startIndex; i < args.length; i++) { Path srcPath = new Path(args[i]); FileSystem srcFs = srcPath.getFileSystem(getConf()); Path[] paths = FileUtil.stat2Paths(srcFs.globStatus(srcPath), srcPath); // if nothing matches to given glob pattern then increment error count if (paths.length == 0) { System.err.println(handler.getName() + ": could not get status for '" + args[i] + "'"); errors++;/*w w w .ja va2 s. c o m*/ } for (Path path : paths) { try { FileStatus file = srcFs.getFileStatus(path); if (file == null) { System.err.println(handler.getName() + ": could not get status for '" + path + "'"); errors++; } else { errors += runCmdHandler(handler, file, srcFs, recursive); } } catch (IOException e) { String msg = (e.getMessage() != null ? e.getLocalizedMessage() : (e.getCause().getMessage() != null ? e.getCause().getLocalizedMessage() : "null")); System.err.println( handler.getName() + ": could not get status for '" + path + "': " + msg.split("\n")[0]); errors++; } } } return (errors > 0 || handler.getErrorCode() != 0) ? 1 : 0; }
From source file:com.ibm.jaql.lang.expr.system.LsFn.java
License:Apache License
@Override public JsonIterator iter(final Context context) throws Exception { JsonString glob = (JsonString) exprs[0].eval(context); // Configuration conf = context.getConfiguration(); Configuration conf = new Configuration(); // TODO: get from context, incl options //URI uri;//from www.java 2 s . com //FileSystem fs = FileSystem.get(uri, conf); Path inpath = new Path(glob.toString()); FileSystem fs = inpath.getFileSystem(conf); //final FileStatus[] stats = fs.listStatus(path, filter); final FileStatus[] stats = fs.globStatus(inpath); if (stats == null || stats.length == 0) { return JsonIterator.EMPTY; } final MutableJsonDate accessTime = new MutableJsonDate(); final MutableJsonDate modifyTime = new MutableJsonDate(); final MutableJsonLong length = new MutableJsonLong(); final MutableJsonLong blockSize = new MutableJsonLong(); final MutableJsonLong replication = new MutableJsonLong(); final MutableJsonString path = new MutableJsonString(); final MutableJsonString owner = new MutableJsonString(); final MutableJsonString group = new MutableJsonString(); final MutableJsonString permission = new MutableJsonString(); final JsonValue[] values = new JsonValue[] { accessTime, modifyTime, length, blockSize, replication, path, owner, group, permission }; final BufferedJsonRecord rec = new BufferedJsonRecord(); rec.set(LsField.names, values, values.length, false); return new JsonIterator(rec) { int i = 0; @Override public boolean moveNext() throws Exception { if (i >= stats.length) { return false; } FileStatus stat = stats[i++]; // fs.getUri().toString(); long x = HadoopShim.getAccessTime(stat); if (x <= 0) { values[LsField.ACCESS_TIME.ordinal()] = null; } else { accessTime.set(x); values[LsField.ACCESS_TIME.ordinal()] = accessTime; } modifyTime.set(stat.getModificationTime()); length.set(stat.getLen()); blockSize.set(stat.getBlockSize()); replication.set(stat.getReplication()); path.setCopy(stat.getPath().toString()); owner.setCopy(stat.getOwner()); 
group.setCopy(stat.getGroup()); permission.setCopy(stat.getPermission().toString()); return true; } }; }
From source file:com.ikanow.infinit.e.processing.custom.utils.HadoopUtils.java
License:Open Source License
/**
 * Reads every non-empty {@code part-*} file in the job's path and converts
 * each line into a {@link BasicDBObject}.
 *
 * <p>Each line is split on the first tab character: when a tab is present the
 * text before it is stored under {@code "key"} and the remainder under
 * {@code "value"}; otherwise the whole line is stored under {@code "value"}.
 *
 * @param cmr the job whose output path is resolved via {@code getPathForJob}
 * @param nLimit not referenced by this method
 * @param fields not referenced by this method
 * @return the list of converted lines, empty when no part file matches
 * @throws IOException if a part file cannot be listed, opened or read
 * @throws SAXException declared for the configuration/path helpers this method calls
 * @throws ParserConfigurationException declared for the configuration/path helpers this method calls
 */
public static BasicDBList getBsonFromTextFiles(CustomMapReduceJobPojo cmr, int nLimit, String fields)
        throws IOException, SAXException, ParserConfigurationException {
    BasicDBList dbl = new BasicDBList();

    PropertiesManager props = new PropertiesManager();
    Configuration conf = getConfiguration(props);

    Path pathDir = HadoopUtils.getPathForJob(cmr, conf, false);
    FileSystem fs = FileSystem.get(conf);

    FileStatus[] files = fs.globStatus(new Path(pathDir.toString() + "/part-*"));
    if (files == null) {
        // Defensive: treat a null glob result the same as "no matches".
        return dbl;
    }
    for (FileStatus file : files) {
        if (file.getLen() > 0) {
            FSDataInputStream in = fs.open(file.getPath());
            try {
                // NOTE(review): the reader uses the platform default charset - confirm
                // that this matches the encoding the part files were written with.
                BufferedReader bin = new BufferedReader(new InputStreamReader(in));
                String s;
                while ((s = bin.readLine()) != null) {
                    String[] keyValue = s.split("\t", 2);
                    BasicDBObject dbo = new BasicDBObject();
                    if (keyValue.length > 1) {
                        dbo.put("key", keyValue[0]);
                        dbo.put("value", keyValue[1]);
                    } else {
                        dbo.put("value", keyValue[0]);
                    }
                    dbl.add(dbo);
                }
            } finally {
                // Close the stream even when reading fails part-way through.
                in.close();
            }
        }
    }
    return dbl;
}
From source file:com.inmobi.conduit.distcp.tools.GlobbedCopyListing.java
License:Apache License
/** * Implementation of CopyListing::buildListing(). * Creates the copy listing by "globbing" all source-paths. * @param pathToListingFile: The location at which the copy-listing file * is to be created. * @param options: Input Options for DistCp (indicating source/target paths.) * @throws IOException/*from w w w .j a v a 2 s . c om*/ */ @Override public void doBuildListing(Path pathToListingFile, DistCpOptions options) throws IOException { List<Path> globbedPaths = new ArrayList<Path>(); if (options.getSourcePaths().isEmpty()) { throw new InvalidInputException("Nothing to process. Source paths::EMPTY"); } for (Path p : options.getSourcePaths()) { FileSystem fs = p.getFileSystem(getConf()); FileStatus[] inputs = fs.globStatus(p); if (inputs != null && inputs.length > 0) { for (FileStatus onePath : inputs) { globbedPaths.add(onePath.getPath()); } } else { throw new InvalidInputException(p + " doesn't exist"); } } DistCpOptions optionsGlobbed = new DistCpOptions(globbedPaths, options.getTargetPath()); optionsGlobbed.setSyncFolder(options.shouldSyncFolder()); optionsGlobbed.setOverwrite(options.shouldOverwrite()); optionsGlobbed.setDeleteMissing(options.shouldDeleteMissing()); optionsGlobbed.setPreserveSrcPath(options.shouldPreserveSrcPath()); optionsGlobbed.setSkipPathValidation(options.isSkipPathValidation()); optionsGlobbed.setUseSimpleFileListing(options.isUseSimpleFileListing()); simpleListing.buildListing(pathToListingFile, optionsGlobbed); }
From source file:com.inmobi.conduit.distcp.tools.mapred.CopyCommitter.java
License:Apache License
/**
 * Deletes the temporary files left in {@code targetWorkPath} whose names
 * start with {@code .distcp.tmp.} followed by the job id with every
 * occurrence of "job" replaced by "attempt".
 *
 * @param targetWorkPath directory that is searched for temporary files
 * @param targetFS file system on which the files are globbed and deleted
 * @param jobId id of the job from which the file-name prefix is derived
 * @throws IOException if globbing or deleting fails
 */
private void deleteAttemptTempFiles(Path targetWorkPath, FileSystem targetFS, String jobId) throws IOException {
    String attemptPrefix = ".distcp.tmp." + jobId.replaceAll("job", "attempt");
    Path tempFilePattern = new Path(targetWorkPath, attemptPrefix + "*");

    FileStatus[] leftovers = targetFS.globStatus(tempFilePattern);
    if (leftovers == null || leftovers.length == 0) {
        return;
    }
    for (FileStatus leftover : leftovers) {
        LOG.info("Cleaning up " + leftover.getPath());
        // Non-recursive delete; the result is not inspected.
        targetFS.delete(leftover.getPath(), false);
    }
}
From source file:com.linkedin.cubert.analyzer.physical.AggregateRewriter.java
License:Open Source License
private void readSummaryMetaData() throws IOException, AggregateRewriteException { FileSystem fs = FileSystem.get(new JobConf()); this.mvExists = false; FileStatus[] files = fs.globStatus(new Path(mvPath + "/avro/*.avro")); if (files != null && files.length > 0) { this.mvExists = true; processSummaryMetaData(mvPath);// www . ja v a 2 s. co m } }
From source file:com.linkedin.cubert.examples.ListFiles.java
License:Open Source License
@Override public void setInput(Map<String, Block> input, JsonNode json, BlockProperties props) throws IOException, InterruptedException { List<String> files = new ArrayList<String>(); String dirsStr = JsonUtils.getText(json.get("args"), "dirs"); String[] dirs = CommonUtils.trim(dirsStr.split(",")); for (String dir : dirs) { Path path = new Path(dir); FileSystem fs = path.getFileSystem(PhaseContext.getConf()); FileStatus[] allStatus = fs.globStatus(path); if (allStatus == null || allStatus.length == 0) continue; for (FileStatus status : allStatus) { if (status.isDir()) { listFiles(fs, status.getPath(), files); } else { files.add(status.getPath().toUri().getPath()); }//from ww w . j ava 2 s .c o m } } iterator = files.iterator(); output = TupleFactory.getInstance().newTuple(1); }
From source file:com.linkedin.cubert.plan.physical.GenerateDictionary.java
License:Open Source License
public static void mergeDictionaries(Configuration conf, Path dir) throws IOException { Map<String, CodeDictionary> dictionaries = new HashMap<String, CodeDictionary>(); FileSystem fs = FileSystem.get(conf); Path currentDictPath = new Path(dir, "dictionary"); Schema schema = getSchema();/*from w w w . ja va2 s .c o m*/ // Read the existing dictionaries if (fs.exists(currentDictPath)) { dictionaries.putAll(loadDictionary(currentDictPath.toString(), true, conf)); // move the current dictionary to new file Path oldPath = new Path(dir, "_dictionary.old"); fs.delete(oldPath, true); fs.rename(currentDictPath, oldPath); } // Read the new entries Path globPath = new Path(dir, "tmp/part-*"); FileStatus[] allStatus = fs.globStatus(globPath); for (FileStatus status : allStatus) { DatumReader<GenericRecord> datumReader = new GenericDatumReader<GenericRecord>(schema); DataFileReader<GenericRecord> dataFileReader = new DataFileReader<GenericRecord>( new FsInput(status.getPath(), conf), datumReader); GenericRecord record = null; while (dataFileReader.hasNext()) { record = dataFileReader.next(); String colName = record.get("colname").toString(); String colValue = record.get("colvalue").toString(); CodeDictionary dict = dictionaries.get(colName); if (dict == null) { dict = new CodeDictionary(); dictionaries.put(colName, dict); } dict.addKey(colValue); } } // Write the dictionaries back DatumWriter<GenericRecord> datumWriter = new GenericDatumWriter<GenericRecord>(schema); DataFileWriter<GenericRecord> dataFileWriter = new DataFileWriter<GenericRecord>(datumWriter); FSDataOutputStream out = fs.create(currentDictPath); dataFileWriter.create(schema, out); Record record = new Record(schema); for (Map.Entry<String, CodeDictionary> entry : dictionaries.entrySet()) { String colName = entry.getKey(); CodeDictionary dict = entry.getValue(); for (String colValue : dict.keySet()) { int code = dict.getCodeForKey(colValue); record.put("colname", colName); record.put("colvalue", colValue); 
record.put("code", code); dataFileWriter.append(record); } } dataFileWriter.close(); }
From source file:com.linkedin.cubert.utils.AvroUtils.java
License:Open Source License
/**
 * Extracts the schema of an Avro file.
 *
 * <p>When {@code path} is a directory, the schema is taken from the first
 * entry returned by globbing {@code *.avro} inside it.
 *
 * @param conf configuration used to resolve the file system and open the file
 * @param path an Avro file, or a directory containing {@code *.avro} files
 * @return the schema obtained from the selected Avro file
 * @throws IOException if the path cannot be accessed, or it is a directory
 *         in which no {@code *.avro} file is found
 */
public static Schema getSchema(Configuration conf, Path path) throws IOException {
    FileSystem fileSystem = path.getFileSystem(conf);

    Path avroFile = path;
    if (fileSystem.getFileStatus(path).isDir()) {
        FileStatus[] candidates = fileSystem.globStatus(new Path(path, "*.avro"));
        if (candidates.length == 0) {
            throw new IOException("there are no files in " + path.toString());
        }
        avroFile = candidates[0].getPath();
    }

    System.out.println("Obtaining schema of avro file " + avroFile.toString());
    return getSchema(new FsInput(avroFile, conf));
}