Example usage for org.apache.hadoop.fs FileSystem globStatus

List of usage examples for org.apache.hadoop.fs FileSystem globStatus

Introduction

On this page you can find example usages of org.apache.hadoop.fs.FileSystem.globStatus.

Prototype

public FileStatus[] globStatus(Path pathPattern) throws IOException 

Source Link

Document

Returns all the files that match pathPattern and are not checksum files.

Usage

From source file:com.gruter.hadoop.customShell.CustomShell.java

License:Apache License

/**
 * Lists every file that matches the file pattern <i>srcf</i>.
 *
 * @param srcf a file pattern specifying source files
 * @param recursive whether files in subdirectories should be listed as well
 * @return 0 when the per-entry listing calls add up to zero errors, -1 otherwise
 * @throws IOException if the pattern cannot be expanded; a
 *         {@link FileNotFoundException} is thrown when nothing matches
 * @see org.apache.hadoop.fs.FileSystem#globStatus(Path)
 */
private int ls(String srcf, boolean recursive) throws IOException {
    Path pattern = new Path(srcf);
    FileSystem fileSystem = pattern.getFileSystem(this.getConf());
    FileStatus[] matches = fileSystem.globStatus(pattern);
    if (matches == null || matches.length == 0) {
        throw new FileNotFoundException("Cannot access " + srcf + ": No such file or directory.");
    }

    // The header flag is only set when the pattern resolved to exactly one entry.
    boolean printHeader = matches.length == 1;
    int failures = 0;
    for (FileStatus match : matches) {
        failures += ls(match, fileSystem, recursive, printHeader);
    }
    return failures != 0 ? -1 : 0;
}

From source file:com.gruter.hadoop.customShell.CustomShell.java

License:Apache License

/**
 * Expands each argument from {@code startIndex} onwards as a glob pattern and runs
 * {@code handler} on every path the pattern resolves to.
 *
 * <p>Failures for individual paths are reported on {@code System.err} and counted;
 * they do not abort the processing of the remaining paths.
 *
 * @param handler the command to run; its name prefixes every error message
 * @param args arguments, each interpreted as a path or glob pattern
 * @param startIndex index of the first entry of {@code args} to process
 * @param recursive passed through to the per-file handler invocation
 * @return 1 if any error was counted or the handler reports a non-zero error code,
 *         0 otherwise
 * @throws IOException if a file system cannot be obtained or a pattern cannot be expanded
 */
int runCmdHandler(CmdHandler handler, String[] args, int startIndex, boolean recursive) throws IOException {
    int errors = 0;

    for (int i = startIndex; i < args.length; i++) {
        Path srcPath = new Path(args[i]);
        FileSystem srcFs = srcPath.getFileSystem(getConf());
        Path[] paths = FileUtil.stat2Paths(srcFs.globStatus(srcPath), srcPath);
        // if nothing matches to given glob pattern then increment error count
        if (paths.length == 0) {
            System.err.println(handler.getName() + ": could not get status for '" + args[i] + "'");
            errors++;
        }
        for (Path path : paths) {
            try {
                FileStatus file = srcFs.getFileStatus(path);
                if (file == null) {
                    System.err.println(handler.getName() + ": could not get status for '" + path + "'");
                    errors++;
                } else {
                    errors += runCmdHandler(handler, file, srcFs, recursive);
                }
            } catch (IOException e) {
                // Prefer the exception's own message, then its cause's. An IOException
                // may have no cause at all, so the cause must be null-checked before use.
                Throwable cause = e.getCause();
                String msg = null;
                if (e.getMessage() != null) {
                    msg = e.getLocalizedMessage();
                } else if (cause != null && cause.getMessage() != null) {
                    msg = cause.getLocalizedMessage();
                }
                if (msg == null) {
                    msg = "null";
                }
                // Report only the first line. indexOf is used instead of split("\n")[0]
                // because split discards trailing empty strings and returns an empty
                // array for a message that consists solely of newlines.
                int lineEnd = msg.indexOf('\n');
                String firstLine = lineEnd >= 0 ? msg.substring(0, lineEnd) : msg;
                System.err.println(
                        handler.getName() + ": could not get status for '" + path + "': " + firstLine);
                errors++;
            }
        }
    }

    return (errors > 0 || handler.getErrorCode() != 0) ? 1 : 0;
}

From source file:com.ibm.jaql.lang.expr.system.LsFn.java

License:Apache License

/**
 * Evaluates the first argument as a glob pattern and returns an iterator over the
 * status of every matching file.
 *
 * <p>The iterator is built around a single {@code BufferedJsonRecord} whose fields are
 * named by {@code LsField.names}; each {@code moveNext()} call overwrites the shared
 * mutable values with the next file's status instead of allocating a new record.
 *
 * @param context the context used to evaluate the glob expression
 * @return an iterator over the matching files, or {@code JsonIterator.EMPTY} when the
 *         glob matches nothing
 * @throws Exception if evaluating the argument or querying the file system fails
 */
@Override
public JsonIterator iter(final Context context) throws Exception {
    JsonString globText = (JsonString) exprs[0].eval(context);
    // TODO: get the configuration from context (context.getConfiguration()), incl options
    Configuration hadoopConf = new Configuration();
    Path globPath = new Path(globText.toString());
    FileSystem fileSystem = globPath.getFileSystem(hadoopConf);
    final FileStatus[] stats = fileSystem.globStatus(globPath);

    boolean nothingMatched = (stats == null) || (stats.length == 0);
    if (nothingMatched) {
        return JsonIterator.EMPTY;
    }

    // Reusable holders, one per record field.
    final MutableJsonDate accessTime = new MutableJsonDate();
    final MutableJsonDate modifyTime = new MutableJsonDate();
    final MutableJsonLong length = new MutableJsonLong();
    final MutableJsonLong blockSize = new MutableJsonLong();
    final MutableJsonLong replication = new MutableJsonLong();
    final MutableJsonString path = new MutableJsonString();
    final MutableJsonString owner = new MutableJsonString();
    final MutableJsonString group = new MutableJsonString();
    final MutableJsonString permission = new MutableJsonString();

    // The same array instance is handed to the record and later mutated in moveNext().
    final JsonValue[] values = new JsonValue[] {
            accessTime, modifyTime, length, blockSize, replication, path, owner, group, permission };
    final BufferedJsonRecord rec = new BufferedJsonRecord();
    rec.set(LsField.names, values, values.length, false);

    return new JsonIterator(rec) {
        int nextIndex = 0;

        @Override
        public boolean moveNext() throws Exception {
            if (nextIndex >= stats.length) {
                return false;
            }

            FileStatus current = stats[nextIndex];
            nextIndex++;

            // A non-positive access time is exposed as a null field value.
            long rawAccessTime = HadoopShim.getAccessTime(current);
            int accessSlot = LsField.ACCESS_TIME.ordinal();
            if (rawAccessTime > 0) {
                accessTime.set(rawAccessTime);
                values[accessSlot] = accessTime;
            } else {
                values[accessSlot] = null;
            }

            modifyTime.set(current.getModificationTime());
            length.set(current.getLen());
            blockSize.set(current.getBlockSize());
            replication.set(current.getReplication());
            path.setCopy(current.getPath().toString());
            owner.setCopy(current.getOwner());
            group.setCopy(current.getGroup());
            permission.setCopy(current.getPermission().toString());
            return true;
        }
    };
}

From source file:com.ikanow.infinit.e.processing.custom.utils.HadoopUtils.java

License:Open Source License

/**
 * Reads the {@code part-*} files under the job's path and converts every line into a
 * BSON object.
 *
 * <p>Each line is split at its first tab: the text before the tab is stored under
 * {@code "key"} and the remainder under {@code "value"}. A line without a tab is stored
 * under {@code "value"} only. Files of length zero are skipped.
 *
 * @param cmr the job whose path is resolved through {@code HadoopUtils.getPathForJob}
 * @param nLimit not used by this method (NOTE(review): presumably meant as a cap on the
 *        number of returned objects - confirm with callers before enforcing it)
 * @param fields not used by this method
 * @return the objects built from the lines, in the order they were read
 * @throws IOException if listing, opening or reading a file fails
 * @throws SAXException declared by the signature; not thrown directly by this body
 * @throws ParserConfigurationException declared by the signature; not thrown directly
 *         by this body
 */
public static BasicDBList getBsonFromTextFiles(CustomMapReduceJobPojo cmr, int nLimit, String fields)
        throws IOException, SAXException, ParserConfigurationException {

    BasicDBList dbl = new BasicDBList();

    PropertiesManager props = new PropertiesManager();
    Configuration conf = getConfiguration(props);

    Path pathDir = HadoopUtils.getPathForJob(cmr, conf, false);
    FileSystem fs = FileSystem.get(conf);

    FileStatus[] files = fs.globStatus(new Path(pathDir.toString() + "/part-*"));
    if (files == null) {
        // Defensive: treat a null glob result the same as "no part files".
        return dbl;
    }

    for (FileStatus file : files) {
        if (file.getLen() > 0) {
            FSDataInputStream in = fs.open(file.getPath());
            // NOTE(review): decoding uses the platform default charset, as before -
            // confirm whether the part files should be read with an explicit charset.
            BufferedReader bin = new BufferedReader(new InputStreamReader(in));
            try {
                for (String s = bin.readLine(); s != null; s = bin.readLine()) {
                    String[] keyValue = s.split("\t", 2);
                    BasicDBObject dbo = new BasicDBObject();
                    if (keyValue.length > 1) {
                        dbo.put("key", keyValue[0]);
                        dbo.put("value", keyValue[1]);
                    } else {
                        dbo.put("value", keyValue[0]);
                    }
                    dbl.add(dbo);
                }
            } finally {
                // Closing the reader also closes the wrapped file stream, so the file
                // is released even when a read fails half-way through.
                bin.close();
            }
        }
    }
    return dbl;
}

From source file:com.inmobi.conduit.distcp.tools.GlobbedCopyListing.java

License:Apache License

/**
 * Implementation of CopyListing::buildListing().
 * Creates the copy listing by "globbing" all source-paths.
 * @param pathToListingFile: The location at which the copy-listing file
 *                           is to be created.
 * @param options: Input Options for DistCp (indicating source/target paths.)
 * @throws IOException/*from w w w  .j  a  v a  2  s  . c  om*/
 */
@Override
public void doBuildListing(Path pathToListingFile, DistCpOptions options) throws IOException {

    List<Path> globbedPaths = new ArrayList<Path>();
    if (options.getSourcePaths().isEmpty()) {
        throw new InvalidInputException("Nothing to process. Source paths::EMPTY");
    }

    for (Path p : options.getSourcePaths()) {
        FileSystem fs = p.getFileSystem(getConf());
        FileStatus[] inputs = fs.globStatus(p);

        if (inputs != null && inputs.length > 0) {
            for (FileStatus onePath : inputs) {
                globbedPaths.add(onePath.getPath());
            }
        } else {
            throw new InvalidInputException(p + " doesn't exist");
        }
    }

    DistCpOptions optionsGlobbed = new DistCpOptions(globbedPaths, options.getTargetPath());
    optionsGlobbed.setSyncFolder(options.shouldSyncFolder());
    optionsGlobbed.setOverwrite(options.shouldOverwrite());
    optionsGlobbed.setDeleteMissing(options.shouldDeleteMissing());
    optionsGlobbed.setPreserveSrcPath(options.shouldPreserveSrcPath());
    optionsGlobbed.setSkipPathValidation(options.isSkipPathValidation());
    optionsGlobbed.setUseSimpleFileListing(options.isUseSimpleFileListing());

    simpleListing.buildListing(pathToListingFile, optionsGlobbed);
}

From source file:com.inmobi.conduit.distcp.tools.mapred.CopyCommitter.java

License:Apache License

/**
 * Removes the temporary files found directly under {@code targetWorkPath} whose names
 * start with {@code .distcp.tmp.} followed by {@code jobId} with every "job" replaced
 * by "attempt".
 *
 * @param targetWorkPath directory that is searched for temporary files
 * @param targetFS file system on which the search and the deletions are performed
 * @param jobId job identifier from which the attempt prefix is derived
 * @throws IOException if the listing or a deletion fails
 */
private void deleteAttemptTempFiles(Path targetWorkPath, FileSystem targetFS, String jobId) throws IOException {

    String attemptPrefix = ".distcp.tmp." + jobId.replace("job", "attempt");
    Path tempFilePattern = new Path(targetWorkPath, attemptPrefix + "*");
    FileStatus[] leftovers = targetFS.globStatus(tempFilePattern);

    if (leftovers == null || leftovers.length == 0) {
        return;
    }

    for (FileStatus leftover : leftovers) {
        LOG.info("Cleaning up " + leftover.getPath());
        // Non-recursive delete.
        targetFS.delete(leftover.getPath(), false);
    }
}

From source file:com.linkedin.cubert.analyzer.physical.AggregateRewriter.java

License:Open Source License

/**
 * Checks whether {@code mvPath} holds any {@code avro/*.avro} files, records the answer
 * in {@code mvExists} and, if files exist, processes the summary metadata.
 *
 * @throws IOException if the file system cannot be obtained or queried
 * @throws AggregateRewriteException declared by the signature; not thrown directly by
 *         this body
 */
private void readSummaryMetaData() throws IOException, AggregateRewriteException {
    FileSystem fileSystem = FileSystem.get(new JobConf());
    this.mvExists = false;

    Path avroPattern = new Path(mvPath + "/avro/*.avro");
    FileStatus[] avroFiles = fileSystem.globStatus(avroPattern);
    if (avroFiles == null || avroFiles.length == 0) {
        return;
    }

    this.mvExists = true;
    processSummaryMetaData(mvPath);
}

From source file:com.linkedin.cubert.examples.ListFiles.java

License:Open Source License

/**
 * Collects the files named by the comma-separated {@code dirs} argument and prepares
 * the iterator and the single-field output tuple.
 *
 * <p>Every entry of {@code dirs} is expanded as a glob. Matching directories are handed
 * to {@code listFiles}; matching files are added by the path component of their URI.
 * Entries that match nothing are ignored.
 *
 * @param input not read by this method
 * @param json operator configuration; {@code args.dirs} holds the directory list
 * @param props not read by this method
 * @throws IOException if a file system cannot be obtained or queried
 * @throws InterruptedException declared by the signature; not thrown directly by this body
 */
@Override
public void setInput(Map<String, Block> input, JsonNode json, BlockProperties props)
        throws IOException, InterruptedException {
    List<String> collected = new ArrayList<String>();
    String dirsArg = JsonUtils.getText(json.get("args"), "dirs");
    String[] dirPatterns = CommonUtils.trim(dirsArg.split(","));

    for (String dirPattern : dirPatterns) {
        Path pattern = new Path(dirPattern);
        FileSystem fs = pattern.getFileSystem(PhaseContext.getConf());
        FileStatus[] matches = fs.globStatus(pattern);

        boolean hasMatches = matches != null && matches.length != 0;
        if (hasMatches) {
            for (FileStatus match : matches) {
                if (!match.isDir()) {
                    collected.add(match.getPath().toUri().getPath());
                } else {
                    listFiles(fs, match.getPath(), collected);
                }
            }
        }
    }

    iterator = collected.iterator();
    output = TupleFactory.getInstance().newTuple(1);
}

From source file:com.linkedin.cubert.plan.physical.GenerateDictionary.java

License:Open Source License

/**
 * Merges the dictionary stored at {@code dir/dictionary} with the new entries found in
 * {@code dir/tmp/part-*} and writes the combined result back to {@code dir/dictionary}.
 *
 * <p>An existing dictionary file is first loaded and then renamed to
 * {@code dir/_dictionary.old}, replacing any previous file of that name.
 *
 * @param conf configuration used to obtain the file system and to open the part files
 * @param dir directory holding the dictionary and the {@code tmp} part files
 * @throws IOException if reading the existing data or writing the merged file fails
 */
public static void mergeDictionaries(Configuration conf, Path dir) throws IOException {
    Map<String, CodeDictionary> dictionaries = new HashMap<String, CodeDictionary>();
    FileSystem fs = FileSystem.get(conf);

    Path currentDictPath = new Path(dir, "dictionary");
    Schema schema = getSchema();

    // Read the existing dictionaries
    if (fs.exists(currentDictPath)) {
        dictionaries.putAll(loadDictionary(currentDictPath.toString(), true, conf));

        // move the current dictionary to new file
        Path oldPath = new Path(dir, "_dictionary.old");
        fs.delete(oldPath, true);
        fs.rename(currentDictPath, oldPath);
    }

    // Read the new entries
    Path globPath = new Path(dir, "tmp/part-*");
    FileStatus[] allStatus = fs.globStatus(globPath);
    if (allStatus != null) {
        for (FileStatus status : allStatus) {
            DatumReader<GenericRecord> datumReader = new GenericDatumReader<GenericRecord>(schema);
            FsInput partInput = new FsInput(status.getPath(), conf);
            DataFileReader<GenericRecord> dataFileReader = null;
            try {
                dataFileReader = new DataFileReader<GenericRecord>(partInput, datumReader);
                while (dataFileReader.hasNext()) {
                    GenericRecord record = dataFileReader.next();
                    String colName = record.get("colname").toString();
                    String colValue = record.get("colvalue").toString();

                    CodeDictionary dict = dictionaries.get(colName);
                    if (dict == null) {
                        dict = new CodeDictionary();
                        dictionaries.put(colName, dict);
                    }

                    dict.addKey(colValue);
                }
            } finally {
                // Release the part file whether or not reading succeeded. If the reader
                // could not even be constructed, close the raw input directly.
                if (dataFileReader != null) {
                    dataFileReader.close();
                } else {
                    partInput.close();
                }
            }
        }
    }

    // Write the dictionaries back
    DatumWriter<GenericRecord> datumWriter = new GenericDatumWriter<GenericRecord>(schema);
    DataFileWriter<GenericRecord> dataFileWriter = new DataFileWriter<GenericRecord>(datumWriter);
    FSDataOutputStream out = fs.create(currentDictPath);
    boolean writerOpened = false;
    try {
        dataFileWriter.create(schema, out);
        writerOpened = true;
        Record record = new Record(schema);

        for (Map.Entry<String, CodeDictionary> entry : dictionaries.entrySet()) {
            String colName = entry.getKey();
            CodeDictionary dict = entry.getValue();

            for (String colValue : dict.keySet()) {
                int code = dict.getCodeForKey(colValue);
                record.put("colname", colName);
                record.put("colvalue", colValue);
                record.put("code", code);

                dataFileWriter.append(record);
            }
        }
    } finally {
        // Once the writer has been opened on the stream, closing the writer is expected
        // to flush and close that stream; before that point the stream must be closed
        // directly, otherwise a failed create() would leak it.
        if (writerOpened) {
            dataFileWriter.close();
        } else {
            out.close();
        }
    }

}

From source file:com.linkedin.cubert.utils.AvroUtils.java

License:Open Source License

/**
 * Extracts the schema of an Avro file.
 *
 * <p>When {@code path} is a directory, the schema is read from the first entry that
 * the glob {@code *.avro} returns for that directory.
 *
 * @param conf configuration used to obtain the file system and to open the file
 * @param path an Avro file, or a directory containing {@code *.avro} files
 * @return the schema read from the selected file
 * @throws IOException if the path cannot be queried, or it is a directory without any
 *         {@code *.avro} file
 */
public static Schema getSchema(Configuration conf, Path path) throws IOException {
    FileSystem fs = path.getFileSystem(conf);

    Path avroFile = path;
    if (fs.getFileStatus(path).isDir()) {
        FileStatus[] candidates = fs.globStatus(new Path(path, "*.avro"));
        if (candidates.length == 0) {
            throw new IOException("there are no files in " + path.toString());
        }
        avroFile = candidates[0].getPath();
    }

    System.out.println("Obtaining schema of avro file " + avroFile.toString());
    return getSchema(new FsInput(avroFile, conf));
}