Example usage for org.apache.hadoop.fs FileSystem globStatus

Introduction

On this page you can find example usages of org.apache.hadoop.fs.FileSystem.globStatus.

Prototype

public FileStatus[] globStatus(Path pathPattern) throws IOException

Source Link

Document

Returns all the files that match pathPattern and are not checksum files.

Usage

From source file:com.gruter.hadoop.customShell.CustomShell.java

License:Apache License

/**
 * Lists every entry that matches the file pattern <i>srcf</i>.
 *
 * @param srcf a file pattern specifying source files
 * @param recursive whether files in sub-directories should be listed too
 * @return 0 when the per-entry listings reported a total of zero errors,
 *         -1 otherwise
 * @throws IOException if the file system cannot be queried; a
 *         {@link FileNotFoundException} is raised when nothing matches
 * @see org.apache.hadoop.fs.FileSystem#globStatus(Path)
 */
private int ls(String srcf, boolean recursive) throws IOException {
    final Path pattern = new Path(srcf);
    final FileSystem srcFs = pattern.getFileSystem(this.getConf());
    final FileStatus[] matches = srcFs.globStatus(pattern);
    if (matches == null || matches.length == 0) {
        throw new FileNotFoundException("Cannot access " + srcf + ": No such file or directory.");
    }

    // The header flag is only set when the pattern resolved to exactly one entry.
    final boolean printHeader = matches.length == 1;
    int failures = 0;
    for (FileStatus match : matches) {
        failures += ls(match, srcFs, recursive, printHeader);
    }
    if (failures != 0) {
        return -1;
    }
    return 0;
}

From source file:com.gruter.hadoop.customShell.CustomShell.java

License:Apache License

/**
 * Runs {@code handler} on every path matched by the glob patterns found in
 * {@code args} from {@code startIndex} onwards.
 *
 * Failures to obtain a file status are reported on standard error and counted
 * instead of aborting the whole run.
 *
 * @param handler the command handler to apply to each matched file
 * @param args command-line arguments; entries from {@code startIndex} are
 *        treated as path patterns
 * @param startIndex index of the first path pattern in {@code args}
 * @param recursive passed through to the per-file handler invocation
 * @return 1 if any error was counted or the handler reports a non-zero error
 *         code, 0 otherwise
 * @throws IOException if resolving a file system or expanding a pattern fails
 */
int runCmdHandler(CmdHandler handler, String[] args, int startIndex, boolean recursive) throws IOException {
    int errors = 0;

    for (int i = startIndex; i < args.length; i++) {
        Path srcPath = new Path(args[i]);
        FileSystem srcFs = srcPath.getFileSystem(getConf());
        Path[] paths = FileUtil.stat2Paths(srcFs.globStatus(srcPath), srcPath);
        // if nothing matches to given glob pattern then increment error count
        if (paths.length == 0) {
            System.err.println(handler.getName() + ": could not get status for '" + args[i] + "'");
            errors++;
        }
        for (Path path : paths) {
            try {
                FileStatus file = srcFs.getFileStatus(path);
                if (file == null) {
                    System.err.println(handler.getName() + ": could not get status for '" + path + "'");
                    errors++;
                } else {
                    errors += runCmdHandler(handler, file, srcFs, recursive);
                }
            } catch (IOException e) {
                // Prefer the exception's own message, then its cause's message.
                // The cause may be absent, so it must be null-checked before use;
                // otherwise a message-less IOException would itself trigger a
                // NullPointerException here.
                String msg = (e.getMessage() != null) ? e.getLocalizedMessage() : null;
                if (msg == null) {
                    Throwable cause = e.getCause();
                    if (cause != null && cause.getMessage() != null) {
                        msg = cause.getLocalizedMessage();
                    }
                }
                if (msg == null) {
                    msg = "null";
                }
                // Only the first line of the message is reported.
                System.err.println(
                        handler.getName() + ": could not get status for '" + path + "': " + msg.split("\n")[0]);
                errors++;
            }
        }
    }

    return (errors > 0 || handler.getErrorCode() != 0) ? 1 : 0;
}

From source file:com.ibm.jaql.lang.expr.system.LsFn.java

License:Apache License

/**
 * Evaluates the first argument expression as a glob and iterates over the
 * file statuses it matches, exposing one record per matched entry.
 *
 * A single record and a single set of mutable field holders are reused for
 * every step of the iteration; each {@code moveNext()} call overwrites them
 * with the next status. An access time that is not positive is exposed as a
 * null field.
 *
 * @param context the evaluation context handed to the argument expression
 * @return {@code JsonIterator.EMPTY} when nothing matches, otherwise an
 *         iterator over the matched entries
 * @throws Exception if evaluating the argument or querying the file system fails
 */
@Override
public JsonIterator iter(final Context context) throws Exception {
    final JsonString globText = (JsonString) exprs[0].eval(context);
    // TODO: get from context, incl options
    final Configuration conf = new Configuration();
    final Path globPath = new Path(globText.toString());
    final FileSystem fs = globPath.getFileSystem(conf);
    final FileStatus[] stats = fs.globStatus(globPath);

    final boolean nothingMatched = (stats == null) || (stats.length == 0);
    if (nothingMatched) {
        return JsonIterator.EMPTY;
    }

    // Reusable holders, one per output field.
    final MutableJsonDate accessTime = new MutableJsonDate();
    final MutableJsonDate modifyTime = new MutableJsonDate();
    final MutableJsonLong length = new MutableJsonLong();
    final MutableJsonLong blockSize = new MutableJsonLong();
    final MutableJsonLong replication = new MutableJsonLong();
    final MutableJsonString pathText = new MutableJsonString();
    final MutableJsonString owner = new MutableJsonString();
    final MutableJsonString group = new MutableJsonString();
    final MutableJsonString permission = new MutableJsonString();
    final JsonValue[] values = new JsonValue[] {
            accessTime, modifyTime, length, blockSize, replication, pathText, owner, group, permission };
    final BufferedJsonRecord rec = new BufferedJsonRecord();
    rec.set(LsField.names, values, values.length, false);

    return new JsonIterator(rec) {
        int cursor = 0;

        @Override
        public boolean moveNext() throws Exception {
            if (cursor >= stats.length) {
                return false;
            }

            final FileStatus current = stats[cursor++];
            final int accessSlot = LsField.ACCESS_TIME.ordinal();
            final long accessStamp = HadoopShim.getAccessTime(current);
            if (accessStamp > 0) {
                accessTime.set(accessStamp);
                values[accessSlot] = accessTime;
            } else {
                // No usable access time: publish null for this field.
                values[accessSlot] = null;
            }
            modifyTime.set(current.getModificationTime());
            length.set(current.getLen());
            blockSize.set(current.getBlockSize());
            replication.set(current.getReplication());
            pathText.setCopy(current.getPath().toString());
            owner.setCopy(current.getOwner());
            group.setCopy(current.getGroup());
            permission.setCopy(current.getPermission().toString());
            return true;
        }
    };
}

From source file:com.ikanow.infinit.e.processing.custom.utils.HadoopUtils.java

License:Open Source License

/**
 * Reads the {@code part-*} text files of a job's output directory and turns
 * every line into a BSON object.
 *
 * Each line is split on the first tab: when a tab is present the two halves
 * are stored under {@code "key"} and {@code "value"}, otherwise the whole line
 * is stored under {@code "value"}. Zero-length files are skipped.
 *
 * NOTE(review): {@code nLimit} and {@code fields} are not used by this
 * implementation, so every line of every part file is returned - confirm
 * whether a limit or field filter was intended.
 * NOTE(review): lines are decoded with the platform default charset - confirm
 * that this matches the encoding of the job output.
 *
 * @param cmr the job whose output directory is read
 * @param nLimit currently unused
 * @param fields currently unused
 * @return the list of parsed lines; empty when no part file matches
 * @throws IOException if the file system cannot be read
 * @throws SAXException declared for the configuration lookup
 * @throws ParserConfigurationException declared for the configuration lookup
 */
public static BasicDBList getBsonFromTextFiles(CustomMapReduceJobPojo cmr, int nLimit, String fields)
        throws IOException, SAXException, ParserConfigurationException {

    BasicDBList dbl = new BasicDBList();

    PropertiesManager props = new PropertiesManager();
    Configuration conf = getConfiguration(props);

    Path pathDir = HadoopUtils.getPathForJob(cmr, conf, false);
    FileSystem fs = FileSystem.get(conf);

    FileStatus[] files = fs.globStatus(new Path(pathDir.toString() + "/part-*"));
    if (files == null) {
        // globStatus may yield null instead of an empty array; nothing to read.
        return dbl;
    }
    for (FileStatus file : files) {
        if (file.getLen() <= 0) {
            continue;
        }
        FSDataInputStream in = fs.open(file.getPath());
        try {
            BufferedReader bin = new BufferedReader(new InputStreamReader(in));
            String s;
            while ((s = bin.readLine()) != null) {
                String[] keyValue = s.split("\t", 2);
                BasicDBObject dbo = new BasicDBObject();
                if (keyValue.length > 1) {
                    dbo.put("key", keyValue[0]);
                    dbo.put("value", keyValue[1]);
                } else {
                    dbo.put("value", keyValue[0]);
                }
                dbl.add(dbo);
            }
        } finally {
            // Release the stream even when reading fails part-way through.
            in.close();
        }
    }
    return dbl;
}

From source file:com.inmobi.conduit.distcp.tools.GlobbedCopyListing.java

License:Apache License

/**
 * Implementation of CopyListing::buildListing().
 * Creates the copy listing by "globbing" all source-paths.
 * @param pathToListingFile: The location at which the copy-listing file
 *                           is to be created.
 * @param options: Input Options for DistCp (indicating source/target paths.)
 * @throws IOException/*from w w w  .j  a  v a  2  s  . c  om*/
 */
@Override
public void doBuildListing(Path pathToListingFile, DistCpOptions options) throws IOException {

    List<Path> globbedPaths = new ArrayList<Path>();
    if (options.getSourcePaths().isEmpty()) {
        throw new InvalidInputException("Nothing to process. Source paths::EMPTY");
    }

    for (Path p : options.getSourcePaths()) {
        FileSystem fs = p.getFileSystem(getConf());
        FileStatus[] inputs = fs.globStatus(p);

        if (inputs != null && inputs.length > 0) {
            for (FileStatus onePath : inputs) {
                globbedPaths.add(onePath.getPath());
            }
        } else {
            throw new InvalidInputException(p + " doesn't exist");
        }
    }

    DistCpOptions optionsGlobbed = new DistCpOptions(globbedPaths, options.getTargetPath());
    optionsGlobbed.setSyncFolder(options.shouldSyncFolder());
    optionsGlobbed.setOverwrite(options.shouldOverwrite());
    optionsGlobbed.setDeleteMissing(options.shouldDeleteMissing());
    optionsGlobbed.setPreserveSrcPath(options.shouldPreserveSrcPath());
    optionsGlobbed.setSkipPathValidation(options.isSkipPathValidation());
    optionsGlobbed.setUseSimpleFileListing(options.isUseSimpleFileListing());

    simpleListing.buildListing(pathToListingFile, optionsGlobbed);
}

From source file:com.inmobi.conduit.distcp.tools.mapred.CopyCommitter.java

License:Apache License

/**
 * Removes the temporary files left under the target work path by attempts of
 * the given job.
 *
 * The files are located with a glob built from the job id, with every
 * occurrence of "job" replaced by "attempt". Each match is logged and then
 * deleted non-recursively.
 *
 * @param targetWorkPath directory that is searched for temporary files
 * @param targetFS file system holding {@code targetWorkPath}
 * @param jobId id of the job whose attempt files are removed
 * @throws IOException if globbing or deleting fails
 */
private void deleteAttemptTempFiles(Path targetWorkPath, FileSystem targetFS, String jobId) throws IOException {

    final String attemptPrefix = ".distcp.tmp." + jobId.replaceAll("job", "attempt");
    final Path tempPattern = new Path(targetWorkPath, attemptPrefix + "*");
    final FileStatus[] leftovers = targetFS.globStatus(tempPattern);

    if (leftovers == null) {
        return;
    }
    for (FileStatus leftover : leftovers) {
        LOG.info("Cleaning up " + leftover.getPath());
        targetFS.delete(leftover.getPath(), false);
    }
}

From source file:com.linkedin.cubert.analyzer.physical.AggregateRewriter.java

License:Open Source License

/**
 * Checks whether any {@code *.avro} file exists under {@code mvPath + "/avro"}
 * and records the answer in {@code mvExists}; when at least one file exists
 * the summary metadata at {@code mvPath} is processed.
 *
 * @throws IOException if the file system cannot be queried
 * @throws AggregateRewriteException declared for the metadata processing step
 */
private void readSummaryMetaData() throws IOException, AggregateRewriteException {
    final FileSystem fs = FileSystem.get(new JobConf());
    this.mvExists = false;

    final FileStatus[] avroFiles = fs.globStatus(new Path(mvPath + "/avro/*.avro"));
    final boolean found = (avroFiles != null) && (avroFiles.length > 0);
    if (!found) {
        return;
    }

    this.mvExists = true;
    processSummaryMetaData(mvPath);
}

From source file:com.linkedin.cubert.examples.ListFiles.java

License:Open Source License

/**
 * Collects the files named by the comma-separated "dirs" argument and prepares
 * the iterator and the single-field output tuple.
 *
 * Every entry of "dirs" is expanded as a glob. A matched directory is handed
 * to {@code listFiles}; a matched file contributes the path component of its
 * URI. Entries that match nothing are skipped.
 *
 * @param input not used by this method
 * @param json operator configuration; its "args" node supplies "dirs"
 * @param props not used by this method
 * @throws IOException if a file system cannot be queried
 * @throws InterruptedException declared by the overridden method
 */
@Override
public void setInput(Map<String, Block> input, JsonNode json, BlockProperties props)
        throws IOException, InterruptedException {
    final List<String> collected = new ArrayList<String>();
    final String dirsArg = JsonUtils.getText(json.get("args"), "dirs");

    for (String pattern : CommonUtils.trim(dirsArg.split(","))) {
        final Path globPath = new Path(pattern);
        final FileSystem fs = globPath.getFileSystem(PhaseContext.getConf());
        final FileStatus[] matches = fs.globStatus(globPath);

        if (matches != null) {
            for (FileStatus match : matches) {
                if (!match.isDir()) {
                    collected.add(match.getPath().toUri().getPath());
                } else {
                    listFiles(fs, match.getPath(), collected);
                }
            }
        }
    }

    iterator = collected.iterator();
    output = TupleFactory.getInstance().newTuple(1);
}

From source file:com.linkedin.cubert.plan.physical.GenerateDictionary.java

License:Open Source License

/**
 * Merges newly produced dictionary entries into the dictionary stored in
 * {@code dir}.
 *
 * Steps:
 * 1. If {@code dir/dictionary} exists it is loaded, then renamed to
 *    {@code dir/_dictionary.old} (replacing any previous backup).
 * 2. Every Avro file matching {@code dir/tmp/part-*} is read and each
 *    record's "colvalue" is added to the dictionary of its "colname".
 * 3. All dictionaries are written back to {@code dir/dictionary} as
 *    (colname, colvalue, code) records.
 *
 * @param conf configuration used to resolve the file system and open files
 * @param dir directory holding the dictionary and the {@code tmp} part files
 * @throws IOException if reading or writing any of the files fails
 */
public static void mergeDictionaries(Configuration conf, Path dir) throws IOException {
    Map<String, CodeDictionary> dictionaries = new HashMap<String, CodeDictionary>();
    FileSystem fs = FileSystem.get(conf);

    Path currentDictPath = new Path(dir, "dictionary");
    Schema schema = getSchema();

    // Read the existing dictionaries
    if (fs.exists(currentDictPath)) {
        dictionaries.putAll(loadDictionary(currentDictPath.toString(), true, conf));

        // move the current dictionary to new file
        Path oldPath = new Path(dir, "_dictionary.old");
        fs.delete(oldPath, true);
        fs.rename(currentDictPath, oldPath);
    }

    // Read the new entries
    Path globPath = new Path(dir, "tmp/part-*");
    FileStatus[] allStatus = fs.globStatus(globPath);
    if (allStatus != null) { // globStatus may yield null rather than an empty array
        for (FileStatus status : allStatus) {
            DatumReader<GenericRecord> datumReader = new GenericDatumReader<GenericRecord>(schema);
            FsInput partInput = new FsInput(status.getPath(), conf);
            try {
                DataFileReader<GenericRecord> dataFileReader = new DataFileReader<GenericRecord>(partInput,
                        datumReader);
                GenericRecord record = null;
                while (dataFileReader.hasNext()) {
                    record = dataFileReader.next();
                    String colName = record.get("colname").toString();
                    String colValue = record.get("colvalue").toString();

                    CodeDictionary dict = dictionaries.get(colName);
                    if (dict == null) {
                        dict = new CodeDictionary();
                        dictionaries.put(colName, dict);
                    }

                    dict.addKey(colValue);
                }
            } finally {
                // Close the underlying input for every part file, including when
                // opening or reading the Avro container fails.
                partInput.close();
            }
        }
    }

    // Write the dictionaries back
    DatumWriter<GenericRecord> datumWriter = new GenericDatumWriter<GenericRecord>(schema);
    DataFileWriter<GenericRecord> dataFileWriter = new DataFileWriter<GenericRecord>(datumWriter);
    FSDataOutputStream out = fs.create(currentDictPath);
    boolean writerClosed = false;
    try {
        dataFileWriter.create(schema, out);
        Record record = new Record(schema);

        for (Map.Entry<String, CodeDictionary> entry : dictionaries.entrySet()) {
            String colName = entry.getKey();
            CodeDictionary dict = entry.getValue();

            for (String colValue : dict.keySet()) {
                int code = dict.getCodeForKey(colValue);
                record.put("colname", colName);
                record.put("colvalue", colValue);
                record.put("code", code);

                dataFileWriter.append(record);
            }
        }
        dataFileWriter.close();
        writerClosed = true;
    } finally {
        if (!writerClosed) {
            // Writing failed before the writer was closed normally: release the
            // raw output stream so the file handle is not leaked.
            out.close();
        }
    }

}

From source file:com.linkedin.cubert.utils.AvroUtils.java

License:Open Source License

/**
 * Extracts the schema of an Avro file.
 *
 * When {@code path} is a directory, the first file returned for the glob
 * {@code *.avro} inside it is used instead. The path that is finally read is
 * printed to standard output.
 *
 * @param conf configuration used to resolve the file system and open the file
 * @param path an Avro file, or a directory containing {@code *.avro} files
 * @return the schema read from the selected file
 * @throws IOException if the path cannot be accessed, or if it is a directory
 *         in which no {@code *.avro} file is found
 */
public static Schema getSchema(Configuration conf, Path path) throws IOException {
    FileSystem fs = path.getFileSystem(conf);
    if (fs.getFileStatus(path).isDir()) {
        Path globPath = new Path(path, "*.avro");
        FileStatus[] allFiles = fs.globStatus(globPath);
        // globStatus may yield null as well as an empty array when nothing matches.
        if (allFiles == null || allFiles.length == 0) {
            throw new IOException("there are no files in " + path.toString());
        }

        path = allFiles[0].getPath();
    }
    System.out.println("Obtaining schema of avro file " + path.toString());

    return getSchema(new FsInput(path, conf));
}