Example usage for org.apache.hadoop.fs FileSystem globStatus

List of usage examples for org.apache.hadoop.fs FileSystem globStatus

Introduction

In this page you can find the example usage for org.apache.hadoop.fs FileSystem globStatus.

Prototype

public FileStatus[] globStatus(Path pathPattern) throws IOException 

Source Link

Document

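Return all the files that match pathPattern and are not checksum files. The result can be null when pathPattern contains no glob and the path does not exist, so callers should null-check it before iterating.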

Usage

From source file:com.rim.logdriver.admin.HFind.java

License:Apache License

/**
 * Parses a find-style command line and runs the search.
 *
 * <p>Leading arguments, up to the first one that starts with {@code -}, are
 * path globs: every match is added to {@code paths} and its status is stored
 * in {@code fileStatusCache}. The remaining arguments are expressions:
 * <ul>
 * <li>actions {@code -print} and {@code -delete};</li>
 * <li>tests {@code -atime}, {@code -mtime} (whole days) and {@code -amin},
 * {@code -mmin} (whole minutes), each followed by {@code N}, {@code +N}
 * (more than N) or {@code -N} (less than N);</li>
 * <li>test {@code -regex}, followed by a pattern that must match the whole
 * path string.</li>
 * </ul>
 * Unrecognised expressions are ignored. When no action is given, a print
 * action is added before {@code search()} is called.
 *
 * @param args path globs followed by expressions
 * @return always 0
 * @throws Exception if a glob cannot be expanded, a numeric value cannot be
 *         parsed, or the search fails
 */
@Override
public int run(String[] args) throws Exception {
    final long startTime = System.currentTimeMillis();
    final long minuteMillis = 60L * 1000;
    final long dayMillis = 24L * 60 * minuteMillis;

    // Everything before the first "-option" is a path glob to search under.
    int i = 0;
    while (i < args.length && !args[i].startsWith("-")) {
        Path path = new Path(args[i]);
        FileSystem fs = path.getFileSystem(getConf());
        FileStatus[] fileStatuses = fs.globStatus(path);
        if (fileStatuses != null) {
            for (FileStatus fileStatus : fileStatuses) {
                paths.add(fileStatus.getPath());
                fileStatusCache.put(fileStatus.getPath(), fileStatus);
            }
        }
        i++;
    }

    // The rest are expressions; options that take a value consume the next argument.
    for (; i < args.length; i++) {
        final String option = args[i];

        if ("-print".equals(option)) {
            actions.add(newPrintAction());
        } else if ("-delete".equals(option)) {
            actions.add(new FileStatusFilter() {
                @Override
                public boolean accept(FileStatus fileStatus) {
                    try {
                        FileSystem fs = fileStatus.getPath().getFileSystem(getConf());
                        // Only files and empty directories are deleted.
                        if (!fileStatus.isDir() || fs.listStatus(fileStatus.getPath()).length == 0) {
                            return fs.delete(fileStatus.getPath(), true);
                        }
                    } catch (IOException e) {
                        // A failed delete is reported and counted as "not accepted".
                        e.printStackTrace();
                    }
                    return false;
                }
            });
        } else if ("-atime".equals(option)) {
            tests.add(newAgeTest(option, requireArgument(args, ++i, option), startTime, dayMillis, true));
        } else if ("-mtime".equals(option)) {
            tests.add(newAgeTest(option, requireArgument(args, ++i, option), startTime, dayMillis, false));
        } else if ("-amin".equals(option)) {
            tests.add(newAgeTest(option, requireArgument(args, ++i, option), startTime, minuteMillis, true));
        } else if ("-mmin".equals(option)) {
            tests.add(newAgeTest(option, requireArgument(args, ++i, option), startTime, minuteMillis, false));
        } else if ("-regex".equals(option)) {
            final Pattern p = Pattern.compile(requireArgument(args, ++i, option));
            tests.add(new FileStatusFilter() {
                @Override
                public boolean accept(FileStatus fileStatus) {
                    return p.matcher(fileStatus.getPath().toString()).matches();
                }
            });
        }
    }

    // Default action, as with find(1): print every match.
    if (actions.size() == 0) {
        actions.add(newPrintAction());
    }

    search();

    return 0;
}

/** Returns the value at {@code index}, or reports the missing value for {@code option} and exits. */
private String requireArgument(String[] args, int index, String option) {
    if (index >= args.length) {
        System.err.println("Missing arguement for " + option);
        System.exit(1);
    }
    return args[index];
}

/** Builds the action that prints the path of each file and always accepts it. */
private FileStatusFilter newPrintAction() {
    return new FileStatusFilter() {
        @Override
        public boolean accept(FileStatus fileStatus) {
            System.out.println(fileStatus.getPath());
            return true;
        }
    };
}

/**
 * Builds a test on a file's age, measured in whole units of {@code unitMillis}
 * between the file's timestamp and {@code startTime}.
 *
 * @param option the option being parsed, used in the error message
 * @param spec {@code N} (exactly N units), {@code +N} (more than N) or {@code -N} (less than N)
 * @param startTime reference time in milliseconds
 * @param unitMillis length of one unit in milliseconds
 * @param useAccessTime true to test the access time, false to test the modification time
 * @throws IllegalArgumentException if {@code spec} is empty or is not a number
 */
private FileStatusFilter newAgeTest(String option, String spec, final long startTime,
        final long unitMillis, final boolean useAccessTime) {
    if (spec.isEmpty()) {
        throw new IllegalArgumentException("Empty value for " + option);
    }
    final char sign = spec.charAt(0);
    final long threshold = Long.parseLong(sign == '+' || sign == '-' ? spec.substring(1) : spec);

    return new FileStatusFilter() {
        @Override
        public boolean accept(FileStatus fileStatus) {
            long timestamp = useAccessTime ? fileStatus.getAccessTime() : fileStatus.getModificationTime();
            long age = (startTime - timestamp) / unitMillis;
            if (sign == '+') {
                return age > threshold;
            }
            if (sign == '-') {
                return age < threshold;
            }
            return age == threshold;
        }
    };
}

From source file:com.rim.logdriver.LockedFs.java

License:Apache License

/**
 * Moves every file matching the {@code from} globs to {@code to}, using the
 * file system obtained from {@code conf}.
 *
 * <p>A single source is renamed to {@code to}; several sources are each moved
 * into {@code to} under their own names. When {@code to} does not exist it is
 * created as a directory if it ends with a slash or if there is more than one
 * source.
 *
 * @param conf configuration used to obtain the file system
 * @param from source paths or globs; patterns that match nothing are skipped
 * @param to destination path
 * @throws IOException if nothing matches {@code from}, if {@code to} is an
 *         existing file, if a source name already exists inside an existing
 *         destination directory, or if a directory creation or rename fails
 */
public void move(Configuration conf, String[] from, String to) throws IOException {
    FileSystem fs = FileSystem.get(conf);

    List<FileStatus> fromList = new ArrayList<FileStatus>();
    for (String s : from) {
        FileStatus[] statuses = fs.globStatus(new Path(s));
        if (statuses == null) {
            continue;
        }
        for (FileStatus status : statuses) {
            fromList.add(status);
        }
    }

    Path toPath = new Path(to);
    boolean toExists = fs.exists(toPath);
    boolean toIsDir = toExists && fs.getFileStatus(toPath).isDir();

    // If there is no from, that's a problem.
    if (fromList.isEmpty()) {
        throw new IOException("No input files found");
    }

    // If the to exists, and is a file, that's a problem too.
    if (toExists && !toIsDir) {
        throw new IOException("Destination file exists:" + to);
    }

    // If the destination exists, and is a directory, then ensure that none of
    // the from list names will clash with existing contents of the directory.
    if (toExists) {
        for (FileStatus fromStatus : fromList) {
            String name = fromStatus.getPath().getName();
            if (fs.exists(new Path(toPath, name))) {
                throw new IOException("Destination file exists:" + to + "/" + name);
            }
        }
    }

    // If the destination doesn't exist, create it as a directory when it ends
    // with a slash or when there is more than one 'from'.
    if (!toExists && (to.endsWith("/") || fromList.size() > 1)) {
        if (!fs.mkdirs(toPath)) {
            throw new IOException("Failed to create directory " + toPath);
        }
    }

    // If there was only one from, then just rename it to to
    if (fromList.size() == 1) {
        // The root path has no parent; there is nothing to create in that case.
        Path parent = toPath.getParent();
        if (parent != null && !fs.mkdirs(parent)) {
            throw new IOException("Failed to create directory " + parent);
        }
        Path source = fromList.get(0).getPath();
        // rename signals most failures by returning false rather than throwing.
        if (!fs.rename(source, toPath)) {
            throw new IOException("Failed to move " + source + " to " + toPath);
        }
        return;
    }

    // If there was more than one from, then for each file in the from list,
    // move it to the to directory.
    for (FileStatus fromStatus : fromList) {
        Path source = fromStatus.getPath();
        Path target = new Path(toPath, source.getName());
        if (!fs.rename(source, target)) {
            throw new IOException("Failed to move " + source + " to " + target);
        }
    }
}

From source file:com.rim.logdriver.util.Cat.java

License:Apache License

/**
 * Configures and launches a map-only job that reads the input files with
 * {@code BoomInputFormat}, maps them with {@code CatMapper} and writes text
 * output to the output directory.
 *
 * @param args {@code input [input ...] output}; each input may be a glob
 * @return 0 if the job succeeded or was submitted without waiting, 1 if the
 *         output separator is not a single UTF-8 byte or the job failed
 * @throws Exception if the inputs cannot be listed or the job cannot be run
 */
@Override
public int run(String[] args) throws Exception {
    Configuration conf = getConf(); // Configuration processed by ToolRunner
    // If run by Oozie, then load the Oozie conf too
    if (System.getProperty("oozie.action.conf.xml") != null) {
        conf.addResource(new URL("file://" + System.getProperty("oozie.action.conf.xml")));
    }

    FileSystem fs = FileSystem.get(conf);

    // The command line options
    List<Path> paths = new ArrayList<Path>();
    Path outputDir = null;

    // Load input files from the command line
    if (args.length < 2) {
        System.out.println("usage: [genericOptions] input [input ...] output");
        System.exit(1);
    }

    // Get the files we need from the command line.
    for (int i = 0; i < args.length - 1; i++) {
        FileStatus[] matches = fs.globStatus(new Path(args[i]));
        if (matches == null) {
            // globStatus returns null for a non-glob path that does not exist.
            LOG.warn("Input path does not exist: " + args[i]);
            continue;
        }
        for (FileStatus f : matches) {
            paths.add(f.getPath());
        }
    }
    outputDir = new Path(args[args.length - 1]);

    Job job = new Job(conf);
    Configuration jobConf = job.getConfiguration();

    job.setJarByClass(Cat.class);
    jobConf.setIfUnset("mapred.job.name", "Cat Files");

    // To propagate credentials within Oozie
    if (System.getenv("HADOOP_TOKEN_FILE_LOCATION") != null) {
        jobConf.set("mapreduce.job.credentials.binary", System.getenv("HADOOP_TOKEN_FILE_LOCATION"));
    }

    // Good output separators include things that are unsupported by XML. So we
    // just send the byte value of the character through. The restriction here
    // is that it can't be more than 1 byte when UTF-8 encoded, since it will be
    // read by Pig which only deals with single byte separators.
    {
        String outputSeparator = jobConf.get("logdriver.output.field.separator", DEFAULT_OUTPUT_SEPARATOR);
        byte[] bytes = outputSeparator.getBytes(UTF_8);
        if (bytes.length != 1) {
            LOG.error("The output separator must be a single byte in UTF-8.");
            return 1;
        }

        jobConf.set("logdriver.output.field.separator", Byte.toString(bytes[0]));
    }

    job.setInputFormatClass(BoomInputFormat.class);
    job.setMapperClass(CatMapper.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(NullWritable.class);

    // Map-only job: no reducers.
    job.setNumReduceTasks(0);

    job.setOutputFormatClass(TextOutputFormat.class);
    TextOutputFormat.setOutputPath(job, outputDir);

    for (Path path : paths) {
        BoomInputFormat.addInputPath(job, path);
    }

    // Run the job.
    if (conf.getBoolean("job.wait", DEFAULT_WAIT_JOB)) {
        return job.waitForCompletion(true) ? 0 : 1;
    } else {
        job.submit();
        return 0;
    }
}

From source file:com.rim.logdriver.util.FastSearch.java

License:Apache License

/**
 * Configures and launches a map-only job that reads the input files with
 * {@code AvroBlockInputFormat}, maps them with {@code SearchMapper} and
 * writes text output to the output directory. The search string is passed
 * to the job Base64-encoded under {@code logdriver.search.string}.
 *
 * @param args {@code searchString input [input ...] output}; each input may be a glob
 * @return 0 if the job succeeded or was submitted without waiting, 1 if the
 *         output separator is not a single UTF-8 byte or the job failed
 * @throws Exception if the inputs cannot be listed or the job cannot be run
 */
@Override
public int run(String[] args) throws Exception {
    Configuration conf = getConf(); // Configuration processed by ToolRunner
    // If run by Oozie, then load the Oozie conf too
    if (System.getProperty("oozie.action.conf.xml") != null) {
        conf.addResource(new URL("file://" + System.getProperty("oozie.action.conf.xml")));
    }

    FileSystem fs = FileSystem.get(conf);

    // The command line options
    String searchString = null;
    List<Path> paths = new ArrayList<Path>();
    Path outputDir = null;

    // Load input files from the command line
    if (args.length < 3) {
        System.out.println("usage: [genericOptions] searchString input [input ...] output");
        System.exit(1);
    }

    // Get the files we need from the command line.
    searchString = args[0];
    for (int i = 1; i < args.length - 1; i++) {
        FileStatus[] matches = fs.globStatus(new Path(args[i]));
        if (matches == null) {
            // globStatus returns null for a non-glob path that does not exist.
            LOG.warn("Input path does not exist: " + args[i]);
            continue;
        }
        for (FileStatus f : matches) {
            paths.add(f.getPath());
        }
    }
    outputDir = new Path(args[args.length - 1]);

    Job job = new Job(conf);
    Configuration jobConf = job.getConfiguration();

    job.setJarByClass(FastSearch.class);
    jobConf.setIfUnset("mapred.job.name", "Search Files");

    // To propagate credentials within Oozie
    if (System.getenv("HADOOP_TOKEN_FILE_LOCATION") != null) {
        jobConf.set("mapreduce.job.credentials.binary", System.getenv("HADOOP_TOKEN_FILE_LOCATION"));
    }

    // Good output separators include things that are unsupported by XML. So we
    // just send the byte value of the character through. The restriction here
    // is that it can't be more than 1 byte when UTF-8 encoded, since it will be
    // read by Pig which only deals with single byte separators.
    {
        String outputSeparator = jobConf.get("logdriver.output.field.separator", DEFAULT_OUTPUT_SEPARATOR);
        byte[] bytes = outputSeparator.getBytes(UTF_8);
        if (bytes.length != 1) {
            LOG.error("The output separator must be a single byte in UTF-8.");
            return 1;
        }

        jobConf.set("logdriver.output.field.separator", Byte.toString(bytes[0]));
    }

    jobConf.set("logdriver.search.string", Base64.encodeBase64String(searchString.getBytes("UTF-8")));

    job.setInputFormatClass(AvroBlockInputFormat.class);
    job.setMapperClass(SearchMapper.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(NullWritable.class);

    // Map-only job: no reducers.
    job.setNumReduceTasks(0);

    // And set the output as usual
    job.setOutputFormatClass(TextOutputFormat.class);
    TextOutputFormat.setOutputPath(job, outputDir);
    for (Path path : paths) {
        AvroBlockInputFormat.addInputPath(job, path);
    }

    // Run the job.
    if (conf.getBoolean("job.wait", DEFAULT_WAIT_JOB)) {
        return job.waitForCompletion(true) ? 0 : 1;
    } else {
        job.submit();
        return 0;
    }
}

From source file:com.rim.logdriver.util.Grep.java

License:Apache License

/**
 * Configures and launches a map-only job that reads the input files with
 * {@code BoomInputFormat}, maps them with {@code GrepMapper} and writes text
 * output to the output directory. The regex is passed to the job
 * Base64-encoded under {@code logdriver.grep.regex}.
 *
 * @param args {@code regex input [input ...] output}; each input may be a glob
 * @return 0 if the job succeeded or was submitted without waiting, 1 if the
 *         output separator is not a single UTF-8 byte or the job failed
 * @throws Exception if the inputs cannot be listed or the job cannot be run
 */
@Override
public int run(String[] args) throws Exception {
    Configuration conf = getConf(); // Configuration processed by ToolRunner
    // If run by Oozie, then load the Oozie conf too
    if (System.getProperty("oozie.action.conf.xml") != null) {
        conf.addResource(new URL("file://" + System.getProperty("oozie.action.conf.xml")));
    }

    FileSystem fs = FileSystem.get(conf);

    // The command line options
    String regex = null;
    List<Path> paths = new ArrayList<Path>();
    Path outputDir = null;

    // Load input files from the command line
    if (args.length < 3) {
        System.out.println("usage: [genericOptions] regex input [input ...] output");
        System.exit(1);
    }

    // Get the files we need from the command line.
    regex = args[0];
    for (int i = 1; i < args.length - 1; i++) {
        FileStatus[] matches = fs.globStatus(new Path(args[i]));
        if (matches == null) {
            // globStatus returns null for a non-glob path that does not exist.
            LOG.warn("Input path does not exist: " + args[i]);
            continue;
        }
        for (FileStatus f : matches) {
            paths.add(f.getPath());
        }
    }
    outputDir = new Path(args[args.length - 1]);

    Job job = new Job(conf);
    Configuration jobConf = job.getConfiguration();

    job.setJarByClass(Grep.class);
    jobConf.setIfUnset("mapred.job.name", "Grep Files");

    // To propagate credentials within Oozie
    if (System.getenv("HADOOP_TOKEN_FILE_LOCATION") != null) {
        jobConf.set("mapreduce.job.credentials.binary", System.getenv("HADOOP_TOKEN_FILE_LOCATION"));
    }

    // Good output separators include things that are unsupported by XML. So we
    // just send the byte value of the character through. The restriction here
    // is that it can't be more than 1 byte when UTF-8 encoded, since it will be
    // read by Pig which only deals with single byte separators.
    {
        String outputSeparator = jobConf.get("logdriver.output.field.separator", DEFAULT_OUTPUT_SEPARATOR);
        byte[] bytes = outputSeparator.getBytes(UTF_8);
        if (bytes.length != 1) {
            LOG.error("The output separator must be a single byte in UTF-8.");
            return 1;
        }

        jobConf.set("logdriver.output.field.separator", Byte.toString(bytes[0]));
    }

    jobConf.set("logdriver.grep.regex", Base64.encodeBase64String(regex.getBytes("UTF-8")));

    job.setInputFormatClass(BoomInputFormat.class);
    job.setMapperClass(GrepMapper.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(NullWritable.class);

    // Map-only job: no reducers.
    job.setNumReduceTasks(0);

    // And set the output as usual
    job.setOutputFormatClass(TextOutputFormat.class);
    TextOutputFormat.setOutputPath(job, outputDir);
    for (Path path : paths) {
        BoomInputFormat.addInputPath(job, path);
    }

    // Run the job.
    if (conf.getBoolean("job.wait", DEFAULT_WAIT_JOB)) {
        return job.waitForCompletion(true) ? 0 : 1;
    } else {
        job.submit();
        return 0;
    }

}

From source file:com.rim.logdriver.util.MultiSearch.java

License:Apache License

/**
 * Configures and launches a map-only job that reads the input files with
 * {@code AvroBlockInputFormat}, maps them with {@code SearchMapper} and
 * writes text output to the output directory. The directory holding the
 * search strings is passed to the job under
 * {@code logdriver.search.string.dir}, and the replication of every file
 * directly inside it is raised to 16 first.
 *
 * @param args {@code searchStringDirectory input [input ...] output}; each
 *        input may be a glob
 * @return 0 if the job succeeded or was submitted without waiting, 1 if the
 *         output separator is not a single UTF-8 byte or the job failed
 * @throws Exception if the inputs cannot be listed or the job cannot be run
 */
@Override
public int run(String[] args) throws Exception {
    Configuration conf = getConf(); // Configuration processed by ToolRunner
    // If run by Oozie, then load the Oozie conf too
    if (System.getProperty("oozie.action.conf.xml") != null) {
        conf.addResource(new URL("file://" + System.getProperty("oozie.action.conf.xml")));
    }

    FileSystem fs = FileSystem.get(conf);

    // The command line options
    String searchStringDir = null;
    List<Path> paths = new ArrayList<Path>();
    Path outputDir = null;

    // Load input files from the command line
    if (args.length < 3) {
        System.out.println("usage: [genericOptions] searchStringDirectory input [input ...] output");
        System.exit(1);
    }

    // Get the files we need from the command line.
    searchStringDir = args[0];
    // We are going to be reading all the files in this directory a lot. So
    // let's up the replication factor by a lot so that they're easy to read.
    for (FileStatus f : fs.listStatus(new Path(searchStringDir))) {
        fs.setReplication(f.getPath(), (short) 16);
    }

    for (int i = 1; i < args.length - 1; i++) {
        FileStatus[] matches = fs.globStatus(new Path(args[i]));
        if (matches == null) {
            // globStatus returns null for a non-glob path that does not exist.
            LOG.warn("Input path does not exist: " + args[i]);
            continue;
        }
        for (FileStatus f : matches) {
            paths.add(f.getPath());
        }
    }

    outputDir = new Path(args[args.length - 1]);

    Job job = new Job(conf);
    Configuration jobConf = job.getConfiguration();

    job.setJarByClass(MultiSearch.class);
    jobConf.setIfUnset("mapred.job.name", "MultiSearch");

    // To propagate credentials within Oozie
    if (System.getenv("HADOOP_TOKEN_FILE_LOCATION") != null) {
        jobConf.set("mapreduce.job.credentials.binary", System.getenv("HADOOP_TOKEN_FILE_LOCATION"));
    }

    // Good output separators include things that are unsupported by XML. So we
    // just send the byte value of the character through. The restriction here
    // is that it can't be more than 1 byte when UTF-8 encoded, since it will be
    // read by Pig which only deals with single byte separators.
    {
        String outputSeparator = jobConf.get("logdriver.output.field.separator", DEFAULT_OUTPUT_SEPARATOR);
        byte[] bytes = outputSeparator.getBytes(UTF_8);
        if (bytes.length != 1) {
            LOG.error("The output separator must be a single byte in UTF-8.");
            return 1;
        }

        jobConf.set("logdriver.output.field.separator", Byte.toString(bytes[0]));
    }

    jobConf.set("logdriver.search.string.dir", searchStringDir);

    // This search is generally too fast to make good use of 128MB blocks, so
    // let's set the value to 256MB (if it's not set already)
    if (jobConf.get("mapred.max.split.size") == null) {
        jobConf.setLong("mapred.max.split.size", 256 * 1024 * 1024);
    }

    job.setInputFormatClass(AvroBlockInputFormat.class);
    job.setMapperClass(SearchMapper.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(NullWritable.class);

    // Map-only job: no reducers.
    job.setNumReduceTasks(0);

    job.setOutputFormatClass(TextOutputFormat.class);
    TextOutputFormat.setOutputPath(job, outputDir);
    for (Path path : paths) {
        AvroBlockInputFormat.addInputPath(job, path);
    }

    // Run the job.
    if (conf.getBoolean("job.wait", DEFAULT_WAIT_JOB)) {
        return job.waitForCompletion(true) ? 0 : 1;
    } else {
        job.submit();
        return 0;
    }
}

From source file:com.rim.logdriver.util.Search.java

License:Apache License

/**
 * Configures and launches a map-only job that reads the input files with
 * {@code BoomInputFormat}, maps them with {@code SearchMapper} and writes
 * text output to the output directory. The search string is passed to the
 * job as-is under {@code logdriver.search.string}.
 *
 * @param args {@code searchString input [input ...] output}; each input may be a glob
 * @return 0 if the job succeeded or was submitted without waiting, 1 if the
 *         output separator is not a single UTF-8 byte or the job failed
 * @throws Exception if the inputs cannot be listed or the job cannot be run
 */
@Override
public int run(String[] args) throws Exception {
    Configuration conf = getConf(); // Configuration processed by ToolRunner
    // If run by Oozie, then load the Oozie conf too
    if (System.getProperty("oozie.action.conf.xml") != null) {
        conf.addResource(new URL("file://" + System.getProperty("oozie.action.conf.xml")));
    }

    FileSystem fs = FileSystem.get(conf);

    // The command line options
    String searchString = null;
    List<Path> paths = new ArrayList<Path>();
    Path outputDir = null;

    // Load input files from the command line
    if (args.length < 3) {
        System.out.println("usage: [genericOptions] searchString input [input ...] output");
        System.exit(1);
    }

    // Get the files we need from the command line.
    searchString = args[0];
    for (int i = 1; i < args.length - 1; i++) {
        FileStatus[] matches = fs.globStatus(new Path(args[i]));
        if (matches == null) {
            // globStatus returns null for a non-glob path that does not exist.
            LOG.warn("Input path does not exist: " + args[i]);
            continue;
        }
        for (FileStatus f : matches) {
            paths.add(f.getPath());
        }
    }
    outputDir = new Path(args[args.length - 1]);

    Job job = new Job(conf);
    Configuration jobConf = job.getConfiguration();

    job.setJarByClass(Search.class);
    jobConf.setIfUnset("mapred.job.name", "Search Files");

    // To propagate credentials within Oozie
    if (System.getenv("HADOOP_TOKEN_FILE_LOCATION") != null) {
        jobConf.set("mapreduce.job.credentials.binary", System.getenv("HADOOP_TOKEN_FILE_LOCATION"));
    }

    // Good output separators include things that are unsupported by XML. So we
    // just send the byte value of the character through. The restriction here
    // is that it can't be more than 1 byte when UTF-8 encoded, since it will be
    // read by Pig which only deals with single byte separators.
    {
        String outputSeparator = jobConf.get("logdriver.output.field.separator", DEFAULT_OUTPUT_SEPARATOR);
        byte[] bytes = outputSeparator.getBytes(UTF_8);
        if (bytes.length != 1) {
            LOG.error("The output separator must be a single byte in UTF-8.");
            return 1;
        }

        jobConf.set("logdriver.output.field.separator", Byte.toString(bytes[0]));
    }

    jobConf.set("logdriver.search.string", searchString);

    job.setInputFormatClass(BoomInputFormat.class);
    job.setMapperClass(SearchMapper.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(NullWritable.class);

    // Map-only job: no reducers.
    job.setNumReduceTasks(0);

    // And set the output as usual
    job.setOutputFormatClass(TextOutputFormat.class);
    TextOutputFormat.setOutputPath(job, outputDir);
    for (Path path : paths) {
        BoomInputFormat.addInputPath(job, path);
    }

    // Run the job.
    if (conf.getBoolean("job.wait", DEFAULT_WAIT_JOB)) {
        return job.waitForCompletion(true) ? 0 : 1;
    } else {
        job.submit();
        return 0;
    }
}

From source file:com.skp.experiment.fpm.pfpgrowth.PFPGrowth.java

License:Apache License

/**
 * Read the Frequent Patterns generated from Text.
 *
 * <p>The patterns are read from every file matching {@code FILE_PATTERN}
 * under the {@code FREQUENT_PATTERNS} directory of the {@code OUTPUT} path
 * given in {@code params}.
 *
 * @param params job parameters; the {@code OUTPUT} entry locates the results
 * @return List of TopK patterns for each string frequent feature; empty when
 *         no output file is found
 * @throws IOException if the output files cannot be listed or read
 */
public static List<Pair<String, TopKStringPatterns>> readFrequentPattern(Parameters params) throws IOException {

    Configuration conf = new Configuration();

    Path frequentPatternsPath = new Path(params.get(OUTPUT), FREQUENT_PATTERNS);
    FileSystem fs = FileSystem.get(frequentPatternsPath.toUri(), conf);
    FileStatus[] outputFiles = fs.globStatus(new Path(frequentPatternsPath, FILE_PATTERN));

    List<Pair<String, TopKStringPatterns>> ret = Lists.newArrayList();
    // globStatus can return null instead of an empty array when nothing is found.
    if (outputFiles == null) {
        return ret;
    }
    for (FileStatus fileStatus : outputFiles) {
        ret.addAll(org.apache.mahout.fpm.pfpgrowth.fpgrowth.FPGrowth.readFrequentPattern(conf,
                fileStatus.getPath()));
    }
    return ret;
}

From source file:com.splout.db.dnode.Fetcher.java

License:Open Source License

/**
 * Copies the file(s) matching {@code fromPath} into a fresh, uniquely named
 * directory under {@code tempDir}, throttling the transfer and reporting
 * progress.
 *
 * <p>NOTE(review): every match is written to the same local file (named after
 * {@code fromPath}), so a pattern matching several files appears to leave only
 * the last one. Confirm callers only pass single-file paths.
 *
 * @param fromPath source path or glob
 * @param reporter progress callback; may be null
 * @return the local directory that holds the downloaded file
 * @throws IOException if nothing is found at {@code fromPath} or the copy fails
 * @throws InterruptedException if the thread is interrupted between reads; the
 *         local directory is cleaned first
 */
private File hdfsFetch(Path fromPath, Reporter reporter) throws IOException, InterruptedException {
    UUID uniqueId = UUID.randomUUID();
    File toFile = new File(tempDir, uniqueId.toString() + "/" + fromPath.getName());
    File toDir = new File(toFile.getParent());
    if (toDir.exists()) {
        FileUtils.deleteDirectory(toDir);
    }
    toDir.mkdirs();
    Path toPath = new Path(toFile.getCanonicalPath());

    FileSystem fS = fromPath.getFileSystem(hadoopConf);
    FileSystem tofS = FileSystem.getLocal(hadoopConf);

    Throttler throttler = new Throttler((double) bytesPerSecThrottle);
    try {
        FileStatus[] sources = fS.globStatus(fromPath);
        if (sources == null) {
            // globStatus returns null for a non-glob path that does not exist.
            cleanDirNoExceptions(toDir);
            throw new IOException("No files found at " + fromPath);
        }

        for (FileStatus fStatus : sources) {
            log.info("Copying " + fStatus.getPath() + " to " + toPath);
            long bytesSoFar = 0;

            // Both streams are closed in finally blocks so a failed copy does not leak them.
            FSDataInputStream iS = fS.open(fStatus.getPath());
            try {
                FSDataOutputStream oS = tofS.create(toPath);
                try {
                    byte[] buffer = new byte[downloadBufferSize];

                    int nRead;
                    while ((nRead = iS.read(buffer, 0, buffer.length)) != -1) {
                        // Needed to being able to be interrupted at any moment.
                        if (Thread.interrupted()) {
                            throw new InterruptedException();
                        }
                        bytesSoFar += nRead;
                        oS.write(buffer, 0, nRead);
                        throttler.incrementAndThrottle(nRead);
                        if (reporter != null && bytesSoFar >= bytesToReportProgress) {
                            reporter.progress(bytesSoFar);
                            bytesSoFar = 0L;
                        }
                    }

                    // Report whatever is left below the reporting threshold.
                    if (reporter != null) {
                        reporter.progress(bytesSoFar);
                    }
                } finally {
                    oS.close();
                }
            } finally {
                iS.close();
            }
        }

        return toDir;
    } catch (InterruptedException e) {
        // The streams are already closed here; drop the partial download.
        cleanDirNoExceptions(toDir);
        throw e;
    } catch (ClosedByInterruptException e) {
        // This can be thrown by the method read.
        cleanDirNoExceptions(toDir);
        throw new InterruptedIOException();
    }
}

From source file:com.streamsets.pipeline.stage.destination.hdfs.writer.DefaultFsHelper.java

License:Apache License

/**
 * Renames to their final names the files matched by each of the record
 * writer manager's globs.
 *
 * <p>Nothing is done unless {@code context.getLastBatchTime()} is positive.
 *
 * @param fs file system on which the globs are evaluated
 * @throws StageException if a file cannot be renamed to its final name
 * @throws IOException if the file system cannot be queried
 */
@Override
public void commitOldFiles(FileSystem fs) throws StageException, IOException {
    if (context.getLastBatchTime() <= 0) {
        return;
    }

    for (String pattern : recordWriterManager.getGlobs()) {
        LOG.debug("Looking for uncommitted files using glob '{}'", pattern);
        FileStatus[] leftovers = fs.globStatus(new Path(pattern));
        if (leftovers == null) {
            // No match for this glob; move on to the next one.
            continue;
        }
        for (FileStatus leftover : leftovers) {
            Path leftoverPath = leftover.getPath();
            LOG.debug("Found uncommitted file '{}'", leftoverPath);
            recordWriterManager.renameToFinalName(fs, leftoverPath);
        }
    }
}