Example usage for org.apache.hadoop.fs.PathFilter

Introduction

This page collects example usages of org.apache.hadoop.fs.PathFilter. PathFilter is a callback interface with a single method, accept(Path), which returns true for each path that should be included in a file-system listing; most examples below implement it as an anonymous inner class and pass it to FileSystem.listStatus.

Prototype

    public interface PathFilter {
        boolean accept(Path path);
    }

Usage
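
Before the individual examples, here is a minimal, self-contained sketch of the pattern they all share: implement accept(Path) in an anonymous class and hand the filter to FileSystem.listStatus. The directory name and the "part-" prefix are hypothetical placeholders, not taken from any example below.

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.PathFilter;

public class PathFilterSketch {
    public static void main(String[] args) throws IOException {
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(conf);

        // Keep only entries whose file name starts with "part-".
        PathFilter partFilesOnly = new PathFilter() {
            @Override
            public boolean accept(Path path) {
                return path.getName().startsWith("part-");
            }
        };

        // listStatus applies the filter to each child of the directory.
        for (FileStatus status : fs.listStatus(new Path("/tmp/job-output"), partFilesOnly)) {
            System.out.println(status.getPath());
        }
        fs.close();
    }
}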

From source file:GetRetweetersAndCountPerUser.java

License:Apache License

public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    if (otherArgs.length != 3) {
        System.err.println("Usage: GetRetweetersAndCountPerUser <in> <out> <num_reducers>");
        System.exit(2);
    }
    Job job = new Job(conf, "word count");
    job.setJarByClass(RetweetersPerUser.class);
    FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
    System.out.println(otherArgs[0]);
    job.setMapperClass(TweetMapper.class);
    job.setCombinerClass(IntSumReducer.class);
    job.setReducerClass(IntSumReducer.class);
    job.setOutputKeyClass(IntWritable.class);
    job.setOutputValueClass(IntWritable.class);
    job.setNumReduceTasks(Integer.parseInt(otherArgs[2]));
    FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));

    if (job.waitForCompletion(true)) {
        FileSystem hdfs = FileSystem.get(new URI(otherArgs[1]), conf);
        Path dir = new Path(otherArgs[1]);
        PathFilter filter = new PathFilter() {
            public boolean accept(Path file) {
                return file.getName().startsWith("part-r-");
            }
        };

        HashMap<Integer, Integer> counts_for_user = new HashMap<Integer, Integer>();
        FileStatus[] files = hdfs.listStatus(dir, filter);
        Arrays.sort(files);
        for (int i = 0; i < files.length; i++) {
            Path pt = files[i].getPath();
            BufferedReader br = new BufferedReader(new InputStreamReader(hdfs.open(pt)));
            String line = null;
            while ((line = br.readLine()) != null) {
                String[] columns = line.split("\t");
                int key = Integer.parseInt(columns[0]);
                if (counts_for_user.containsKey(key))
                    counts_for_user.put(key, counts_for_user.get(key) + 1);
                else
                    counts_for_user.put(key, 1);
            }
            br.close();
        }

        FSDataOutputStream fsDataOutputStream = hdfs.create(new Path(otherArgs[1] + "_count"));
        PrintWriter writer = new PrintWriter(fsDataOutputStream);
        for (Entry<Integer, Integer> e : counts_for_user.entrySet()) {
            writer.write(e.getKey() + "\t" + e.getValue() + "\n");
        }
        writer.close();
        fsDataOutputStream.close();
        hdfs.close();
        System.exit(0);
    }
    System.exit(1);
}
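
A note on the example above: when the goal is simply to collect the part-r-* reducer outputs, FileSystem.globStatus offers a filter-free alternative. A minimal sketch, reusing the hdfs handle and output path from the example (globStatus is a standard Hadoop API; the surrounding wiring is assumed):

// Glob matching replaces the anonymous PathFilter.
FileStatus[] parts = hdfs.globStatus(new Path(otherArgs[1] + "/part-r-*"));
if (parts != null) {
    Arrays.sort(parts);
    for (FileStatus part : parts) {
        System.out.println(part.getPath());
    }
}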

From source file:HadoopUtilsTest.java

License:Apache License

public static void main(String[] args) throws IOException {
    Configuration configuration = HBaseConfiguration.create();
    FileSystem fileSystem = null;
    try {
        fileSystem = FileSystem.get(configuration);
        FileStatus[] fileStatuses = fileSystem.listStatus(new Path("/icntv/grade/correlate-result/2013-12-12"),
                new PathFilter() {
                    @Override
                    public boolean accept(Path path) {

                        return path.getName().matches("part-r-\\d*");
                    }
                });
        for (FileStatus f : fileStatuses) {
            IOUtils.copyBytes(fileSystem.open(f.getPath()), System.out, 4096, false);
        }
    } catch (Exception e) {
        e.printStackTrace();
    } finally {
        if (null != fileSystem) {
            fileSystem.close();
        }
    }
}

From source file:com.alexholmes.hadooputils.test.TextIOJobBuilder.java

License:Apache License

/**
 * Called after the MapReduce job has completed, to verify that the outputs
 * generated by the MapReduce job align with the expected outputs that were
 * set with calls to {@link #addExpectedOutput(String)} and
 * {@link #addExpectedOutput(String...)}.
 *
 * @return a reference to this object
 * @throws IOException if something goes wrong
 */
public TextIOJobBuilder verifyResults() throws IOException {

    FileStatus[] outputFiles = fs.listStatus(outputPath, new PathFilter() {
        @Override
        public boolean accept(final Path path) {
            return path.getName().startsWith("part");
        }
    });

    System.out.println("Output files: " + StringUtils.join(outputFiles));

    int i = 0;
    for (FileStatus file : outputFiles) {
        List<String> actualLines = FileUtils.readLines(fs, file.getPath());

        for (String actualLine : actualLines) {
            String expectedLine = expectedOutputs.get(i++);
            assertEquals(expectedLine, actualLine);
        }
    }

    assertEquals(expectedOutputs.size(), i);

    return this;
}

From source file:com.architecting.ch07.MapReduceIndexerTool.java

License:Apache License

private FileStatus[] listSortedOutputShardDirs(Job job, Path outputReduceDir, FileSystem fs)
        throws FileNotFoundException, IOException {
    final String dirPrefix = SolrOutputFormat.getOutputName(job);
    FileStatus[] dirs = fs.listStatus(outputReduceDir, new PathFilter() {
        @Override
        public boolean accept(Path path) {
            return path.getName().startsWith(dirPrefix);
        }
    });
    for (FileStatus dir : dirs) {
        if (!dir.isDirectory()) {
            throw new IllegalStateException("Not a directory: " + dir.getPath());
        }
    }

    // use alphanumeric sort (rather than lexicographical sort) to properly handle more than 99999
    // shards
    Arrays.sort(dirs, new Comparator<FileStatus>() {
        @Override
        public int compare(FileStatus f1, FileStatus f2) {
            return new AlphaNumericComparator().compare(f1.getPath().getName(), f2.getPath().getName());
        }
    });

    return dirs;
}

From source file:com.bah.lucene.hdfs.HdfsDirectory.java

License:Apache License

@Override
public String[] listAll() throws IOException {
    LOG.debug(MessageFormat.format("listAll [{0}]", _path));
    FileStatus[] files = _fileSystem.listStatus(_path, new PathFilter() {
        @Override
        public boolean accept(Path path) {
            try {
                return _fileSystem.isFile(path);
            } catch (IOException e) {
                throw new RuntimeException(e);
            }
        }
    });
    String[] result = new String[files.length];
    for (int i = 0; i < result.length; i++) {
        result[i] = files[i].getPath().getName();
    }
    return result;
}

From source file:com.bah.lucene.hdfs.SoftlinkHdfsDirectory.java

License:Apache License

@Override
public String[] listAll() throws IOException {
    LOG.debug(MessageFormat.format("listAll [{0}]", _path));
    FileStatus[] files = _fileSystem.listStatus(_path, new PathFilter() {
        @Override
        public boolean accept(Path path) {
            try {
                return _fileSystem.isFile(path);
            } catch (IOException e) {
                throw new RuntimeException(e);
            }
        }
    });
    String[] result = new String[files.length];
    for (int i = 0; i < result.length; i++) {
        result[i] = removeLinkExtensionSuffix(files[i].getPath().getName());
    }
    return result;
}

From source file:com.blackberry.logtools.LogTools.java

License:Apache License

public void runPigRemote(Map<String, String> params, String out, String tmp, boolean quiet, boolean silent,
        Configuration conf, String queue_name, String additional_jars, File pig_tmp,
        ArrayList<String> D_options, String PIG_DIR, FileSystem fs) {
    //Set input parameter for pig job - calling Pig directly
    params.put("tmpdir", StringEscapeUtils.escapeJava(tmp));

    //Check for an out of '-', meaning write to stdout
    String pigout;
    if (out.equals("-")) {
        params.put("out", tmp + "/final");
        pigout = tmp + "/final";
    } else {
        params.put("out", StringEscapeUtils.escapeJava(out));
        pigout = StringEscapeUtils.escapeJava(out);
    }

    try {
        logConsole(quiet, silent, info, "Running PIG Command");
        conf.set("mapred.job.queue.name", queue_name);
        conf.set("pig.additional.jars", additional_jars);
        conf.set("pig.exec.reducers.bytes.per.reducer", Integer.toString(100 * 1000 * 1000));
        conf.set("pig.logfile", pig_tmp.toString());
        conf.set("hadoopversion", "23");
        //PIG temp directory set to be able to delete all temp files/directories
        conf.set("pig.temp.dir", tmp);

        //Setting output separator for logdriver
        String DEFAULT_OUTPUT_SEPARATOR = "\t";
        Charset UTF_8 = Charset.forName("UTF-8");
        String outputSeparator = conf.get("logdriver.output.field.separator", DEFAULT_OUTPUT_SEPARATOR);
        byte[] bytes = outputSeparator.getBytes(UTF_8);
        if (bytes.length != 1) {
            logConsole(true, true, error, "The output separator must be a single byte in UTF-8.");
            System.exit(1);
        }
        conf.set("logdriver.output.field.separator", Byte.toString(bytes[0]));

        dOpts(D_options, silent, out, conf);

        PigServer pigServer = new PigServer(ExecType.MAPREDUCE, conf);
        pigServer.registerScript(PIG_DIR + "/formatAndSort.pg", params);
    } catch (Exception e) {
        e.printStackTrace();
        System.exit(1);
    }

    logConsole(quiet, silent, warn, "PIG Job Completed.");
    if (out.equals("-")) {
        System.out.println(";#################### DATA RESULTS ####################");
        try {
            //Create filter to find files with the results from PIG job
            PathFilter filter = new PathFilter() {
                public boolean accept(Path file) {
                    return file.getName().contains("part-");
                }
            };

            //Find the files in the directory, open and printout results
            FileStatus[] status = fs.listStatus(new Path(tmp + "/final"), filter);
            for (int i = 0; i < status.length; i++) {
                BufferedReader br = new BufferedReader(new InputStreamReader(fs.open(status[i].getPath())));
                String line;
                while ((line = br.readLine()) != null) {
                    System.out.println(line);
                }
                br.close();
            }
            System.out.println(";#################### END OF RESULTS ####################");
        } catch (IOException e) {
            e.printStackTrace();
            System.exit(1);
        }
    } else {
        System.out.println(
                ";#################### Done. Search results are in " + pigout + " ####################");
    }
}

From source file:com.bonc.mr_roamRecognition_hjpt.comm.PathCombineTextInputFormat.java

License:Apache License

public synchronized static List<PathFilter> getPoll() {

    List<PathFilter> pools = new ArrayList<PathFilter>();
    Map<String, String> map = ProvUtil.getCode();

    for (Map.Entry<String, String> entry : map.entrySet()) {
        final String prov_id = entry.getValue();
        pools.add(new PathFilter() {
            @Override
            public boolean accept(Path path) {
                // Accept paths whose parent directory name ends with this province id.
                String parentDir = path.getParent().toString();
                return parentDir.endsWith(prov_id);
            }
        });
    }

    return pools;
}
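
A pool like this is presumably consumed by CombineFileInputFormat.createPool, which ensures that only files accepted by the same filter are combined into one input split. A minimal sketch of that wiring inside a CombineFileInputFormat subclass (assumed here, not shown in the source):

// One pool per province filter, so files from different provinces
// are never combined into the same input split.
for (PathFilter filter : getPoll()) {
    createPool(filter);
}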

From source file:com.btoddb.chronicle.apps.AvroTools.java

License:Open Source License

private void go(String srcDir) throws URISyntaxException, IOException {
    hdfsFs = FileSystem.get(new URI(srcDir), hdfsConfig);

    System.out.println();
    System.out.println("Processing files from " + srcDir);
    System.out.println();

    logger.debug("Searching for files in {}", srcDir);
    Path path = new Path(srcDir);
    if (!hdfsFs.exists(path)) {
        System.out.println("The path does not exist - cannot continue : " + path.toString());
        return;
    }

    FileStatus[] statuses = hdfsFs.listStatus(path, new PathFilter() {
        @Override
        public boolean accept(Path path) {
            String name = path.getName();
            return !name.startsWith("_") && name.endsWith(".avro");
        }
    });

    for (FileStatus fs : statuses) {
        try {
            Path inPath = fs.getPath();
            long fileSize = hdfsFs.getFileStatus(inPath).getLen();
            System.out.println(String.format("Processing file, %s (%d)", inPath.toString(), fileSize));

            testFileAndFix(inPath);
        } catch (Exception e) {
            // don't care about the cause, the test should be able to read all files it cares about
            e.printStackTrace();
        }
    }
}

From source file:com.cloudera.cdk.data.filesystem.PathFilters.java

License:Apache License

public static PathFilter notHidden() {
    return new PathFilter() {

        @Override
        public boolean accept(Path path) {
            return !(path.getName().startsWith(".") || path.getName().startsWith("_"));
        }
    };
}
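
A caller would typically pass the returned filter straight to a listing call; a minimal sketch (the directory name is a hypothetical placeholder):

// _SUCCESS markers, dot-files and other hidden entries are skipped.
FileSystem fs = FileSystem.get(new Configuration());
for (FileStatus status : fs.listStatus(new Path("/data/events"), PathFilters.notHidden())) {
    System.out.println(status.getPath());
}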