Example usage for org.apache.hadoop.fs.PathFilter

Introduction

This page collects example usages of org.apache.hadoop.fs.PathFilter. PathFilter is a callback interface with a single method, accept(Path), which returns true for each path that should be included in a file-system listing; most examples below implement it as an anonymous inner class and pass it to FileSystem.listStatus.

Prototype

    public interface PathFilter {
        boolean accept(Path path);
    }

Usage
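
Before the individual examples, here is a minimal, self-contained sketch of the pattern they all share: implement accept(Path) in an anonymous class and hand the filter to FileSystem.listStatus. The directory name and the "part-" prefix are hypothetical placeholders, not taken from any example below.

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.PathFilter;

public class PathFilterSketch {
    public static void main(String[] args) throws IOException {
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(conf);

        // Keep only entries whose file name starts with "part-".
        PathFilter partFilesOnly = new PathFilter() {
            @Override
            public boolean accept(Path path) {
                return path.getName().startsWith("part-");
            }
        };

        // listStatus applies the filter to each child of the directory.
        for (FileStatus status : fs.listStatus(new Path("/tmp/job-output"), partFilesOnly)) {
            System.out.println(status.getPath());
        }
        fs.close();
    }
}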

From source file:GetRetweetersAndCountPerUser.java

License:Apache License

public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    if (otherArgs.length != 3) {
        System.err.println("Usage: GetRetweetersAndCountPerUser <in> <out> <num_reducers>");
        System.exit(2);
    }
    Job job = new Job(conf, "word count");
    job.setJarByClass(RetweetersPerUser.class);
    FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
    System.out.println(otherArgs[0]);
    job.setMapperClass(TweetMapper.class);
    job.setCombinerClass(IntSumReducer.class);
    job.setReducerClass(IntSumReducer.class);
    job.setOutputKeyClass(IntWritable.class);
    job.setOutputValueClass(IntWritable.class);
    job.setNumReduceTasks(Integer.parseInt(otherArgs[2]));
    FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));

    if (job.waitForCompletion(true)) {
        FileSystem hdfs = FileSystem.get(new URI(otherArgs[1]), conf);
        Path dir = new Path(otherArgs[1]);
        PathFilter filter = new PathFilter() {
            public boolean accept(Path file) {
                return file.getName().startsWith("part-r-");
            }
        };

        HashMap<Integer, Integer> counts_for_user = new HashMap<Integer, Integer>();
        FileStatus[] files = hdfs.listStatus(dir, filter);
        Arrays.sort(files);
        for (int i = 0; i < files.length; i++) {
            Path pt = files[i].getPath();
            BufferedReader br = new BufferedReader(new InputStreamReader(hdfs.open(pt)));
            String line = null;
            while ((line = br.readLine()) != null) {
                String[] columns = line.split("\t");
                int key = Integer.parseInt(columns[0]);
                if (counts_for_user.containsKey(key))
                    counts_for_user.put(key, counts_for_user.get(key) + 1);
                else
                    counts_for_user.put(key, 1);
            }
            br.close();
        }

        FSDataOutputStream fsDataOutputStream = hdfs.create(new Path(otherArgs[1] + "_count"));
        PrintWriter writer = new PrintWriter(fsDataOutputStream);
        for (Entry<Integer, Integer> e : counts_for_user.entrySet()) {
            writer.write(e.getKey() + "\t" + e.getValue() + "\n");
        }
        writer.close();
        fsDataOutputStream.close();
        hdfs.close();
        System.exit(0);
    }
    System.exit(1);
}
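
A note on the example above: when the goal is simply to collect the part-r-* reducer outputs, FileSystem.globStatus offers a filter-free alternative. A minimal sketch, reusing the hdfs handle and output path from the example (globStatus is a standard Hadoop API; the surrounding wiring is assumed):

// Glob matching replaces the anonymous PathFilter.
FileStatus[] parts = hdfs.globStatus(new Path(otherArgs[1] + "/part-r-*"));
if (parts != null) {
    Arrays.sort(parts);
    for (FileStatus part : parts) {
        System.out.println(part.getPath());
    }
}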

From source file:HadoopUtilsTest.java

License:Apache License

public static void main(String[] args) throws IOException {
    Configuration configuration = HBaseConfiguration.create();
    FileSystem fileSystem = null;
    try {
        fileSystem = FileSystem.get(configuration);
        FileStatus[] fileStatuses = fileSystem.listStatus(new Path("/icntv/grade/correlate-result/2013-12-12"),
                new PathFilter() {
                    @Override
                    public boolean accept(Path path) {

                        return path.getName().matches("part-r-\\d*");
                    }
                });
        for (FileStatus f : fileStatuses) {
            IOUtils.copyBytes(fileSystem.open(f.getPath()), System.out, 4096, false);
        }
    } catch (Exception e) {
        e.printStackTrace();
    } finally {
        if (null != fileSystem) {
            fileSystem.close();
        }
    }
}

From source file:com.alexholmes.hadooputils.test.TextIOJobBuilder.java

License:Apache License

/**
 * Called after the MapReduce job has completed, to verify that the outputs
 * generated by the MapReduce job align with the expected outputs that were
 * set with calls to {@link #addExpectedOutput(String)} and
 * {@link #addExpectedOutput(String...)}.
 *
 * @return a reference to this object
 * @throws IOException if something goes wrong
 */
public TextIOJobBuilder verifyResults() throws IOException {

    FileStatus[] outputFiles = fs.listStatus(outputPath, new PathFilter() {
        @Override
        public boolean accept(final Path path) {
            return path.getName().startsWith("part");
        }
    });

    System.out.println("Output files: " + StringUtils.join(outputFiles));

    int i = 0;
    for (FileStatus file : outputFiles) {
        List<String> actualLines = FileUtils.readLines(fs, file.getPath());

        for (String actualLine : actualLines) {
            String expectedLine = expectedOutputs.get(i++);
            assertEquals(expectedLine, actualLine);
        }
    }

    assertEquals(expectedOutputs.size(), i);

    return this;
}

From source file:com.architecting.ch07.MapReduceIndexerTool.java

License:Apache License

private FileStatus[] listSortedOutputShardDirs(Job job, Path outputReduceDir, FileSystem fs)
        throws FileNotFoundException, IOException {
    final String dirPrefix = SolrOutputFormat.getOutputName(job);
    FileStatus[] dirs = fs.listStatus(outputReduceDir, new PathFilter() {
        @Override
        public boolean accept(Path path) {
            return path.getName().startsWith(dirPrefix);
        }
    });
    for (FileStatus dir : dirs) {
        if (!dir.isDirectory()) {
            throw new IllegalStateException("Not a directory: " + dir.getPath());
        }
    }

    // use alphanumeric sort (rather than lexicographical sort) to properly handle more than 99999
    // shards
    Arrays.sort(dirs, new Comparator<FileStatus>() {
        @Override
        public int compare(FileStatus f1, FileStatus f2) {
            return new AlphaNumericComparator().compare(f1.getPath().getName(), f2.getPath().getName());
        }
    });

    return dirs;
}

From source file:com.bah.lucene.hdfs.HdfsDirectory.java

License:Apache License

@Override
public String[] listAll() throws IOException {
    LOG.debug(MessageFormat.format("listAll [{0}]", _path));
    FileStatus[] files = _fileSystem.listStatus(_path, new PathFilter() {
        @Override
        public boolean accept(Path path) {
            try {
                return _fileSystem.isFile(path);
            } catch (IOException e) {
                throw new RuntimeException(e);
            }
        }
    });
    String[] result = new String[files.length];
    for (int i = 0; i < result.length; i++) {
        result[i] = files[i].getPath().getName();
    }
    return result;
}

From source file:com.bah.lucene.hdfs.SoftlinkHdfsDirectory.java

License:Apache License

@Override
public String[] listAll() throws IOException {
    LOG.debug(MessageFormat.format("listAll [{0}]", _path));
    FileStatus[] files = _fileSystem.listStatus(_path, new PathFilter() {
        @Override
        public boolean accept(Path path) {
            try {
                return _fileSystem.isFile(path);
            } catch (IOException e) {
                throw new RuntimeException(e);
            }
        }
    });
    String[] result = new String[files.length];
    for (int i = 0; i < result.length; i++) {
        result[i] = removeLinkExtensionSuffix(files[i].getPath().getName());
    }
    return result;
}

From source file:com.blackberry.logtools.LogTools.java

License:Apache License

public void runPigRemote(Map<String, String> params, String out, String tmp, boolean quiet, boolean silent,
        Configuration conf, String queue_name, String additional_jars, File pig_tmp,
        ArrayList<String> D_options, String PIG_DIR, FileSystem fs) {
    //Set input parameter for pig job - calling Pig directly
    params.put("tmpdir", StringEscapeUtils.escapeJava(tmp));

    //Check for an out of '-', meaning write to stdout
    String pigout;
    if (out.equals("-")) {
        params.put("out", tmp + "/final");
        pigout = tmp + "/final";
    } else {
        params.put("out", StringEscapeUtils.escapeJava(out));
        pigout = StringEscapeUtils.escapeJava(out);
    }

    try {
        logConsole(quiet, silent, info, "Running PIG Command");
        conf.set("mapred.job.queue.name", queue_name);
        conf.set("pig.additional.jars", additional_jars);
        conf.set("pig.exec.reducers.bytes.per.reducer", Integer.toString(100 * 1000 * 1000));
        conf.set("pig.logfile", pig_tmp.toString());
        conf.set("hadoopversion", "23");
        //PIG temp directory set to be able to delete all temp files/directories
        conf.set("pig.temp.dir", tmp);

        //Setting output separator for logdriver
        String DEFAULT_OUTPUT_SEPARATOR = "\t";
        Charset UTF_8 = Charset.forName("UTF-8");
        String outputSeparator = conf.get("logdriver.output.field.separator", DEFAULT_OUTPUT_SEPARATOR);
        byte[] bytes = outputSeparator.getBytes(UTF_8);
        if (bytes.length != 1) {
            logConsole(true, true, error, "The output separator must be a single byte in UTF-8.");
            System.exit(1);
        }
        conf.set("logdriver.output.field.separator", Byte.toString(bytes[0]));

        dOpts(D_options, silent, out, conf);

        PigServer pigServer = new PigServer(ExecType.MAPREDUCE, conf);
        pigServer.registerScript(PIG_DIR + "/formatAndSort.pg", params);
    } catch (Exception e) {
        e.printStackTrace();
        System.exit(1);
    }

    logConsole(quiet, silent, warn, "PIG Job Completed.");
    if (out.equals("-")) {
        System.out.println(";#################### DATA RESULTS ####################");
        try {
            //Create filter to find files with the results from PIG job
            PathFilter filter = new PathFilter() {
                public boolean accept(Path file) {
                    return file.getName().contains("part-");
                }
            };

            //Find the files in the directory, open and printout results
            FileStatus[] status = fs.listStatus(new Path(tmp + "/final"), filter);
            for (int i = 0; i < status.length; i++) {
                BufferedReader br = new BufferedReader(new InputStreamReader(fs.open(status[i].getPath())));
                String line;
                while ((line = br.readLine()) != null) {
                    System.out.println(line);
                }
                br.close();
            }
            System.out.println(";#################### END OF RESULTS ####################");
        } catch (IOException e) {
            e.printStackTrace();
            System.exit(1);
        }
    } else {
        System.out.println(
                ";#################### Done. Search results are in " + pigout + " ####################");
    }
}

From source file:com.bonc.mr_roamRecognition_hjpt.comm.PathCombineTextInputFormat.java

License:Apache License

public synchronized static List<PathFilter> getPoll() {

    List<PathFilter> pools = new ArrayList<PathFilter>();
    Map<String, String> map = ProvUtil.getCode();

    for (Map.Entry<String, String> entry : map.entrySet()) {
        final String prov_id = entry.getValue();
        pools.add(new PathFilter() {
            @Override
            public boolean accept(Path path) {
                // Accept paths whose parent directory name ends with this province id.
                String parentDir = path.getParent().toString();
                return parentDir.endsWith(prov_id);
            }
        });
    }

    return pools;
}
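
A pool like this is presumably consumed by CombineFileInputFormat.createPool, which ensures that only files accepted by the same filter are combined into one input split. A minimal sketch of that wiring inside a CombineFileInputFormat subclass (assumed here, not shown in the source):

// One pool per province filter, so files from different provinces
// are never combined into the same input split.
for (PathFilter filter : getPoll()) {
    createPool(filter);
}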

From source file:com.btoddb.chronicle.apps.AvroTools.java

License:Open Source License

private void go(String srcDir) throws URISyntaxException, IOException {
    hdfsFs = FileSystem.get(new URI(srcDir), hdfsConfig);

    System.out.println();
    System.out.println("Processing files from " + srcDir);
    System.out.println();

    logger.debug("Searching for files in {}", srcDir);
    Path path = new Path(srcDir);
    if (!hdfsFs.exists(path)) {
        System.out.println("The path does not exist - cannot continue : " + path.toString());
        return;
    }

    FileStatus[] statuses = hdfsFs.listStatus(path, new PathFilter() {
        @Override
        public boolean accept(Path path) {
            String name = path.getName();
            return !name.startsWith("_") && name.endsWith(".avro");
        }
    });

    for (FileStatus fs : statuses) {
        try {
            Path inPath = fs.getPath();
            long fileSize = hdfsFs.getFileStatus(inPath).getLen();
            System.out.println(String.format("Processing file, %s (%d)", inPath.toString(), fileSize));

            testFileAndFix(inPath);
        } catch (Exception e) {
            // don't care about the cause, the test should be able to read all files it cares about
            e.printStackTrace();
        }
    }
}

From source file:com.cloudera.cdk.data.filesystem.PathFilters.java

License:Apache License

public static PathFilter notHidden() {
    return new PathFilter() {

        @Override
        public boolean accept(Path path) {
            return !(path.getName().startsWith(".") || path.getName().startsWith("_"));
        }
    };
}
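
A caller would typically pass the returned filter straight to a listing call; a minimal sketch (the directory name is a hypothetical placeholder):

// _SUCCESS markers, dot-files and other hidden entries are skipped.
FileSystem fs = FileSystem.get(new Configuration());
for (FileStatus status : fs.listStatus(new Path("/data/events"), PathFilters.notHidden())) {
    System.out.println(status.getPath());
}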