List of usage examples for org.apache.hadoop.fs.PathFilter
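PathFilter is a single-method Hadoop interface (boolean accept(Path path)) used to restrict the results of directory listings such as FileSystem.listStatus and FileSystem.globStatus. Before the collected examples, here is a minimal sketch of the common idiom; the /data/output directory is hypothetical, and since the interface has one abstract method, a Java 8 lambda would work as well:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.PathFilter;

public class PathFilterSketch {
    public static void main(String[] args) throws Exception {
        FileSystem fs = FileSystem.get(new Configuration());
        // Keep only MapReduce/Spark output files such as part-00000 or part-r-00000.
        PathFilter partFilter = new PathFilter() {
            @Override
            public boolean accept(Path path) {
                return path.getName().startsWith("part");
            }
        };
        FileStatus[] matches = fs.listStatus(new Path("/data/output"), partFilter);
        for (FileStatus status : matches) {
            System.out.println(status.getPath());
        }
    }
}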
From source file:spark.func.movierecommendation.MovieRecommendationApp.java
License:Apache License
@Override
public List<List<String>> fetchResults(Configuration hadoopConfig, JavaSparkContext sparkContext,
        Map<String, String> params) throws Exception {
    if (hadoopConfig == null) {
        return Lists.newArrayList();
    }
    String outputPath = getJobOutputPath(hadoopConfig, params);
    FileSystem fileSystem = FileSystem.get(hadoopConfig);
    FileStatus[] status = fileSystem.listStatus(new Path(outputPath), new PathFilter() {
        @Override
        public boolean accept(Path path) {
            return path.getName().startsWith("part");
        }
    });
    List<List<String>> results = Lists.newArrayList();
    for (int i = 0; i < status.length; i++) {
        FSDataInputStream is = fileSystem.open(status[i].getPath());
        BufferedReader br = new BufferedReader(new InputStreamReader(is));
        String line = br.readLine();
        while (line != null) {
            Matcher matcher = RESULT_PATTERN.matcher(line);
            if (matcher.find()) {
                // group 1 - userid, group 2 - bookid, group 3 - rating, group 4 - title
                LOGGER.debug("SPARK RESULT: {} {} {}", matcher.group(2), matcher.group(3), matcher.group(4));
                results.add(Lists.newArrayList(matcher.group(2), matcher.group(3), matcher.group(4)));
            }
            line = br.readLine();
        }
        br.close(); // closes the underlying FSDataInputStream as well
    }
    return results;
}
From source file:spark.func.topn.TopNApp.java
License:Apache License
@Override
public List<List<String>> fetchResults(Configuration hadoopConfig, JavaSparkContext sparkContext,
        Map<String, String> params) throws Exception {
    if (hadoopConfig == null) {
        return Lists.newArrayList();
    }
    String outputPath = getJobOutputPath(hadoopConfig, params);
    FileSystem fileSystem = FileSystem.get(hadoopConfig);
    FileStatus[] status = fileSystem.listStatus(new Path(outputPath), new PathFilter() {
        @Override
        public boolean accept(Path path) {
            return path.getName().startsWith("part");
        }
    });
    List<List<String>> results = Lists.newArrayList();
    for (int i = 0; i < status.length; i++) {
        FSDataInputStream is = fileSystem.open(status[i].getPath());
        BufferedReader br = new BufferedReader(new InputStreamReader(is));
        String line = br.readLine();
        while (line != null) {
            String[] parts = line.replace("(", "").replace(")", "").split(",");
            if (parts.length >= 2) {
                LOGGER.debug("SPARK RESULT: {} {}", parts[0], parts[1]);
                results.add(Lists.newArrayList(parts[0], parts[1]));
            }
            line = br.readLine();
        }
        br.close(); // closes the underlying FSDataInputStream as well
    }
    return results;
}
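The two fetchResults examples above share the same listing-and-reading idiom; only the per-line parsing differs. A minimal consolidated sketch could factor that idiom out (the class and helper names here are hypothetical, not part of either project):

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.util.ArrayList;
import java.util.List;

import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.PathFilter;

public final class PartFileReader {
    // Read every line of every "part*" file under outputPath; try-with-resources
    // closes each reader and its underlying FSDataInputStream.
    public static List<String> readPartFiles(FileSystem fileSystem, String outputPath) throws IOException {
        FileStatus[] status = fileSystem.listStatus(new Path(outputPath), new PathFilter() {
            @Override
            public boolean accept(Path path) {
                return path.getName().startsWith("part");
            }
        });
        List<String> lines = new ArrayList<String>();
        for (FileStatus fileStatus : status) {
            try (BufferedReader br = new BufferedReader(
                    new InputStreamReader(fileSystem.open(fileStatus.getPath())))) {
                String line;
                while ((line = br.readLine()) != null) {
                    lines.add(line);
                }
            }
        }
        return lines;
    }
}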
From source file:test.hiveserver.parse.SemanticAnalyzer.java
License:Apache License
private void decideExecMode(List<Task<? extends Serializable>> rootTasks, Context ctx,
        GlobalLimitCtx globalLimitCtx) throws SemanticException {
    // bypass for explain queries for now
    if (ctx.getExplain()) {
        return;
    }
    // user has told us to run in local mode or doesn't want auto-local mode
    if (ctx.isLocalOnlyExecutionMode() || !conf.getBoolVar(HiveConf.ConfVars.LOCALMODEAUTO)) {
        return;
    }
    final Context lCtx = ctx;
    PathFilter p = new PathFilter() {
        public boolean accept(Path file) {
            return !lCtx.isMRTmpFileURI(file.toUri().getPath());
        }
    };
    List<ExecDriver> mrtasks = Utilities.getMRTasks(rootTasks);

    // map-reduce jobs will be run locally based on data size
    // first find out if any of the jobs needs to run non-locally
    boolean hasNonLocalJob = false;
    for (ExecDriver mrtask : mrtasks) {
        try {
            ContentSummary inputSummary = Utilities.getInputSummary(ctx, (MapredWork) mrtask.getWork(), p);
            int numReducers = getNumberOfReducers(mrtask.getWork(), conf);
            long estimatedInput;
            if (globalLimitCtx != null && globalLimitCtx.isEnable()) {
                // If the global limit optimization is triggered, we will
                // estimate input data actually needed based on limit rows.
                // estimated Input = (num_limit * max_size_per_row) * (estimated_map + 2)
                long sizePerRow = HiveConf.getLongVar(conf, HiveConf.ConfVars.HIVELIMITMAXROWSIZE);
                estimatedInput = globalLimitCtx.getGlobalLimit() * sizePerRow;
                long minSplitSize = HiveConf.getLongVar(conf, HiveConf.ConfVars.MAPREDMINSPLITSIZE);
                long estimatedNumMap = inputSummary.getLength() / minSplitSize + 1;
                estimatedInput = estimatedInput * (estimatedNumMap + 1);
            } else {
                estimatedInput = inputSummary.getLength();
            }
            if (LOG.isDebugEnabled()) {
                LOG.debug("Task: " + mrtask.getId() + ", Summary: " + inputSummary.getLength() + ","
                        + inputSummary.getFileCount() + "," + numReducers + ", estimated Input: "
                        + estimatedInput);
            }
            if (MapRedTask.isEligibleForLocalMode(conf, numReducers, estimatedInput,
                    inputSummary.getFileCount()) != null) {
                hasNonLocalJob = true;
                break;
            } else {
                mrtask.setLocalMode(true);
            }
        } catch (IOException e) {
            throw new SemanticException(e);
        }
    }

    if (!hasNonLocalJob) {
        // none of the mapred tasks needs to be run locally. That means that the
        // query can be executed entirely in local mode. Save the current tracker
        // value and restore it when done
        ctx.setOriginalTracker(conf.getVar(HiveConf.ConfVars.HADOOPJT));
        conf.setVar(HiveConf.ConfVars.HADOOPJT, "local");
        console.printInfo("Automatically selecting local only mode for query");

        // If all the tasks can be run locally, we can use local disk for
        // storing intermediate data.

        /**
         * This code is commented out pending further testing/development
         * for (Task<? extends Serializable> t: rootTasks)
         *   t.localizeMRTmpFiles(ctx);
         */
    }
}
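As a concrete reading of the estimate above: with a global limit of 100 rows, a 100 KB maximum row size, and one estimated map task, the heuristic sizes the job at (100 * 100 KB) * (1 + 1) = 20 MB, and the task is marked local only if MapRedTask.isEligibleForLocalMode returns null for that figure.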
From source file:tv.icntv.grade.film.recommend.CorrelateJob.java
License:Apache License
@Override
public int run(String[] strings) throws Exception {
    Configuration configuration = getConf();
    HadoopUtils.deleteIfExist(strings[1]);
    Job correlate = new Job(configuration, "icntv correlate job");
    MapReduceUtils.initMapperJob(UserHistoryMapper.class, Text.class, Text.class, this.getClass(), correlate,
            getPaths(strings[0].split(",")));
    MapReduceUtils.initReducerJob(new Path(strings[1]), UserHistoryReducer.class, correlate);
    if (!correlate.waitForCompletion(true)) {
        return 1;
    }
    Parameters parameter = getParameter(strings[2]);
    HadoopUtils.deleteIfExist(parameter.get("output"));
    PFPGrowth.runPFPGrowth(parameter, configuration);
    String output = parameter.get("output") + "/frequentpatterns";
    long count = HadoopUtils.count(new Path(output), new PathFilter() {
        @Override
        public boolean accept(Path path) {
            // match reducer output files such as part-r-00000
            return path.getName().matches("part-r-\\d*");
        }
    });
    if (count == 0) {
        return 1;
    }
    configuration.setLong("icntv.correlate.total.size", count);
    HadoopUtils.deleteIfExist(strings[3]);
    Job result = new Job(configuration, "correlate result calculate");
    MapReduceUtils.initMapperJob(CorrelateInputMapper.class, Text.class, Text.class, this.getClass(), result,
            new Path(output));
    result.setInputFormatClass(SequenceFileInputFormat.class);
    // TableMapReduceUtil.initTableReducerJob("");
    MapReduceUtils.initReducerJob(new Path(strings[3]), CorrelateOutPutReducer.class, result);
    result.waitForCompletion(true);
    return 0;
}
From source file:tv.icntv.grade.film.recommend.CorrelateResultJob.java
License:Apache License
@Override
public int run(String[] strings) throws Exception {
    Configuration configuration = super.getConf();
    String output = strings[0] + "/frequentpatterns";
    long count = HadoopUtils.count(new Path(output), new PathFilter() {
        @Override
        public boolean accept(Path path) {
            // match reducer output files such as part-r-00000
            return path.getName().matches("part-r-\\d*");
        }
    });
    System.out.println("count =" + count);
    if (count == 0) {
        return 1;
    }
    configuration.setLong("icntv.correlate.total.size", count);
    Job result = new Job(configuration, "correlate result calculate");
    MapReduceUtils.initMapperJob(CorrelateInputMapper.class, Text.class, Text.class, this.getClass(), result,
            new Path(output));
    result.setInputFormatClass(SequenceFileInputFormat.class);
    // TableMapReduceUtil.initTableReducerJob("");
    MapReduceUtils.initReducerJob(new Path(strings[1]), CorrelateOutPutReducer.class, result);
    result.waitForCompletion(true);
    return 0;
}
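HadoopUtils.count in the two jobs above is project-specific. With only the stock Hadoop API, counting the reducer outputs the same PathFilter selects might look like this sketch (class and method names are hypothetical):

import java.io.IOException;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.PathFilter;

public final class PartCounter {
    // Count reducer output files (part-r-00000, part-r-00001, ...) under dir.
    public static long countReducerOutputs(FileSystem fs, Path dir) throws IOException {
        FileStatus[] parts = fs.listStatus(dir, new PathFilter() {
            @Override
            public boolean accept(Path path) {
                return path.getName().matches("part-r-\\d*");
            }
        });
        return parts == null ? 0 : parts.length;
    }
}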
From source file:tv.icntv.log.stb.filter.FilterJob.java
License:Apache License
@Override
public boolean run(Map<String, String> maps) throws Exception {
    Configuration configuration = getConf();
    // disable speculative execution so each input file is processed exactly once
    configuration.setBoolean("mapreduce.reduce.speculative", false);
    configuration.setBoolean("mapreduce.map.speculative", false);
    // setting conf
    Path input = new Path(maps.get(INPUT));
    Path back = new Path(maps.get(BACK));
    Path output = new Path(maps.get(OUTPUT_PREFIX));
    configuration.set(OUTPUT_SUFFIX, maps.get(OUTPUT_SUFFIX));
    configuration.set(OUTPUT_PREFIX, output.toString());
    configuration.set(OTHER_PATH, maps.get(OTHER_PATH));
    // example configuration:
    // Path input = new Path("/icntv/log/stb/2014-05-19/stb-2014-05-18-23.lzo_deflate");
    // Path back = new Path("/icntv/parser/stb/filter/status/2014-05-18/");
    // Path output = new Path("/icntv/parser/stb/filter/result/2014-05-18/");
    Path[] in = HadoopUtils.createFile(input, back, new PathFilter() {
        @Override
        public boolean accept(Path path) {
            return path.getName().endsWith(file_success_suffix);
        }
    }, file_success_suffix, parseing_suffix, parsed_suffix);
    if (null == in || in.length == 0) {
        logger.info("input not exist;");
        return false;
    }
    List<Path> inTemp = Lists.newArrayList(in);
    String ye = DateUtils.addDay(input.getName(), "yyyy-MM-dd", -1);
    Path prefix = new Path(input.getParent() + File.separator + ye, "stb-" + ye + "-23.lzo");
    logger.info("prefix path ={}", prefix.toString());
    if (HadoopUtils.isExist(prefix)) {
        logger.info("add today path= {}", prefix.toString());
        inTemp.add(prefix);
    }
    String day = DateUtils.addDay(input.getName(), "yyyy-MM-dd", 1);
    Path nextPath = new Path(input.getParent() + File.separator + day, "stb-" + day + "-00.lzo");
    logger.info("next path ={},writed path={}", nextPath.toString(),
            new Path(input.getParent() + File.separator + day, "stb-" + day + "-00.lzo.writed"));
    if (HadoopUtils.isExist(new Path(input.getParent() + File.separator + day,
            "stb-" + day + "-00.lzo.writed"))) {
        logger.info("add today path= {}", nextPath.toString());
        inTemp.add(nextPath);
    }
    logger.info("input size = {}", inTemp.size());
    // inTemp.add(new Path(input.getParent()+ File.separator+ DateTime.now().toString("yyyy-MM-dd"),"")
    Job stbFilterJob = Job.getInstance(configuration, "stb parser first:filter by rule file");
    // setting job configuration
    stbFilterJob.setMapperClass(FilterMapper.class);
    stbFilterJob.setOutputKeyClass(NullWritable.class);
    stbFilterJob.setOutputValueClass(Text.class);
    FileInputFormat.setInputPaths(stbFilterJob, inTemp.toArray(new Path[inTemp.size()]));
    stbFilterJob.setJarByClass(getClass());
    FileOutputFormat.setOutputPath(stbFilterJob, output);
    LazyOutputFormat.setOutputFormatClass(stbFilterJob, TextOutputFormat.class);
    stbFilterJob.setNumReduceTasks(0);
    if (stbFilterJob.waitForCompletion(true)) {
        for (Path path : in) {
            HadoopUtils.rename(new Path(path + parseing_suffix), new Path(path + parsed_suffix));
        }
        return true;
    }
    return false;
}
From source file:tv.icntv.log.tools.FileApi.java
License:Apache License
@Override
public synchronized boolean writeDat(Path[] inputs, final String regular, Path output) {
    FileSystem fileSystem = null;
    BufferedReader reader = null;
    FSDataOutputStream outputStream = null;
    try {
        fileSystem = FileSystem.get(conf);
        FileStatus[] fileStatuses = fileSystem.listStatus(inputs, new PathFilter() {
            @Override
            public boolean accept(Path path) {
                return path.getName().matches(regular);
            }
        });
        if (null == fileStatuses || fileStatuses.length == 0) {
            System.out.println("null...");
            return false;
        }
        System.out.println(fileStatuses.length);
        outputStream = fileSystem.create(output, true, 40960);
        for (FileStatus status : fileStatuses) {
            if (regular.endsWith("lzo")) {
                reader = new BufferedReader(new InputStreamReader(
                        lzopInputStream.createInputStream(fileSystem.open(status.getPath())), "utf-8"));
            } else {
                reader = new BufferedReader(new InputStreamReader(fileSystem.open(status.getPath())));
            }
            String line = null;
            while (null != (line = reader.readLine())) {
                byte[] lineByte = (line + "\r\n").getBytes("utf-8");
                outputStream.write(lineByte, 0, lineByte.length);
            }
        }
    } catch (IOException e) {
        System.out.println(e);
        e.printStackTrace();
        return false;
    } finally {
        IOUtils.closeStream(reader);
        IOUtils.closeStream(outputStream);
        IOUtils.closeStream(fileSystem);
    }
    return true;
}
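The example above matches file names against a regex via listStatus plus a PathFilter. When the match is on the name itself, FileSystem.globStatus is an alternative: the glob pattern does the coarse selection and the PathFilter prunes it further. A minimal sketch, with a hypothetical /logs/2014-05-18 directory:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.PathFilter;

public class GlobSketch {
    public static void main(String[] args) throws Exception {
        FileSystem fs = FileSystem.get(new Configuration());
        // The glob expands the wildcard; the PathFilter then prunes the expansion.
        FileStatus[] lzoFiles = fs.globStatus(new Path("/logs/2014-05-18/*.lzo"), new PathFilter() {
            @Override
            public boolean accept(Path path) {
                return !path.getName().startsWith("_"); // skip _SUCCESS and similar markers
            }
        });
        for (FileStatus status : lzoFiles) {
            System.out.println(status.getPath());
        }
    }
}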
From source file:tv.icntv.logsys.HadoopRun.java
License:Apache License
protected List<Path> getFileStatus(String fromPath) {
    FileStatus[] fileStatuses = store.getFiles(fromPath, new PathFilter() {
        @Override
        public boolean accept(Path path) {
            return path.getName().endsWith("writed");
        }
    });
    if (null == fileStatuses || fileStatuses.length == 0) {
        logger.info("fileStatuses is null");
        return null;
    }
    List<Path> list = Lists.newArrayList();
    for (FileStatus fileStatus : fileStatuses) {
        String name = fileStatus.getPath().getName();
        name = name.replace(".writed", "");
        if ((name.endsWith(".gz") || name.endsWith(".log")) && store.isExist(fromPath + separator + name)) {
            list.add(new Path(fromPath + separator + name));
        }
    }
    return list;
}
From source file:tv.icntv.logsys.HadoopRunMain.java
License:Apache License
protected List<Path> getFileStatus(String fromPath) {
    FileStatus[] fileStatuses = store.getFiles(fromPath, new PathFilter() {
        @Override
        public boolean accept(Path path) {
            return path.getName().endsWith("writed");
        }
    });
    if (null == fileStatuses || fileStatuses.length == 0) {
        logger.info("fileStatuses is null");
        return null;
    }
    List<Path> list = Lists.newArrayList();
    for (FileStatus fileStatus : fileStatuses) {
        String name = fileStatus.getPath().getName();
        name = name.replace(".writed", "");
        if (store.isExist(fromPath + separator + name)) {
            list.add(new Path(fromPath + separator + name));
        }
    }
    return list;
}
From source file:tv.icntv.logsys.Main.java
License:Apache License
public List<Path> getFileStatus(String fromPath) {
    FileStatus[] fileStatuses = store.getFiles(fromPath, new PathFilter() {
        @Override
        public boolean accept(Path path) {
            return path.getName().endsWith("writed");
        }
    });
    if (null == fileStatuses || fileStatuses.length == 0) {
        return null;
    }
    List<Path> list = Lists.newArrayList();
    for (FileStatus fileStatus : fileStatuses) {
        String name = fileStatus.getPath().getName();
        name = name.replace(".writed", "");
        if (name.endsWith(".gz") && store.isExist(fromPath + separator + name)) {
            list.add(new Path(fromPath + separator + name));
        }
    }
    return list;
}
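The last three getFileStatus variants all implement the same marker-file handshake: a producer writes foo.gz.writed once foo.gz is complete, and the consumer lists only the markers, then strips the suffix to recover the finished data file. A minimal sketch of that pattern against a plain FileSystem (store and separator in the examples above are project-specific helpers; the stock API is used here instead):

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;

import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.PathFilter;

public final class MarkerFiles {
    // List only the ".writed" markers, then return the finished data files they point to.
    public static List<Path> finishedFiles(FileSystem fs, Path dir) throws IOException {
        FileStatus[] markers = fs.listStatus(dir, new PathFilter() {
            @Override
            public boolean accept(Path path) {
                return path.getName().endsWith(".writed");
            }
        });
        List<Path> files = new ArrayList<Path>();
        for (FileStatus marker : markers) {
            String name = marker.getPath().getName().replace(".writed", "");
            Path data = new Path(dir, name);
            if (fs.exists(data)) { // the marker may outlive a deleted data file
                files.add(data);
            }
        }
        return files;
    }
}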