Example usage for org.apache.hadoop.fs PathFilter

List of usage examples for org.apache.hadoop.fs.PathFilter

Introduction

On this page you can find example usages of org.apache.hadoop.fs.PathFilter, an interface whose accept(Path) method decides which paths a FileSystem listing operation should return. In the examples below it is implemented as an anonymous class and passed to methods such as FileSystem.listStatus.

Prototype

public interface PathFilter {
    boolean accept(Path path);
}
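
Before the real-world examples, here is a minimal, self-contained sketch of the common pattern: implement accept(Path) in an anonymous PathFilter and pass it to FileSystem.listStatus so that only matching files are returned. The output directory /tmp/output and the "part-" name prefix are illustrative placeholders, not taken from the examples below.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.PathFilter;

public class PathFilterSketch {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        FileSystem fileSystem = FileSystem.get(conf);

        // Keep only files whose names start with "part-" (e.g. MapReduce output files)
        // under an illustrative output directory.
        FileStatus[] parts = fileSystem.listStatus(new Path("/tmp/output"), new PathFilter() {
            @Override
            public boolean accept(Path path) {
                return path.getName().startsWith("part-");
            }
        });

        for (FileStatus status : parts) {
            System.out.println(status.getPath());
        }
    }
}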

Usage

From source file:spark.func.movierecommendation.MovieRecommendationApp.java

License:Apache License

@Override
public List<List<String>> fetchResults(Configuration hadoopConfig, JavaSparkContext sparkContext,
        Map<String, String> params) throws Exception {
    if (hadoopConfig == null) {
        return Lists.newArrayList();
    }

    String outputPath = getJobOutputPath(hadoopConfig, params);

    FileSystem fileSystem = FileSystem.get(hadoopConfig);
    FileStatus[] status = fileSystem.listStatus(new Path(outputPath), new PathFilter() {
        @Override
        public boolean accept(Path path) {
            return path.getName().startsWith("part");
        }
    });

    List<List<String>> results = Lists.newArrayList();

    for (int i = 0; i < status.length; i++) {
        FSDataInputStream is = fileSystem.open(status[i].getPath());
        BufferedReader br = new BufferedReader(new InputStreamReader(is));
        String line = br.readLine();
        while (line != null) {
            Matcher matcher = RESULT_PATTERN.matcher(line);
            if (matcher.find()) {
                // group 1 - userid, group 2 - bookid, group 3 - rating, group 4 - title
                LOGGER.debug("SPARK RESULT: {} {} {}", matcher.group(2), matcher.group(3), matcher.group(4));
                results.add(Lists.newArrayList(matcher.group(2), matcher.group(3), matcher.group(4)));
            }
            line = br.readLine();
        }
        is.close();
    }

    return results;
}

From source file:spark.func.topn.TopNApp.java

License:Apache License

@Override
public List<List<String>> fetchResults(Configuration hadoopConfig, JavaSparkContext sparkContext,
        Map<String, String> params) throws Exception {
    if (hadoopConfig == null) {
        return Lists.newArrayList();
    }
    String outputPath = getJobOutputPath(hadoopConfig, params);
    FileSystem fileSystem = FileSystem.get(hadoopConfig);
    FileStatus[] status = fileSystem.listStatus(new Path(outputPath), new PathFilter() {
        @Override
        public boolean accept(Path path) {
            return path.getName().startsWith("part");
        }
    });

    List<List<String>> results = Lists.newArrayList();

    for (int i = 0; i < status.length; i++) {
        FSDataInputStream is = fileSystem.open(status[i].getPath());
        BufferedReader br = new BufferedReader(new InputStreamReader(is));
        String line = br.readLine();
        while (line != null) {
            String[] parts = line.replace("(", "").replace(")", "").split(",");
            if (parts.length >= 2) {
                LOGGER.debug("SPARK RESULT: {} {}", parts[0], parts[1]);
                results.add(Lists.newArrayList(parts[0], parts[1]));
            }
            line = br.readLine();
        }
        is.close();
    }

    return results;
}

From source file:test.hiveserver.parse.SemanticAnalyzer.java

License:Apache License

private void decideExecMode(List<Task<? extends Serializable>> rootTasks, Context ctx,
        GlobalLimitCtx globalLimitCtx) throws SemanticException {

    // bypass for explain queries for now
    if (ctx.getExplain()) {
        return;
    }

    // user has told us to run in local mode or doesn't want auto-local mode
    if (ctx.isLocalOnlyExecutionMode() || !conf.getBoolVar(HiveConf.ConfVars.LOCALMODEAUTO)) {
        return;
    }

    final Context lCtx = ctx;
    PathFilter p = new PathFilter() {
        public boolean accept(Path file) {
            return !lCtx.isMRTmpFileURI(file.toUri().getPath());
        }
    };
    List<ExecDriver> mrtasks = Utilities.getMRTasks(rootTasks);

    // map-reduce jobs will be run locally based on data size
    // first find out if any of the jobs needs to run non-locally
    boolean hasNonLocalJob = false;
    for (ExecDriver mrtask : mrtasks) {
        try {
            ContentSummary inputSummary = Utilities.getInputSummary(ctx, (MapredWork) mrtask.getWork(), p);
            int numReducers = getNumberOfReducers(mrtask.getWork(), conf);

            long estimatedInput;

            if (globalLimitCtx != null && globalLimitCtx.isEnable()) {
                // If the global limit optimization is triggered, we will
                // estimate input data actually needed based on limit rows.
                // estimated Input = (num_limit * max_size_per_row) * (estimated_map + 2)
                //
                long sizePerRow = HiveConf.getLongVar(conf, HiveConf.ConfVars.HIVELIMITMAXROWSIZE);
                estimatedInput = globalLimitCtx.getGlobalLimit() * sizePerRow;
                long minSplitSize = HiveConf.getLongVar(conf, HiveConf.ConfVars.MAPREDMINSPLITSIZE);
                long estimatedNumMap = inputSummary.getLength() / minSplitSize + 1;
                estimatedInput = estimatedInput * (estimatedNumMap + 1);
            } else {
                estimatedInput = inputSummary.getLength();
            }

            if (LOG.isDebugEnabled()) {
                LOG.debug("Task: " + mrtask.getId() + ", Summary: " + inputSummary.getLength() + ","
                        + inputSummary.getFileCount() + "," + numReducers + ", estimated Input: "
                        + estimatedInput);
            }

            if (MapRedTask.isEligibleForLocalMode(conf, numReducers, estimatedInput,
                    inputSummary.getFileCount()) != null) {
                hasNonLocalJob = true;
                break;
            } else {
                mrtask.setLocalMode(true);
            }
        } catch (IOException e) {
            throw new SemanticException(e);
        }
    }

    if (!hasNonLocalJob) {
        // none of the mapred tasks needs to be run locally. That means that the
        // query can be executed entirely in local mode. Save the current tracker
        // value and restore it when done
        ctx.setOriginalTracker(conf.getVar(HiveConf.ConfVars.HADOOPJT));
        conf.setVar(HiveConf.ConfVars.HADOOPJT, "local");
        console.printInfo("Automatically selecting local only mode for query");

        // If all the tasks can be run locally, we can use local disk for
        // storing intermediate data.

        /**
         * This code is commented out pending further testing/development
         * for (Task<? extends Serializable> t: rootTasks)
         * t.localizeMRTmpFiles(ctx);
         */
    }
}

From source file:tv.icntv.grade.film.recommend.CorrelateJob.java

License:Apache License

@Override
public int run(String[] strings) throws Exception {
    Configuration configuration = getConf();
    HadoopUtils.deleteIfExist(strings[1]);
    Job correlate = new Job(configuration, "icntv correlate job");
    MapReduceUtils.initMapperJob(UserHistoryMapper.class, Text.class, Text.class, this.getClass(), correlate,
            getPaths(strings[0].split(",")));
    MapReduceUtils.initReducerJob(new Path(strings[1]), UserHistoryReducer.class, correlate);
    if (!correlate.waitForCompletion(true)) {
        return 1;
    }
    Parameters parameter = getParameter(strings[2]);
    HadoopUtils.deleteIfExist(parameter.get("output"));
    PFPGrowth.runPFPGrowth(parameter, configuration);
    String output = parameter.get("output") + "/frequentpatterns";
    long count = HadoopUtils.count(new Path(output), new PathFilter() {
        @Override
        public boolean accept(Path path) {
            return path.getName().matches("part-r-\\d*");
        }
    });
    if (count == 0) {
        return 1;
    }
    configuration.setLong("icntv.correlate.total.size", count);
    HadoopUtils.deleteIfExist(strings[3]);
    Job result = new Job(configuration, "correlate result calculate");
    MapReduceUtils.initMapperJob(CorrelateInputMapper.class, Text.class, Text.class, this.getClass(), result,
            new Path(output));
    result.setInputFormatClass(SequenceFileInputFormat.class);
    //        TableMapReduceUtil.initTableReducerJob("");
    MapReduceUtils.initReducerJob(new Path(strings[3]), CorrelateOutPutReducer.class, result);
    result.waitForCompletion(true);
    return 0;
}

From source file:tv.icntv.grade.film.recommend.CorrelateResultJob.java

License:Apache License

@Override
public int run(String[] strings) throws Exception {
    Configuration configuration = super.getConf();
    String output = strings[0] + "/frequentpatterns";
    long count = HadoopUtils.count(new Path(output), new PathFilter() {
        @Override
        public boolean accept(Path path) {
            return path.getName().matches("part-r-\\d*");
        }
    });
    System.out.println("count =" + count);
    if (count == 0) {
        return 1;
    }

    configuration.setLong("icntv.correlate.total.size", count);
    Job result = new Job(configuration, "correlate result calculate");
    MapReduceUtils.initMapperJob(CorrelateInputMapper.class, Text.class, Text.class, this.getClass(), result,
            new Path(output));
    result.setInputFormatClass(SequenceFileInputFormat.class);
    //        TableMapReduceUtil.initTableReducerJob("");
    MapReduceUtils.initReducerJob(new Path(strings[1]), CorrelateOutPutReducer.class, result);
    result.waitForCompletion(true);
    return 0;
}

From source file:tv.icntv.log.stb.filter.FilterJob.java

License:Apache License

@Override
public boolean run(Map<String, String> maps) throws Exception {
    Configuration configuration = getConf();
    // disable speculative execution for map and reduce tasks
    configuration.setBoolean("mapreduce.reduce.speculative", false);
    configuration.setBoolean("mapreduce.map.speculative", false);
    //setting conf
    Path input = new Path(maps.get(INPUT));
    Path back = new Path(maps.get(BACK));
    Path output = new Path(maps.get(OUTPUT_PREFIX));
    configuration.set(OUTPUT_SUFFIX, maps.get(OUTPUT_SUFFIX));
    configuration.set(OUTPUT_PREFIX, output.toString());
    configuration.set(OTHER_PATH, maps.get(OTHER_PATH));

    //        configuration
    //        Path input=new Path("/icntv/log/stb/2014-05-19/stb-2014-05-18-23.lzo_deflate");
    //        Path back=new Path("/icntv/parser/stb/filter/status/2014-05-18/");
    //        Path output=new Path("/icntv/parser/stb/filter/result/2014-05-18/");
    Path[] in = HadoopUtils.createFile(input, back, new PathFilter() {
        @Override
        public boolean accept(Path path) {
            return path.getName().endsWith(file_success_suffix);
        }
    }, file_success_suffix, parseing_suffix, parsed_suffix);
    if (null == in || in.length == 0) {
        logger.info("input not exist;");
        return false;
    }
    List<Path> inTemp = Lists.newArrayList(in);
    String ye = DateUtils.addDay(input.getName(), "yyyy-MM-dd", -1);
    Path prefix = new Path(input.getParent() + File.separator + ye, "stb-" + ye + "-23.lzo");
    logger.info("prefix path ={}", prefix.toString());
    if (HadoopUtils.isExist(prefix)) {
        logger.info("add today path= {}", prefix.toString());
        inTemp.add(prefix);
    }
    String day = DateUtils.addDay(input.getName(), "yyyy-MM-dd", 1);
    Path nextPath = new Path(input.getParent() + File.separator + day, "stb-" + day + "-00.lzo");
    logger.info("next path ={},writed path={}", nextPath.toString(),
            new Path(input.getParent() + File.separator + day, "stb-" + day + "-00.lzo.writed"));
    if (HadoopUtils
            .isExist(new Path(input.getParent() + File.separator + day, "stb-" + day + "-00.lzo.writed"))) {
        logger.info("add today path= {}", nextPath.toString());
        inTemp.add(nextPath);
    }

    logger.info("input size = {}", inTemp.size());
    //        inTemp.add(new Path(input.getParent()+ File.separator+ DateTime.now().toString("yyyy-MM-dd"),"")
    Job stbFilterJob = Job.getInstance(configuration, "stb parser first:filter by rule file");
    //setting job configuration .....
    stbFilterJob.setMapperClass(FilterMapper.class);
    stbFilterJob.setOutputKeyClass(NullWritable.class);
    stbFilterJob.setOutputValueClass(Text.class);
    FileInputFormat.setInputPaths(stbFilterJob, inTemp.toArray(new Path[inTemp.size()]));
    stbFilterJob.setJarByClass(getClass());

    FileOutputFormat.setOutputPath(stbFilterJob, output);
    LazyOutputFormat.setOutputFormatClass(stbFilterJob, TextOutputFormat.class);

    stbFilterJob.setNumReduceTasks(0);

    if (stbFilterJob.waitForCompletion(true)) {
        for (Path path : in) {
            HadoopUtils.rename(new Path(path + parseing_suffix), new Path(path + parsed_suffix));
        }
        return true;
    }
    return false;

}

From source file:tv.icntv.log.tools.FileApi.java

License:Apache License

@Override
public synchronized boolean writeDat(Path[] inputs, final String regular, Path output) {
    FileSystem fileSystem = null;
    BufferedReader reader = null;
    FSDataOutputStream outputStream = null;
    try {
        fileSystem = FileSystem.get(conf);
        //
        FileStatus[] fileStatuses = fileSystem.listStatus(inputs, new PathFilter() {
            @Override
            public boolean accept(Path path) {
                return path.getName().matches(regular);
            }
        });
        if (null == fileStatuses || fileStatuses.length == 0) {
            System.out.println("null...");
            return false;
        }
        System.out.println(fileStatuses.length);
        outputStream = fileSystem.create(output, true, 40960);
        for (FileStatus status : fileStatuses) {
            if (regular.endsWith("lzo")) {
                reader = new BufferedReader(new InputStreamReader(
                        lzopInputStream.createInputStream(fileSystem.open(status.getPath())), "utf-8"));
            } else {
                reader = new BufferedReader(new InputStreamReader(fileSystem.open(status.getPath())));
            }
            String line = null;
            while (null != (line = reader.readLine())) {
                byte[] lineByte = (line + "\r\n").getBytes("utf-8");
                outputStream.write(lineByte, 0, lineByte.length);
            }
        }
    } catch (IOException e) {
        System.out.println(e);
        e.printStackTrace();
        return false;
    } finally {
        IOUtils.closeStream(reader);
        IOUtils.closeStream(outputStream);
        IOUtils.closeStream(fileSystem);
    }

    return true;

}

From source file:tv.icntv.logsys.HadoopRun.java

License:Apache License

protected List<Path> getFileStatus(String fromPath) {
    FileStatus[] fileStatuses = store.getFiles(fromPath, new PathFilter() {
        @Override
        public boolean accept(Path path) {
            return path.getName().endsWith("writed");
        }
    });

    if (null == fileStatuses || fileStatuses.length == 0) {
        logger.info("fileStatuses is null");
        return null;
    }
    List<Path> list = Lists.newArrayList();
    for (FileStatus fileStatus : fileStatuses) {
        String name = fileStatus.getPath().getName();
        name = name.replace(".writed", "");
        if ((name.endsWith(".gz") || name.endsWith(".log")) && store.isExist(fromPath + separator + name)) {
            list.add(new Path(fromPath + separator + name));
        }
    }
    return list;
}

From source file:tv.icntv.logsys.HadoopRunMain.java

License:Apache License

protected List<Path> getFileStatus(String fromPath) {
    FileStatus[] fileStatuses = store.getFiles(fromPath, new PathFilter() {
        @Override
        public boolean accept(Path path) {
            return path.getName().endsWith("writed");
        }
    });

    if (null == fileStatuses || fileStatuses.length == 0) {
        logger.info("fileStatuses is null");
        return null;
    }
    List<Path> list = Lists.newArrayList();
    for (FileStatus fileStatus : fileStatuses) {
        String name = fileStatus.getPath().getName();
        name = name.replace(".writed", "");
        if (store.isExist(fromPath + separator + name)) { //(name.endsWith()||name.endsWith(".log")) &&
            list.add(new Path(fromPath + separator + name));
        }
    }
    return list;
}

From source file:tv.icntv.logsys.Main.java

License:Apache License

public List<Path> getFileStatus(String fromPath) {
    FileStatus[] fileStatuses = store.getFiles(fromPath, new PathFilter() {
        @Override
        public boolean accept(Path path) {
            return path.getName().endsWith("writed");
        }
    });
    if (null == fileStatuses || fileStatuses.length == 0) {
        return null;
    }
    List<Path> list = Lists.newArrayList();
    for (FileStatus fileStatus : fileStatuses) {
        String name = fileStatus.getPath().getName();
        name = name.replace(".writed", "");
        if (name.endsWith(".gz") && store.isExist(fromPath + separator + name)) {
            list.add(new Path(fromPath + separator + name));
        }
    }
    return list;
}