List of usage examples for org.apache.hadoop.fs.PathFilter
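PathFilter is a single-method Hadoop interface (boolean accept(Path path)) used to restrict the results of directory listings such as FileSystem.listStatus and FileSystem.globStatus. Before the collected examples, here is a minimal sketch of the common idiom; the /data/output directory is hypothetical, and since the interface has one abstract method, a Java 8 lambda would work as well:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.PathFilter;

public class PathFilterSketch {
    public static void main(String[] args) throws Exception {
        FileSystem fs = FileSystem.get(new Configuration());
        // Keep only MapReduce/Spark output files such as part-00000 or part-r-00000.
        PathFilter partFilter = new PathFilter() {
            @Override
            public boolean accept(Path path) {
                return path.getName().startsWith("part");
            }
        };
        FileStatus[] matches = fs.listStatus(new Path("/data/output"), partFilter);
        for (FileStatus status : matches) {
            System.out.println(status.getPath());
        }
    }
}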
From source file:spark.func.movierecommendation.MovieRecommendationApp.java
License:Apache License
@Override
public List<List<String>> fetchResults(Configuration hadoopConfig, JavaSparkContext sparkContext,
        Map<String, String> params) throws Exception {
    if (hadoopConfig == null) {
        return Lists.newArrayList();
    }
    String outputPath = getJobOutputPath(hadoopConfig, params);
    FileSystem fileSystem = FileSystem.get(hadoopConfig);
    FileStatus[] status = fileSystem.listStatus(new Path(outputPath), new PathFilter() {
        @Override
        public boolean accept(Path path) {
            return path.getName().startsWith("part");
        }
    });
    List<List<String>> results = Lists.newArrayList();
    for (int i = 0; i < status.length; i++) {
        FSDataInputStream is = fileSystem.open(status[i].getPath());
        BufferedReader br = new BufferedReader(new InputStreamReader(is));
        String line = br.readLine();
        while (line != null) {
            Matcher matcher = RESULT_PATTERN.matcher(line);
            if (matcher.find()) {
                // group 1 - userid, group 2 - bookid, group 3 - rating, group 4 - title
                LOGGER.debug("SPARK RESULT: {} {} {}", matcher.group(2), matcher.group(3), matcher.group(4));
                results.add(Lists.newArrayList(matcher.group(2), matcher.group(3), matcher.group(4)));
            }
            line = br.readLine();
        }
        br.close(); // closes the underlying FSDataInputStream as well
    }
    return results;
}
From source file:spark.func.topn.TopNApp.java
License:Apache License
@Override
public List<List<String>> fetchResults(Configuration hadoopConfig, JavaSparkContext sparkContext,
        Map<String, String> params) throws Exception {
    if (hadoopConfig == null) {
        return Lists.newArrayList();
    }
    String outputPath = getJobOutputPath(hadoopConfig, params);
    FileSystem fileSystem = FileSystem.get(hadoopConfig);
    FileStatus[] status = fileSystem.listStatus(new Path(outputPath), new PathFilter() {
        @Override
        public boolean accept(Path path) {
            return path.getName().startsWith("part");
        }
    });
    List<List<String>> results = Lists.newArrayList();
    for (int i = 0; i < status.length; i++) {
        FSDataInputStream is = fileSystem.open(status[i].getPath());
        BufferedReader br = new BufferedReader(new InputStreamReader(is));
        String line = br.readLine();
        while (line != null) {
            String[] parts = line.replace("(", "").replace(")", "").split(",");
            if (parts.length >= 2) {
                LOGGER.debug("SPARK RESULT: {} {}", parts[0], parts[1]);
                results.add(Lists.newArrayList(parts[0], parts[1]));
            }
            line = br.readLine();
        }
        br.close(); // closes the underlying FSDataInputStream as well
    }
    return results;
}
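The two fetchResults examples above share the same listing-and-reading idiom; only the per-line parsing differs. A minimal consolidated sketch could factor that idiom out (the class and helper names here are hypothetical, not part of either project):

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.util.ArrayList;
import java.util.List;

import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.PathFilter;

public final class PartFileReader {
    // Read every line of every "part*" file under outputPath; try-with-resources
    // closes each reader and its underlying FSDataInputStream.
    public static List<String> readPartFiles(FileSystem fileSystem, String outputPath) throws IOException {
        FileStatus[] status = fileSystem.listStatus(new Path(outputPath), new PathFilter() {
            @Override
            public boolean accept(Path path) {
                return path.getName().startsWith("part");
            }
        });
        List<String> lines = new ArrayList<String>();
        for (FileStatus fileStatus : status) {
            try (BufferedReader br = new BufferedReader(
                    new InputStreamReader(fileSystem.open(fileStatus.getPath())))) {
                String line;
                while ((line = br.readLine()) != null) {
                    lines.add(line);
                }
            }
        }
        return lines;
    }
}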
From source file:test.hiveserver.parse.SemanticAnalyzer.java
License:Apache License
private void decideExecMode(List<Task<? extends Serializable>> rootTasks, Context ctx,
        GlobalLimitCtx globalLimitCtx) throws SemanticException {
    // bypass for explain queries for now
    if (ctx.getExplain()) {
        return;
    }
    // user has told us to run in local mode or doesn't want auto-local mode
    if (ctx.isLocalOnlyExecutionMode() || !conf.getBoolVar(HiveConf.ConfVars.LOCALMODEAUTO)) {
        return;
    }
    final Context lCtx = ctx;
    PathFilter p = new PathFilter() {
        public boolean accept(Path file) {
            return !lCtx.isMRTmpFileURI(file.toUri().getPath());
        }
    };
    List<ExecDriver> mrtasks = Utilities.getMRTasks(rootTasks);

    // map-reduce jobs will be run locally based on data size
    // first find out if any of the jobs needs to run non-locally
    boolean hasNonLocalJob = false;
    for (ExecDriver mrtask : mrtasks) {
        try {
            ContentSummary inputSummary = Utilities.getInputSummary(ctx, (MapredWork) mrtask.getWork(), p);
            int numReducers = getNumberOfReducers(mrtask.getWork(), conf);
            long estimatedInput;
            if (globalLimitCtx != null && globalLimitCtx.isEnable()) {
                // If the global limit optimization is triggered, we will
                // estimate input data actually needed based on limit rows.
                // estimated Input = (num_limit * max_size_per_row) * (estimated_map + 2)
                long sizePerRow = HiveConf.getLongVar(conf, HiveConf.ConfVars.HIVELIMITMAXROWSIZE);
                estimatedInput = globalLimitCtx.getGlobalLimit() * sizePerRow;
                long minSplitSize = HiveConf.getLongVar(conf, HiveConf.ConfVars.MAPREDMINSPLITSIZE);
                long estimatedNumMap = inputSummary.getLength() / minSplitSize + 1;
                estimatedInput = estimatedInput * (estimatedNumMap + 1);
            } else {
                estimatedInput = inputSummary.getLength();
            }
            if (LOG.isDebugEnabled()) {
                LOG.debug("Task: " + mrtask.getId() + ", Summary: " + inputSummary.getLength() + ","
                        + inputSummary.getFileCount() + "," + numReducers + ", estimated Input: "
                        + estimatedInput);
            }
            if (MapRedTask.isEligibleForLocalMode(conf, numReducers, estimatedInput,
                    inputSummary.getFileCount()) != null) {
                hasNonLocalJob = true;
                break;
            } else {
                mrtask.setLocalMode(true);
            }
        } catch (IOException e) {
            throw new SemanticException(e);
        }
    }

    if (!hasNonLocalJob) {
        // none of the mapred tasks needs to be run locally. That means that the
        // query can be executed entirely in local mode. Save the current tracker
        // value and restore it when done
        ctx.setOriginalTracker(conf.getVar(HiveConf.ConfVars.HADOOPJT));
        conf.setVar(HiveConf.ConfVars.HADOOPJT, "local");
        console.printInfo("Automatically selecting local only mode for query");

        // If all the tasks can be run locally, we can use local disk for
        // storing intermediate data.

        /**
         * This code is commented out pending further testing/development
         * for (Task<? extends Serializable> t: rootTasks)
         *   t.localizeMRTmpFiles(ctx);
         */
    }
}
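As a concrete reading of the estimate above: with a global limit of 100 rows, a 100 KB maximum row size, and one estimated map task, the heuristic sizes the job at (100 * 100 KB) * (1 + 1) = 20 MB, and the task is marked local only if MapRedTask.isEligibleForLocalMode returns null for that figure.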
From source file:tv.icntv.grade.film.recommend.CorrelateJob.java
License:Apache License
@Override
public int run(String[] strings) throws Exception {
    Configuration configuration = getConf();
    HadoopUtils.deleteIfExist(strings[1]);
    Job correlate = new Job(configuration, "icntv correlate job");
    MapReduceUtils.initMapperJob(UserHistoryMapper.class, Text.class, Text.class, this.getClass(), correlate,
            getPaths(strings[0].split(",")));
    MapReduceUtils.initReducerJob(new Path(strings[1]), UserHistoryReducer.class, correlate);
    if (!correlate.waitForCompletion(true)) {
        return 1;
    }
    Parameters parameter = getParameter(strings[2]);
    HadoopUtils.deleteIfExist(parameter.get("output"));
    PFPGrowth.runPFPGrowth(parameter, configuration);
    String output = parameter.get("output") + "/frequentpatterns";
    long count = HadoopUtils.count(new Path(output), new PathFilter() {
        @Override
        public boolean accept(Path path) {
            // match reducer output files such as part-r-00000
            return path.getName().matches("part-r-\\d*");
        }
    });
    if (count == 0) {
        return 1;
    }
    configuration.setLong("icntv.correlate.total.size", count);
    HadoopUtils.deleteIfExist(strings[3]);
    Job result = new Job(configuration, "correlate result calculate");
    MapReduceUtils.initMapperJob(CorrelateInputMapper.class, Text.class, Text.class, this.getClass(), result,
            new Path(output));
    result.setInputFormatClass(SequenceFileInputFormat.class);
    // TableMapReduceUtil.initTableReducerJob("");
    MapReduceUtils.initReducerJob(new Path(strings[3]), CorrelateOutPutReducer.class, result);
    result.waitForCompletion(true);
    return 0;
}
From source file:tv.icntv.grade.film.recommend.CorrelateResultJob.java
License:Apache License
@Override
public int run(String[] strings) throws Exception {
    Configuration configuration = super.getConf();
    String output = strings[0] + "/frequentpatterns";
    long count = HadoopUtils.count(new Path(output), new PathFilter() {
        @Override
        public boolean accept(Path path) {
            // match reducer output files such as part-r-00000
            return path.getName().matches("part-r-\\d*");
        }
    });
    System.out.println("count =" + count);
    if (count == 0) {
        return 1;
    }
    configuration.setLong("icntv.correlate.total.size", count);
    Job result = new Job(configuration, "correlate result calculate");
    MapReduceUtils.initMapperJob(CorrelateInputMapper.class, Text.class, Text.class, this.getClass(), result,
            new Path(output));
    result.setInputFormatClass(SequenceFileInputFormat.class);
    // TableMapReduceUtil.initTableReducerJob("");
    MapReduceUtils.initReducerJob(new Path(strings[1]), CorrelateOutPutReducer.class, result);
    result.waitForCompletion(true);
    return 0;
}
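HadoopUtils.count in the two jobs above is project-specific. With only the stock Hadoop API, counting the reducer outputs the same PathFilter selects might look like this sketch (class and method names are hypothetical):

import java.io.IOException;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.PathFilter;

public final class PartCounter {
    // Count reducer output files (part-r-00000, part-r-00001, ...) under dir.
    public static long countReducerOutputs(FileSystem fs, Path dir) throws IOException {
        FileStatus[] parts = fs.listStatus(dir, new PathFilter() {
            @Override
            public boolean accept(Path path) {
                return path.getName().matches("part-r-\\d*");
            }
        });
        return parts == null ? 0 : parts.length;
    }
}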
From source file:tv.icntv.log.stb.filter.FilterJob.java
License:Apache License
@Override
public boolean run(Map<String, String> maps) throws Exception {
    Configuration configuration = getConf();
    // disable speculative execution so each input file is processed exactly once
    configuration.setBoolean("mapreduce.reduce.speculative", false);
    configuration.setBoolean("mapreduce.map.speculative", false);
    // setting conf
    Path input = new Path(maps.get(INPUT));
    Path back = new Path(maps.get(BACK));
    Path output = new Path(maps.get(OUTPUT_PREFIX));
    configuration.set(OUTPUT_SUFFIX, maps.get(OUTPUT_SUFFIX));
    configuration.set(OUTPUT_PREFIX, output.toString());
    configuration.set(OTHER_PATH, maps.get(OTHER_PATH));
    // example configuration:
    // Path input = new Path("/icntv/log/stb/2014-05-19/stb-2014-05-18-23.lzo_deflate");
    // Path back = new Path("/icntv/parser/stb/filter/status/2014-05-18/");
    // Path output = new Path("/icntv/parser/stb/filter/result/2014-05-18/");
    Path[] in = HadoopUtils.createFile(input, back, new PathFilter() {
        @Override
        public boolean accept(Path path) {
            return path.getName().endsWith(file_success_suffix);
        }
    }, file_success_suffix, parseing_suffix, parsed_suffix);
    if (null == in || in.length == 0) {
        logger.info("input not exist;");
        return false;
    }
    List<Path> inTemp = Lists.newArrayList(in);
    String ye = DateUtils.addDay(input.getName(), "yyyy-MM-dd", -1);
    Path prefix = new Path(input.getParent() + File.separator + ye, "stb-" + ye + "-23.lzo");
    logger.info("prefix path ={}", prefix.toString());
    if (HadoopUtils.isExist(prefix)) {
        logger.info("add today path= {}", prefix.toString());
        inTemp.add(prefix);
    }
    String day = DateUtils.addDay(input.getName(), "yyyy-MM-dd", 1);
    Path nextPath = new Path(input.getParent() + File.separator + day, "stb-" + day + "-00.lzo");
    logger.info("next path ={},writed path={}", nextPath.toString(),
            new Path(input.getParent() + File.separator + day, "stb-" + day + "-00.lzo.writed"));
    if (HadoopUtils.isExist(new Path(input.getParent() + File.separator + day,
            "stb-" + day + "-00.lzo.writed"))) {
        logger.info("add today path= {}", nextPath.toString());
        inTemp.add(nextPath);
    }
    logger.info("input size = {}", inTemp.size());
    // inTemp.add(new Path(input.getParent()+ File.separator+ DateTime.now().toString("yyyy-MM-dd"),"")
    Job stbFilterJob = Job.getInstance(configuration, "stb parser first:filter by rule file");
    // setting job configuration
    stbFilterJob.setMapperClass(FilterMapper.class);
    stbFilterJob.setOutputKeyClass(NullWritable.class);
    stbFilterJob.setOutputValueClass(Text.class);
    FileInputFormat.setInputPaths(stbFilterJob, inTemp.toArray(new Path[inTemp.size()]));
    stbFilterJob.setJarByClass(getClass());
    FileOutputFormat.setOutputPath(stbFilterJob, output);
    LazyOutputFormat.setOutputFormatClass(stbFilterJob, TextOutputFormat.class);
    stbFilterJob.setNumReduceTasks(0);
    if (stbFilterJob.waitForCompletion(true)) {
        for (Path path : in) {
            HadoopUtils.rename(new Path(path + parseing_suffix), new Path(path + parsed_suffix));
        }
        return true;
    }
    return false;
}
From source file:tv.icntv.log.tools.FileApi.java
License:Apache License
@Override
public synchronized boolean writeDat(Path[] inputs, final String regular, Path output) {
    FileSystem fileSystem = null;
    BufferedReader reader = null;
    FSDataOutputStream outputStream = null;
    try {
        fileSystem = FileSystem.get(conf);
        FileStatus[] fileStatuses = fileSystem.listStatus(inputs, new PathFilter() {
            @Override
            public boolean accept(Path path) {
                return path.getName().matches(regular);
            }
        });
        if (null == fileStatuses || fileStatuses.length == 0) {
            System.out.println("null...");
            return false;
        }
        System.out.println(fileStatuses.length);
        outputStream = fileSystem.create(output, true, 40960);
        for (FileStatus status : fileStatuses) {
            if (regular.endsWith("lzo")) {
                reader = new BufferedReader(new InputStreamReader(
                        lzopInputStream.createInputStream(fileSystem.open(status.getPath())), "utf-8"));
            } else {
                reader = new BufferedReader(new InputStreamReader(fileSystem.open(status.getPath())));
            }
            String line = null;
            while (null != (line = reader.readLine())) {
                byte[] lineByte = (line + "\r\n").getBytes("utf-8");
                outputStream.write(lineByte, 0, lineByte.length);
            }
        }
    } catch (IOException e) {
        System.out.println(e);
        e.printStackTrace();
        return false;
    } finally {
        IOUtils.closeStream(reader);
        IOUtils.closeStream(outputStream);
        IOUtils.closeStream(fileSystem);
    }
    return true;
}
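The example above matches file names against a regex via listStatus plus a PathFilter. When the match is on the name itself, FileSystem.globStatus is an alternative: the glob pattern does the coarse selection and the PathFilter prunes it further. A minimal sketch, with a hypothetical /logs/2014-05-18 directory:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.PathFilter;

public class GlobSketch {
    public static void main(String[] args) throws Exception {
        FileSystem fs = FileSystem.get(new Configuration());
        // The glob expands the wildcard; the PathFilter then prunes the expansion.
        FileStatus[] lzoFiles = fs.globStatus(new Path("/logs/2014-05-18/*.lzo"), new PathFilter() {
            @Override
            public boolean accept(Path path) {
                return !path.getName().startsWith("_"); // skip _SUCCESS and similar markers
            }
        });
        for (FileStatus status : lzoFiles) {
            System.out.println(status.getPath());
        }
    }
}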
From source file:tv.icntv.logsys.HadoopRun.java
License:Apache License
protected List<Path> getFileStatus(String fromPath) {
    FileStatus[] fileStatuses = store.getFiles(fromPath, new PathFilter() {
        @Override
        public boolean accept(Path path) {
            return path.getName().endsWith("writed");
        }
    });
    if (null == fileStatuses || fileStatuses.length == 0) {
        logger.info("fileStatuses is null");
        return null;
    }
    List<Path> list = Lists.newArrayList();
    for (FileStatus fileStatus : fileStatuses) {
        String name = fileStatus.getPath().getName();
        name = name.replace(".writed", "");
        if ((name.endsWith(".gz") || name.endsWith(".log")) && store.isExist(fromPath + separator + name)) {
            list.add(new Path(fromPath + separator + name));
        }
    }
    return list;
}
From source file:tv.icntv.logsys.HadoopRunMain.java
License:Apache License
protected List<Path> getFileStatus(String fromPath) {
    FileStatus[] fileStatuses = store.getFiles(fromPath, new PathFilter() {
        @Override
        public boolean accept(Path path) {
            return path.getName().endsWith("writed");
        }
    });
    if (null == fileStatuses || fileStatuses.length == 0) {
        logger.info("fileStatuses is null");
        return null;
    }
    List<Path> list = Lists.newArrayList();
    for (FileStatus fileStatus : fileStatuses) {
        String name = fileStatus.getPath().getName();
        name = name.replace(".writed", "");
        if (store.isExist(fromPath + separator + name)) {
            list.add(new Path(fromPath + separator + name));
        }
    }
    return list;
}
From source file:tv.icntv.logsys.Main.java
License:Apache License
public List<Path> getFileStatus(String fromPath) {
    FileStatus[] fileStatuses = store.getFiles(fromPath, new PathFilter() {
        @Override
        public boolean accept(Path path) {
            return path.getName().endsWith("writed");
        }
    });
    if (null == fileStatuses || fileStatuses.length == 0) {
        return null;
    }
    List<Path> list = Lists.newArrayList();
    for (FileStatus fileStatus : fileStatuses) {
        String name = fileStatus.getPath().getName();
        name = name.replace(".writed", "");
        if (name.endsWith(".gz") && store.isExist(fromPath + separator + name)) {
            list.add(new Path(fromPath + separator + name));
        }
    }
    return list;
}
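The last three getFileStatus variants all implement the same marker-file handshake: a producer writes foo.gz.writed once foo.gz is complete, and the consumer lists only the markers, then strips the suffix to recover the finished data file. A minimal sketch of that pattern against a plain FileSystem (store and separator in the examples above are project-specific helpers; the stock API is used here instead):

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;

import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.PathFilter;

public final class MarkerFiles {
    // List only the ".writed" markers, then return the finished data files they point to.
    public static List<Path> finishedFiles(FileSystem fs, Path dir) throws IOException {
        FileStatus[] markers = fs.listStatus(dir, new PathFilter() {
            @Override
            public boolean accept(Path path) {
                return path.getName().endsWith(".writed");
            }
        });
        List<Path> files = new ArrayList<Path>();
        for (FileStatus marker : markers) {
            String name = marker.getPath().getName().replace(".writed", "");
            Path data = new Path(dir, name);
            if (fs.exists(data)) { // the marker may outlive a deleted data file
                files.add(data);
            }
        }
        return files;
    }
}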