List of usage examples for org.apache.hadoop.util.StringUtils.stringToPath
public static Path[] stringToPath(String[] str)
From source file:com.bianfeng.bfas.hive.io.RealtimeInputFormat2.java
License:Apache License
/**
 * Sets the given comma separated paths as the list of inputs
 * for the map-reduce job.
 *
 * <p>The string is first broken into individual path strings by
 * {@code getPathStrings}, those are converted to {@link Path} objects, and the
 * result is handed to the {@code Path}-based {@code setInputPaths} overload.
 *
 * @param conf Configuration of the job
 * @param commaSeparatedPaths Comma separated paths to be set as
 *        the list of inputs for the map-reduce job.
 */
public static void setInputPaths(JobConf conf, String commaSeparatedPaths) {
    final String[] pathStrings = getPathStrings(commaSeparatedPaths);
    final Path[] inputPaths = StringUtils.stringToPath(pathStrings);
    setInputPaths(conf, inputPaths);
}
From source file:com.dinglicom.clouder.mapreduce.input.FileInputFormat.java
License:Apache License
/**
 * Sets the given comma separated paths as the list of inputs
 * for the map-reduce job.
 *
 * <p>The string is first broken into individual path strings by
 * {@code getPathStrings}, those are converted to {@link Path} objects, and the
 * result is handed to the {@code Path}-based {@code setInputPaths} overload.
 *
 * @param job the job
 * @param commaSeparatedPaths Comma separated paths to be set as
 *        the list of inputs for the map-reduce job.
 * @throws IOException propagated from the {@code Path}-based overload
 */
public static void setInputPaths(Job job, String commaSeparatedPaths) throws IOException {
    final String[] pathStrings = getPathStrings(commaSeparatedPaths);
    final Path[] inputPaths = StringUtils.stringToPath(pathStrings);
    setInputPaths(job, inputPaths);
}
From source file:hitune.analysis.mapreduce.processor.FileFilter.ChukwaFileFilter.java
License:Apache License
protected boolean inputValidation(Configuration job, String dir, PathFilter filter) { boolean result = false; if (filter == null) { filter = new PathFilter() { @Override/*from w w w. ja v a2s .c o m*/ public boolean accept(Path path) { // TODO Auto-generated method stub return true; } }; } Path[] p = StringUtils.stringToPath(new String[] { dir }); try { FileSystem fs = p[0].getFileSystem(job); FileStatus[] matches = fs.globStatus(p[0], filter); if (matches != null && matches.length != 0) { result = true; } } catch (IOException e) { // TODO Auto-generated catch block e.printStackTrace(); } return result; }
From source file:org.apache.giraph.io.hcatalog.GiraphHCatInputFormat.java
License:Apache License
/** * Set input path for job./*from w w w . ja v a2s .c om*/ * * @param jobConf Job configuration * @param location Location of input files * @throws IOException */ private void setInputPath(JobConf jobConf, String location) throws IOException { int length = location.length(); int curlyOpen = 0; int pathStart = 0; boolean globPattern = false; List<String> pathStrings = new ArrayList<String>(); for (int i = 0; i < length; i++) { char ch = location.charAt(i); switch (ch) { case '{': curlyOpen++; if (!globPattern) { globPattern = true; } break; case '}': curlyOpen--; if (curlyOpen == 0 && globPattern) { globPattern = false; } break; case ',': if (!globPattern) { pathStrings.add(location.substring(pathStart, i)); pathStart = i + 1; } break; default: } } pathStrings.add(location.substring(pathStart, length)); Path[] paths = StringUtils.stringToPath(pathStrings.toArray(new String[0])); FileSystem fs = FileSystem.get(jobConf); Path path = paths[0].makeQualified(fs); StringBuilder str = new StringBuilder(StringUtils.escapeString(path.toString())); for (int i = 1; i < paths.length; i++) { str.append(StringUtils.COMMA_STR); path = paths[i].makeQualified(fs); str.append(StringUtils.escapeString(path.toString())); } jobConf.set("mapred.input.dir", str.toString()); }
From source file:org.apache.hama.bsp.FileInputFormat.java
License:Apache License
/** * Sets the given comma separated paths as the list of inputs for the BSP job. * /*from w ww.j a v a2s . c o m*/ * @param conf Configuration of the job * @param commaSeparatedPaths Comma separated paths to be set as the list of * inputs for the BSP job. */ public static void setInputPaths(BSPJob conf, String commaSeparatedPaths) { setInputPaths(conf, StringUtils.stringToPath(getPathStrings(commaSeparatedPaths))); }
From source file:org.apache.hcatalog.mapreduce.HCatBaseInputFormat.java
License:Apache License
private void setInputPath(JobConf jobConf, String location) throws IOException { // ideally we should just call FileInputFormat.setInputPaths() here - but // that won't work since FileInputFormat.setInputPaths() needs // a Job object instead of a JobContext which we are handed here int length = location.length(); int curlyOpen = 0; int pathStart = 0; boolean globPattern = false; List<String> pathStrings = new ArrayList<String>(); for (int i = 0; i < length; i++) { char ch = location.charAt(i); switch (ch) { case '{': { curlyOpen++;// w w w . j ava 2 s. co m if (!globPattern) { globPattern = true; } break; } case '}': { curlyOpen--; if (curlyOpen == 0 && globPattern) { globPattern = false; } break; } case ',': { if (!globPattern) { pathStrings.add(location.substring(pathStart, i)); pathStart = i + 1; } break; } } } pathStrings.add(location.substring(pathStart, length)); Path[] paths = StringUtils.stringToPath(pathStrings.toArray(new String[0])); String separator = ""; StringBuilder str = new StringBuilder(); for (Path path : paths) { FileSystem fs = path.getFileSystem(jobConf); final String qualifiedPath = fs.makeQualified(path).toString(); str.append(separator).append(StringUtils.escapeString(qualifiedPath)); separator = StringUtils.COMMA_STR; } jobConf.set("mapred.input.dir", str.toString()); }
From source file:uk.ac.cam.eng.extraction.hadoop.merge.MergeJob.java
License:Apache License
/**
 * Parses the command line, configures the merge job and runs it to completion.
 *
 * <p>Every comma separated entry of {@code input_features} is added as an input
 * handled by {@code MergeFeatureMapper}; {@code input_rules} is added as an
 * input handled by {@code MergeRuleMapper}; the job output goes to
 * {@code output}.
 *
 * @param args command line arguments, parsed into {@code MergeJobParameters}
 * @return {@code 0} if the job completes successfully; {@code 1} if it fails or
 *         the arguments cannot be parsed (the error message and usage are
 *         printed in that case)
 */
public int run(String[] args) throws IllegalArgumentException, IllegalAccessException, IOException,
        ClassNotFoundException, InterruptedException {
    final MergeJobParameters params = new MergeJobParameters();
    final JCommander cmd = new JCommander(params);
    try {
        cmd.parse(args);

        final Configuration conf = getConf();
        Util.ApplyConf(cmd, "", conf);
        final Job job = getJob(conf);

        for (Path featurePath : StringUtils.stringToPath(params.input_features.split(","))) {
            MultipleInputs.addInputPath(job, featurePath, SequenceFileInputFormat.class,
                    MergeFeatureMapper.class);
        }
        MultipleInputs.addInputPath(job, new Path(params.input_rules), SequenceFileInputFormat.class,
                MergeRuleMapper.class);
        FileOutputFormat.setOutputPath(job, new Path(params.output));

        final boolean succeeded = job.waitForCompletion(true);
        return succeeded ? 0 : 1;
    } catch (ParameterException e) {
        System.err.println(e.getMessage());
        cmd.usage();
        return 1;
    }
}