List of usage examples for the org.apache.hadoop.fs.Path constructor
public Path(URI aUri)
public Path(String pathString)
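Before the examples, a minimal sketch (not taken from any of the source files below) of the two constructor forms used throughout them; the hdfs://namenode:8020 authority is a placeholder:

import java.net.URI;
import org.apache.hadoop.fs.Path;

public class PathConstructorSketch {
    public static void main(String[] args) {
        // From a plain string: no scheme, resolved against the default FileSystem at use time.
        Path fromString = new Path("/data/output/temp");
        // From a URI: fully qualified with scheme and authority (placeholder host).
        Path fromUri = new Path(URI.create("hdfs://namenode:8020/data/output/temp"));
        // Parent/child form, handy for building file paths under an output directory.
        Path part = new Path(fromString, "part-r-00000");
        System.out.println(part); // prints /data/output/temp/part-r-00000
    }
}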
From source file: CountJob.java
License: Apache License
public static void doJob(String param, String[] args, String msgs)
        throws IOException, ClassNotFoundException, InterruptedException {
    Configuration conf = new Configuration();
    conf.set(TokenizerMapper.PATTERN, args[2]);
    FileSystem hdfs = FileSystem.get(conf);
    Path tempOutput1 = new Path("/data/output/temp/" + param + "1");
    Path tempOutput2 = new Path("/data/output/temp/" + param + "2");
    // Remove leftovers from a previous run so the jobs can write their output.
    if (hdfs.exists(tempOutput1) || hdfs.exists(tempOutput2)) {
        hdfs.delete(tempOutput1, true);
        hdfs.delete(tempOutput2, true);
    }

    Job job = new Job(conf, "word count");
    job.setJarByClass(CountJob.class);
    job.setMapperClass(TokenizerMapper.class);
    job.setCombinerClass(LongSumReducer.class);
    job.setReducerClass(LongSumReducer.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(LongWritable.class);
    FileInputFormat.addInputPath(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, tempOutput1);
    job.waitForCompletion(true);

    Job sortJob1 = new Job(conf);
    sortJob1.setJobName("grep-sort");
    FileInputFormat.setInputPaths(sortJob1, tempOutput1);
    sortJob1.setInputFormatClass(SequenceFileInputFormat.class);
    sortJob1.setMapperClass(InverseMapper.class);
    sortJob1.setNumReduceTasks(1); // write a single file
    FileOutputFormat.setOutputPath(sortJob1, tempOutput2);
    sortJob1.setSortComparatorClass( // sort by decreasing freq
            LongWritable.DecreasingComparator.class);
    sortJob1.waitForCompletion(true);
    hdfs.delete(tempOutput1, true);
}
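As used above, doJob chains two jobs: the word count writes SequenceFile output to /data/output/temp/&lt;param&gt;1, and the grep-sort job inverts and sorts it by decreasing frequency into /data/output/temp/&lt;param&gt;2, deleting the intermediate directory afterwards.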
From source file: CountJob.java
License: Apache License
public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    String msgs = "";
    doJob("1", args, msgs);
    doJob("2", args, msgs);
    FileSystem hdfs = FileSystem.get(conf);
    // Compare the two runs' outputs line by line.
    BufferedReader bfr = new BufferedReader(
            new InputStreamReader(hdfs.open(new Path("/data/output/temp/12/part-r-00000"))));
    BufferedReader bfr2 = new BufferedReader(
            new InputStreamReader(hdfs.open(new Path("/data/output/temp/22/part-r-00000"))));
    boolean same = true;
    String line1 = bfr.readLine();
    String line2 = bfr2.readLine();
    while (same) {
        if ((line1 == null && line2 != null) || (line1 != null && line2 == null)) {
            same = false;
            break;
        } else if (line1 == null && line2 == null) {
            break;
        } else if (line1.equals(line2)) {
            line1 = bfr.readLine();
            line2 = bfr2.readLine();
        } else {
            same = false;
            break;
        }
    }
    if (same) {
        System.out.print("same " + same + "\n");
        Path localP = new Path("/tmp/output.txt");
        hdfs.copyToLocalFile(new Path("/data/output/temp/12/part-r-00000"), localP);
        hdfs.copyFromLocalFile(localP, new Path(args[1] + "/part-r-00000"));
        hdfs.createNewFile(new Path(args[1] + "/_SUCCESS"));
        System.out.print("created result");
    } else {
        System.out.print("Different");
        // The first two runs disagreed; run once more and take that result.
        doJob("3", args, msgs);
        Path localP = new Path("/tmp/output.txt");
        hdfs.copyToLocalFile(new Path("/data/output/temp/32/part-r-00000"), localP);
        hdfs.copyFromLocalFile(localP, new Path(args[1] + "/part-r-00000"));
        hdfs.createNewFile(new Path(args[1] + "/_SUCCESS"));
        System.out.print("created result");
    }
    hdfs.delete(new Path("/data/output/temp/12/part-r-00000"), true);
    hdfs.delete(new Path("/data/output/temp/22/part-r-00000"), true);
}
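From the two methods above: args[0] is the input directory, args[1] the directory that receives the final part-r-00000 and _SUCCESS files, and args[2] the pattern handed to TokenizerMapper.PATTERN. main runs the count twice, keeps the result if both runs agree, and falls back to a third run otherwise.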
From source file: FileAnalyzerTest.java
License: Open Source License
@Test
public void testAvro() throws IOException, URISyntaxException {
    URL url = ClassLoader.getSystemResource("test_sample.avro");
    Path pt = new Path(url.toURI());
    FileAnalyzerFactory fileAnalyzerFactory = new FileAnalyzerFactory(fs);
    DatasetJsonRecord schema = fileAnalyzerFactory.getSchema(pt, "test_sample.avro");
    assert schema != null;
    SampleDataRecord sampleData = fileAnalyzerFactory.getSampleData(pt, "test_sample.avro");
    assert sampleData != null;
}
From source file: FileAnalyzerTest.java
License: Open Source License
@Test(enabled = false) // TestNG attribute: this test is currently skipped
public void testOrc() throws IOException, URISyntaxException {
    URL url = ClassLoader.getSystemResource("test_sample.orc");
    Path pt = new Path(url.toURI());
    FileAnalyzerFactory fileAnalyzerFactory = new FileAnalyzerFactory(fs);
    DatasetJsonRecord schema = fileAnalyzerFactory.getSchema(pt, "test_sample.orc");
    assert schema != null;
    SampleDataRecord sampleData = fileAnalyzerFactory.getSampleData(pt, "test_sample.orc");
    assert sampleData != null;
}
From source file: lab2_3.java
public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    // Clear the final output directory and the temp directory from earlier runs.
    // TMPDIR is a Path constant defined elsewhere in lab2_3 (not shown in this excerpt).
    FileSystem.get(conf).delete(new Path(args[1]), true);
    FileSystem.get(conf).delete(TMPDIR, true);

    Job first = Job.getInstance(conf, "drive time lab 2.2");
    first.setJarByClass(lab2_3.class);
    first.setMapperClass(lab2_2.PartitioningMapper.class);
    first.setPartitionerClass(lab2_2.TypePartitioner.class);
    first.setReducerClass(lab2_2.IdentityReducer.class);
    first.setNumReduceTasks(6);
    first.setOutputKeyClass(IntWritable.class);
    first.setOutputValueClass(Text.class);
    FileInputFormat.addInputPath(first, new Path(args[0]));
    FileOutputFormat.setOutputPath(first, TMPDIR);

    int code = first.waitForCompletion(true) ? 0 : 1;
    if (code == 0) {
        Job second = Job.getInstance(conf, "drive time lab 2.3");
        second.setJarByClass(lab2_3.class);
        second.setMapperClass(MMMaper.class);
        second.setReducerClass(Reeeducer.class);
        second.setPartitionerClass(Partitioneeeer.class);
        second.setNumReduceTasks(6);
        second.setOutputKeyClass(Text.class);
        second.setOutputValueClass(lab2_1.Statistics.class);
        FileInputFormat.addInputPath(second, TMPDIR);
        FileOutputFormat.setOutputPath(second, new Path(args[1]));
        code = second.waitForCompletion(true) ? 0 : 1;
    }
    // FileSystem.get(conf).delete(TMPDIR, true);
    System.exit(code);
}
From source file: Egg.java
License: Open Source License
/** Adds a path to the list of inputs for the map-reduce job.
 * @param pathString The name of the path
 * @return The 'this' object */
@JSFunction
public Egg addInput(String pathString) throws IOException {
    Path path = new Path(pathString);
    FileInputFormat.addInputPath(job, path);
    return this;
}
From source file: Egg.java
License: Open Source License
/** Sets the list of inputs for the map-reduce job to the path.
 * @param pathString The name of the path
 * @return The 'this' object */
@JSFunction
public Egg input(String pathString) throws IOException {
    Path path = new Path(pathString);
    FileInputFormat.setInputPaths(job, path);
    return this;
}
From source file: Egg.java
License: Open Source License
/** Sets the output for the map-reduce job to the path.
 * @param pathString The name of the path
 * @return The 'this' object */
@JSFunction
public Egg output(String pathString) {
    Path path = new Path(pathString);
    FileOutputFormat.setOutputPath(job, path);
    return this;
}
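Since each @JSFunction method above returns this, the calls chain naturally from script code. A hypothetical one-line sketch, assuming an Egg instance named egg already wired to a Job:

egg.input("/data/in").output("/data/out");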
From source file: Script.java
License: Open Source License
/** Evaluates the Javascript expressions contained in a
 * DataInputStream serialized file and passed over the distributed cache.
 * @param conf The Hadoop configuration object
 * @param pathString The path string of the cached file
 * @param name The name of the file added to the cache
 * @return The result of the Javascript evaluation */
public Object evalCache(Configuration conf, String pathString, String name) throws IOException {
    FSDataInputStream in;
    FileSystem fs = FileSystem.getLocal(conf);
    try {
        Path path = new Path(pathString);
        in = fs.open(path);
    } catch (FileNotFoundException e) { // must be running in standalone mode
        Path path = new Path(Eggshell.SCRIPT_DIR + "/" + name);
        in = fs.open(path); // read it from the eggshell script directory instead
    }
    String buf = in.readUTF();
    in.close();
    return evalString(buf);
}
From source file: Script.java
License: Open Source License
/** Serialize the Javascript object into a file on HDFS and then add
 * the file to the distributed cache.
 * @param conf The Hadoop configuration object
 * @param o The Javascript object to serialize
 * @param name The name of file to save the serialized object to */
public void serialize(Configuration conf, Object o, String name) throws IOException {
    FileSystem hdfs = FileSystem.get(conf);
    Path path = new Path(Eggshell.SCRIPT_DIR + "/" + name);
    FSDataOutputStream out = hdfs.create(path); // create the file
    String buf;
    if (!(o instanceof NativeObject)) {
        buf = cx.toString(o); // serialize
        if (o instanceof NativeArray) buf = "[" + buf + "]"; // if array
    } else {
        buf = "{";
        NativeObject obj = (NativeObject) o;
        Object[] propIds = obj.getPropertyIds(obj);
        for (Object propId : propIds) {
            String key = propId.toString();
            Object value = obj.getProperty(obj, key);
            buf += key + ":" + cx.toString(value) + ",";
        }
        buf += "}";
    }
    buf = "(" + buf + ")"; // force evaluation
    out.writeUTF(buf);
    out.close();
    DistributedCache.addCacheFile(path.toUri(), conf);
}
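serialize and evalCache are the two halves of one round trip. A hedged sketch of how they pair up, assuming a Script instance named script on both sides and using DistributedCache.getLocalCacheFiles to recover the task-local path (the names jsObject and state.js are placeholders):

// Driver side: write the object under Eggshell.SCRIPT_DIR and register it with the cache.
script.serialize(conf, jsObject, "state.js");

// Task side: locate the cached copy and evaluate it; evalCache falls back to
// Eggshell.SCRIPT_DIR itself when the file is not in the local cache (standalone mode).
Path[] cached = DistributedCache.getLocalCacheFiles(conf);
Object restored = script.evalCache(conf, cached[0].toString(), "state.js");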