Example usage for the org.apache.hadoop.fs.Path constructor Path(URI)

List of usage examples for the org.apache.hadoop.fs.Path constructor

Introduction

On this page you can find example usages of the org.apache.hadoop.fs.Path constructor.

Prototype

public Path(URI aUri) 

Source Link

Document

Construct a path from a URI

Usage

From source file:CountJob.java

License:Apache License

/**
 * Runs a two-stage MapReduce pipeline and leaves the final result in
 * {@code /data/output/temp/<param>2}.
 *
 * <p>Stage one (job name "word count") reads {@code args[0]} with
 * {@code TokenizerMapper} / {@code LongSumReducer} and writes a sequence file
 * to {@code /data/output/temp/<param>1}. Stage two (job name "grep-sort")
 * inverts each record with {@code InverseMapper}, orders keys with
 * {@code LongWritable.DecreasingComparator} and writes a single reduce output.
 * The stage-one directory is always removed before returning, including when
 * a stage fails.
 *
 * @param param suffix used to build the two temporary output directories
 * @param args  {@code args[0]} is the input path; {@code args[2]} is stored in
 *              the configuration under {@code TokenizerMapper.PATTERN}
 * @param msgs  unused; kept so existing callers keep compiling
 * @throws IOException            if a file system call fails or either job
 *                                reports that it did not complete successfully
 * @throws ClassNotFoundException propagated from job submission
 * @throws InterruptedException   propagated from job submission
 */
public static void doJob(String param, String args[], String msgs)
        throws IOException, ClassNotFoundException, InterruptedException {
    Configuration conf = new Configuration();
    conf.set(TokenizerMapper.PATTERN, args[2]);
    FileSystem hdfs = FileSystem.get(conf);
    Path tempOutput1 = new Path("/data/output/temp/" + param + "1");
    Path tempOutput2 = new Path("/data/output/temp/" + param + "2");

    // Both output directories must be absent before the jobs are submitted.
    if (hdfs.exists(tempOutput1) || hdfs.exists(tempOutput2)) {
        hdfs.delete(tempOutput1, true);
        hdfs.delete(tempOutput2, true);
    }

    try {
        Job job = new Job(conf, "word count");
        job.setJarByClass(CountJob.class);
        job.setMapperClass(TokenizerMapper.class);
        job.setCombinerClass(LongSumReducer.class);
        job.setReducerClass(LongSumReducer.class);
        job.setOutputFormatClass(SequenceFileOutputFormat.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(LongWritable.class);
        FileInputFormat.addInputPath(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, tempOutput1);
        // Do not feed a failed stage into the sort job; fail here with a clear message.
        if (!job.waitForCompletion(true)) {
            throw new IOException("word count job failed for input " + args[0]);
        }

        Job sortJob1 = new Job(conf);
        sortJob1.setJobName("grep-sort");
        FileInputFormat.setInputPaths(sortJob1, tempOutput1);
        sortJob1.setInputFormatClass(SequenceFileInputFormat.class);
        sortJob1.setMapperClass(InverseMapper.class);
        sortJob1.setNumReduceTasks(1); // write a single file
        FileOutputFormat.setOutputPath(sortJob1, tempOutput2);
        sortJob1.setSortComparatorClass( // sort by decreasing freq
                LongWritable.DecreasingComparator.class);
        if (!sortJob1.waitForCompletion(true)) {
            throw new IOException("grep-sort job failed; no result written to " + tempOutput2);
        }
    } finally {
        // The intermediate sequence file is never needed by callers.
        hdfs.delete(tempOutput1, true);
    }
}

From source file:CountJob.java

License:Apache License

/**
 * Runs the count pipeline twice and compares the two results line by line.
 *
 * <p>If both runs produced identical output, the first result is published to
 * {@code args[1]}. Otherwise a third run is executed and its output is
 * published instead. The part files of the first two runs are deleted at the end.
 *
 * @param args forwarded to {@code doJob}; {@code args[1]} is the directory that
 *             receives {@code part-r-00000} and the {@code _SUCCESS} marker
 * @throws Exception if a job or any file system operation fails
 */
public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    String msgs = "";
    doJob("1", args, msgs);
    doJob("2", args, msgs);
    FileSystem hdfs = FileSystem.get(conf);

    Path firstResult = new Path("/data/output/temp/12/part-r-00000");
    Path secondResult = new Path("/data/output/temp/22/part-r-00000");

    boolean same = haveSameLines(hdfs, firstResult, secondResult);
    Path chosen;
    if (same) {
        System.out.print("same " + same + "\n");
        chosen = firstResult;
    } else {
        System.out.print("Different");
        // The two runs disagree: use a third run as the published result.
        doJob("3", args, msgs);
        chosen = new Path("/data/output/temp/32/part-r-00000");
    }
    publishResult(hdfs, chosen, args[1]);
    System.out.print("created result");

    hdfs.delete(firstResult, true);
    hdfs.delete(secondResult, true);
}

/** Returns true when both files contain exactly the same sequence of lines; closes both readers. */
private static boolean haveSameLines(FileSystem hdfs, Path left, Path right) throws IOException {
    try (BufferedReader leftReader = new BufferedReader(
                 new InputStreamReader(hdfs.open(left), java.nio.charset.StandardCharsets.UTF_8));
         BufferedReader rightReader = new BufferedReader(
                 new InputStreamReader(hdfs.open(right), java.nio.charset.StandardCharsets.UTF_8))) {
        String leftLine = leftReader.readLine();
        String rightLine = rightReader.readLine();
        // Advance in lock-step while the current lines match.
        while (leftLine != null && leftLine.equals(rightLine)) {
            leftLine = leftReader.readLine();
            rightLine = rightReader.readLine();
        }
        // Equal only if both files ran out of lines at the same time.
        return leftLine == null && rightLine == null;
    }
}

/** Copies the result file into targetDir via a local temp file and creates the _SUCCESS marker. */
private static void publishResult(FileSystem hdfs, Path result, String targetDir) throws IOException {
    Path localP = new Path("/tmp/output.txt");
    hdfs.copyToLocalFile(result, localP);
    hdfs.copyFromLocalFile(localP, new Path(targetDir + "/part-r-00000"));
    hdfs.createNewFile(new Path(targetDir + "/_SUCCESS"));
}

From source file:FileAnalyzerTest.java

License:Open Source License

/**
 * Loads the {@code test_sample.avro} resource and checks that the analyzer
 * factory returns a non-null schema and non-null sample data for it.
 */
@Test
public void testAvro() throws IOException, URISyntaxException {
    final String sampleName = "test_sample.avro";
    Path samplePath = new Path(ClassLoader.getSystemResource(sampleName).toURI());
    FileAnalyzerFactory factory = new FileAnalyzerFactory(fs);

    // NOTE(review): plain Java assert statements are only evaluated when the JVM runs with -ea.
    DatasetJsonRecord schema = factory.getSchema(samplePath, sampleName);
    assert schema != null;

    SampleDataRecord sampleData = factory.getSampleData(samplePath, sampleName);
    assert sampleData != null;
}

From source file:FileAnalyzerTest.java

License:Open Source License

/**
 * Loads the {@code test_sample.orc} resource and checks that the analyzer
 * factory returns a non-null schema and non-null sample data for it.
 * The test is currently disabled through the annotation.
 */
@Test(enabled = false)
public void testOrc() throws IOException, URISyntaxException {
    final String sampleName = "test_sample.orc";
    Path samplePath = new Path(ClassLoader.getSystemResource(sampleName).toURI());
    FileAnalyzerFactory factory = new FileAnalyzerFactory(fs);

    // NOTE(review): plain Java assert statements are only evaluated when the JVM runs with -ea.
    DatasetJsonRecord schema = factory.getSchema(samplePath, sampleName);
    assert schema != null;

    SampleDataRecord sampleData = factory.getSampleData(samplePath, sampleName);
    assert sampleData != null;
}

From source file:lab2_3.java

/**
 * Chains two MapReduce jobs. The first reads {@code args[0]} and writes to
 * {@code TMPDIR}; the second, run only if the first succeeds, reads
 * {@code TMPDIR} and writes to {@code args[1]}. The process exits with 0 when
 * the last executed job succeeded and 1 otherwise.
 *
 * @param args {@code args[0]} is the input path, {@code args[1]} the final output path
 * @throws Exception if job setup, submission or a file system call fails
 */
public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    // Remove output left behind by a previous run.
    FileSystem.get(conf).delete(new Path(args[1]), true);
    FileSystem.get(conf).delete(TMPDIR, true);

    Job partitionJob = Job.getInstance(conf, "drive time lab 2.2");
    partitionJob.setJarByClass(lab2_3.class);
    partitionJob.setMapperClass(lab2_2.PartitioningMapper.class);
    partitionJob.setPartitionerClass(lab2_2.TypePartitioner.class);
    partitionJob.setReducerClass(lab2_2.IdentityReducer.class);
    partitionJob.setNumReduceTasks(6);
    partitionJob.setOutputKeyClass(IntWritable.class);
    partitionJob.setOutputValueClass(Text.class);
    FileInputFormat.addInputPath(partitionJob, new Path(args[0]));
    FileOutputFormat.setOutputPath(partitionJob, TMPDIR);

    boolean succeeded = partitionJob.waitForCompletion(true);
    if (succeeded) {
        Job statsJob = Job.getInstance(conf, "drive time lab 2.3");
        statsJob.setJarByClass(lab2_3.class);
        statsJob.setMapperClass(MMMaper.class);
        statsJob.setReducerClass(Reeeducer.class);
        statsJob.setPartitionerClass(Partitioneeeer.class);
        statsJob.setNumReduceTasks(6);
        statsJob.setOutputKeyClass(Text.class);
        statsJob.setOutputValueClass(lab2_1.Statistics.class);
        FileInputFormat.addInputPath(statsJob, TMPDIR);
        FileOutputFormat.setOutputPath(statsJob, new Path(args[1]));

        succeeded = statsJob.waitForCompletion(true);
    }
    // TMPDIR is not removed here; it is only cleared at the start of the next run.
    System.exit(succeeded ? 0 : 1);
}

From source file:Egg.java

License:Open Source License

/** Appends one more path to the inputs of the map-reduce job.
 *  @param pathString  The path to add, given as a string
 *  @return            This object
 */
@JSFunction
public Egg addInput(String pathString) throws IOException {
    FileInputFormat.addInputPath(job, new Path(pathString));
    return this;
}

From source file:Egg.java

License:Open Source License

/** Makes the given path the input of the map-reduce job.
 *  @param pathString  The input path, given as a string
 *  @return            This object
 */
@JSFunction
public Egg input(String pathString) throws IOException {
    FileInputFormat.setInputPaths(job, new Path(pathString));
    return this;
}

From source file:Egg.java

License:Open Source License

/** Makes the given path the output location of the map-reduce job.
 *  @param pathString  The output path, given as a string
 *  @return            This object
 */
@JSFunction
public Egg output(String pathString) {
    FileOutputFormat.setOutputPath(job, new Path(pathString));
    return this;
}

From source file:Script.java

License:Open Source License

/** Evaluates the Javascript expressions contained in a
 *  DataInputStream serialized file and passed over the distributed
 *  cache./* www .  j  a  va 2s.c o m*/
 *  @param conf       The Hadoop configuration object
 *  @param pathString The path string of the cached file
 *  @param name       The name of the file added to the cache
 *  @return           The result of the Javascript evaluation
 */
public Object evalCache(Configuration conf, String pathString, String name) throws IOException {
    FSDataInputStream in;
    FileSystem fs = FileSystem.getLocal(conf);
    try {
        Path path = new Path(pathString);
        in = fs.open(path);
    } catch (FileNotFoundException e) { // must be running in standalone mode
        Path path = new Path(Eggshell.SCRIPT_DIR + "/" + name);
        in = fs.open(path); // read it from the eggshell script directory instead
    }
    String buf = in.readUTF();
    in.close();
    return evalString(buf);
}

From source file:Script.java

License:Open Source License

/** Serialize the Javascript object into a file on HDFS and then add
 *  the file to the distributed cache. The text is wrapped in parentheses;
 *  a NativeArray is additionally wrapped in square brackets and a
 *  NativeObject is written as a brace-enclosed list of key:value pairs.
 *  @param conf       The Hadoop configuration object
 *  @param o          The Javascript object to serialize
 *  @param name       The name of file to save the serialized object to
 *  @throws IOException if the file cannot be created or written
 */
public void serialize(Configuration conf, Object o, String name) throws IOException {
    // Build the text first so that a serialization failure does not leave an empty file behind.
    StringBuilder buf = new StringBuilder("("); // parentheses force evaluation
    if (o instanceof NativeObject) {
        NativeObject obj = (NativeObject) o;
        buf.append("{");
        Object[] propIds = obj.getPropertyIds(obj);
        for (Object propId : propIds) {
            String key = propId.toString();
            Object value = obj.getProperty(obj, key);
            buf.append(key).append(":").append(cx.toString(value)).append(",");
        }
        buf.append("}");
    } else if (o instanceof NativeArray) {
        buf.append("[").append(cx.toString(o)).append("]"); // if array
    } else {
        buf.append(cx.toString(o)); // serialize
    }
    buf.append(")");

    FileSystem hdfs = FileSystem.get(conf);
    Path path = new Path(Eggshell.SCRIPT_DIR + "/" + name);
    // try-with-resources closes the stream even when writeUTF throws.
    try (FSDataOutputStream out = hdfs.create(path)) { // create the file
        out.writeUTF(buf.toString());
    }
    DistributedCache.addCacheFile(path.toUri(), conf);
}