List of usage examples for the org.apache.hadoop.fs.Path constructor
public Path(URI aUri)
public Path(String pathString)
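Before the examples, a minimal sketch (not taken from any of the source files below) of the two constructor forms used throughout them; the hdfs://namenode:8020 authority is a placeholder:

import java.net.URI;
import org.apache.hadoop.fs.Path;

public class PathConstructorSketch {
    public static void main(String[] args) {
        // From a plain string: no scheme, resolved against the default FileSystem at use time.
        Path fromString = new Path("/data/output/temp");
        // From a URI: fully qualified with scheme and authority (placeholder host).
        Path fromUri = new Path(URI.create("hdfs://namenode:8020/data/output/temp"));
        // Parent/child form, handy for building file paths under an output directory.
        Path part = new Path(fromString, "part-r-00000");
        System.out.println(part); // prints /data/output/temp/part-r-00000
    }
}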
From source file: CountJob.java
License: Apache License
public static void doJob(String param, String[] args, String msgs)
        throws IOException, ClassNotFoundException, InterruptedException {
    Configuration conf = new Configuration();
    conf.set(TokenizerMapper.PATTERN, args[2]);
    FileSystem hdfs = FileSystem.get(conf);
    Path tempOutput1 = new Path("/data/output/temp/" + param + "1");
    Path tempOutput2 = new Path("/data/output/temp/" + param + "2");
    // Remove leftovers from a previous run so the jobs can write their output.
    if (hdfs.exists(tempOutput1) || hdfs.exists(tempOutput2)) {
        hdfs.delete(tempOutput1, true);
        hdfs.delete(tempOutput2, true);
    }

    Job job = new Job(conf, "word count");
    job.setJarByClass(CountJob.class);
    job.setMapperClass(TokenizerMapper.class);
    job.setCombinerClass(LongSumReducer.class);
    job.setReducerClass(LongSumReducer.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(LongWritable.class);
    FileInputFormat.addInputPath(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, tempOutput1);
    job.waitForCompletion(true);

    Job sortJob1 = new Job(conf);
    sortJob1.setJobName("grep-sort");
    FileInputFormat.setInputPaths(sortJob1, tempOutput1);
    sortJob1.setInputFormatClass(SequenceFileInputFormat.class);
    sortJob1.setMapperClass(InverseMapper.class);
    sortJob1.setNumReduceTasks(1); // write a single file
    FileOutputFormat.setOutputPath(sortJob1, tempOutput2);
    sortJob1.setSortComparatorClass( // sort by decreasing freq
            LongWritable.DecreasingComparator.class);
    sortJob1.waitForCompletion(true);
    hdfs.delete(tempOutput1, true);
}
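As used above, doJob chains two jobs: the word count writes SequenceFile output to /data/output/temp/&lt;param&gt;1, and the grep-sort job inverts and sorts it by decreasing frequency into /data/output/temp/&lt;param&gt;2, deleting the intermediate directory afterwards.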
From source file: CountJob.java
License: Apache License
public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    String msgs = "";
    doJob("1", args, msgs);
    doJob("2", args, msgs);
    FileSystem hdfs = FileSystem.get(conf);
    // Compare the two runs' outputs line by line.
    BufferedReader bfr = new BufferedReader(
            new InputStreamReader(hdfs.open(new Path("/data/output/temp/12/part-r-00000"))));
    BufferedReader bfr2 = new BufferedReader(
            new InputStreamReader(hdfs.open(new Path("/data/output/temp/22/part-r-00000"))));
    boolean same = true;
    String line1 = bfr.readLine();
    String line2 = bfr2.readLine();
    while (same) {
        if ((line1 == null && line2 != null) || (line1 != null && line2 == null)) {
            same = false;
            break;
        } else if (line1 == null && line2 == null) {
            break;
        } else if (line1.equals(line2)) {
            line1 = bfr.readLine();
            line2 = bfr2.readLine();
        } else {
            same = false;
            break;
        }
    }
    if (same) {
        System.out.print("same " + same + "\n");
        Path localP = new Path("/tmp/output.txt");
        hdfs.copyToLocalFile(new Path("/data/output/temp/12/part-r-00000"), localP);
        hdfs.copyFromLocalFile(localP, new Path(args[1] + "/part-r-00000"));
        hdfs.createNewFile(new Path(args[1] + "/_SUCCESS"));
        System.out.print("created result");
    } else {
        System.out.print("Different");
        // The first two runs disagreed; run once more and take that result.
        doJob("3", args, msgs);
        Path localP = new Path("/tmp/output.txt");
        hdfs.copyToLocalFile(new Path("/data/output/temp/32/part-r-00000"), localP);
        hdfs.copyFromLocalFile(localP, new Path(args[1] + "/part-r-00000"));
        hdfs.createNewFile(new Path(args[1] + "/_SUCCESS"));
        System.out.print("created result");
    }
    hdfs.delete(new Path("/data/output/temp/12/part-r-00000"), true);
    hdfs.delete(new Path("/data/output/temp/22/part-r-00000"), true);
}
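From the two methods above: args[0] is the input directory, args[1] the directory that receives the final part-r-00000 and _SUCCESS files, and args[2] the pattern handed to TokenizerMapper.PATTERN. main runs the count twice, keeps the result if both runs agree, and falls back to a third run otherwise.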
From source file: FileAnalyzerTest.java
License: Open Source License
@Test
public void testAvro() throws IOException, URISyntaxException {
    URL url = ClassLoader.getSystemResource("test_sample.avro");
    Path pt = new Path(url.toURI());
    FileAnalyzerFactory fileAnalyzerFactory = new FileAnalyzerFactory(fs);
    DatasetJsonRecord schema = fileAnalyzerFactory.getSchema(pt, "test_sample.avro");
    assert schema != null;
    SampleDataRecord sampleData = fileAnalyzerFactory.getSampleData(pt, "test_sample.avro");
    assert sampleData != null;
}
From source file: FileAnalyzerTest.java
License: Open Source License
@Test(enabled = false) // TestNG attribute: this test is currently skipped
public void testOrc() throws IOException, URISyntaxException {
    URL url = ClassLoader.getSystemResource("test_sample.orc");
    Path pt = new Path(url.toURI());
    FileAnalyzerFactory fileAnalyzerFactory = new FileAnalyzerFactory(fs);
    DatasetJsonRecord schema = fileAnalyzerFactory.getSchema(pt, "test_sample.orc");
    assert schema != null;
    SampleDataRecord sampleData = fileAnalyzerFactory.getSampleData(pt, "test_sample.orc");
    assert sampleData != null;
}
From source file: lab2_3.java
public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    // Clear the final output directory and the temp directory from earlier runs.
    // TMPDIR is a Path constant defined elsewhere in lab2_3 (not shown in this excerpt).
    FileSystem.get(conf).delete(new Path(args[1]), true);
    FileSystem.get(conf).delete(TMPDIR, true);

    Job first = Job.getInstance(conf, "drive time lab 2.2");
    first.setJarByClass(lab2_3.class);
    first.setMapperClass(lab2_2.PartitioningMapper.class);
    first.setPartitionerClass(lab2_2.TypePartitioner.class);
    first.setReducerClass(lab2_2.IdentityReducer.class);
    first.setNumReduceTasks(6);
    first.setOutputKeyClass(IntWritable.class);
    first.setOutputValueClass(Text.class);
    FileInputFormat.addInputPath(first, new Path(args[0]));
    FileOutputFormat.setOutputPath(first, TMPDIR);

    int code = first.waitForCompletion(true) ? 0 : 1;
    if (code == 0) {
        Job second = Job.getInstance(conf, "drive time lab 2.3");
        second.setJarByClass(lab2_3.class);
        second.setMapperClass(MMMaper.class);
        second.setReducerClass(Reeeducer.class);
        second.setPartitionerClass(Partitioneeeer.class);
        second.setNumReduceTasks(6);
        second.setOutputKeyClass(Text.class);
        second.setOutputValueClass(lab2_1.Statistics.class);
        FileInputFormat.addInputPath(second, TMPDIR);
        FileOutputFormat.setOutputPath(second, new Path(args[1]));
        code = second.waitForCompletion(true) ? 0 : 1;
    }
    // FileSystem.get(conf).delete(TMPDIR, true);
    System.exit(code);
}
From source file: Egg.java
License: Open Source License
/** Adds a path to the list of inputs for the map-reduce job.
 * @param pathString The name of the path
 * @return The 'this' object */
@JSFunction
public Egg addInput(String pathString) throws IOException {
    Path path = new Path(pathString);
    FileInputFormat.addInputPath(job, path);
    return this;
}
From source file: Egg.java
License: Open Source License
/** Sets the list of inputs for the map-reduce job to the path.
 * @param pathString The name of the path
 * @return The 'this' object */
@JSFunction
public Egg input(String pathString) throws IOException {
    Path path = new Path(pathString);
    FileInputFormat.setInputPaths(job, path);
    return this;
}
From source file: Egg.java
License: Open Source License
/** Sets the output for the map-reduce job to the path.
 * @param pathString The name of the path
 * @return The 'this' object */
@JSFunction
public Egg output(String pathString) {
    Path path = new Path(pathString);
    FileOutputFormat.setOutputPath(job, path);
    return this;
}
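Since each @JSFunction method above returns this, the calls chain naturally from script code. A hypothetical one-line sketch, assuming an Egg instance named egg already wired to a Job:

egg.input("/data/in").output("/data/out");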
From source file: Script.java
License: Open Source License
/** Evaluates the Javascript expressions contained in a
 * DataInputStream serialized file and passed over the distributed cache.
 * @param conf The Hadoop configuration object
 * @param pathString The path string of the cached file
 * @param name The name of the file added to the cache
 * @return The result of the Javascript evaluation */
public Object evalCache(Configuration conf, String pathString, String name) throws IOException {
    FSDataInputStream in;
    FileSystem fs = FileSystem.getLocal(conf);
    try {
        Path path = new Path(pathString);
        in = fs.open(path);
    } catch (FileNotFoundException e) { // must be running in standalone mode
        Path path = new Path(Eggshell.SCRIPT_DIR + "/" + name);
        in = fs.open(path); // read it from the eggshell script directory instead
    }
    String buf = in.readUTF();
    in.close();
    return evalString(buf);
}
From source file: Script.java
License: Open Source License
/** Serialize the Javascript object into a file on HDFS and then add
 * the file to the distributed cache.
 * @param conf The Hadoop configuration object
 * @param o The Javascript object to serialize
 * @param name The name of file to save the serialized object to */
public void serialize(Configuration conf, Object o, String name) throws IOException {
    FileSystem hdfs = FileSystem.get(conf);
    Path path = new Path(Eggshell.SCRIPT_DIR + "/" + name);
    FSDataOutputStream out = hdfs.create(path); // create the file
    String buf;
    if (!(o instanceof NativeObject)) {
        buf = cx.toString(o); // serialize
        if (o instanceof NativeArray) buf = "[" + buf + "]"; // if array
    } else {
        buf = "{";
        NativeObject obj = (NativeObject) o;
        Object[] propIds = obj.getPropertyIds(obj);
        for (Object propId : propIds) {
            String key = propId.toString();
            Object value = obj.getProperty(obj, key);
            buf += key + ":" + cx.toString(value) + ",";
        }
        buf += "}";
    }
    buf = "(" + buf + ")"; // force evaluation
    out.writeUTF(buf);
    out.close();
    DistributedCache.addCacheFile(path.toUri(), conf);
}
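serialize and evalCache are the two halves of one round trip. A hedged sketch of how they pair up, assuming a Script instance named script on both sides and using DistributedCache.getLocalCacheFiles to recover the task-local path (the names jsObject and state.js are placeholders):

// Driver side: write the object under Eggshell.SCRIPT_DIR and register it with the cache.
script.serialize(conf, jsObject, "state.js");

// Task side: locate the cached copy and evaluate it; evalCache falls back to
// Eggshell.SCRIPT_DIR itself when the file is not in the local cache (standalone mode).
Path[] cached = DistributedCache.getLocalCacheFiles(conf);
Object restored = script.evalCache(conf, cached[0].toString(), "state.js");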