Example usage for the org.apache.hadoop.fs.Path constructor

List of usage examples for the org.apache.hadoop.fs.Path constructor

Introduction

On this page you can find example usages of the org.apache.hadoop.fs.Path constructor.

Prototype

public Path(URI aUri) 

Source Link

Document

Construct a path from a URI

Usage

From source file:RHBlockToKeyRangeIndex.java

License:Apache License

/**
 * Command-line entry point for the "rhblockindex" job.
 *
 * <p>After {@link GenericOptionsParser} has consumed its own options, exactly two
 * arguments must remain: the input path and the output path. The process exits
 * with status 2 on a usage error, 0 when the job reports success and 1 otherwise.
 *
 * @param args raw command-line arguments
 * @throws Exception if option parsing, job setup or job execution fails
 */
public static void main(String[] args) throws Exception {
    final Configuration configuration = new Configuration();
    final GenericOptionsParser optionsParser = new GenericOptionsParser(configuration, args);
    final String[] remaining = optionsParser.getRemainingArgs();

    if (remaining.length != 2) {
        System.err.println("Usage: rhblockindex <in> <out>");
        System.exit(2);
    }

    final Job indexJob = new Job(configuration, "rhblockindex");
    indexJob.setJarByClass(RHBlockToKeyRangeIndex.class);

    // The same reducer class doubles as the combiner.
    indexJob.setMapperClass(RMapper.class);
    indexJob.setCombinerClass(RReducer.class);
    indexJob.setReducerClass(RReducer.class);

    indexJob.setOutputKeyClass(Text.class);
    indexJob.setOutputValueClass(IntWritable.class);

    final Path source = new Path(remaining[0]);
    FileInputFormat.addInputPath(indexJob, source);
    final Path destination = new Path(remaining[1]);
    FileOutputFormat.setOutputPath(indexJob, destination);

    final boolean succeeded = indexJob.waitForCompletion(true);
    System.exit(succeeded ? 0 : 1);
}

From source file:DumpRecordsExtended.java

License:Apache License

/**
 * Runs this tool./*from   w ww  .  j a v  a 2 s  . c om*/
 */
@SuppressWarnings({ "static-access" })
public int run(String[] args) throws Exception {
    Options options = new Options();

    options.addOption(OptionBuilder.withArgName("path").hasArg().withDescription("input path").create(INPUT));
    options.addOption(OptionBuilder.withArgName("path").hasArg().withDescription("output path").create(OUTPUT));

    CommandLine cmdline;
    CommandLineParser parser = new GnuParser();

    try {
        cmdline = parser.parse(options, args);
    } catch (ParseException exp) {
        System.err.println("Error parsing command line: " + exp.getMessage());
        return -1;
    }

    if (!cmdline.hasOption(INPUT) || !cmdline.hasOption(OUTPUT)) {
        System.out.println("args: " + Arrays.toString(args));
        HelpFormatter formatter = new HelpFormatter();
        formatter.setWidth(120);
        formatter.printHelp(this.getClass().getName(), options);
        ToolRunner.printGenericCommandUsage(System.out);
        return -1;
    }

    String inputPath = cmdline.getOptionValue(INPUT);
    String outputPath = cmdline.getOptionValue(OUTPUT);

    LOG.info("Tool name: " + DumpRecordsExtended.class.getSimpleName());
    LOG.info(" - input: " + inputPath);
    LOG.info(" - output: " + outputPath);

    Configuration conf = new Configuration();
    conf.setInt("mapred.min.split.size", 1024 * 1024 * 1024);

    Job job = Job.getInstance(conf);
    job.setJobName(DumpRecordsExtended.class.getSimpleName());
    job.setJarByClass(DumpRecordsExtended.class);

    job.setNumReduceTasks(0);

    FileInputFormat.addInputPath(job, new Path(inputPath));
    FileOutputFormat.setOutputPath(job, new Path(outputPath));

    job.setInputFormatClass(SequenceFileInputFormat.class);
    job.setOutputFormatClass(TextOutputFormat.class);

    job.setMapOutputKeyClass(IntWritable.class);
    job.setMapOutputValueClass(PageRankNode.class);

    // Delete the output directory if it exists already.
    FileSystem.get(conf).delete(new Path(outputPath), true);

    job.waitForCompletion(true);

    return 0;
}

From source file:LookupQuery.java

License:Apache License

/**
 * Looks up {@code word} in the index and folds its contribution into the
 * per-document scores held in {@code docTFIDF} and into {@code Qvalue}.
 *
 * <p>If the index has no entry for the word the method returns without
 * changing any score. Otherwise, for every posting of the word, the collection
 * file is opened, positioned at the posting's offset, and the second
 * whitespace-separated token of the line read there is used as the document
 * key under which the score is accumulated.
 *
 * @param word the query term to look up
 * @throws IOException if the index lookup or reading the collection fails
 */
public static void wordLookup(String word) throws IOException {

    key.set(word);

    // One lookup is sufficient; it fills 'value' and returns null when the key is absent.
    Writable w = reader.get(key, value);
    if (w == null) {
        return;
    }

    // idf depends only on numDocs and the posting-list size, so compute it once.
    // NOTE(review): if numDocs is an integral type this division truncates before
    // the logarithm is taken — confirm the declared type of numDocs.
    final double idf = Math.log(numDocs / value.size()) / Math.log(2);
    double querytf = queryTF.get(word);
    double qfidf = querytf * idf;

    Qvalue += (qfidf * qfidf);

    for (PairOfInts pair : value) {

        // Open the collection and a buffered reader over it.
        collection = fs.open(new Path(collectionPath));
        d = new BufferedReader(new InputStreamReader(collection));
        try {
            // Position the stream at the offset stored in the posting before any read.
            collection.seek(pair.getLeftElement());
            String s = d.readLine();
            String[] terms = s.split("\\s+");
            String filename = terms[1];

            double normalizedTF = pair.getnormTF();
            double tfidf = normalizedTF * idf;

            double TfidfQtfIdf = tfidf * (querytf * idf);

            if (!docTFIDF.containsKey(filename)) {
                docTFIDF.put(filename, TfidfQtfIdf);
            } else {
                double temp = docTFIDF.get(filename);
                temp += TfidfQtfIdf;
                docTFIDF.remove(filename);
                docTFIDF.put(filename, temp);
            }
        } finally {
            // Release the streams even when reading or parsing fails.
            collection.close();
            d.close();
        }

    } // End for loop of PairOfInts in value

}

From source file:LookupQuery.java

License:Apache License

/**
 * Counts the lines of the collection file at {@code collectionPath} and stores
 * the result in {@code numDocs}.
 *
 * <p>{@code numDocs} is reset to zero before counting. The streams are closed
 * even if reading fails part-way through.
 *
 * @throws IOException if the collection cannot be opened or read
 */
public static void CountDocs() throws IOException {
    collection = fs.open(new Path(collectionPath));
    d = new BufferedReader(new InputStreamReader(collection));

    numDocs = 0;
    try {
        while (d.readLine() != null) {
            numDocs++;
        }
    } finally {
        // Always release the streams, including when readLine throws.
        d.close();
        collection.close();
    }
}

From source file:StripesPMI_nocombiner.java

License:Apache License

/**
 * Runs this tool as two chained MapReduce jobs.
 *
 * <p>The first job reads the input path and writes to an intermediate
 * directory named {@code <output>_TMP}; the second job reads that intermediate
 * directory and writes to the requested output path. Each job's output
 * directory is deleted beforehand if it exists. The second job is not started
 * when the first one fails.
 *
 * @param args command-line arguments; the {@code INPUT} and {@code OUTPUT}
 *             options are required, {@code NUM_REDUCERS} is optional and
 *             defaults to 1
 * @return 0 if both jobs completed successfully, 1 if either job failed, and
 *         -1 if the command line could not be parsed or a required option is
 *         missing
 * @throws Exception if job setup, directory deletion or job execution fails
 */
@SuppressWarnings({ "static-access" })
public int run(String[] args) throws Exception {
    Options options = new Options();

    options.addOption(OptionBuilder.withArgName("path").hasArg().withDescription("input path").create(INPUT));
    options.addOption(OptionBuilder.withArgName("path").hasArg().withDescription("output path").create(OUTPUT));
    options.addOption(OptionBuilder.withArgName("num").hasArg().withDescription("number of reducers")
            .create(NUM_REDUCERS));

    CommandLine cmdline;
    CommandLineParser parser = new GnuParser();

    try {
        cmdline = parser.parse(options, args);
    } catch (ParseException exp) {
        System.err.println("Error parsing command line: " + exp.getMessage());
        return -1;
    }

    if (!cmdline.hasOption(INPUT) || !cmdline.hasOption(OUTPUT)) {
        System.out.println("args: " + Arrays.toString(args));
        HelpFormatter formatter = new HelpFormatter();
        formatter.setWidth(120);
        formatter.printHelp(this.getClass().getName(), options);
        ToolRunner.printGenericCommandUsage(System.out);
        return -1;
    }

    String inputPath = cmdline.getOptionValue(INPUT);
    String finalOutputPath = cmdline.getOptionValue(OUTPUT);
    // The first job writes here; the second job consumes it.
    String intermediatePath = finalOutputPath + "_TMP";
    int reduceTasks = cmdline.hasOption(NUM_REDUCERS) ? Integer.parseInt(cmdline.getOptionValue(NUM_REDUCERS))
            : 1;

    LOG.info("Tool: " + StripesPMI_nocombiner.class.getSimpleName());
    LOG.info(" - input path: " + inputPath);
    // Logs the first job's (intermediate) output path, as the tool always has.
    LOG.info(" - output path: " + intermediatePath);
    LOG.info(" - number of reducers: " + reduceTasks);

    // ---- First job ----
    Job firstJob = Job.getInstance(getConf());
    firstJob.setJobName(StripesPMI_nocombiner.class.getSimpleName());
    firstJob.setJarByClass(StripesPMI_nocombiner.class);

    // Delete the intermediate directory if it exists already.
    Path intermediateDir = new Path(intermediatePath);
    FileSystem.get(getConf()).delete(intermediateDir, true);

    firstJob.setNumReduceTasks(reduceTasks);

    FileInputFormat.setInputPaths(firstJob, new Path(inputPath));
    FileOutputFormat.setOutputPath(firstJob, intermediateDir);

    firstJob.setMapOutputKeyClass(Text.class);
    firstJob.setMapOutputValueClass(String2IntOpenHashMapWritable.class);
    firstJob.setOutputKeyClass(PairOfStrings.class);
    firstJob.setOutputValueClass(DoubleWritable.class);
    firstJob.setOutputFormatClass(TextOutputFormat.class);

    firstJob.setMapperClass(MyMapper_first.class);
    firstJob.setReducerClass(MyReducer_first.class);

    long startTime = System.currentTimeMillis();
    if (!firstJob.waitForCompletion(true)) {
        // Running the second job on missing or partial output would be meaningless.
        System.err.println("First job failed; skipping the aggregation job.");
        return 1;
    }

    // ---- Second job: aggregates the first job's output ----
    Job secondJob = Job.getInstance(getConf());
    secondJob.setJobName(StripesPMI_nocombiner.class.getSimpleName());
    secondJob.setJarByClass(StripesPMI_nocombiner.class);

    // Delete the final output directory if it exists already.
    Path finalDir = new Path(finalOutputPath);
    FileSystem.get(getConf()).delete(finalDir, true);

    secondJob.setNumReduceTasks(reduceTasks);

    FileInputFormat.setInputPaths(secondJob, intermediateDir);
    FileOutputFormat.setOutputPath(secondJob, finalDir);

    secondJob.setMapOutputKeyClass(Text.class);
    secondJob.setMapOutputValueClass(DoubleWritable.class);
    secondJob.setOutputKeyClass(Text.class);
    secondJob.setOutputValueClass(DoubleWritable.class);

    secondJob.setMapperClass(MyMapper_second.class);
    secondJob.setReducerClass(MyReducer_second.class);

    boolean succeeded = secondJob.waitForCompletion(true);

    System.out.println("Job Finished in " + (System.currentTimeMillis() - startTime) / 1000.0 + " seconds");

    // Propagate the outcome instead of unconditionally reporting success.
    return succeeded ? 0 : 1;
}

From source file:BuildInvertedIndex.java

License:Apache License

/**
     * Runs this tool./*from   w w  w .ja v a  2 s  . c o m*/
     */
    @SuppressWarnings({ "static-access" })
    public int run(String[] args) throws Exception {
        Options options = new Options();

        options.addOption(OptionBuilder.withArgName("path").hasArg().withDescription("input path").create(INPUT));
        options.addOption(OptionBuilder.withArgName("path").hasArg().withDescription("output path").create(OUTPUT));
        options.addOption(OptionBuilder.withArgName("num").hasArg().withDescription("number of reducers")
                .create(NUM_REDUCERS));

        CommandLine cmdline;
        CommandLineParser parser = new GnuParser();

        try {
            cmdline = parser.parse(options, args);
        } catch (ParseException exp) {
            System.err.println("Error parsing command line: " + exp.getMessage());
            return -1;
        }

        if (!cmdline.hasOption(INPUT) || !cmdline.hasOption(OUTPUT)) {
            System.out.println("args: " + Arrays.toString(args));
            HelpFormatter formatter = new HelpFormatter();
            formatter.setWidth(120);
            formatter.printHelp(this.getClass().getName(), options);
            ToolRunner.printGenericCommandUsage(System.out);
            return -1;
        }

        String inputPath = cmdline.getOptionValue(INPUT);
        String outputPath = cmdline.getOptionValue(OUTPUT);
        int reduceTasks = cmdline.hasOption(NUM_REDUCERS) ? Integer.parseInt(cmdline.getOptionValue(NUM_REDUCERS))
                : 1;

        LOG.info("Tool name: " + BuildInvertedIndex.class.getSimpleName());
        LOG.info(" - input path: " + inputPath);
        LOG.info(" - output path: " + outputPath);
        LOG.info(" - num reducers: " + reduceTasks);

        Job job = Job.getInstance(getConf());
        job.setJobName(BuildInvertedIndex.class.getSimpleName());
        job.setJarByClass(BuildInvertedIndex.class);

        job.setNumReduceTasks(reduceTasks);

        FileInputFormat.setInputPaths(job, new Path(inputPath));
        FileOutputFormat.setOutputPath(job, new Path(outputPath));

        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(PairOfInts.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(PairOfWritables.class);
        job.setOutputFormatClass(TextOutputFormat.class);

        job.setMapperClass(MyMapper.class);
        job.setReducerClass(MyReducer.class);

        // Delete the output directory if it exists already.
        Path outputDir = new Path(outputPath);
        FileSystem.get(getConf()).delete(outputDir, true);

        long startTime = System.currentTimeMillis();
        job.waitForCompletion(true);
        System.out.println("Job Finished in " + (System.currentTimeMillis() - startTime) / 1000.0 + " seconds");

        return 0;
    }

From source file:FDFGenData.java

License:Open Source License

/**
 * Generates five raw files in turn and writes each one out under
 * {@code tabledir} as {@code file<k>}, numbering from one past the count of
 * entries currently listed in {@code tabledir}.
 *
 * @param tabledir directory prefix the generated file names are appended to
 * @param num      value forwarded to the raw-file generator for each file
 * @throws Exception if listing the directory or any generate/write step fails
 */
public static void testwritefile(String tabledir, int num) throws Exception {

    final String rawtmp = "/tmp/raw/rawfile";

    final FileSystem fs = FileSystem.get(new Configuration());
    final FileStatus[] existing = fs.listStatus(new Path(tabledir));
    // A null listing is treated as an empty directory.
    final int x = (existing == null) ? 0 : existing.length;

    // The scratch raw file is regenerated before each of the five writes.
    for (int k = 1; k <= 5; k++) {
        PT.testgenrawfiler(rawtmp, num);
        PT.testwritefdf(tabledir + "file" + (x + k), rawtmp, false, (short) -1);
    }
}

From source file:FDFGenData.java

License:Open Source License

/**
 * Writes {@code recordnum} synthetic records to a newly created file.
 *
 * <p>Each record consists of the value {@code i / 1000} (integer division of
 * the record index) written as a byte, short, int, long, float and double,
 * followed by a UTF string of random length between 7 and 18 characters made
 * of consecutive letters starting at {@code 'a'}.
 *
 * @param fs        file system on which the file is created
 * @param filename  path of the file to create
 * @param recordnum number of records to write
 * @throws IOException if the file cannot be created or written
 */
static void testgenrawfile(FileSystem fs, String filename, int recordnum) throws IOException {
    Random r = new Random();
    FSDataOutputStream fos = fs.create(new Path(filename));
    try {
        // Reused across records; single-threaded, so no synchronized buffer is needed.
        StringBuilder sb = new StringBuilder();
        for (int i = 0; i < recordnum; i++) {
            int bucket = i / 1000;
            fos.writeByte(bucket);
            fos.writeShort(bucket);
            fos.writeInt(bucket);
            fos.writeLong(bucket);
            fos.writeFloat(bucket);
            fos.writeDouble(bucket);

            int strnum = r.nextInt(12) + 7;
            sb.setLength(0);
            for (int j = 0; j < strnum; j++) {
                sb.append((char) ('a' + j));
            }
            fos.writeUTF(sb.toString());
        }
    } finally {
        // Close the stream even if a write fails part-way through.
        fos.close();
    }
}

From source file:FormatStorageBasicTest.java

License:Open Source License

/**
 * Writes an {@code IndexInfo} via {@code persistentLineIndexInfo}, reads the
 * file back, and checks the five persisted fields in the order int, int, long,
 * long, int. Any {@link IOException} fails the test with the exception's message.
 */
public void testPersistentLineIndexInfo() {
    try {
        final Path target = new Path(prefix + "testPersistentLineIndexInfo");
        final FileSystem fileSystem = FileSystem.get(new Configuration());
        final FSDataOutputStream sink = fileSystem.create(target);

        final IndexInfo written = new IndexInfo();
        written.beginLine = 11;
        written.endLine = 22;
        written.offset = 33;
        written.len = 44;
        written.idx = 55;

        written.persistentLineIndexInfo(sink);
        sink.close();

        // Read the fields back in the same order and widths they are checked below.
        final FSDataInputStream source = fileSystem.open(target);
        final int readBeginLine = source.readInt();
        final int readEndLine = source.readInt();
        final long readOffset = source.readLong();
        final long readLen = source.readLong();
        final int readIdx = source.readInt();
        source.close();

        if (11 != readBeginLine) {
            fail("beginLine fail:" + readBeginLine);
        }
        if (22 != readEndLine) {
            fail("endLine fail:" + readEndLine);
        }
        if (33 != readOffset) {
            fail("offset fail:" + readOffset);
        }
        if (44 != readLen) {
            fail("len fail:" + readLen);
        }
        if (55 != readIdx) {
            fail("idx fail:" + readIdx);
        }

    } catch (IOException ioe) {
        fail(ioe.getMessage());
    }

}

From source file:FormatStorageBasicTest.java

License:Open Source License

/**
 * Writes an {@code IndexInfo} via {@code persistentKeyIndexInfo}, reads the
 * file back as two ints, and checks them against the begin and end keys that
 * were set. Any {@link IOException} fails the test with the exception's message.
 */
public void testPersistentKeyIndexInfo() {
    try {
        String fileName = prefix + "testPersistentKeyIndexInfo";
        Path path = new Path(fileName);
        FileSystem fs = FileSystem.get(new Configuration());
        FSDataOutputStream out = fs.create(path);

        IndexInfo info = new IndexInfo();
        info.beginKey = 111;
        info.endKey = 222;

        info.persistentKeyIndexInfo(out);
        out.close();

        FSDataInputStream in = fs.open(path);

        int beginKey = in.readInt();
        int endKey = in.readInt();
        in.close();

        if (beginKey != 111) {
            fail("beginKey fail:" + beginKey);
        }
        if (endKey != 222) {
            // Report the value that actually failed (this used to print beginKey).
            fail("endKey fail:" + endKey);
        }

    } catch (IOException e) {
        fail(e.getMessage());
    }

}