Example usage for the org.apache.hadoop.fs.Path constructor

Introduction

This page collects example usages of the org.apache.hadoop.fs.Path constructor.

Prototype

public Path(URI aUri)

Source Link

Document

Construct a path from a URI

Usage

From source file:RHBlockToKeyRangeIndex.java

License:Apache License

/**
 * Command-line entry point that configures and runs the "rhblockindex" job.
 *
 * After the generic Hadoop options have been consumed, exactly two arguments
 * must remain: the input path and the output path. The process exits with
 * status 2 on a usage error, 0 when the job succeeds and 1 otherwise.
 *
 * @param args raw command-line arguments
 * @throws Exception if option parsing, job setup or job execution fails
 */
public static void main(String[] args) throws Exception {
    final Configuration configuration = new Configuration();
    final GenericOptionsParser optionsParser = new GenericOptionsParser(configuration, args);
    final String[] remaining = optionsParser.getRemainingArgs();

    if (remaining.length != 2) {
        System.err.println("Usage: rhblockindex <in> <out>");
        System.exit(2);
    }

    final Job indexJob = new Job(configuration, "rhblockindex");
    indexJob.setJarByClass(RHBlockToKeyRangeIndex.class);

    // The same reducer class is used for the combine and the reduce phase.
    indexJob.setMapperClass(RMapper.class);
    indexJob.setCombinerClass(RReducer.class);
    indexJob.setReducerClass(RReducer.class);

    indexJob.setOutputKeyClass(Text.class);
    indexJob.setOutputValueClass(IntWritable.class);

    FileInputFormat.addInputPath(indexJob, new Path(remaining[0]));
    FileOutputFormat.setOutputPath(indexJob, new Path(remaining[1]));

    final boolean succeeded = indexJob.waitForCompletion(true);
    if (succeeded) {
        System.exit(0);
    } else {
        System.exit(1);
    }
}

From source file:DumpRecordsExtended.java

License:Apache License

/**
 * Runs this tool./*from   w ww  .  j a v  a 2 s  . c om*/
 */
@SuppressWarnings({ "static-access" })
public int run(String[] args) throws Exception {
    Options options = new Options();

    options.addOption(OptionBuilder.withArgName("path").hasArg().withDescription("input path").create(INPUT));
    options.addOption(OptionBuilder.withArgName("path").hasArg().withDescription("output path").create(OUTPUT));

    CommandLine cmdline;
    CommandLineParser parser = new GnuParser();

    try {
        cmdline = parser.parse(options, args);
    } catch (ParseException exp) {
        System.err.println("Error parsing command line: " + exp.getMessage());
        return -1;
    }

    if (!cmdline.hasOption(INPUT) || !cmdline.hasOption(OUTPUT)) {
        System.out.println("args: " + Arrays.toString(args));
        HelpFormatter formatter = new HelpFormatter();
        formatter.setWidth(120);
        formatter.printHelp(this.getClass().getName(), options);
        ToolRunner.printGenericCommandUsage(System.out);
        return -1;
    }

    String inputPath = cmdline.getOptionValue(INPUT);
    String outputPath = cmdline.getOptionValue(OUTPUT);

    LOG.info("Tool name: " + DumpRecordsExtended.class.getSimpleName());
    LOG.info(" - input: " + inputPath);
    LOG.info(" - output: " + outputPath);

    Configuration conf = new Configuration();
    conf.setInt("mapred.min.split.size", 1024 * 1024 * 1024);

    Job job = Job.getInstance(conf);
    job.setJobName(DumpRecordsExtended.class.getSimpleName());
    job.setJarByClass(DumpRecordsExtended.class);

    job.setNumReduceTasks(0);

    FileInputFormat.addInputPath(job, new Path(inputPath));
    FileOutputFormat.setOutputPath(job, new Path(outputPath));

    job.setInputFormatClass(SequenceFileInputFormat.class);
    job.setOutputFormatClass(TextOutputFormat.class);

    job.setMapOutputKeyClass(IntWritable.class);
    job.setMapOutputValueClass(PageRankNode.class);

    // Delete the output directory if it exists already.
    FileSystem.get(conf).delete(new Path(outputPath), true);

    job.waitForCompletion(true);

    return 0;
}

From source file:LookupQuery.java

License:Apache License

/**
 * Adds the contribution of one query term to the running query and document
 * scores.
 *
 * The term is looked up through {@code reader}; when no entry exists the
 * method returns without touching any score. Otherwise the squared query
 * weight (query tf * idf) is added to {@code Qvalue}, and for every posting
 * of the term the product (normalized tf * idf) * (query tf * idf) is added
 * to the entry of {@code docTFIDF} keyed by the posting's document name. The
 * document name is the second whitespace-separated token of the line found
 * in the collection file at the posting's offset.
 *
 * @param word the query term to look up
 * @throws IOException if the index or the collection cannot be read, or if
 *                     the collection holds no well-formed line at a
 *                     posting's offset
 */
public static void wordLookup(String word) throws IOException {

    key.set(word);

    // A single lookup both fills `value` and reports whether the term exists.
    Writable w = reader.get(key, value);
    if (w == null) {
        return;
    }

    // NOTE(review): if numDocs is declared as an integral type this division
    // truncates before the logarithm is taken -- confirm the field's type.
    // idf depends only on the term, so it is computed once for all postings.
    double idf = Math.log(numDocs / value.size()) / Math.log(2);
    double querytf = queryTF.get(word);
    double qfidf = querytf * idf;

    Qvalue += (qfidf * qfidf);

    for (PairOfInts pair : value) {

        String filename;

        // The collection is reopened for every posting so that the buffered
        // reader always starts exactly at the posting's offset.
        collection = fs.open(new Path(collectionPath));
        d = new BufferedReader(new InputStreamReader(collection));
        try {
            // Seek to the location of the document in the file with the given offset.
            collection.seek(pair.getLeftElement());
            String s = d.readLine();
            if (s == null) {
                throw new IOException(
                        "No line at offset " + pair.getLeftElement() + " in " + collectionPath);
            }
            String[] terms = s.split("\\s+");
            if (terms.length < 2) {
                throw new IOException("Malformed line at offset " + pair.getLeftElement() + " in "
                        + collectionPath + ": " + s);
            }
            filename = terms[1];
        } finally {
            // Release the stream and reader even when reading fails.
            try {
                collection.close();
            } finally {
                d.close();
            }
        }

        double tfidf = pair.getnormTF() * idf;
        double TfidfQtfIdf = tfidf * qfidf;

        if (!docTFIDF.containsKey(filename)) {
            docTFIDF.put(filename, TfidfQtfIdf);
        } else {
            double temp = docTFIDF.get(filename);
            temp += TfidfQtfIdf;
            docTFIDF.remove(filename);
            docTFIDF.put(filename, temp);
        }

    } // End of loop over the postings in value

}

From source file:LookupQuery.java

License:Apache License

/**
 * Counts the lines of the collection file and stores the total in
 * {@code numDocs}.
 *
 * The stream and reader are closed even when reading fails part-way.
 *
 * @throws IOException if the collection file cannot be opened or read
 */
public static void CountDocs() throws IOException {
    collection = fs.open(new Path(collectionPath));
    d = new BufferedReader(new InputStreamReader(collection));

    try {
        numDocs = 0;
        while (d.readLine() != null) {
            numDocs++;
        }
    } finally {
        // Close the reader first, then the underlying stream, as before.
        try {
            d.close();
        } finally {
            collection.close();
        }
    }
}

From source file:StripesPMI_nocombiner.java

License:Apache License

/**
 * Runs this tool as two chained jobs: the first writes to a temporary
 * directory (the OUTPUT option value with "_TMP" appended), the second reads
 * that directory and writes the final result to the OUTPUT path.
 *
 * The second job is only started when the first one succeeds.
 *
 * @param args command-line arguments; the INPUT and OUTPUT path options are
 *             required, NUM_REDUCERS is optional and defaults to 1
 * @return 0 when both jobs succeed, 1 when either job fails, -1 when the
 *         arguments cannot be parsed or a required option is missing
 * @throws Exception if job setup, output-directory deletion or job execution
 *                   fails
 */
@SuppressWarnings({ "static-access" })
public int run(String[] args) throws Exception {
    Options options = new Options();

    options.addOption(OptionBuilder.withArgName("path").hasArg().withDescription("input path").create(INPUT));
    options.addOption(OptionBuilder.withArgName("path").hasArg().withDescription("output path").create(OUTPUT));
    options.addOption(OptionBuilder.withArgName("num").hasArg().withDescription("number of reducers")
            .create(NUM_REDUCERS));

    CommandLine cmdline;
    CommandLineParser parser = new GnuParser();

    try {
        cmdline = parser.parse(options, args);
    } catch (ParseException exp) {
        System.err.println("Error parsing command line: " + exp.getMessage());
        return -1;
    }

    if (!cmdline.hasOption(INPUT) || !cmdline.hasOption(OUTPUT)) {
        System.out.println("args: " + Arrays.toString(args));
        HelpFormatter formatter = new HelpFormatter();
        formatter.setWidth(120);
        formatter.printHelp(this.getClass().getName(), options);
        ToolRunner.printGenericCommandUsage(System.out);
        return -1;
    }

    String inputPath = cmdline.getOptionValue(INPUT);
    // The first job writes to a temporary directory next to the final output.
    String outputPath = cmdline.getOptionValue(OUTPUT) + "_TMP";
    int reduceTasks = cmdline.hasOption(NUM_REDUCERS) ? Integer.parseInt(cmdline.getOptionValue(NUM_REDUCERS))
            : 1;

    LOG.info("Tool: " + StripesPMI_nocombiner.class.getSimpleName());
    LOG.info(" - input path: " + inputPath);
    LOG.info(" - output path: " + outputPath);
    LOG.info(" - number of reducers: " + reduceTasks);

    Job firstJob = Job.getInstance(getConf());
    firstJob.setJobName(StripesPMI_nocombiner.class.getSimpleName());
    firstJob.setJarByClass(StripesPMI_nocombiner.class);

    // Delete the output directory if it exists already.
    Path outputDir = new Path(outputPath);
    FileSystem.get(getConf()).delete(outputDir, true);

    firstJob.setNumReduceTasks(reduceTasks);

    FileInputFormat.setInputPaths(firstJob, new Path(inputPath));
    FileOutputFormat.setOutputPath(firstJob, new Path(outputPath));

    firstJob.setMapOutputKeyClass(Text.class);
    firstJob.setMapOutputValueClass(String2IntOpenHashMapWritable.class);
    firstJob.setOutputKeyClass(PairOfStrings.class);
    firstJob.setOutputValueClass(DoubleWritable.class);
    firstJob.setOutputFormatClass(TextOutputFormat.class);

    firstJob.setMapperClass(MyMapper_first.class);
    firstJob.setReducerClass(MyReducer_first.class);

    long startTime = System.currentTimeMillis();

    // Running the second job on the output of a failed first job would only
    // produce a misleading result, so stop here and report the failure.
    if (!firstJob.waitForCompletion(true)) {
        System.err.println("First job failed; the second job was not started.");
        return 1;
    }

    // Second job: reads the temporary output of the first job and writes the
    // final result. The temporary directory is left in place afterwards.
    inputPath = outputPath;
    outputPath = cmdline.getOptionValue(OUTPUT);

    Job secondJob = Job.getInstance(getConf());
    secondJob.setJobName(StripesPMI_nocombiner.class.getSimpleName());
    secondJob.setJarByClass(StripesPMI_nocombiner.class);

    // Delete the output directory if it exists already.
    outputDir = new Path(outputPath);
    FileSystem.get(getConf()).delete(outputDir, true);

    secondJob.setNumReduceTasks(reduceTasks);

    FileInputFormat.setInputPaths(secondJob, new Path(inputPath));
    FileOutputFormat.setOutputPath(secondJob, new Path(outputPath));

    secondJob.setMapOutputKeyClass(Text.class);
    secondJob.setMapOutputValueClass(DoubleWritable.class);
    secondJob.setOutputKeyClass(Text.class);
    secondJob.setOutputValueClass(DoubleWritable.class);

    secondJob.setMapperClass(MyMapper_second.class);
    secondJob.setReducerClass(MyReducer_second.class);

    boolean succeeded = secondJob.waitForCompletion(true);

    System.out.println("Job Finished in " + (System.currentTimeMillis() - startTime) / 1000.0 + " seconds");

    // Report the real outcome instead of unconditionally claiming success.
    return succeeded ? 0 : 1;
}

From source file:BuildInvertedIndex.java

License:Apache License

/**
     * Runs this tool./*from   w w  w .ja v a  2 s  . c o m*/
     */
    @SuppressWarnings({ "static-access" })
    public int run(String[] args) throws Exception {
        Options options = new Options();

        options.addOption(OptionBuilder.withArgName("path").hasArg().withDescription("input path").create(INPUT));
        options.addOption(OptionBuilder.withArgName("path").hasArg().withDescription("output path").create(OUTPUT));
        options.addOption(OptionBuilder.withArgName("num").hasArg().withDescription("number of reducers")
                .create(NUM_REDUCERS));

        CommandLine cmdline;
        CommandLineParser parser = new GnuParser();

        try {
            cmdline = parser.parse(options, args);
        } catch (ParseException exp) {
            System.err.println("Error parsing command line: " + exp.getMessage());
            return -1;
        }

        if (!cmdline.hasOption(INPUT) || !cmdline.hasOption(OUTPUT)) {
            System.out.println("args: " + Arrays.toString(args));
            HelpFormatter formatter = new HelpFormatter();
            formatter.setWidth(120);
            formatter.printHelp(this.getClass().getName(), options);
            ToolRunner.printGenericCommandUsage(System.out);
            return -1;
        }

        String inputPath = cmdline.getOptionValue(INPUT);
        String outputPath = cmdline.getOptionValue(OUTPUT);
        int reduceTasks = cmdline.hasOption(NUM_REDUCERS) ? Integer.parseInt(cmdline.getOptionValue(NUM_REDUCERS))
                : 1;

        LOG.info("Tool name: " + BuildInvertedIndex.class.getSimpleName());
        LOG.info(" - input path: " + inputPath);
        LOG.info(" - output path: " + outputPath);
        LOG.info(" - num reducers: " + reduceTasks);

        Job job = Job.getInstance(getConf());
        job.setJobName(BuildInvertedIndex.class.getSimpleName());
        job.setJarByClass(BuildInvertedIndex.class);

        job.setNumReduceTasks(reduceTasks);

        FileInputFormat.setInputPaths(job, new Path(inputPath));
        FileOutputFormat.setOutputPath(job, new Path(outputPath));

        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(PairOfInts.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(PairOfWritables.class);
        job.setOutputFormatClass(TextOutputFormat.class);

        job.setMapperClass(MyMapper.class);
        job.setReducerClass(MyReducer.class);

        // Delete the output directory if it exists already.
        Path outputDir = new Path(outputPath);
        FileSystem.get(getConf()).delete(outputDir, true);

        long startTime = System.currentTimeMillis();
        job.waitForCompletion(true);
        System.out.println("Job Finished in " + (System.currentTimeMillis() - startTime) / 1000.0 + " seconds");

        return 0;
    }

From source file:FDFGenData.java

License:Open Source License

/**
 * Generates five raw files in turn and writes each one out under
 * {@code tabledir}.
 *
 * The target names are {@code tabledir + "file" + k}, where k continues from
 * the number of entries currently listed under {@code tabledir} (0 when the
 * listing is null).
 *
 * @param tabledir prefix of the target names; also the directory that is listed
 * @param num      value forwarded to {@code PT.testgenrawfiler} for each raw file
 * @throws Exception if listing, generating or writing fails
 */
public static void testwritefile(String tabledir, int num) throws Exception {
    final String rawtmp = "/tmp/raw/rawfile";

    final FileSystem fileSystem = FileSystem.get(new Configuration());
    final FileStatus[] existing = fileSystem.listStatus(new Path(tabledir));
    final int x = (existing == null) ? 0 : existing.length;

    // Each round regenerates the shared raw file and then writes it out
    // under the next free index.
    for (int round = 1; round <= 5; round++) {
        PT.testgenrawfiler(rawtmp, num);
        PT.testwritefdf(tabledir + "file" + (x + round), rawtmp, false, (short) -1);
    }
}

From source file:FDFGenData.java

License:Open Source License

/**
 * Writes {@code recordnum} binary records to a newly created file.
 *
 * Record {@code i} consists of the value {@code i / 1000} (integer division)
 * written as a byte, short, int, long, float and double, followed by a UTF
 * string of 7 to 18 consecutive letters starting at 'a'; the length is
 * chosen at random for each record.
 *
 * @param fs        file system on which the file is created
 * @param filename  path of the file to create
 * @param recordnum number of records to write
 * @throws IOException if the file cannot be created or written
 */
static void testgenrawfile(FileSystem fs, String filename, int recordnum) throws IOException {
    Random r = new Random();
    FSDataOutputStream fos = fs.create(new Path(filename));
    try {
        // Reused across records; no synchronization is needed for a local buffer.
        StringBuilder sb = new StringBuilder();
        for (int i = 0; i < recordnum; i++) {
            int bucket = i / 1000;
            fos.writeByte(bucket);
            fos.writeShort(bucket);
            fos.writeInt(bucket);
            fos.writeLong(bucket);
            fos.writeFloat(bucket);
            fos.writeDouble(bucket);

            int strnum = r.nextInt(12) + 7;
            sb.setLength(0);
            for (int j = 0; j < strnum; j++) {
                sb.append((char) ('a' + j));
            }
            fos.writeUTF(sb.toString());
        }
    } finally {
        // Close the stream even when a write fails part-way through.
        fos.close();
    }
}

From source file:FormatStorageBasicTest.java

License:Open Source License

/**
 * Writes an {@code IndexInfo} through {@code persistentLineIndexInfo} and
 * reads the file back as two ints, two longs and an int.
 *
 * The test fails unless those values are 11, 22, 33, 44 and 55, the values
 * assigned to beginLine, endLine, offset, len and idx before writing. An
 * {@code IOException} also fails the test, with the exception's message.
 */
public void testPersistentLineIndexInfo() {
    try {
        Path target = new Path(prefix + "testPersistentLineIndexInfo");
        FileSystem fileSystem = FileSystem.get(new Configuration());
        FSDataOutputStream sink = fileSystem.create(target);

        // Populate the line-index fields with distinct, recognisable values.
        IndexInfo written = new IndexInfo();
        written.beginLine = 11;
        written.endLine = 22;
        written.offset = 33;
        written.len = 44;
        written.idx = 55;

        written.persistentLineIndexInfo(sink);
        sink.close();

        // Read the values back in the order int, int, long, long, int.
        FSDataInputStream source = fileSystem.open(target);
        int readBeginLine = source.readInt();
        int readEndLine = source.readInt();
        long readOffset = source.readLong();
        long readLen = source.readLong();
        int readIdx = source.readInt();
        source.close();

        if (readBeginLine != 11) {
            fail("beginLine fail:" + readBeginLine);
        }
        if (readEndLine != 22) {
            fail("endLine fail:" + readEndLine);
        }
        if (readOffset != 33) {
            fail("offset fail:" + readOffset);
        }
        if (readLen != 44) {
            fail("len fail:" + readLen);
        }
        if (readIdx != 55) {
            fail("idx fail:" + readIdx);
        }
    } catch (IOException ioFailure) {
        fail(ioFailure.getMessage());
    }
}

From source file:FormatStorageBasicTest.java

License:Open Source License

/**
 * Writes an {@code IndexInfo} through {@code persistentKeyIndexInfo} and
 * reads the file back as two ints.
 *
 * The test fails unless those values are 111 and 222, the values assigned to
 * beginKey and endKey before writing. An {@code IOException} also fails the
 * test, with the exception's message.
 */
public void testPersistentKeyIndexInfo() {
    try {
        String fileName = prefix + "testPersistentKeyIndexInfo";
        Path path = new Path(fileName);
        FileSystem fs = FileSystem.get(new Configuration());
        FSDataOutputStream out = fs.create(path);

        IndexInfo info = new IndexInfo();
        info.beginKey = 111;
        info.endKey = 222;

        info.persistentKeyIndexInfo(out);
        out.close();

        FSDataInputStream in = fs.open(path);

        int beginKey = in.readInt();
        int endKey = in.readInt();
        in.close();

        if (beginKey != 111) {
            fail("beginKey fail:" + beginKey);
        }
        if (endKey != 222) {
            // Name and print the value that actually mismatched (endKey, not beginKey).
            fail("endKey fail:" + endKey);
        }

    } catch (IOException e) {
        fail(e.getMessage());
    }

}