List of usage examples for the org.apache.hadoop.fs.Path constructor
public Path(URI aUri)
From source file:RHBlockToKeyRangeIndex.java
License:Apache License
/**
 * Command-line entry point that configures and submits the "rhblockindex" job.
 *
 * <p>After generic Hadoop options are stripped, exactly two arguments must remain:
 * the input path and the output path. Otherwise a usage line is printed to stderr
 * and the JVM exits with status 2. The JVM exits with 0 when the job completes
 * successfully and 1 otherwise.
 *
 * @param args raw command-line arguments
 * @throws Exception if option parsing, job setup or job execution fails
 */
public static void main(String[] args) throws Exception {
    final Configuration configuration = new Configuration();
    final String[] remaining = new GenericOptionsParser(configuration, args).getRemainingArgs();
    if (remaining.length != 2) {
        System.err.println("Usage: rhblockindex <in> <out>");
        System.exit(2);
    }

    final Job indexJob = new Job(configuration, "rhblockindex");
    indexJob.setJarByClass(RHBlockToKeyRangeIndex.class);

    // The reducer class doubles as the combiner.
    indexJob.setMapperClass(RMapper.class);
    indexJob.setCombinerClass(RReducer.class);
    indexJob.setReducerClass(RReducer.class);

    indexJob.setOutputKeyClass(Text.class);
    indexJob.setOutputValueClass(IntWritable.class);

    FileInputFormat.addInputPath(indexJob, new Path(remaining[0]));
    FileOutputFormat.setOutputPath(indexJob, new Path(remaining[1]));

    final boolean succeeded = indexJob.waitForCompletion(true);
    final int exitCode = succeeded ? 0 : 1;
    System.exit(exitCode);
}
From source file:DumpRecordsExtended.java
License:Apache License
/** * Runs this tool./*from w ww . j a v a 2 s . c om*/ */ @SuppressWarnings({ "static-access" }) public int run(String[] args) throws Exception { Options options = new Options(); options.addOption(OptionBuilder.withArgName("path").hasArg().withDescription("input path").create(INPUT)); options.addOption(OptionBuilder.withArgName("path").hasArg().withDescription("output path").create(OUTPUT)); CommandLine cmdline; CommandLineParser parser = new GnuParser(); try { cmdline = parser.parse(options, args); } catch (ParseException exp) { System.err.println("Error parsing command line: " + exp.getMessage()); return -1; } if (!cmdline.hasOption(INPUT) || !cmdline.hasOption(OUTPUT)) { System.out.println("args: " + Arrays.toString(args)); HelpFormatter formatter = new HelpFormatter(); formatter.setWidth(120); formatter.printHelp(this.getClass().getName(), options); ToolRunner.printGenericCommandUsage(System.out); return -1; } String inputPath = cmdline.getOptionValue(INPUT); String outputPath = cmdline.getOptionValue(OUTPUT); LOG.info("Tool name: " + DumpRecordsExtended.class.getSimpleName()); LOG.info(" - input: " + inputPath); LOG.info(" - output: " + outputPath); Configuration conf = new Configuration(); conf.setInt("mapred.min.split.size", 1024 * 1024 * 1024); Job job = Job.getInstance(conf); job.setJobName(DumpRecordsExtended.class.getSimpleName()); job.setJarByClass(DumpRecordsExtended.class); job.setNumReduceTasks(0); FileInputFormat.addInputPath(job, new Path(inputPath)); FileOutputFormat.setOutputPath(job, new Path(outputPath)); job.setInputFormatClass(SequenceFileInputFormat.class); job.setOutputFormatClass(TextOutputFormat.class); job.setMapOutputKeyClass(IntWritable.class); job.setMapOutputValueClass(PageRankNode.class); // Delete the output directory if it exists already. FileSystem.get(conf).delete(new Path(outputPath), true); job.waitForCompletion(true); return 0; }
From source file:LookupQuery.java
License:Apache License
public static void wordLookup(String word) throws IOException { key.set(word);/*from w w w . j a v a 2 s. c o m*/ reader.get(key, value); Writable w = reader.get(key, value); if (w == null) { return; } double idf = Math.log(numDocs / value.size()) / Math.log(2); double querytf = queryTF.get(word); double qfidf = querytf * idf; Qvalue += (qfidf * qfidf); for (PairOfInts pair : value) { //Open the collection and buffered reader. collection = fs.open(new Path(collectionPath)); d = new BufferedReader(new InputStreamReader(collection)); //This line seeks out the location of the document in the file with the given offset collection.seek(pair.getLeftElement()); String s = d.readLine(); String[] terms = s.split("\\s+"); String filename = terms[1]; double normalizedTF = pair.getnormTF(); idf = Math.log(numDocs / value.size()) / Math.log(2); double tfidf = normalizedTF * idf; double TfidfQtfIdf = tfidf * (querytf * idf); if (!docTFIDF.containsKey(filename)) docTFIDF.put(filename, TfidfQtfIdf); else { double temp = docTFIDF.get(filename); temp += TfidfQtfIdf; docTFIDF.remove(filename); docTFIDF.put(filename, temp); } collection.close(); d.close(); } //End for loop of PairOfInts in value }
From source file:LookupQuery.java
License:Apache License
/**
 * Counts the lines of the collection file and stores the total in {@code numDocs}.
 *
 * <p>The reader and the underlying stream are closed even if reading fails.
 *
 * @throws IOException if the collection file cannot be opened or read
 */
public static void CountDocs() throws IOException {
    collection = fs.open(new Path(collectionPath));
    d = new BufferedReader(new InputStreamReader(collection));
    try {
        numDocs = 0;
        while (d.readLine() != null) {
            numDocs++;
        }
    } finally {
        try {
            d.close();
        } finally {
            collection.close();
        }
    }
}
From source file:StripesPMI_nocombiner.java
License:Apache License
/** * Runs this tool.//from www. j a v a 2 s . com */ @SuppressWarnings({ "static-access" }) public int run(String[] args) throws Exception { Options options = new Options(); options.addOption(OptionBuilder.withArgName("path").hasArg().withDescription("input path").create(INPUT)); options.addOption(OptionBuilder.withArgName("path").hasArg().withDescription("output path").create(OUTPUT)); options.addOption(OptionBuilder.withArgName("num").hasArg().withDescription("number of reducers") .create(NUM_REDUCERS)); CommandLine cmdline; CommandLineParser parser = new GnuParser(); try { cmdline = parser.parse(options, args); } catch (ParseException exp) { System.err.println("Error parsing command line: " + exp.getMessage()); return -1; } if (!cmdline.hasOption(INPUT) || !cmdline.hasOption(OUTPUT)) { System.out.println("args: " + Arrays.toString(args)); HelpFormatter formatter = new HelpFormatter(); formatter.setWidth(120); formatter.printHelp(this.getClass().getName(), options); ToolRunner.printGenericCommandUsage(System.out); return -1; } String inputPath = cmdline.getOptionValue(INPUT); String outputPath = cmdline.getOptionValue(OUTPUT) + "_TMP";// cmdline.getOptionValue(OUTPUT); int reduceTasks = cmdline.hasOption(NUM_REDUCERS) ? Integer.parseInt(cmdline.getOptionValue(NUM_REDUCERS)) : 1; LOG.info("Tool: " + StripesPMI_nocombiner.class.getSimpleName()); LOG.info(" - input path: " + inputPath); LOG.info(" - output path: " + outputPath); LOG.info(" - number of reducers: " + reduceTasks); Job job_first = Job.getInstance(getConf()); job_first.setJobName(StripesPMI_nocombiner.class.getSimpleName()); job_first.setJarByClass(StripesPMI_nocombiner.class); // Delete the output directory if it exists already. 
Path outputDir = new Path(outputPath); FileSystem.get(getConf()).delete(outputDir, true); job_first.setNumReduceTasks(reduceTasks); FileInputFormat.setInputPaths(job_first, new Path(inputPath)); FileOutputFormat.setOutputPath(job_first, new Path(outputPath)); job_first.setMapOutputKeyClass(Text.class); job_first.setMapOutputValueClass(String2IntOpenHashMapWritable.class); job_first.setOutputKeyClass(PairOfStrings.class);// Text.class);// PairOfStrings.class); job_first.setOutputValueClass(DoubleWritable.class); job_first.setOutputFormatClass(TextOutputFormat.class);// changed job_first.setMapperClass(MyMapper_first.class); // job_first.setCombinerClass(MyCombiner.class); job_first.setReducerClass(MyReducer_first.class); long startTime = System.currentTimeMillis(); job_first.waitForCompletion(true); // ////////////////START.: run the second MR job to just aggregate result//////////////// inputPath = outputPath;// cmdline.getOptionValue(INPUT); outputPath = cmdline.getOptionValue(OUTPUT); Job job_second = Job.getInstance(getConf()); job_second.setJobName(StripesPMI_nocombiner.class.getSimpleName()); job_second.setJarByClass(StripesPMI_nocombiner.class); // Delete the output directory if it exists already. 
outputDir = new Path(outputPath); FileSystem.get(getConf()).delete(outputDir, true); job_second.setNumReduceTasks(reduceTasks); FileInputFormat.setInputPaths(job_second, new Path(inputPath)); FileOutputFormat.setOutputPath(job_second, new Path(outputPath)); job_second.setMapOutputKeyClass(Text.class); job_second.setMapOutputValueClass(DoubleWritable.class); job_second.setOutputKeyClass(Text.class);// PairOfStrings.class); job_second.setOutputValueClass(DoubleWritable.class); // job_second.setOutputFormatClass(TextOutputFormat.class);// changed job_second.setMapperClass(MyMapper_second.class); // job_second.setCombinerClass(MyCombiner.class); job_second.setReducerClass(MyReducer_second.class); job_second.waitForCompletion(true); // END//////////// System.out.println("Job Finished in " + (System.currentTimeMillis() - startTime) / 1000.0 + " seconds"); return 0; }
From source file:BuildInvertedIndex.java
License:Apache License
/** * Runs this tool./*from w w w .ja v a 2 s . c o m*/ */ @SuppressWarnings({ "static-access" }) public int run(String[] args) throws Exception { Options options = new Options(); options.addOption(OptionBuilder.withArgName("path").hasArg().withDescription("input path").create(INPUT)); options.addOption(OptionBuilder.withArgName("path").hasArg().withDescription("output path").create(OUTPUT)); options.addOption(OptionBuilder.withArgName("num").hasArg().withDescription("number of reducers") .create(NUM_REDUCERS)); CommandLine cmdline; CommandLineParser parser = new GnuParser(); try { cmdline = parser.parse(options, args); } catch (ParseException exp) { System.err.println("Error parsing command line: " + exp.getMessage()); return -1; } if (!cmdline.hasOption(INPUT) || !cmdline.hasOption(OUTPUT)) { System.out.println("args: " + Arrays.toString(args)); HelpFormatter formatter = new HelpFormatter(); formatter.setWidth(120); formatter.printHelp(this.getClass().getName(), options); ToolRunner.printGenericCommandUsage(System.out); return -1; } String inputPath = cmdline.getOptionValue(INPUT); String outputPath = cmdline.getOptionValue(OUTPUT); int reduceTasks = cmdline.hasOption(NUM_REDUCERS) ? 
Integer.parseInt(cmdline.getOptionValue(NUM_REDUCERS)) : 1; LOG.info("Tool name: " + BuildInvertedIndex.class.getSimpleName()); LOG.info(" - input path: " + inputPath); LOG.info(" - output path: " + outputPath); LOG.info(" - num reducers: " + reduceTasks); Job job = Job.getInstance(getConf()); job.setJobName(BuildInvertedIndex.class.getSimpleName()); job.setJarByClass(BuildInvertedIndex.class); job.setNumReduceTasks(reduceTasks); FileInputFormat.setInputPaths(job, new Path(inputPath)); FileOutputFormat.setOutputPath(job, new Path(outputPath)); job.setMapOutputKeyClass(Text.class); job.setMapOutputValueClass(PairOfInts.class); job.setOutputKeyClass(Text.class); job.setOutputValueClass(PairOfWritables.class); job.setOutputFormatClass(TextOutputFormat.class); job.setMapperClass(MyMapper.class); job.setReducerClass(MyReducer.class); // Delete the output directory if it exists already. Path outputDir = new Path(outputPath); FileSystem.get(getConf()).delete(outputDir, true); long startTime = System.currentTimeMillis(); job.waitForCompletion(true); System.out.println("Job Finished in " + (System.currentTimeMillis() - startTime) / 1000.0 + " seconds"); return 0; }
From source file:FDFGenData.java
License:Open Source License
/**
 * Generates five raw files in turn and writes each one into {@code tabledir}.
 *
 * <p>Output names are {@code tabledir + "file" + k}, where {@code k} continues
 * from the number of entries currently listed under {@code tabledir} (zero if
 * the listing is {@code null}).
 *
 * @param tabledir prefix of the target files; also the directory that is listed
 * @param num      value forwarded to the raw-file generator
 * @throws Exception if listing, generating or writing fails
 */
public static void testwritefile(String tabledir, int num) throws Exception {
    final String rawtmp = "/tmp/raw/rawfile";

    FileSystem fs = FileSystem.get(new Configuration());
    FileStatus[] existing = fs.listStatus(new Path(tabledir));
    final int alreadyPresent = (existing == null) ? 0 : existing.length;

    for (int k = 1; k <= 5; k++) {
        PT.testgenrawfiler(rawtmp, num);
        PT.testwritefdf(tabledir + "file" + (alreadyPresent + k), rawtmp, false, (short) -1);
    }
}
From source file:FDFGenData.java
License:Open Source License
/**
 * Writes {@code recordnum} binary records to {@code filename} on the given file system.
 *
 * <p>Each record holds the value {@code i / 1000} (integer division of the record
 * index) written in turn as byte, short, int, long, float and double, followed by
 * a UTF string of random length between 7 and 18 made of consecutive letters
 * starting at {@code 'a'}.
 *
 * @param fs        file system on which to create the file
 * @param filename  path of the file to create
 * @param recordnum number of records to write
 * @throws IOException if the file cannot be created or written
 */
static void testgenrawfile(FileSystem fs, String filename, int recordnum) throws IOException {
    Random r = new Random();
    // Purely local buffer, so the unsynchronized builder suffices.
    StringBuilder sb = new StringBuilder();

    FSDataOutputStream fos = fs.create(new Path(filename));
    try {
        for (int i = 0; i < recordnum; i++) {
            int v = i / 1000;
            fos.writeByte(v);
            fos.writeShort(v);
            fos.writeInt(v);
            fos.writeLong(v);
            fos.writeFloat(v);
            fos.writeDouble(v);

            int strnum = r.nextInt(12) + 7;
            sb.setLength(0);
            for (int j = 0; j < strnum; j++) {
                sb.append((char) ('a' + j));
            }
            fos.writeUTF(sb.toString());
        }
    } finally {
        // Close the stream even if a write fails part-way through.
        fos.close();
    }
}
From source file:FormatStorageBasicTest.java
License:Open Source License
public void testPersistentLineIndexInfo() { try {//from w w w . j a v a 2 s . c o m String fileName = prefix + "testPersistentLineIndexInfo"; Path path = new Path(fileName); FileSystem fs = FileSystem.get(new Configuration()); FSDataOutputStream out = fs.create(path); IndexInfo info = new IndexInfo(); info.beginLine = 11; info.endLine = 22; info.offset = 33; info.len = 44; info.idx = 55; info.persistentLineIndexInfo(out); out.close(); FSDataInputStream in = fs.open(path); int beginLine = in.readInt(); int endLine = in.readInt(); long offset = in.readLong(); long len = in.readLong(); int idx = in.readInt(); in.close(); if (beginLine != 11) { fail("beginLine fail:" + beginLine); } if (endLine != 22) { fail("endLine fail:" + endLine); } if (offset != 33) { fail("offset fail:" + offset); } if (len != 44) { fail("len fail:" + len); } if (idx != 55) { fail("idx fail:" + idx); } } catch (IOException e) { fail(e.getMessage()); } }
From source file:FormatStorageBasicTest.java
License:Open Source License
public void testPersistentKeyIndexInfo() { try {// ww w .j a va 2s . c om String fileName = prefix + "testPersistentKeyIndexInfo"; Path path = new Path(fileName); FileSystem fs = FileSystem.get(new Configuration()); FSDataOutputStream out = fs.create(path); IndexInfo info = new IndexInfo(); info.beginKey = 111; info.endKey = 222; info.persistentKeyIndexInfo(out); out.close(); FSDataInputStream in = fs.open(path); int beginKey = in.readInt(); int endKey = in.readInt(); in.close(); if (beginKey != 111) { fail("beginKey fail:" + beginKey); } if (endKey != 222) { fail("beginKey fail:" + beginKey); } } catch (IOException e) { fail(e.getMessage()); } }