List of usage examples for org.apache.hadoop.fs.FileSystem.get
public static FileSystem get(Configuration conf) throws IOException
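All of the examples below start from the same idiom: build a Configuration, ask FileSystem.get for the filesystem backing fs.defaultFS (HDFS on a typical cluster, the local filesystem otherwise), then operate on Paths. A minimal, self-contained sketch of that shared pattern; the class name and path here are illustrative, not taken from any example below:

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class FileSystemGetExample {
    public static void main(String[] args) throws IOException {
        // Resolve the default filesystem from the configuration.
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(conf);

        // Illustrative path; recursive delete-then-recreate is the
        // cleanup pattern several of the examples below rely on.
        Path dir = new Path("/tmp/filesystem-get-example");
        if (fs.exists(dir)) {
            fs.delete(dir, true);
        }
        fs.mkdirs(dir);
    }
}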
From source file: BigramRelativeFrequency.java
License: Apache License

/**
 * Runs this tool.
 */
@SuppressWarnings({ "static-access" })
public int run(String[] args) throws Exception {
    Options options = new Options();
    options.addOption(OptionBuilder.withArgName("path").hasArg().withDescription("input path").create(INPUT));
    options.addOption(OptionBuilder.withArgName("path").hasArg().withDescription("output path").create(OUTPUT));
    options.addOption(OptionBuilder.withArgName("num").hasArg().withDescription("number of reducers")
            .create(NUM_REDUCERS));

    CommandLine cmdline;
    CommandLineParser parser = new GnuParser();
    try {
        cmdline = parser.parse(options, args);
    } catch (ParseException exp) {
        System.err.println("Error parsing command line: " + exp.getMessage());
        return -1;
    }

    if (!cmdline.hasOption(INPUT) || !cmdline.hasOption(OUTPUT)) {
        System.out.println("args: " + Arrays.toString(args));
        HelpFormatter formatter = new HelpFormatter();
        formatter.setWidth(120);
        formatter.printHelp(this.getClass().getName(), options);
        ToolRunner.printGenericCommandUsage(System.out);
        return -1;
    }

    String inputPath = cmdline.getOptionValue(INPUT);
    String outputPath = cmdline.getOptionValue(OUTPUT);
    int reduceTasks = cmdline.hasOption(NUM_REDUCERS)
            ? Integer.parseInt(cmdline.getOptionValue(NUM_REDUCERS)) : 1;

    LOG.info("Tool name: " + BigramRelativeFrequency.class.getSimpleName());
    LOG.info(" - input path: " + inputPath);
    LOG.info(" - output path: " + outputPath);
    LOG.info(" - num reducers: " + reduceTasks);

    Job job = Job.getInstance(getConf());
    job.setJobName(BigramRelativeFrequency.class.getSimpleName());
    job.setJarByClass(BigramRelativeFrequency.class);
    job.setNumReduceTasks(reduceTasks);

    FileInputFormat.setInputPaths(job, new Path(inputPath));
    FileOutputFormat.setOutputPath(job, new Path(outputPath));

    job.setMapOutputKeyClass(PairOfStrings.class);
    job.setMapOutputValueClass(FloatWritable.class);
    job.setOutputKeyClass(PairOfStrings.class);
    job.setOutputValueClass(FloatWritable.class);
    //job.setOutputFormatClass(SequenceFileOutputFormat.class);

    job.setMapperClass(MyMapper.class);
    job.setCombinerClass(MyCombiner.class);
    job.setReducerClass(MyReducer.class);
    job.setPartitionerClass(MyPartitioner.class);

    // Delete the output directory if it exists already.
    Path outputDir = new Path(outputPath);
    FileSystem.get(getConf()).delete(outputDir, true);

    long startTime = System.currentTimeMillis();
    job.waitForCompletion(true);
    System.out.println("Job Finished in " + (System.currentTimeMillis() - startTime) / 1000.0 + " seconds");

    return 0;
}
From source file: TestIndexMergeMR.java
License: Open Source License

public void testIndexMergeMR() throws IOException {
    Configuration conf = new Configuration();
    FileSystem fs = FileSystem.get(conf);

    String indexdir = "indexdir";
    String indexdir1 = "indexdir1";
    int filenum = 10;
    int recnum = 1000;
    short idx = 0;
    TestUtil.genifdfindex(indexdir, filenum, recnum, idx, true);

    StringBuffer sb = new StringBuffer();
    FileStatus[] ss = fs.listStatus(new Path(indexdir));
    for (FileStatus fileStatus : ss) {
        sb.append(fileStatus.getPath().toString()).append(",");
    }
    IndexMergeMR.running(sb.substring(0, sb.length() - 1), indexdir1, conf);

    IFormatDataFile ifdf = new IFormatDataFile(conf);
    ifdf.open(indexdir1 + "/part-00000");
    for (int i = 0; i < 100; i++) {
        ifdf.next().show();
    }
    ifdf.close();

    fs.delete(new Path(indexdir), true);
    fs.delete(new Path(indexdir1), true);
}
From source file: TestColumnStorageOutputFormat.java
License: Open Source License

public static void main(String[] argv) throws IOException {
    try {
        if (argv.length != 2) {
            System.out.println("TestColumnStorageOutputFormat <output> <count>");
            System.exit(-1);
        }

        JobConf conf = new JobConf(TestColumnStorageOutputFormat.class);
        conf.setJobName("TestColumnStorageOutputFormat");
        conf.setNumMapTasks(1);
        conf.setNumReduceTasks(1);
        conf.setOutputKeyClass(LongWritable.class);
        conf.setOutputValueClass(Unit.Record.class);
        conf.setOutputFormat(ColumnStorageOutputFormat.class);
        conf.set("mapred.output.compress", "false");
        conf.set("mapred.output.dir", argv[0]);

        Head head = new Head();
        initHead(head);
        head.toJobConf(conf);

        Path outputPath = new Path(argv[0]);
        FileOutputFormat.setOutputPath(conf, outputPath);
        FileSystem fs = FileSystem.get(conf);
        MyColumnOutputFormat output = new MyColumnOutputFormat(head, conf, outputPath);

        long begin = System.currentTimeMillis();
        int count = Integer.valueOf(argv[1]);
        for (int i = 0; i < count; i++) {
            Record record = new Record((short) 210);
            for (short j = 0; j < 30; j++) {
                record.addValue(new FieldValue((byte) 1, (short) (j * 7 + 0)));
                record.addValue(new FieldValue((short) 2, (short) (j * 7 + 1)));
                record.addValue(new FieldValue((int) 3, (short) (j * 7 + 2)));
                record.addValue(new FieldValue((long) 4, (short) (j * 7 + 3)));
                record.addValue(new FieldValue((float) 5.5, (short) (j * 7 + 4)));
                record.addValue(new FieldValue((double) 6.6, (short) (j * 7 + 5)));
                record.addValue(new FieldValue((double) 7.7, (short) (j * 7 + 6)));
            }
            output.doWrite(record);
            if (i % 100000 == 0) {
                long end = System.currentTimeMillis();
                System.out.println(i + " records written, delay: " + (end - begin) / 1000 + "s");
            }
        }
        long end = System.currentTimeMillis();
        System.out.println(count + " records written, delay: " + (end - begin) / 1000 + "s");
    } catch (Exception e) {
        e.printStackTrace();
        System.out.println("got exception: " + e.getMessage());
    }
}
From source file: CouchbaseExportStressTest.java
License: Apache License

public void createFile(int fileId) throws IOException {
    Configuration conf = getConf();
    FileSystem fs = FileSystem.get(conf);
    Path dirPath = new Path("CouchbaseExportStressTest");
    fs.mkdirs(dirPath);
    Path filePath = new Path(dirPath, "input-" + fileId);

    OutputStream os = fs.create(filePath);
    Writer w = new BufferedWriter(new OutputStreamWriter(os));
    for (int i = 0; i < RECORDS_PER_FILE; i++) {
        long v = (long) i + ((long) RECORDS_PER_FILE * (long) fileId);
        w.write("" + v + "," + ALPHABET + ALPHABET + ALPHABET + ALPHABET + "\n");
    }
    w.close();
    os.close();
}
From source file: CouchbaseExportStressTest.java
License: Apache License

/** Create a set of data files to export. */
public void createData() throws IOException {
    Configuration conf = getConf();
    FileSystem fs = FileSystem.get(conf);
    Path dirPath = new Path("CouchbaseExportStressTest");
    if (fs.exists(dirPath)) {
        System.out.println("Export directory appears to already exist. Skipping data-gen.");
        return;
    }

    for (int i = 0; i < NUM_FILES; i++) {
        createFile(i);
    }
}
From source file: PerformanceEvaluation.java
License: Apache License

private Path writeInputFile(final Configuration c) throws IOException {
    FileSystem fs = FileSystem.get(c);
    if (!fs.exists(PERF_EVAL_DIR)) {
        fs.mkdirs(PERF_EVAL_DIR);
    }
    SimpleDateFormat formatter = new SimpleDateFormat("yyyyMMddHHmmss");
    Path subdir = new Path(PERF_EVAL_DIR, formatter.format(new Date()));
    fs.mkdirs(subdir);
    Path inputFile = new Path(subdir, "input.txt");
    PrintStream out = new PrintStream(fs.create(inputFile));

    // Make input random.
    Map<Integer, String> m = new TreeMap<Integer, String>();
    Hash h = MurmurHash.getInstance();
    int perClientRows = (this.R / this.N);
    try {
        for (int i = 0; i < 10; i++) {
            for (int j = 0; j < N; j++) {
                String s = "startRow=" + ((j * perClientRows) + (i * (perClientRows / 10)))
                        + ", perClientRunRows=" + (perClientRows / 10)
                        + ", totalRows=" + this.R
                        + ", clients=" + this.N
                        + ", flushCommits=" + this.flushCommits
                        + ", writeToWAL=" + this.writeToWAL
                        + ", scanCache=" + this.S;
                int hash = h.hash(Bytes.toBytes(s));
                m.put(hash, s);
            }
        }
        for (Map.Entry<Integer, String> e : m.entrySet()) {
            out.println(e.getValue());
        }
    } finally {
        out.close();
    }
    return subdir;
}
From source file: Eggshell.java
License: Open Source License

/**
 * In charge of setting up and submitting the Hadoop job.
 * The method receives the remaining command-line arguments
 * after first being processed by Hadoop.
 *
 * @param args Hadoop-processed command-line arguments
 * @return Returns 0 for success
 */
public int run(String[] args) throws Exception {
    String name = args[0];
    String[] params = Arrays.copyOfRange(args, 1, args.length);
    Object[] arguments = Arrays.copyOf(params, params.length, Object[].class);

    script = new Script(); // start the Javascript interpreter
    script.putProperty("arguments", script.newArray(arguments));
    EggGlobal.script = script;
    Egg.script = script;
    Egg.name = name;
    Egg.conf = getConf();

    Scriptable global = script.newObject("EggGlobal", null);
    script.setGlobalScope(global);

    script.evalLibrary(); // load the Eggshell Javascript library
    script.evalFile(name); // load the javascript job file

    /* create a temporary directory in hdfs to hold the serialized functions */
    FileSystem fs = FileSystem.get(getConf());
    Path dir = new Path(SCRIPT_DIR);
    if (fs.exists(dir))
        fs.delete(dir, true);
    fs.mkdirs(dir);

    /* call the 'eggshell' function */
    Object o = script.getProperty("eggshell");
    if (o instanceof Function) {
        Scriptable thisObj = script.newObject("Egg", null);
        Function f = (Function) o;
        o = script.callFunction(f, thisObj, params);
        script.exit();

        /* return the result of the 'eggshell' function */
        if (o instanceof NativeJavaObject)
            o = ((NativeJavaObject) o).unwrap();
        if (o instanceof Boolean)
            return (Boolean) o ? 0 : 1;
        if (o instanceof Integer)
            return (Integer) o;
        if (o instanceof Double)
            return ((Double) o).intValue();
    }
    return 0;
}
From source file: LungDriver.java
License: Creative Commons License

private void cleanOutputPath(Configuration conf, String outputPath) {
    try {
        FileSystem fs = FileSystem.get(conf);
        Path output = new Path(outputPath);
        fs.delete(output, true);
    } catch (IOException e) {
        System.err.println("Failed to delete temporary path");
        e.printStackTrace();
    }

    System.out.println("[DONE]\n");
}
From source file: TestDistinct.java
License: Apache License

public void testDistinct() throws IOException {
    FileSystem fs = FileSystem.get(new Configuration());
    fs.delete(new Path("/tmp/test_distinct_file"), true);
    fs.delete(new Path("/tmp/test_distinct_file_results"), true);

    FSDataOutputStream out = fs.create(new Path("/tmp/test_distinct_file"));
    PrintWriter pw = new PrintWriter(out);
    pw.println("distinct1");
    pw.println("distinct2");
    pw.println("distinct2");
    pw.println("distinct3");
    pw.println("distinct2");
    pw.flush();
    out.close();

    Map<String, Tap> sources = new HashMap<String, Tap>();
    Map<String, Tap> sinks = new HashMap<String, Tap>();

    Tap inTap = new Hfs(new TextLine(new Fields("line")), "/tmp/test_distinct_file");
    Pipe inPipe = new Pipe("inPipe");
    sources.put("inPipe", inTap);

    Distinct distinct = new Distinct(inPipe);

    Tap outTap = new Hfs(new TextLine(new Fields("line")), "/tmp/test_distinct_file_results");
    Pipe outPipe = new Pipe("outPipe", distinct);
    sinks.put("outPipe", outTap);

    Flow flow = new FlowConnector().connect(sources, sinks, inPipe, outPipe);
    flow.complete();

    FSDataInputStream in = fs.open(new Path("/tmp/test_distinct_file_results/part-00000"));
    BufferedReader reader = new BufferedReader(new InputStreamReader(in));

    ArrayList<String> results = new ArrayList<String>();
    results.add("distinct1");
    results.add("distinct2");
    results.add("distinct3");

    try {
        while (true) {
            String s = reader.readLine();
            if (s == null) {
                break;
            }
            assertEquals(results.remove(0), s);
        }
    } catch (Exception e) {
        fail("Got an exception while trying to verify the results: " + e.toString());
    }
    assertEquals("All results must be consumed!", 0, results.size());
}
From source file: FindMaxPageRankNodes.java
License: Apache License

/**
 * Runs this tool.
 */
@SuppressWarnings({ "static-access" })
public int run(String[] args) throws Exception {
    Options options = new Options();
    options.addOption(OptionBuilder.withArgName("path").hasArg().withDescription("input path").create(INPUT));
    options.addOption(OptionBuilder.withArgName("path").hasArg().withDescription("output path").create(OUTPUT));
    options.addOption(OptionBuilder.withArgName("num").hasArg().withDescription("top n").create(TOP));

    CommandLine cmdline;
    CommandLineParser parser = new GnuParser();
    try {
        cmdline = parser.parse(options, args);
    } catch (ParseException exp) {
        System.err.println("Error parsing command line: " + exp.getMessage());
        return -1;
    }

    if (!cmdline.hasOption(INPUT) || !cmdline.hasOption(OUTPUT) || !cmdline.hasOption(TOP)) {
        System.out.println("args: " + Arrays.toString(args));
        HelpFormatter formatter = new HelpFormatter();
        formatter.setWidth(120);
        formatter.printHelp(this.getClass().getName(), options);
        ToolRunner.printGenericCommandUsage(System.out);
        return -1;
    }

    String inputPath = cmdline.getOptionValue(INPUT);
    String outputPath = cmdline.getOptionValue(OUTPUT);
    int n = Integer.parseInt(cmdline.getOptionValue(TOP));

    LOG.info("Tool name: " + FindMaxPageRankNodes.class.getSimpleName());
    LOG.info(" - input: " + inputPath);
    LOG.info(" - output: " + outputPath);
    LOG.info(" - top: " + n);

    Configuration conf = getConf();
    conf.setInt("mapred.min.split.size", 1024 * 1024 * 1024);
    conf.setInt("n", n);

    Job job = Job.getInstance(conf);
    job.setJobName(FindMaxPageRankNodes.class.getName() + ":" + inputPath);
    job.setJarByClass(FindMaxPageRankNodes.class);
    job.setNumReduceTasks(1);

    FileInputFormat.addInputPath(job, new Path(inputPath));
    FileOutputFormat.setOutputPath(job, new Path(outputPath));

    job.setInputFormatClass(SequenceFileInputFormat.class);
    job.setOutputFormatClass(TextOutputFormat.class);

    job.setMapOutputKeyClass(IntWritable.class);
    job.setMapOutputValueClass(FloatWritable.class);
    job.setOutputKeyClass(IntWritable.class);
    job.setOutputValueClass(FloatWritable.class);

    job.setMapperClass(MyMapper.class);
    job.setReducerClass(MyReducer.class);

    // Delete the output directory if it exists already.
    FileSystem.get(conf).delete(new Path(outputPath), true);

    job.waitForCompletion(true);

    return 0;
}