List of usage examples for the org.apache.hadoop.fs Path constructor:
public Path(URI aUri)
From source file: CouchbaseExportStressTest.java
License: Apache License
/**
 * Creates one input file named {@code input-<fileId>} under the
 * "CouchbaseExportStressTest" directory, containing {@code RECORDS_PER_FILE}
 * comma-separated records of the form {@code <value>,<ALPHABET x4>}.
 *
 * @param fileId index of the file; also offsets the record values so they are
 *               unique across all generated files
 * @throws IOException if the filesystem cannot be reached or written
 */
public void createFile(int fileId) throws IOException {
    Configuration conf = getConf();
    FileSystem fs = FileSystem.get(conf);
    Path dirPath = new Path("CouchbaseExportStressTest");
    fs.mkdirs(dirPath);
    Path filePath = new Path(dirPath, "input-" + fileId);
    // try-with-resources closes the writer (and the underlying stream) even if
    // a write fails; the original leaked both on exception and double-closed
    // the stream on success.
    try (OutputStream os = fs.create(filePath);
            Writer w = new BufferedWriter(new OutputStreamWriter(os))) {
        for (int i = 0; i < RECORDS_PER_FILE; i++) {
            // Offset by fileId so record values are globally unique.
            long v = (long) i + ((long) RECORDS_PER_FILE * (long) fileId);
            w.write("" + v + "," + ALPHABET + ALPHABET + ALPHABET + ALPHABET + "\n");
        }
    }
}
From source file: CouchbaseExportStressTest.java
License: Apache License
/** Create a set of data files to export. */ public void createData() throws IOException { Configuration conf = getConf(); FileSystem fs = FileSystem.get(conf); Path dirPath = new Path("CouchbaseExportStressTest"); if (fs.exists(dirPath)) { System.out.println("Export directory appears to already exist. Skipping data-gen."); return;// ww w . j a va2s .com } for (int i = 0; i < NUM_FILES; i++) { createFile(i); } }
From source file: BMTColumnLoader.java
License: Apache License
public int run(String[] args) { JobConf conf = new JobConf(getConf(), BMTColumnLoader.class); GenericOptionsParser parser = new GenericOptionsParser(conf, args); conf.setJobName("BMTColumnLoader"); conf.setMapperClass(Map.class); conf.setNumReduceTasks(0);// w w w .j a va 2 s. co m conf.setOutputKeyClass(Text.class); conf.setOutputValueClass(Text.class); List<String> other_args = new ArrayList<String>(); for (int i = 0; i < args.length; ++i) { other_args.add(args[i]); } FileInputFormat.setInputPaths(conf, new Path(other_args.get(0))); FileOutputFormat.setOutputPath(conf, new Path(other_args.get(1))); try { JobClient.runJob(conf); } catch (IOException e) { throw new RuntimeException(e); } return 0; }
From source file: Eggshell.java
License: Open Source License
/**
 * In charge of setting up and submitting the Hadoop job.
 * The method receives the remaining command-line arguments
 * after first being processed by Hadoop.
 *
 * @param args Hadoop processed command-line arguments
 * @return Returns 0 for success
 */
public int run(String[] args) throws Exception {
    String name = args[0]; // the JavaScript job file to evaluate
    String[] params = Arrays.copyOfRange(args, 1, args.length);
    // Object[] view of the remaining params, handed to the script engine.
    Object[] arguments = Arrays.copyOf(params, params.length, Object[].class);
    script = new Script(); // start the Javascript interpreter
    script.putProperty("arguments", script.newArray(arguments));
    // NOTE(review): these statics wire the interpreter into the Egg classes;
    // order matters, so this setup sequence is left untouched.
    EggGlobal.script = script;
    Egg.script = script;
    Egg.name = name;
    Egg.conf = getConf();
    Scriptable global = script.newObject("EggGlobal", null);
    script.setGlobalScope(global);
    script.evalLibrary(); // load the Eggshell Javascript library
    script.evalFile(name); // load the javascript job file
    /* create a temporary directory in hdfs to hold the serialized functions */
    FileSystem fs = FileSystem.get(getConf());
    Path dir = new Path(SCRIPT_DIR);
    if (fs.exists(dir))
        fs.delete(dir, true);
    fs.mkdirs(dir);
    /* call the 'eggshell' function */
    Object o = script.getProperty("eggshell");
    if (o instanceof Function) {
        Scriptable thisObj = script.newObject("Egg", null);
        Function f = (Function) o;
        o = script.callFunction(f, thisObj, params);
        script.exit();
        /* return the result of the 'eggshell' function, coerced to an exit
           code: Boolean -> 0/1, Integer -> as-is, Double -> truncated */
        if (o instanceof NativeJavaObject)
            o = ((NativeJavaObject) o).unwrap();
        if (o instanceof Boolean)
            return (Boolean) o ? 0 : 1;
        if (o instanceof Integer)
            return (Integer) o;
        if (o instanceof Double)
            return ((Double) o).intValue();
    }
    return 0;
}
From source file: LungDriver.java
License: Creative Commons License
private void cleanOutputPath(Configuration conf, String outputPath) { try {//from w w w .j av a 2 s . c o m FileSystem fs = FileSystem.get(conf); Path output = new Path(outputPath); fs.delete(output, true); } catch (IOException e) { System.err.println("Failed to delete temporary path"); e.printStackTrace(); } System.out.println("[DONE]\n"); }
From source file: DFSBenchmark.java
License: Apache License
public static void main(String[] args) throws Exception { Preconditions.checkArgument(args.length == 2, "Please provide the threads and base directory path details"); DFSBenchmark benchmark = new DFSBenchmark((Integer.parseInt(args[0])), new Path(args[1])); benchmark.runBenchmark();//from w w w. j a v a2 s . c o m }
From source file: TestDistinct.java
License: Apache License
/**
 * End-to-end test of the {@code Distinct} pipe: writes an input file with
 * duplicate lines, runs a distinct flow over it, and verifies the output
 * contains each distinct line exactly once, in order.
 */
public void testDistinct() throws IOException {
    FileSystem fs = FileSystem.get(new Configuration());
    fs.delete(new Path("/tmp/test_distinct_file"), true);
    fs.delete(new Path("/tmp/test_distinct_file_results"), true);

    // Write the input fixture: "distinct2" appears three times.
    FSDataOutputStream out = fs.create(new Path("/tmp/test_distinct_file"));
    PrintWriter pw = new PrintWriter(out);
    try {
        pw.println("distinct1");
        pw.println("distinct2");
        pw.println("distinct2");
        pw.println("distinct3");
        pw.println("distinct2");
        pw.flush();
    } finally {
        pw.close(); // also closes the underlying stream; original leaked the writer
    }

    Map<String, Tap> sources = new HashMap<String, Tap>();
    Map<String, Tap> sinks = new HashMap<String, Tap>();
    Tap inTap = new Hfs(new TextLine(new Fields("line")), "/tmp/test_distinct_file");
    Pipe inPipe = new Pipe("inPipe");
    sources.put("inPipe", inTap);
    Distinct distinct = new Distinct(inPipe);
    Tap outTap = new Hfs(new TextLine(new Fields("line")), "/tmp/test_distinct_file_results");
    Pipe outPipe = new Pipe("outPipe", distinct);
    sinks.put("outPipe", outTap);
    Flow flow = new FlowConnector().connect(sources, sinks, inPipe, outPipe);
    flow.complete();

    // Read back the single output part file and compare line-by-line against
    // the expected distinct values.
    FSDataInputStream in = fs.open(new Path("/tmp/test_distinct_file_results/part-00000"));
    BufferedReader reader = new BufferedReader(new InputStreamReader(in));
    ArrayList<String> results = new ArrayList<String>();
    results.add("distinct1");
    results.add("distinct2");
    results.add("distinct3");
    try {
        while (true) {
            String s = reader.readLine();
            if (s == null) {
                break;
            }
            assertEquals(results.remove(0), s);
        }
    } catch (Exception e) {
        fail("Got an exception while trying to verify the results: " + e.toString());
    } finally {
        reader.close(); // original leaked the reader
    }
    assertEquals("All results must be consumed!", 0, results.size());
}
From source file: DateExample_Month.java
License: Apache License
/**
 * Entry point: parses Hadoop generic options, validates that an input and an
 * output path remain, then configures and submits the word-count job.
 */
public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    String[] remaining = new GenericOptionsParser(conf, args).getRemainingArgs();
    if (remaining.length != 2) {
        System.err.println("Usage: wordcount <in> <out>");
        System.exit(2);
    }
    Job job = new Job(conf, "word count fs");
    job.setJarByClass(DateExample_Month.class);
    job.setMapperClass(TokenizerMapper.class);
    job.setCombinerClass(IntSumReducer.class);
    job.setReducerClass(IntSumReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    // Custom input format that filters out records with invalid keys.
    job.setInputFormatClass(IsValidKeyFormat.class);
    FileInputFormat.addInputPath(job, new Path(remaining[0]));
    FileOutputFormat.setOutputPath(job, new Path(remaining[1]));
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}
From source file: WordCount.java
License: Apache License
public int run(String[] args) throws Exception { ///start// ww w . j a v a 2 s .c om final long startTime = System.currentTimeMillis(); String outputReducerType = "filesystem"; if (args != null && args[0].startsWith(OUTPUT_REDUCER_VAR)) { String[] s = args[0].split("="); if (s != null && s.length == 2) outputReducerType = s[1]; } logger.info("output reducer type: " + outputReducerType); // use a smaller page size that doesn't divide the row count evenly to exercise the paging logic better ConfigHelper.setRangeBatchSize(getConf(), 99); for (int i = 0; i < WordCountSetup.TEST_COUNT; i++) { String columnName = "userId"; Job job = new Job(getConf(), "wordcount"); job.setJarByClass(WordCount.class); job.setMapperClass(TokenizerMapper.class); if (outputReducerType.equalsIgnoreCase("filesystem")) { job.setReducerClass(ReducerToFilesystem.class); job.setOutputKeyClass(Text.class); job.setOutputValueClass(Text.class); FileOutputFormat.setOutputPath(job, new Path(OUTPUT_PATH_PREFIX + i)); } else { job.setReducerClass(ReducerToCassandra.class); job.setMapOutputKeyClass(Text.class); job.setMapOutputValueClass(Text.class); job.setOutputKeyClass(ByteBuffer.class); job.setOutputValueClass(List.class); job.setOutputFormatClass(ColumnFamilyOutputFormat.class); ConfigHelper.setOutputColumnFamily(job.getConfiguration(), KEYSPACE, OUTPUT_COLUMN_FAMILY); job.getConfiguration().set(CONF_COLUMN_NAME, "sum"); } job.setInputFormatClass(ColumnFamilyInputFormat.class); ConfigHelper.setInputRpcPort(job.getConfiguration(), "9160"); ConfigHelper.setInputInitialAddress(job.getConfiguration(), "localhost"); //Change partitioner here ConfigHelper.setInputPartitioner(job.getConfiguration(), "RandomPartitioner"); ConfigHelper.setInputColumnFamily(job.getConfiguration(), KEYSPACE, COLUMN_FAMILY); SlicePredicate predicate = new SlicePredicate() .setColumn_names(Arrays.asList(ByteBufferUtil.bytes(columnName))); ConfigHelper.setInputSlicePredicate(job.getConfiguration(), predicate); // this will cause 
the predicate to be ignored in favor of scanning everything as a wide row //Son degisiklik Super Column Support ? // ConfigHelper.setInputColumnFamily(job.getConfiguration(), KEYSPACE, COLUMN_FAMILY, true); ConfigHelper.setOutputInitialAddress(job.getConfiguration(), "localhost"); ConfigHelper.setOutputPartitioner(job.getConfiguration(), "RandomPartitioner"); job.waitForCompletion(true); } final double duration = (System.currentTimeMillis() - startTime) / 1000.0; System.out.println(); System.out.println("Job Finished in " + duration + " seconds"); System.out.println(); return 0; }
From source file: TestPerformance.java
License: Open Source License
/**
 * Generates {@code filenum} FormatDataFile files under {@code filedir}, each
 * containing {@code recordnum} six-field records of random values
 * (byte, short, int, long, scaled float, scaled double).
 *
 * @param filedir   directory in which the files are created
 * @param filenum   number of files to generate
 * @param recordnum number of records to write into each file
 */
private static void genfdf(String filedir, int filenum, int recordnum) throws Exception {
    // NOTE(review): the original listed the directory and accumulated the
    // entry count into a local that was never read; that dead code is removed.
    for (int i = 0; i < filenum; i++) {
        System.err.println("generate a fdf");
        FormatDataFile fdf = createfdf(filedir, false, (short) -1);
        for (int j = 0; j < recordnum; j++) {
            Record record = new Record((short) 6);
            // One field per supported primitive type, indexed 0-5.
            record.addValue(new FieldValue((byte) r.nextInt(), (short) 0));
            record.addValue(new FieldValue((short) r.nextInt(), (short) 1));
            record.addValue(new FieldValue(r.nextInt(), (short) 2));
            record.addValue(new FieldValue(r.nextLong(), (short) 3));
            record.addValue(new FieldValue(r.nextFloat() * 10000, (short) 4));
            record.addValue(new FieldValue(r.nextDouble() * 100000000, (short) 5));
            // Progress report every million records (original message lacked
            // the separating space).
            if ((j + 1) % 1000000 == 0)
                System.err.println((j + 1) + " records written");
            fdf.addRecord(record);
        }
        fdf.close();
    }
}