List of usage examples for the org.apache.hadoop.fs.Path constructor
public Path(URI aUri)
public Path(String pathString)
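Before the collected samples, here is a minimal, self-contained sketch of constructing Path objects from a URI and from a plain string (the form most of the examples below use), plus the parent-plus-child form. The namenode address and directory names are placeholders chosen for illustration, not values from any of the source files below.

import java.net.URI;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class PathExample {
    public static void main(String[] args) throws Exception {
        // From a URI: scheme and authority become part of the path.
        Path fromUri = new Path(new URI("hdfs://namenode:8020/user/demo/input"));

        // From a String: relative or absolute; the scheme is resolved later by the FileSystem.
        Path fromString = new Path("/user/demo/output");

        // Parent-plus-child form is common when composing paths.
        Path child = new Path(fromString, "part-r-00000");

        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(conf);
        System.out.println(child + " exists: " + fs.exists(child));
    }
}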
From source file:Inlinks.java
License:Apache License
public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    if (otherArgs.length < 2) {
        System.err.println("Usage: inlinks <in> [<in>...] <out>");
        System.exit(2);
    }

    Job job = new Job(conf, "inlinks");
    job.setJarByClass(Inlinks.class);
    job.setMapperClass(TokenizerMapper.class);
    //job.setCombinerClass(IdentityReducer.class);
    job.setReducerClass(IdentityReducer.class);
    job.setNumReduceTasks(10);
    job.setOutputKeyClass(IntWritable.class);
    job.setOutputValueClass(IntWritable.class);

    // All arguments except the last are input paths; the last is the output path.
    for (int i = 0; i < otherArgs.length - 1; ++i) {
        FileInputFormat.addInputPath(job, new Path(otherArgs[i]));
    }
    FileOutputFormat.setOutputPath(job, new Path(otherArgs[otherArgs.length - 1]));

    System.exit(job.waitForCompletion(true) ? 0 : 1);
}
From source file:WordCount.java
License:Apache License
public int run(String[] args) throws Exception {
    final long startTime = System.currentTimeMillis();

    String outputReducerType = "filesystem";
    if (args != null && args[0].startsWith(OUTPUT_REDUCER_VAR)) {
        String[] s = args[0].split("=");
        if (s != null && s.length == 2)
            outputReducerType = s[1];
    }
    logger.info("output reducer type: " + outputReducerType);

    // use a smaller page size that doesn't divide the row count evenly to exercise the paging logic better
    ConfigHelper.setRangeBatchSize(getConf(), 99);

    for (int i = 0; i < WordCountSetup.TEST_COUNT; i++) {
        String columnName = "userId";

        Job job = new Job(getConf(), "wordcount");
        job.setJarByClass(WordCount.class);
        job.setMapperClass(TokenizerMapper.class);

        if (outputReducerType.equalsIgnoreCase("filesystem")) {
            // Write word counts to HDFS, one output directory per iteration.
            job.setReducerClass(ReducerToFilesystem.class);
            job.setOutputKeyClass(Text.class);
            job.setOutputValueClass(Text.class);
            FileOutputFormat.setOutputPath(job, new Path(OUTPUT_PATH_PREFIX + i));
        } else {
            // Write word counts back into a Cassandra column family.
            job.setReducerClass(ReducerToCassandra.class);
            job.setMapOutputKeyClass(Text.class);
            job.setMapOutputValueClass(Text.class);
            job.setOutputKeyClass(ByteBuffer.class);
            job.setOutputValueClass(List.class);
            job.setOutputFormatClass(ColumnFamilyOutputFormat.class);
            ConfigHelper.setOutputColumnFamily(job.getConfiguration(), KEYSPACE, OUTPUT_COLUMN_FAMILY);
            job.getConfiguration().set(CONF_COLUMN_NAME, "sum");
        }

        job.setInputFormatClass(ColumnFamilyInputFormat.class);
        ConfigHelper.setInputRpcPort(job.getConfiguration(), "9160");
        ConfigHelper.setInputInitialAddress(job.getConfiguration(), "localhost");
        ConfigHelper.setInputPartitioner(job.getConfiguration(), "RandomPartitioner");
        ConfigHelper.setInputColumnFamily(job.getConfiguration(), KEYSPACE, COLUMN_FAMILY);

        SlicePredicate predicate = new SlicePredicate()
                .setColumn_names(Arrays.asList(ByteBufferUtil.bytes(columnName)));
        ConfigHelper.setInputSlicePredicate(job.getConfiguration(), predicate);
        // this will cause the predicate to be ignored in favor of scanning everything as a wide row:
        // ConfigHelper.setInputColumnFamily(job.getConfiguration(), KEYSPACE, COLUMN_FAMILY, true);

        ConfigHelper.setOutputInitialAddress(job.getConfiguration(), "localhost");
        ConfigHelper.setOutputPartitioner(job.getConfiguration(), "RandomPartitioner");

        job.waitForCompletion(true);
    }

    final double duration = (System.currentTimeMillis() - startTime) / 1000.0;
    System.out.println();
    System.out.println("Job Finished in " + duration + " seconds");
    System.out.println();
    return 0;
}
From source file:AggregatedLogsPurger.java
License:Apache License
@Override
public int run(String[] args) throws Exception {
    this.conf = getConf();
    this.deleteOlderThanDays = getConf().getInt("deleteOlderThan", 0);
    Preconditions.checkArgument(deleteOlderThanDays > 1,
            "Usage: yarn jar " + "./target/yarn-logs-purger-1.0-SNAPSHOT.jar -DdeleteOlderThan=300 "
                    + "-DdeleteFiles=true. Please provide valid argument for deleteOlderThanDays. It has to "
                    + "be > 0");
    this.shouldDelete = getConf().getBoolean("deleteFiles", false);
    this.suffix = LogAggregationUtils.getRemoteNodeLogDirSuffix(conf);
    this.rootLogDir = new Path(
            conf.get(YarnConfiguration.NM_REMOTE_APP_LOG_DIR, YarnConfiguration.DEFAULT_NM_REMOTE_APP_LOG_DIR));
    return (purge()) ? 0 : -1;
}
From source file:TopFiveAverageMoviesRatedByFemales.java
public static void main(String[] args) throws Exception {
    // Job 1: read the ratings and gender inputs with MultipleInputs and reduce to movie id plus ratings.
    JobConf conf1 = new JobConf();
    Job job1 = new Job(conf1, "TopFiveAverageMoviesRatedByFemales");
    org.apache.hadoop.mapreduce.lib.input.MultipleInputs.addInputPath(job1, new Path(args[0]),
            TextInputFormat.class, TopFiveAverageMoviesRatedByFemales.MapRatings.class);
    org.apache.hadoop.mapreduce.lib.input.MultipleInputs.addInputPath(job1, new Path(args[1]),
            TextInputFormat.class, TopFiveAverageMoviesRatedByFemales.MapGender.class);
    job1.setReducerClass(TopFiveAverageMoviesRatedByFemales.ReduceToMovieIdAndRatings.class);
    job1.setMapOutputKeyClass(Text.class);
    job1.setMapOutputValueClass(Text.class);
    job1.setOutputKeyClass(Text.class);
    job1.setOutputValueClass(Text.class);
    job1.setJarByClass(TopFiveAverageMoviesRatedByFemales.class);
    job1.setOutputFormatClass(TextOutputFormat.class);
    FileOutputFormat.setOutputPath(job1, new Path(args[3]));

    boolean flag = job1.waitForCompletion(true);
    boolean flag1 = false;
    boolean flag2 = false;

    if (flag) {
        // Job 2: compute per-movie averages from job 1's output.
        JobConf conf2 = new JobConf();
        Job job2 = new Job(conf2, "AverageCalculation");
        //org.apache.hadoop.mapreduce.lib.input.MultipleInputs.addInputPath(job2, new Path(args[2]), TextInputFormat.class, Map2_1.class);
        //org.apache.hadoop.mapreduce.lib.input.MultipleInputs.addInputPath(job2, new Path(args[3]), TextInputFormat.class, Map2_2.class);
        job2.setMapperClass(MapAverage.class);
        job2.setReducerClass(ReduceAverage.class);
        job2.setMapOutputKeyClass(Text.class);
        job2.setMapOutputValueClass(Text.class);
        job2.setOutputKeyClass(Text.class);
        job2.setOutputValueClass(Text.class);
        job2.setJarByClass(TopFiveAverageMoviesRatedByFemales.class);
        job2.setOutputFormatClass(TextOutputFormat.class);
        FileInputFormat.addInputPath(job2, new Path(args[3]));
        FileOutputFormat.setOutputPath(job2, new Path(args[4]));
        flag1 = job2.waitForCompletion(true);
    }

    if (flag1) {
        // Job 3: join the averages with the movie-name input and reduce to the top five.
        JobConf conf3 = new JobConf();
        Job job3 = new Job(conf3, "AverageCalculation");
        org.apache.hadoop.mapreduce.lib.input.MultipleInputs.addInputPath(job3, new Path(args[4]),
                TextInputFormat.class, MapAverageTop5.class);
        org.apache.hadoop.mapreduce.lib.input.MultipleInputs.addInputPath(job3, new Path(args[2]),
                TextInputFormat.class, MapMovieName.class);
        //job3.setMapperClass(MapAverageTop5.class);
        job3.setReducerClass(ReduceAverageTop5.class);
        job3.setMapOutputKeyClass(Text.class);
        job3.setMapOutputValueClass(Text.class);
        job3.setOutputKeyClass(Text.class);
        job3.setOutputValueClass(Text.class);
        job3.setJarByClass(TopFiveAverageMoviesRatedByFemales.class);
        job3.setOutputFormatClass(TextOutputFormat.class);
        //FileInputFormat.addInputPath(job3, new Path(args[4]));
        FileOutputFormat.setOutputPath(job3, new Path(args[5]));
        flag2 = job3.waitForCompletion(true);
    }
}
From source file:GenIndex.java
License:Apache License
public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    if (otherArgs.length != 2) {
        System.err.println("Usage: wordcount <in> <out>");
        System.exit(2);
    }

    String tmpPath = "/local_scratch/wordcount/tmp";
    String stopWord = "/local_scratch/wordcount/stopword";

    // Job to count the words
    Job count_job = new Job(conf, "word count");
    count_job.setJarByClass(GenIndex.class);
    count_job.setMapperClass(Mapper1_Count.class);
    count_job.setCombinerClass(Reducer1_Count.class);
    count_job.setReducerClass(Reducer1_Count.class);
    count_job.setOutputKeyClass(Text.class);
    count_job.setOutputValueClass(IntWritable.class);
    FileInputFormat.addInputPath(count_job, new Path(otherArgs[0]));
    FileOutputFormat.setOutputPath(count_job, new Path(tmpPath));
    count_job.waitForCompletion(true);

    // Job to sort the word counts
    Job sort_job = new Job(conf, "word sort");
    sort_job.setJarByClass(GenIndex.class);
    sort_job.setMapperClass(Mapper2_Sort.class);
    sort_job.setCombinerClass(Reducer2_Sort.class);
    sort_job.setReducerClass(Reducer2_Sort.class);
    sort_job.setSortComparatorClass(SortReducerByValuesKeyComparator.class);
    sort_job.setOutputKeyClass(IntWritable.class);
    sort_job.setOutputValueClass(Text.class);
    FileInputFormat.addInputPath(sort_job, new Path(tmpPath));
    FileOutputFormat.setOutputPath(sort_job, new Path(stopWord));
    sort_job.waitForCompletion(true);

    // Job to generate the index
    Job index_job = new Job(conf, "word index");
    index_job.setJarByClass(GenIndex.class);
    index_job.setMapperClass(Mapper3_index.class);
    index_job.setCombinerClass(Reducer3_index.class);
    index_job.setReducerClass(Reducer3_index.class);
    index_job.setOutputKeyClass(Text.class);
    index_job.setOutputValueClass(Text.class);
    FileInputFormat.addInputPath(index_job, new Path(otherArgs[0]));
    FileOutputFormat.setOutputPath(index_job, new Path(otherArgs[1]));
    index_job.waitForCompletion(true);

    System.exit(0);
}
From source file:SingleFileReader.java
License:Apache License
private void writeFile(String cached, String fSize) throws Exception {
    fileSize = Double.parseDouble((fSize.split("g|G"))[0]) * 1024 * 1024 * 1024;
    String hdfsFolder = "/hdfs_test/";
    String hdfsFile = hdfsFolder + fSize;
    short replication = 1;
    boolean overWrite = true;
    int blockSize = 536870912;
    double numIters = fileSize / (double) bufferSize;

    Configuration conf = getConf();
    fs = FileSystem.get(conf);
    hdfsFilePath = new Path(hdfsFile);
    OutputStream os = fs.create(hdfsFilePath, overWrite, bufferSize, replication, blockSize);

    /* Initialize byte buffer */
    ByteBuffer buf = ByteBuffer.allocate(bufferSize);
    buf.order(ByteOrder.nativeOrder());
    for (int k = 0; k < bufferSize / Integer.SIZE; k++) {
        buf.putInt(k);
    }
    buf.flip();

    /* Write the content of the byte buffer to the HDFS file */
    t = new Timer();
    t.start(0);
    for (long i = 0; i < numIters; i++) {
        os.write(buf.array());
        buf.flip();
    }
    t.end(0);
    os.close();

    /* Check to see if the file needs to be cached */
    t.start(1);
    if (cached.equals("cache")) {
        String cmdStr = "/usr/local/hadoop/bin/hdfs cacheadmin -addDirective -path " + hdfsFile
                + " -pool hdfs_test";
        Process p = Runtime.getRuntime().exec(cmdStr);
        p.waitFor();

        String cmdOutLine = "";
        StringBuffer cmdOutBuf = new StringBuffer();
        BufferedReader cmdOutReader = new BufferedReader(new InputStreamReader(p.getInputStream()));
        while ((cmdOutLine = cmdOutReader.readLine()) != null) {
            cmdOutBuf.append(cmdOutLine + "\n");
        }
    }
    t.end(1);
}
From source file:BMTKeyValueLoader.java
License:Apache License
public int run(String[] args) {
    JobConf conf = new JobConf(getConf(), CassandraTableLoader.class);
    GenericOptionsParser parser = new GenericOptionsParser(conf, args);

    conf.setInputFormat(KeyValueTextInputFormat.class);
    conf.setJobName("BMTKeyValueLoader");
    conf.setMapperClass(Map.class);
    conf.setReducerClass(Reduce.class);
    conf.setOutputKeyClass(Text.class);
    conf.setOutputValueClass(Text.class);

    List<String> other_args = new ArrayList<String>();
    for (int i = 0; i < args.length; ++i) {
        other_args.add(args[i]);
    }
    FileInputFormat.setInputPaths(conf, new Path(other_args.get(0)));
    FileOutputFormat.setOutputPath(conf, new Path(other_args.get(1)));

    try {
        JobClient.runJob(conf);
    } catch (IOException e) {
        throw new RuntimeException(e);
    }
    return 0;
}
From source file:RawParascaleFileSystem.java
License:Apache License
/**
 * {@inheritDoc}
 */
@Override
public Path getHomeDirectory() {
    return new Path(getRawHomeDirectory()).makeQualified(this.getUri(), this.getWorkingDirectory());
}
From source file:HoopRemoteTask.java
License:Open Source License
public static int countTerms(Configuration conf) {
    dbg("postProcess ()");

    int count = 0;

    String output = conf.get("mapred.output.dir");
    if (output != null) {
        if (output.isEmpty() == true)
            output = HoopLink.outputpath;
    } else
        output = HoopLink.outputpath;

    Path inFile = new Path(output + "/part-r-00000");

    FSDataInputStream in = null;

    @SuppressWarnings("unused")
    String thisLine = null;

    try {
        in = HoopRemoteTask.hdfs.open(inFile);
        BufferedReader reader = new BufferedReader(new InputStreamReader(in));

        while ((thisLine = reader.readLine()) != null) {
            count++;
        }

        in.close();
    } catch (IOException e) {
        e.printStackTrace();
        dbg("Error opening file in HDFS");
    }

    return (count);
}
From source file:HoopRemoteTask.java
License:Open Source License
public static void postProcess(Configuration conf) {
    dbg("postProcess ()");

    if (HoopLink.nrshards == 1) {
        dbg("Only 1 shard needed, skipping post processing");
        return;
    }

    if (HoopLink.shardcreate.equals("mos") == true) {
        dbg("We shouldn't be post-processing since the HoopShardedOutputFormat class already did this");
        return;
    }

    if (HoopLink.shardcreate.equals("hdfs") == true) {
        dbg("Starting shard post-process task ...");

        int termCount = countTerms(conf);

        String output = conf.get("mapred.output.dir");
        if (output != null) {
            if (output.isEmpty() == true)
                output = HoopLink.outputpath;
        } else
            output = HoopLink.outputpath;

        dbg("Post processing " + termCount + " items in: " + output);

        Path inFile = new Path(output + "/part-r-00000");
        Path outFile = null;

        FSDataInputStream in = null;
        FSDataOutputStream out = null;

        try {
            in = HoopRemoteTask.hdfs.open(inFile);
            BufferedReader reader = new BufferedReader(new InputStreamReader(in));

            String thisLine;

            int count = 0;
            int split = Math.round(termCount / HoopLink.nrshards);
            int partition = 0;

            outFile = new Path(output + "/partition-" + partition + "-00000.txt");
            out = HoopRemoteTask.hdfs.create(outFile);

            if (out != null) {
                while ((thisLine = reader.readLine()) != null) {
                    StringBuffer formatted = new StringBuffer();
                    formatted.append(thisLine);
                    formatted.append("\n");

                    count++;

                    // Roll over to the next partition file once the current one holds its share of terms.
                    if (count > split) {
                        out.close();

                        partition++;
                        outFile = new Path(output + "/partition-" + partition + "-00000.txt");
                        out = HoopRemoteTask.hdfs.create(outFile);

                        split++;
                        count = 0;
                    }

                    byte[] utf8Bytes = formatted.toString().getBytes("UTF8");
                    // We get an additional 0 because of Java string encoding. leave it out!
                    out.write(utf8Bytes);
                }

                if (in != null)
                    in.close();
                if (out != null)
                    out.close();
            } else
                dbg("Error: unable to open output file");
        } catch (IOException e) {
            e.printStackTrace();
        }

        dbg("Starting rudimentary sharding into " + HoopLink.nrshards);

        if (in != null) {
            try {
                in.close();
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
    }

    HoopStatistics stats = new HoopStatistics();
    String results = stats.printStatistics(null);
    dbg(results);
}