List of usage examples for the org.apache.hadoop.io.IntWritable constructor
public IntWritable()
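The no-argument constructor creates a writable whose value defaults to 0; in the project examples below it is typically used as a reusable key or value buffer that is filled in later with set(). As a minimal standalone sketch (the class name IntWritableDemo is illustrative, not taken from any of the source files):

import org.apache.hadoop.io.IntWritable;

public class IntWritableDemo {
    public static void main(String[] args) {
        IntWritable writable = new IntWritable(); // value starts at 0
        writable.set(42);                         // reuse the same instance as a mutable buffer
        System.out.println(writable.get());      // prints 42
    }
}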
From source file:com.philiphubbard.digraph.MRBuildVerticesTest.java
License:Open Source License
private static void readVertices(FileStatus status, ArrayList<MRVertex> vertices, Configuration conf)
        throws IOException {
    Path path = status.getPath();
    if (path.getName().startsWith("part")) {
        System.out.println(path);
        SequenceFile.Reader reader = new SequenceFile.Reader(conf, SequenceFile.Reader.file(path));
        IntWritable key = new IntWritable();
        BytesWritable value = new BytesWritable();
        while (reader.next(key, value))
            vertices.add(new MRVertex(value, conf));
        reader.close();
    }
}
From source file:com.philiphubbard.digraph.MRCompressChainsTest.java
License:Open Source License
private static void cleanupTest(Configuration conf) throws IOException {
    FileSystem fileSystem = FileSystem.get(conf);
    ArrayList<MRVertex> vertices = new ArrayList<MRVertex>();
    FileStatus[] files = fileSystem.listStatus(new Path(testOutput));
    for (FileStatus status : files) {
        Path path = status.getPath();
        if (path.getName().startsWith("part")) {
            System.out.println(path);
            SequenceFile.Reader reader = new SequenceFile.Reader(conf, SequenceFile.Reader.file(path));
            IntWritable key = new IntWritable();
            BytesWritable value = new BytesWritable();
            while (reader.next(key, value))
                vertices.add(new MRVertex(value, conf));
            reader.close();
        }
    }
    for (MRVertex vertex : vertices)
        System.out.println(vertex.toDisplayString());
    fileSystem.delete(new Path(testInput), true);
    fileSystem.delete(new Path(testOutput), true);
    fileSystem.close();
}
From source file:com.philiphubbard.sabe.MRAssembler.java
License:Open Source License
private void readVertices(FileStatus status, ArrayList<MRMerVertex> vertices, Configuration conf)
        throws IOException {
    Path path = status.getPath();
    if (path.getName().startsWith("part")) {
        SequenceFile.Reader reader = new SequenceFile.Reader(conf, SequenceFile.Reader.file(path));
        IntWritable key = new IntWritable();
        BytesWritable value = new BytesWritable();
        while (reader.next(key, value))
            vertices.add(new MRMerVertex(value, conf));
        reader.close();
    }
}
From source file:com.scaleoutsoftware.soss.hserver.examples.SingleWordCount.java
License:Apache License
public static void main(String[] args) throws Exception {
    if (args.length != 1) {
        throw new RuntimeException("Required args: wordToCount (string)");
    }

    // Create parameter argument to send to the reducers
    MapArguments mapArgs = new MapArguments(args[0]);

    // Create the invocation grid
    InvocationGrid grid = HServerJob.getInvocationGridBuilder("WordCountIG").addClass(TokenizerMapper.class)
            .addClass(IntSumReducer.class).addClass(MapArguments.class).load();

    // Create a default configuration
    Configuration conf = new Configuration();

    // Create the input map
    NamedMap<IntWritable, Text> inputMap = NamedMapFactory.getMap("InputMap",
            new WritableSerializer<IntWritable>(IntWritable.class), new WritableSerializer<Text>(Text.class));

    // Create the output map
    NamedMap<Text, IntWritable> outputMap = NamedMapFactory.getMap("OutputMap",
            new WritableSerializer<Text>(Text.class), new WritableSerializer<IntWritable>(IntWritable.class));

    // Clear the input and output maps
    inputMap.clear();
    outputMap.clear();

    // Create a bulk loader
    BulkLoader<IntWritable, Text> loader = inputMap.getBulkLoader();
    IntWritable key = new IntWritable();
    Text value = new Text();

    // Build the input map from generated text
    Scanner scanner = new Scanner(SAMPLE_INPUT);
    for (int count = 0; scanner.hasNext(); count++) {
        value.set(scanner.next());
        key.set(count);
        loader.put(key, value);
    }
    scanner.close();

    // Close the bulk loader
    loader.close();

    // Assign the invocation grid to the maps, so parallel operations can be performed
    inputMap.setInvocationGrid(grid);
    outputMap.setInvocationGrid(grid);

    // Create an hServer job
    HServerJob job = new HServerJob(conf, "word count", false, grid);
    job.setJarByClass(SingleWordCount.class);
    job.setMapperClass(TokenizerMapper.class);
    job.setCombinerClass(IntSumReducer.class);
    job.setMapOutputKeyClass(NullWritable.class);
    job.setMapOutputValueClass(IntWritable.class);
    job.setInputFormatClass(NamedMapInputFormat.class);
    job.setOutputFormatClass(GridOutputFormat.class);

    // Pass the map arguments object to the job
    job.setJobParameter(mapArgs);

    // Set named maps for the input and output formats
    NamedMapInputFormat.setNamedMap(job, inputMap);
    GridOutputFormat.setNamedMap(job, outputMap);

    // Execute the job
    IntWritable result = (IntWritable) job.runAndGetResult();

    // Unload the invocation grid
    grid.unload();

    // Output the single result and its frequency
    System.out.println("The word: " + args[0] + " was used - " + result + " times!");
}
From source file:com.scaleoutsoftware.soss.hserver.examples.WordCountParameterPassing.java
License:Apache License
public static void main(String[] args) throws Exception {
    if (args.length != 2) {
        throw new RuntimeException("Required args: wordMinLength wordMaxLength");
    }
    int minLength = Integer.parseInt(args[0]);
    int maxLength = Integer.parseInt(args[1]);

    // Create parameter argument to send to the reducers
    MapArguments mapArgs = new MapArguments(minLength, maxLength);

    // Create the invocation grid
    InvocationGrid grid = HServerJob.getInvocationGridBuilder("WordCountIG").addClass(TokenizerMapper.class)
            .addClass(IntSumReducer.class).addClass(MapArguments.class).load();

    // Create a default configuration
    Configuration conf = new Configuration();

    // Create the input map
    NamedMap<IntWritable, Text> inputMap = NamedMapFactory.getMap("InputMap",
            new WritableSerializer<IntWritable>(IntWritable.class), new WritableSerializer<Text>(Text.class));

    // Create the output map
    NamedMap<Text, IntWritable> outputMap = NamedMapFactory.getMap("OutputMap",
            new WritableSerializer<Text>(Text.class), new WritableSerializer<IntWritable>(IntWritable.class));

    // Clear the input and output maps
    inputMap.clear();
    outputMap.clear();

    // Create a bulk loader
    BulkLoader<IntWritable, Text> loader = inputMap.getBulkLoader();
    IntWritable key = new IntWritable();
    Text value = new Text();

    // Build the input map from generated text
    Scanner scanner = new Scanner(SAMPLE_INPUT);
    for (int count = 0; scanner.hasNext(); count++) {
        value.set(scanner.next());
        key.set(count);
        loader.put(key, value);
    }
    scanner.close();

    // Close the bulk loader
    loader.close();

    // Assign the invocation grid to the maps, so parallel operations can be performed
    inputMap.setInvocationGrid(grid);
    outputMap.setInvocationGrid(grid);

    // Create an hServer job
    HServerJob job = new HServerJob(conf, "word count", false, grid);
    job.setJarByClass(WordCountParameterPassing.class);
    job.setMapperClass(TokenizerMapper.class);
    job.setCombinerClass(IntSumReducer.class);
    job.setReducerClass(IntSumReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    job.setInputFormatClass(NamedMapInputFormat.class);
    job.setOutputFormatClass(GridOutputFormat.class);

    // Pass the map arguments object to the job
    job.setJobParameter(mapArgs);

    // Set named maps for the input and output formats
    NamedMapInputFormat.setNamedMap(job, inputMap);
    GridOutputFormat.setNamedMap(job, outputMap);

    // Execute the job
    job.waitForCompletion(true);

    // Unload the invocation grid
    grid.unload();

    // Output resulting words and their frequencies
    Iterable<Text> results = outputMap.keySet();
    System.out.println("Following words were longer than " + mapArgs.minWordLength + " and shorter than "
            + mapArgs.maxWordLength + ":");
    for (Text word : results) {
        System.out.println("\"" + word.toString() + "\" was used " + outputMap.get(word) + " times.");
    }
}
From source file:com.scaleoutsoftware.soss.hserver.Test_MapToMapCopy.java
License:Apache License
public static void main(String argv[]) throws Exception {
    final NamedMap<IntWritable, Text> inputMap = NamedMapFactory.getMap("map-i",
            new WritableSerializer(IntWritable.class), new WritableSerializer(Text.class));
    final NamedMap<IntWritable, Text> outputMap = NamedMapFactory.getMap("map-o",
            new WritableSerializer(IntWritable.class), new WritableSerializer(Text.class));
    inputMap.clear();
    outputMap.clear();
    Thread.sleep(10000);
    BulkLoader<IntWritable, Text> put = inputMap.getBulkLoader();
    String content = "xcccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx";
    Text contentW = new Text(content);
    IntWritable count = new IntWritable();
    for (int i = 0; i < 1000; i++) {
        count.set(i);
        put.put(count, contentW);
    }
    put.close();
    InvocationGrid grid = HServerJob.getInvocationGridBuilder("MyGrid" + System.currentTimeMillis())
            .addClass(Test_MapToMapCopy.class).load();
    HServerJob job;
    Configuration configuration;
    for (int i = 0; i < 100; i++) {
        // MMF: enable memory-mapped files for the reducer and the named map
        configuration = new Configuration();
        configuration.setInt("mapred.hserver.setting.reducer.usememorymappedfiles", 1);
        configuration.setInt("mapred.hserver.setting.namedmap.usememorymappedfiles", 1);
        configuration.setInt("mapred.hserver.setting.map.maxtempmemorykb", 100000);
        job = new HServerJob(configuration, "Sample job");
        job.setGrid(grid);
        job.setMapOutputKeyClass(IntWritable.class);
        job.setMapOutputValueClass(Text.class);
        job.setOutputKeyClass(IntWritable.class);
        job.setOutputValueClass(Text.class);
        job.setInputFormatClass(NamedMapInputFormat.class);
        job.setOutputFormatClass(GridOutputFormat.class);
        NamedMapInputFormat.setNamedMap(job, inputMap);
        NamedMapInputFormat.setSuggestedNumberOfSplits(job, 64);
        GridOutputFormat.setNamedMap(job, outputMap);
        job.waitForCompletion(false);
        assertEquals(inputMap.size(), outputMap.size());
        outputMap.clear();
    }
    grid.unload();
}
From source file:com.scaleoutsoftware.soss.hserver.Test_MapToMapCopyMapred.java
License:Apache License
public int run(String[] args) throws Exception {
    final NamedMap<IntWritable, Text> inputMap = NamedMapFactory.getMap("mapr-i",
            new WritableSerializer(IntWritable.class), new WritableSerializer(Text.class));
    final NamedMap<IntWritable, Text> outputMap = NamedMapFactory.getMap("mapr-o",
            new WritableSerializer(IntWritable.class), new WritableSerializer(Text.class));
    inputMap.clear();
    outputMap.clear();
    Thread.sleep(15000);
    BulkLoader<IntWritable, Text> put = inputMap.getBulkLoader();
    String content = "xcccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx";
    Text contentW = new Text(content);
    IntWritable count = new IntWritable();
    int expectedSize = 10000;
    for (int i = 0; i < expectedSize; i++) {
        count.set(i);
        put.put(count, contentW);
    }
    put.close();
    InvocationGrid grid = HServerJob.getInvocationGridBuilder("MyGrid" + System.currentTimeMillis())
            .addClass(Test_MapToMapCopyMapred.class).load();
    JobConf configuration = new JobConf(getConf(), Test_MapToMapCopyMapred.class);
    configuration.setInt("mapred.hserver.setting.reducer.usememorymappedfiles", 0);
    configuration.setMapOutputKeyClass(IntWritable.class);
    configuration.setMapOutputValueClass(Text.class);
    configuration.setOutputKeyClass(IntWritable.class);
    configuration.setOutputValueClass(Text.class);
    configuration.setInputFormat(NamedMapInputFormatMapred.class);
    configuration.setOutputFormat(NamedMapOutputFormatMapred.class);
    NamedMapInputFormatMapred.setNamedMap(configuration, inputMap);
    NamedMapOutputFormatMapred.setNamedMap(configuration, outputMap);
    // Before the job runs, the output map should still be empty (0 + expectedSize)
    assertEquals(inputMap.size(), outputMap.size() + expectedSize);
    HServerJobClient.runJob(configuration, false, grid);
    assertEquals(inputMap.size(), outputMap.size());
    inputMap.clear();
    outputMap.clear();
    grid.unload();
    return 1;
}
From source file:com.twitter.algebra.AlgebraCommon.java
License:Apache License
/**
 * Convert an in-memory representation of a matrix to a distributed MapDir
 * format. It can then be used in distributed jobs.
 *
 * @param origMatrix the in-memory matrix to convert
 * @return path that will contain the matrix files
 * @throws Exception
 */
public static DistributedRowMatrix toMapDir(Matrix origMatrix, Path outPath, Path tmpPath, String label)
        throws Exception {
    Configuration conf = new Configuration();
    Path outputDir = new Path(outPath, label + origMatrix.numRows() + "x" + origMatrix.numCols());
    FileSystem fs = FileSystem.get(outputDir.toUri(), conf);
    if (!fs.exists(outputDir)) {
        Path mapDir = new Path(outputDir, "matrix-k-0");
        Path outputFile = new Path(mapDir, "data");
        @SuppressWarnings("deprecation")
        SequenceFile.Writer writer = new SequenceFile.Writer(fs, conf, outputFile, IntWritable.class,
                VectorWritable.class);
        VectorWritable vectorw = new VectorWritable();
        IntWritable intw = new IntWritable();
        try {
            for (int r = 0; r < origMatrix.numRows(); r++) {
                Vector vector = origMatrix.viewRow(r);
                vectorw.set(vector);
                intw.set(r);
                writer.append(intw, vectorw);
            }
        } finally {
            writer.close();
        }
        MapFile.fix(fs, mapDir, IntWritable.class, VectorWritable.class, false, conf);
    } else {
        log.warn("----------- Skip matrix " + outputDir + " - already exists");
    }
    DistributedRowMatrix dMatrix = new DistributedRowMatrix(outputDir, tmpPath, origMatrix.numRows(),
            origMatrix.numCols());
    dMatrix.setConf(conf);
    return dMatrix;
}
From source file:com.twitter.algebra.matrix.text.TestSequenceFile.java
License:Apache License
private static void printSequenceFile(String inputStr, int printRow) throws IOException {
    Configuration conf = new Configuration();
    Path finalNumberFile = new Path(inputStr);
    SequenceFile.Reader reader = new SequenceFile.Reader(FileSystem.get(conf), finalNumberFile, conf);
    double sum = 0;
    IntWritable key = new IntWritable();
    VectorWritable value = new VectorWritable();
    Vector printVector = null;
    while (reader.next(key, value)) {
        if (key.get() == printRow)
            printVector = value.get();
        // Count the non-zero elements of the row
        int cnt = 0;
        Iterator<Element> iter = value.get().nonZeroes().iterator();
        for (; iter.hasNext(); iter.next())
            cnt++;
        sum += value.get().zSum();
        System.out.println("# " + key + " " + cnt + " " + value.get().zSum());
    }
    System.out.println("SUM " + sum);
    reader.close();
    if (printVector != null)
        System.out.println("##### " + printRow + " " + printVector);
    else
        System.out.println("##### " + key + " " + value.get());
}
From source file:com.twitter.algebra.nmf.NMFCommon.java
License:Apache License
public static HashMap<Long, Integer> readHashMap(String inputStr) throws IOException {
    HashMap<Long, Integer> hashMap = new HashMap<Long, Integer>();
    Configuration conf = new Configuration();
    Path finalNumberFile = new Path(inputStr + "/part-r-00000");
    @SuppressWarnings("deprecation")
    SequenceFile.Reader reader = new SequenceFile.Reader(FileSystem.get(conf), finalNumberFile, conf);
    LongWritable key = new LongWritable();
    IntWritable value = new IntWritable();
    while (reader.next(key, value)) {
        hashMap.put(key.get(), value.get());
    }
    reader.close();
    return hashMap;
}