List of usage examples for org.apache.hadoop.io.IntWritable.set
public void set(int value)
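IntWritable is a mutable wrapper around a primitive int: set(int value) overwrites the held value in place, which is why Hadoop code conventionally allocates a single instance and reuses it across records rather than creating a new object per call. A minimal standalone sketch of the pattern (illustrative only, not taken from the source files below; the class name IntWritableSetDemo is hypothetical):

import org.apache.hadoop.io.IntWritable;

public class IntWritableSetDemo {
    public static void main(String[] args) {
        // One reusable instance; set() mutates the held value in place.
        IntWritable w = new IntWritable();
        for (int i = 0; i < 3; i++) {
            w.set(i);
            System.out.println(w.get()); // prints 0, 1, 2
        }
    }
}

Every example that follows uses this same reuse pattern: one IntWritable created outside the loop, with set() called once per record before the value is appended, serialized, or stored.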
From source file:com.hdfs.concat.crush.CrushStandAloneSequenceFileTest.java
License:Apache License
private void createFile(File dir, String fileName, int key, int count) throws IOException {
    File file = new File(dir, fileName);
    Writer writer = SequenceFile.createWriter(FileSystem.get(job), job, new Path(file.getAbsolutePath()),
            Text.class, IntWritable.class);
    Text text = new Text(Integer.toString(key));
    IntWritable value = new IntWritable();
    for (int i = 0, j = 0; i < count; i++, j = j == 9 ? 0 : j + 1) {
        value.set(j);
        writer.append(text, value);
    }
    writer.close();
}
From source file:com.ibm.bi.dml.runtime.matrix.sort.ReadWithZeros.java
License:Open Source License
public void readNextKeyValuePairs(DoubleWritable readKey, IntWritable readValue) throws IOException {
    try {
        if (contain0s && justFound0) {
            readKey.set(keyAfterZero.get());
            readValue.set(valueAfterZero.get());
            contain0s = false;
        } else {
            readKey.readFields(currentStream);
            readValue.readFields(currentStream);
        }
    } catch (EOFException e) {
        // Case in which zero is the maximum value in the matrix.
        // The zero value from the last entry is not present in the input sorted matrix,
        // but needs to be accounted for.
        if (contain0s && !justFound0) {
            justFound0 = true;
            readKey.set(0);
            readValue.set((int) numZeros);
        } else {
            throw e;
        }
    }

    if (contain0s && !justFound0 && readKey.get() >= 0) {
        justFound0 = true;
        keyAfterZero.set(readKey.get());
        valueAfterZero.set(readValue.get());
        readKey.set(0);
        readValue.set((int) numZeros);
    }
}
From source file:com.jeffy.fbds.SequenceFileWriter.java
License:Apache License
public static void main(String[] args) throws IOException {
    String uri = args[0];
    Configuration conf = new Configuration();
    Path path = new Path(uri);
    IntWritable key = new IntWritable();
    Text value = new Text();
    try (SequenceFile.Writer writer = SequenceFile.createWriter(conf, Writer.file(path),
            Writer.keyClass(key.getClass()), Writer.valueClass(value.getClass()))) {
        for (int i = 0; i < 100; i++) {
            key.set(100 - i);
            value.set(DATA[i % DATA.length]);
            System.out.printf("[%s]\t%s\t%s\n", writer.getLength(), key, value);
            writer.append(key, value);
        }
    }
}
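For contrast with the writer above, a reader-side sketch (illustrative only, not from any of the source files listed here; the class name SequenceFileReadDemo is hypothetical) that reuses one key/value pair while scanning the file back, relying on next() to overwrite the reused writables:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.io.Text;

public class SequenceFileReadDemo {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        Path path = new Path(args[0]);
        // Reuse one key/value pair; next() refills them on every iteration.
        IntWritable key = new IntWritable();
        Text value = new Text();
        try (SequenceFile.Reader reader = new SequenceFile.Reader(conf, SequenceFile.Reader.file(path))) {
            while (reader.next(key, value)) {
                System.out.printf("%s\t%s%n", key, value);
            }
        }
    }
}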
From source file:com.jfolson.hive.serde.RTypedBytesWritableInput.java
License:Apache License
public IntWritable readInt(IntWritable iw) throws IOException {
    if (iw == null) {
        iw = new IntWritable();
    }
    int val = in.readInt();
    if (val == RType.NA_INTEGER) {
        return null;
    }
    iw.set(val);
    return iw;
}
From source file:com.m6d.filecrush.crush.Crush.java
License:Apache License
void writeDirs() throws IOException {
    print(Verbosity.INFO, "\nUsing temporary directory " + tmpDir.toUri().getPath() + "\n");

    FileStatus status = fs.getFileStatus(srcDir);
    Path tmpIn = new Path(tmpDir, "in");
    bucketFiles = new Path(tmpIn, "dirs");
    partitionMap = new Path(tmpIn, "partition-map");
    counters = new Path(tmpIn, "counters");
    skippedFiles = new HashSet<String>();
    removableFiles = new HashSet<String>();

    /*
     * Prefer the path returned by the status because it is always fully qualified.
     */
    List<Path> dirs = asList(status.getPath());

    Text key = new Text();
    Text value = new Text();

    Bucketer partitionBucketer = new Bucketer(maxTasks, 0, false);
    partitionBucketer.reset("partition-map");

    jobCounters = new Counters();
    int fileCount = 0;

    //Path bucketFile = new Path(tmpIn, "dirs_" + fileCount++);
    Writer writer = SequenceFile.createWriter(fs, job, bucketFiles, Text.class, Text.class,
            CompressionType.BLOCK);

    try {
        while (!dirs.isEmpty()) {
            List<Path> nextLevel = new LinkedList<Path>();

            for (Path dir : dirs) {
                String dirPath = dir.toUri().getPath();
                print(Verbosity.INFO, "\n\n[" + dirPath + "]");

                jobCounters.incrCounter(MapperCounter.DIRS_FOUND, 1);

                FileStatus[] contents = fs.listStatus(dir, new PathFilter() {
                    @Override
                    public boolean accept(Path testPath) {
                        if (ignoredFilesMatcher == null)
                            return true;
                        ignoredFilesMatcher.reset(testPath.toUri().getPath());
                        boolean ignores = ignoredFilesMatcher.matches();
                        if (ignores)
                            LOG.info("Ignoring file " + testPath);
                        return !ignores;
                    }
                });

                if (contents == null || contents.length == 0) {
                    print(Verbosity.INFO, "\n  Directory is empty");
                    jobCounters.incrCounter(MapperCounter.DIRS_SKIPPED, 1);
                } else {
                    List<FileStatus> crushables = new ArrayList<FileStatus>(contents.length);
                    Set<String> uncrushedFiles = new HashSet<String>(contents.length);
                    long crushableBytes = 0;

                    /*
                     * Queue sub directories for subsequent inspection and examine the files in this directory.
                     */
                    for (FileStatus content : contents) {
                        Path path = content.getPath();
                        if (content.isDir()) {
                            nextLevel.add(path);
                        } else {
                            String filePath = path.toUri().getPath();
                            boolean skipFile = false;
                            if (skippedFilesMatcher != null) {
                                skippedFilesMatcher.reset(filePath);
                                if (skippedFilesMatcher.matches()) {
                                    skipFile = true;
                                }
                            }

                            boolean changed = uncrushedFiles.add(filePath);
                            assert changed : path.toUri().getPath();

                            long fileLength = content.getLen();
                            if (!skipFile && fileLength <= maxEligibleSize) {
                                if (removeEmptyFiles && fileLength == 0)
                                    removableFiles.add(filePath);
                                else {
                                    crushables.add(content);
                                    crushableBytes += fileLength;
                                }
                            }
                        }
                    }

                    /*
                     * We found a directory with data in it. Make sure we know how to name the crush output
                     * file and then increment the number of files we found.
                     */
                    if (!uncrushedFiles.isEmpty()) {
                        if (-1 == findMatcher(dir)) {
                            throw new IllegalArgumentException(
                                    "Could not find matching regex for directory: " + dir);
                        }
                        jobCounters.incrCounter(MapperCounter.FILES_FOUND, uncrushedFiles.size());
                    }

                    if (0 == crushableBytes) {
                        print(Verbosity.INFO, "\n  Directory has no crushable files");
                        jobCounters.incrCounter(MapperCounter.DIRS_SKIPPED, 1);
                    } else {
                        /*
                         * We found files to consider for crushing.
                         */
                        long nBlocks = crushableBytes / dfsBlockSize;
                        if (nBlocks * dfsBlockSize != crushableBytes) {
                            nBlocks++;
                        }

                        /*
                         * maxFileBlocks will be huge in v1 mode, which will lead to one bucket per directory.
                         */
                        long dirBuckets = nBlocks / maxFileBlocks;
                        if (dirBuckets * maxFileBlocks != nBlocks) {
                            dirBuckets++;
                        }
                        if (dirBuckets > Integer.MAX_VALUE) {
                            throw new AssertionError("Too many buckets: " + dirBuckets);
                        }

                        Bucketer directoryBucketer = new Bucketer((int) dirBuckets, excludeSingleFileDirs);
                        directoryBucketer.reset(getPathPart(dir));

                        for (FileStatus file : crushables) {
                            directoryBucketer.add(new FileStatusHasSize(file));
                        }

                        List<Bucket> crushFiles = directoryBucketer.createBuckets();
                        if (crushFiles.isEmpty()) {
                            jobCounters.incrCounter(MapperCounter.DIRS_SKIPPED, 1);
                            print(Verbosity.INFO, "\n  Directory skipped");
                        } else {
                            nBuckets += crushFiles.size();
                            jobCounters.incrCounter(MapperCounter.DIRS_ELIGIBLE, 1);
                            print(Verbosity.INFO, "\n  Generating " + crushFiles.size() + " output files");

                            /*
                             * Write out the mapping between a bucket and a file.
                             */
                            for (Bucket crushFile : crushFiles) {
                                String bucketId = crushFile.name();
                                List<String> filesInBucket = crushFile.contents();

                                print(Verbosity.INFO,
                                        format("\n  Output %s will include %,d input bytes from %,d files",
                                                bucketId, crushFile.size(), filesInBucket.size()));

                                key.set(bucketId);

                                for (String f : filesInBucket) {
                                    boolean changed = uncrushedFiles.remove(f);
                                    assert changed : f;

                                    pathMatcher.reset(f);
                                    pathMatcher.matches();
                                    value.set(pathMatcher.group(5));

                                    /*
                                     * Write one row per file to maximize the number of mappers.
                                     */
                                    writer.append(key, value);

                                    /*
                                     * Print the input file with four leading spaces.
                                     */
                                    print(Verbosity.VERBOSE, "\n    " + f);
                                }

                                jobCounters.incrCounter(MapperCounter.FILES_ELIGIBLE, filesInBucket.size());
                                partitionBucketer.add(crushFile);
                            }
                        }
                    }

                    if (!removableFiles.isEmpty()) {
                        print(Verbosity.INFO, "\n  Marked " + removableFiles.size() + " files for removal");
                        for (String removable : removableFiles) {
                            uncrushedFiles.remove(removable);
                            print(Verbosity.VERBOSE, "\n    " + removable);
                        }
                        jobCounters.incrCounter(MapperCounter.FILES_REMOVED, removableFiles.size());
                    }

                    if (!uncrushedFiles.isEmpty()) {
                        print(Verbosity.INFO, "\n  Skipped " + uncrushedFiles.size() + " files");
                        for (String uncrushed : uncrushedFiles) {
                            print(Verbosity.VERBOSE, "\n    " + uncrushed);
                        }
                        jobCounters.incrCounter(MapperCounter.FILES_SKIPPED, uncrushedFiles.size());
                    }

                    skippedFiles.addAll(uncrushedFiles);
                }
            }

            dirs = nextLevel;
        }
    } finally {
        writer.close();
    }

    /*
     * Now that we have processed all the directories, write the partition map.
     */
    List<Bucket> partitions = partitionBucketer.createBuckets();
    assert partitions.size() <= maxTasks;

    writer = SequenceFile.createWriter(fs, job, partitionMap, Text.class, IntWritable.class);
    IntWritable partNum = new IntWritable();
    int totalReducers = 0;

    for (Bucket partition : partitions) {
        String partitionName = partition.name();
        int p = Integer.parseInt(partitionName.substring(partitionName.lastIndexOf('-') + 1));
        partNum.set(p);

        if (partition.contents().size() > 0)
            totalReducers++;

        for (String bucketId : partition.contents()) {
            key.set(bucketId);
            writer.append(key, partNum);
        }
    }
    writer.close();

    print(Verbosity.INFO, "\n\nNumber of allocated reducers = " + totalReducers);
    job.setInt("mapreduce.job.reduces", totalReducers);

    DataOutputStream countersStream = fs.create(this.counters);
    jobCounters.write(countersStream);
    countersStream.close();
}
From source file:com.microsoft.canberra.tf.util.DoubleMatrixRecordReader.java
License:Open Source License
@Override
public boolean next(final IntWritable rowId, final DoubleMatrix matrixRow) throws IOException {
    if (!this.textRecordReader.next(this.offset, this.text)) {
        return false;
    }
    LOG.log(Level.FINEST, "RecordReader: {0} :: {1}", new Object[] { this.offset, this.text });
    final String[] fields = this.text.toString().split("\\s+");
    if (fields.length <= 1) {
        return false;
    }
    rowId.set(Integer.parseInt(fields[0]));
    matrixRow.resize(fields.length - 1, 1);
    for (int i = 1; i < fields.length; ++i) {
        matrixRow.put(i - 1, Double.parseDouble(fields[i]));
    }
    return true;
}
From source file:com.mozilla.grouperfish.transforms.coclustering.pig.storage.MahoutVectorStorage.java
License:Apache License
@Override
public void putNext(Tuple t) throws IOException {
    IntWritable outputKey = new IntWritable();
    VectorWritable outputValue = new VectorWritable();
    outputKey.set((Integer) t.get(0));
    Tuple currRow = (Tuple) t.get(1);
    Vector currRowVector;
    if (dimensions == 0) {
        throw new IllegalArgumentException("Trying to create 0 dimension vector");
    }
    if (STORE_AS_DENSE) {
        currRowVector = new NamedVector(new DenseVector(dimensions), outputKey.toString());
    } else if (STORE_AS_SEQUENTIAL) {
        currRowVector = new NamedVector(new SequentialAccessSparseVector(dimensions, currRow.size()),
                outputKey.toString());
    } else {
        currRowVector = new NamedVector(new RandomAccessSparseVector(dimensions, currRow.size()),
                outputKey.toString());
    }
    for (int ii = 0; ii < currRow.size(); ii++) {
        Object o = currRow.get(ii);
        switch (currRow.getType(ii)) {
        case DataType.INTEGER:
        case DataType.LONG:
        case DataType.FLOAT:
        case DataType.DOUBLE:
            // Go through Number to avoid a ClassCastException when the field is not already a Double.
            currRowVector.set(ii, ((Number) o).doubleValue());
            break;
        case DataType.TUPLE:
            // If this is a tuple then we want to set column and element.
            Tuple subt = (Tuple) o;
            currRowVector.set((Integer) subt.get(0), (Double) subt.get(1));
            break;
        default:
            throw new RuntimeException("Unexpected tuple form");
        }
    }
    outputValue.set(currRowVector);
    try {
        writer.write(outputKey, outputValue);
    } catch (InterruptedException e) {
        LOG.error("Interrupted while writing", e);
    }
}
From source file:com.scaleoutsoftware.soss.hserver.examples.SingleWordCount.java
License:Apache License
public static void main(String[] args) throws Exception {
    if (args.length != 1) {
        throw new RuntimeException("Required args: wordToCount (string)");
    }

    // Create parameter argument to send to the reducers
    MapArguments mapArgs = new MapArguments(args[0]);

    // Create the invocation grid
    InvocationGrid grid = HServerJob.getInvocationGridBuilder("WordCountIG").addClass(TokenizerMapper.class)
            .addClass(IntSumReducer.class).addClass(MapArguments.class).load();

    // Create a default configuration
    Configuration conf = new Configuration();

    // Create the input map
    NamedMap<IntWritable, Text> inputMap = NamedMapFactory.getMap("InputMap",
            new WritableSerializer<IntWritable>(IntWritable.class), new WritableSerializer<Text>(Text.class));

    // Create the output map
    NamedMap<Text, IntWritable> outputMap = NamedMapFactory.getMap("OutputMap",
            new WritableSerializer<Text>(Text.class), new WritableSerializer<IntWritable>(IntWritable.class));

    // Clear the input and output maps
    inputMap.clear();
    outputMap.clear();

    // Create a BulkPut object
    BulkLoader<IntWritable, Text> loader = inputMap.getBulkLoader();
    IntWritable key = new IntWritable();
    Text value = new Text();

    // Build the input map from generated text
    Scanner scanner = new Scanner(SAMPLE_INPUT);
    for (int count = 0; scanner.hasNext(); count++) {
        value.set(scanner.next());
        key.set(count);
        loader.put(key, value);
    }
    scanner.close();

    // Close the bulk loader
    loader.close();

    // Assign the invocation grid to the maps, so parallel operations can be performed
    inputMap.setInvocationGrid(grid);
    outputMap.setInvocationGrid(grid);

    // Create an hServer job
    HServerJob job = new HServerJob(conf, "word count", false, grid);
    job.setJarByClass(SingleWordCount.class);
    job.setMapperClass(TokenizerMapper.class);
    job.setCombinerClass(IntSumReducer.class);
    job.setMapOutputKeyClass(NullWritable.class);
    job.setMapOutputValueClass(IntWritable.class);
    job.setInputFormatClass(NamedMapInputFormat.class);
    job.setOutputFormatClass(GridOutputFormat.class);

    // Pass the map arguments object to the job
    job.setJobParameter(mapArgs);

    // Set named maps for the input and output formats
    NamedMapInputFormat.setNamedMap(job, inputMap);
    GridOutputFormat.setNamedMap(job, outputMap);

    // Execute the job
    IntWritable result = (IntWritable) job.runAndGetResult();

    // Unload the invocation grid
    grid.unload();

    // Output the single result and its frequency
    System.out.println("The word: " + args[0] + " was used - " + result + " times!");
}
From source file:com.scaleoutsoftware.soss.hserver.examples.WordCountParameterPassing.java
License:Apache License
public static void main(String[] args) throws Exception {
    if (args.length != 2) {
        throw new RuntimeException("Required args: wordMinLength wordMaxLength");
    }

    int minLength = Integer.parseInt(args[0]);
    int maxLength = Integer.parseInt(args[1]);

    // Create parameter argument to send to the reducers
    MapArguments mapArgs = new MapArguments(minLength, maxLength);

    // Create the invocation grid
    InvocationGrid grid = HServerJob.getInvocationGridBuilder("WordCountIG").addClass(TokenizerMapper.class)
            .addClass(IntSumReducer.class).addClass(MapArguments.class).load();

    // Create a default configuration
    Configuration conf = new Configuration();

    // Create the input map
    NamedMap<IntWritable, Text> inputMap = NamedMapFactory.getMap("InputMap",
            new WritableSerializer<IntWritable>(IntWritable.class), new WritableSerializer<Text>(Text.class));

    // Create the output map
    NamedMap<Text, IntWritable> outputMap = NamedMapFactory.getMap("OutputMap",
            new WritableSerializer<Text>(Text.class), new WritableSerializer<IntWritable>(IntWritable.class));

    // Clear the input and output maps
    inputMap.clear();
    outputMap.clear();

    // Create a BulkPut object
    BulkLoader<IntWritable, Text> loader = inputMap.getBulkLoader();
    IntWritable key = new IntWritable();
    Text value = new Text();

    // Build the input map from generated text
    Scanner scanner = new Scanner(SAMPLE_INPUT);
    for (int count = 0; scanner.hasNext(); count++) {
        value.set(scanner.next());
        key.set(count);
        loader.put(key, value);
    }
    scanner.close();

    // Close the bulk loader
    loader.close();

    // Assign the invocation grid to the maps, so parallel operations can be performed
    inputMap.setInvocationGrid(grid);
    outputMap.setInvocationGrid(grid);

    // Create an hServer job
    HServerJob job = new HServerJob(conf, "word count", false, grid);
    job.setJarByClass(WordCountParameterPassing.class);
    job.setMapperClass(TokenizerMapper.class);
    job.setCombinerClass(IntSumReducer.class);
    job.setReducerClass(IntSumReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    job.setInputFormatClass(NamedMapInputFormat.class);
    job.setOutputFormatClass(GridOutputFormat.class);

    // Pass the map arguments object to the job
    job.setJobParameter(mapArgs);

    // Set named maps for the input and output formats
    NamedMapInputFormat.setNamedMap(job, inputMap);
    GridOutputFormat.setNamedMap(job, outputMap);

    // Execute the job
    job.waitForCompletion(true);

    // Unload the invocation grid
    grid.unload();

    // Output resulting words and their frequencies
    Iterable<Text> results = outputMap.keySet();
    System.out.println("Following words were longer than " + mapArgs.minWordLength + " and shorter than "
            + mapArgs.maxWordLength + ":");
    for (Text word : results) {
        System.out.println("\"" + word.toString() + "\" was used " + outputMap.get(word) + " times.");
    }
}
From source file:com.scaleoutsoftware.soss.hserver.Test_MapToMapCopy.java
License:Apache License
public static void main(String argv[]) throws Exception {
    final NamedMap<IntWritable, Text> inputMap = NamedMapFactory.getMap("map-i",
            new WritableSerializer(IntWritable.class), new WritableSerializer(Text.class));
    final NamedMap<IntWritable, Text> outputMap = NamedMapFactory.getMap("map-o",
            new WritableSerializer(IntWritable.class), new WritableSerializer(Text.class));
    inputMap.clear();
    outputMap.clear();
    Thread.sleep(10000);

    BulkLoader<IntWritable, Text> put = inputMap.getBulkLoader();
    String content = "xcccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx";
    Text contentW = new Text(content);
    IntWritable count = new IntWritable();
    for (int i = 0; i < 1000; i++) {
        count.set(i);
        put.put(count, contentW);
    }
    put.close();

    InvocationGrid grid = HServerJob.getInvocationGridBuilder("MyGrid" + System.currentTimeMillis())
            .addClass(Test_MapToMapCopy.class).load();
    HServerJob job;
    Configuration configuration;

    for (int i = 0; i < 100; i++) { // MMF
        configuration = new Configuration();
        configuration.setInt("mapred.hserver.setting.reducer.usememorymappedfiles", 1);
        configuration.setInt("mapred.hserver.setting.namedmap.usememorymappedfiles", 1);
        configuration.setInt("mapred.hserver.setting.map.maxtempmemorykb", 100000);
        job = new HServerJob(configuration, "Sample job");
        job.setGrid(grid);
        job.setMapOutputKeyClass(IntWritable.class);
        job.setMapOutputValueClass(Text.class);
        job.setOutputKeyClass(IntWritable.class);
        job.setOutputValueClass(Text.class);
        job.setInputFormatClass(NamedMapInputFormat.class);
        job.setOutputFormatClass(GridOutputFormat.class);
        NamedMapInputFormat.setNamedMap(job, inputMap);
        NamedMapInputFormat.setSuggestedNumberOfSplits(job, 64);
        GridOutputFormat.setNamedMap(job, outputMap);
        job.waitForCompletion(false);
        assertEquals(inputMap.size(), outputMap.size());
        outputMap.clear();
    }
    grid.unload();
}