List of usage examples for org.apache.hadoop.io.IntWritable: the IntWritable() constructor
public IntWritable()
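The no-argument constructor creates an IntWritable holding 0; instances are normally reused via set()/get() rather than reallocated per record. A minimal standalone sketch (not taken from the sources below):

IntWritable count = new IntWritable();   // value defaults to 0
count.set(42);                           // mutate the same instance in place
System.out.println(count.get());         // prints 42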
From source file:com.github.ygf.pagerank.PageRankTopNReducer.java
License:Apache License
@Override
protected void cleanup(Context context) throws IOException, InterruptedException {
    Configuration conf = context.getConfiguration();
    Path titlesDir = new Path(conf.get("pagerank.titles_dir"));
    MapFile.Reader[] readers = MapFileOutputFormat.getReaders(titlesDir, conf);
    Partitioner<IntWritable, Text> partitioner = new HashPartitioner<IntWritable, Text>();
    IntWritable page = new IntWritable();
    Text title = new Text();

    float[] pageRanks = new float[topN.size()];
    String[] titles = new String[topN.size()];

    // The order of the entries is reversed. The priority queue is in
    // non-decreasing order and we want the highest PageRank first.
    for (int i = pageRanks.length - 1; i >= 0; i--) {
        Map.Entry<Float, Integer> entry = topN.poll();
        // Get the title of the page from the title index.
        page.set(entry.getValue());
        MapFileOutputFormat.getEntry(readers, partitioner, page, title);
        pageRanks[i] = entry.getKey();
        titles[i] = title.toString();
    }

    for (MapFile.Reader reader : readers) {
        reader.close();
    }

    for (int i = 0; i < pageRanks.length; i++) {
        context.write(new FloatWritable(pageRanks[i]), new Text(titles[i]));
    }
}
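Note that a single IntWritable instance (page) serves every MapFile lookup: set() mutates the key in place, so no new object is allocated per lookup. This reuse pattern is the conventional way to work with Hadoop Writables in hot loops.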
From source file:com.gotometrics.orderly.example.IntExample.java
License:Apache License
public void serializationExamples() throws Exception {
    IntWritableRowKey i = new IntWritableRowKey();
    IntWritable w = new IntWritable();
    ImmutableBytesWritable buffer = new ImmutableBytesWritable();
    byte[] b;

    /* Serialize and deserialize into an ImmutableBytesWritable */
    w.set(-93214);
    b = new byte[i.getSerializedLength(w)];
    buffer.set(b);
    i.serialize(w, buffer);
    buffer.set(b, 0, b.length);
    System.out.println("deserialize(serialize(-93214)) = " + ((IntWritable) i.deserialize(buffer)).get());

    /* Serialize and deserialize into a byte array (descending sort,
     * with two reserved bits set to 0x3) */
    i.setReservedBits(2).setReservedValue(0x3).setOrder(Order.DESCENDING);
    w.set(0);
    System.out.println("deserialize(serialize(0)) = " + ((IntWritable) i.deserialize(i.serialize(w))).get());

    /* Serialize and deserialize NULL into a byte array */
    System.out.println("deserialize(serialize(NULL)) = " + i.deserialize(i.serialize(null)));
}
From source file:com.gotometrics.orderly.FixedIntegerRowKey.java
License:Apache License
protected Object toIntWritable(Object o) {
    if (o == null || o instanceof IntWritable)
        return o;
    if (iw == null)
        iw = new IntWritable();
    iw.set((Integer) o);
    return iw;
}
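Both this method and FixedIntWritableRowKey.deserialize below lazily allocate one cached IntWritable (iw) and mutate it on every call. Callers that need to retain the returned value must copy it, since the next call overwrites the same instance.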
From source file:com.gotometrics.orderly.FixedIntWritableRowKey.java
License:Apache License
@Override
public Object deserialize(ImmutableBytesWritable w) throws IOException {
    int offset = w.getOffset();
    byte[] s = w.get();

    int i = Bytes.toInt(s, offset) ^ Integer.MIN_VALUE ^ order.mask();
    RowKeyUtils.seek(w, Bytes.SIZEOF_INT);

    if (iw == null)
        iw = new IntWritable();
    iw.set(i);
    return iw;
}
From source file:com.gotometrics.orderly.IntWritableRowKey.java
License:Apache License
@Override
Writable createWritable() {
return new IntWritable();
}
From source file:com.gsvic.csmr.io.InputData.java
License:Apache License
/**
 * Reads the Document-Frequency file
 * @param conf
 * @param dfFile
 * @return Returns the Document-Frequency data in a HashMap
 * @throws IOException
 */
public static HashMap<IntWritable, LongWritable> readDf(Configuration conf, Path dfFile) throws IOException {
    FileSystem filesystem = FileSystem.get(conf);
    SequenceFile.Reader reader = new SequenceFile.Reader(filesystem, dfFile, conf);
    HashMap<IntWritable, LongWritable> dcf = new HashMap<>();
    IntWritable key = new IntWritable();
    LongWritable value = new LongWritable();

    // The reader deserializes into the same key/value instances on every
    // iteration, so fresh copies must be stored in the map.
    while (reader.next(key, value)) {
        dcf.put(new IntWritable(key.get()), new LongWritable(value.get()));
    }
    reader.close();

    return dcf;
}
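A hypothetical caller, for illustration only (the file path is an assumption, not from the source):

Configuration conf = new Configuration();
HashMap<IntWritable, LongWritable> df =
        InputData.readDf(conf, new Path("df/part-r-00000"));
System.out.println("terms: " + df.size());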
From source file:com.gsvic.csmr.io.InputData.java
License:Apache License
/**
 * Reads the dictionary file
 * @param conf
 * @param dict
 * @return Returns the dictionary in a HashMap
 * @throws IOException
 */
public static HashMap<Text, IntWritable> readDictionary(Configuration conf, Path dict) throws IOException {
    FileSystem filesystem = FileSystem.get(conf);
    SequenceFile.Reader reader = new SequenceFile.Reader(filesystem, dict, conf);
    HashMap<Text, IntWritable> dictMap = new HashMap<>();
    Text key = new Text();
    IntWritable value = new IntWritable();

    while (reader.next(key, value)) {
        dictMap.put(new Text(key), new IntWritable(value.get()));
    }
    reader.close();

    return dictMap;
}
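In both readers, the key/value objects passed to reader.next() are reused by SequenceFile.Reader across iterations. That is why fresh copies (new Text(key), new IntWritable(value.get())) go into the map: inserting the loop variables directly would leave every entry aliased to the same mutating objects, and mutating a HashMap key also breaks its bucket placement.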
From source file:com.hazelcast.jet.hadoop.impl.ReadHdfsPTest.java
License:Open Source License
private static void writeToSequenceFile(Configuration conf, Path path) throws IOException {
    IntWritable key = new IntWritable();
    Text value = new Text();

    Option fileOption = Writer.file(path);
    Option keyClassOption = Writer.keyClass(key.getClass());
    Option valueClassOption = Writer.valueClass(value.getClass());
    try (Writer writer = SequenceFile.createWriter(conf, fileOption, keyClassOption, valueClassOption)) {
        for (int i = 0; i < ENTRIES.length; i++) {
            key.set(i);
            value.set(ENTRIES[i]);
            writer.append(key, value);
        }
    }
}
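A matching read-back sketch for verifying such a file; this helper is hypothetical and not part of the test class:

private static void dumpSequenceFile(Configuration conf, Path path) throws IOException {
    IntWritable key = new IntWritable();
    Text value = new Text();
    // Reader.file() mirrors the Writer.file() option used above.
    try (SequenceFile.Reader reader = new SequenceFile.Reader(conf, SequenceFile.Reader.file(path))) {
        while (reader.next(key, value)) {
            System.out.println(key.get() + "\t" + value);
        }
    }
}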
From source file:com.hazelcast.jet.impl.connector.hadoop.ReadHdfsPTest.java
License:Open Source License
private void writeToSequenceFile(Configuration conf, Path path) throws IOException {
    IntWritable key = new IntWritable();
    Text value = new Text();

    Option fileOption = Writer.file(path);
    Option keyClassOption = Writer.keyClass(key.getClass());
    Option valueClassOption = Writer.valueClass(value.getClass());
    try (Writer writer = SequenceFile.createWriter(conf, fileOption, keyClassOption, valueClassOption)) {
        for (int i = 0; i < ENTRIES.length; i++) {
            key.set(i);
            value.set(ENTRIES[i]);
            writer.append(key, value);
        }
    }
}
From source file:com.hdfs.concat.crush.Crush.java
License:Apache License
void writeDirs() throws IOException {
    print(Verbosity.INFO, "\n\nUsing temporary directory " + tmpDir.toUri().getPath());

    FileStatus status = fs.getFileStatus(srcDir);

    Path tmpIn = new Path(tmpDir, "in");

    bucketFiles = new Path(tmpIn, "dirs");
    partitionMap = new Path(tmpIn, "partition-map");
    counters = new Path(tmpIn, "counters");

    skippedFiles = new HashSet<String>();

    /*
     * Prefer the path returned by the status because it is always fully qualified.
     */
    List<Path> dirs = asList(status.getPath());

    Text key = new Text();
    Text value = new Text();

    Writer writer = SequenceFile.createWriter(fs, job, bucketFiles, Text.class, Text.class, CompressionType.BLOCK);

    int numPartitions = Integer.parseInt(job.get("mapred.reduce.tasks"));

    Bucketer partitionBucketer = new Bucketer(numPartitions, 0, false);
    partitionBucketer.reset("partition-map");

    jobCounters = new Counters();

    try {
        while (!dirs.isEmpty()) {
            List<Path> nextLevel = new LinkedList<Path>();

            for (Path dir : dirs) {
                jobCounters.incrCounter(MapperCounter.DIRS_FOUND, 1);

                print(Verbosity.INFO, "\n\n" + dir.toUri().getPath());

                FileStatus[] contents = fs.listStatus(dir, new PathFilter() {
                    @Override
                    public boolean accept(Path testPath) {
                        if (ignoredFiles == null)
                            return true;
                        ignoredFiles.reset(testPath.toUri().getPath());
                        return !ignoredFiles.matches();
                    }
                });

                if (contents == null || contents.length == 0) {
                    print(Verbosity.INFO, " is empty");
                    jobCounters.incrCounter(MapperCounter.DIRS_SKIPPED, 1);
                } else {
                    List<FileStatus> crushables = new ArrayList<FileStatus>(contents.length);
                    Set<String> uncrushedFiles = new HashSet<String>(contents.length);

                    long crushableBytes = 0;

                    /*
                     * Queue sub directories for subsequent inspection and examine the files in this directory.
                     */
                    for (FileStatus content : contents) {
                        Path path = content.getPath();

                        if (content.isDir()) {
                            nextLevel.add(path);
                        } else {
                            boolean changed = uncrushedFiles.add(path.toUri().getPath());
                            assert changed : path.toUri().getPath();

                            long fileLength = content.getLen();

                            if (fileLength <= maxEligibleSize) {
                                crushables.add(content);
                                crushableBytes += fileLength;
                            }
                        }
                    }

                    /*
                     * We found a directory with data in it. Make sure we know how to name the crush output file and
                     * then increment the number of files we found.
                     */
                    if (!uncrushedFiles.isEmpty()) {
                        if (-1 == findMatcher(dir)) {
                            throw new IllegalArgumentException("Could not find matching regex for directory: " + dir);
                        }

                        jobCounters.incrCounter(MapperCounter.FILES_FOUND, uncrushedFiles.size());
                    }

                    if (0 == crushableBytes) {
                        print(Verbosity.INFO, " has no crushable files");
                        jobCounters.incrCounter(MapperCounter.DIRS_SKIPPED, 1);
                    } else {
                        /*
                         * We found files to consider for crushing.
                         */
                        long nBlocks = crushableBytes / dfsBlockSize;

                        if (nBlocks * dfsBlockSize != crushableBytes) {
                            nBlocks++;
                        }

                        /*
                         * maxFileBlocks will be huge in v1 mode, which will lead to one bucket per directory.
                         */
                        long dirBuckets = nBlocks / maxFileBlocks;

                        if (dirBuckets * maxFileBlocks != nBlocks) {
                            dirBuckets++;
                        }

                        if (dirBuckets > Integer.MAX_VALUE) {
                            throw new AssertionError("Too many buckets: " + dirBuckets);
                        }

                        Bucketer directoryBucketer = new Bucketer((int) dirBuckets, excludeSingleFileDirs);
                        directoryBucketer.reset(getPathPart(dir));

                        for (FileStatus file : crushables) {
                            directoryBucketer.add(new FileStatusHasSize(file));
                        }

                        List<Bucket> crushFiles = directoryBucketer.createBuckets();

                        if (crushFiles.isEmpty()) {
                            jobCounters.incrCounter(MapperCounter.DIRS_SKIPPED, 1);
                        } else {
                            nBuckets += crushFiles.size();
                            jobCounters.incrCounter(MapperCounter.DIRS_ELIGIBLE, 1);
                            print(Verbosity.INFO, " => " + crushFiles.size() + " output files");

                            /*
                             * Write out the mapping between a bucket and a file.
                             */
                            for (Bucket crushFile : crushFiles) {
                                String bucketId = crushFile.name();

                                List<String> bucketFiles = crushFile.contents();

                                print(Verbosity.INFO,
                                        format("\n  Output %s will include %,d input bytes from %,d files",
                                                bucketId, crushFile.size(), bucketFiles.size()));

                                key.set(bucketId);

                                for (String f : bucketFiles) {
                                    boolean changed = uncrushedFiles.remove(f);
                                    assert changed : f;

                                    pathMatcher.reset(f);
                                    pathMatcher.matches();

                                    value.set(pathMatcher.group(5));

                                    writer.append(key, value);

                                    /*
                                     * Print the input file with four leading spaces.
                                     */
                                    print(Verbosity.VERBOSE, "\n    " + f);
                                }

                                jobCounters.incrCounter(MapperCounter.FILES_ELIGIBLE, bucketFiles.size());

                                partitionBucketer.add(crushFile);
                            }
                        }
                    }

                    if (!uncrushedFiles.isEmpty()) {
                        print(Verbosity.INFO, "\n\n  Skipped " + uncrushedFiles.size() + " files");

                        for (String uncrushed : uncrushedFiles) {
                            print(Verbosity.VERBOSE, "\n    " + uncrushed);
                        }

                        jobCounters.incrCounter(MapperCounter.FILES_SKIPPED, uncrushedFiles.size());
                    }

                    skippedFiles.addAll(uncrushedFiles);
                }
            }

            dirs = nextLevel;
        }
    } finally {
        try {
            writer.close();
        } catch (Exception e) {
            LOG.error("Trapped exception during close: " + bucketFiles, e);
        }
    }

    /*
     * Now that we have processed all the directories, write the partition map.
     */
    List<Bucket> partitions = partitionBucketer.createBuckets();
    assert partitions.size() <= numPartitions;

    writer = SequenceFile.createWriter(fs, job, partitionMap, Text.class, IntWritable.class);
    IntWritable partNum = new IntWritable();

    try {
        for (Bucket partition : partitions) {
            String partitionName = partition.name();

            partNum.set(Integer.parseInt(partitionName.substring(partitionName.lastIndexOf('-') + 1)));

            for (String bucketId : partition.contents()) {
                key.set(bucketId);
                writer.append(key, partNum);
            }
        }
    } finally {
        try {
            writer.close();
        } catch (Exception e) {
            LOG.error("Trapped exception during close: " + partitionMap, e);
        }
    }

    DataOutputStream countersStream = fs.create(this.counters);

    try {
        jobCounters.write(countersStream);
    } finally {
        try {
            countersStream.close();
        } catch (Exception e) {
            LOG.error("Trapped exception during close: " + partitionMap, e);
        }
    }
}
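The partition map writer above reuses one IntWritable (partNum) for every append. This is safe because SequenceFile.Writer.append() serializes the current key and value at call time, so mutating partNum afterwards cannot affect records already written.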