List of usage examples for org.apache.hadoop.io.IntWritable.set
public void set(int value)
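Before the real-world examples below, here is a minimal sketch of the method itself (a standalone snippet assuming only the standard Hadoop API; the class name IntWritableSetExample is made up for illustration). set(int) stores a new value in the mutable wrapper, and get() reads it back:

import org.apache.hadoop.io.IntWritable;

public class IntWritableSetExample {
    public static void main(String[] args) {
        IntWritable w = new IntWritable();
        w.set(42);                    // store a value in the reusable wrapper
        System.out.println(w.get()); // prints 42
        w.set(-7);                    // Writables are mutable, so one instance can be reused
        System.out.println(w.get()); // prints -7
    }
}

Because IntWritable is mutable, Hadoop code typically allocates one instance and calls set(...) repeatedly inside loops, as most of the examples below do.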
From source file:com.gotometrics.orderly.example.IntExample.java
License:Apache License
public void serializationExamples() throws Exception {
    IntWritableRowKey i = new IntWritableRowKey();
    IntWritable w = new IntWritable();
    ImmutableBytesWritable buffer = new ImmutableBytesWritable();
    byte[] b;

    /* Serialize and deserialize into an ImmutableBytesWritable */
    w.set(-93214);
    b = new byte[i.getSerializedLength(w)];
    buffer.set(b);
    i.serialize(w, buffer);
    buffer.set(b, 0, b.length);
    System.out.println("deserialize(serialize(-93214)) = " + ((IntWritable) i.deserialize(buffer)).get());

    /* Serialize and deserialize into a byte array (descending sort,
     * with two reserved bits set to 0x3) */
    i.setReservedBits(2).setReservedValue(0x3).setOrder(Order.DESCENDING);
    w.set(0);
    System.out.println("deserialize(serialize(0)) = " + ((IntWritable) i.deserialize(i.serialize(w))).get());

    /* Serialize and deserialize NULL into a byte array */
    System.out.println("deserialize(serialize(NULL)) = " + i.deserialize(i.serialize(null)));
}
From source file:com.gotometrics.orderly.FixedUnsignedIntWritableRowKey.java
License:Apache License
protected IntWritable invertSign(IntWritable iw) {
    // Flip the sign bit so signed ints sort correctly when compared as unsigned bytes
    iw.set(iw.get() ^ Integer.MIN_VALUE);
    return iw;
}
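A brief note on the design (with a small standalone sketch; InvertSignDemo is a hypothetical class, not part of the Orderly library): XOR with Integer.MIN_VALUE flips the sign bit, which maps the signed int range onto the unsigned range while preserving relative order, so serialized row keys compare correctly byte-by-byte:

public class InvertSignDemo {
    // Flipping the sign bit maps [-2^31, 2^31 - 1] onto [0, 2^32 - 1]
    // while preserving relative order.
    static int invertSign(int v) {
        return v ^ Integer.MIN_VALUE;
    }

    public static void main(String[] args) {
        int a = invertSign(-5);
        int b = invertSign(3);
        // -5 < 3 as signed ints; the unsigned comparison of the inverted
        // values agrees, which is what a sortable byte encoding needs.
        System.out.println(Integer.compareUnsigned(a, b) < 0); // prints true
    }
}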
From source file:com.hazelcast.jet.hadoop.impl.ReadHdfsPTest.java
License:Open Source License
private static void writeToSequenceFile(Configuration conf, Path path) throws IOException {
    IntWritable key = new IntWritable();
    Text value = new Text();
    Option fileOption = Writer.file(path);
    Option keyClassOption = Writer.keyClass(key.getClass());
    Option valueClassOption = Writer.valueClass(value.getClass());
    try (Writer writer = SequenceFile.createWriter(conf, fileOption, keyClassOption, valueClassOption)) {
        for (int i = 0; i < ENTRIES.length; i++) {
            key.set(i);
            value.set(ENTRIES[i]);
            writer.append(key, value);
        }
    }
}
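Note the reuse pattern here: a single IntWritable key and Text value are allocated once and mutated with set(...) on each loop iteration. SequenceFile.Writer.append serializes the current contents at call time, so reusing one instance is safe and avoids per-record allocation.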
From source file:com.hazelcast.jet.impl.connector.hadoop.ReadHdfsPTest.java
License:Open Source License
private void writeToSequenceFile(Configuration conf, Path path) throws IOException {
    IntWritable key = new IntWritable();
    Text value = new Text();
    Option fileOption = Writer.file(path);
    Option keyClassOption = Writer.keyClass(key.getClass());
    Option valueClassOption = Writer.valueClass(value.getClass());
    try (Writer writer = SequenceFile.createWriter(conf, fileOption, keyClassOption, valueClassOption)) {
        for (int i = 0; i < ENTRIES.length; i++) {
            key.set(i);
            value.set(ENTRIES[i]);
            writer.append(key, value);
        }
    }
}
From source file:com.hdfs.concat.crush.Crush.java
License:Apache License
void writeDirs() throws IOException {
    print(Verbosity.INFO, "\n\nUsing temporary directory " + tmpDir.toUri().getPath());

    FileStatus status = fs.getFileStatus(srcDir);
    Path tmpIn = new Path(tmpDir, "in");

    bucketFiles = new Path(tmpIn, "dirs");
    partitionMap = new Path(tmpIn, "partition-map");
    counters = new Path(tmpIn, "counters");

    skippedFiles = new HashSet<String>();

    /*
     * Prefer the path returned by the status because it is always fully qualified.
     */
    List<Path> dirs = asList(status.getPath());

    Text key = new Text();
    Text value = new Text();

    Writer writer = SequenceFile.createWriter(fs, job, bucketFiles, Text.class, Text.class, CompressionType.BLOCK);

    int numPartitions = Integer.parseInt(job.get("mapred.reduce.tasks"));

    Bucketer partitionBucketer = new Bucketer(numPartitions, 0, false);
    partitionBucketer.reset("partition-map");

    jobCounters = new Counters();

    try {
        while (!dirs.isEmpty()) {
            List<Path> nextLevel = new LinkedList<Path>();

            for (Path dir : dirs) {
                jobCounters.incrCounter(MapperCounter.DIRS_FOUND, 1);
                print(Verbosity.INFO, "\n\n" + dir.toUri().getPath());

                FileStatus[] contents = fs.listStatus(dir, new PathFilter() {
                    @Override
                    public boolean accept(Path testPath) {
                        if (ignoredFiles == null) {
                            return true;
                        }
                        ignoredFiles.reset(testPath.toUri().getPath());
                        return !ignoredFiles.matches();
                    }
                });

                if (contents == null || contents.length == 0) {
                    print(Verbosity.INFO, " is empty");
                    jobCounters.incrCounter(MapperCounter.DIRS_SKIPPED, 1);
                } else {
                    List<FileStatus> crushables = new ArrayList<FileStatus>(contents.length);
                    Set<String> uncrushedFiles = new HashSet<String>(contents.length);
                    long crushableBytes = 0;

                    /*
                     * Queue sub directories for subsequent inspection and examine the files in this directory.
                     */
                    for (FileStatus content : contents) {
                        Path path = content.getPath();

                        if (content.isDir()) {
                            nextLevel.add(path);
                        } else {
                            boolean changed = uncrushedFiles.add(path.toUri().getPath());
                            assert changed : path.toUri().getPath();

                            long fileLength = content.getLen();

                            if (fileLength <= maxEligibleSize) {
                                crushables.add(content);
                                crushableBytes += fileLength;
                            }
                        }
                    }

                    /*
                     * We found a directory with data in it. Make sure we know how to name the crush output file and
                     * then increment the number of files we found.
                     */
                    if (!uncrushedFiles.isEmpty()) {
                        if (-1 == findMatcher(dir)) {
                            throw new IllegalArgumentException("Could not find matching regex for directory: " + dir);
                        }
                        jobCounters.incrCounter(MapperCounter.FILES_FOUND, uncrushedFiles.size());
                    }

                    if (0 == crushableBytes) {
                        print(Verbosity.INFO, " has no crushable files");
                        jobCounters.incrCounter(MapperCounter.DIRS_SKIPPED, 1);
                    } else {
                        /*
                         * We found files to consider for crushing.
                         */
                        long nBlocks = crushableBytes / dfsBlockSize;

                        if (nBlocks * dfsBlockSize != crushableBytes) {
                            nBlocks++;
                        }

                        /*
                         * maxFileBlocks will be huge in v1 mode, which will lead to one bucket per directory.
                         */
                        long dirBuckets = nBlocks / maxFileBlocks;

                        if (dirBuckets * maxFileBlocks != nBlocks) {
                            dirBuckets++;
                        }

                        if (dirBuckets > Integer.MAX_VALUE) {
                            throw new AssertionError("Too many buckets: " + dirBuckets);
                        }

                        Bucketer directoryBucketer = new Bucketer((int) dirBuckets, excludeSingleFileDirs);
                        directoryBucketer.reset(getPathPart(dir));

                        for (FileStatus file : crushables) {
                            directoryBucketer.add(new FileStatusHasSize(file));
                        }

                        List<Bucket> crushFiles = directoryBucketer.createBuckets();

                        if (crushFiles.isEmpty()) {
                            jobCounters.incrCounter(MapperCounter.DIRS_SKIPPED, 1);
                        } else {
                            nBuckets += crushFiles.size();
                            jobCounters.incrCounter(MapperCounter.DIRS_ELIGIBLE, 1);
                            print(Verbosity.INFO, " => " + crushFiles.size() + " output files");

                            /*
                             * Write out the mapping between a bucket and a file.
                             */
                            for (Bucket crushFile : crushFiles) {
                                String bucketId = crushFile.name();
                                List<String> bucketFiles = crushFile.contents();

                                print(Verbosity.INFO,
                                        format("\n  Output %s will include %,d input bytes from %,d files",
                                                bucketId, crushFile.size(), bucketFiles.size()));

                                key.set(bucketId);

                                for (String f : bucketFiles) {
                                    boolean changed = uncrushedFiles.remove(f);
                                    assert changed : f;

                                    pathMatcher.reset(f);
                                    pathMatcher.matches();

                                    value.set(pathMatcher.group(5));
                                    writer.append(key, value);

                                    /*
                                     * Print the input file with four leading spaces.
                                     */
                                    print(Verbosity.VERBOSE, "\n    " + f);
                                }

                                jobCounters.incrCounter(MapperCounter.FILES_ELIGIBLE, bucketFiles.size());
                                partitionBucketer.add(crushFile);
                            }
                        }
                    }

                    if (!uncrushedFiles.isEmpty()) {
                        print(Verbosity.INFO, "\n\n  Skipped " + uncrushedFiles.size() + " files");

                        for (String uncrushed : uncrushedFiles) {
                            print(Verbosity.VERBOSE, "\n    " + uncrushed);
                        }

                        jobCounters.incrCounter(MapperCounter.FILES_SKIPPED, uncrushedFiles.size());
                    }

                    skippedFiles.addAll(uncrushedFiles);
                }
            }

            dirs = nextLevel;
        }
    } finally {
        try {
            writer.close();
        } catch (Exception e) {
            LOG.error("Trapped exception during close: " + bucketFiles, e);
        }
    }

    /*
     * Now that we have processed all the directories, write the partition map.
     */
    List<Bucket> partitions = partitionBucketer.createBuckets();
    assert partitions.size() <= numPartitions;

    writer = SequenceFile.createWriter(fs, job, partitionMap, Text.class, IntWritable.class);
    IntWritable partNum = new IntWritable();

    try {
        for (Bucket partition : partitions) {
            String partitionName = partition.name();
            partNum.set(Integer.parseInt(partitionName.substring(partitionName.lastIndexOf('-') + 1)));

            for (String bucketId : partition.contents()) {
                key.set(bucketId);
                writer.append(key, partNum);
            }
        }
    } finally {
        try {
            writer.close();
        } catch (Exception e) {
            LOG.error("Trapped exception during close: " + partitionMap, e);
        }
    }

    DataOutputStream countersStream = fs.create(this.counters);

    try {
        jobCounters.write(countersStream);
    } finally {
        try {
            countersStream.close();
        } catch (Exception e) {
            LOG.error("Trapped exception during close: " + partitionMap, e);
        }
    }
}
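The same reuse pattern appears at the end of writeDirs: one IntWritable partNum is updated with set(...) for every partition-map entry, and each append(key, partNum) writes the value the wrapper holds at that moment.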
From source file:com.hdfs.concat.crush.CrushPartitionerTest.java
License:Apache License
@Test
public void partition() throws IOException {
    Writer writer = SequenceFile.createWriter(fs, job, partitionMap, Text.class, IntWritable.class);
    Text key = new Text();
    IntWritable partNum = new IntWritable();

    key.set("bucket-1"); partNum.set(0); writer.append(key, partNum);
    key.set("bucket-2"); partNum.set(0); writer.append(key, partNum);
    key.set("bucket-3"); partNum.set(1); writer.append(key, partNum);
    key.set("bucket-4"); partNum.set(2); writer.append(key, partNum);
    key.set("bucket-5"); partNum.set(2); writer.append(key, partNum);
    key.set("bucket-6"); partNum.set(2); writer.append(key, partNum);
    writer.close();

    job.setNumReduceTasks(3);
    partitioner.configure(job);

    Text fileName = new Text();

    key.set("bucket-1");
    for (int file = 0; file < 4; file++) {
        fileName.set("file" + file);
        assertThat(partitioner.getPartition(key, fileName, 3), equalTo(0));
    }

    key.set("bucket-2");
    for (int file = 0; file < 4; file++) {
        fileName.set("file" + file);
        assertThat(partitioner.getPartition(key, fileName, 3), equalTo(0));
    }

    key.set("bucket-3");
    for (int file = 0; file < 4; file++) {
        fileName.set("file" + file);
        assertThat(partitioner.getPartition(key, fileName, 3), equalTo(1));
    }

    key.set("bucket-4");
    for (int file = 0; file < 4; file++) {
        fileName.set("file" + file);
        assertThat(partitioner.getPartition(key, fileName, 3), equalTo(2));
    }

    key.set("bucket-5");
    for (int file = 0; file < 4; file++) {
        fileName.set("file" + file);
        assertThat(partitioner.getPartition(key, fileName, 3), equalTo(2));
    }

    key.set("bucket-6");
    for (int file = 0; file < 4; file++) {
        fileName.set("file" + file);
        assertThat(partitioner.getPartition(key, fileName, 3), equalTo(2));
    }
}
From source file:com.hdfs.concat.crush.CrushPartitionerTest.java
License:Apache License
@Test
public void partitionWithFewerPartitionsThanReduceTasks() throws IOException {
    Writer writer = SequenceFile.createWriter(fs, job, partitionMap, Text.class, IntWritable.class);
    Text key = new Text();
    IntWritable partNum = new IntWritable();

    key.set("bucket-1"); partNum.set(0); writer.append(key, partNum);
    key.set("bucket-2"); partNum.set(0); writer.append(key, partNum);
    key.set("bucket-3"); partNum.set(1); writer.append(key, partNum);
    key.set("bucket-4"); partNum.set(2); writer.append(key, partNum);
    key.set("bucket-5"); partNum.set(2); writer.append(key, partNum);
    key.set("bucket-6"); partNum.set(2); writer.append(key, partNum);
    writer.close();

    job.setNumReduceTasks(40);
    partitioner.configure(job);

    Text fileName = new Text();

    key.set("bucket-1");
    for (int file = 0; file < 4; file++) {
        fileName.set("file" + file);
        assertThat(partitioner.getPartition(key, fileName, 3), equalTo(0));
    }

    key.set("bucket-2");
    for (int file = 0; file < 4; file++) {
        fileName.set("file" + file);
        assertThat(partitioner.getPartition(key, fileName, 3), equalTo(0));
    }

    key.set("bucket-3");
    for (int file = 0; file < 4; file++) {
        fileName.set("file" + file);
        assertThat(partitioner.getPartition(key, fileName, 3), equalTo(1));
    }

    key.set("bucket-4");
    for (int file = 0; file < 4; file++) {
        fileName.set("file" + file);
        assertThat(partitioner.getPartition(key, fileName, 3), equalTo(2));
    }

    key.set("bucket-5");
    for (int file = 0; file < 4; file++) {
        fileName.set("file" + file);
        assertThat(partitioner.getPartition(key, fileName, 3), equalTo(2));
    }

    key.set("bucket-6");
    for (int file = 0; file < 4; file++) {
        fileName.set("file" + file);
        assertThat(partitioner.getPartition(key, fileName, 3), equalTo(2));
    }
}
From source file:com.hdfs.concat.crush.CrushPartitionerTest.java
License:Apache License
@Test
public void noDupes() throws IOException {
    Writer writer = SequenceFile.createWriter(fs, job, partitionMap, Text.class, IntWritable.class);
    Text key = new Text();
    IntWritable value = new IntWritable();

    key.set("bucket-1"); value.set(0); writer.append(key, value);
    key.set("bucket-2"); value.set(0); writer.append(key, value);
    key.set("bucket-2"); value.set(1); writer.append(key, value);
    writer.close();

    job.setNumReduceTasks(3);

    try {
        partitioner.configure(job);
        fail();
    } catch (IllegalArgumentException e) {
        if (!e.getMessage().contains("bucket-2")) {
            throw e;
        }
    }
}
From source file:com.hdfs.concat.crush.CrushPartitionerTest.java
License:Apache License
@Test
public void partitionTooLow() throws IOException {
    Writer writer = SequenceFile.createWriter(fs, job, partitionMap, Text.class, IntWritable.class);
    Text key = new Text();
    IntWritable partNum = new IntWritable();

    key.set("bucket-1"); partNum.set(0); writer.append(key, partNum);
    key.set("bucket-2"); partNum.set(0); writer.append(key, partNum);
    key.set("bucket-4"); partNum.set(2); writer.append(key, partNum);
    key.set("bucket-5"); partNum.set(2); writer.append(key, partNum);
    key.set("bucket-6"); partNum.set(-1); writer.append(key, partNum);
    writer.close();

    job.setNumReduceTasks(3);

    try {
        partitioner.configure(job);
        fail("No such thing as a negative partition");
    } catch (IllegalArgumentException e) {
        if (!e.getMessage().contains("Partition -1")) {
            throw e;
        }
    }
}
From source file:com.hdfs.concat.crush.CrushPartitionerTest.java
License:Apache License
@Test
public void partitionTooHigh() throws IOException {
    Writer writer = SequenceFile.createWriter(fs, job, partitionMap, Text.class, IntWritable.class);
    Text key = new Text();
    IntWritable partNum = new IntWritable();

    key.set("bucket-1"); partNum.set(0); writer.append(key, partNum);
    key.set("bucket-2"); partNum.set(0); writer.append(key, partNum);
    key.set("bucket-4"); partNum.set(2); writer.append(key, partNum);
    key.set("bucket-5"); partNum.set(2); writer.append(key, partNum);
    key.set("bucket-6"); partNum.set(3); writer.append(key, partNum);
    writer.close();

    job.setNumReduceTasks(3);

    try {
        partitioner.configure(job);
        fail("Partition with id 3 is not allowed with 3 reduce tasks");
    } catch (IllegalArgumentException e) {
        if (!e.getMessage().contains("Partition 3")) {
            throw e;
        }
    }
}