List of usage examples for org.apache.hadoop.io.IntWritable

Constructor: public IntWritable()
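Before the extracted examples, a minimal self-contained sketch of the pattern they all rely on (the class name IntWritableDemo is invented for illustration, not taken from the sources below): the no-argument constructor builds a mutable holder, initialized to 0, that is filled later via set(int) or readFields(DataInput). This is why the examples below allocate one IntWritable outside their read/write loops and reuse it for every record.

    import java.io.ByteArrayInputStream;
    import java.io.ByteArrayOutputStream;
    import java.io.DataInputStream;
    import java.io.DataOutputStream;
    import java.io.IOException;

    import org.apache.hadoop.io.IntWritable;

    public class IntWritableDemo {
        public static void main(String[] args) throws IOException {
            // The no-arg constructor creates a mutable holder whose value defaults to 0.
            IntWritable out = new IntWritable();
            out.set(42);

            // Serialize through the Writable contract.
            ByteArrayOutputStream buffer = new ByteArrayOutputStream();
            out.write(new DataOutputStream(buffer));

            // Deserialize into a second, reusable instance. Reusing one holder across
            // a read loop, rather than allocating per record, is the idiom the reader
            // loops in the examples below follow.
            IntWritable read = new IntWritable();
            read.readFields(new DataInputStream(new ByteArrayInputStream(buffer.toByteArray())));

            System.out.println(read.get()); // prints 42
        }
    }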
From source file:com.hdfs.concat.crush.CrushPartitioner.java
License:Apache License
@Override
public void configure(JobConf job) {
    String path = job.get("crush.partition.map");
    int expPartitions = job.getNumReduceTasks();

    bucketToPartition = new HashMap<Text, Integer>(100);

    try {
        FileSystem fs = FileSystem.get(job);
        Reader reader = new Reader(fs, new Path(path), job);

        Text bucket = new Text();
        IntWritable partNum = new IntWritable();

        while (reader.next(bucket, partNum)) {
            int partNumValue = partNum.get();

            if (partNumValue < 0 || partNumValue >= expPartitions) {
                throw new IllegalArgumentException("Partition " + partNumValue + " not allowed with "
                        + expPartitions + " reduce tasks");
            }

            Integer prev = bucketToPartition.put(new Text(bucket), partNumValue);

            if (null != prev) {
                throw new IllegalArgumentException("Bucket " + bucket + " appears more than once in " + path);
            }
        }
    } catch (IOException e) {
        throw new RuntimeException("Could not read partition map from " + path, e);
    }

    if (new HashSet<Integer>(bucketToPartition.values()).size() > expPartitions) {
        throw new IllegalArgumentException(path + " contains more than " + expPartitions + " distinct partitions");
    }
}
From source file:com.hdfs.concat.crush.CrushPartitionerTest.java
License:Apache License
@Test
public void partition() throws IOException {
    Writer writer = SequenceFile.createWriter(fs, job, partitionMap, Text.class, IntWritable.class);

    Text key = new Text();
    IntWritable partNum = new IntWritable();

    key.set("bucket-1");
    partNum.set(0);
    writer.append(key, partNum);

    key.set("bucket-2");
    partNum.set(0);
    writer.append(key, partNum);

    key.set("bucket-3");
    partNum.set(1);
    writer.append(key, partNum);

    key.set("bucket-4");
    partNum.set(2);
    writer.append(key, partNum);

    key.set("bucket-5");
    partNum.set(2);
    writer.append(key, partNum);

    key.set("bucket-6");
    partNum.set(2);
    writer.append(key, partNum);

    writer.close();

    job.setNumReduceTasks(3);
    partitioner.configure(job);

    Text fileName = new Text();

    key.set("bucket-1");
    for (int file = 0; file < 4; file++) {
        fileName.set("file" + file);
        assertThat(partitioner.getPartition(key, fileName, 3), equalTo(0));
    }

    key.set("bucket-2");
    for (int file = 0; file < 4; file++) {
        fileName.set("file" + file);
        assertThat(partitioner.getPartition(key, fileName, 3), equalTo(0));
    }

    key.set("bucket-3");
    for (int file = 0; file < 4; file++) {
        fileName.set("file" + file);
        assertThat(partitioner.getPartition(key, fileName, 3), equalTo(1));
    }

    key.set("bucket-4");
    for (int file = 0; file < 4; file++) {
        fileName.set("file" + file);
        assertThat(partitioner.getPartition(key, fileName, 3), equalTo(2));
    }

    key.set("bucket-5");
    for (int file = 0; file < 4; file++) {
        fileName.set("file" + file);
        assertThat(partitioner.getPartition(key, fileName, 3), equalTo(2));
    }

    key.set("bucket-6");
    for (int file = 0; file < 4; file++) {
        fileName.set("file" + file);
        assertThat(partitioner.getPartition(key, fileName, 3), equalTo(2));
    }
}
From source file:com.hdfs.concat.crush.CrushPartitionerTest.java
License:Apache License
@Test
public void partitionWithFewerPartitionsThanReduceTasks() throws IOException {
    Writer writer = SequenceFile.createWriter(fs, job, partitionMap, Text.class, IntWritable.class);

    Text key = new Text();
    IntWritable partNum = new IntWritable();

    key.set("bucket-1");
    partNum.set(0);
    writer.append(key, partNum);

    key.set("bucket-2");
    partNum.set(0);
    writer.append(key, partNum);

    key.set("bucket-3");
    partNum.set(1);
    writer.append(key, partNum);

    key.set("bucket-4");
    partNum.set(2);
    writer.append(key, partNum);

    key.set("bucket-5");
    partNum.set(2);
    writer.append(key, partNum);

    key.set("bucket-6");
    partNum.set(2);
    writer.append(key, partNum);

    writer.close();

    job.setNumReduceTasks(40);
    partitioner.configure(job);

    Text fileName = new Text();

    key.set("bucket-1");
    for (int file = 0; file < 4; file++) {
        fileName.set("file" + file);
        assertThat(partitioner.getPartition(key, fileName, 3), equalTo(0));
    }

    key.set("bucket-2");
    for (int file = 0; file < 4; file++) {
        fileName.set("file" + file);
        assertThat(partitioner.getPartition(key, fileName, 3), equalTo(0));
    }

    key.set("bucket-3");
    for (int file = 0; file < 4; file++) {
        fileName.set("file" + file);
        assertThat(partitioner.getPartition(key, fileName, 3), equalTo(1));
    }

    key.set("bucket-4");
    for (int file = 0; file < 4; file++) {
        fileName.set("file" + file);
        assertThat(partitioner.getPartition(key, fileName, 3), equalTo(2));
    }

    key.set("bucket-5");
    for (int file = 0; file < 4; file++) {
        fileName.set("file" + file);
        assertThat(partitioner.getPartition(key, fileName, 3), equalTo(2));
    }

    key.set("bucket-6");
    for (int file = 0; file < 4; file++) {
        fileName.set("file" + file);
        assertThat(partitioner.getPartition(key, fileName, 3), equalTo(2));
    }
}
From source file:com.hdfs.concat.crush.CrushPartitionerTest.java
License:Apache License
@Test
public void noDupes() throws IOException {
    Writer writer = SequenceFile.createWriter(fs, job, partitionMap, Text.class, IntWritable.class);

    Text key = new Text();
    IntWritable value = new IntWritable();

    key.set("bucket-1");
    value.set(0);
    writer.append(key, value);

    key.set("bucket-2");
    value.set(0);
    writer.append(key, value);

    key.set("bucket-2");
    value.set(1);
    writer.append(key, value);

    writer.close();

    job.setNumReduceTasks(3);

    try {
        partitioner.configure(job);
        fail();
    } catch (IllegalArgumentException e) {
        if (!e.getMessage().contains("bucket-2")) {
            throw e;
        }
    }
}
From source file:com.hdfs.concat.crush.CrushPartitionerTest.java
License:Apache License
@Test
public void partitionTooLow() throws IOException {
    Writer writer = SequenceFile.createWriter(fs, job, partitionMap, Text.class, IntWritable.class);

    Text key = new Text();
    IntWritable partNum = new IntWritable();

    key.set("bucket-1");
    partNum.set(0);
    writer.append(key, partNum);

    key.set("bucket-2");
    partNum.set(0);
    writer.append(key, partNum);

    key.set("bucket-4");
    partNum.set(2);
    writer.append(key, partNum);

    key.set("bucket-5");
    partNum.set(2);
    writer.append(key, partNum);

    key.set("bucket-6");
    partNum.set(-1);
    writer.append(key, partNum);

    writer.close();

    job.setNumReduceTasks(3);

    try {
        partitioner.configure(job);
        fail("No such thing as a negative partition");
    } catch (IllegalArgumentException e) {
        if (!e.getMessage().contains("Partition -1")) {
            throw e;
        }
    }
}
From source file:com.hdfs.concat.crush.CrushPartitionerTest.java
License:Apache License
@Test
public void partitionTooHigh() throws IOException {
    Writer writer = SequenceFile.createWriter(fs, job, partitionMap, Text.class, IntWritable.class);

    Text key = new Text();
    IntWritable partNum = new IntWritable();

    key.set("bucket-1");
    partNum.set(0);
    writer.append(key, partNum);

    key.set("bucket-2");
    partNum.set(0);
    writer.append(key, partNum);

    key.set("bucket-4");
    partNum.set(2);
    writer.append(key, partNum);

    key.set("bucket-5");
    partNum.set(2);
    writer.append(key, partNum);

    key.set("bucket-6");
    partNum.set(3);
    writer.append(key, partNum);

    writer.close();

    job.setNumReduceTasks(3);

    try {
        partitioner.configure(job);
        fail("Partition with id 3 is not allowed with 3 reduce tasks");
    } catch (IllegalArgumentException e) {
        if (!e.getMessage().contains("Partition 3")) {
            throw e;
        }
    }
}
From source file:com.hdfs.concat.crush.CrushStandAloneSequenceFileTest.java
License:Apache License
private void verifyCrushOutput(File crushOutput, int[]... keyCounts) throws IOException {
    List<String> actual = new ArrayList<String>();

    Text text = new Text();
    IntWritable value = new IntWritable();

    Reader reader = new Reader(FileSystem.get(job), new Path(crushOutput.getAbsolutePath()), job);

    while (reader.next(text, value)) {
        actual.add(format("%s\t%d", text, value.get()));
    }

    reader.close();

    int expLines = 0;
    List<List<String>> expected = new ArrayList<List<String>>();

    for (int[] keyCount : keyCounts) {
        int key = keyCount[0];
        int count = keyCount[1];

        List<String> lines = new ArrayList<String>();
        expected.add(lines);

        for (int i = 0, j = 0; i < count; i++, j = j == 9 ? 0 : j + 1) {
            String line = format("%d\t%d", key, j);
            lines.add(line);
        }

        expLines += count;
    }

    /*
     * Make sure each file's data is contiguous in the crush output file.
     */
    for (List<String> list : expected) {
        int idx = actual.indexOf(list.get(0));
        assertThat(idx, greaterThanOrEqualTo(0));

        assertThat(actual.subList(idx, idx + list.size()), equalTo(list));
    }

    assertThat(actual.size(), equalTo(expLines));
}
From source file:com.hdfs.concat.crush.CrushStandAloneSequenceFileTest.java
License:Apache License
private void createFile(File dir, String fileName, int key, int count) throws IOException {
    File file = new File(dir, fileName);

    Writer writer = SequenceFile.createWriter(FileSystem.get(job), job, new Path(file.getAbsolutePath()),
            Text.class, IntWritable.class);

    Text text = new Text(Integer.toString(key));
    IntWritable value = new IntWritable();

    for (int i = 0, j = 0; i < count; i++, j = j == 9 ? 0 : j + 1) {
        value.set(j);
        writer.append(text, value);
    }

    writer.close();
}
From source file:com.hdfs.concat.crush.CrushStandAloneSequenceFileTest.java
License:Apache License
private void verifyFile(File dir, String fileName, int key, int count) throws IOException {
    File file = new File(dir, fileName);

    Reader reader = new Reader(FileSystem.get(job), new Path(file.getAbsolutePath()), job);

    int i = 0;
    int actual = 0;

    Text text = new Text();
    IntWritable value = new IntWritable();

    while (reader.next(text, value)) {
        assertThat(text.toString(), equalTo(Integer.toString(key)));
        assertThat(value.get(), equalTo(i));

        if (i == 9) {
            i = 0;
        } else {
            i++;
        }

        actual++;
    }

    reader.close();

    assertThat(actual, equalTo(count));
}
From source file:com.hdfs.concat.crush.CrushTest.java
License:Apache License
@Test
public void bucketing() throws Exception {
    File in = tmp.newFolder("in");

    Counters expectedCounters = new Counters();
    List<String> expectedBucketFiles = new ArrayList<String>();

    /*
     * Create a hierarchy of directories. Directories are distinguished by a trailing slash in these comments.
     *
     * 1/
     *   1.1/
     *     file1  10 bytes
     *     file2  20 bytes
     *     file3  30 bytes
     *     file4  41 bytes
     *     file5  15 bytes
     *     file6  30 bytes
     *     file7  20 bytes
     *   1.2/
     *     file1  20 bytes
     *     file2  10 bytes
     *   1.3/
     * 2/
     *   file1  70 bytes
     *   file2  30 bytes
     *   file3  25 bytes
     *   file4  30 bytes
     *   file5  35 bytes
     *   2.1/
     *     file1  10 bytes
     *   2.2/
     *     file1  25 bytes
     *     file2  15 bytes
     *     file3  35 bytes
     *   2.3/
     *     file1  41 bytes
     *     file2  10 bytes
     *   2.4/
     *     2.4.1/
     *       file1 100 bytes
     *       file2  30 bytes
     *     2.4.2/
     *       file1  20 bytes
     *       file2  20 bytes
     *       file3  10 bytes
     */

    /*
     * in contains 2 dirs and no files so it is skipped.
     *
     * in/
     *   1/
     *   2/
     */
    expectedCounters.incrCounter(MapperCounter.DIRS_FOUND, 1);
    expectedCounters.incrCounter(MapperCounter.DIRS_SKIPPED, 1);

    tmp.newFolder("in/1");
    File dir2 = tmp.newFolder("in/2");

    /*
     * in/1 contains three dirs and no files so it is skipped.
     *
     * in/
     *   1/
     *     1.1/
     *     1.2/
     *     1.3/
     */
    expectedCounters.incrCounter(MapperCounter.DIRS_FOUND, 1);
    expectedCounters.incrCounter(MapperCounter.DIRS_SKIPPED, 1);

    File dir1_1 = tmp.newFolder("in/1/1.1");
    File dir1_2 = tmp.newFolder("in/1/1.2");
    tmp.newFolder("in/1/1.3");

    /*
     * in/2 contains five files and four dirs.
     *
     * in/
     *   2/
     *     file1 70 bytes
     *     file2 30 bytes
     *     file3 25 bytes
     *     file4 30 bytes
     *     file5 35 bytes
     *     2.1/
     *     2.2/
     *     2.3/
     *     2.4/
     *
     *   0          1          2
     *   file5 35   file2 30   file4 30
     *              file3 25
     *
     * Buckets 0 and 2 have a single file each so they are ignored.
     */
    expectedCounters.incrCounter(MapperCounter.DIRS_FOUND, 1);
    expectedCounters.incrCounter(MapperCounter.DIRS_ELIGIBLE, 1);

    expectedCounters.incrCounter(MapperCounter.FILES_FOUND, 5);
    expectedCounters.incrCounter(MapperCounter.FILES_ELIGIBLE, 2);
    expectedCounters.incrCounter(MapperCounter.FILES_SKIPPED, 3);

    File dir2_1 = tmp.newFolder("in/2/2.1");
    File dir2_2 = tmp.newFolder("in/2/2.2");
    File dir2_3 = tmp.newFolder("in/2/2.3");
    tmp.newFolder("in/2/2.4");

    createFile(dir2, "file1", 70);
    createFile(dir2, "file2", 30);
    createFile(dir2, "file3", 25);
    createFile(dir2, "file4", 30);
    createFile(dir2, "file5", 35);

    expectedBucketFiles.add(format("%s\t%s", dir2.getAbsolutePath() + "-1", new File(dir2, "file2").getAbsolutePath()));
    expectedBucketFiles.add(format("%s\t%s", dir2.getAbsolutePath() + "-1", new File(dir2, "file3").getAbsolutePath()));

    /*
     * in/1/1.1 contains seven files and no dirs.
     *
     * in/
     *   1/
     *     1.1/
     *       file1 10 bytes
     *       file2 20 bytes
     *       file3 30 bytes
     *       file4 41 bytes
     *       file5 15 bytes
     *       file6 30 bytes
     *       file7 20 bytes
     *
     *   0          1          2
     *   file3 30   file6 30   file2 20
     *   file5 15   file1 10   file7 20
     *
     * file4 is > 50 * 0.8 so it is ignored.
     */
    expectedCounters.incrCounter(MapperCounter.DIRS_FOUND, 1);
    expectedCounters.incrCounter(MapperCounter.DIRS_ELIGIBLE, 1);

    expectedCounters.incrCounter(MapperCounter.FILES_FOUND, 7);
    expectedCounters.incrCounter(MapperCounter.FILES_ELIGIBLE, 6);
    expectedCounters.incrCounter(MapperCounter.FILES_SKIPPED, 1);

    createFile(dir1_1, "file1", 10);
    createFile(dir1_1, "file2", 20);
    createFile(dir1_1, "file3", 30);
    createFile(dir1_1, "file4", 41);
    createFile(dir1_1, "file5", 15);
    createFile(dir1_1, "file6", 30);
    createFile(dir1_1, "file7", 20);

    expectedBucketFiles.add(format("%s\t%s", dir1_1.getAbsolutePath() + "-0", new File(dir1_1, "file3").getAbsolutePath()));
    expectedBucketFiles.add(format("%s\t%s", dir1_1.getAbsolutePath() + "-0", new File(dir1_1, "file5").getAbsolutePath()));
    expectedBucketFiles.add(format("%s\t%s", dir1_1.getAbsolutePath() + "-1", new File(dir1_1, "file6").getAbsolutePath()));
    expectedBucketFiles.add(format("%s\t%s", dir1_1.getAbsolutePath() + "-1", new File(dir1_1, "file1").getAbsolutePath()));
    expectedBucketFiles.add(format("%s\t%s", dir1_1.getAbsolutePath() + "-2", new File(dir1_1, "file2").getAbsolutePath()));
    expectedBucketFiles.add(format("%s\t%s", dir1_1.getAbsolutePath() + "-2", new File(dir1_1, "file7").getAbsolutePath()));

    /*
     * in/1/1.2 contains two files.
     *
     * in/
     *   1/
     *     1.2/
     *       file1 20 bytes
     *       file2 10 bytes
     */
    expectedCounters.incrCounter(MapperCounter.DIRS_FOUND, 1);
    expectedCounters.incrCounter(MapperCounter.DIRS_ELIGIBLE, 1);

    expectedCounters.incrCounter(MapperCounter.FILES_FOUND, 2);
    expectedCounters.incrCounter(MapperCounter.FILES_ELIGIBLE, 2);

    createFile(dir1_2, "file1", 20);
    createFile(dir1_2, "file2", 10);

    expectedBucketFiles.add(format("%s\t%s", dir1_2.getAbsolutePath() + "-0", new File(dir1_2, "file1").getAbsolutePath()));
    expectedBucketFiles.add(format("%s\t%s", dir1_2.getAbsolutePath() + "-0", new File(dir1_2, "file2").getAbsolutePath()));

    /*
     * in/1/1.3 is empty.
     *
     * in/
     *   1/
     *     1.3/
     */
    expectedCounters.incrCounter(MapperCounter.DIRS_FOUND, 1);
    expectedCounters.incrCounter(MapperCounter.DIRS_SKIPPED, 1);

    tmp.newFolder("in/1/1.3");

    /*
     * in/2/2.1 contains one file.
     *
     * in/
     *   2/
     *     2.1/
     *       file1 10 bytes
     *
     * Single file dirs are ignored.
     */
    expectedCounters.incrCounter(MapperCounter.DIRS_FOUND, 1);
    expectedCounters.incrCounter(MapperCounter.DIRS_SKIPPED, 1);

    expectedCounters.incrCounter(MapperCounter.FILES_FOUND, 1);
    expectedCounters.incrCounter(MapperCounter.FILES_SKIPPED, 1);

    createFile(dir2_1, "file1", 10);

    /*
     * in/2/2.2 contains three files.
     *
     * in/
     *   2/
     *     2.2/
     *       file1 25 bytes
     *       file2 15 bytes
     *       file3 35 bytes
     *
     *   0          1
     *   file3 35   file1 25
     *              file2 15
     *
     * Bucket 0 with a single file is ignored.
     */
    expectedCounters.incrCounter(MapperCounter.DIRS_FOUND, 1);
    expectedCounters.incrCounter(MapperCounter.DIRS_ELIGIBLE, 1);

    expectedCounters.incrCounter(MapperCounter.FILES_FOUND, 3);
    expectedCounters.incrCounter(MapperCounter.FILES_ELIGIBLE, 2);
    expectedCounters.incrCounter(MapperCounter.FILES_SKIPPED, 1);

    createFile(dir2_2, "file1", 25);
    createFile(dir2_2, "file2", 15);
    createFile(dir2_2, "file3", 35);

    expectedBucketFiles.add(format("%s\t%s", dir2_2.getAbsolutePath() + "-1", new File(dir2_2, "file1").getAbsolutePath()));
    expectedBucketFiles.add(format("%s\t%s", dir2_2.getAbsolutePath() + "-1", new File(dir2_2, "file2").getAbsolutePath()));

    /*
     * in/2/2.3 contains 2 files.
     *
     * in/
     *   2/
     *     2.3/
     *       file1 41 bytes
     *       file2 10 bytes
     *
     * file1 is too big, leaving file2 as a single file, which is also ignored.
     */
    expectedCounters.incrCounter(MapperCounter.DIRS_FOUND, 1);
    expectedCounters.incrCounter(MapperCounter.DIRS_SKIPPED, 1);

    expectedCounters.incrCounter(MapperCounter.FILES_FOUND, 2);
    expectedCounters.incrCounter(MapperCounter.FILES_SKIPPED, 2);

    createFile(dir2_3, "file1", 41);
    createFile(dir2_3, "file2", 10);

    /*
     * in/2/2.4 contains two subdirectories and no files.
     *
     * in/
     *   2/
     *     2.4/
     *       2.4.1/
     *       2.4.2/
     */
    expectedCounters.incrCounter(MapperCounter.DIRS_FOUND, 1);
    expectedCounters.incrCounter(MapperCounter.DIRS_SKIPPED, 1);

    tmp.newFolder("in/2/2.4");

    File dir2_4_1 = tmp.newFolder("in/2/2.4/2.4.1");
    File dir2_4_2 = tmp.newFolder("in/2/2.4/2.4.2");

    /*
     * in/
     *   2/
     *     2.4/
     *       2.4.1/
     *         file1 100 bytes
     *         file2  30 bytes
     */
    expectedCounters.incrCounter(MapperCounter.DIRS_FOUND, 1);
    expectedCounters.incrCounter(MapperCounter.DIRS_SKIPPED, 1);

    expectedCounters.incrCounter(MapperCounter.FILES_FOUND, 2);
    expectedCounters.incrCounter(MapperCounter.FILES_SKIPPED, 2);

    createFile(dir2_4_1, "file1", 100);
    createFile(dir2_4_1, "file2", 30);

    /*
     * in/
     *   2/
     *     2.4/
     *       2.4.2/
     *         file1 20 bytes
     *         file2 20 bytes
     *         file3 10 bytes
     *
     *   0
     *   file1 20
     *   file2 20
     *   file3 10
     */
    expectedCounters.incrCounter(MapperCounter.DIRS_FOUND, 1);
    expectedCounters.incrCounter(MapperCounter.DIRS_ELIGIBLE, 1);

    expectedCounters.incrCounter(MapperCounter.FILES_FOUND, 3);
    expectedCounters.incrCounter(MapperCounter.FILES_ELIGIBLE, 3);

    createFile(dir2_4_2, "file1", 20);
    createFile(dir2_4_2, "file2", 20);
    createFile(dir2_4_2, "file3", 10);

    expectedBucketFiles.add(format("%s\t%s", dir2_4_2.getAbsolutePath() + "-0", new File(dir2_4_2, "file1").getAbsolutePath()));
    expectedBucketFiles.add(format("%s\t%s", dir2_4_2.getAbsolutePath() + "-0", new File(dir2_4_2, "file2").getAbsolutePath()));
    expectedBucketFiles.add(format("%s\t%s", dir2_4_2.getAbsolutePath() + "-0", new File(dir2_4_2, "file3").getAbsolutePath()));

    Crush crush = new Crush();

    crush.setConf(job);
    crush.setFileSystem(fileSystem);

    /*
     * Call these in the same order that run() does.
     */
    crush.createJobConfAndParseArgs("--compress=none", "--max-file-blocks=1", in.getAbsolutePath(),
            new File(tmp.getRoot(), "out").getAbsolutePath(), "20101124171730");
    crush.writeDirs();

    /*
     * Verify bucket contents.
     */
    List<String> actualBucketFiles = new ArrayList<String>();

    Text key = new Text();
    Text value = new Text();

    Reader reader = new Reader(FileSystem.get(job), crush.getBucketFiles(), job);

    while (reader.next(key, value)) {
        actualBucketFiles.add(format("%s\t%s", key, value));
    }

    reader.close();

    Collections.sort(expectedBucketFiles);
    Collections.sort(actualBucketFiles);

    assertThat(actualBucketFiles, equalTo(expectedBucketFiles));

    /*
     * Verify the partition map.
     */
    Reader partitionMapReader = new Reader(FileSystem.get(job), crush.getPartitionMap(), job);

    IntWritable partNum = new IntWritable();

    Map<String, Integer> actualPartitions = new HashMap<String, Integer>();

    while (partitionMapReader.next(key, partNum)) {
        actualPartitions.put(key.toString(), partNum.get());
    }

    partitionMapReader.close();

    /*
     * These crush files need to be allocated into 5 partitions:
     *
     * in/2-1            55 bytes
     * in/1/1.1-0        45 bytes
     * in/1/1.1-2        40 bytes
     * in/1/1.1-1        40 bytes
     * in/1/1.2-0        30 bytes
     * in/2/2.2-1        40 bytes
     * in/2/2.4/2.4.2-0  50 bytes
     *
     *   0           1                     2               3               4
     *   in/2-1 55   in/2/2.4/2.4.2-0 50   in/1/1.1-0 45   in/1/1.1-2 40   in/1/1.1-1 40
     *                                                     in/2/2.2-1 40   in/1/1.2-0 30
     */
    Map<String, Integer> expectedPartitions = new HashMap<String, Integer>();

    // TODO: this may not be deterministic due to jvm/hashmap/filesystem
    expectedPartitions.put(dir2.getAbsolutePath() + "-1", 0);
    expectedPartitions.put(dir2_4_2.getAbsolutePath() + "-0", 1);
    expectedPartitions.put(dir1_1.getAbsolutePath() + "-0", 2);
    expectedPartitions.put(dir1_1.getAbsolutePath() + "-2", 4);
    expectedPartitions.put(dir2_2.getAbsolutePath() + "-1", 3);
    expectedPartitions.put(dir1_1.getAbsolutePath() + "-1", 3);
    expectedPartitions.put(dir1_2.getAbsolutePath() + "-0", 4);

    assertThat(actualPartitions, equalTo(expectedPartitions));

    /*
     * Verify counters.
     */
    Counters actualCounters = new Counters();

    DataInputStream countersStream = FileSystem.get(job).open(crush.getCounters());

    actualCounters.readFields(countersStream);

    countersStream.close();

    assertThat(actualCounters, equalTo(expectedCounters));
}