List of usage examples for org.apache.hadoop.io.IOUtils.closeStream
public static void closeStream(java.io.Closeable stream)
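closeStream closes the given stream quietly: it is null-safe and ignores any IOException thrown by close(), which is why every example below calls it from a finally block or an overridden close(). As a minimal sketch of that pattern (the copy-to-stdout logic here is illustrative, not taken from any of the projects below):

import java.io.InputStream;
import java.net.URI;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IOUtils;

public class CloseStreamSketch {
    public static void main(String[] args) throws Exception {
        String uri = args[0];
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(URI.create(uri), conf);
        InputStream in = null;
        try {
            in = fs.open(new Path(uri));
            // Copy the file to stdout; 'false' leaves closing to the finally block.
            IOUtils.copyBytes(in, System.out, 4096, false);
        } finally {
            // Null-safe, exception-swallowing close: safe even if fs.open() threw.
            IOUtils.closeStream(in);
        }
    }
}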
From source file: cn.lhfei.hadoop.ch04.SequenceFileWriteDemo.java
License: Apache License
public static void main(String[] args) {
    String uri = args[0];
    Configuration conf = new Configuration();
    FileSystem fs = null;
    SequenceFile.Writer writer = null;
    try {
        fs = FileSystem.get(URI.create(uri), conf);
        Path path = new Path(uri);
        IntWritable key = new IntWritable();
        Text value = new Text();
        //writer = SequenceFile.createWriter(fs, conf, path, key.getClass(), value.getClass());
        // The Option-based overload needs the output location as Writer.file(path);
        // note Writer.valueClass(...), not writer.valueClass(...), which would NPE
        // on the still-null writer.
        writer = SequenceFile.createWriter(conf, Writer.file(path), Writer.keyClass(key.getClass()),
                Writer.valueClass(value.getClass()));
        for (int i = 0; i < 100; i++) {
            key.set(100 - i);
            value.set(DATA[i % DATA.length]);
            System.out.printf("[%s]\t%s\t%s\n", writer.getLength(), key, value);
            writer.append(key, value);
        }
    } catch (IOException e) {
        e.printStackTrace();
    } finally {
        IOUtils.closeStream(writer);
    }
}
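Note that the Writer.Option-based createWriter overload shown above replaces the older createWriter(fs, conf, path, keyClass, valueClass) form (left commented out), which is deprecated in Hadoop 2.x; the Option-based form requires an explicit Writer.file (or Writer.stream) option for the output location.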
From source file: co.nubetech.hiho.common.HihoTestCase.java
License: Apache License
public void createSequenceFileInHdfs(HashMap inputData, String filePath, String nameOfFile) throws Exception {
    Configuration conf = new Configuration();
    FileSystem fs = getFileSystem();
    Path inputFile = new Path(filePath + "/" + nameOfFile);
    SequenceFile.Writer writer = null;
    try {
        Set key = inputData.keySet();
        Object keyValue = key.iterator().next();
        writer = SequenceFile.createWriter(fs, conf, inputFile, keyValue.getClass(),
                inputData.get(keyValue).getClass());
        logger.debug("key class is: " + keyValue.getClass());
        logger.debug("val class is: " + inputData.get(keyValue).getClass());
        // keySet() and values() of the same unmodified HashMap iterate in a
        // consistent order, so the two iterators stay paired key-to-value.
        Iterator valIterator = inputData.values().iterator();
        Iterator keyIterator = inputData.keySet().iterator();
        while (keyIterator.hasNext()) {
            writer.append(keyIterator.next(), valIterator.next());
        }
    } catch (Exception e) {
        e.printStackTrace();
    } finally {
        IOUtils.closeStream(writer);
    }
}
From source file: co.nubetech.hiho.dedup.TestDedupJob.java
License: Apache License
@Test
public void testDedupByIntWritableKeyWithSequenceFileInputFormat() throws Exception {
    HashMap<IntWritable, Text> inputData1 = new HashMap<IntWritable, Text>();
    inputData1.put(new IntWritable(1),
            new Text("Xavier Wilson,Mason Holloway,Carlos Johnston,Martin Noel,Drake Mckinney"));
    inputData1.put(new IntWritable(2),
            new Text("Kennedy Bailey,Jerome Perry,David Cabrera,Edan Fleming,Orlando Tyson"));
    inputData1.put(new IntWritable(3),
            new Text("Drake Mckinney,Murphy Baird,Theodore Lindsey,Nehru Wilcox,Harper Klein"));
    createSequenceFileInHdfs(inputData1, "/input1", "testFile1.seq");

    HashMap<IntWritable, Text> inputData2 = new HashMap<IntWritable, Text>();
    inputData2.put(new IntWritable(1),
            new Text("Zephania Bauer,Jermaine Gordon,Vincent Moon,Steven Pierce,Jasper Campos"));
    inputData2.put(new IntWritable(2),
            new Text("Kennedy Bailey,Plato Atkinson,Stuart Guy,Rooney Levy,Judah Benson"));
    inputData2.put(new IntWritable(4),
            new Text("Drake Mckinney,Murphy Baird,Theodore Lindsey,Nehru Wilcox,Harper Klein"));
    createSequenceFileInHdfs(inputData2, "/input2", "testFile2.seq");

    String[] args = new String[] { "-inputFormat",
            "org.apache.hadoop.mapreduce.lib.input.SequenceFileInputFormat", "-inputPath",
            "/input1,/input2", "-outputPath", "output", "-inputKeyClassName",
            "org.apache.hadoop.io.IntWritable", "-inputValueClassName", "org.apache.hadoop.io.Text",
            "-dedupBy", "key" };
    DedupJob job = runDedupJob(args);
    assertEquals(6, job.getTotalRecordsRead());
    assertEquals(0, job.getBadRecords());
    assertEquals(4, job.getOutput());
    assertEquals(2, job.getDuplicateRecords());

    FileSystem outputFS = getFileSystem();
    Path outputPath = new Path(outputFS.getHomeDirectory(), "output/part-r-00000");
    Configuration conf = new Configuration();
    SequenceFile.Reader reader = new SequenceFile.Reader(outputFS, outputPath, conf);
    Writable writableKey = (Writable) ReflectionUtils.newInstance(reader.getKeyClass(), conf);
    Writable writableValue = (Writable) ReflectionUtils.newInstance(reader.getValueClass(), conf);
    List<IntWritable> expectedOutput = new ArrayList<IntWritable>();
    expectedOutput.add(new IntWritable(1));
    expectedOutput.add(new IntWritable(2));
    expectedOutput.add(new IntWritable(3));
    expectedOutput.add(new IntWritable(4));
    int count = 0;
    while (reader.next(writableKey, writableValue)) {
        logger.debug("key and value is: " + writableKey + ", " + writableValue);
        assertTrue("Matched output " + writableKey, expectedOutput.contains(writableKey));
        count++;
    }
    IOUtils.closeStream(reader);
    assertEquals(4, count);
}
From source file: co.nubetech.hiho.dedup.TestDedupJob.java
License: Apache License
@Test
public void testDedupByValueWithSequenceFileInputFormat() throws Exception {
    HashMap<IntWritable, Text> inputData1 = new HashMap<IntWritable, Text>();
    inputData1.put(new IntWritable(1),
            new Text("Xavier Wilson,Mason Holloway,Carlos Johnston,Martin Noel,Drake Mckinney"));
    inputData1.put(new IntWritable(2),
            new Text("Kennedy Bailey,Jerome Perry,David Cabrera,Edan Fleming,Orlando Tyson"));
    inputData1.put(new IntWritable(3),
            new Text("Drake Mckinney,Murphy Baird,Theodore Lindsey,Nehru Wilcox,Harper Klein"));
    createSequenceFileInHdfs(inputData1, "/input1", "testFile1.seq");

    HashMap<IntWritable, Text> inputData2 = new HashMap<IntWritable, Text>();
    inputData2.put(new IntWritable(1),
            new Text("Zephania Bauer,Jermaine Gordon,Vincent Moon,Steven Pierce,Jasper Campos"));
    inputData2.put(new IntWritable(2),
            new Text("Kennedy Bailey,Plato Atkinson,Stuart Guy,Rooney Levy,Judah Benson"));
    inputData2.put(new IntWritable(4),
            new Text("Drake Mckinney,Murphy Baird,Theodore Lindsey,Nehru Wilcox,Harper Klein"));
    createSequenceFileInHdfs(inputData2, "/input2", "testFile2.seq");

    String[] args = new String[] { "-inputFormat",
            "org.apache.hadoop.mapreduce.lib.input.SequenceFileInputFormat", "-inputPath",
            "/input1,/input2", "-outputPath", "output", "-inputKeyClassName",
            "org.apache.hadoop.io.IntWritable", "-inputValueClassName", "org.apache.hadoop.io.Text",
            "-dedupBy", "value" };
    DedupJob job = runDedupJob(args);
    assertEquals(6, job.getTotalRecordsRead());
    assertEquals(0, job.getBadRecords());
    assertEquals(5, job.getOutput());
    assertEquals(1, job.getDuplicateRecords());

    FileSystem outputFS = getFileSystem();
    Path outputPath = new Path(outputFS.getHomeDirectory(), "output/part-r-00000");
    Configuration conf = new Configuration();
    SequenceFile.Reader reader = new SequenceFile.Reader(outputFS, outputPath, conf);
    Writable writableKey = (Writable) ReflectionUtils.newInstance(reader.getKeyClass(), conf);
    Writable writableValue = (Writable) ReflectionUtils.newInstance(reader.getValueClass(), conf);
    List<Text> expectedOutput = new ArrayList<Text>();
    expectedOutput.add(new Text("Xavier Wilson,Mason Holloway,Carlos Johnston,Martin Noel,Drake Mckinney"));
    expectedOutput.add(new Text("Kennedy Bailey,Jerome Perry,David Cabrera,Edan Fleming,Orlando Tyson"));
    expectedOutput.add(new Text("Drake Mckinney,Murphy Baird,Theodore Lindsey,Nehru Wilcox,Harper Klein"));
    expectedOutput.add(new Text("Zephania Bauer,Jermaine Gordon,Vincent Moon,Steven Pierce,Jasper Campos"));
    expectedOutput.add(new Text("Kennedy Bailey,Plato Atkinson,Stuart Guy,Rooney Levy,Judah Benson"));
    int count = 0;
    while (reader.next(writableKey, writableValue)) {
        logger.debug("key and value is: " + writableKey + ", " + writableValue);
        assertTrue("Matched output " + writableValue, expectedOutput.contains(writableValue));
        count++;
    }
    IOUtils.closeStream(reader);
    assertEquals(5, count);
}
From source file: co.nubetech.hiho.dedup.TestDedupJob.java
License: Apache License
@Test
public void testDedupByLongWritableKeyWithSequenceFileInputFormat() throws Exception {
    HashMap<LongWritable, Text> inputData1 = new HashMap<LongWritable, Text>();
    inputData1.put(new LongWritable(1),
            new Text("Xavier Wilson,Mason Holloway,Carlos Johnston,Martin Noel,Drake Mckinney"));
    inputData1.put(new LongWritable(2),
            new Text("Kennedy Bailey,Jerome Perry,David Cabrera,Edan Fleming,Orlando Tyson"));
    inputData1.put(new LongWritable(3),
            new Text("Drake Mckinney,Murphy Baird,Theodore Lindsey,Nehru Wilcox,Harper Klein"));
    createSequenceFileInHdfs(inputData1, "/input1", "testFile1.seq");

    HashMap<LongWritable, Text> inputData2 = new HashMap<LongWritable, Text>();
    inputData2.put(new LongWritable(1),
            new Text("Zephania Bauer,Jermaine Gordon,Vincent Moon,Steven Pierce,Jasper Campos"));
    inputData2.put(new LongWritable(2),
            new Text("Kennedy Bailey,Plato Atkinson,Stuart Guy,Rooney Levy,Judah Benson"));
    inputData2.put(new LongWritable(4),
            new Text("Drake Mckinney,Murphy Baird,Theodore Lindsey,Nehru Wilcox,Harper Klein"));
    createSequenceFileInHdfs(inputData2, "/input2", "testFile2.seq");

    String[] args = new String[] { "-inputFormat",
            "org.apache.hadoop.mapreduce.lib.input.SequenceFileInputFormat", "-inputPath",
            "/input1,/input2", "-outputPath", "output", "-inputKeyClassName",
            "org.apache.hadoop.io.LongWritable", "-inputValueClassName", "org.apache.hadoop.io.Text",
            "-dedupBy", "key" };
    DedupJob job = runDedupJob(args);
    assertEquals(6, job.getTotalRecordsRead());
    assertEquals(0, job.getBadRecords());
    assertEquals(4, job.getOutput());
    assertEquals(2, job.getDuplicateRecords());

    FileSystem outputFS = getFileSystem();
    Path outputPath = new Path(outputFS.getHomeDirectory(), "output/part-r-00000");
    Configuration conf = new Configuration();
    SequenceFile.Reader reader = new SequenceFile.Reader(outputFS, outputPath, conf);
    Writable writableKey = (Writable) ReflectionUtils.newInstance(reader.getKeyClass(), conf);
    Writable writableValue = (Writable) ReflectionUtils.newInstance(reader.getValueClass(), conf);
    List<LongWritable> expectedOutput = new ArrayList<LongWritable>();
    expectedOutput.add(new LongWritable(1));
    expectedOutput.add(new LongWritable(2));
    expectedOutput.add(new LongWritable(3));
    expectedOutput.add(new LongWritable(4));
    int count = 0;
    while (reader.next(writableKey, writableValue)) {
        logger.debug("key and value is: " + writableKey + ", " + writableValue);
        assertTrue("Matched output " + writableKey, expectedOutput.contains(writableKey));
        count++;
    }
    IOUtils.closeStream(reader);
    assertEquals(4, count);
}
From source file: co.nubetech.hiho.dedup.TestDedupJob.java
License: Apache License
@Test
public void testDedupByCustomObjectKeyWithSequenceFileInputFormat() throws Exception {
    Student student1 = setStudent(new Text("Sam"), new Text("US"), new IntWritable(1),
            new LongWritable(9999999998L), new DoubleWritable(99.12));
    Student student2 = setStudent(new Text("John"), new Text("AUS"), new IntWritable(2),
            new LongWritable(9999999999L), new DoubleWritable(90.12));
    Student student3 = setStudent(new Text("Mary"), new Text("UK"), new IntWritable(3),
            new LongWritable(9999999988L), new DoubleWritable(69.12));
    Student student4 = setStudent(new Text("Kelvin"), new Text("UK"), new IntWritable(4),
            new LongWritable(9999998888L), new DoubleWritable(59.12));

    HashMap<Student, Text> inputData1 = new HashMap<Student, Text>();
    inputData1.put(student1, new Text("Macon Kent,6269 Aenean St.,1-247-399-1051,08253"));
    inputData1.put(student2, new Text("Dale Zamora,521-7792 Mauris Rd.,1-214-625-6970,90510"));
    inputData1.put(student3, new Text("Charles Wood,525-9709 In Rd.,1-370-528-4758,62714"));
    createSequenceFileInHdfs(inputData1, "/input1", "testFile1.seq");

    HashMap<Student, Text> inputData2 = new HashMap<Student, Text>();
    inputData2.put(student2, new Text("Austin Farley,4794 Donec Ave,1-230-823-8164,13508"));
    inputData2.put(student3, new Text("Macaulay Jackson,5435 Dui. Avenue,1-770-395-6446,31584"));
    inputData2.put(student4, new Text("Timon Leonard,716 Ac Ave,1-857-935-3882,62240"));
    createSequenceFileInHdfs(inputData2, "/input2", "testFile2.seq");

    String[] args = new String[] { "-inputPath", "/input1,/input2", "-outputPath", "output",
            "-dedupBy", "key", "-inputFormat",
            "org.apache.hadoop.mapreduce.lib.input.SequenceFileInputFormat", "-inputKeyClassName",
            "co.nubetech.hiho.testdata.Student", "-inputValueClassName", "org.apache.hadoop.io.Text" };
    DedupJob job = runDedupJob(args);
    assertEquals(6, job.getTotalRecordsRead());
    assertEquals(0, job.getBadRecords());
    assertEquals(4, job.getOutput());
    assertEquals(2, job.getDuplicateRecords());

    FileSystem outputFS = getFileSystem();
    Path outputPath = new Path(outputFS.getHomeDirectory(), "output/part-r-00000");
    Configuration conf = new Configuration();
    SequenceFile.Reader reader = new SequenceFile.Reader(outputFS, outputPath, conf);
    Writable writableKey = (Writable) ReflectionUtils.newInstance(reader.getKeyClass(), conf);
    Writable writableValue = (Writable) ReflectionUtils.newInstance(reader.getValueClass(), conf);
    List<Student> expectedOutput = new ArrayList<Student>();
    expectedOutput.add(student1);
    expectedOutput.add(student2);
    expectedOutput.add(student3);
    expectedOutput.add(student4);
    int count = 0;
    while (reader.next(writableKey, writableValue)) {
        logger.debug("key and value is: " + writableKey + ", " + writableValue);
        assertTrue("Matched output " + writableKey, expectedOutput.contains(writableKey));
        count++;
    }
    IOUtils.closeStream(reader);
    assertEquals(4, count);
}
From source file: co.nubetech.hiho.mapred.input.FileStreamRecordReader.java
License: Apache License
@Override
public void close() throws IOException {
    if (stream != null) {
        IOUtils.closeStream(stream);
    }
}
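Note that closeStream is itself null-safe (a null argument is simply ignored, and any IOException from close() is swallowed), so the null check above is defensive rather than strictly required.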
From source file: co.nubetech.hiho.merge.TestMergeJob.java
License: Apache License
@Test
public void testMergeByCustomObjectKeyWithSequenceFileInputFormat() throws Exception {
    Student student1 = setStudent(new Text("Sam"), new Text("US"), new IntWritable(1),
            new LongWritable(9999999998L), new DoubleWritable(99.12));
    Student student2 = setStudent(new Text("John"), new Text("AUS"), new IntWritable(2),
            new LongWritable(9999999999L), new DoubleWritable(90.12));
    Student student3 = setStudent(new Text("Mary"), new Text("UK"), new IntWritable(3),
            new LongWritable(9999999988L), new DoubleWritable(69.12));
    Student student4 = setStudent(new Text("Kelvin"), new Text("UK"), new IntWritable(4),
            new LongWritable(9999998888L), new DoubleWritable(59.12));

    HashMap<Student, Text> inputData1 = new HashMap<Student, Text>();
    inputData1.put(student1, new Text("Macon Kent,6269 Aenean St.,1-247-399-1051,08253"));
    inputData1.put(student2, new Text("Dale Zamora,521-7792 Mauris Rd.,1-214-625-6970,90510"));
    inputData1.put(student3, new Text("Charles Wood,525-9709 In Rd.,1-370-528-4758,62714"));
    createSequenceFileInHdfs(inputData1, "/input1", "testFile1.seq");

    HashMap<Student, Text> inputData2 = new HashMap<Student, Text>();
    inputData2.put(student2, new Text("Austin Farley,4794 Donec Ave,1-230-823-8164,13508"));
    inputData2.put(student3, new Text("Macaulay Jackson,5435 Dui. Avenue,1-770-395-6446,31584"));
    inputData2.put(student4, new Text("Timon Leonard,716 Ac Ave,1-857-935-3882,62240"));
    createSequenceFileInHdfs(inputData2, "/input2", "testFile2.seq");

    String[] args = new String[] { "-newPath", "/input1", "-oldPath", "/input2", "-mergeBy", "key",
            "-outputPath", "output", "-inputFormat",
            "org.apache.hadoop.mapreduce.lib.input.SequenceFileInputFormat", "-inputKeyClassName",
            "co.nubetech.hiho.testdata.Student", "-inputValueClassName", "org.apache.hadoop.io.Text" };
    MergeJob job = runMergeJobs(args);
    assertEquals(3, job.getTotalRecordsNew());
    assertEquals(3, job.getTotalRecordsOld());
    assertEquals(0, job.getBadRecords());
    assertEquals(4, job.getOutput());

    FileSystem outputFS = getFileSystem();
    Path outputPath = new Path(outputFS.getHomeDirectory(), "output/part-r-00000");
    Configuration conf = new Configuration();
    SequenceFile.Reader reader = new SequenceFile.Reader(outputFS, outputPath, conf);
    Writable writableKey = (Writable) ReflectionUtils.newInstance(reader.getKeyClass(), conf);
    Writable writableValue = (Writable) ReflectionUtils.newInstance(reader.getValueClass(), conf);
    List<Student> expectedOutput = new ArrayList<Student>();
    expectedOutput.add(student1);
    expectedOutput.add(student2);
    expectedOutput.add(student3);
    expectedOutput.add(student4);
    int count = 0;
    while (reader.next(writableKey, writableValue)) {
        logger.debug("key and value is: " + writableKey + ", " + writableValue);
        assertTrue("Matched output " + writableKey, expectedOutput.contains(writableKey));
        count++;
    }
    IOUtils.closeStream(reader);
    assertEquals(4, count);
}
From source file: co.nubetech.hiho.merge.TestMergeJob.java
License: Apache License
@Test
public void testMergeByIntWritableKeyWithSequenceFileInputFormat() throws Exception {
    HashMap<IntWritable, Text> inputData1 = new HashMap<IntWritable, Text>();
    inputData1.put(new IntWritable(1), new Text("Macon Kent,6269 Aenean St.,1-247-399-1051,08253"));
    inputData1.put(new IntWritable(2), new Text("Dale Zamora,521-7792 Mauris Rd.,1-214-625-6970,90510"));
    inputData1.put(new IntWritable(3), new Text("Charles Wood,525-9709 In Rd.,1-370-528-4758,62714"));
    createSequenceFileInHdfs(inputData1, "/input1", "testFile1.seq");

    HashMap<IntWritable, Text> inputData2 = new HashMap<IntWritable, Text>();
    inputData2.put(new IntWritable(1), new Text("Macaulay Jackson,5435 Dui. Avenue,1-770-395-6446,31584"));
    inputData2.put(new IntWritable(2), new Text("Timon Leonard,716 Ac Ave,1-857-935-3882,62240"));
    inputData2.put(new IntWritable(4), new Text("Charles Wood,525-9709 In Rd.,1-370-528-4758,62714"));
    createSequenceFileInHdfs(inputData2, "/input2", "testFile2.seq");

    String[] args = new String[] { "-newPath", "/input1", "-oldPath", "/input2", "-mergeBy", "key",
            "-outputPath", "output", "-inputFormat",
            "org.apache.hadoop.mapreduce.lib.input.SequenceFileInputFormat", "-inputKeyClassName",
            "org.apache.hadoop.io.IntWritable", "-inputValueClassName", "org.apache.hadoop.io.Text" };
    MergeJob job = runMergeJobs(args);
    assertEquals(3, job.getTotalRecordsNew());
    assertEquals(3, job.getTotalRecordsOld());
    assertEquals(0, job.getBadRecords());
    assertEquals(4, job.getOutput());

    FileSystem outputFS = getFileSystem();
    Path outputPath = new Path(outputFS.getHomeDirectory(), "output/part-r-00000");
    Configuration conf = new Configuration();
    SequenceFile.Reader reader = new SequenceFile.Reader(outputFS, outputPath, conf);
    Writable writableKey = (Writable) ReflectionUtils.newInstance(reader.getKeyClass(), conf);
    Writable writableValue = (Writable) ReflectionUtils.newInstance(reader.getValueClass(), conf);
    List<IntWritable> expectedOutput = new ArrayList<IntWritable>();
    expectedOutput.add(new IntWritable(1));
    expectedOutput.add(new IntWritable(2));
    expectedOutput.add(new IntWritable(3));
    expectedOutput.add(new IntWritable(4));
    int count = 0;
    while (reader.next(writableKey, writableValue)) {
        logger.debug("key and value is: " + writableKey + ", " + writableValue);
        assertTrue("Matched output " + writableKey, expectedOutput.contains(writableKey));
        count++;
    }
    IOUtils.closeStream(reader);
    assertEquals(4, count);
}
From source file: co.nubetech.hiho.merge.TestMergeJob.java
License: Apache License
@Test
public void testMergeByLongWritableKeyWithSequenceFileInputFormat() throws Exception {
    HashMap<LongWritable, Text> inputData1 = new HashMap<LongWritable, Text>();
    inputData1.put(new LongWritable(1), new Text("Macon Kent,6269 Aenean St.,1-247-399-1051,08253"));
    inputData1.put(new LongWritable(2), new Text("Dale Zamora,521-7792 Mauris Rd.,1-214-625-6970,90510"));
    inputData1.put(new LongWritable(3), new Text("Charles Wood,525-9709 In Rd.,1-370-528-4758,62714"));
    createSequenceFileInHdfs(inputData1, "/input1", "testFile1.seq");

    HashMap<LongWritable, Text> inputData2 = new HashMap<LongWritable, Text>();
    inputData2.put(new LongWritable(1), new Text("Macaulay Jackson,5435 Dui. Avenue,1-770-395-6446,31584"));
    inputData2.put(new LongWritable(2), new Text("Timon Leonard,716 Ac Ave,1-857-935-3882,62240"));
    inputData2.put(new LongWritable(4), new Text("Charles Wood,525-9709 In Rd.,1-370-528-4758,62714"));
    createSequenceFileInHdfs(inputData2, "/input2", "testFile2.seq");

    String[] args = new String[] { "-newPath", "/input1", "-oldPath", "/input2", "-mergeBy", "key",
            "-outputPath", "output", "-inputFormat",
            "org.apache.hadoop.mapreduce.lib.input.SequenceFileInputFormat", "-inputKeyClassName",
            "org.apache.hadoop.io.LongWritable", "-inputValueClassName", "org.apache.hadoop.io.Text" };
    MergeJob job = runMergeJobs(args);
    assertEquals(3, job.getTotalRecordsNew());
    assertEquals(3, job.getTotalRecordsOld());
    assertEquals(0, job.getBadRecords());
    assertEquals(4, job.getOutput());

    FileSystem outputFS = getFileSystem();
    Path outputPath = new Path(outputFS.getHomeDirectory(), "output/part-r-00000");
    Configuration conf = new Configuration();
    SequenceFile.Reader reader = new SequenceFile.Reader(outputFS, outputPath, conf);
    Writable writableKey = (Writable) ReflectionUtils.newInstance(reader.getKeyClass(), conf);
    Writable writableValue = (Writable) ReflectionUtils.newInstance(reader.getValueClass(), conf);
    List<LongWritable> expectedOutput = new ArrayList<LongWritable>();
    expectedOutput.add(new LongWritable(1));
    expectedOutput.add(new LongWritable(2));
    expectedOutput.add(new LongWritable(3));
    expectedOutput.add(new LongWritable(4));
    int count = 0;
    while (reader.next(writableKey, writableValue)) {
        logger.debug("key and value is: " + writableKey + ", " + writableValue);
        assertTrue("Matched output " + writableKey, expectedOutput.contains(writableKey));
        count++;
    }
    IOUtils.closeStream(reader);
    assertEquals(4, count);
}