List of usage examples for org.apache.hadoop.io.LongWritable
public LongWritable(long value)
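Before the project sources below, here is a minimal self-contained sketch (not taken from any of those sources; the class name LongWritableExample is illustrative) showing the constructor together with the get/set accessors and a round trip through the Writable serialization protocol:

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.DataInputStream;
import java.io.DataOutputStream;
import java.io.IOException;

import org.apache.hadoop.io.LongWritable;

public class LongWritableExample {
    public static void main(String[] args) throws IOException {
        // Construct a LongWritable holding an initial value.
        LongWritable writable = new LongWritable(42L);
        System.out.println(writable.get()); // 42

        // LongWritable is mutable; set() replaces the wrapped value.
        writable.set(7L);

        // Serialize via Writable.write(DataOutput) ...
        ByteArrayOutputStream bytes = new ByteArrayOutputStream();
        writable.write(new DataOutputStream(bytes));

        // ... and deserialize into a fresh instance via readFields(DataInput).
        LongWritable copy = new LongWritable();
        copy.readFields(new DataInputStream(new ByteArrayInputStream(bytes.toByteArray())));
        System.out.println(copy.get()); // 7
    }
}

LongWritable is mutable by design so MapReduce can reuse one instance across records; copy the value out with get() if you need to keep it past the current record.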
From source file: co.cask.tephra.persist.AbstractTransactionLog.java
License: Apache License

@Override
public void append(TransactionEdit edit) throws IOException {
    long startTime = System.nanoTime();
    synchronized (this) {
        ensureAvailable();
        Entry entry = new Entry(new LongWritable(logSequence.getAndIncrement()), edit);
        // add to pending edits
        append(entry);
    }
    // wait for sync to complete
    sync();
    long durationMillis = (System.nanoTime() - startTime) / 1000000L;
    if (durationMillis > SLOW_APPEND_THRESHOLD) {
        LOG.info("Slow append to log " + getName() + ", took " + durationMillis + " msec.");
    }
}
From source file: co.cask.tephra.persist.AbstractTransactionLog.java
License: Apache License

@Override
public void append(List<TransactionEdit> edits) throws IOException {
    long startTime = System.nanoTime();
    synchronized (this) {
        ensureAvailable();
        for (TransactionEdit edit : edits) {
            Entry entry = new Entry(new LongWritable(logSequence.getAndIncrement()), edit);
            // add to pending edits
            append(entry);
        }
    }
    // wait for sync to complete
    sync();
    long durationMillis = (System.nanoTime() - startTime) / 1000000L;
    if (durationMillis > SLOW_APPEND_THRESHOLD) {
        LOG.info("Slow append to log " + getName() + ", took " + durationMillis + " msec.");
    }
}
From source file: co.cask.tephra.persist.HDFSTransactionLogTest.java
License: Apache License

private void testTransactionLogSync(int totalCount, int batchSize, boolean withMarker, boolean isComplete)
        throws Exception {
    List<TransactionEdit> edits = TransactionEditUtil.createRandomEdits(totalCount);
    long timestamp = System.currentTimeMillis();
    Configuration configuration = getConfiguration();
    FileSystem fs = FileSystem.newInstance(FileSystem.getDefaultUri(configuration), configuration);
    SequenceFile.Writer writer = getSequenceFileWriter(configuration, fs, timestamp, withMarker);
    AtomicLong logSequence = new AtomicLong();
    HDFSTransactionLog transactionLog = getHDFSTransactionLog(configuration, fs, timestamp);
    AbstractTransactionLog.Entry entry;

    for (int i = 0; i < totalCount - batchSize; i += batchSize) {
        if (withMarker) {
            writeNumWrites(writer, batchSize);
        }
        for (int j = 0; j < batchSize; j++) {
            entry = new AbstractTransactionLog.Entry(new LongWritable(logSequence.getAndIncrement()),
                    edits.get(j));
            writer.append(entry.getKey(), entry.getEdit());
        }
        writer.syncFs();
    }

    if (withMarker) {
        writeNumWrites(writer, batchSize);
    }
    for (int i = totalCount - batchSize; i < totalCount - 1; i++) {
        entry = new AbstractTransactionLog.Entry(new LongWritable(logSequence.getAndIncrement()), edits.get(i));
        writer.append(entry.getKey(), entry.getEdit());
    }
    entry = new AbstractTransactionLog.Entry(new LongWritable(logSequence.getAndIncrement()),
            edits.get(totalCount - 1));
    if (isComplete) {
        writer.append(entry.getKey(), entry.getEdit());
    } else {
        byte[] bytes = Longs.toByteArray(entry.getKey().get());
        writer.appendRaw(bytes, 0, bytes.length, new SequenceFile.ValueBytes() {
            @Override
            public void writeUncompressedBytes(DataOutputStream outStream) throws IOException {
                byte[] test = new byte[] { 0x2 };
                outStream.write(test, 0, 1);
            }

            @Override
            public void writeCompressedBytes(DataOutputStream outStream)
                    throws IllegalArgumentException, IOException {
                // no-op
            }

            @Override
            public int getSize() {
                // mimic size longer than the actual byte array size written, so we would reach EOF
                return 12;
            }
        });
    }
    writer.syncFs();
    Closeables.closeQuietly(writer);

    // now let's try to read this log
    TransactionLogReader reader = transactionLog.getReader();
    int syncedEdits = 0;
    while (reader.next() != null) {
        // testing reading the transaction edits
        syncedEdits++;
    }
    if (isComplete) {
        Assert.assertEquals(totalCount, syncedEdits);
    } else {
        Assert.assertEquals(totalCount - batchSize, syncedEdits);
    }
}
From source file: co.nubetech.hiho.dedup.TestDedupJob.java
License: Apache License

@Test
public void testDedupByLongWritableKeyWithSequenceFileInputFormat() throws Exception {
    HashMap<LongWritable, Text> inputData1 = new HashMap<LongWritable, Text>();
    inputData1.put(new LongWritable(1),
            new Text("Xavier Wilson,Mason Holloway,Carlos Johnston,Martin Noel,Drake Mckinney"));
    inputData1.put(new LongWritable(2),
            new Text("Kennedy Bailey,Jerome Perry,David Cabrera,Edan Fleming,Orlando Tyson"));
    inputData1.put(new LongWritable(3),
            new Text("Drake Mckinney,Murphy Baird,Theodore Lindsey,Nehru Wilcox,Harper Klein"));
    createSequenceFileInHdfs(inputData1, "/input1", "testFile1.seq");

    HashMap<LongWritable, Text> inputData2 = new HashMap<LongWritable, Text>();
    inputData2.put(new LongWritable(1),
            new Text("Zephania Bauer,Jermaine Gordon,Vincent Moon,Steven Pierce,Jasper Campos"));
    inputData2.put(new LongWritable(2),
            new Text("Kennedy Bailey,Plato Atkinson,Stuart Guy,Rooney Levy,Judah Benson"));
    inputData2.put(new LongWritable(4),
            new Text("Drake Mckinney,Murphy Baird,Theodore Lindsey,Nehru Wilcox,Harper Klein"));
    createSequenceFileInHdfs(inputData2, "/input2", "testFile2.seq");

    String[] args = new String[] { "-inputFormat",
            "org.apache.hadoop.mapreduce.lib.input.SequenceFileInputFormat", "-inputPath", "/input1,/input2",
            "-outputPath", "output", "-inputKeyClassName", "org.apache.hadoop.io.LongWritable",
            "-inputValueClassName", "org.apache.hadoop.io.Text", "-dedupBy", "key" };
    DedupJob job = runDedupJob(args);
    assertEquals(6, job.getTotalRecordsRead());
    assertEquals(0, job.getBadRecords());
    assertEquals(4, job.getOutput());
    assertEquals(2, job.getDuplicateRecords());

    FileSystem outputFS = getFileSystem();
    Path outputPath = new Path(outputFS.getHomeDirectory(), "output/part-r-00000");
    Configuration conf = new Configuration();
    SequenceFile.Reader reader = new SequenceFile.Reader(outputFS, outputPath, conf);
    Writable writableKey = (Writable) ReflectionUtils.newInstance(reader.getKeyClass(), conf);
    Writable writableValue = (Writable) ReflectionUtils.newInstance(reader.getValueClass(), conf);
    List<LongWritable> expectedOutput = new ArrayList<LongWritable>();
    expectedOutput.add(new LongWritable(1));
    expectedOutput.add(new LongWritable(2));
    expectedOutput.add(new LongWritable(3));
    expectedOutput.add(new LongWritable(4));
    int count = 0;
    while (reader.next(writableKey, writableValue)) {
        logger.debug("key and value is: " + writableKey + ", " + writableValue);
        assertTrue("Matched output " + writableKey, expectedOutput.contains(writableKey));
        count++;
    }
    IOUtils.closeStream(reader);
    assertEquals(4, count);
}
From source file: co.nubetech.hiho.dedup.TestDedupJob.java
License: Apache License

@Test
public void testDedupByCustomObjectKeyWithSequenceFileInputFormat() throws Exception {
    Student student1 = setStudent(new Text("Sam"), new Text("US"), new IntWritable(1),
            new LongWritable(9999999998l), new DoubleWritable(99.12));
    Student student2 = setStudent(new Text("John"), new Text("AUS"), new IntWritable(2),
            new LongWritable(9999999999l), new DoubleWritable(90.12));
    Student student3 = setStudent(new Text("Mary"), new Text("UK"), new IntWritable(3),
            new LongWritable(9999999988l), new DoubleWritable(69.12));
    Student student4 = setStudent(new Text("Kelvin"), new Text("UK"), new IntWritable(4),
            new LongWritable(9999998888l), new DoubleWritable(59.12));

    HashMap<Student, Text> inputData1 = new HashMap<Student, Text>();
    inputData1.put(student1, new Text("Macon Kent,6269 Aenean St.,1-247-399-1051,08253"));
    inputData1.put(student2, new Text("Dale Zamora,521-7792 Mauris Rd.,1-214-625-6970,90510"));
    inputData1.put(student3, new Text("Charles Wood,525-9709 In Rd.,1-370-528-4758,62714"));
    createSequenceFileInHdfs(inputData1, "/input1", "testFile1.seq");

    HashMap<Student, Text> inputData2 = new HashMap<Student, Text>();
    inputData2.put(student2, new Text("Austin Farley,4794 Donec Ave,1-230-823-8164,13508"));
    inputData2.put(student3, new Text("Macaulay Jackson,5435 Dui. Avenue,1-770-395-6446,31584"));
    inputData2.put(student4, new Text("Timon Leonard,716 Ac Ave,1-857-935-3882,62240"));
    createSequenceFileInHdfs(inputData2, "/input2", "testFile2.seq");

    String[] args = new String[] { "-inputPath", "/input1,/input2", "-outputPath", "output", "-dedupBy",
            "key", "-inputFormat", "org.apache.hadoop.mapreduce.lib.input.SequenceFileInputFormat",
            "-inputKeyClassName", "co.nubetech.hiho.testdata.Student", "-inputValueClassName",
            "org.apache.hadoop.io.Text" };
    DedupJob job = runDedupJob(args);
    assertEquals(6, job.getTotalRecordsRead());
    assertEquals(0, job.getBadRecords());
    assertEquals(4, job.getOutput());
    assertEquals(2, job.getDuplicateRecords());

    FileSystem outputFS = getFileSystem();
    Path outputPath = new Path(outputFS.getHomeDirectory(), "output/part-r-00000");
    Configuration conf = new Configuration();
    SequenceFile.Reader reader = new SequenceFile.Reader(outputFS, outputPath, conf);
    Writable writableKey = (Writable) ReflectionUtils.newInstance(reader.getKeyClass(), conf);
    Writable writableValue = (Writable) ReflectionUtils.newInstance(reader.getValueClass(), conf);
    List<Student> expectedOutput = new ArrayList<Student>();
    expectedOutput.add(student1);
    expectedOutput.add(student2);
    expectedOutput.add(student3);
    expectedOutput.add(student4);
    int count = 0;
    while (reader.next(writableKey, writableValue)) {
        logger.debug("key and value is: " + writableKey + ", " + writableValue);
        assertTrue("Matched output " + writableKey, expectedOutput.contains(writableKey));
        count++;
    }
    IOUtils.closeStream(reader);
    assertEquals(4, count);
}
From source file: co.nubetech.hiho.dedup.TestDedupKeyReducer.java
License: Apache License

@Test
public void testReducerForLongWritableKey() throws IOException, InterruptedException {
    LongWritable key = new LongWritable(Long.parseLong("123"));
    HihoTuple hihoTuple = new HihoTuple();
    hihoTuple.setKey(key);

    Text value1 = new Text("value1");
    ArrayList<Text> values = new ArrayList<Text>();
    values.add(value1);

    Reducer.Context context = mock(Reducer.Context.class);
    Counters counters = new Counters();
    Counter counter = counters.findCounter(DedupRecordCounter.OUTPUT);
    when(context.getCounter(DedupRecordCounter.OUTPUT)).thenReturn(counter);

    DedupKeyReducer dedupReducer = new DedupKeyReducer();
    dedupReducer.reduce(hihoTuple, values, context);
    verify(context).write(key, value1);
    assertEquals(1, context.getCounter(DedupRecordCounter.OUTPUT).getValue());
}
From source file: co.nubetech.hiho.dedup.TestDedupValueReducer.java
License: Apache License

@Test
public void testReducerForLongWritableKey() throws IOException, InterruptedException {
    LongWritable key = new LongWritable(Long.parseLong("123"));
    HihoTuple hihoTuple = new HihoTuple();
    hihoTuple.setKey(key);

    Text value1 = new Text("value1");
    ArrayList<Text> values = new ArrayList<Text>();
    values.add(value1);

    Reducer.Context context = mock(Reducer.Context.class);
    Counters counters = new Counters();
    Counter counter = counters.findCounter(DedupRecordCounter.OUTPUT);
    when(context.getCounter(DedupRecordCounter.OUTPUT)).thenReturn(counter);

    DedupValueReducer dedupReducer = new DedupValueReducer();
    dedupReducer.reduce(hihoTuple, values, context);
    verify(context).write(value1, key);
    assertEquals(1, context.getCounter(DedupRecordCounter.OUTPUT).getValue());
}
From source file: co.nubetech.hiho.dedup.TestHashUtility.java
License: Apache License

@Test
public void testMD5HashForLongWritableKey() throws IOException {
    LongWritable key = new LongWritable(Long.parseLong("123"));
    MD5Hash md5HashKey1 = HashUtility.getMD5Hash(key);
    MD5Hash md5HashKey2 = HashUtility.getMD5Hash(key);
    assertEquals(md5HashKey1, md5HashKey2);
}
From source file: co.nubetech.hiho.mapreduce.TestDBInputAvroMapper.java
License: Apache License

@Test
public final void testMapperValidValues() throws IOException, InterruptedException {
    OutputCollector<AvroValue<Pair>, NullWritable> output = mock(OutputCollector.class);
    Reporter reporter = mock(Reporter.class);
    DBInputAvroMapper mapper = new DBInputAvroMapper();

    ColumnInfo intColumn = new ColumnInfo(0, Types.INTEGER, "intColumn");
    ColumnInfo stringColumn = new ColumnInfo(1, Types.VARCHAR, "stringColumn");
    ColumnInfo dateColumn = new ColumnInfo(1, Types.DATE, "dateColumn");
    ColumnInfo longColumn = new ColumnInfo(1, Types.BIGINT, "longColumn");
    ColumnInfo booleanColumn = new ColumnInfo(1, Types.BOOLEAN, "booleanColumn");
    ColumnInfo doubleColumn = new ColumnInfo(1, Types.DOUBLE, "doubleColumn");
    ColumnInfo floatColumn = new ColumnInfo(1, Types.FLOAT, "floatColumn");
    ColumnInfo charColumn = new ColumnInfo(1, Types.CHAR, "charColumn");
    ColumnInfo timeColumn = new ColumnInfo(1, Types.TIME, "timeColumn");
    ColumnInfo timeStampColumn = new ColumnInfo(1, Types.TIMESTAMP, "timeStampColumn");
    ArrayList<ColumnInfo> columns = new ArrayList<ColumnInfo>();
    columns.add(intColumn);
    columns.add(stringColumn);
    columns.add(dateColumn);
    columns.add(longColumn);
    columns.add(booleanColumn);
    columns.add(doubleColumn);
    columns.add(floatColumn);
    columns.add(charColumn);
    columns.add(timeColumn);
    columns.add(timeStampColumn);

    ArrayList values = new ArrayList();
    values.add(new Integer(12));
    values.add(new String("sam"));
    values.add(new Date());
    values.add(new Long(26564l));
    values.add(true);
    values.add(1.235);
    values.add(new Float(1.0f));
    values.add('a');
    values.add(new Time(new Date().getTime()));
    values.add(new Time(new Date().getTime()));

    GenericDBWritable val = new GenericDBWritable(columns, values);
    LongWritable key = new LongWritable(1);

    Schema pairSchema = DBMapper.getPairSchema(val.getColumns());
    Schema keySchema = Pair.getKeySchema(pairSchema);
    Schema valueSchema = Pair.getValueSchema(pairSchema);
    Pair pair = new Pair<GenericRecord, GenericRecord>(pairSchema);
    GenericRecord keyRecord = new GenericData.Record(keySchema);
    keyRecord.put(0, key.get());
    GenericRecord valueRecord = new GenericData.Record(valueSchema);
    List<Schema.Field> fieldSchemas = valueSchema.getFields();
    for (int i = 0; i < val.getValues().size(); ++i) {
        Schema.Type type = fieldSchemas.get(i).schema().getType();
        if (type.equals(Schema.Type.STRING)) {
            Utf8 utf8 = new Utf8((String) val.getValues().get(i).toString());
            valueRecord.put(i, utf8);
        } else {
            valueRecord.put(i, val.getValues().get(i));
        }
    }
    pair.key(keyRecord);
    pair.value(valueRecord);

    mapper.map(key, val, output, reporter);
    verify(output).collect(new AvroValue<Pair>(pair), nullWritable);
}
From source file: co.nubetech.hiho.mapreduce.TestDBInputAvroMapper.java
License: Apache License

@Test
public void testGetKeyRecord() {
    DBInputAvroMapper mapper = new DBInputAvroMapper();

    ColumnInfo intColumn = new ColumnInfo(0, Types.INTEGER, "intColumn");
    ColumnInfo stringColumn = new ColumnInfo(1, Types.VARCHAR, "stringColumn");
    ColumnInfo dateColumn = new ColumnInfo(1, Types.DATE, "dateColumn");
    ColumnInfo longColumn = new ColumnInfo(1, Types.BIGINT, "longColumn");
    ColumnInfo booleanColumn = new ColumnInfo(1, Types.BOOLEAN, "booleanColumn");
    ColumnInfo doubleColumn = new ColumnInfo(1, Types.DOUBLE, "doubleColumn");
    // ColumnInfo floatColumn = new ColumnInfo(1, Types.FLOAT, "floatColumn");
    ColumnInfo charColumn = new ColumnInfo(1, Types.CHAR, "charColumn");
    ColumnInfo timeColumn = new ColumnInfo(1, Types.TIME, "timeColumn");
    ColumnInfo timeStampColumn = new ColumnInfo(1, Types.TIMESTAMP, "timeStampColumn");
    ArrayList<ColumnInfo> columns = new ArrayList<ColumnInfo>();
    columns.add(intColumn);
    columns.add(stringColumn);
    columns.add(dateColumn);
    columns.add(longColumn);
    columns.add(booleanColumn);
    columns.add(doubleColumn);
    // columns.add(floatColumn);
    columns.add(charColumn);
    columns.add(timeColumn);
    columns.add(timeStampColumn);

    ArrayList values = new ArrayList();
    values.add(new Integer(12));
    values.add(new String("sam"));
    values.add(new Date());
    values.add(new Long(26564l));
    values.add(true);
    values.add(1.235);
    // values.add(new Float(1.0f));
    values.add('a');
    values.add(new Time(new Date().getTime()));
    values.add(new Time(new Date().getTime()));

    GenericDBWritable val = new GenericDBWritable(columns, values);
    LongWritable key = new LongWritable(1);
    Schema pairSchema = DBMapper.getPairSchema(val.getColumns());
    Schema keySchema = Pair.getKeySchema(pairSchema);
    GenericRecord keyRecord = new GenericData.Record(keySchema);
    keyRecord.put(0, key.get());
    assertEquals(keyRecord, mapper.getKeyRecord(keySchema, key));
}