Usage examples for org.apache.hadoop.io.LongWritable.set(long)
public void set(long value)
From source file:org.apache.hawq.pxf.plugins.json.JsonRecordReader.java
License:Apache License
@Override
public boolean next(LongWritable key, Text value) throws IOException {
  while (pos < end) {
    String json = parser.nextObjectContainingMember(jsonMemberName);
    pos = start + parser.getBytesRead();
    if (json == null) {
      return false;
    }
    long jsonStart = pos - json.length();
    // if the "begin-object" position is after the end of our split, we should ignore it
    if (jsonStart >= end) {
      return false;
    }
    if (json.length() > maxObjectLength) {
      LOG.warn("Skipped JSON object of size " + json.length() + " at pos " + jsonStart);
    } else {
      key.set(jsonStart);
      value.set(json);
      return true;
    }
  }
  return false;
}
From source file:org.apache.hive.storage.jdbc.JdbcRecordReader.java
License:Apache License
@Override
public boolean next(LongWritable key, MapWritable value) throws IOException {
  try {
    LOGGER.debug("JdbcRecordReader.next called");
    if (dbAccessor == null) {
      dbAccessor = DatabaseAccessorFactory.getAccessor(conf);
      iterator = dbAccessor.getRecordIterator(conf, split.getLimit(), split.getOffset());
    }
    if (iterator.hasNext()) {
      LOGGER.debug("JdbcRecordReader has more records to read.");
      key.set(pos);
      pos++;
      Map<String, Object> record = iterator.next();
      if ((record != null) && (!record.isEmpty())) {
        for (Entry<String, Object> entry : record.entrySet()) {
          value.put(new Text(entry.getKey()),
              entry.getValue() == null ? NullWritable.get() : new ObjectWritable(entry.getValue()));
        }
        return true;
      } else {
        LOGGER.debug("JdbcRecordReader got null record.");
        return false;
      }
    } else {
      LOGGER.debug("JdbcRecordReader has no more records to read.");
      return false;
    }
  } catch (Exception e) {
    LOGGER.error("An error occurred while reading the next record from DB.", e);
    return false;
  }
}
From source file:org.apache.kylin.engine.mr.steps.RowKeyDistributionCheckerMapper.java
License:Apache License
@Override
protected void doCleanup(Context context) throws IOException, InterruptedException {
  LongWritable outputValue = new LongWritable();
  for (Entry<Text, Long> kv : resultMap.entrySet()) {
    outputValue.set(kv.getValue());
    context.write(kv.getKey(), outputValue);
  }
}
From source file:org.apache.mahout.classifier.df.mapreduce.partial.Step1MapperTest.java
License:Apache License
@SuppressWarnings({ "rawtypes", "unchecked" })
@Test
public void testMapper() throws Exception {
  Random rng = RandomUtils.getRandom();

  // prepare the data
  String descriptor = Utils.randomDescriptor(rng, NUM_ATTRIBUTES);
  double[][] source = Utils.randomDoubles(rng, descriptor, false, NUM_INSTANCES);
  String[] sData = Utils.double2String(source);
  Dataset dataset = DataLoader.generateDataset(descriptor, false, sData);
  String[][] splits = Utils.splitData(sData, NUM_MAPPERS);

  MockTreeBuilder treeBuilder = new MockTreeBuilder();

  LongWritable key = new LongWritable();
  Text value = new Text();

  int treeIndex = 0;

  for (int partition = 0; partition < NUM_MAPPERS; partition++) {
    String[] split = splits[partition];
    treeBuilder.setExpected(DataLoader.loadData(dataset, split));

    // expected number of trees that this mapper will build
    int mapNbTrees = Step1Mapper.nbTrees(NUM_MAPPERS, NUM_TREES, partition);

    Mapper.Context context = EasyMock.createNiceMock(Mapper.Context.class);
    Capture<TreeID> capturedKeys = new TreeIDCapture();
    context.write(EasyMock.capture(capturedKeys), EasyMock.anyObject());
    EasyMock.expectLastCall().anyTimes();

    EasyMock.replay(context);

    MockStep1Mapper mapper = new MockStep1Mapper(treeBuilder, dataset, null, partition, NUM_MAPPERS,
        NUM_TREES);

    // make sure the mapper computed firstTreeId correctly
    assertEquals(treeIndex, mapper.getFirstTreeId());

    for (int index = 0; index < split.length; index++) {
      key.set(index);
      value.set(split[index]);
      mapper.map(key, value, context);
    }

    mapper.cleanup(context);
    EasyMock.verify(context);

    // make sure the mapper built all its trees
    assertEquals(mapNbTrees, capturedKeys.getValues().size());

    // check the returned keys
    for (TreeID k : capturedKeys.getValues()) {
      assertEquals(partition, k.partition());
      assertEquals(treeIndex, k.treeId());
      treeIndex++;
    }
  }
}
From source file:org.apache.mahout.df.mapred.partial.Step1MapperTest.java
License:Apache License
public void testMapper() throws Exception {
  Long seed = null;
  Random rng = RandomUtils.getRandom();

  // prepare the data
  String descriptor = Utils.randomDescriptor(rng, nbAttributes);
  double[][] source = Utils.randomDoubles(rng, descriptor, nbInstances);
  String[] sData = Utils.double2String(source);
  Dataset dataset = DataLoader.generateDataset(descriptor, sData);
  String[][] splits = Utils.splitData(sData, nbMappers);

  MockTreeBuilder treeBuilder = new MockTreeBuilder();

  LongWritable key = new LongWritable();
  Text value = new Text();

  int treeIndex = 0;

  for (int partition = 0; partition < nbMappers; partition++) {
    String[] split = splits[partition];
    treeBuilder.setExpected(DataLoader.loadData(dataset, split));

    // expected number of trees that this mapper will build
    int mapNbTrees = Step1Mapper.nbTrees(nbMappers, nbTrees, partition);

    PartialOutputCollector output = new PartialOutputCollector(mapNbTrees);

    MockStep1Mapper mapper = new MockStep1Mapper(treeBuilder, dataset, seed, partition, nbMappers, nbTrees);

    // make sure the mapper computed firstTreeId correctly
    assertEquals(treeIndex, mapper.getFirstTreeId());

    for (int index = 0; index < split.length; index++) {
      key.set(index);
      value.set(split[index]);
      mapper.map(key, value, output, Reporter.NULL);
    }

    mapper.close();

    // make sure the mapper built all its trees
    assertEquals(mapNbTrees, output.nbOutputs());

    // check the returned keys
    for (TreeID k : output.getKeys()) {
      assertEquals(partition, k.partition());
      assertEquals(treeIndex, k.treeId());
      treeIndex++;
    }
  }
}
From source file:org.apache.mahout.df.mapred.partial.Step2MapperTest.java
License:Apache License
public void testMapper() throws Exception {
  Random rng = RandomUtils.getRandom();

  // prepare the data
  String descriptor = Utils.randomDescriptor(rng, nbAttributes);
  double[][] source = Utils.randomDoubles(rng, descriptor, nbInstances);
  String[] sData = Utils.double2String(source);
  Dataset dataset = DataLoader.generateDataset(descriptor, sData);
  String[][] splits = Utils.splitData(sData, nbMappers);

  // prepare first step output
  TreeID[] keys = new TreeID[nbTrees];
  Node[] trees = new Node[nbTrees];
  int[] sizes = new int[nbMappers];

  int treeIndex = 0;
  for (int partition = 0; partition < nbMappers; partition++) {
    int nbMapTrees = Step1Mapper.nbTrees(nbMappers, nbTrees, partition);

    for (int tree = 0; tree < nbMapTrees; tree++, treeIndex++) {
      keys[treeIndex] = new TreeID(partition, treeIndex);
      // put the partition in the leaf's label
      // this way we can track the outputs
      trees[treeIndex] = new Leaf(partition);
    }

    sizes[partition] = splits[partition].length;
  }

  // store the first step outputs in a file
  FileSystem fs = FileSystem.getLocal(new Configuration());
  Path forestPath = new Path("testdata/Step2MapperTest.forest");
  InterResults.store(fs, forestPath, keys, trees, sizes);

  LongWritable key = new LongWritable();
  Text value = new Text();

  for (int partition = 0; partition < nbMappers; partition++) {
    String[] split = splits[partition];

    // number of trees that will be handled by the mapper
    int nbConcerned = Step2Mapper.nbConcerned(nbMappers, nbTrees, partition);

    PartialOutputCollector output = new PartialOutputCollector(nbConcerned);

    // load the current mapper's (key, tree) pairs
    TreeID[] curKeys = new TreeID[nbConcerned];
    Node[] curTrees = new Node[nbConcerned];
    InterResults.load(fs, forestPath, nbMappers, nbTrees, partition, curKeys, curTrees);

    // simulate the job
    MockStep2Mapper mapper = new MockStep2Mapper(partition, dataset, curKeys, curTrees, split.length);

    for (int index = 0; index < split.length; index++) {
      key.set(index);
      value.set(split[index]);
      mapper.map(key, value, output, Reporter.NULL);
    }

    mapper.close();

    // make sure the mapper did not return its own trees
    assertEquals(nbConcerned, output.nbOutputs());

    // check the returned results
    int current = 0;
    for (int index = 0; index < nbTrees; index++) {
      if (keys[index].partition() == partition) {
        // should not be part of the results
        continue;
      }

      TreeID k = output.getKeys()[current];

      // the tree should receive the partition's index
      assertEquals(partition, k.partition());

      // make sure all the trees of the other partitions are handled in the
      // correct order
      assertEquals(index, k.treeId());

      int[] predictions = output.getValues()[current].getPredictions();

      // all the instances of the partition should be classified
      assertEquals(split.length, predictions.length);
      assertEquals("at least one instance of the partition was not classified", -1,
          ArrayUtils.indexOf(predictions, -1));

      // the tree must not belong to the mapper's partition
      int treePartition = predictions[0];
      assertFalse("Step2Mapper returned a tree from its own partition", partition == treePartition);

      current++;
    }
  }
}
From source file:org.apache.mahout.df.mapreduce.partial.Step1MapperTest.java
License:Apache License
public void testMapper() throws Exception {
  Long seed = null;
  Random rng = RandomUtils.getRandom();

  // prepare the data
  String descriptor = Utils.randomDescriptor(rng, NUM_ATTRIBUTES);
  double[][] source = Utils.randomDoubles(rng, descriptor, NUM_INSTANCES);
  String[] sData = Utils.double2String(source);
  Dataset dataset = DataLoader.generateDataset(descriptor, sData);
  String[][] splits = Utils.splitData(sData, NUM_MAPPERS);

  MockTreeBuilder treeBuilder = new MockTreeBuilder();

  LongWritable key = new LongWritable();
  Text value = new Text();

  int treeIndex = 0;

  for (int partition = 0; partition < NUM_MAPPERS; partition++) {
    String[] split = splits[partition];
    treeBuilder.setExpected(DataLoader.loadData(dataset, split));

    // expected number of trees that this mapper will build
    int mapNbTrees = Step1Mapper.nbTrees(NUM_MAPPERS, NUM_TREES, partition);

    MockContext context = new MockContext(new Step1Mapper(), new Configuration(), new TaskAttemptID(),
        mapNbTrees);

    MockStep1Mapper mapper = new MockStep1Mapper(treeBuilder, dataset, seed, partition, NUM_MAPPERS,
        NUM_TREES);

    // make sure the mapper computed firstTreeId correctly
    assertEquals(treeIndex, mapper.getFirstTreeId());

    for (int index = 0; index < split.length; index++) {
      key.set(index);
      value.set(split[index]);
      mapper.map(key, value, context);
    }

    mapper.cleanup(context);

    // make sure the mapper built all its trees
    assertEquals(mapNbTrees, context.nbOutputs());

    // check the returned keys
    for (TreeID k : context.getKeys()) {
      assertEquals(partition, k.partition());
      assertEquals(treeIndex, k.treeId());
      treeIndex++;
    }
  }
}
From source file:org.apache.mahout.df.mapreduce.partial.Step2MapperTest.java
License:Apache License
public void testMapper() throws Exception {
  Random rng = RandomUtils.getRandom();

  // prepare the data
  String descriptor = Utils.randomDescriptor(rng, NUM_ATTRIBUTES);
  double[][] source = Utils.randomDoubles(rng, descriptor, NUM_INSTANCES);
  String[] sData = Utils.double2String(source);
  Dataset dataset = DataLoader.generateDataset(descriptor, sData);
  String[][] splits = Utils.splitData(sData, NUM_MAPPERS);

  // prepare first step output
  TreeID[] keys = new TreeID[NUM_TREES];
  Node[] trees = new Node[NUM_TREES];
  int[] sizes = new int[NUM_MAPPERS];

  int treeIndex = 0;
  for (int partition = 0; partition < NUM_MAPPERS; partition++) {
    int nbMapTrees = Step1Mapper.nbTrees(NUM_MAPPERS, NUM_TREES, partition);

    for (int tree = 0; tree < nbMapTrees; tree++, treeIndex++) {
      keys[treeIndex] = new TreeID(partition, treeIndex);
      // put the partition in the leaf's label
      // this way we can track the outputs
      trees[treeIndex] = new Leaf(partition);
    }

    sizes[partition] = splits[partition].length;
  }

  // store the first step outputs in a file
  FileSystem fs = FileSystem.getLocal(new Configuration());
  Path forestPath = new Path("testdata/Step2MapperTest.forest");
  InterResults.store(fs, forestPath, keys, trees, sizes);

  LongWritable key = new LongWritable();
  Text value = new Text();

  for (int partition = 0; partition < NUM_MAPPERS; partition++) {
    String[] split = splits[partition];

    // number of trees that will be handled by the mapper
    int nbConcerned = Step2Mapper.nbConcerned(NUM_MAPPERS, NUM_TREES, partition);

    MockContext context = new MockContext(new Step2Mapper(), new Configuration(), new TaskAttemptID(),
        nbConcerned);

    // load the current mapper's (key, tree) pairs
    TreeID[] curKeys = new TreeID[nbConcerned];
    Node[] curTrees = new Node[nbConcerned];
    InterResults.load(fs, forestPath, NUM_MAPPERS, NUM_TREES, partition, curKeys, curTrees);

    // simulate the job
    MockStep2Mapper mapper = new MockStep2Mapper(partition, dataset, curKeys, curTrees, split.length);

    for (int index = 0; index < split.length; index++) {
      key.set(index);
      value.set(split[index]);
      mapper.map(key, value, context);
    }

    mapper.cleanup(context);

    // make sure the mapper did not return its own trees
    assertEquals(nbConcerned, context.nbOutputs());

    // check the returned results
    int current = 0;
    for (int index = 0; index < NUM_TREES; index++) {
      if (keys[index].partition() == partition) {
        // should not be part of the results
        continue;
      }

      TreeID k = context.getKeys()[current];

      // the tree should receive the partition's index
      assertEquals(partition, k.partition());

      // make sure all the trees of the other partitions are handled in the
      // correct order
      assertEquals(index, k.treeId());

      int[] predictions = context.getValues()[current].getPredictions();

      // all the instances of the partition should be classified
      assertEquals(split.length, predictions.length);
      assertEquals("at least one instance of the partition was not classified", -1,
          ArrayUtils.indexOf(predictions, -1));

      // the tree must not belong to the mapper's partition
      int treePartition = predictions[0];
      assertFalse("Step2Mapper returned a tree from its own partition", partition == treePartition);

      current++;
    }
  }
}
From source file:org.apache.metron.spout.pcap.deserializer.FromKeyDeserializer.java
License:Apache License
@Override
public boolean deserializeKeyValue(byte[] key, byte[] value, LongWritable outKey, BytesWritable outValue) {
  Long ts = converter.toNanoseconds(fromBytes(key));
  outKey.set(ts);
  byte[] packetHeaderized = PcapHelper.addPacketHeader(ts, value, endianness);
  byte[] globalHeaderized = PcapHelper.addGlobalHeader(packetHeaderized, endianness);
  outValue.set(globalHeaderized, 0, globalHeaderized.length);
  return true;
}
From source file:org.apache.metron.spout.pcap.deserializer.FromPacketDeserializer.java
License:Apache License
@Override
public boolean deserializeKeyValue(byte[] key, byte[] value, LongWritable outKey, BytesWritable outValue) {
  Long ts = PcapHelper.getTimestamp(value);
  if (ts != null) {
    outKey.set(ts);
    outValue.set(value, 0, value.length);
    return true;
  } else {
    return false;
  }
}