List of usage examples for org.apache.hadoop.io.NullWritable.get()
public static NullWritable get()
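NullWritable.get() returns the single shared instance of NullWritable, a zero-length Writable used to fill a key or value slot that carries no data; its write() and readFields() do nothing, so it adds no bytes to output files. Before the examples from real projects below, here is a minimal self-contained sketch of the most common pattern, writing a keys-only SequenceFile (the output path and key text are hypothetical placeholders):

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.io.Text;

public class NullWritableSketch {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(conf);
        Path out = new Path("/tmp/keys-only.seq"); // hypothetical path
        // Declaring NullWritable.class as the value type means the file stores keys only
        SequenceFile.Writer writer = SequenceFile.createWriter(fs, conf, out, Text.class, NullWritable.class);
        try {
            // get() always returns the same singleton; nothing is serialized for the value
            writer.append(new Text("some-key"), NullWritable.get());
        } finally {
            writer.close();
        }
    }
}

The same pattern appears in the TeraInputFormat and HFileOutputFormatBase examples below, where NullWritable fills the value slot of partition files that only need their keys.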
From source file: com.benchmark.mapred.terasort.TeraInputFormat.java
License: Apache License

/**
 * Use the input splits to take samples of the input and generate sample
 * keys. By default reads 100,000 keys from 10 locations in the input, sorts
 * them and picks N-1 keys to generate N equally sized partitions.
 * @param conf the job to sample
 * @param partFile where to write the output file to
 * @throws IOException if something goes wrong
 */
public static void writePartitionFile(JobConf conf, Path partFile) throws IOException {
    TeraInputFormat inFormat = new TeraInputFormat();
    TextSampler sampler = new TextSampler();
    Text key = new Text();
    Text value = new Text();
    int partitions = conf.getNumReduceTasks();
    long sampleSize = conf.getLong(SAMPLE_SIZE, 100000);
    InputSplit[] splits = inFormat.getSplits(conf, conf.getNumMapTasks());
    int samples = Math.min(10, splits.length);
    long recordsPerSample = sampleSize / samples;
    int sampleStep = splits.length / samples;
    long records = 0;
    // take N samples from different parts of the input
    for (int i = 0; i < samples; ++i) {
        RecordReader<Text, Text> reader = inFormat.getRecordReader(splits[sampleStep * i], conf, null);
        while (reader.next(key, value)) {
            sampler.addKey(key);
            records += 1;
            if ((i + 1) * recordsPerSample <= records) {
                break;
            }
        }
    }
    FileSystem outFs = partFile.getFileSystem(conf);
    if (outFs.exists(partFile)) {
        outFs.delete(partFile, false);
    }
    SequenceFile.Writer writer = SequenceFile.createWriter(outFs, conf, partFile, Text.class,
            NullWritable.class);
    NullWritable nullValue = NullWritable.get();
    for (Text split : sampler.createPartitions(partitions)) {
        writer.append(split, nullValue);
    }
    writer.close();
}
From source file: com.bixolabs.cascading.avro.AvroScheme.java
License: Apache License

@SuppressWarnings("unchecked")
@Override
public void sink(TupleEntry tupleEntry, OutputCollector outputCollector) throws IOException {
    // Create the appropriate AvroWrapper<T> from the result, and pass that
    // as the key for the collect
    Fields sinkFields = getSinkFields();
    Tuple result = sinkFields != null ? tupleEntry.selectTuple(sinkFields) : tupleEntry.getTuple();
    Schema schema = getSchema();
    // Create a Generic data using the sink field names
    GenericData.Record datum = new GenericData.Record(schema);
    for (int i = 0; i < sinkFields.size(); i++) {
        String fieldName = sinkFields.get(i).toString();
        Object inObj = result.get(i);
        Schema objSchema = schema.getField(fieldName).schema();
        datum.put(fieldName, convertToAvro(inObj, objSchema));
    }
    AvroWrapper<GenericData.Record> wrapper = new AvroWrapper<GenericData.Record>(datum);
    outputCollector.collect(NullWritable.get(), wrapper);
}
From source file: com.blackberry.logdriver.pig.BoomHourlyStoreFunc.java
License: Apache License

@Override
public void putNext(Tuple tuple) throws IOException {
    try {
        writer.write(tuple, NullWritable.get());
    } catch (InterruptedException e) {
        throw new IOException(e);
    }
}
From source file: com.ci.backports.avro.mapreduce.AvroRecordReader.java
License: Apache License

@Override
public NullWritable getCurrentValue() throws IOException, InterruptedException {
    return NullWritable.get();
}
From source file: com.citic.zxyjs.zwlscx.mapreduce.lib.input.HFileOutputFormatBase.java
License: Apache License

/**
 * Write out a {@link SequenceFile} that can be read by
 * {@link TotalOrderPartitioner} that contains the split points in
 * startKeys.
 */
private static void writePartitions(Configuration conf, Path partitionsPath,
        List<ImmutableBytesWritable> startKeys) throws IOException {
    LOG.info("Writing partition information to " + partitionsPath);
    if (startKeys.isEmpty()) {
        throw new IllegalArgumentException("No regions passed");
    }
    // We're generating a list of split points, and we don't ever
    // have keys < the first region (which has an empty start key)
    // so we need to remove it. Otherwise we would end up with an
    // empty reducer with index 0
    TreeSet<ImmutableBytesWritable> sorted = new TreeSet<ImmutableBytesWritable>(startKeys);
    ImmutableBytesWritable first = sorted.first();
    if (!first.equals(HConstants.EMPTY_BYTE_ARRAY)) {
        throw new IllegalArgumentException("First region of table should have empty start key. Instead has: "
                + Bytes.toStringBinary(first.get()));
    }
    sorted.remove(first);
    // Write the actual file
    FileSystem fs = partitionsPath.getFileSystem(conf);
    SequenceFile.Writer writer = SequenceFile.createWriter(fs, conf, partitionsPath,
            ImmutableBytesWritable.class, NullWritable.class);
    try {
        for (ImmutableBytesWritable startKey : sorted) {
            writer.append(startKey, NullWritable.get());
        }
    } finally {
        writer.close();
    }
}
From source file: com.cloudera.castagna.logparser.mr.TranscodeLogsMapper.java
License: Apache License

@Override
public void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
    log.debug("< ({}, {})", key, value);
    try {
        Map<String, String> logLine = parser.parseLine(value.toString());
        StringBuilder outValue = new StringBuilder();
        outValue.append(logLine.get(LogParser.REMOTE_HOSTNAME)).append(Constants.TAB);
        outValue.append(logLine.get(LogParser.USERNAME)).append(Constants.TAB);
        outValue.append(logLine.get(LogParser.HTTP_METHOD)).append(Constants.TAB);
        outValue.append(logLine.get(LogParser.URL)).append(Constants.TAB);
        outValue.append(logLine.get(LogParser.TIME_YEAR)).append(Constants.TAB);
        outValue.append(logLine.get(LogParser.TIME_MONTH)).append(Constants.TAB);
        outValue.append(logLine.get(LogParser.TIME_DAY)).append(Constants.TAB);
        outValue.append(logLine.get(LogParser.TIME_HOUR)).append(Constants.TAB);
        outValue.append(logLine.get(LogParser.TIME_MINUTE)).append(Constants.TAB);
        outValue.append(logLine.get(LogParser.TIME_SECOND)).append(Constants.TAB);
        outValue.append(logLine.get(LogParser.TIMESTAMP)).append(Constants.TAB);
        outValue.append(logLine.get(LogParser.STATUS_CODE)).append(Constants.TAB);
        outValue.append(logLine.get(LogParser.SIZE)).append(Constants.TAB);
        outValue.append(logLine.get(LogParser.ELAPSED_TIME)).append(Constants.TAB);
        outValue.append(logLine.get(LogParser.USER_AGENT)).append(Constants.TAB);
        outValue.append(logLine.get(LogParser.REFERER)).append(Constants.TAB);
        outValue.append(logLine.get("JSESSIONID")).append(Constants.TAB);
        outValue.append(logLine.get("SITESERVER")).append(Constants.TAB);
        outTextValue.clear();
        outTextValue.set(outValue.toString());
        context.write(NullWritable.get(), outTextValue);
        log.debug("> ({}, {})", NullWritable.get(), outTextValue);
    } catch (ParseException e) {
        log.debug("Error parsing: {} {}", key, value);
    }
}
From source file: com.cloudera.crunch.io.seq.SeqFileReaderFactory.java
License: Open Source License

public SeqFileReaderFactory(PType<T> ptype, Configuration conf) {
    this.mapFn = SeqFileHelper.getInputMapFn(ptype);
    this.key = NullWritable.get();
    this.value = SeqFileHelper.newInstance(ptype, conf);
    this.conf = conf;
}
From source file: com.cloudera.crunch.type.avro.AvroKeyConverter.java
License: Open Source License

@Override
public Object outputValue(K value) {
    return NullWritable.get();
}
From source file: com.cloudera.crunch.type.avro.AvroRecordReader.java
License: Apache License

@Override
public boolean nextKeyValue() throws IOException, InterruptedException {
    if (!reader.hasNext() || reader.pastSync(end)) {
        key = null;
        value = null;
        return false;
    }
    if (key == null) {
        key = new AvroWrapper<T>();
    }
    if (value == null) {
        value = NullWritable.get();
    }
    key.datum(reader.next(key.datum()));
    return true;
}
From source file: com.cloudera.crunch.type.writable.WritableValueConverter.java
License: Open Source License

@Override
public Object outputKey(Object input) {
    return NullWritable.get();
}