Example usage for org.apache.hadoop.io NullWritable get

Introduction

This page collects example usages of org.apache.hadoop.io.NullWritable#get() from open-source projects.

Prototype

public static NullWritable get() 

Document

Returns the single instance of this class.
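
A minimal sketch of what the singleton contract means in practice (the class name NullWritableDemo is illustrative, not taken from the projects below):

import org.apache.hadoop.io.NullWritable;

public class NullWritableDemo {
    public static void main(String[] args) {
        // get() always returns the same shared instance, so reference
        // equality holds across call sites.
        NullWritable a = NullWritable.get();
        NullWritable b = NullWritable.get();
        System.out.println(a == b); // true

        // NullWritable serializes to zero bytes, which makes it a cheap
        // placeholder wherever a key or value carries no data.
    }
}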

Usage

From source file: com.benchmark.mapred.terasort.TeraInputFormat.java

License: Apache License

/**
 * Use the input splits to take samples of the input and generate sample
 * keys. By default reads 100,000 keys from 10 locations in the input, sorts
 * them and picks N-1 keys to generate N equally sized partitions.
 * @param conf the job to sample
 * @param partFile where to write the output file to
 * @throws IOException if something goes wrong
 */
public static void writePartitionFile(JobConf conf, Path partFile) throws IOException {
    TeraInputFormat inFormat = new TeraInputFormat();
    TextSampler sampler = new TextSampler();
    Text key = new Text();
    Text value = new Text();
    int partitions = conf.getNumReduceTasks();
    long sampleSize = conf.getLong(SAMPLE_SIZE, 100000);
    InputSplit[] splits = inFormat.getSplits(conf, conf.getNumMapTasks());
    int samples = Math.min(10, splits.length);
    long recordsPerSample = sampleSize / samples;
    int sampleStep = splits.length / samples;
    long records = 0;
    // take N samples from different parts of the input
    for (int i = 0; i < samples; ++i) {
        RecordReader<Text, Text> reader = inFormat.getRecordReader(splits[sampleStep * i], conf, null);
        while (reader.next(key, value)) {
            sampler.addKey(key);
            records += 1;
            if ((i + 1) * recordsPerSample <= records) {
                break;
            }
        }
        reader.close();
    }
    FileSystem outFs = partFile.getFileSystem(conf);
    if (outFs.exists(partFile)) {
        outFs.delete(partFile, false);
    }
    SequenceFile.Writer writer = SequenceFile.createWriter(outFs, conf, partFile, Text.class,
            NullWritable.class);
    NullWritable nullValue = NullWritable.get();
    for (Text split : sampler.createPartitions(partitions)) {
        writer.append(split, nullValue);
    }
    writer.close();
}
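
The partition file holds only the sampled split keys; NullWritable.get() supplies a zero-byte value, so nothing besides the keys is serialized. A hedged sketch of reading the keys back (variable names follow the snippet above; the loop itself is illustrative):

SequenceFile.Reader reader = new SequenceFile.Reader(outFs, partFile, conf);
Text splitPoint = new Text();
// With NullWritable values there are no value bytes; next(key) suffices.
while (reader.next(splitPoint)) {
    System.out.println(splitPoint);
}
reader.close();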

From source file: com.bixolabs.cascading.avro.AvroScheme.java

License: Apache License

@SuppressWarnings("unchecked")
@Override
public void sink(TupleEntry tupleEntry, OutputCollector outputCollector) throws IOException {
    // Create the appropriate AvroWrapper<T> from the result, and pass it
    // to the collect call as the value, keyed by the NullWritable singleton
    Fields sinkFields = getSinkFields();
    Tuple result = sinkFields != null ? tupleEntry.selectTuple(sinkFields) : tupleEntry.getTuple();
    Schema schema = getSchema();
    // Create a Generic data using the sink field names
    GenericData.Record datum = new GenericData.Record(schema);

    for (int i = 0; i < sinkFields.size(); i++) {
        String fieldName = sinkFields.get(i).toString();
        Object inObj = result.get(i);
        Schema objSchema = schema.getField(fieldName).schema();
        datum.put(fieldName, convertToAvro(inObj, objSchema));
    }

    AvroWrapper<GenericData.Record> wrapper = new AvroWrapper<GenericData.Record>(datum);
    outputCollector.collect(NullWritable.get(), wrapper);
}

From source file: com.blackberry.logdriver.pig.BoomHourlyStoreFunc.java

License: Apache License

@Override
public void putNext(Tuple tuple) throws IOException {
    try {
        writer.write(tuple, NullWritable.get());
    } catch (InterruptedException e) {
        throw new IOException(e);
    }
}

From source file: com.ci.backports.avro.mapreduce.AvroRecordReader.java

License: Apache License

@Override
public NullWritable getCurrentValue() throws IOException, InterruptedException {
    return NullWritable.get();
}
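
A reader whose value is always the NullWritable singleton pairs naturally with map tasks that ignore the value entirely. A hypothetical mapper signature (the class and output types are assumptions for illustration, not part of the backport):

public static class AvroGenericMapper
        extends Mapper<AvroWrapper<GenericRecord>, NullWritable, Text, NullWritable> {
    @Override
    protected void map(AvroWrapper<GenericRecord> key, NullWritable value, Context context)
            throws IOException, InterruptedException {
        // value is always the shared singleton; only the key carries data.
        context.write(new Text(key.datum().toString()), NullWritable.get());
    }
}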

From source file: com.citic.zxyjs.zwlscx.mapreduce.lib.input.HFileOutputFormatBase.java

License: Apache License

/**
 * Write out a {@link SequenceFile} that can be read by
 * {@link TotalOrderPartitioner} that contains the split points in
 * startKeys.
 */
private static void writePartitions(Configuration conf, Path partitionsPath,
        List<ImmutableBytesWritable> startKeys) throws IOException {
    LOG.info("Writing partition information to " + partitionsPath);
    if (startKeys.isEmpty()) {
        throw new IllegalArgumentException("No regions passed");
    }

    // We're generating a list of split points, and we don't ever
    // have keys < the first region (which has an empty start key)
    // so we need to remove it. Otherwise we would end up with an
    // empty reducer with index 0
    TreeSet<ImmutableBytesWritable> sorted = new TreeSet<ImmutableBytesWritable>(startKeys);

    ImmutableBytesWritable first = sorted.first();
    if (!first.equals(HConstants.EMPTY_BYTE_ARRAY)) {
        throw new IllegalArgumentException("First region of table should have empty start key. Instead has: "
                + Bytes.toStringBinary(first.get()));
    }
    sorted.remove(first);

    // Write the actual file
    FileSystem fs = partitionsPath.getFileSystem(conf);
    SequenceFile.Writer writer = SequenceFile.createWriter(fs, conf, partitionsPath,
            ImmutableBytesWritable.class, NullWritable.class);

    try {
        for (ImmutableBytesWritable startKey : sorted) {
            writer.append(startKey, NullWritable.get());
        }
    } finally {
        writer.close();
    }
}
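
The split-point file written here is consumed by TotalOrderPartitioner. A hedged sketch of the job-side wiring (the job setup is an assumption, not code from the source project):

// partitionsPath is the SequenceFile of split points written above.
Job job = Job.getInstance(conf, "hfile-output");
job.setPartitionerClass(TotalOrderPartitioner.class);
TotalOrderPartitioner.setPartitionFile(job.getConfiguration(), partitionsPath);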

From source file: com.cloudera.castagna.logparser.mr.TranscodeLogsMapper.java

License: Apache License

@Override
public void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
    log.debug("< ({}, {})", key, value);

    try {
        Map<String, String> logLine = parser.parseLine(value.toString());

        StringBuilder outValue = new StringBuilder();
        outValue.append(logLine.get(LogParser.REMOTE_HOSTNAME));
        outValue.append(Constants.TAB);
        outValue.append(logLine.get(LogParser.USERNAME));
        outValue.append(Constants.TAB);
        outValue.append(logLine.get(LogParser.HTTP_METHOD));
        outValue.append(Constants.TAB);
        outValue.append(logLine.get(LogParser.URL));
        outValue.append(Constants.TAB);
        outValue.append(logLine.get(LogParser.TIME_YEAR));
        outValue.append(Constants.TAB);
        outValue.append(logLine.get(LogParser.TIME_MONTH));
        outValue.append(Constants.TAB);
        outValue.append(logLine.get(LogParser.TIME_DAY));
        outValue.append(Constants.TAB);
        outValue.append(logLine.get(LogParser.TIME_HOUR));
        outValue.append(Constants.TAB);
        outValue.append(logLine.get(LogParser.TIME_MINUTE));
        outValue.append(Constants.TAB);
        outValue.append(logLine.get(LogParser.TIME_SECOND));
        outValue.append(Constants.TAB);
        outValue.append(logLine.get(LogParser.TIMESTAMP));
        outValue.append(Constants.TAB);
        outValue.append(logLine.get(LogParser.STATUS_CODE));
        outValue.append(Constants.TAB);
        outValue.append(logLine.get(LogParser.SIZE));
        outValue.append(Constants.TAB);
        outValue.append(logLine.get(LogParser.ELAPSED_TIME));
        outValue.append(Constants.TAB);
        outValue.append(logLine.get(LogParser.USER_AGENT));
        outValue.append(Constants.TAB);
        outValue.append(logLine.get(LogParser.REFERER));
        outValue.append(Constants.TAB);
        outValue.append(logLine.get("JSESSIONID"));
        outValue.append(Constants.TAB);
        outValue.append(logLine.get("SITESERVER"));
        outValue.append(Constants.TAB);

        outTextValue.clear();
        outTextValue.set(outValue.toString());

        context.write(NullWritable.get(), outTextValue);
        log.debug("> ({}, {})", NullWritable.get(), outTextValue);
    } catch (ParseException e) {
        log.debug("Error parsing: {} {}", key, value);
    }
}
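
Because the mapper emits NullWritable keys, TextOutputFormat suppresses the key and the separator, so each record becomes a single tab-separated log line. A sketch of a matching map-only job configuration (assumed, not taken from the source project):

job.setOutputKeyClass(NullWritable.class);
job.setOutputValueClass(Text.class);
job.setNumReduceTasks(0); // map-only: mapper output goes straight to the output format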

From source file: com.cloudera.crunch.io.seq.SeqFileReaderFactory.java

License: Open Source License

public SeqFileReaderFactory(PType<T> ptype, Configuration conf) {
    this.mapFn = SeqFileHelper.getInputMapFn(ptype);
    this.key = NullWritable.get();
    this.value = SeqFileHelper.newInstance(ptype, conf);
    this.conf = conf;
}

From source file: com.cloudera.crunch.type.avro.AvroKeyConverter.java

License: Open Source License

@Override
public Object outputValue(K value) {
    return NullWritable.get();
}

From source file: com.cloudera.crunch.type.avro.AvroRecordReader.java

License: Apache License

@Override
public boolean nextKeyValue() throws IOException, InterruptedException {
    if (!reader.hasNext() || reader.pastSync(end)) {
        key = null;
        value = null;
        return false;
    }
    if (key == null) {
        key = new AvroWrapper<T>();
    }
    if (value == null) {
        value = NullWritable.get();
    }
    key.datum(reader.next(key.datum()));
    return true;
}

From source file: com.cloudera.crunch.type.writable.WritableValueConverter.java

License: Open Source License

@Override
public Object outputKey(Object input) {
    return NullWritable.get();
}