List of usage examples for org.apache.hadoop.io.NullWritable.get()
public static NullWritable get()
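NullWritable.get() returns the shared singleton NullWritable instance: a zero-length placeholder used wherever a key or value is required by the API but carries no data. A minimal sketch of the typical pattern (the class name LineKeyMapper is illustrative, not taken from the examples below):

import java.io.IOException;

import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

// Emits each input line as the output key; NullWritable.get() supplies the
// empty placeholder value, so nothing extra is serialized per record.
public class LineKeyMapper extends Mapper<LongWritable, Text, Text, NullWritable> {

    @Override
    protected void map(LongWritable key, Text value, Context context)
            throws IOException, InterruptedException {
        context.write(value, NullWritable.get());
    }
}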
From source file:ipldataanalysis4.BloomMapper.java
@Override
public void map(Object key, Text value, Mapper.Context context) throws IOException, InterruptedException {
    if (value != null) {
        String[] arrAttributes = value.toString().split(",");
        if (arrAttributes != null) {
            // Skip the CSV header row.
            if (!arrAttributes[0].equals("match_id")) {
                String playerName = arrAttributes[6];
                int match_id = Integer.parseInt(String.valueOf(arrAttributes[0]));
                Scores s = new Scores(playerName);
                // Emit only records whose player may be in the Bloom filter; the output
                // value carries no data, so NullWritable.get() serves as a placeholder.
                if (friends.mightContain(s)) {
                    String st = playerName + " " + value.toString();
                    Text v = new Text(st);
                    context.write(v, NullWritable.get());
                }
            }
        }
    }
}
From source file:it.crs4.pydoop.mapreduce.pipes.PipeApplicationRunnableStub.java
License:Apache License
public void binaryProtocolStub() {
    try {
        initSoket();
        System.out.println("start OK");

        // RUN_MAP.code, should be 3
        int answer = WritableUtils.readVInt(dataInput);
        System.out.println("RunMap:" + answer);
        FileSplit split = new FileSplit();
        readObject(split, dataInput);

        WritableUtils.readVInt(dataInput);
        WritableUtils.readVInt(dataInput);
        // end runMap

        // get InputTypes
        WritableUtils.readVInt(dataInput);
        String inText = Text.readString(dataInput);
        System.out.println("Key class:" + inText);
        inText = Text.readString(dataInput);
        System.out.println("Value class:" + inText);

        @SuppressWarnings("unused")
        int inCode = 0;
        // read all data from sender and write to output
        while ((inCode = WritableUtils.readVInt(dataInput)) == 4) {
            FloatWritable key = new FloatWritable();
            NullWritable value = NullWritable.get();
            readObject(key, dataInput);
            System.out.println("value:" + key.get());
            readObject(value, dataInput);
        }

        WritableUtils.writeVInt(dataOut, 54);
        dataOut.flush();
        dataOut.close();
    } catch (Exception x) {
        x.printStackTrace();
    } finally {
        closeSoket();
    }
}
From source file:it.crs4.pydoop.mapreduce.pipes.PipesMapper.java
License:Apache License
@Override
public void run(Context context) throws IOException, InterruptedException {
    setup(context);
    Configuration conf = context.getConfiguration();
    InputSplit split = context.getInputSplit();
    // FIXME: do we really need to be so convoluted?
    InputFormat<K1, V1> inputFormat;
    try {
        inputFormat = (InputFormat<K1, V1>) ReflectionUtils.newInstance(context.getInputFormatClass(), conf);
    } catch (ClassNotFoundException ce) {
        throw new RuntimeException("class not found", ce);
    }
    RecordReader<K1, V1> input = inputFormat.createRecordReader(split, context);
    input.initialize(split, context);
    boolean isJavaInput = Submitter.getIsJavaRecordReader(conf);
    try {
        // FIXME: what happens for a java mapper and no java record reader?
        DummyRecordReader fakeInput = (!isJavaInput && !Submitter.getIsJavaMapper(conf))
                ? (DummyRecordReader) input
                : null;
        application = new Application<K1, V1, K2, V2>(context, fakeInput);
    } catch (InterruptedException ie) {
        throw new RuntimeException("interrupted", ie);
    }
    DownwardProtocol<K1, V1> downlink = application.getDownlink();
    // FIXME: InputSplit is not Writable, but still, this is ugly...
    downlink.runMap((FileSplit) context.getInputSplit(), context.getNumReduceTasks(), isJavaInput);
    boolean skipping = conf.getBoolean(context.SKIP_RECORDS, false);
    boolean sent_input_types = false;
    try {
        if (isJavaInput) {
            // FIXME
            while (input.nextKeyValue()) {
                if (!sent_input_types) {
                    sent_input_types = true;
                    NullWritable n = NullWritable.get();
                    String kclass_name = n.getClass().getName();
                    String vclass_name = n.getClass().getName();
                    if (input.getCurrentKey() != null) {
                        kclass_name = input.getCurrentKey().getClass().getName();
                    }
                    if (input.getCurrentValue() != null) {
                        vclass_name = input.getCurrentValue().getClass().getName();
                    }
                    downlink.setInputTypes(kclass_name, vclass_name);
                }
                downlink.mapItem(input.getCurrentKey(), input.getCurrentValue());
                if (skipping) {
                    // flush the streams on every record input if running in skip mode
                    // so that we don't buffer other records surrounding a bad record.
                    downlink.flush();
                }
            }
            downlink.endOfInput();
        }
        application.waitForFinish();
    } catch (Throwable t) {
        application.abort(t);
    } finally {
        cleanup(context);
    }
}
From source file:it.crs4.pydoop.mapreduce.pipes.TestPipesNonJavaInputFormat.java
License:Apache License
/**
 * test PipesNonJavaInputFormat
 */
@Test
public void testFormat() throws IOException, InterruptedException {
    JobID jobId = new JobID("201408272347", 0);
    TaskID taskId = new TaskID(jobId, TaskType.MAP, 0);
    TaskAttemptID taskAttemptid = new TaskAttemptID(taskId, 0);
    Job job = new Job(new Configuration());
    job.setJobID(jobId);
    Configuration conf = job.getConfiguration();
    TaskAttemptContextImpl tcontext = new TaskAttemptContextImpl(conf, taskAttemptid);
    PipesNonJavaInputFormat input_format = new PipesNonJavaInputFormat();
    DummyRecordReader reader = (DummyRecordReader) input_format.createRecordReader(new FileSplit(), tcontext);
    assertEquals(0.0f, reader.getProgress(), 0.001);

    // input and output files
    File input1 = new File(workSpace + File.separator + "input1");
    if (!input1.getParentFile().exists()) {
        Assert.assertTrue(input1.getParentFile().mkdirs());
    }
    if (!input1.exists()) {
        Assert.assertTrue(input1.createNewFile());
    }
    File input2 = new File(workSpace + File.separator + "input2");
    if (!input2.exists()) {
        Assert.assertTrue(input2.createNewFile());
    }

    // This will fail without hdfs support.
    // // set data for splits
    // conf.set(org.apache.hadoop.mapreduce.lib.input.FileInputFormat.INPUT_DIR,
    //         StringUtils.escapeString(input1.getAbsolutePath()) + ","
    //                 + StringUtils.escapeString(input2.getAbsolutePath()));
    // List<InputSplit> splits = input_format.getSplits(job);
    // assertTrue(splits.size() >= 2);

    PipesNonJavaInputFormat.PipesDummyRecordReader dummyRecordReader =
            new PipesNonJavaInputFormat.PipesDummyRecordReader(new FileSplit(), tcontext);
    // empty dummyRecordReader
    assertEquals(0.0, dummyRecordReader.getProgress(), 0.001);
    // test the next() method
    assertTrue(dummyRecordReader.next(new FloatWritable(2.0f), NullWritable.get()));
    assertEquals(2.0, dummyRecordReader.getProgress(), 0.001);
    dummyRecordReader.close();
}
From source file:it.crs4.seal.tsv_sort.TextSampler.java
License:Apache License
/**
 * Use the input splits to take samples of the input and generate sample
 * keys. By default reads 100,000 keys from 20 locations in the input, sorts
 * them and picks N-1 keys to generate N equally sized partitions.
 * @param inFormat the input to sample
 * @param job the job to sample
 * @param partFile where to write the output file to
 * @throws IOException if something goes wrong
 */
public static void writePartitionFile(FileInputFormat<Text, Text> inFormat, JobContext job, Path partFile)
        throws IOException, InterruptedException {
    Configuration conf = job.getConfiguration();
    TaskAttemptContext taskContext = Utils.getTaskAttemptContext(conf);
    TextSampler sampler = new TextSampler();
    Text key = new Text();
    Text value = new Text();
    int partitions = job.getNumReduceTasks();
    long sampleSize = conf.getLong(SAMPLE_SIZE_CONF, SAMPLE_SIZE_DEFAULT);
    List<InputSplit> splits = inFormat.getSplits(job);
    int samples = Math.min(MAX_SLICES_SAMPLED, splits.size());
    long recordsPerSample = sampleSize / samples;
    int sampleStep = splits.size() / samples;
    long records = 0;
    // take N samples from different parts of the input
    for (int i = 0; i < samples; ++i) {
        InputSplit isplit = splits.get(sampleStep * i);
        RecordReader<Text, Text> reader = inFormat.createRecordReader(isplit, taskContext);
        reader.initialize(isplit, taskContext);
        while (reader.nextKeyValue()) {
            sampler.addKey(reader.getCurrentKey());
            records += 1;
            if ((i + 1) * recordsPerSample <= records) {
                break;
            }
        }
    }
    FileSystem outFs = partFile.getFileSystem(conf);
    if (outFs.exists(partFile))
        outFs.delete(partFile, false);
    SequenceFile.Writer writer = SequenceFile.createWriter(outFs, conf, partFile, Text.class, NullWritable.class);
    // NullWritable.get() supplies the shared empty value for every partition key.
    NullWritable nullValue = NullWritable.get();
    for (Text split : sampler.createPartitions(partitions)) {
        writer.append(split, nullValue);
    }
    writer.close();
}
From source file:it.polito.dbdmg.searum.discretization.DiscretizationMapper.java
License:Apache License
@Override
protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
    // put here pre-processing and discretization code
    context.write(value, NullWritable.get());
}
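The mapper above is a pass-through stub. A minimal sketch of what the discretization step might look like, assuming a comma-separated record whose second column holds a numeric attribute to be binned (the class name, column index, and bin width are illustrative assumptions, not taken from the project):

import java.io.IOException;

import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

// Hypothetical variant of the mapper above: discretizes one numeric attribute
// by binning it before emitting the record with a NullWritable placeholder value.
public class BinningDiscretizationMapper extends Mapper<LongWritable, Text, Text, NullWritable> {

    @Override
    protected void map(LongWritable key, Text value, Context context)
            throws IOException, InterruptedException {
        String[] fields = value.toString().split(",");
        // Column index 1 and bin width 10 are assumptions for illustration only.
        int bin = (int) (Double.parseDouble(fields[1]) / 10.0);
        fields[1] = "bin_" + bin;
        context.write(new Text(String.join(",", fields)), NullWritable.get());
    }
}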
From source file:it.polito.dbdmg.searum.itemsets.sorting.ClosedSortingReducer.java
License:Apache License
@Override
protected void reduce(LongWritable key, Iterable<Text> values, Context context)
        throws IOException, InterruptedException {
    for (Text value : values) {
        context.setStatus("Closed Sorting Reducer :" + key);
        context.write(value, NullWritable.get());
    }
}
From source file:it.polito.dbdmg.searum.itemsets.sorting.ItemsetSortingReducer.java
License:Apache License
@Override
protected void reduce(LongWritable key, Iterable<Text> values, Context context)
        throws IOException, InterruptedException {
    for (Text value : values) {
        context.setStatus("Itemset Sorting Reducer :" + key);
        context.write(value, NullWritable.get());
    }
}
From source file:kafka.bridge.pig.AvroKafkaStorage.java
License:Apache License
@Override
public void putNext(Tuple tuple) throws IOException {
    os.reset();
    writeEnvelope(os, this.encoder);
    datumWriter.write(tuple, this.encoder);
    this.encoder.flush();
    try {
        this.writer.write(NullWritable.get(), new BytesWritable(this.os.toByteArray()));
    } catch (InterruptedException e) {
        throw new IOException(e);
    }
}
From source file:kafka.etl.impl.SimpleKafkaETLReducer.java
License:Apache License
@Override
protected NullWritable generateOutputKey(KafkaETLKey key, Message message) throws IOException {
    return NullWritable.get();
}