Example usage for org.apache.hadoop.io NullWritable get

Introduction

On this page you can find example usages of org.apache.hadoop.io.NullWritable.get().

Prototype

public static NullWritable get() 

Document

Returns the single instance of this class.
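
NullWritable serializes to zero bytes, and get() always returns the same immutable instance, so it is typically used wherever a MapReduce key or value slot is required but carries no actual data. Below is a minimal sketch (the class name and generic types are illustrative, not taken from the examples on this page) of a mapper that discards its keys by emitting the NullWritable singleton:

import java.io.IOException;

import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

// Hypothetical mapper that keeps only the input values; the output key is the
// NullWritable singleton and contributes nothing to the serialized record.
public class ValuesOnlyMapper extends Mapper<LongWritable, Text, NullWritable, Text> {

    @Override
    protected void map(LongWritable key, Text value, Context context)
            throws IOException, InterruptedException {
        // NullWritable.get() always returns the same shared instance.
        context.write(NullWritable.get(), value);
    }
}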

Usage

From source file: comm.PrintValuesReducer.java

License: Open Source License

public void reduce(Text _key, Iterable<Object> values, Context context)
        throws IOException, InterruptedException {
    // process values
    for (Object val : values) {
        context.write(NullWritable.get(), val);
        System.out.println(val);
    }
}

From source file: crunch.MaxTemperature.java

License: Apache License

@Test
public void setWritableEmulation() throws IOException {
    MapWritable src = new MapWritable();
    src.put(new IntWritable(1), NullWritable.get());
    src.put(new IntWritable(2), NullWritable.get());

    MapWritable dest = new MapWritable();
    WritableUtils.cloneInto(dest, src);
    assertThat(dest.containsKey(new IntWritable(1)), is(true));
}

From source file: crunch.MaxTemperature.java

License: Apache License

@Test
public void test() throws IOException {
    NullWritable writable = NullWritable.get();
    assertThat(serialize(writable).length, is(0));
}

From source file: crunch.MaxTemperature.java

License: Apache License

@Override
public NullWritable getCurrentKey() throws IOException, InterruptedException {
    return NullWritable.get();
}

From source file: crunch.MaxTemperature.java

License: Apache License

@Override
public NullWritable createKey() {
    return NullWritable.get();
}

From source file: cz.seznam.euphoria.hadoop.input.DataSourceInputFormat.java

License: Apache License

@Override
public RecordReader<NullWritable, V> createRecordReader(InputSplit is, TaskAttemptContext tac)
        throws IOException, InterruptedException {

    initialize(tac.getConfiguration());
    @SuppressWarnings("unchecked")
    SourceSplit<V> split = (SourceSplit<V>) is;
    Reader<V> reader = split.partition.openReader();
    return new RecordReader<NullWritable, V>() {

        V v;

        @Override
        public void initialize(InputSplit is, TaskAttemptContext tac) throws IOException, InterruptedException {
            // nop
        }

        @Override
        public boolean nextKeyValue() throws IOException, InterruptedException {
            if (reader.hasNext()) {
                v = reader.next();
                return true;
            }
            return false;
        }

        @Override
        public NullWritable getCurrentKey() throws IOException, InterruptedException {
            return NullWritable.get();
        }

        @Override
        public V getCurrentValue() throws IOException, InterruptedException {
            return v;
        }

        @Override
        public float getProgress() throws IOException, InterruptedException {
            return 0.0f;
        }

        @Override
        public void close() throws IOException {
            reader.close();
        }

    };
}

From source file: cz.seznam.euphoria.hadoop.output.TestDataSinkOutputFormat.java

License: Apache License

/**
 * Test that {@code ListDataSink} can be used in place of hadoop {@code OutputFormat}.
 */
@Test
@SuppressWarnings("unchecked")
public void testDataSink() throws Exception {
    DummySink sink = new DummySink();
    Configuration conf = new Configuration();
    DataSinkOutputFormat.configure(conf, sink);

    // mock the instances we will need
    TaskAttemptContext first = mockContext(conf, 0);
    TaskAttemptContext second = mockContext(conf, 1);

    // instantiate the output format
    DataSinkOutputFormat<Long> format = DataSinkOutputFormat.class.newInstance();

    // validate
    format.checkOutputSpecs(first);

    // create record writer for the first partition
    RecordWriter<NullWritable, Long> writer = format.getRecordWriter(first);
    writer.write(NullWritable.get(), 2L);
    writer.close(first);
    format.getOutputCommitter(first).commitTask(first);

    // now the second partition, we need to create new instance of output format
    format = DataSinkOutputFormat.class.newInstance();
    // validate
    format.checkOutputSpecs(second);

    // create record writer for the second partition
    writer = format.getRecordWriter(second);
    writer.write(NullWritable.get(), 4L);
    writer.close(second);
    OutputCommitter committer = format.getOutputCommitter(second);
    committer.commitTask(second);

    // and now validate what was written
    assertFalse(DummySink.isCommitted);

    committer.commitJob(second);
    assertTrue(DummySink.isCommitted);

    assertTrue(DummySink.outputs.isEmpty());
    assertEquals(2, DummySink.committed.size());

    assertEquals(Arrays.asList(2L), DummySink.committed.get(0));
    assertEquals(Arrays.asList(4L), DummySink.committed.get(1));
}

From source file: cz.seznam.euphoria.spark.SparkFlowTranslator.java

License: Apache License

@SuppressWarnings("unchecked")
public List<DataSink<?>> translateInto(Flow flow) {
    // transform flow to acyclic graph of supported operators
    DAG<Operator<?, ?>> dag = flowToDag(flow);

    SparkExecutorContext executorContext = new SparkExecutorContext(sparkEnv, dag);

    // translate each operator to proper Spark transformation
    dag.traverse().map(Node::get).forEach(op -> {
        Translation tx = translations.get(op.getClass());
        if (tx == null) {
            throw new UnsupportedOperationException(
                    "Operator " + op.getClass().getSimpleName() + " not supported");
        }
        // ~ verify the flowToDag translation
        Preconditions.checkState(tx.accept == null || Boolean.TRUE.equals(tx.accept.apply(op)));

        JavaRDD<?> out = tx.translator.translate(op, executorContext);

        // save output of current operator to context
        executorContext.setOutput(op, out);
    });

    // process all sinks in the DAG (leaf nodes)
    final List<DataSink<?>> sinks = new ArrayList<>();
    dag.getLeafs().stream().map(Node::get).filter(op -> op.output().getOutputSink() != null).forEach(op -> {

        final DataSink<?> sink = op.output().getOutputSink();
        sinks.add(sink);
        JavaRDD<SparkElement> sparkOutput = Objects.requireNonNull((JavaRDD) executorContext.getOutput(op));

        // unwrap data from WindowedElement
        JavaPairRDD<NullWritable, Object> unwrapped = sparkOutput
                .mapToPair(el -> new Tuple2<>(NullWritable.get(), el.getElement()));

        try {
            Configuration conf = DataSinkOutputFormat.configure(new Configuration(), sink);

            conf.set(JobContext.OUTPUT_FORMAT_CLASS_ATTR, DataSinkOutputFormat.class.getName());

            // FIXME blocking op
            unwrapped.saveAsNewAPIHadoopDataset(conf);
        } catch (IOException e) {
            throw new RuntimeException(e);
        }
    });

    return sinks;
}

From source file: de.tuberlin.dima.aim3.assignment3.SearchAsMatrixVectorMultiplicationTest.java

License: Open Source License

protected SparseVector readResult(File outputFile, Configuration conf) throws IOException {
    SequenceFile.Reader reader = null;
    try {
        reader = new SequenceFile.Reader(FileSystem.get(conf), new Path(outputFile.getAbsolutePath()), conf);

        Writable row = NullWritable.get();
        SparseVector vector = new SparseVector();

        Preconditions.checkArgument(reader.getValueClass().equals(SparseVector.class),
                "value type of sequencefile must be a SparseVector");

        boolean hasAtLeastOneRow = reader.next(row, vector);
        Preconditions.checkState(hasAtLeastOneRow, "result must have at least one value");

        return vector;

    } finally {
        Closeables.closeQuietly(reader);
    }

}

From source file: de.tudarmstadt.ukp.dkpro.c4corpus.hadoop.deduplication.DeDuplicationTextOutputReducer.java

License: Apache License

@Override
protected void reduce(Text key, Iterable<DocumentInfo> values, Context context)
        throws IOException, InterruptedException {
    List<DocumentInfo> documents = new ArrayList<DocumentInfo>();

    //collect the values of each band#_bitString
    for (DocumentInfo v : values) {
        // we really need the copy here!
        DocumentInfo documentInfo = new DocumentInfo();
        documentInfo.setDocSimHash(new LongWritable(v.getDocSimHash().get()));
        documentInfo.setDocLength(new IntWritable(v.getDocLength().get()));
        documentInfo.setDocID(new Text(v.getDocID().toString()));
        documentInfo.setDocLanguage(new Text(v.getDocLang().toString()));

        documents.add(documentInfo);
    }

    //choose candidates for similarity check
    if (documents.size() >= 2) {
        //sort the list to be able to remove redundancies later
        Collections.sort(documents, new DocIDComparator());
        // set the file name prefix
        String fileName = documents.get(0).getDocLang().toString();

        multipleOutputs.write(NullWritable.get(), documents, fileName);
    }
}