Usage examples for org.apache.hadoop.io.NullWritable.get()
public static NullWritable get()
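NullWritable is a zero-length Writable: get() always returns the same shared singleton, and serializing it writes no bytes (see the serialization test below). It is the conventional placeholder when a MapReduce key or value carries no information. A minimal sketch of that common pattern, assuming a hypothetical ValuesOnlyMapper that passes input values through with a placeholder key:

import java.io.IOException;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

// Hypothetical mapper: only the values matter, so NullWritable.get()
// stands in for the output key.
public class ValuesOnlyMapper extends Mapper<Object, Text, NullWritable, Text> {
    @Override
    protected void map(Object key, Text value, Context context)
            throws IOException, InterruptedException {
        // NullWritable.get() returns the shared singleton; it serializes
        // to zero bytes, so the key adds no overhead to the output.
        context.write(NullWritable.get(), value);
    }
}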
From source file: comm.PrintValuesReducer.java
License: Open Source License

public void reduce(Text _key, Iterable<Object> values, Context context)
        throws IOException, InterruptedException {
    // process values
    for (Object val : values) {
        context.write(NullWritable.get(), val);
        System.out.println(val);
    }
}
From source file: crunch.MaxTemperature.java
License: Apache License

@Test
public void setWritableEmulation() throws IOException {
    MapWritable src = new MapWritable();
    src.put(new IntWritable(1), NullWritable.get());
    src.put(new IntWritable(2), NullWritable.get());

    MapWritable dest = new MapWritable();
    WritableUtils.cloneInto(dest, src);
    assertThat(dest.containsKey(new IntWritable(1)), is(true));
}
From source file: crunch.MaxTemperature.java
License: Apache License

@Test
public void test() throws IOException {
    NullWritable writable = NullWritable.get();
    assertThat(serialize(writable).length, is(0));
}
From source file: crunch.MaxTemperature.java
License: Apache License

@Override
public NullWritable getCurrentKey() throws IOException, InterruptedException {
    return NullWritable.get();
}
From source file: crunch.MaxTemperature.java
License: Apache License

@Override
public NullWritable createKey() {
    return NullWritable.get();
}
From source file: cz.seznam.euphoria.hadoop.input.DataSourceInputFormat.java
License: Apache License

@Override
public RecordReader<NullWritable, V> createRecordReader(InputSplit is, TaskAttemptContext tac)
        throws IOException, InterruptedException {
    initialize(tac.getConfiguration());
    @SuppressWarnings("unchecked")
    SourceSplit<V> split = (SourceSplit<V>) is;
    Reader<V> reader = split.partition.openReader();
    return new RecordReader<NullWritable, V>() {

        V v;

        @Override
        public void initialize(InputSplit is, TaskAttemptContext tac)
                throws IOException, InterruptedException {
            // nop
        }

        @Override
        public boolean nextKeyValue() throws IOException, InterruptedException {
            if (reader.hasNext()) {
                v = reader.next();
                return true;
            }
            return false;
        }

        @Override
        public NullWritable getCurrentKey() throws IOException, InterruptedException {
            return NullWritable.get();
        }

        @Override
        public V getCurrentValue() throws IOException, InterruptedException {
            return v;
        }

        @Override
        public float getProgress() throws IOException, InterruptedException {
            return 0.0f;
        }

        @Override
        public void close() throws IOException {
            reader.close();
        }
    };
}
From source file: cz.seznam.euphoria.hadoop.output.TestDataSinkOutputFormat.java
License: Apache License

/**
 * Test that {@code ListDataSink} can be used in place of hadoop {@code OutputFormat}.
 */
@Test
@SuppressWarnings("unchecked")
public void testDataSink() throws Exception {
    DummySink sink = new DummySink();
    Configuration conf = new Configuration();
    DataSinkOutputFormat.configure(conf, sink);

    // mock the instances we will need
    TaskAttemptContext first = mockContext(conf, 0);
    TaskAttemptContext second = mockContext(conf, 1);

    // instantiate the output format
    DataSinkOutputFormat<Long> format = DataSinkOutputFormat.class.newInstance();

    // validate
    format.checkOutputSpecs(first);

    // create record writer for the first partition
    RecordWriter<NullWritable, Long> writer = format.getRecordWriter(first);
    writer.write(NullWritable.get(), 2L);
    writer.close(first);
    format.getOutputCommitter(first).commitTask(first);

    // now the second partition, we need to create new instance of output format
    format = DataSinkOutputFormat.class.newInstance();

    // validate
    format.checkOutputSpecs(second);

    // create record writer for the second partition
    writer = format.getRecordWriter(second);
    writer.write(NullWritable.get(), 4L);
    writer.close(second);
    OutputCommitter committer = format.getOutputCommitter(second);
    committer.commitTask(second);

    // and now validate what was written
    assertFalse(DummySink.isCommitted);
    committer.commitJob(second);
    assertTrue(DummySink.isCommitted);
    assertTrue(DummySink.outputs.isEmpty());
    assertEquals(2, DummySink.committed.size());
    assertEquals(Arrays.asList(2L), DummySink.committed.get(0));
    assertEquals(Arrays.asList(4L), DummySink.committed.get(1));
}
From source file: cz.seznam.euphoria.spark.SparkFlowTranslator.java
License: Apache License

@SuppressWarnings("unchecked")
public List<DataSink<?>> translateInto(Flow flow) {
    // transform flow to acyclic graph of supported operators
    DAG<Operator<?, ?>> dag = flowToDag(flow);

    SparkExecutorContext executorContext = new SparkExecutorContext(sparkEnv, dag);

    // translate each operator to proper Spark transformation
    dag.traverse().map(Node::get).forEach(op -> {
        Translation tx = translations.get(op.getClass());
        if (tx == null) {
            throw new UnsupportedOperationException(
                    "Operator " + op.getClass().getSimpleName() + " not supported");
        }
        // ~ verify the flowToDag translation
        Preconditions.checkState(tx.accept == null || Boolean.TRUE.equals(tx.accept.apply(op)));

        JavaRDD<?> out = tx.translator.translate(op, executorContext);

        // save output of current operator to context
        executorContext.setOutput(op, out);
    });

    // process all sinks in the DAG (leaf nodes)
    final List<DataSink<?>> sinks = new ArrayList<>();
    dag.getLeafs().stream()
            .map(Node::get)
            .filter(op -> op.output().getOutputSink() != null)
            .forEach(op -> {
                final DataSink<?> sink = op.output().getOutputSink();
                sinks.add(sink);
                JavaRDD<SparkElement> sparkOutput =
                        Objects.requireNonNull((JavaRDD) executorContext.getOutput(op));

                // unwrap data from WindowedElement
                JavaPairRDD<NullWritable, Object> unwrapped = sparkOutput
                        .mapToPair(el -> new Tuple2<>(NullWritable.get(), el.getElement()));

                try {
                    Configuration conf = DataSinkOutputFormat.configure(new Configuration(), sink);
                    conf.set(JobContext.OUTPUT_FORMAT_CLASS_ATTR, DataSinkOutputFormat.class.getName());

                    // FIXME blocking op
                    unwrapped.saveAsNewAPIHadoopDataset(conf);
                } catch (IOException e) {
                    // preserve the cause rather than swallowing it
                    throw new RuntimeException(e);
                }
            });
    return sinks;
}
From source file: de.tuberlin.dima.aim3.assignment3.SearchAsMatrixVectorMultiplicationTest.java
License: Open Source License

protected SparseVector readResult(File outputFile, Configuration conf) throws IOException {
    SequenceFile.Reader reader = null;
    try {
        reader = new SequenceFile.Reader(FileSystem.get(conf),
                new Path(outputFile.getAbsolutePath()), conf);

        Writable row = NullWritable.get();
        SparseVector vector = new SparseVector();

        Preconditions.checkArgument(reader.getValueClass().equals(SparseVector.class),
                "value type of sequencefile must be a SparseVector");

        boolean hasAtLeastOneRow = reader.next(row, vector);
        Preconditions.checkState(hasAtLeastOneRow, "result must have at least one value");

        return vector;
    } finally {
        Closeables.closeQuietly(reader);
    }
}
From source file: de.tudarmstadt.ukp.dkpro.c4corpus.hadoop.deduplication.DeDuplicationTextOutputReducer.java
License: Apache License

@Override
protected void reduce(Text key, Iterable<DocumentInfo> values, Context context)
        throws IOException, InterruptedException {
    List<DocumentInfo> documents = new ArrayList<DocumentInfo>();

    // collect the values of each band#_bitString
    for (DocumentInfo v : values) {
        // we really need the copy here!
        DocumentInfo documentInfo = new DocumentInfo();
        documentInfo.setDocSimHash(new LongWritable(v.getDocSimHash().get()));
        documentInfo.setDocLength(new IntWritable(v.getDocLength().get()));
        documentInfo.setDocID(new Text(v.getDocID().toString()));
        documentInfo.setDocLanguage(new Text(v.getDocLang().toString()));
        documents.add(documentInfo);
    }

    // choose candidates for similarity check
    if (documents.size() >= 2) {
        // sort the list to be able to remove redundancies later
        Collections.sort(documents, new DocIDComparator());
        // set the file name prefix
        String fileName = documents.get(0).getDocLang().toString();
        multipleOutputs.write(NullWritable.get(), documents, fileName);
    }
}