List of usage examples for org.apache.hadoop.io.NullWritable.get()
public static NullWritable get()
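NullWritable.get() returns the shared singleton NullWritable instance: a zero-length placeholder used wherever a key or value is required by the API but carries no data. A minimal sketch of the typical pattern (the class name LineKeyMapper is illustrative, not taken from the examples below):

import java.io.IOException;

import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

// Emits each input line as the output key; NullWritable.get() supplies the
// empty placeholder value, so nothing extra is serialized per record.
public class LineKeyMapper extends Mapper<LongWritable, Text, Text, NullWritable> {

    @Override
    protected void map(LongWritable key, Text value, Context context)
            throws IOException, InterruptedException {
        context.write(value, NullWritable.get());
    }
}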
From source file:ipldataanalysis4.BloomMapper.java
@Override
public void map(Object key, Text value, Mapper.Context context) throws IOException, InterruptedException {
    if (value != null) {
        String[] arrAttributes = value.toString().split(",");
        if (arrAttributes != null) {
            // Skip the CSV header row.
            if (!arrAttributes[0].equals("match_id")) {
                String playerName = arrAttributes[6];
                int match_id = Integer.parseInt(String.valueOf(arrAttributes[0]));
                Scores s = new Scores(playerName);
                // Emit only records whose player may be in the Bloom filter; the output
                // value carries no data, so NullWritable.get() serves as a placeholder.
                if (friends.mightContain(s)) {
                    String st = playerName + " " + value.toString();
                    Text v = new Text(st);
                    context.write(v, NullWritable.get());
                }
            }
        }
    }
}
From source file:it.crs4.pydoop.mapreduce.pipes.PipeApplicationRunnableStub.java
License:Apache License
public void binaryProtocolStub() {
    try {
        initSoket();
        System.out.println("start OK");

        // RUN_MAP.code, should be 3
        int answer = WritableUtils.readVInt(dataInput);
        System.out.println("RunMap:" + answer);
        FileSplit split = new FileSplit();
        readObject(split, dataInput);

        WritableUtils.readVInt(dataInput);
        WritableUtils.readVInt(dataInput);
        // end runMap

        // get InputTypes
        WritableUtils.readVInt(dataInput);
        String inText = Text.readString(dataInput);
        System.out.println("Key class:" + inText);
        inText = Text.readString(dataInput);
        System.out.println("Value class:" + inText);

        @SuppressWarnings("unused")
        int inCode = 0;
        // read all data from sender and write to output
        while ((inCode = WritableUtils.readVInt(dataInput)) == 4) {
            FloatWritable key = new FloatWritable();
            NullWritable value = NullWritable.get();
            readObject(key, dataInput);
            System.out.println("value:" + key.get());
            readObject(value, dataInput);
        }

        WritableUtils.writeVInt(dataOut, 54);
        dataOut.flush();
        dataOut.close();
    } catch (Exception x) {
        x.printStackTrace();
    } finally {
        closeSoket();
    }
}
From source file:it.crs4.pydoop.mapreduce.pipes.PipesMapper.java
License:Apache License
@Override
public void run(Context context) throws IOException, InterruptedException {
    setup(context);
    Configuration conf = context.getConfiguration();
    InputSplit split = context.getInputSplit();
    // FIXME: do we really need to be so convoluted?
    InputFormat<K1, V1> inputFormat;
    try {
        inputFormat = (InputFormat<K1, V1>) ReflectionUtils.newInstance(context.getInputFormatClass(), conf);
    } catch (ClassNotFoundException ce) {
        throw new RuntimeException("class not found", ce);
    }
    RecordReader<K1, V1> input = inputFormat.createRecordReader(split, context);
    input.initialize(split, context);
    boolean isJavaInput = Submitter.getIsJavaRecordReader(conf);
    try {
        // FIXME: what happens for a java mapper and no java record reader?
        DummyRecordReader fakeInput = (!isJavaInput && !Submitter.getIsJavaMapper(conf))
                ? (DummyRecordReader) input
                : null;
        application = new Application<K1, V1, K2, V2>(context, fakeInput);
    } catch (InterruptedException ie) {
        throw new RuntimeException("interrupted", ie);
    }
    DownwardProtocol<K1, V1> downlink = application.getDownlink();
    // FIXME: InputSplit is not Writable, but still, this is ugly...
    downlink.runMap((FileSplit) context.getInputSplit(), context.getNumReduceTasks(), isJavaInput);
    boolean skipping = conf.getBoolean(context.SKIP_RECORDS, false);
    boolean sent_input_types = false;
    try {
        if (isJavaInput) {
            // FIXME
            while (input.nextKeyValue()) {
                if (!sent_input_types) {
                    sent_input_types = true;
                    NullWritable n = NullWritable.get();
                    String kclass_name = n.getClass().getName();
                    String vclass_name = n.getClass().getName();
                    if (input.getCurrentKey() != null) {
                        kclass_name = input.getCurrentKey().getClass().getName();
                    }
                    if (input.getCurrentValue() != null) {
                        vclass_name = input.getCurrentValue().getClass().getName();
                    }
                    downlink.setInputTypes(kclass_name, vclass_name);
                }
                downlink.mapItem(input.getCurrentKey(), input.getCurrentValue());
                if (skipping) {
                    // flush the streams on every record input if running in skip mode
                    // so that we don't buffer other records surrounding a bad record.
                    downlink.flush();
                }
            }
            downlink.endOfInput();
        }
        application.waitForFinish();
    } catch (Throwable t) {
        application.abort(t);
    } finally {
        cleanup(context);
    }
}
From source file:it.crs4.pydoop.mapreduce.pipes.TestPipesNonJavaInputFormat.java
License:Apache License
/**
 * test PipesNonJavaInputFormat
 */
@Test
public void testFormat() throws IOException, InterruptedException {
    JobID jobId = new JobID("201408272347", 0);
    TaskID taskId = new TaskID(jobId, TaskType.MAP, 0);
    TaskAttemptID taskAttemptid = new TaskAttemptID(taskId, 0);
    Job job = new Job(new Configuration());
    job.setJobID(jobId);
    Configuration conf = job.getConfiguration();
    TaskAttemptContextImpl tcontext = new TaskAttemptContextImpl(conf, taskAttemptid);
    PipesNonJavaInputFormat input_format = new PipesNonJavaInputFormat();
    DummyRecordReader reader = (DummyRecordReader) input_format.createRecordReader(new FileSplit(), tcontext);
    assertEquals(0.0f, reader.getProgress(), 0.001);

    // input and output files
    File input1 = new File(workSpace + File.separator + "input1");
    if (!input1.getParentFile().exists()) {
        Assert.assertTrue(input1.getParentFile().mkdirs());
    }
    if (!input1.exists()) {
        Assert.assertTrue(input1.createNewFile());
    }
    File input2 = new File(workSpace + File.separator + "input2");
    if (!input2.exists()) {
        Assert.assertTrue(input2.createNewFile());
    }

    // This will fail without hdfs support.
    // // set data for splits
    // conf.set(org.apache.hadoop.mapreduce.lib.input.FileInputFormat.INPUT_DIR,
    //         StringUtils.escapeString(input1.getAbsolutePath()) + ","
    //                 + StringUtils.escapeString(input2.getAbsolutePath()));
    // List<InputSplit> splits = input_format.getSplits(job);
    // assertTrue(splits.size() >= 2);

    PipesNonJavaInputFormat.PipesDummyRecordReader dummyRecordReader =
            new PipesNonJavaInputFormat.PipesDummyRecordReader(new FileSplit(), tcontext);
    // empty dummyRecordReader
    assertEquals(0.0, dummyRecordReader.getProgress(), 0.001);
    // test the next() method
    assertTrue(dummyRecordReader.next(new FloatWritable(2.0f), NullWritable.get()));
    assertEquals(2.0, dummyRecordReader.getProgress(), 0.001);
    dummyRecordReader.close();
}
From source file:it.crs4.seal.tsv_sort.TextSampler.java
License:Apache License
/**
 * Use the input splits to take samples of the input and generate sample
 * keys. By default reads 100,000 keys from 20 locations in the input, sorts
 * them and picks N-1 keys to generate N equally sized partitions.
 * @param inFormat the input to sample
 * @param job the job to sample
 * @param partFile where to write the output file to
 * @throws IOException if something goes wrong
 */
public static void writePartitionFile(FileInputFormat<Text, Text> inFormat, JobContext job, Path partFile)
        throws IOException, InterruptedException {
    Configuration conf = job.getConfiguration();
    TaskAttemptContext taskContext = Utils.getTaskAttemptContext(conf);
    TextSampler sampler = new TextSampler();
    Text key = new Text();
    Text value = new Text();
    int partitions = job.getNumReduceTasks();
    long sampleSize = conf.getLong(SAMPLE_SIZE_CONF, SAMPLE_SIZE_DEFAULT);
    List<InputSplit> splits = inFormat.getSplits(job);
    int samples = Math.min(MAX_SLICES_SAMPLED, splits.size());
    long recordsPerSample = sampleSize / samples;
    int sampleStep = splits.size() / samples;
    long records = 0;
    // take N samples from different parts of the input
    for (int i = 0; i < samples; ++i) {
        InputSplit isplit = splits.get(sampleStep * i);
        RecordReader<Text, Text> reader = inFormat.createRecordReader(isplit, taskContext);
        reader.initialize(isplit, taskContext);
        while (reader.nextKeyValue()) {
            sampler.addKey(reader.getCurrentKey());
            records += 1;
            if ((i + 1) * recordsPerSample <= records) {
                break;
            }
        }
    }
    FileSystem outFs = partFile.getFileSystem(conf);
    if (outFs.exists(partFile))
        outFs.delete(partFile, false);
    SequenceFile.Writer writer = SequenceFile.createWriter(outFs, conf, partFile, Text.class, NullWritable.class);
    // NullWritable.get() supplies the shared empty value for every partition key.
    NullWritable nullValue = NullWritable.get();
    for (Text split : sampler.createPartitions(partitions)) {
        writer.append(split, nullValue);
    }
    writer.close();
}
From source file:it.polito.dbdmg.searum.discretization.DiscretizationMapper.java
License:Apache License
@Override
protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
    // put here pre-processing and discretization code
    context.write(value, NullWritable.get());
}
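The mapper above is a pass-through stub. A minimal sketch of what the discretization step might look like, assuming a comma-separated record whose second column holds a numeric attribute to be binned (the class name, column index, and bin width are illustrative assumptions, not taken from the project):

import java.io.IOException;

import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

// Hypothetical variant of the mapper above: discretizes one numeric attribute
// by binning it before emitting the record with a NullWritable placeholder value.
public class BinningDiscretizationMapper extends Mapper<LongWritable, Text, Text, NullWritable> {

    @Override
    protected void map(LongWritable key, Text value, Context context)
            throws IOException, InterruptedException {
        String[] fields = value.toString().split(",");
        // Column index 1 and bin width 10 are assumptions for illustration only.
        int bin = (int) (Double.parseDouble(fields[1]) / 10.0);
        fields[1] = "bin_" + bin;
        context.write(new Text(String.join(",", fields)), NullWritable.get());
    }
}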
From source file:it.polito.dbdmg.searum.itemsets.sorting.ClosedSortingReducer.java
License:Apache License
@Override
protected void reduce(LongWritable key, Iterable<Text> values, Context context)
        throws IOException, InterruptedException {
    for (Text value : values) {
        context.setStatus("Closed Sorting Reducer :" + key);
        context.write(value, NullWritable.get());
    }
}
From source file:it.polito.dbdmg.searum.itemsets.sorting.ItemsetSortingReducer.java
License:Apache License
@Override
protected void reduce(LongWritable key, Iterable<Text> values, Context context)
        throws IOException, InterruptedException {
    for (Text value : values) {
        context.setStatus("Itemset Sorting Reducer :" + key);
        context.write(value, NullWritable.get());
    }
}
From source file:kafka.bridge.pig.AvroKafkaStorage.java
License:Apache License
@Override
public void putNext(Tuple tuple) throws IOException {
    os.reset();
    writeEnvelope(os, this.encoder);
    datumWriter.write(tuple, this.encoder);
    this.encoder.flush();
    try {
        this.writer.write(NullWritable.get(), new BytesWritable(this.os.toByteArray()));
    } catch (InterruptedException e) {
        throw new IOException(e);
    }
}
From source file:kafka.etl.impl.SimpleKafkaETLReducer.java
License:Apache License
@Override
protected NullWritable generateOutputKey(KafkaETLKey key, Message message) throws IOException {
    return NullWritable.get();
}