List of usage examples for org.apache.hadoop.io.NullWritable.get()
public static NullWritable get()
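NullWritable serializes to zero bytes and exposes a single shared instance; get() returns that instance, which is what the examples below pass wherever the MapReduce APIs require a key or value that carries no information. As a quick orientation before the collected examples, here is a minimal, self-contained sketch (not taken from any of the source files below; the class name and output path are illustrative) that writes a SequenceFile whose values are all NullWritable:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.SequenceFile;

public class NullWritableKeysOnly {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        Path dst = new Path("/tmp/keys-only.seq"); // illustrative output path
        FileSystem fs = dst.getFileSystem(conf);

        // NullWritable.get() always returns the same shared, immutable instance,
        // so the one reference can be appended for every record with no per-record allocation.
        NullWritable nothing = NullWritable.get();

        SequenceFile.Writer writer = SequenceFile.createWriter(fs, conf, dst,
                IntWritable.class, NullWritable.class);
        try {
            for (int i = 0; i < 10; i++) {
                writer.append(new IntWritable(i), nothing);
            }
        } finally {
            writer.close();
        }
    }
}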
From source file:hu.sztaki.ilab.bigdata.common.tools.InputSampler.java
License:Apache License
/**
 * Write a partition file for the given job, using the Sampler provided.
 * Queries the sampler for a sample keyset, sorts by the output key
 * comparator, selects the keys for each rank, and writes to the destination
 * returned from {@link TotalOrderPartitioner#getPartitionFile}.
 */
@SuppressWarnings("unchecked") // getInputFormat, getOutputKeyComparator
public static <K, V> void writePartitionFile(Job job, Sampler<K, V> sampler)
        throws IOException, ClassNotFoundException, InterruptedException {
    Configuration conf = job.getConfiguration();
    final InputFormat inf = ReflectionUtils.newInstance(job.getInputFormatClass(), conf);
    int numPartitions = job.getNumReduceTasks();
    K[] samples = sampler.getSample(inf, job);
    LOG.info("Using " + samples.length + " samples");
    RawComparator<K> comparator = (RawComparator<K>) job.getSortComparator();
    Arrays.sort(samples, comparator);
    Path dst = new Path(TotalOrderPartitioner.getPartitionFile(conf));
    FileSystem fs = dst.getFileSystem(conf);
    if (fs.exists(dst)) {
        fs.delete(dst, false);
    }
    SequenceFile.Writer writer = SequenceFile.createWriter(fs, conf, dst,
            job.getMapOutputKeyClass(), NullWritable.class);
    NullWritable nullValue = NullWritable.get();
    float stepSize = samples.length / (float) numPartitions;
    int last = -1;
    for (int i = 1; i < numPartitions; ++i) {
        int k = Math.round(stepSize * i);
        while (last >= k && comparator.compare(samples[last], samples[k]) == 0) {
            ++k;
        }
        writer.append(samples[k], nullValue);
        last = k;
    }
    writer.close();
}
From source file:hydrograph.engine.cascading.scheme.avro.CustomAvroScheme.java
License:Apache License
/**
 * Sink method to take an outgoing tuple and write it to Avro.
 *
 * @param flowProcess
 *            The cascading FlowProcess object. Should be passed in by
 *            cascading automatically.
 * @param sinkCall
 *            The cascading SinkCall object. Should be passed in by
 *            cascading automatically.
 * @throws IOException
 */
@Override
public void sink(FlowProcess<? extends JobConf> flowProcess, SinkCall<Object[], OutputCollector> sinkCall)
        throws IOException {
    TupleEntry tupleEntry = sinkCall.getOutgoingEntry();
    IndexedRecord record = new Record((Schema) sinkCall.getContext()[0]);
    Object[] objectArray = CustomCascadingToAvro.parseTupleEntry(tupleEntry, (Schema) sinkCall.getContext()[0]);
    for (int i = 0; i < objectArray.length; i++) {
        record.put(i, objectArray[i]);
    }
    // noinspection unchecked
    sinkCall.getOutput().collect(new AvroWrapper<IndexedRecord>(record), NullWritable.get());
}
From source file:infinidb.hadoop.db.IDBFileInputFormat.java
License:Apache License
@Override
public RecordReader<NullWritable, NullWritable> getRecordReader(InputSplit arg0, JobConf arg1, Reporter arg2)
        throws IOException {
    final String filename = ((FileSplit) arg0).getPath().toString();
    final JobConf job = arg1;

    return new RecordReader<NullWritable, NullWritable>() {
        private boolean unread = true;

        @Override
        public void close() throws IOException {
        }

        @Override
        public NullWritable createKey() {
            return NullWritable.get();
        }

        @Override
        public NullWritable createValue() {
            return NullWritable.get();
        }

        @Override
        public long getPos() throws IOException {
            return 0;
        }

        @Override
        public float getProgress() throws IOException {
            return unread ? 0 : 1;
        }

        /* spawn a cpimport process for each input file */
        @Override
        public boolean next(NullWritable arg0, NullWritable arg1) throws IOException {
            InfiniDBConfiguration dbConf = new InfiniDBConfiguration(job);
            String schemaName = dbConf.getOutputSchemaName();
            String tableName = filename.substring(filename.lastIndexOf('/') + 1, filename.length());
            tableName = tableName.substring(0, tableName.lastIndexOf('.'));
            String output = job.get("mapred.output.dir");
            if (unread) {
                try {
                    StringBuilder loadCmdStr = new StringBuilder();
                    loadCmdStr.append(dbConf.getInfiniDBHome());
                    loadCmdStr.append("/bin/");
                    loadCmdStr.append("infinidoop_load.sh ");
                    loadCmdStr.append(filename);
                    loadCmdStr.append(" ");
                    loadCmdStr.append(schemaName);
                    loadCmdStr.append(" ");
                    loadCmdStr.append(tableName);

                    Process lChldProc = Runtime.getRuntime().exec(loadCmdStr.toString());

                    // Wait for the child to exit
                    lChldProc.waitFor();
                    BufferedReader lChldProcOutStream = new BufferedReader(
                            new InputStreamReader(lChldProc.getInputStream()));
                    BufferedReader stdError = new BufferedReader(
                            new InputStreamReader(lChldProc.getErrorStream()));

                    String lChldProcOutPutStr = null;
                    StringBuffer outpath = new StringBuffer();
                    outpath.append(job.getWorkingDirectory());
                    outpath.append("/");
                    outpath.append(output);
                    outpath.append("/");
                    outpath.append(tableName);
                    outpath.append(".log");

                    Path pt = new Path(outpath.toString());
                    FileSystem fs = FileSystem.get(new Configuration());
                    BufferedWriter br = new BufferedWriter(new OutputStreamWriter(fs.create(pt, false)));

                    // catch output
                    while ((lChldProcOutPutStr = lChldProcOutStream.readLine()) != null) {
                        br.write(lChldProcOutPutStr);
                        br.newLine();
                    }

                    // catch error
                    while ((lChldProcOutPutStr = stdError.readLine()) != null) {
                        br.write(lChldProcOutPutStr);
                        br.newLine();
                    }

                    //br.write(outpath.toString());
                    //br.newLine();
                    //br.write(loadCmdStr.toString());
                    //br.newLine();
                    //br.write(filename);
                    br.close();
                    lChldProcOutStream.close();
                } catch (Exception e) {
                    e.printStackTrace();
                }
                unread = false;
                return true;
            } else {
                return false;
            }
        }
    };
}
From source file:infinidb.hadoop.example.InfiniDoopMapper.java
License:Apache License
public void map(NullWritable key, NullWritable val, OutputCollector<NullWritable, NullWritable> output,
        Reporter reporter) throws IOException {
    NullWritable n = NullWritable.get();
    output.collect(n, n);
}
From source file:io.aos.hdfs.MapWritableTest.java
License:Apache License
@Test
public void setWritableEmulation() throws IOException {
    MapWritable src = new MapWritable();
    src.put(new IntWritable(1), NullWritable.get());
    src.put(new IntWritable(2), NullWritable.get());

    MapWritable dest = new MapWritable();
    WritableUtils.cloneInto(dest, src);
    assertThat(dest.containsKey(new IntWritable(1)), is(true));
}
From source file:io.apigee.lembos.utils.ConversionUtils.java
License:Apache License
/**
 * Convert a JavaScript object to its {@link Writable} equivalent or throws an exception if a converter isn't found.
 *
 * @param jsObject the JavaScript object to convert
 * @param scope the scope to serialize the object
 *
 * @return the {@link Writable} equivalent of the JavaScript object
 */
public static Writable jsToWritable(final Object jsObject, final Scriptable scope) {
    Writable writable = null;

    if (JavaScriptUtils.isDefined(jsObject)) {
        JSToWritableConverter converter = null;

        for (final JSToWritableConverter entry : TO_WRITABLE_CONVERTERS) {
            if (entry.canConvert(jsObject)) {
                converter = entry;
            }
        }

        if (converter == null) {
            throw new RuntimeException("No JavaScript to Writable converter found for class: "
                    + jsObject.getClass().getCanonicalName());
        } else {
            // noinspection unchecked
            writable = converter.fromJavaScript(scope, jsObject);
        }
    } else {
        writable = NullWritable.get();
    }

    return writable;
}
From source file:io.druid.indexer.HadoopDruidIndexerMapperTest.java
License:Apache License
@Test
public void testHadoopyStringParser() throws Exception {
    final HadoopDruidIndexerConfig config = new HadoopDruidIndexerConfig(
            new HadoopIngestionSpec(DATA_SCHEMA, IO_CONFIG, TUNING_CONFIG));

    final MyMapper mapper = new MyMapper();
    final Configuration hadoopConfig = new Configuration();
    hadoopConfig.set(HadoopDruidIndexerConfig.CONFIG_PROPERTY,
            HadoopDruidIndexerConfig.JSON_MAPPER.writeValueAsString(config));
    final Mapper.Context mapContext = EasyMock.mock(Mapper.Context.class);
    EasyMock.expect(mapContext.getConfiguration()).andReturn(hadoopConfig).once();
    EasyMock.replay(mapContext);

    mapper.setup(mapContext);

    final List<Map<String, Object>> rows = ImmutableList.of(
            ImmutableMap.of("t", "2000-01-01T00:00:00.000Z", "dim1", "x", "m1", 1.0),
            ImmutableMap.of("t", "2000-01-01T00:00:00.000Z", "dim2", "y", "m1", 1.0));

    for (Map<String, Object> row : rows) {
        mapper.map(NullWritable.get(), new Text(JSON_MAPPER.writeValueAsString(row)), mapContext);
    }

    assertRowListEquals(rows, mapper.getRows());
}
From source file:io.druid.indexer.HadoopDruidIndexerMapperTest.java
License:Apache License
@Test
public void testHadoopyStringParserWithTransformSpec() throws Exception {
    final HadoopDruidIndexerConfig config = new HadoopDruidIndexerConfig(
            new HadoopIngestionSpec(
                    DATA_SCHEMA.withTransformSpec(
                            new TransformSpec(
                                    new SelectorDimFilter("dim1", "foo", null),
                                    ImmutableList.of(
                                            new ExpressionTransform("dim1t", "concat(dim1,dim1)",
                                                    ExprMacroTable.nil())))),
                    IO_CONFIG, TUNING_CONFIG));

    final MyMapper mapper = new MyMapper();
    final Configuration hadoopConfig = new Configuration();
    hadoopConfig.set(HadoopDruidIndexerConfig.CONFIG_PROPERTY,
            HadoopDruidIndexerConfig.JSON_MAPPER.writeValueAsString(config));
    final Mapper.Context mapContext = EasyMock.mock(Mapper.Context.class);
    EasyMock.expect(mapContext.getConfiguration()).andReturn(hadoopConfig).once();
    EasyMock.replay(mapContext);

    mapper.setup(mapContext);

    final List<Map<String, Object>> rows = ImmutableList.of(
            ImmutableMap.of("t", "2000-01-01T00:00:00.000Z", "dim1", "foo", "dim2", "x", "m1", 1.0),
            ImmutableMap.of("t", "2000-01-01T00:00:00.000Z", "dim1", "bar", "dim2", "y", "m1", 1.0),
            ImmutableMap.of("t", "2000-01-01T00:00:00.000Z", "dim1", "foo", "dim2", "z", "m1", 1.0));

    for (Map<String, Object> row : rows) {
        mapper.map(NullWritable.get(), new Text(JSON_MAPPER.writeValueAsString(row)), mapContext);
    }

    assertRowListEquals(
            ImmutableList.of(
                    ImmutableMap.of("t", "2000-01-01T00:00:00.000Z", "dim1", "foo", "dim1t", "foofoo",
                            "dim2", "x", "m1", 1.0),
                    ImmutableMap.of("t", "2000-01-01T00:00:00.000Z", "dim1", "foo", "dim1t", "foofoo",
                            "dim2", "z", "m1", 1.0)),
            mapper.getRows());
}
From source file:io.gzet.community.accumulo.AccumuloGraphxInputFormat.java
License:Apache License
@Override
public RecordReader<NullWritable, EdgeWritable> createRecordReader(InputSplit split, TaskAttemptContext context)
        throws IOException, InterruptedException {
    log.setLevel(getLogLevel(context));
    return new RecordReaderBase<NullWritable, EdgeWritable>() {
        @Override
        public boolean nextKeyValue() throws IOException, InterruptedException {
            if (scannerIterator.hasNext()) {
                ++numKeysRead;
                Entry<Key, Value> entry = scannerIterator.next();
                EdgeWritable edge = new EdgeWritable();
                edge.setSourceVertex(entry.getKey().getRow().toString());
                edge.setDestVertex(entry.getKey().getColumnQualifier().toString());
                edge.setCount(Long.parseLong(entry.getValue().toString()));
                currentK = NullWritable.get();
                currentKey = entry.getKey();
                currentV = edge;
                if (log.isTraceEnabled())
                    log.trace("Processing key/value pair: " + DefaultFormatter.formatEntry(entry, true));
                return true;
            }
            return false;
        }
    };
}
From source file:ipldataanalysis2.DataAnalysisMapper.java
protected void map(Object key, Text value, Context context) throws IOException, InterruptedException {
    if (value != null) {
        if (value.toString().length() > 0) {
            String records[] = value.toString().split(",");
            if (!(records[0].equals("match_id"))) {
                HashMap<String, Integer> recordFilter = new HashMap<String, Integer>();
                recordFilter.put(records[2], 1);
                for (String s : recordFilter.keySet()) {
                    if (records[2].equals(s)) {
                        mos.write("bins", value, NullWritable.get(), s);
                        break;
                    }
                }
            }
        }
    }
}