List of usage examples for org.apache.hadoop.io NullWritable get
public static NullWritable get()
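NullWritable is Hadoop's zero-byte placeholder Writable: get() returns its single shared instance, which is typically passed in the key or value slot of a map/reduce output when that slot carries no data, as in all of the examples below. A minimal, self-contained sketch of that pattern, assuming a standard org.apache.hadoop.mapreduce.Mapper; the class and parameter names (LineKeyMapper, offset, line) are illustrative only:

import java.io.IOException;

import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

// Emits each input line as a key with no payload: NullWritable.get() supplies the
// shared zero-byte singleton, so no value bytes are serialized into the map output.
public class LineKeyMapper extends Mapper<LongWritable, Text, Text, NullWritable> {

    @Override
    protected void map(LongWritable offset, Text line, Context context)
            throws IOException, InterruptedException {
        context.write(line, NullWritable.get());
    }
}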
From source file:com.rw.legion.input.LegionRecordReader.java
License:Apache License
@Override
public NullWritable getCurrentKey() {
    return NullWritable.get();
}
From source file:com.skp.experiment.common.mapreduce.IdentityMapper.java
License:Apache License
@SuppressWarnings("unchecked") @Override// w ww. ja va 2s .c o m protected void map(K key, V value, Context context) throws IOException, InterruptedException { if (valueOnlyOut) { context.write((K) NullWritable.get(), value); } else if (keyOnlyOut) { context.write(key, (V) NullWritable.get()); } else { context.write(key, value); } }
From source file:com.soteradefense.dga.louvain.mapreduce.LouvainTableSynthesizerReducer.java
License:Apache License
@Override
protected void reduce(Text key, Iterable<Text> values, Context context)
        throws IOException, InterruptedException {
    StringBuilder builder = new StringBuilder();
    String rightText = null;
    List<String> zeros = new ArrayList<String>();
    for (Text item : values) {
        String valueItem = item.toString();
        String[] tokens = valueItem.split(":");
        int side;
        try {
            side = Integer.parseInt(tokens[1]);
        } catch (NumberFormatException nfe) {
            throw new NumberFormatException("valueItem: " + valueItem);
        }
        if (side == 1) {
            rightText = tokens[0];
        } else {
            zeros.add(tokens[0]);
        }
    }
    for (String item : zeros) {
        builder.append(item);
        if (rightText != null) {
            builder.append('\t');
            builder.append(rightText);
        }
        context.write(new Text(builder.toString()), NullWritable.get());
        builder.setLength(0);
    }
}
From source file:com.soteradefense.dga.louvain.mapreduce.LouvainTableSynthesizerTest.java
License:Apache License
@Test
public void testReducer() throws Exception {
    List<Text> list = new LinkedList<Text>();
    list.add(new Text("1\t2:0"));
    list.add(new Text("2\t2:0"));
    list.add(new Text("3\t2:0"));
    list.add(new Text("6:1"));
    list.add(new Text("5\t2:0"));
    reduceDriver.addInput(new Text("2"), list);
    reduceDriver.addOutput(new Text("1\t2\t6"), NullWritable.get());
    reduceDriver.addOutput(new Text("2\t2\t6"), NullWritable.get());
    reduceDriver.addOutput(new Text("3\t2\t6"), NullWritable.get());
    reduceDriver.addOutput(new Text("5\t2\t6"), NullWritable.get());
    List<Pair<Text, NullWritable>> results = reduceDriver.run();
    assertTrue(results.size() == 4);
}
From source file:com.splout.db.benchmark.BenchmarkStoreTool.java
License:Apache License
@Override
public int run(String[] args) throws Exception {
    JCommander jComm = new JCommander(this);
    jComm.setProgramName("Benchmark-Store Tool");
    try {
        jComm.parse(args);
    } catch (ParameterException e) {
        System.out.println(e.getMessage());
        jComm.usage();
        return -1;
    } catch (Throwable t) {
        t.printStackTrace();
        jComm.usage();
        return -1;
    }

    // Create some input files that will represent the partitions to generate
    Path out = new Path(output);
    FileSystem outFs = out.getFileSystem(getConf());
    HadoopUtils.deleteIfExists(outFs, out);

    Integer min, max, eachPartition;
    int maxKeyDigits;
    try {
        String[] minMax = keySpace.split(":");
        min = Integer.parseInt(minMax[0]);
        max = Integer.parseInt(minMax[1]);
        maxKeyDigits = max.toString().length();
        eachPartition = (max - min) / nPartitions;
    } catch (Exception e) {
        throw new IllegalArgumentException(
                "Key range format is not valid. It must be minKey:maxKey where both minKey and maxKey are integers.");
    }

    FileSystem inFs = FileSystem.get(getConf());
    Path input = new Path("benchmark-store-tool-" + System.currentTimeMillis());
    HadoopUtils.deleteIfExists(inFs, input);
    inFs.mkdirs(input);

    List<PartitionEntry> partitionEntries = new ArrayList<PartitionEntry>();

    // Create as many input files as partitions
    // Each input file will have as value the range that each Mapper will write
    String paddingExp = "%0" + (padding != null ? padding : maxKeyDigits) + "d";
    for (int i = 0; i < nPartitions; i++) {
        int thisMin = (i * eachPartition);
        int thisMax = (i + 1) * eachPartition;
        HadoopUtils.stringToFile(inFs, new Path(input, i + ".txt"), i + "\t" + thisMin + ":" + thisMax);
        PartitionEntry entry = new PartitionEntry();
        entry.setMin(String.format(paddingExp, thisMin));
        entry.setMax(String.format(paddingExp, thisMax));
        entry.setShard(i);
        partitionEntries.add(entry);
    }

    partitionEntries.get(0).setMin(null);
    partitionEntries.get(partitionEntries.size() - 1).setMax(null);

    PartitionMap partitionMap = new PartitionMap(partitionEntries);
    HadoopUtils.stringToFile(outFs, new Path(out, "partition-map"), JSONSerDe.ser(partitionMap));

    List<Field> fields = new ArrayList<Field>();
    fields.add(Field.create(SploutSQLOutputFormat.PARTITION_TUPLE_FIELD, Type.INT));
    fields.addAll(Fields.parse("key:int, value:string"));
    final Schema schema = new Schema(tablename, fields);

    byte[] valueArray = new byte[valueSize];
    for (int i = 0; i < valueSize; i++) {
        valueArray[i] = 'A';
    }
    final String theValue = new String(valueArray);

    if (!FileSystem.getLocal(conf).equals(FileSystem.get(conf))) {
        File nativeLibs = new File("native");
        if (nativeLibs.exists()) {
            SploutHadoopConfiguration.addSQLite4JavaNativeLibsToDC(conf);
        }
    }

    MapOnlyJobBuilder job = new MapOnlyJobBuilder(conf);
    TableSpec tableSpec = new TableSpec(schema, schema.getFields().get(1));

    job.setOutput(new Path(out, "store"),
            new SploutSQLProxyOutputFormat(new SQLite4JavaOutputFormat(1000000, tableSpec)), ITuple.class,
            NullWritable.class);
    job.addInput(input, new HadoopInputFormat(TextInputFormat.class),
            new MapOnlyMapper<LongWritable, Text, ITuple, NullWritable>() {

                ITuple metaTuple = new Tuple(schema);

                protected void map(LongWritable key, Text value, Context context)
                        throws IOException, InterruptedException {
                    String[] partitionRange = value.toString().split("\t");
                    Integer partition = Integer.parseInt(partitionRange[0]);
                    metaTuple.set(SploutSQLOutputFormat.PARTITION_TUPLE_FIELD, partition);
                    String[] minMax = partitionRange[1].split(":");
                    Integer min = Integer.parseInt(minMax[0]);
                    Integer max = Integer.parseInt(minMax[1]);
                    for (int i = min; i < max; i++) {
                        metaTuple.set("key", i);
                        metaTuple.set("value", theValue);
                        context.write(metaTuple, NullWritable.get());
                    }
                }
            });

    job.createJob().waitForCompletion(true);
    HadoopUtils.deleteIfExists(inFs, input);
    return 0;
}
From source file:com.splout.db.hadoop.SumReducer.java
License:Apache License
public void reduce(ITuple group, Iterable<ITuple> tuples, TupleMRContext context, Collector collector)
        throws IOException, InterruptedException, TupleMRException {
    int totalCount = 0;
    ITuple outTuple = null;
    for (ITuple tuple : tuples) {
        totalCount += (Integer) tuple.get(field);
        outTuple = tuple;
    }
    outTuple.set(field, totalCount);
    collector.write(outTuple, NullWritable.get());
}
From source file:com.splout.db.hadoop.TupleSampler.java
License:Apache License
@SuppressWarnings("deprecation") private long fullScanSampling(TablespaceSpec tablespace, final long sampleSize, Configuration hadoopConf, Path outputPath, final int nSplits) throws TupleSamplerException { MapOnlyJobBuilder builder = new MapOnlyJobBuilder(hadoopConf, "Reservoir Sampling to path " + outputPath); for (Table table : tablespace.getPartitionedTables()) { final TableSpec tableSpec = table.getTableSpec(); final String getPartitionByJavaScript = tableSpec.getPartitionByJavaScript(); for (TableInput inputFile : table.getFiles()) { final RecordProcessor processor = inputFile.getRecordProcessor(); for (Path path : inputFile.getPaths()) { builder.addInput(path, inputFile.getFormat(), new MapOnlyMapper<ITuple, NullWritable, Text, NullWritable>() { final int nSamples = (int) (sampleSize / nSplits); final String[] samples = new String[nSamples]; CounterInterface counterInterface; long recordCounter = 0; JavascriptEngine jsEngine = null; @Override protected void setup(Context context, MultipleOutputsCollector coll) throws IOException, InterruptedException { counterInterface = new CounterInterface(context); // Initialize JavaScript engine if needed if (getPartitionByJavaScript != null) { try { jsEngine = new JavascriptEngine(getPartitionByJavaScript); } catch (Throwable e) { throw new RuntimeException(e); }/* w ww . j a v a2 s. c om*/ } } ; // Collect Tuples with decreasing probability // (http://en.wikipedia.org/wiki/Reservoir_sampling) protected void map(ITuple key, NullWritable value, Context context) throws IOException, InterruptedException { ITuple uTuple; try { uTuple = processor.process(key, key.getSchema().getName(), counterInterface); } catch (Throwable e) { throw new RuntimeException(e); } if (uTuple == null) { // user may have filtered the record return; } long reservoirIndex; if (recordCounter < nSamples) { reservoirIndex = recordCounter; } else { reservoirIndex = (long) (Math.random() * recordCounter); } if (reservoirIndex < nSamples) { String pkey = null; try { pkey = TablespaceGenerator.getPartitionByKey(uTuple, tableSpec, jsEngine); } catch (Throwable e) { throw new RuntimeException("Error when determining partition key.", e); } samples[(int) reservoirIndex] = pkey; } recordCounter++; } // Write the in-memory sampled Tuples protected void cleanup(Context context, MultipleOutputsCollector coll) throws IOException, InterruptedException { Text key = new Text(); for (String keyStr : samples) { if (keyStr != null) { key.set(keyStr); context.write(key, NullWritable.get()); } } } }, inputFile.getSpecificHadoopInputFormatContext()); } } } // Set output path Path outReservoirPath = new Path(outputPath + "-reservoir"); builder.setOutput(outReservoirPath, new HadoopOutputFormat(SequenceFileOutputFormat.class), Text.class, NullWritable.class); builder.setJarByClass(callingClass); try { Job job = null; job = builder.createJob(); if (!job.waitForCompletion(true)) { throw new TupleSamplerException("Reservoir Sampling failed!"); } } catch (Exception e) { throw new TupleSamplerException("Error creating or launching the sampling job.", e); } finally { try { builder.cleanUpInstanceFiles(); } catch (IOException e) { throw new TupleSamplerException("Error cleaning up the sampling job.", e); } } long retrievedSamples = 0; try { FileSystem outFs = outReservoirPath.getFileSystem(hadoopConf); if (outFs.listStatus(outReservoirPath) == null) { throw new IOException("Output folder not created: the Job failed!"); } retrievedSamples = 0; // Instantiate the writer we will write samples to 
SequenceFile.Writer writer = new SequenceFile.Writer(outFs, hadoopConf, outputPath, Text.class, NullWritable.class); // Aggregate the output into a single file for being consistent with the other sampling methods for (FileStatus fileStatus : outFs.listStatus(outReservoirPath)) { Path thisPath = fileStatus.getPath(); if (thisPath.getName().startsWith("part-m-")) { SequenceFile.Reader reader = new SequenceFile.Reader(outFs, thisPath, hadoopConf); Text key = new Text(); while (reader.next(key)) { writer.append(key, NullWritable.get()); retrievedSamples++; } reader.close(); } } writer.close(); outFs.delete(outReservoirPath, true); } catch (IOException e) { throw new TupleSamplerException("Error consolidating the sample job results into one file.", e); } return retrievedSamples; }
From source file:com.splout.db.hadoop.TupleSampler.java
License:Apache License
/**
 * Random sampling method a-la-TeraSort, getting some consecutive samples from each InputSplit
 * without using a Job.
 * The output is a SequenceFile with keys.
 *
 * @return The number of retrieved samples
 */
private long randomSampling(long sampleSize, Configuration hadoopConf, Path outFile, List<InputSplit> splits,
        Map<InputSplit, TableSpec> splitToTableSpec,
        Map<InputSplit, InputFormat<ITuple, NullWritable>> splitToFormat,
        Map<InputSplit, Map<String, String>> specificHadoopConf,
        Map<InputSplit, RecordProcessor> recordProcessorPerSplit,
        Map<InputSplit, JavascriptEngine> splitToJsEngine, int maxSplitsToVisit) throws IOException {

    // Instantiate the writer we will write samples to
    FileSystem fs = FileSystem.get(outFile.toUri(), hadoopConf);

    if (splits.size() == 0) {
        throw new IllegalArgumentException("There are no splits to sample from!");
    }

    @SuppressWarnings("deprecation")
    SequenceFile.Writer writer = new SequenceFile.Writer(fs, hadoopConf, outFile, Text.class,
            NullWritable.class);

    logger.info("Sequential sampling options, max splits to visit: " + maxSplitsToVisit + ", samples to take: "
            + sampleSize + ", total number of splits: " + splits.size());
    int blocks = Math.min(maxSplitsToVisit, splits.size());
    blocks = Math.min((int) sampleSize, blocks);
    long recordsPerSample = sampleSize / blocks;
    int sampleStep = splits.size() / blocks;

    long records = 0;

    CounterInterface counterInterface = new CounterInterface(null) {
        public Counter getCounter(String group, String name) {
            return Mockito.mock(Counter.class);
        }
    };

    // Take N samples from different parts of the input
    for (int i = 0; i < blocks; ++i) {
        TaskAttemptID attemptId = new TaskAttemptID(new TaskID(), 1);

        TaskAttemptContext attemptContext = null;
        try {
            attemptContext = TaskAttemptContextFactory.get(hadoopConf, attemptId);
        } catch (Exception e) {
            throw new RuntimeException(e);
        }

        InputSplit split = splits.get(sampleStep * i);
        if (specificHadoopConf.get(split) != null) {
            for (Map.Entry<String, String> specificConf : specificHadoopConf.get(split).entrySet()) {
                attemptContext.getConfiguration().set(specificConf.getKey(), specificConf.getValue());
            }
        }
        logger.info("Sampling split: " + split);
        RecordReader<ITuple, NullWritable> reader = null;
        try {
            reader = splitToFormat.get(split).createRecordReader(split, attemptContext);
            reader.initialize(split, attemptContext);

            RecordProcessor processor = recordProcessorPerSplit.get(split);
            Text key = new Text();
            while (reader.nextKeyValue()) {
                ITuple tuple = reader.getCurrentKey();

                ITuple uTuple;
                try {
                    uTuple = processor.process(tuple, tuple.getSchema().getName(), counterInterface);
                } catch (Throwable e) {
                    throw new RuntimeException(e);
                }
                if (uTuple != null) { // user may have filtered the record
                    try {
                        key.set(TablespaceGenerator.getPartitionByKey(uTuple, splitToTableSpec.get(split),
                                splitToJsEngine.get(split)));
                    } catch (Throwable e) {
                        throw new RuntimeException("Error when determining partition key.", e);
                    }

                    writer.append(key, NullWritable.get());
                    records += 1;
                    if ((i + 1) * recordsPerSample <= records) {
                        break;
                    }
                }
            }
        } catch (InterruptedException e) {
            throw new RuntimeException(e);
        }
    }

    writer.close();
    return records;
}
From source file:com.splout.db.integration.RetailDemo.java
License:Apache License
public void generate(long nRegs, String dnodes, String qnode, Path inputPath, Path outputPath) throws Exception {
    Configuration conf = new Configuration();
    FileSystem fS = FileSystem.get(conf);
    HadoopUtils.deleteIfExists(fS, inputPath);
    HadoopUtils.deleteIfExists(fS, outputPath);

    NullWritable nullValue = NullWritable.get();

    Schema retailSchema = new Schema("retail", Fields
            .parse("tienda:string, cliente:int, ticket:double, producto:int, precio:double, fecha:string"));
    ITuple tuple = new Tuple(retailSchema);

    TupleFile.Writer writer = new TupleFile.Writer(fS, conf, inputPath, retailSchema);

    // Writes nRegs Tuples to HDFS
    long soFar = 0;
    while (soFar < nRegs) {
        int tienda = (int) (Math.random() * N_TIENDAS);
        int cliente = (int) (Math.random() * N_CLIENTES);
        tuple.set("tienda", "T" + tienda);
        tuple.set("cliente", cliente);
        double[] precios = new double[N_PRODUCTOS_PER_TICKET];
        double ticket = 0;
        for (int i = 0; i < N_PRODUCTOS_PER_TICKET; i++) {
            precios[i] = ((int) (Math.random() * MAX_PRECIO * 100)) / 100;
            precios[i] = Math.max(precios[i], 5.00);
            ticket += precios[i];
        }
        tuple.set("ticket", ticket);
        long fecha = System.currentTimeMillis() - ((long) (Math.random() * DAY_SPAN * 24 * 60 * 60 * 1000));
        tuple.set("fecha", fmt.print(fecha));
        for (int i = 0; i < N_PRODUCTOS_PER_TICKET; i++) {
            int producto = (int) (Math.random() * N_PRODUCTOS);
            tuple.set("precio", precios[i]);
            tuple.set("producto", producto);
            writer.append(tuple);
            soFar++;
        }
    }
    writer.close();

    // Generate Splout view (cliente)
    String[] dnodeArray = dnodes.split(",");
    TablespaceSpec tablespace = TablespaceSpec.of(retailSchema, "cliente", inputPath, new TupleInputFormat(),
            dnodeArray.length);
    TablespaceGenerator generateView = new TablespaceGenerator(tablespace, outputPath);
    generateView.generateView(conf, SamplingType.DEFAULT, new TupleSampler.DefaultSamplingOptions());
    PartitionMap partitionMap = generateView.getPartitionMap();
    ReplicationMap replicationMap = ReplicationMap.oneToOneMap(dnodeArray);
    Path deployUri = new Path(outputPath, "store").makeQualified(fS);

    SploutClient client = new SploutClient(qnode);
    client.deploy("retailcliente", partitionMap, replicationMap, deployUri.toUri());

    // Generate Splout view (tienda)
    Path output2 = new Path(outputPath + "-2");
    HadoopUtils.deleteIfExists(fS, output2);
    tablespace = TablespaceSpec.of(retailSchema, "tienda", inputPath, new TupleInputFormat(), dnodeArray.length);
    generateView = new TablespaceGenerator(tablespace, output2);
    generateView.generateView(conf, SamplingType.DEFAULT, new TupleSampler.DefaultSamplingOptions());
    partitionMap = generateView.getPartitionMap();

    deployUri = new Path(output2, "store").makeQualified(fS);
    client.deploy("retailtienda", partitionMap, replicationMap, deployUri.toUri());
}
From source file:com.splout.db.integration.TestDemo.java
License:Apache License
public void generate(int nPartitions, long nRegs, String dnodes, String qnode, Path inputPath, Path outputPath)
        throws Exception {
    Configuration conf = new Configuration();
    FileSystem fS = FileSystem.get(conf);
    HadoopUtils.deleteIfExists(fS, inputPath);
    HadoopUtils.deleteIfExists(fS, outputPath);

    NullWritable nullValue = NullWritable.get();

    TupleFile.Writer writer = new TupleFile.Writer(fS, conf, inputPath, SploutHadoopTestUtils.SCHEMA);

    // Writes nRegs Tuples to HDFS
    long soFar = 0;
    while (soFar < nRegs) {
        writer.append(SploutHadoopTestUtils.getTuple("id" + soFar, (int) soFar));
        soFar++;
    }
    writer.close();

    // Generate Splout view
    TablespaceSpec tablespace = TablespaceSpec.of(SploutHadoopTestUtils.SCHEMA, "id", inputPath,
            new TupleInputFormat(), nPartitions);
    TablespaceGenerator generateView = new TablespaceGenerator(tablespace, outputPath);
    generateView.generateView(conf, SamplingType.DEFAULT, new TupleSampler.DefaultSamplingOptions());
    PartitionMap partitionMap = generateView.getPartitionMap();
    ReplicationMap replicationMap = ReplicationMap.oneToOneMap(dnodes.split(","));
    Path deployUri = new Path(outputPath, "store").makeQualified(fS);

    SploutClient client = new SploutClient(qnode);
    client.deploy("tablespace1", partitionMap, replicationMap, deployUri.toUri());
}