List of usage examples for org.apache.hadoop.io.NullWritable.get()
public static NullWritable get()
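NullWritable is a zero-length Writable used wherever a key or value slot must be filled but carries no data; NullWritable.get() returns the shared singleton instance. Before the collected examples below, here is a minimal sketch of the most common pattern; the class and field names are illustrative, not taken from any of the source files that follow.

import java.io.IOException;

import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

// Hypothetical mapper that keeps only the record text and discards the value slot.
public class KeyOnlyMapper extends Mapper<Object, Text, Text, NullWritable> {

    @Override
    protected void map(Object key, Text value, Context context)
            throws IOException, InterruptedException {
        // NullWritable.get() always returns the same immutable singleton,
        // so no per-record allocation is needed for the empty value.
        context.write(value, NullWritable.get());
    }
}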
From source file:com.couchbase.sqoop.mapreduce.CouchbaseImportMapper.java
License:Apache License
@Override
public void map(Text key, SqoopRecord val, Context context) throws IOException, InterruptedException {
    try {
        val.loadLargeObjects(lobLoader);
    } catch (SQLException sqlE) {
        throw new IOException(sqlE);
    }
    outkey.set(val.toString());
    context.write(outkey, NullWritable.get());
}
From source file:com.csiro.hadoop.WritableTest.java
public static void main(String[] args) {
    System.out.println("*** Primitive Writable ***");

    BooleanWritable bool1 = new BooleanWritable(true);
    ByteWritable byte1 = new ByteWritable((byte) 3);
    System.out.printf("Boolean:%s Byte:%d\n", bool1, byte1.get());

    IntWritable int1 = new IntWritable(5);
    IntWritable int2 = new IntWritable(17);
    System.out.printf("I1:%d I2:%d\n", int1.get(), int2.get());

    int1.set(int2.get());
    System.out.printf("I1:%d I2:%d\n", int1.get(), int2.get());

    Integer int3 = new Integer(23);
    int1.set(int3);
    System.out.printf("I1:%d I2:%d\n", int1.get(), int2.get());

    System.out.println("*** Array Writable ***");

    ArrayWritable a = new ArrayWritable(IntWritable.class);
    a.set(new IntWritable[] { new IntWritable(1), new IntWritable(3), new IntWritable(5) });
    IntWritable[] values = (IntWritable[]) a.get();
    for (IntWritable i : values) {
        System.out.println(i);
    }

    IntArrayWritable ia = new IntArrayWritable();
    ia.set(new IntWritable[] { new IntWritable(1), new IntWritable(3), new IntWritable(5) });
    IntWritable[] ivalues = (IntWritable[]) ia.get();

    ia.set(new LongWritable[] { new LongWritable(10001) });

    System.out.println("*** Map Writables ***");

    MapWritable m = new MapWritable();
    IntWritable key1 = new IntWritable(5);
    NullWritable value1 = NullWritable.get();

    m.put(key1, value1);
    System.out.println(m.containsKey(key1));
    System.out.println(m.get(key1));
    m.put(new LongWritable(100000000), key1);

    Set<Writable> keys = m.keySet();
    for (Writable k : keys) {
        System.out.println(k.getClass());
    }
}
From source file:com.datasalt.pangool.examples.gameoflife.GameOfLifeJob.java
License:Apache License
@Override
public int run(String[] args) throws Exception {
    if (args.length != 3) {
        failArguments("Wrong number of arguments");
        return -1;
    }
    String output = args[0];
    String input = GameOfLifeJob.class.getName() + "-prepared-input";
    delete(output);
    delete(input);
    final int gridSize = Integer.parseInt(args[1]);

    // Write the input of the job as a set of (min, max) intervals
    // Each number between (min, max) represents a possible initial configuration for Game of Life
    int parallelism = Integer.parseInt(args[2]);
    int maxCombinations = (int) Math.pow(2, gridSize * gridSize);
    int splitSize = maxCombinations / parallelism;
    FileSystem fS = FileSystem.get(conf);
    BufferedWriter writer = new BufferedWriter(new OutputStreamWriter(fS.create(new Path(input))));
    for (int i = 0; i < parallelism; i++) {
        writer.write(((i * splitSize) + 1) + "\t" + ((i + 1) * splitSize) + "\n");
    }
    writer.close();

    // Optional parameters: maxX, maxY, #iterations
    final int maxX = conf.getInt("gol.max_x", 32);
    final int maxY = conf.getInt("gol.max_y", 32);
    final int iterations = conf.getInt("gol.iterations", 1000);
    Log.info("using parameters: maxX grid: " + maxX + " maxY grid: " + maxY + " max #iterations: " + iterations);

    // Define the intermediate schema: a pair of ints
    final Schema schema = new Schema("minMax", Fields.parse("min:int, max:int"));
    TupleMRBuilder job = new TupleMRBuilder(conf);
    job.addIntermediateSchema(schema);
    job.setGroupByFields("min", "max");
    job.setCustomPartitionFields("min");

    // Define the input and its associated mapper
    // The mapper will just emit the (min, max) pairs to the reduce stage
    job.addInput(new Path(input), new HadoopInputFormat(TextInputFormat.class),
            new TupleMapper<LongWritable, Text>() {

                Tuple tuple = new Tuple(schema);

                @Override
                public void map(LongWritable key, Text value, TupleMRContext context, Collector collector)
                        throws IOException, InterruptedException {
                    String[] fields = value.toString().split("\t");
                    tuple.set("min", Integer.parseInt(fields[0]));
                    tuple.set("max", Integer.parseInt(fields[1]));
                    collector.write(tuple);
                }
            });

    // Define the reducer
    // The reducer will run as many games of life as (max - min) for each interval it receives
    // It will emit the inputs of GOL that converged together with the number of iterations
    // Note that inputs that produce grid overflow are ignored (but may have longer iteration convergence)
    job.setTupleReducer(new TupleReducer<Text, NullWritable>() {

        public void reduce(ITuple group, Iterable<ITuple> tuples, TupleMRContext context, Collector collector)
                throws IOException, InterruptedException, TupleMRException {

            int min = (Integer) group.get("min"), max = (Integer) group.get("max");
            for (int i = min; i < max; i++) {
                try {
                    GameOfLife gameOfLife = new GameOfLife(gridSize, GameOfLife.longToBytes((long) i), maxX,
                            maxY, iterations);
                    while (true) {
                        gameOfLife.nextCycle();
                    }
                } catch (GameOfLifeException e) {
                    context.getHadoopContext().progress();
                    context.getHadoopContext().getCounter("stats", e.getCauseMessage() + "").increment(1);
                    if (e.getCauseMessage().equals(CauseMessage.CONVERGENCE_REACHED)) {
                        collector.write(new Text(
                                Arrays.toString(GameOfLife.longToBytes((long) i)) + "\t" + e.getIterations()),
                                NullWritable.get());
                    }
                }
            }
        }
    });

    job.setOutput(new Path(output), new HadoopOutputFormat(TextOutputFormat.class), Text.class,
            NullWritable.class);
    try {
        job.createJob().waitForCompletion(true);
    } finally {
        job.cleanUpInstanceFiles();
    }
    delete(input);
    return 0;
}
From source file:com.datasalt.pangool.solr.TupleSolrOutputFormatExample.java
License:Apache License
public int run(String input, String output, Configuration conf) throws Exception {
    // Define the intermediate schema: It must match SOLR's schema.xml!
    final Schema schema = new Schema("iSchema", Fields.parse("user_id:string, message:string"));

    TupleMRBuilder job = new TupleMRBuilder(conf);
    job.addIntermediateSchema(schema);
    job.setGroupByFields("user_id");

    // Define the input and its associated mapper.
    // We'll just have a Mapper, reducer will be Identity
    job.addInput(new Path(input), new HadoopInputFormat(TextInputFormat.class),
            new TupleMapper<LongWritable, Text>() {

                Tuple tuple = new Tuple(schema);

                @Override
                public void map(LongWritable key, Text value, TupleMRContext context, Collector collector)
                        throws IOException, InterruptedException {
                    String[] fields = value.toString().split("\t");
                    String language = fields[1];
                    tuple.set("user_id", fields[0]);
                    tuple.set("message", fields[2]);
                    if (language.equals("en")) {
                        // English -> write to main output
                        collector.write(tuple);
                    } else if (language.equals("fr")) {
                        // French -> write to french index
                        collector.getNamedOutput("FR").write(tuple, NullWritable.get());
                    } else if (language.equals("es")) {
                        // Spanish -> write to spanish index
                        collector.getNamedOutput("ES").write(tuple, NullWritable.get());
                    }
                }
            });

    // Add multi-output: French index
    job.addNamedOutput("FR", new TupleSolrOutputFormat(new File("src/test/resources/solr-fr"), conf),
            ITuple.class, NullWritable.class);
    // Add multi-output: Spanish index
    job.addNamedOutput("ES", new TupleSolrOutputFormat(new File("src/test/resources/solr-es"), conf),
            ITuple.class, NullWritable.class);

    job.setTupleReducer(new IdentityTupleReducer());
    // Main output: English index
    job.setOutput(new Path(output), new TupleSolrOutputFormat(new File("src/test/resources/solr-en"), conf),
            ITuple.class, NullWritable.class);

    Job hadoopJob = job.createJob();
    try {
        hadoopJob.waitForCompletion(true);
        if (!hadoopJob.isSuccessful()) {
            throw new PangoolRuntimeException("Job was not successful");
        }
    } finally {
        job.cleanUpInstanceFiles();
    }
    return 0;
}
From source file:com.datasalt.pangool.tuplemr.IdentityTupleReducer.java
License:Apache License
@Override
public void reduce(ITuple group, Iterable<ITuple> tuples, TupleMRContext context, Collector collector)
        throws IOException, InterruptedException, TupleMRException {

    for (ITuple tuple : tuples) {
        collector.write(tuple, NullWritable.get());
    }
}
From source file:com.datasalt.pangool.tuplemr.mapred.lib.input.HCatTupleInputFormat.java
License:Apache License
@Override
public RecordReader<ITuple, NullWritable> createRecordReader(InputSplit split, TaskAttemptContext taskContext)
        throws IOException, InterruptedException {

    HCatInputFormat iF = new HCatInputFormat();

    @SuppressWarnings("rawtypes")
    final RecordReader<WritableComparable, HCatRecord> hCatRecordReader = iF.createRecordReader(split,
            taskContext);

    return new RecordReader<ITuple, NullWritable>() {

        ITuple tuple = new Tuple(pangoolSchema);

        @Override
        public void close() throws IOException {
            hCatRecordReader.close();
        }

        @Override
        public ITuple getCurrentKey() throws IOException, InterruptedException {
            HCatRecord record = hCatRecordReader.getCurrentValue();
            // Perform conversion between HCatRecord and Tuple
            for (int pos = 0; pos < schema.size(); pos++) {
                tuple.set(pos, record.get(pos));
            }
            return tuple;
        }

        @Override
        public NullWritable getCurrentValue() throws IOException, InterruptedException {
            return NullWritable.get();
        }

        @Override
        public float getProgress() throws IOException, InterruptedException {
            return hCatRecordReader.getProgress();
        }

        @Override
        public void initialize(InputSplit iS, TaskAttemptContext context)
                throws IOException, InterruptedException {
            hCatRecordReader.initialize(iS, context);
        }

        @Override
        public boolean nextKeyValue() throws IOException, InterruptedException {
            return hCatRecordReader.nextKeyValue();
        }
    };
}
From source file:com.datasalt.pangool.tuplemr.mapred.lib.output.TestMultipleOutputs.java
License:Apache License
@Test
public void test() throws TupleMRException, IOException, InterruptedException, ClassNotFoundException,
        InstantiationException, IllegalAccessException {

    initHadoop();
    trash(INPUT, OUTPUT);

    // One file with one line - context will be ignored
    // Business logic in {@link MyInputProcessor}
    CommonUtils.writeTXT("ignore-me", new File(INPUT));

    getConf().set("mapred.output.compress", "true");
    getConf().set("mapred.output.compression.codec", "org.apache.hadoop.io.compress.DefaultCodec");

    TupleMRBuilder builder = new TupleMRBuilder(getConf());
    Schema baseSchema = new Schema("schema", Fields.parse("name:string, money:int, country:string"));
    builder.addIntermediateSchema(baseSchema);
    builder.setGroupByFields("country");
    builder.setOrderBy(new OrderBy().add("country", Order.ASC).add("money", Order.DESC).add("name", Order.ASC));
    builder.addInput(new Path(INPUT), new HadoopInputFormat(TextInputFormat.class), new MyInputProcessor());
    builder.setTupleReducer(new MyGroupHandler());
    builder.setOutput(new Path(OUTPUT), new HadoopOutputFormat(SequenceFileOutputFormat.class),
            DoubleWritable.class, NullWritable.class);

    // Configure extra outputs
    builder.addNamedOutput(OUTPUT_1, new HadoopOutputFormat(SequenceFileOutputFormat.class), Utf8.class,
            Utf8.class);
    builder.addNamedOutput(OUTPUT_2, new HadoopOutputFormat(SequenceFileOutputFormat.class), IntWritable.class,
            NullWritable.class);
    builder.addNamedTupleOutput(TUPLEOUTPUT_1, baseSchema);

    getConf().setClass(ProxyOutputFormat.PROXIED_OUTPUT_FORMAT_CONF, SequenceFileOutputFormat.class,
            OutputFormat.class);

    Job job = builder.createJob();
    try {
        assertRun(job);
    } finally {
        builder.cleanUpInstanceFiles();
    }

    // Check outputs
    checkCompression(firstReducerOutput(OUTPUT + "/" + OUTPUT_1), DefaultCodec.class);
    checkCompression(firstReducerOutput(OUTPUT + "/" + OUTPUT_2), DefaultCodec.class);
    checkCompression(firstMapOutput(OUTPUT + "/" + OUTPUT_1), DefaultCodec.class);
    checkCompression(firstMapOutput(OUTPUT + "/" + OUTPUT_2), DefaultCodec.class);
    checkCompression(firstReducerOutput(OUTPUT), DefaultCodec.class);

    withOutput(firstReducerOutput(OUTPUT), new DoubleWritable(1.0), NullWritable.get());
    withOutput(firstReducerOutput(OUTPUT + "/" + OUTPUT_1), new Text("Pere"), new Text("ES"));
    withOutput(firstMapOutput(OUTPUT + "/" + OUTPUT_1), new Text("Pere"), new Text("ES"));
    withOutput(firstReducerOutput(OUTPUT + "/" + OUTPUT_2), new IntWritable(100), NullWritable.get());
    withOutput(firstMapOutput(OUTPUT + "/" + OUTPUT_2), new IntWritable(100), NullWritable.get());

    Tuple tuple = new Tuple(baseSchema);
    tuple.set(0, "Pere");
    tuple.set(1, 100);
    tuple.set(2, "ES");
    withTupleOutput(firstMapOutput(OUTPUT + "/" + TUPLEOUTPUT_1), tuple);
    withTupleOutput(firstReducerOutput(OUTPUT + "/" + TUPLEOUTPUT_1), tuple);

    trash(INPUT, OUTPUT);
    cleanUp();
}
From source file:com.datasalt.pangool.tuplemr.mapred.TestTupleHashPartitioner.java
License:Apache License
@Test
public void multipleSourcesTest() throws TupleMRException, IOException {
    Configuration conf = getConf();
    TupleHashPartitioner partitioner = new TupleHashPartitioner();

    List<Field> fields = new ArrayList<Field>();
    fields.add(Field.create("number1", Type.INT));
    fields.add(Field.create("string1", Type.STRING));
    fields.add(Field.create("string2", Type.STRING));
    Schema schema1 = new Schema("test1", fields);

    fields = new ArrayList<Field>();
    fields.add(Field.create("number1", Type.INT));
    fields.add(Field.create("string1", Type.STRING));
    fields.add(Field.create("number2", Type.LONG));
    Schema schema2 = new Schema("test2", fields);

    TupleMRConfigBuilder builder = new TupleMRConfigBuilder();
    builder.addIntermediateSchema(schema1);
    builder.addIntermediateSchema(schema2);
    builder.setGroupByFields("number1", "string1");
    TupleMRConfig tupleMRConf = builder.buildConf();
    TupleMRConfig.set(tupleMRConf, conf);
    partitioner.setConf(conf);

    ITuple tuple = new Tuple(schema1);
    tuple.set("number1", 35);
    tuple.set("string1", "foo");

    // Check that for the same prefix (number1, string1) we obtain the same partition
    int partitionId = -N_PARTITIONS;
    for (int i = 0; i < MAX_ITERATIONS_OVER_ONE_SCHEMA; i++) {
        tuple.set("string2", TestUtils.randomString(10));
        int thisPartitionId = partitioner.getPartition(new DatumWrapper(tuple), NullWritable.get(),
                N_PARTITIONS);
        Assert.assertTrue(thisPartitionId >= 0);
        Assert.assertTrue(thisPartitionId < N_PARTITIONS);
        if (partitionId == -N_PARTITIONS) {
            partitionId = thisPartitionId;
        } else {
            // Check that the returned partition is always the same even if "string2" field changes its value
            Assert.assertEquals(thisPartitionId, partitionId);
        }
    }

    // On the other hand, check that when we vary one of the group by fields, partition varies
    int partitionMatches[] = new int[N_PARTITIONS];
    for (int i = 0; i < MAX_ITERATIONS_OVER_ONE_SCHEMA; i++) {
        tuple.set("string1", TestUtils.randomString(10));
        int thisPartitionId = partitioner.getPartition(new DatumWrapper(tuple), NullWritable.get(),
                N_PARTITIONS);
        Assert.assertTrue(thisPartitionId >= 0);
        Assert.assertTrue(thisPartitionId < N_PARTITIONS);
        partitionMatches[thisPartitionId]++;
    }

    for (int i = 0; i < partitionMatches.length; i++) {
        if (partitionMatches[i] == 0) {
            throw new AssertionError(
                    "Partition matches: 0 for partition " + i + ". Seems like a bug in the Partitioner.");
        }
    }
}
From source file:com.datasalt.pangool.tuplemr.mapred.TestTupleHashPartitioner.java
License:Apache License
@Test
public void sanityTest() throws TupleMRException, IOException {
    // This is a basic sanity test for checking that the partitioner works for nPartitions > 1
    Configuration conf = getConf();
    TupleHashPartitioner partitioner = new TupleHashPartitioner();

    List<Field> fields = new ArrayList<Field>();
    // We use one INT field - we'll put random numbers in it
    fields.add(Field.create("foo", Type.INT));
    Schema schema = new Schema("test", fields);

    TupleMRConfigBuilder builder = new TupleMRConfigBuilder();
    builder.addIntermediateSchema(schema);
    builder.setGroupByFields("foo");
    TupleMRConfig tupleMRConf = builder.buildConf();
    TupleMRConfig.set(tupleMRConf, conf);
    partitioner.setConf(conf);

    ITuple tuple = new Tuple(schema);
    int partitionMatches[] = new int[N_PARTITIONS];

    for (int i = 0; i < MAX_ITERATIONS_OVER_ONE_SCHEMA; i++) {
        tuple.set("foo", (int) (Math.random() * Integer.MAX_VALUE));
        int thisPartitionId = partitioner.getPartition(new DatumWrapper(tuple), NullWritable.get(),
                N_PARTITIONS);
        Assert.assertTrue(thisPartitionId >= 0);
        Assert.assertTrue(thisPartitionId < N_PARTITIONS);
        partitionMatches[thisPartitionId]++;
    }

    for (int i = 0; i < partitionMatches.length; i++) {
        if (partitionMatches[i] == 0) {
            throw new AssertionError(
                    "Partition matches: 0 for partition " + i + ". Seems like a bug in the Partitioner.");
        }
    }
}
From source file:com.datasalt.pangool.tuplemr.mapred.TestTupleMRJob.java
License:Apache License
@Test
public void testFillingTuplesJob() throws IOException, ClassNotFoundException, InterruptedException,
        TupleMRException {

    int NUM_ROWS_TO_GENERATE = 100;

    Configuration conf = getConf();
    String input = TestTupleMRJob.class + "-input";
    String output = TestTupleMRJob.class + "-output";

    ITuple tuple = new Tuple(SCHEMA);
    for (int i = 0; i < NUM_ROWS_TO_GENERATE; i++) {
        withTupleInput(input, fillTuple(true, tuple));
    }

    TupleMRBuilder builder = new TupleMRBuilder(getConf(), "test");
    builder.addTupleInput(new Path(input), new TupleMapper<ITuple, NullWritable>() {
        @Override
        public void map(ITuple iTuple, NullWritable nullWritable, TupleMRContext context, Collector collector)
                throws IOException, InterruptedException {
            collector.write(fillTuple(true, iTuple));
        }
    });
    builder.setTupleReducer(new TupleReducer<ITuple, NullWritable>() {
        @Override
        public void reduce(ITuple group, Iterable<ITuple> tuples, TupleMRContext context, Collector collector)
                throws IOException, InterruptedException, TupleMRException {
            for (ITuple tuple : tuples) {
                collector.write(fillTuple(true, tuple), NullWritable.get());
            }
        }
    });
    builder.addIntermediateSchema(SCHEMA);
    builder.setGroupByFields(SCHEMA.getField(0).getName());
    builder.setTupleOutput(new Path(output), SCHEMA);

    Job job = builder.createJob();
    job.setNumReduceTasks(1);
    try {
        assertRun(job);
    } finally {
        builder.cleanUpInstanceFiles();
    }

    final AtomicInteger count = new AtomicInteger();
    readTuples(new Path(output + "/part-r-00000"), getConf(), new TupleVisitor() {
        @Override
        public void onTuple(ITuple tuple) {
            count.incrementAndGet();
        }
    });
    assertEquals(NUM_ROWS_TO_GENERATE, count.get());

    trash(input);
    trash(output);
}