List of usage examples for org.apache.hadoop.io.NullWritable.get()
public static NullWritable get()
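NullWritable is a zero-length Writable used wherever a key or value slot must be filled but carries no data; NullWritable.get() returns the shared singleton instance. Before the collected examples below, here is a minimal sketch of the most common pattern; the class and field names are illustrative, not taken from any of the source files that follow.

import java.io.IOException;

import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

// Hypothetical mapper that keeps only the record text and discards the value slot.
public class KeyOnlyMapper extends Mapper<Object, Text, Text, NullWritable> {

    @Override
    protected void map(Object key, Text value, Context context)
            throws IOException, InterruptedException {
        // NullWritable.get() always returns the same immutable singleton,
        // so no per-record allocation is needed for the empty value.
        context.write(value, NullWritable.get());
    }
}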
From source file:com.couchbase.sqoop.mapreduce.CouchbaseImportMapper.java
License:Apache License
@Override
public void map(Text key, SqoopRecord val, Context context) throws IOException, InterruptedException {
    try {
        val.loadLargeObjects(lobLoader);
    } catch (SQLException sqlE) {
        throw new IOException(sqlE);
    }
    outkey.set(val.toString());
    context.write(outkey, NullWritable.get());
}
From source file:com.csiro.hadoop.WritableTest.java
public static void main(String[] args) {
    System.out.println("*** Primitive Writable ***");

    BooleanWritable bool1 = new BooleanWritable(true);
    ByteWritable byte1 = new ByteWritable((byte) 3);
    System.out.printf("Boolean:%s Byte:%d\n", bool1, byte1.get());

    IntWritable int1 = new IntWritable(5);
    IntWritable int2 = new IntWritable(17);
    System.out.printf("I1:%d I2:%d\n", int1.get(), int2.get());

    int1.set(int2.get());
    System.out.printf("I1:%d I2:%d\n", int1.get(), int2.get());

    Integer int3 = new Integer(23);
    int1.set(int3);
    System.out.printf("I1:%d I2:%d\n", int1.get(), int2.get());

    System.out.println("*** Array Writable ***");

    ArrayWritable a = new ArrayWritable(IntWritable.class);
    a.set(new IntWritable[] { new IntWritable(1), new IntWritable(3), new IntWritable(5) });
    IntWritable[] values = (IntWritable[]) a.get();
    for (IntWritable i : values) {
        System.out.println(i);
    }

    IntArrayWritable ia = new IntArrayWritable();
    ia.set(new IntWritable[] { new IntWritable(1), new IntWritable(3), new IntWritable(5) });
    IntWritable[] ivalues = (IntWritable[]) ia.get();

    ia.set(new LongWritable[] { new LongWritable(10001) });

    System.out.println("*** Map Writables ***");

    MapWritable m = new MapWritable();
    IntWritable key1 = new IntWritable(5);
    NullWritable value1 = NullWritable.get();

    m.put(key1, value1);
    System.out.println(m.containsKey(key1));
    System.out.println(m.get(key1));
    m.put(new LongWritable(100000000), key1);

    Set<Writable> keys = m.keySet();
    for (Writable k : keys) {
        System.out.println(k.getClass());
    }
}
From source file:com.datasalt.pangool.examples.gameoflife.GameOfLifeJob.java
License:Apache License
@Override
public int run(String[] args) throws Exception {
    if (args.length != 3) {
        failArguments("Wrong number of arguments");
        return -1;
    }
    String output = args[0];
    String input = GameOfLifeJob.class.getName() + "-prepared-input";
    delete(output);
    delete(input);
    final int gridSize = Integer.parseInt(args[1]);

    // Write the input of the job as a set of (min, max) intervals
    // Each number between (min, max) represents a possible initial configuration for Game of Life
    int parallelism = Integer.parseInt(args[2]);
    int maxCombinations = (int) Math.pow(2, gridSize * gridSize);
    int splitSize = maxCombinations / parallelism;
    FileSystem fS = FileSystem.get(conf);
    BufferedWriter writer = new BufferedWriter(new OutputStreamWriter(fS.create(new Path(input))));
    for (int i = 0; i < parallelism; i++) {
        writer.write(((i * splitSize) + 1) + "\t" + ((i + 1) * splitSize) + "\n");
    }
    writer.close();

    // Optional parameters: maxX, maxY, #iterations
    final int maxX = conf.getInt("gol.max_x", 32);
    final int maxY = conf.getInt("gol.max_y", 32);
    final int iterations = conf.getInt("gol.iterations", 1000);
    Log.info("using parameters: maxX grid: " + maxX + " maxY grid: " + maxY + " max #iterations: " + iterations);

    // Define the intermediate schema: a pair of ints
    final Schema schema = new Schema("minMax", Fields.parse("min:int, max:int"));
    TupleMRBuilder job = new TupleMRBuilder(conf);
    job.addIntermediateSchema(schema);
    job.setGroupByFields("min", "max");
    job.setCustomPartitionFields("min");

    // Define the input and its associated mapper
    // The mapper will just emit the (min, max) pairs to the reduce stage
    job.addInput(new Path(input), new HadoopInputFormat(TextInputFormat.class),
            new TupleMapper<LongWritable, Text>() {

                Tuple tuple = new Tuple(schema);

                @Override
                public void map(LongWritable key, Text value, TupleMRContext context, Collector collector)
                        throws IOException, InterruptedException {
                    String[] fields = value.toString().split("\t");
                    tuple.set("min", Integer.parseInt(fields[0]));
                    tuple.set("max", Integer.parseInt(fields[1]));
                    collector.write(tuple);
                }
            });

    // Define the reducer
    // The reducer will run as many games of life as (max - min) for each interval it receives
    // It will emit the inputs of GOL that converged together with the number of iterations
    // Note that inputs that produce grid overflow are ignored (but may have longer iteration convergence)
    job.setTupleReducer(new TupleReducer<Text, NullWritable>() {

        public void reduce(ITuple group, Iterable<ITuple> tuples, TupleMRContext context, Collector collector)
                throws IOException, InterruptedException, TupleMRException {

            int min = (Integer) group.get("min"), max = (Integer) group.get("max");
            for (int i = min; i < max; i++) {
                try {
                    GameOfLife gameOfLife = new GameOfLife(gridSize, GameOfLife.longToBytes((long) i), maxX,
                            maxY, iterations);
                    while (true) {
                        gameOfLife.nextCycle();
                    }
                } catch (GameOfLifeException e) {
                    context.getHadoopContext().progress();
                    context.getHadoopContext().getCounter("stats", e.getCauseMessage() + "").increment(1);
                    if (e.getCauseMessage().equals(CauseMessage.CONVERGENCE_REACHED)) {
                        collector.write(new Text(
                                Arrays.toString(GameOfLife.longToBytes((long) i)) + "\t" + e.getIterations()),
                                NullWritable.get());
                    }
                }
            }
        }
    });

    job.setOutput(new Path(output), new HadoopOutputFormat(TextOutputFormat.class), Text.class,
            NullWritable.class);
    try {
        job.createJob().waitForCompletion(true);
    } finally {
        job.cleanUpInstanceFiles();
    }
    delete(input);
    return 0;
}
From source file:com.datasalt.pangool.solr.TupleSolrOutputFormatExample.java
License:Apache License
public int run(String input, String output, Configuration conf) throws Exception {
    // Define the intermediate schema: It must match SOLR's schema.xml!
    final Schema schema = new Schema("iSchema", Fields.parse("user_id:string, message:string"));

    TupleMRBuilder job = new TupleMRBuilder(conf);
    job.addIntermediateSchema(schema);
    job.setGroupByFields("user_id");

    // Define the input and its associated mapper.
    // We'll just have a Mapper, reducer will be Identity
    job.addInput(new Path(input), new HadoopInputFormat(TextInputFormat.class),
            new TupleMapper<LongWritable, Text>() {

                Tuple tuple = new Tuple(schema);

                @Override
                public void map(LongWritable key, Text value, TupleMRContext context, Collector collector)
                        throws IOException, InterruptedException {
                    String[] fields = value.toString().split("\t");
                    String language = fields[1];
                    tuple.set("user_id", fields[0]);
                    tuple.set("message", fields[2]);
                    if (language.equals("en")) {
                        // English -> write to main output
                        collector.write(tuple);
                    } else if (language.equals("fr")) {
                        // French -> write to french index
                        collector.getNamedOutput("FR").write(tuple, NullWritable.get());
                    } else if (language.equals("es")) {
                        // Spanish -> write to spanish index
                        collector.getNamedOutput("ES").write(tuple, NullWritable.get());
                    }
                }
            });

    // Add multi-output: French index
    job.addNamedOutput("FR", new TupleSolrOutputFormat(new File("src/test/resources/solr-fr"), conf),
            ITuple.class, NullWritable.class);
    // Add multi-output: Spanish index
    job.addNamedOutput("ES", new TupleSolrOutputFormat(new File("src/test/resources/solr-es"), conf),
            ITuple.class, NullWritable.class);

    job.setTupleReducer(new IdentityTupleReducer());
    // Main output: English index
    job.setOutput(new Path(output), new TupleSolrOutputFormat(new File("src/test/resources/solr-en"), conf),
            ITuple.class, NullWritable.class);

    Job hadoopJob = job.createJob();
    try {
        hadoopJob.waitForCompletion(true);
        if (!hadoopJob.isSuccessful()) {
            throw new PangoolRuntimeException("Job was not successful");
        }
    } finally {
        job.cleanUpInstanceFiles();
    }
    return 0;
}
From source file:com.datasalt.pangool.tuplemr.IdentityTupleReducer.java
License:Apache License
@Override
public void reduce(ITuple group, Iterable<ITuple> tuples, TupleMRContext context, Collector collector)
        throws IOException, InterruptedException, TupleMRException {

    for (ITuple tuple : tuples) {
        collector.write(tuple, NullWritable.get());
    }
}
From source file:com.datasalt.pangool.tuplemr.mapred.lib.input.HCatTupleInputFormat.java
License:Apache License
@Override
public RecordReader<ITuple, NullWritable> createRecordReader(InputSplit split, TaskAttemptContext taskContext)
        throws IOException, InterruptedException {

    HCatInputFormat iF = new HCatInputFormat();

    @SuppressWarnings("rawtypes")
    final RecordReader<WritableComparable, HCatRecord> hCatRecordReader = iF.createRecordReader(split,
            taskContext);

    return new RecordReader<ITuple, NullWritable>() {

        ITuple tuple = new Tuple(pangoolSchema);

        @Override
        public void close() throws IOException {
            hCatRecordReader.close();
        }

        @Override
        public ITuple getCurrentKey() throws IOException, InterruptedException {
            HCatRecord record = hCatRecordReader.getCurrentValue();
            // Perform conversion between HCatRecord and Tuple
            for (int pos = 0; pos < schema.size(); pos++) {
                tuple.set(pos, record.get(pos));
            }
            return tuple;
        }

        @Override
        public NullWritable getCurrentValue() throws IOException, InterruptedException {
            return NullWritable.get();
        }

        @Override
        public float getProgress() throws IOException, InterruptedException {
            return hCatRecordReader.getProgress();
        }

        @Override
        public void initialize(InputSplit iS, TaskAttemptContext context)
                throws IOException, InterruptedException {
            hCatRecordReader.initialize(iS, context);
        }

        @Override
        public boolean nextKeyValue() throws IOException, InterruptedException {
            return hCatRecordReader.nextKeyValue();
        }
    };
}
From source file:com.datasalt.pangool.tuplemr.mapred.lib.output.TestMultipleOutputs.java
License:Apache License
@Test
public void test() throws TupleMRException, IOException, InterruptedException, ClassNotFoundException,
        InstantiationException, IllegalAccessException {

    initHadoop();
    trash(INPUT, OUTPUT);

    // One file with one line - context will be ignored
    // Business logic in {@link MyInputProcessor}
    CommonUtils.writeTXT("ignore-me", new File(INPUT));

    getConf().set("mapred.output.compress", "true");
    getConf().set("mapred.output.compression.codec", "org.apache.hadoop.io.compress.DefaultCodec");

    TupleMRBuilder builder = new TupleMRBuilder(getConf());
    Schema baseSchema = new Schema("schema", Fields.parse("name:string, money:int, country:string"));
    builder.addIntermediateSchema(baseSchema);
    builder.setGroupByFields("country");
    builder.setOrderBy(new OrderBy().add("country", Order.ASC).add("money", Order.DESC).add("name", Order.ASC));
    builder.addInput(new Path(INPUT), new HadoopInputFormat(TextInputFormat.class), new MyInputProcessor());
    builder.setTupleReducer(new MyGroupHandler());
    builder.setOutput(new Path(OUTPUT), new HadoopOutputFormat(SequenceFileOutputFormat.class),
            DoubleWritable.class, NullWritable.class);

    // Configure extra outputs
    builder.addNamedOutput(OUTPUT_1, new HadoopOutputFormat(SequenceFileOutputFormat.class), Utf8.class,
            Utf8.class);
    builder.addNamedOutput(OUTPUT_2, new HadoopOutputFormat(SequenceFileOutputFormat.class), IntWritable.class,
            NullWritable.class);
    builder.addNamedTupleOutput(TUPLEOUTPUT_1, baseSchema);

    getConf().setClass(ProxyOutputFormat.PROXIED_OUTPUT_FORMAT_CONF, SequenceFileOutputFormat.class,
            OutputFormat.class);

    Job job = builder.createJob();
    try {
        assertRun(job);
    } finally {
        builder.cleanUpInstanceFiles();
    }

    // Check outputs
    checkCompression(firstReducerOutput(OUTPUT + "/" + OUTPUT_1), DefaultCodec.class);
    checkCompression(firstReducerOutput(OUTPUT + "/" + OUTPUT_2), DefaultCodec.class);
    checkCompression(firstMapOutput(OUTPUT + "/" + OUTPUT_1), DefaultCodec.class);
    checkCompression(firstMapOutput(OUTPUT + "/" + OUTPUT_2), DefaultCodec.class);
    checkCompression(firstReducerOutput(OUTPUT), DefaultCodec.class);

    withOutput(firstReducerOutput(OUTPUT), new DoubleWritable(1.0), NullWritable.get());
    withOutput(firstReducerOutput(OUTPUT + "/" + OUTPUT_1), new Text("Pere"), new Text("ES"));
    withOutput(firstMapOutput(OUTPUT + "/" + OUTPUT_1), new Text("Pere"), new Text("ES"));
    withOutput(firstReducerOutput(OUTPUT + "/" + OUTPUT_2), new IntWritable(100), NullWritable.get());
    withOutput(firstMapOutput(OUTPUT + "/" + OUTPUT_2), new IntWritable(100), NullWritable.get());

    Tuple tuple = new Tuple(baseSchema);
    tuple.set(0, "Pere");
    tuple.set(1, 100);
    tuple.set(2, "ES");
    withTupleOutput(firstMapOutput(OUTPUT + "/" + TUPLEOUTPUT_1), tuple);
    withTupleOutput(firstReducerOutput(OUTPUT + "/" + TUPLEOUTPUT_1), tuple);

    trash(INPUT, OUTPUT);
    cleanUp();
}
From source file:com.datasalt.pangool.tuplemr.mapred.TestTupleHashPartitioner.java
License:Apache License
@Test
public void multipleSourcesTest() throws TupleMRException, IOException {
    Configuration conf = getConf();
    TupleHashPartitioner partitioner = new TupleHashPartitioner();

    List<Field> fields = new ArrayList<Field>();
    fields.add(Field.create("number1", Type.INT));
    fields.add(Field.create("string1", Type.STRING));
    fields.add(Field.create("string2", Type.STRING));
    Schema schema1 = new Schema("test1", fields);

    fields = new ArrayList<Field>();
    fields.add(Field.create("number1", Type.INT));
    fields.add(Field.create("string1", Type.STRING));
    fields.add(Field.create("number2", Type.LONG));
    Schema schema2 = new Schema("test2", fields);

    TupleMRConfigBuilder builder = new TupleMRConfigBuilder();
    builder.addIntermediateSchema(schema1);
    builder.addIntermediateSchema(schema2);
    builder.setGroupByFields("number1", "string1");
    TupleMRConfig tupleMRConf = builder.buildConf();
    TupleMRConfig.set(tupleMRConf, conf);
    partitioner.setConf(conf);

    ITuple tuple = new Tuple(schema1);
    tuple.set("number1", 35);
    tuple.set("string1", "foo");

    // Check that for the same prefix (number1, string1) we obtain the same partition
    int partitionId = -N_PARTITIONS;
    for (int i = 0; i < MAX_ITERATIONS_OVER_ONE_SCHEMA; i++) {
        tuple.set("string2", TestUtils.randomString(10));
        int thisPartitionId = partitioner.getPartition(new DatumWrapper(tuple), NullWritable.get(),
                N_PARTITIONS);
        Assert.assertTrue(thisPartitionId >= 0);
        Assert.assertTrue(thisPartitionId < N_PARTITIONS);
        if (partitionId == -N_PARTITIONS) {
            partitionId = thisPartitionId;
        } else {
            // Check that the returned partition is always the same even if "string2" field changes its value
            Assert.assertEquals(thisPartitionId, partitionId);
        }
    }

    // On the other hand, check that when we vary one of the group by fields, partition varies
    int partitionMatches[] = new int[N_PARTITIONS];
    for (int i = 0; i < MAX_ITERATIONS_OVER_ONE_SCHEMA; i++) {
        tuple.set("string1", TestUtils.randomString(10));
        int thisPartitionId = partitioner.getPartition(new DatumWrapper(tuple), NullWritable.get(),
                N_PARTITIONS);
        Assert.assertTrue(thisPartitionId >= 0);
        Assert.assertTrue(thisPartitionId < N_PARTITIONS);
        partitionMatches[thisPartitionId]++;
    }

    for (int i = 0; i < partitionMatches.length; i++) {
        if (partitionMatches[i] == 0) {
            throw new AssertionError(
                    "Partition matches: 0 for partition " + i + ". Seems like a bug in the Partitioner.");
        }
    }
}
From source file:com.datasalt.pangool.tuplemr.mapred.TestTupleHashPartitioner.java
License:Apache License
@Test
public void sanityTest() throws TupleMRException, IOException {
    // This is a basic sanity test for checking that the partitioner works for nPartitions > 1
    Configuration conf = getConf();
    TupleHashPartitioner partitioner = new TupleHashPartitioner();

    List<Field> fields = new ArrayList<Field>();
    // We use one INT field - we'll put random numbers in it
    fields.add(Field.create("foo", Type.INT));
    Schema schema = new Schema("test", fields);

    TupleMRConfigBuilder builder = new TupleMRConfigBuilder();
    builder.addIntermediateSchema(schema);
    builder.setGroupByFields("foo");
    TupleMRConfig tupleMRConf = builder.buildConf();
    TupleMRConfig.set(tupleMRConf, conf);
    partitioner.setConf(conf);

    ITuple tuple = new Tuple(schema);
    int partitionMatches[] = new int[N_PARTITIONS];

    for (int i = 0; i < MAX_ITERATIONS_OVER_ONE_SCHEMA; i++) {
        tuple.set("foo", (int) (Math.random() * Integer.MAX_VALUE));
        int thisPartitionId = partitioner.getPartition(new DatumWrapper(tuple), NullWritable.get(),
                N_PARTITIONS);
        Assert.assertTrue(thisPartitionId >= 0);
        Assert.assertTrue(thisPartitionId < N_PARTITIONS);
        partitionMatches[thisPartitionId]++;
    }

    for (int i = 0; i < partitionMatches.length; i++) {
        if (partitionMatches[i] == 0) {
            throw new AssertionError(
                    "Partition matches: 0 for partition " + i + ". Seems like a bug in the Partitioner.");
        }
    }
}
From source file:com.datasalt.pangool.tuplemr.mapred.TestTupleMRJob.java
License:Apache License
@Test
public void testFillingTuplesJob() throws IOException, ClassNotFoundException, InterruptedException,
        TupleMRException {

    int NUM_ROWS_TO_GENERATE = 100;

    Configuration conf = getConf();
    String input = TestTupleMRJob.class + "-input";
    String output = TestTupleMRJob.class + "-output";

    ITuple tuple = new Tuple(SCHEMA);
    for (int i = 0; i < NUM_ROWS_TO_GENERATE; i++) {
        withTupleInput(input, fillTuple(true, tuple));
    }

    TupleMRBuilder builder = new TupleMRBuilder(getConf(), "test");
    builder.addTupleInput(new Path(input), new TupleMapper<ITuple, NullWritable>() {
        @Override
        public void map(ITuple iTuple, NullWritable nullWritable, TupleMRContext context, Collector collector)
                throws IOException, InterruptedException {
            collector.write(fillTuple(true, iTuple));
        }
    });
    builder.setTupleReducer(new TupleReducer<ITuple, NullWritable>() {
        @Override
        public void reduce(ITuple group, Iterable<ITuple> tuples, TupleMRContext context, Collector collector)
                throws IOException, InterruptedException, TupleMRException {
            for (ITuple tuple : tuples) {
                collector.write(fillTuple(true, tuple), NullWritable.get());
            }
        }
    });
    builder.addIntermediateSchema(SCHEMA);
    builder.setGroupByFields(SCHEMA.getField(0).getName());
    builder.setTupleOutput(new Path(output), SCHEMA);

    Job job = builder.createJob();
    job.setNumReduceTasks(1);
    try {
        assertRun(job);
    } finally {
        builder.cleanUpInstanceFiles();
    }

    final AtomicInteger count = new AtomicInteger();
    readTuples(new Path(output + "/part-r-00000"), getConf(), new TupleVisitor() {
        @Override
        public void onTuple(ITuple tuple) {
            count.incrementAndGet();
        }
    });
    assertEquals(NUM_ROWS_TO_GENERATE, count.get());

    trash(input);
    trash(output);
}