List of usage examples for org.apache.hadoop.io NullWritable get
public static NullWritable get()
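NullWritable is Hadoop's zero-byte placeholder Writable: get() returns its single shared instance, which is typically passed in the key or value slot of a map/reduce output when that slot carries no data, as in all of the examples below. A minimal, self-contained sketch of that pattern, assuming a standard org.apache.hadoop.mapreduce.Mapper; the class and parameter names (LineKeyMapper, offset, line) are illustrative only:

import java.io.IOException;

import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

// Emits each input line as a key with no payload: NullWritable.get() supplies the
// shared zero-byte singleton, so no value bytes are serialized into the map output.
public class LineKeyMapper extends Mapper<LongWritable, Text, Text, NullWritable> {

    @Override
    protected void map(LongWritable offset, Text line, Context context)
            throws IOException, InterruptedException {
        context.write(line, NullWritable.get());
    }
}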
From source file:com.rw.legion.input.LegionRecordReader.java
License:Apache License
@Override
public NullWritable getCurrentKey() {
    return NullWritable.get();
}
From source file:com.skp.experiment.common.mapreduce.IdentityMapper.java
License:Apache License
@SuppressWarnings("unchecked") @Override// w ww. ja va 2s .c o m protected void map(K key, V value, Context context) throws IOException, InterruptedException { if (valueOnlyOut) { context.write((K) NullWritable.get(), value); } else if (keyOnlyOut) { context.write(key, (V) NullWritable.get()); } else { context.write(key, value); } }
From source file:com.soteradefense.dga.louvain.mapreduce.LouvainTableSynthesizerReducer.java
License:Apache License
@Override
protected void reduce(Text key, Iterable<Text> values, Context context)
        throws IOException, InterruptedException {
    StringBuilder builder = new StringBuilder();
    String rightText = null;
    List<String> zeros = new ArrayList<String>();
    for (Text item : values) {
        String valueItem = item.toString();
        String[] tokens = valueItem.split(":");
        int side;
        try {
            side = Integer.parseInt(tokens[1]);
        } catch (NumberFormatException nfe) {
            throw new NumberFormatException("valueItem: " + valueItem);
        }
        if (side == 1) {
            rightText = tokens[0];
        } else {
            zeros.add(tokens[0]);
        }
    }
    for (String item : zeros) {
        builder.append(item);
        if (rightText != null) {
            builder.append('\t');
            builder.append(rightText);
        }
        context.write(new Text(builder.toString()), NullWritable.get());
        builder.setLength(0);
    }
}
From source file:com.soteradefense.dga.louvain.mapreduce.LouvainTableSynthesizerTest.java
License:Apache License
@Test
public void testReducer() throws Exception {
    List<Text> list = new LinkedList<Text>();
    list.add(new Text("1\t2:0"));
    list.add(new Text("2\t2:0"));
    list.add(new Text("3\t2:0"));
    list.add(new Text("6:1"));
    list.add(new Text("5\t2:0"));
    reduceDriver.addInput(new Text("2"), list);
    reduceDriver.addOutput(new Text("1\t2\t6"), NullWritable.get());
    reduceDriver.addOutput(new Text("2\t2\t6"), NullWritable.get());
    reduceDriver.addOutput(new Text("3\t2\t6"), NullWritable.get());
    reduceDriver.addOutput(new Text("5\t2\t6"), NullWritable.get());
    List<Pair<Text, NullWritable>> results = reduceDriver.run();
    assertTrue(results.size() == 4);
}
From source file:com.splout.db.benchmark.BenchmarkStoreTool.java
License:Apache License
@Override
public int run(String[] args) throws Exception {
    JCommander jComm = new JCommander(this);
    jComm.setProgramName("Benchmark-Store Tool");
    try {
        jComm.parse(args);
    } catch (ParameterException e) {
        System.out.println(e.getMessage());
        jComm.usage();
        return -1;
    } catch (Throwable t) {
        t.printStackTrace();
        jComm.usage();
        return -1;
    }

    // Create some input files that will represent the partitions to generate
    Path out = new Path(output);
    FileSystem outFs = out.getFileSystem(getConf());
    HadoopUtils.deleteIfExists(outFs, out);

    Integer min, max, eachPartition;
    int maxKeyDigits;
    try {
        String[] minMax = keySpace.split(":");
        min = Integer.parseInt(minMax[0]);
        max = Integer.parseInt(minMax[1]);
        maxKeyDigits = max.toString().length();
        eachPartition = (max - min) / nPartitions;
    } catch (Exception e) {
        throw new IllegalArgumentException(
                "Key range format is not valid. It must be minKey:maxKey where both minKey and maxKey are integers.");
    }

    FileSystem inFs = FileSystem.get(getConf());
    Path input = new Path("benchmark-store-tool-" + System.currentTimeMillis());
    HadoopUtils.deleteIfExists(inFs, input);
    inFs.mkdirs(input);

    List<PartitionEntry> partitionEntries = new ArrayList<PartitionEntry>();

    // Create as many input files as partitions
    // Each input file will have as value the range that each Mapper will write
    String paddingExp = "%0" + (padding != null ? padding : maxKeyDigits) + "d";
    for (int i = 0; i < nPartitions; i++) {
        int thisMin = (i * eachPartition);
        int thisMax = (i + 1) * eachPartition;
        HadoopUtils.stringToFile(inFs, new Path(input, i + ".txt"), i + "\t" + thisMin + ":" + thisMax);
        PartitionEntry entry = new PartitionEntry();
        entry.setMin(String.format(paddingExp, thisMin));
        entry.setMax(String.format(paddingExp, thisMax));
        entry.setShard(i);
        partitionEntries.add(entry);
    }

    partitionEntries.get(0).setMin(null);
    partitionEntries.get(partitionEntries.size() - 1).setMax(null);

    PartitionMap partitionMap = new PartitionMap(partitionEntries);
    HadoopUtils.stringToFile(outFs, new Path(out, "partition-map"), JSONSerDe.ser(partitionMap));

    List<Field> fields = new ArrayList<Field>();
    fields.add(Field.create(SploutSQLOutputFormat.PARTITION_TUPLE_FIELD, Type.INT));
    fields.addAll(Fields.parse("key:int, value:string"));
    final Schema schema = new Schema(tablename, fields);

    byte[] valueArray = new byte[valueSize];
    for (int i = 0; i < valueSize; i++) {
        valueArray[i] = 'A';
    }
    final String theValue = new String(valueArray);

    if (!FileSystem.getLocal(conf).equals(FileSystem.get(conf))) {
        File nativeLibs = new File("native");
        if (nativeLibs.exists()) {
            SploutHadoopConfiguration.addSQLite4JavaNativeLibsToDC(conf);
        }
    }

    MapOnlyJobBuilder job = new MapOnlyJobBuilder(conf);
    TableSpec tableSpec = new TableSpec(schema, schema.getFields().get(1));

    job.setOutput(new Path(out, "store"),
            new SploutSQLProxyOutputFormat(new SQLite4JavaOutputFormat(1000000, tableSpec)), ITuple.class,
            NullWritable.class);
    job.addInput(input, new HadoopInputFormat(TextInputFormat.class),
            new MapOnlyMapper<LongWritable, Text, ITuple, NullWritable>() {

                ITuple metaTuple = new Tuple(schema);

                protected void map(LongWritable key, Text value, Context context)
                        throws IOException, InterruptedException {
                    String[] partitionRange = value.toString().split("\t");
                    Integer partition = Integer.parseInt(partitionRange[0]);
                    metaTuple.set(SploutSQLOutputFormat.PARTITION_TUPLE_FIELD, partition);
                    String[] minMax = partitionRange[1].split(":");
                    Integer min = Integer.parseInt(minMax[0]);
                    Integer max = Integer.parseInt(minMax[1]);
                    for (int i = min; i < max; i++) {
                        metaTuple.set("key", i);
                        metaTuple.set("value", theValue);
                        context.write(metaTuple, NullWritable.get());
                    }
                }
            });

    job.createJob().waitForCompletion(true);
    HadoopUtils.deleteIfExists(inFs, input);
    return 0;
}
From source file:com.splout.db.hadoop.SumReducer.java
License:Apache License
public void reduce(ITuple group, Iterable<ITuple> tuples, TupleMRContext context, Collector collector)
        throws IOException, InterruptedException, TupleMRException {
    int totalCount = 0;
    ITuple outTuple = null;
    for (ITuple tuple : tuples) {
        totalCount += (Integer) tuple.get(field);
        outTuple = tuple;
    }
    outTuple.set(field, totalCount);
    collector.write(outTuple, NullWritable.get());
}
From source file:com.splout.db.hadoop.TupleSampler.java
License:Apache License
@SuppressWarnings("deprecation") private long fullScanSampling(TablespaceSpec tablespace, final long sampleSize, Configuration hadoopConf, Path outputPath, final int nSplits) throws TupleSamplerException { MapOnlyJobBuilder builder = new MapOnlyJobBuilder(hadoopConf, "Reservoir Sampling to path " + outputPath); for (Table table : tablespace.getPartitionedTables()) { final TableSpec tableSpec = table.getTableSpec(); final String getPartitionByJavaScript = tableSpec.getPartitionByJavaScript(); for (TableInput inputFile : table.getFiles()) { final RecordProcessor processor = inputFile.getRecordProcessor(); for (Path path : inputFile.getPaths()) { builder.addInput(path, inputFile.getFormat(), new MapOnlyMapper<ITuple, NullWritable, Text, NullWritable>() { final int nSamples = (int) (sampleSize / nSplits); final String[] samples = new String[nSamples]; CounterInterface counterInterface; long recordCounter = 0; JavascriptEngine jsEngine = null; @Override protected void setup(Context context, MultipleOutputsCollector coll) throws IOException, InterruptedException { counterInterface = new CounterInterface(context); // Initialize JavaScript engine if needed if (getPartitionByJavaScript != null) { try { jsEngine = new JavascriptEngine(getPartitionByJavaScript); } catch (Throwable e) { throw new RuntimeException(e); }/* w ww . j a v a2 s. c om*/ } } ; // Collect Tuples with decreasing probability // (http://en.wikipedia.org/wiki/Reservoir_sampling) protected void map(ITuple key, NullWritable value, Context context) throws IOException, InterruptedException { ITuple uTuple; try { uTuple = processor.process(key, key.getSchema().getName(), counterInterface); } catch (Throwable e) { throw new RuntimeException(e); } if (uTuple == null) { // user may have filtered the record return; } long reservoirIndex; if (recordCounter < nSamples) { reservoirIndex = recordCounter; } else { reservoirIndex = (long) (Math.random() * recordCounter); } if (reservoirIndex < nSamples) { String pkey = null; try { pkey = TablespaceGenerator.getPartitionByKey(uTuple, tableSpec, jsEngine); } catch (Throwable e) { throw new RuntimeException("Error when determining partition key.", e); } samples[(int) reservoirIndex] = pkey; } recordCounter++; } // Write the in-memory sampled Tuples protected void cleanup(Context context, MultipleOutputsCollector coll) throws IOException, InterruptedException { Text key = new Text(); for (String keyStr : samples) { if (keyStr != null) { key.set(keyStr); context.write(key, NullWritable.get()); } } } }, inputFile.getSpecificHadoopInputFormatContext()); } } } // Set output path Path outReservoirPath = new Path(outputPath + "-reservoir"); builder.setOutput(outReservoirPath, new HadoopOutputFormat(SequenceFileOutputFormat.class), Text.class, NullWritable.class); builder.setJarByClass(callingClass); try { Job job = null; job = builder.createJob(); if (!job.waitForCompletion(true)) { throw new TupleSamplerException("Reservoir Sampling failed!"); } } catch (Exception e) { throw new TupleSamplerException("Error creating or launching the sampling job.", e); } finally { try { builder.cleanUpInstanceFiles(); } catch (IOException e) { throw new TupleSamplerException("Error cleaning up the sampling job.", e); } } long retrievedSamples = 0; try { FileSystem outFs = outReservoirPath.getFileSystem(hadoopConf); if (outFs.listStatus(outReservoirPath) == null) { throw new IOException("Output folder not created: the Job failed!"); } retrievedSamples = 0; // Instantiate the writer we will write samples to 
SequenceFile.Writer writer = new SequenceFile.Writer(outFs, hadoopConf, outputPath, Text.class, NullWritable.class); // Aggregate the output into a single file for being consistent with the other sampling methods for (FileStatus fileStatus : outFs.listStatus(outReservoirPath)) { Path thisPath = fileStatus.getPath(); if (thisPath.getName().startsWith("part-m-")) { SequenceFile.Reader reader = new SequenceFile.Reader(outFs, thisPath, hadoopConf); Text key = new Text(); while (reader.next(key)) { writer.append(key, NullWritable.get()); retrievedSamples++; } reader.close(); } } writer.close(); outFs.delete(outReservoirPath, true); } catch (IOException e) { throw new TupleSamplerException("Error consolidating the sample job results into one file.", e); } return retrievedSamples; }
From source file:com.splout.db.hadoop.TupleSampler.java
License:Apache License
/**
 * Random sampling method a-la-TeraSort, getting some consecutive samples from each InputSplit
 * without using a Job.
 * The output is a SequenceFile with keys.
 *
 * @return The number of retrieved samples
 */
private long randomSampling(long sampleSize, Configuration hadoopConf, Path outFile, List<InputSplit> splits,
        Map<InputSplit, TableSpec> splitToTableSpec,
        Map<InputSplit, InputFormat<ITuple, NullWritable>> splitToFormat,
        Map<InputSplit, Map<String, String>> specificHadoopConf,
        Map<InputSplit, RecordProcessor> recordProcessorPerSplit,
        Map<InputSplit, JavascriptEngine> splitToJsEngine, int maxSplitsToVisit) throws IOException {

    // Instantiate the writer we will write samples to
    FileSystem fs = FileSystem.get(outFile.toUri(), hadoopConf);

    if (splits.size() == 0) {
        throw new IllegalArgumentException("There are no splits to sample from!");
    }

    @SuppressWarnings("deprecation")
    SequenceFile.Writer writer = new SequenceFile.Writer(fs, hadoopConf, outFile, Text.class,
            NullWritable.class);

    logger.info("Sequential sampling options, max splits to visit: " + maxSplitsToVisit + ", samples to take: "
            + sampleSize + ", total number of splits: " + splits.size());
    int blocks = Math.min(maxSplitsToVisit, splits.size());
    blocks = Math.min((int) sampleSize, blocks);
    long recordsPerSample = sampleSize / blocks;
    int sampleStep = splits.size() / blocks;

    long records = 0;

    CounterInterface counterInterface = new CounterInterface(null) {
        public Counter getCounter(String group, String name) {
            return Mockito.mock(Counter.class);
        }
    };

    // Take N samples from different parts of the input
    for (int i = 0; i < blocks; ++i) {
        TaskAttemptID attemptId = new TaskAttemptID(new TaskID(), 1);

        TaskAttemptContext attemptContext = null;
        try {
            attemptContext = TaskAttemptContextFactory.get(hadoopConf, attemptId);
        } catch (Exception e) {
            throw new RuntimeException(e);
        }

        InputSplit split = splits.get(sampleStep * i);
        if (specificHadoopConf.get(split) != null) {
            for (Map.Entry<String, String> specificConf : specificHadoopConf.get(split).entrySet()) {
                attemptContext.getConfiguration().set(specificConf.getKey(), specificConf.getValue());
            }
        }
        logger.info("Sampling split: " + split);
        RecordReader<ITuple, NullWritable> reader = null;
        try {
            reader = splitToFormat.get(split).createRecordReader(split, attemptContext);
            reader.initialize(split, attemptContext);

            RecordProcessor processor = recordProcessorPerSplit.get(split);
            Text key = new Text();
            while (reader.nextKeyValue()) {
                ITuple tuple = reader.getCurrentKey();

                ITuple uTuple;
                try {
                    uTuple = processor.process(tuple, tuple.getSchema().getName(), counterInterface);
                } catch (Throwable e) {
                    throw new RuntimeException(e);
                }
                if (uTuple != null) { // user may have filtered the record
                    try {
                        key.set(TablespaceGenerator.getPartitionByKey(uTuple, splitToTableSpec.get(split),
                                splitToJsEngine.get(split)));
                    } catch (Throwable e) {
                        throw new RuntimeException("Error when determining partition key.", e);
                    }

                    writer.append(key, NullWritable.get());
                    records += 1;
                    if ((i + 1) * recordsPerSample <= records) {
                        break;
                    }
                }
            }
        } catch (InterruptedException e) {
            throw new RuntimeException(e);
        }
    }

    writer.close();
    return records;
}
From source file:com.splout.db.integration.RetailDemo.java
License:Apache License
public void generate(long nRegs, String dnodes, String qnode, Path inputPath, Path outputPath) throws Exception {
    Configuration conf = new Configuration();
    FileSystem fS = FileSystem.get(conf);
    HadoopUtils.deleteIfExists(fS, inputPath);
    HadoopUtils.deleteIfExists(fS, outputPath);

    NullWritable nullValue = NullWritable.get();

    Schema retailSchema = new Schema("retail", Fields
            .parse("tienda:string, cliente:int, ticket:double, producto:int, precio:double, fecha:string"));
    ITuple tuple = new Tuple(retailSchema);

    TupleFile.Writer writer = new TupleFile.Writer(fS, conf, inputPath, retailSchema);

    // Writes nRegs Tuples to HDFS
    long soFar = 0;
    while (soFar < nRegs) {
        int tienda = (int) (Math.random() * N_TIENDAS);
        int cliente = (int) (Math.random() * N_CLIENTES);
        tuple.set("tienda", "T" + tienda);
        tuple.set("cliente", cliente);
        double[] precios = new double[N_PRODUCTOS_PER_TICKET];
        double ticket = 0;
        for (int i = 0; i < N_PRODUCTOS_PER_TICKET; i++) {
            precios[i] = ((int) (Math.random() * MAX_PRECIO * 100)) / 100;
            precios[i] = Math.max(precios[i], 5.00);
            ticket += precios[i];
        }
        tuple.set("ticket", ticket);
        long fecha = System.currentTimeMillis() - ((long) (Math.random() * DAY_SPAN * 24 * 60 * 60 * 1000));
        tuple.set("fecha", fmt.print(fecha));
        for (int i = 0; i < N_PRODUCTOS_PER_TICKET; i++) {
            int producto = (int) (Math.random() * N_PRODUCTOS);
            tuple.set("precio", precios[i]);
            tuple.set("producto", producto);
            writer.append(tuple);
            soFar++;
        }
    }
    writer.close();

    // Generate Splout view (cliente)
    String[] dnodeArray = dnodes.split(",");
    TablespaceSpec tablespace = TablespaceSpec.of(retailSchema, "cliente", inputPath, new TupleInputFormat(),
            dnodeArray.length);
    TablespaceGenerator generateView = new TablespaceGenerator(tablespace, outputPath);
    generateView.generateView(conf, SamplingType.DEFAULT, new TupleSampler.DefaultSamplingOptions());
    PartitionMap partitionMap = generateView.getPartitionMap();
    ReplicationMap replicationMap = ReplicationMap.oneToOneMap(dnodeArray);
    Path deployUri = new Path(outputPath, "store").makeQualified(fS);

    SploutClient client = new SploutClient(qnode);
    client.deploy("retailcliente", partitionMap, replicationMap, deployUri.toUri());

    // Generate Splout view (tienda)
    Path output2 = new Path(outputPath + "-2");
    HadoopUtils.deleteIfExists(fS, output2);
    tablespace = TablespaceSpec.of(retailSchema, "tienda", inputPath, new TupleInputFormat(), dnodeArray.length);
    generateView = new TablespaceGenerator(tablespace, output2);
    generateView.generateView(conf, SamplingType.DEFAULT, new TupleSampler.DefaultSamplingOptions());
    partitionMap = generateView.getPartitionMap();

    deployUri = new Path(output2, "store").makeQualified(fS);
    client.deploy("retailtienda", partitionMap, replicationMap, deployUri.toUri());
}
From source file:com.splout.db.integration.TestDemo.java
License:Apache License
public void generate(int nPartitions, long nRegs, String dnodes, String qnode, Path inputPath, Path outputPath)
        throws Exception {
    Configuration conf = new Configuration();
    FileSystem fS = FileSystem.get(conf);
    HadoopUtils.deleteIfExists(fS, inputPath);
    HadoopUtils.deleteIfExists(fS, outputPath);

    NullWritable nullValue = NullWritable.get();

    TupleFile.Writer writer = new TupleFile.Writer(fS, conf, inputPath, SploutHadoopTestUtils.SCHEMA);

    // Writes nRegs Tuples to HDFS
    long soFar = 0;
    while (soFar < nRegs) {
        writer.append(SploutHadoopTestUtils.getTuple("id" + soFar, (int) soFar));
        soFar++;
    }
    writer.close();

    // Generate Splout view
    TablespaceSpec tablespace = TablespaceSpec.of(SploutHadoopTestUtils.SCHEMA, "id", inputPath,
            new TupleInputFormat(), nPartitions);
    TablespaceGenerator generateView = new TablespaceGenerator(tablespace, outputPath);
    generateView.generateView(conf, SamplingType.DEFAULT, new TupleSampler.DefaultSamplingOptions());
    PartitionMap partitionMap = generateView.getPartitionMap();
    ReplicationMap replicationMap = ReplicationMap.oneToOneMap(dnodes.split(","));
    Path deployUri = new Path(outputPath, "store").makeQualified(fS);

    SploutClient client = new SploutClient(qnode);
    client.deploy("tablespace1", partitionMap, replicationMap, deployUri.toUri());
}