Example usage for org.apache.hadoop.io NullWritable get

List of usage examples for org.apache.hadoop.io NullWritable get

Introduction

This page collects example usages of org.apache.hadoop.io.NullWritable#get() from open-source projects.

Prototype

public static NullWritable get() 

Document

Returns the single instance of this class.
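Before the project examples, here is a minimal sketch of the typical pattern (not taken from any of the projects listed below; the class name NullValueMapper is hypothetical). Because get() always returns the same shared instance, it is the usual placeholder when only one side of a MapReduce key/value pair carries data:

import java.io.IOException;

import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

// Hypothetical example class, not from the projects on this page.
public class NullValueMapper extends Mapper<Object, Text, Text, NullWritable> {
    @Override
    protected void map(Object key, Text value, Context context) throws IOException, InterruptedException {
        // NullWritable.get() is a singleton, so emitting it as the value adds no per-record allocation;
        // the Text key carries all of the data.
        context.write(value, NullWritable.get());
    }
}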

Usage

From source file:gov.va.research.red.ex.hadoop.TestREDExHadoop.java

License:Apache License

@Test
public void testMapper() {
    MatchedElement me = new MatchedElement(10, 11, "1",
            "(?i)developed\\s{1,2}?((\\d+|zero|one|two|three|four|five|six|seven|eight|nine|ten))\\p{Punct}{1,2}?(?:\\d+?|zero|one|two|three|four|five|six|seven|eight|nine|ten)",
            0.005767844268204758);
    MatchedElementWritable mew = new MatchedElementWritable(me);
    mapDriver.withInput(NullWritable.get(), new BytesWritable("p0|d0|2015-06-08\ndeveloped 1.1".getBytes()));
    mapDriver.withOutput(new Text("p0|d0|2015-06-08"), mew);
    mapDriver.setValueComparator(new Comparator<MatchedElementWritable>() {
        @Override
        public int compare(MatchedElementWritable o1, MatchedElementWritable o2) {
            if (o1.getMatchedElement().getStartPos() != o2.getMatchedElement().getStartPos()) {
                return o1.getMatchedElement().getStartPos() - o2.getMatchedElement().getStartPos();
            }
            if (o1.getMatchedElement().getEndPos() != o2.getMatchedElement().getEndPos()) {
                return o1.getMatchedElement().getEndPos() - o2.getMatchedElement().getEndPos();
            }
            if (!o1.getMatchedElement().getMatch().equals(o2.getMatchedElement().getMatch())) {
                return o1.getMatchedElement().getMatch().compareTo(o2.getMatchedElement().getMatch());
            }
            return 0;
        }
    });
    try {
        mapDriver.runTest();
    } catch (IOException e) {
        throw new AssertionError(e);
    }
}

From source file:gov.va.research.red.ex.hadoop.TestREDExHadoop.java

License:Apache License

@Test
public void testReducer() {
    List<MatchedElementWritable> mewList = new ArrayList<>();
    MatchedElementWritable mew = new MatchedElementWritable(new MatchedElement(10, 11, "1", "", 1));
    mewList.add(mew);
    Text output = new Text("<?xml version=\"1.0\" encoding=\"UTF-8\"?>"
            + "<!DOCTYPE collection SYSTEM \"BioC.dtd\">" + "<collection>" + "<source></source>"
            + "<date></date>" + "<key></key>" + "<document>" + "<id>d0</id>"
            + "<infon key=\"date\">2015-06-08</infon>" + "<infon key=\"Patient ID\">p0</infon>" + "<passage>"
            + "<offset>0</offset>" + "<annotation id=\"\">"
            + "<infon key=\"dateTime\">2015-06-17T21:44Z</infon>" + "<infon key=\"confidence\">1.0</infon>"
            + "<infon key=\"type\">pain</infon>" + "<location offset=\"10\" length=\"1\"></location>"
            + "<text>1</text>" + "</annotation>" + "</passage>" + "</document>" + "</collection>");
    reduceDriver.withInput(new Text("p0|d0|2015-06-08"), mewList);
    reduceDriver.withOutput(output, NullWritable.get());
    reduceDriver.setKeyComparator(new BioCXMLComparator());
    try {
        reduceDriver.runTest();
    } catch (IOException e) {
        throw new AssertionError(e);
    }
}

From source file:gov.va.research.red.ex.hadoop.TestREDExHadoop.java

License:Apache License

@Test
public void testMapReduce() {
    mapReduceDriver.withInput(NullWritable.get(),
            new BytesWritable("p0|d0|2015-06-08\ndeveloped 1.1".getBytes()));
    Text output = new Text("<?xml version=\"1.0\" encoding=\"UTF-8\"?>"
            + "<!DOCTYPE collection SYSTEM \"BioC.dtd\">" + "<collection>" + "<source></source>"
            + "<date></date>" + "<key></key>" + "<document>" + "<id>d0</id>"
            + "<infon key=\"date\">2015-06-08</infon>" + "<infon key=\"Patient ID\">p0</infon>" + "<passage>"
            + "<offset>0</offset>" + "<annotation id=\"\">"
            + "<infon key=\"dateTime\">2015-06-17T21:44Z</infon>" + "<infon key=\"confidence\">1.0</infon>"
            + "<infon key=\"type\">pain</infon>" + "<location offset=\"10\" length=\"1\"></location>"
            + "<text>1</text>" + "</annotation>" + "</passage>" + "</document>" + "</collection>");
    mapReduceDriver.withOutput(output, NullWritable.get());
    mapReduceDriver.setKeyComparator(new BioCXMLComparator());
    try {
        mapReduceDriver.runTest();
    } catch (IOException e) {
        throw new AssertionError(e);
    }
}

From source file:gr.ntua.h2rdf.loadTriples.TotalOrderPartitioner.java

License:Apache License

/**
 * Read the cut points from the given IFile.
 * @param fs The file system
 * @param p The path to read
 * @param keyClass The map output key class
 * @param conf The job configuration
 * @throws IOException
 */
// matching key types enforced by passing in
@SuppressWarnings("unchecked") // map output key class
private K[] readPartitions(FileSystem fs, Path p, Class<K> keyClass, Configuration conf) throws IOException {
    SequenceFile.Reader reader = new SequenceFile.Reader(fs, p, conf);
    ArrayList<K> parts = new ArrayList<K>();
    K key = ReflectionUtils.newInstance(keyClass, conf);
    NullWritable value = NullWritable.get();
    while (reader.next(key, value)) {
        parts.add(key);
        key = ReflectionUtils.newInstance(keyClass, conf);
    }
    reader.close();
    return parts.toArray((K[]) Array.newInstance(keyClass, parts.size()));
}

From source file:gr.ntua.h2rdf.loadTriples.TranslateAndImport.java

License:Apache License

public Job createSubmittableJob(String[] args) throws IOException, ClassNotFoundException {
    //compute sample partitions
    FileSystem fs;
    Configuration conf = new Configuration();
    int collected = 0, chunks = 0;
    try {
        fs = FileSystem.get(conf);
        Path sampleDir = new Path("sample");
        FileStatus[] samples = fs.listStatus(sampleDir);
        TreeSet<String> set = new TreeSet<String>();
        for (FileStatus sample : samples) {
            FSDataInputStream in = fs.open(sample.getPath());
            CompressionCodec codec = (CompressionCodec) ReflectionUtils.newInstance(GzipCodec.class, conf);
            CompressionInputStream in1 = codec.createInputStream(in);
            NxParser nxp = new NxParser(in1);
            Iterator<Node[]> it = nxp.iterator();
            while (it.hasNext()) {
                Node[] tr = it.next();
                //System.out.println(tr[0].toN3());
                set.add(tr[0].toN3());
                set.add(tr[1].toN3());
                set.add(tr[2].toN3());
            }
            in1.close();
            in.close();
        }

        IndexTranslator translator = new IndexTranslator(TABLE_NAME + "_Index");
        HashMap<String, Long> index = translator.translate(set);
        set.clear();
        TreeSet<ImmutableBytesWritable> set1 = new TreeSet<ImmutableBytesWritable>(
                new ImmutableBytesWritable.Comparator());

        for (FileStatus sample : samples) {
            FSDataInputStream in = fs.open(sample.getPath());
            CompressionCodec codec = (CompressionCodec) ReflectionUtils.newInstance(GzipCodec.class, conf);
            CompressionInputStream in1 = codec.createInputStream(in);
            NxParser nxp = new NxParser(in1);
            Iterator<Node[]> it = nxp.iterator();
            while (it.hasNext()) {
                Node[] tr = it.next();
                ByteTriple btr = new ByteTriple(index.get(tr[0].toN3()), index.get(tr[1].toN3()),
                        index.get(tr[2].toN3()));
                set1.add(new ImmutableBytesWritable(btr.getSPOByte()));
                set1.add(new ImmutableBytesWritable(btr.getSOPByte()));
                set1.add(new ImmutableBytesWritable(btr.getOPSByte()));
                set1.add(new ImmutableBytesWritable(btr.getOSPByte()));
                set1.add(new ImmutableBytesWritable(btr.getPOSByte()));
                set1.add(new ImmutableBytesWritable(btr.getPSOByte()));
            }
            in1.close();
            in.close();
        }
        index.clear();

        Path p = new Path("hexastorePartition");
        if (fs.exists(p)) {
            fs.delete(p, true);
        }
        SequenceFile.Writer partitionWriter = SequenceFile.createWriter(fs, conf, p,
                ImmutableBytesWritable.class, NullWritable.class);

        double chunkSize = bucketSampledTriples * DistinctIds.samplingRate;
        System.out.println("chunkSize: " + chunkSize);
        Iterator<ImmutableBytesWritable> it = set1.iterator();
        while (it.hasNext()) {
            ImmutableBytesWritable key = it.next();
            if (collected > chunkSize) {
                partitionWriter.append(key, NullWritable.get());
                //System.out.println(Bytes.toStringBinary(key.get()));
                collected = 0;
                chunks++;
            } else {
                collected++;
            }
        }
        System.out.println("chunks: " + chunks);
        partitionWriter.close();

    } catch (IOException e) {
        e.printStackTrace();
    }

    Job job = new Job(conf, "Import Hexastore");

    FileInputFormat.setInputPaths(job, new Path(args[0]));
    Path out = new Path("out");
    try {
        fs = FileSystem.get(conf);
        if (fs.exists(out)) {
            fs.delete(out, true);
        }
    } catch (IOException e) {
        e.printStackTrace();
    }
    FileOutputFormat.setOutputPath(job, out);

    job.setPartitionerClass(TotalOrderPartitioner.class);
    TotalOrderPartitioner.setPartitionFile(job.getConfiguration(), new Path("hexastorePartition"));
    job.setInputFormatClass(TextInputFormat.class);
    job.setOutputFormatClass(HFileOutputFormat.class);

    StringBuilder compressionConfigValue = new StringBuilder();
    compressionConfigValue.append(URLEncoder.encode("I", "UTF-8"));
    compressionConfigValue.append('=');
    compressionConfigValue.append(URLEncoder.encode(Algorithm.SNAPPY.getName(), "UTF-8"));
    compressionConfigValue.append('&');
    compressionConfigValue.append(URLEncoder.encode("S", "UTF-8"));
    compressionConfigValue.append('=');
    compressionConfigValue.append(URLEncoder.encode(Algorithm.SNAPPY.getName(), "UTF-8"));
    compressionConfigValue.append('&');
    compressionConfigValue.append(URLEncoder.encode("T", "UTF-8"));
    compressionConfigValue.append('=');
    compressionConfigValue.append(URLEncoder.encode(Algorithm.SNAPPY.getName(), "UTF-8"));
    job.getConfiguration().set("hbase.hfileoutputformat.families.compression",
            compressionConfigValue.toString());
    //job.getConfiguration().setInt("hbase.mapreduce.hfileoutputformat.blocksize",262144);
    //job.getConfiguration().setInt("hbase.mapreduce.hfileoutputformat.blocksize",16384);
    job.setMapOutputKeyClass(ImmutableBytesWritable.class);
    job.setMapOutputValueClass(NullWritable.class);
    job.setOutputKeyClass(ImmutableBytesWritable.class);
    job.setOutputValueClass(KeyValue.class);
    job.setJarByClass(TranslateAndImport.class);
    job.setMapperClass(Map.class);
    //job.setReducerClass(HexaStoreHistogramsReduce.class);
    job.setReducerClass(HexaStoreReduce.class);

    job.getConfiguration().set("h2rdf.tableName", TABLE_NAME);
    job.getConfiguration().setInt("mapred.reduce.tasks", chunks + 1);
    //job.setCombinerClass(Combiner.class);
    job.setJobName("Translate Projections");
    job.getConfiguration().setBoolean("mapred.map.tasks.speculative.execution", false);
    job.getConfiguration().setBoolean("mapred.reduce.tasks.speculative.execution", false);
    job.getConfiguration().setInt("io.sort.mb", 100);
    job.getConfiguration().setInt("io.file.buffer.size", 131072);
    job.getConfiguration().setInt("mapred.job.reuse.jvm.num.tasks", -1);

    job.getConfiguration().set("mapred.compress.map.output", "true");
    job.getConfiguration().set("mapred.map.output.compression.codec",
            "org.apache.hadoop.io.compress.SnappyCodec");
    //job.getConfiguration().setInt("hbase.hregion.max.filesize", 268435456);
    //job.getConfiguration().setInt("hbase.hregion.max.filesize", 67108864);
    job.getConfiguration().setInt("hbase.hregion.max.filesize", 33554432);

    return job;

}

From source file:hamr.core.general.mapper.GeneralMapper.java

License:Open Source License

@SuppressWarnings({ "rawtypes", "unchecked" })
    @Override
protected void map(Object key, Object value, Mapper.Context context) throws IOException, InterruptedException {
    Class<? extends KeyGenerator> kgClass = context.getConfiguration()
            .getClass(JobContext.MAP_OUTPUT_KEY_CLASS, null, AnnotedBean.class).getAnnotation(Generator.class)
            .keyGeneratorClass();
    KeyGenerator kg;
    try {
        kg = kgClass.getConstructor(Mapper.Context.class).newInstance(context);
        List<AnnotedBean> generated = kg.generate(key, value);
        if (generated != null) {
            for (int i = 0; i < generated.size(); i++) {
                context.write(generated.get(i), NullWritable.get());
            }
        }
    } catch (InstantiationException | IllegalAccessException | IllegalArgumentException
            | InvocationTargetException | NoSuchMethodException | SecurityException e) {
        e.printStackTrace();
    }
}

From source file:hamr.core.general.reducer.GeneralReducer.java

License:Open Source License

@SuppressWarnings({ "unchecked", "rawtypes" })
protected void reduce(AnnotedBean key, Iterable<NullWritable> value,
        Reducer<AnnotedBean, NullWritable, AnnotedBean, NullWritable>.Context context) {

    Preducer predu = null;
    List<Counter> counters = null;
    AnnotedBean preduced = null;
    AnnotedBean result = null;
    Iterator<NullWritable> iter = value.iterator();
    while (iter.hasNext()) {
        iter.next();
        // build preducer
        Class preducerClass = null;
        if (predu == null) {
            Preduce preduce = key.getClass().getAnnotation(Preduce.class);
            if (preduce == null || preduce.preducer() == null) {
                // default preducer
                preducerClass = Preducer.class;
            } else {
                preducerClass = preduce.preducer();
            }
            try {
                predu = (Preducer) preducerClass.getDeclaredConstructor(Reducer.Context.class)
                        .newInstance(context);
            } catch (InstantiationException | IllegalAccessException | IllegalArgumentException
                    | InvocationTargetException | NoSuchMethodException | SecurityException e) {
                // TODO Auto-generated catch block
                e.printStackTrace();
            }
        }

        // preduce data
        preduced = predu.preduce(key);
        if (result == null) {
            result = preduced;
        }
        if (preduced == null) {
            continue;
        }

        // build counters
        if (counters == null) {
            counters = new ArrayList<Counter>();
            Class[] counterClasses = preduced.getClass().getAnnotation(Counters.class).counters();
            for (Class cc : counterClasses) {
                try {
                    counters.add((Counter) cc.getConstructor(Context.class).newInstance(context));
                } catch (InstantiationException | IllegalAccessException | IllegalArgumentException
                        | InvocationTargetException | NoSuchMethodException | SecurityException e) {
                    // TODO Auto-generated catch block
                    e.printStackTrace();
                }
            }
        }

        // pass the preduced value to each counter

        for (Counter c : counters) {
            c.count(preduced);
        }
    }
    //if every preduced value was invalid, there is nothing to count or write
    if (counters == null) {
        return;
    }
    //end each counter and decide whether to invoke write or not
    boolean needWrite = false;
    for (Counter c : counters) {
        if (c.end(result)) {
            needWrite = true;
        }
    }

    //write the result if any counter requested it
    if (needWrite) {
        try {
            context.write(result, NullWritable.get());
        } catch (IOException | InterruptedException e) {
            // TODO Auto-generated catch block
            e.printStackTrace();
        }
    }

}

From source file:homework.sample.SampleMapper.java

License:Apache License

@Override
protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
    context.write(NullWritable.get(), new Text(""));
}

From source file:homework.sample.SampleReducer.java

License:Apache License

@Override
protected void reduce(Text key, Iterable<Text> values, Context context)
        throws IOException, InterruptedException {
    context.write(NullWritable.get(), new Text("Hello World"));
}

From source file:hr.fer.tel.rovkp.homework02.task02.LocationsReducer.java

@Override
public void reduce(IntWritable key, Iterable<Text> values, Context context)
        throws IOException, InterruptedException {

    DebsRecordParser parser = new DebsRecordParser();

    boolean passed = false;

    for (Text value : values) {
        if (!passed) {
            try {
                parser.parse(value.toString());
                passed = true;
            } catch (ParseException ex) {
                passed = false;
            }
        }
        mos.write("bins", NullWritable.get(), value, parser.getLocation() + key.toString() + "/part");
    }
}