List of usage examples for org.apache.hadoop.io.NullWritable.get()
public static NullWritable get()
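NullWritable.get() returns the single shared NullWritable instance. NullWritable serializes to zero bytes, so the singleton is typically passed as the key or the value of a pair when only the other half carries data. The following is a minimal sketch (a hypothetical LinesOnlyMapper, not taken from the source files listed below) of the most common pattern: emitting the singleton as the map output value for every record.

import java.io.IOException;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

// Hypothetical mapper that keeps only the line text and discards the offset key.
public class LinesOnlyMapper extends Mapper<LongWritable, Text, Text, NullWritable> {
    @Override
    protected void map(LongWritable key, Text value, Context context)
            throws IOException, InterruptedException {
        // Emit the line as the key and the NullWritable singleton as the value;
        // the same instance is reused for every record and writes zero bytes.
        context.write(value, NullWritable.get());
    }
}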
From source file:gov.va.research.red.ex.hadoop.TestREDExHadoop.java
License:Apache License
@Test
public void testMapper() {
    MatchedElement me = new MatchedElement(10, 11, "1",
            "(?i)developed\\s{1,2}?((\\d+|zero|one|two|three|four|five|six|seven|eight|nine|ten))\\p{Punct}{1,2}?(?:\\d+?|zero|one|two|three|four|five|six|seven|eight|nine|ten)",
            0.005767844268204758);
    MatchedElementWritable mew = new MatchedElementWritable(me);
    mapDriver.withInput(NullWritable.get(),
            new BytesWritable("p0|d0|2015-06-08\ndeveloped 1.1".getBytes()));
    mapDriver.withOutput(new Text("p0|d0|2015-06-08"), mew);
    mapDriver.setValueComparator(new Comparator<MatchedElementWritable>() {
        @Override
        public int compare(MatchedElementWritable o1, MatchedElementWritable o2) {
            if (o1.getMatchedElement().getStartPos() != o2.getMatchedElement().getStartPos()) {
                // compare start positions (the original compared start to end here, which was a bug)
                return o1.getMatchedElement().getStartPos() - o2.getMatchedElement().getStartPos();
            }
            if (o1.getMatchedElement().getEndPos() != o2.getMatchedElement().getEndPos()) {
                return o1.getMatchedElement().getEndPos() - o2.getMatchedElement().getEndPos();
            }
            if (!o1.getMatchedElement().getMatch().equals(o2.getMatchedElement().getMatch())) {
                return o1.getMatchedElement().getMatch().compareTo(o2.getMatchedElement().getMatch());
            }
            return 0;
        }
    });
    try {
        mapDriver.runTest();
    } catch (IOException e) {
        throw new AssertionError(e);
    }
}
From source file:gov.va.research.red.ex.hadoop.TestREDExHadoop.java
License:Apache License
@Test
public void testReducer() {
    List<MatchedElementWritable> mewList = new ArrayList<>();
    MatchedElementWritable mew = new MatchedElementWritable(new MatchedElement(10, 11, "1", "", 1));
    mewList.add(mew);
    Text output = new Text("<?xml version=\"1.0\" encoding=\"UTF-8\"?>"
            + "<!DOCTYPE collection SYSTEM \"BioC.dtd\">"
            + "<collection>"
            + "<source></source>"
            + "<date></date>"
            + "<key></key>"
            + "<document>"
            + "<id>d0</id>"
            + "<infon key=\"date\">2015-06-08</infon>"
            + "<infon key=\"Patient ID\">p0</infon>"
            + "<passage>"
            + "<offset>0</offset>"
            + "<annotation id=\"\">"
            + "<infon key=\"dateTime\">2015-06-17T21:44Z</infon>"
            + "<infon key=\"confidence\">1.0</infon>"
            + "<infon key=\"type\">pain</infon>"
            + "<location offset=\"10\" length=\"1\"></location>"
            + "<text>1</text>"
            + "</annotation>"
            + "</passage>"
            + "</document>"
            + "</collection>");
    reduceDriver.withInput(new Text("p0|d0|2015-06-08"), mewList);
    reduceDriver.withOutput(output, NullWritable.get());
    reduceDriver.setKeyComparator(new BioCXMLComparator());
    try {
        reduceDriver.runTest();
    } catch (IOException e) {
        throw new AssertionError(e);
    }
}
From source file:gov.va.research.red.ex.hadoop.TestREDExHadoop.java
License:Apache License
@Test
public void testMapReduce() {
    mapReduceDriver.withInput(NullWritable.get(),
            new BytesWritable("p0|d0|2015-06-08\ndeveloped 1.1".getBytes()));
    Text output = new Text("<?xml version=\"1.0\" encoding=\"UTF-8\"?>"
            + "<!DOCTYPE collection SYSTEM \"BioC.dtd\">"
            + "<collection>"
            + "<source></source>"
            + "<date></date>"
            + "<key></key>"
            + "<document>"
            + "<id>d0</id>"
            + "<infon key=\"date\">2015-06-08</infon>"
            + "<infon key=\"Patient ID\">p0</infon>"
            + "<passage>"
            + "<offset>0</offset>"
            + "<annotation id=\"\">"
            + "<infon key=\"dateTime\">2015-06-17T21:44Z</infon>"
            + "<infon key=\"confidence\">1.0</infon>"
            + "<infon key=\"type\">pain</infon>"
            + "<location offset=\"10\" length=\"1\"></location>"
            + "<text>1</text>"
            + "</annotation>"
            + "</passage>"
            + "</document>"
            + "</collection>");
    mapReduceDriver.withOutput(output, NullWritable.get());
    mapReduceDriver.setKeyComparator(new BioCXMLComparator());
    try {
        mapReduceDriver.runTest();
    } catch (IOException e) {
        throw new AssertionError(e);
    }
}
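The mapDriver, reduceDriver, and mapReduceDriver fields used in these three tests are MRUnit test drivers. A typical setup for them might look like the sketch below; REDExMapper and REDExReducer are placeholder names (the actual mapper and reducer classes of this project are not shown in the listing), and the generic parameters are inferred from the test inputs and outputs above.

import org.apache.hadoop.io.BytesWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mrunit.mapreduce.MapDriver;
import org.apache.hadoop.mrunit.mapreduce.MapReduceDriver;
import org.apache.hadoop.mrunit.mapreduce.ReduceDriver;
import org.junit.Before;

public class TestSetupSketch {
    private MapDriver<NullWritable, BytesWritable, Text, MatchedElementWritable> mapDriver;
    private ReduceDriver<Text, MatchedElementWritable, Text, NullWritable> reduceDriver;
    private MapReduceDriver<NullWritable, BytesWritable, Text, MatchedElementWritable, Text, NullWritable> mapReduceDriver;

    @Before
    public void setUp() {
        REDExMapper mapper = new REDExMapper();    // placeholder mapper class name
        REDExReducer reducer = new REDExReducer(); // placeholder reducer class name
        mapDriver = MapDriver.newMapDriver(mapper);
        reduceDriver = ReduceDriver.newReduceDriver(reducer);
        mapReduceDriver = MapReduceDriver.newMapReduceDriver(mapper, reducer);
    }
}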
From source file:gr.ntua.h2rdf.loadTriples.TotalOrderPartitioner.java
License:Apache License
/**
 * Read the cut points from the given IFile.
 * @param fs The file system
 * @param p The path to read
 * @param keyClass The map output key class
 * @param job The job config
 * @throws IOException
 */
// matching key types enforced by passing in
@SuppressWarnings("unchecked") // map output key class
private K[] readPartitions(FileSystem fs, Path p, Class<K> keyClass, Configuration conf) throws IOException {
    SequenceFile.Reader reader = new SequenceFile.Reader(fs, p, conf);
    ArrayList<K> parts = new ArrayList<K>();
    K key = ReflectionUtils.newInstance(keyClass, conf);
    NullWritable value = NullWritable.get();
    while (reader.next(key, value)) {
        parts.add(key);
        key = ReflectionUtils.newInstance(keyClass, conf);
    }
    reader.close();
    return parts.toArray((K[]) Array.newInstance(keyClass, parts.size()));
}
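The partition file read above stores one cut-point key per record with NullWritable.get() as the value. A minimal sketch of the write side under that assumed layout (Text keys, a hypothetical "partitions.seq" path, and hard-coded sample cut points used purely for illustration):

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.io.Text;

public class WritePartitionFile {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(conf);
        Path p = new Path("partitions.seq"); // hypothetical output path
        // Same createWriter overload as in the TranslateAndImport example further down.
        SequenceFile.Writer writer = SequenceFile.createWriter(fs, conf, p, Text.class, NullWritable.class);
        try {
            for (String cut : new String[] { "f", "m", "t" }) { // sample cut points
                // Only the key matters; NullWritable.get() fills the value slot.
                writer.append(new Text(cut), NullWritable.get());
            }
        } finally {
            writer.close();
        }
    }
}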
From source file:gr.ntua.h2rdf.loadTriples.TranslateAndImport.java
License:Apache License
public Job createSubmittableJob(String[] args) throws IOException, ClassNotFoundException {
    // compute sample partitions
    FileSystem fs;
    Configuration conf = new Configuration();
    int collected = 0, chunks = 0;
    try {
        fs = FileSystem.get(conf);
        Path sampleDir = new Path("sample");
        FileStatus[] samples = fs.listStatus(sampleDir);
        TreeSet<String> set = new TreeSet<String>();
        for (FileStatus sample : samples) {
            FSDataInputStream in = fs.open(sample.getPath());
            CompressionCodec codec = (CompressionCodec) ReflectionUtils.newInstance(GzipCodec.class, conf);
            CompressionInputStream in1 = codec.createInputStream(in);
            NxParser nxp = new NxParser(in1);
            Iterator<Node[]> it = nxp.iterator();
            while (it.hasNext()) {
                Node[] tr = it.next();
                //System.out.println(tr[0].toN3());
                set.add(tr[0].toN3());
                set.add(tr[1].toN3());
                set.add(tr[2].toN3());
            }
            in1.close();
            in.close();
        }
        IndexTranslator translator = new IndexTranslator(TABLE_NAME + "_Index");
        HashMap<String, Long> index = translator.translate(set);
        set.clear();
        TreeSet<ImmutableBytesWritable> set1 = new TreeSet<ImmutableBytesWritable>(
                new ImmutableBytesWritable.Comparator());
        for (FileStatus sample : samples) {
            FSDataInputStream in = fs.open(sample.getPath());
            CompressionCodec codec = (CompressionCodec) ReflectionUtils.newInstance(GzipCodec.class, conf);
            CompressionInputStream in1 = codec.createInputStream(in);
            NxParser nxp = new NxParser(in1);
            Iterator<Node[]> it = nxp.iterator();
            while (it.hasNext()) {
                Node[] tr = it.next();
                ByteTriple btr = new ByteTriple(index.get(tr[0].toN3()), index.get(tr[1].toN3()),
                        index.get(tr[2].toN3()));
                set1.add(new ImmutableBytesWritable(btr.getSPOByte()));
                set1.add(new ImmutableBytesWritable(btr.getSOPByte()));
                set1.add(new ImmutableBytesWritable(btr.getOPSByte()));
                set1.add(new ImmutableBytesWritable(btr.getOSPByte()));
                set1.add(new ImmutableBytesWritable(btr.getPOSByte()));
                set1.add(new ImmutableBytesWritable(btr.getPSOByte()));
            }
            in1.close();
            in.close();
        }
        index.clear();
        Path p = new Path("hexastorePartition");
        if (fs.exists(p)) {
            fs.delete(p, true);
        }
        SequenceFile.Writer partitionWriter = SequenceFile.createWriter(fs, conf, p,
                ImmutableBytesWritable.class, NullWritable.class);
        double chunkSize = bucketSampledTriples * DistinctIds.samplingRate;
        System.out.println("chunkSize: " + chunkSize);
        Iterator<ImmutableBytesWritable> it = set1.iterator();
        while (it.hasNext()) {
            ImmutableBytesWritable key = it.next();
            if (collected > chunkSize) {
                partitionWriter.append(key, NullWritable.get());
                //System.out.println(Bytes.toStringBinary(key.get()));
                collected = 0;
                chunks++;
            } else {
                collected++;
            }
        }
        System.out.println("chunks: " + chunks);
        partitionWriter.close();
    } catch (IOException e) {
        e.printStackTrace();
    }

    Job job = new Job();
    job = new Job(conf, "Import Hexastore");
    FileInputFormat.setInputPaths(job, new Path(args[0]));
    Path out = new Path("out");
    try {
        fs = FileSystem.get(conf);
        if (fs.exists(out)) {
            fs.delete(out, true);
        }
    } catch (IOException e) {
        e.printStackTrace();
    }
    FileOutputFormat.setOutputPath(job, out);
    job.setPartitionerClass(TotalOrderPartitioner.class);
    TotalOrderPartitioner.setPartitionFile(job.getConfiguration(), new Path("hexastorePartition"));
    job.setInputFormatClass(TextInputFormat.class);
    job.setOutputFormatClass(HFileOutputFormat.class);
    StringBuilder compressionConfigValue = new StringBuilder();
    compressionConfigValue.append(URLEncoder.encode("I", "UTF-8"));
    compressionConfigValue.append('=');
    compressionConfigValue.append(URLEncoder.encode(Algorithm.SNAPPY.getName(), "UTF-8"));
    compressionConfigValue.append('&');
    compressionConfigValue.append(URLEncoder.encode("S", "UTF-8"));
    compressionConfigValue.append('=');
    compressionConfigValue.append(URLEncoder.encode(Algorithm.SNAPPY.getName(), "UTF-8"));
    compressionConfigValue.append('&');
    compressionConfigValue.append(URLEncoder.encode("T", "UTF-8"));
    compressionConfigValue.append('=');
    compressionConfigValue.append(URLEncoder.encode(Algorithm.SNAPPY.getName(), "UTF-8"));
    job.getConfiguration().set("hbase.hfileoutputformat.families.compression",
            compressionConfigValue.toString());
    //job.getConfiguration().setInt("hbase.mapreduce.hfileoutputformat.blocksize", 262144);
    //job.getConfiguration().setInt("hbase.mapreduce.hfileoutputformat.blocksize", 16384);
    job.setMapOutputKeyClass(ImmutableBytesWritable.class);
    job.setMapOutputValueClass(NullWritable.class);
    job.setOutputKeyClass(ImmutableBytesWritable.class);
    job.setOutputValueClass(KeyValue.class);
    job.setJarByClass(TranslateAndImport.class);
    job.setMapperClass(Map.class);
    //job.setReducerClass(HexaStoreHistogramsReduce.class);
    job.setReducerClass(HexaStoreReduce.class);
    job.getConfiguration().set("h2rdf.tableName", TABLE_NAME);
    job.getConfiguration().setInt("mapred.reduce.tasks", chunks + 1);
    //job.setCombinerClass(Combiner.class);
    job.setJobName("Translate Projections");
    job.getConfiguration().setBoolean("mapred.map.tasks.speculative.execution", false);
    job.getConfiguration().setBoolean("mapred.reduce.tasks.speculative.execution", false);
    job.getConfiguration().setInt("io.sort.mb", 100);
    job.getConfiguration().setInt("io.file.buffer.size", 131072);
    job.getConfiguration().setInt("mapred.job.reuse.jvm.num.tasks", -1);
    job.getConfiguration().set("mapred.compress.map.output", "true");
    job.getConfiguration().set("mapred.map.output.compression.codec",
            "org.apache.hadoop.io.compress.SnappyCodec");
    //job.getConfiguration().setInt("hbase.hregion.max.filesize", 268435456);
    //job.getConfiguration().setInt("hbase.hregion.max.filesize", 67108864);
    job.getConfiguration().setInt("hbase.hregion.max.filesize", 33554432);
    return job;
}
From source file:hamr.core.general.mapper.GeneralMapper.java
License:Open Source License
@SuppressWarnings({ "rawtypes", "unchecked" }) @Override//from w w w.ja va 2s. co m protected void map(Object key, Object value, Mapper.Context context) throws IOException, InterruptedException { Class<? extends KeyGenerator> kgClass = context.getConfiguration() .getClass(JobContext.MAP_OUTPUT_KEY_CLASS, null, AnnotedBean.class).getAnnotation(Generator.class) .keyGeneratorClass(); KeyGenerator kg; try { kg = kgClass.getConstructor(Mapper.Context.class).newInstance(context); List<AnnotedBean> generated = kg.generate(key, value); if (generated != null) { for (int i = 0; i < generated.size(); i++) { context.write(generated.get(i), NullWritable.get()); } } } catch (InstantiationException | IllegalAccessException | IllegalArgumentException | InvocationTargetException | NoSuchMethodException | SecurityException e) { e.printStackTrace(); } }
From source file:hamr.core.general.reducer.GeneralReducer.java
License:Open Source License
@SuppressWarnings({ "unchecked", "rawtypes" }) protected void reduce(AnnotedBean key, Iterable<NullWritable> value, Reducer<AnnotedBean, NullWritable, AnnotedBean, NullWritable>.Context context) { Preducer predu = null;/*from w ww . j a v a2 s.c om*/ List<Counter> counters = null; AnnotedBean preduced = null; AnnotedBean result = null; Iterator<NullWritable> iter = value.iterator(); while (iter.hasNext()) { iter.next(); // build preducer Class preducerClass = null; if (predu == null) { Preduce preduce = key.getClass().getAnnotation(Preduce.class); if (preduce == null || preduce.preducer() == null) { // default preducer preducerClass = Preducer.class; } else { preducerClass = preduce.preducer(); } try { predu = (Preducer) preducerClass.getDeclaredConstructor(Reducer.Context.class) .newInstance(context); } catch (InstantiationException | IllegalAccessException | IllegalArgumentException | InvocationTargetException | NoSuchMethodException | SecurityException e) { // TODO Auto-generated catch block e.printStackTrace(); } } // preduce data preduced = predu.preduce(key); if (result == null) { result = preduced; } if (preduced == null) { continue; } // build counters if (counters == null) { counters = new ArrayList<Counter>(); Class[] counterClasses = preduced.getClass().getAnnotation(Counters.class).counters(); for (Class cc : counterClasses) { try { counters.add((Counter) cc.getConstructor(Context.class).newInstance(context)); } catch (InstantiationException | IllegalAccessException | IllegalArgumentException | InvocationTargetException | NoSuchMethodException | SecurityException e) { // TODO Auto-generated catch block e.printStackTrace(); } } } // send each data to each counter; for (Counter c : counters) { c.count(preduced); } } //if all preduced is invalid if (counters == null) { return; } //end each counter and decide wether to invoke write or not boolean needWrite = false; for (Counter c : counters) { if (c.end(result)) { needWrite = true; } } //need write then write if (needWrite) { try { context.write(result, NullWritable.get()); } catch (IOException | InterruptedException e) { // TODO Auto-generated catch block e.printStackTrace(); } } }
From source file:homework.sample.SampleMapper.java
License:Apache License
@Override
protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
    context.write(NullWritable.get(), new Text(""));
}
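When a mapper emits NullWritable.get() as the key, the job configuration must declare NullWritable as the matching output key class. A sketch of a driver for a map-only job built around the SampleMapper above (SampleDriver and the input/output path arguments are assumptions, not part of the original homework.sample package):

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class SampleDriver {
    public static void main(String[] args) throws Exception {
        Job job = Job.getInstance(new Configuration(), "sample");
        job.setJarByClass(SampleDriver.class);
        job.setMapperClass(SampleMapper.class);
        job.setNumReduceTasks(0); // map-only: mapper output goes straight to the output format
        job.setOutputKeyClass(NullWritable.class);   // matches the NullWritable.get() key
        job.setOutputValueClass(Text.class);
        FileInputFormat.addInputPath(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}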
From source file:homework.sample.SampleReducer.java
License:Apache License
@Override
protected void reduce(Text key, Iterable<Text> values, Context context) throws IOException, InterruptedException {
    context.write(NullWritable.get(), new Text("Hello World"));
}
From source file:hr.fer.tel.rovkp.homework02.task02.LocationsReducer.java
@Override
public void reduce(IntWritable key, Iterable<Text> values, Context context) throws IOException, InterruptedException {
    DebsRecordParser parser = new DebsRecordParser();
    boolean passed = false;
    for (Text value : values) {
        if (!passed) {
            try {
                parser.parse(value.toString());
                passed = true;
            } catch (ParseException ex) {
                passed = false;
            }
        }
        mos.write("bins", NullWritable.get(), value, parser.getLocation() + key.toString() + "/part");
    }
}
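The mos field above is a MultipleOutputs instance, which is what allows NullWritable.get() keys to be written under a per-location base path. A sketch of the supporting wiring (assumed, not part of the original source; BinsReducer and the reducer output types are placeholders, only the MultipleOutputs API calls are standard Hadoop):

import java.io.IOException;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.output.MultipleOutputs;

public class BinsReducer extends Reducer<IntWritable, Text, NullWritable, Text> {
    private MultipleOutputs<NullWritable, Text> mos;

    @Override
    protected void setup(Context context) {
        mos = new MultipleOutputs<>(context); // same field the reduce() above writes to
    }

    @Override
    protected void cleanup(Context context) throws IOException, InterruptedException {
        mos.close(); // flush all named outputs
    }

    // In the driver, the "bins" named output must be registered, e.g.:
    //   MultipleOutputs.addNamedOutput(job, "bins", TextOutputFormat.class,
    //           NullWritable.class, Text.class);
}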