List of usage examples for org.apache.hadoop.io.IntWritable.get()
public int get()
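Before the examples, a quick orientation: get() returns the plain Java int wrapped by the IntWritable (the counterpart of set(int)). A minimal self-contained sketch, not taken from any of the files below (class name is illustrative):

import org.apache.hadoop.io.IntWritable;

public class IntWritableGetExample {
    public static void main(String[] args) {
        IntWritable writable = new IntWritable(163); // wrap a Java int in a Writable
        int value = writable.get();                  // unwrap it again
        writable.set(value + 1);                     // set() replaces the wrapped value
        System.out.println(value + " " + writable.get()); // prints "163 164"
    }
}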
From source file:com.yahoo.semsearch.fastlinking.io.Datapack.java
License:Apache License
private void merge(String anchorMapPath, String dfMapPath, String multiple_out, String out, String ngram)
        throws IOException {
    JobConf conf = new JobConf(getConf(), Datapack.class);
    FileSystem fs = FileSystem.get(conf);
    BufferedWriter anchorsDataOut;
    BufferedWriter anchorsTSVOut;
    Boolean multiple_output = (multiple_out != null && multiple_out.equalsIgnoreCase("true"));
    Boolean ngram_output = (ngram != null && ngram.equalsIgnoreCase("true"));
    if (!multiple_output) {
        anchorsDataOut = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(out), outputEncoding));
        anchorsTSVOut = null;
    } else {
        anchorsDataOut = new BufferedWriter(
                new OutputStreamWriter(new FileOutputStream(out + ".dat"), outputEncoding));
        anchorsTSVOut = new BufferedWriter(
                new OutputStreamWriter(new FileOutputStream(out + ".tsv"), outputEncoding));
    }

    // Loop over anchors
    MapFile.Reader anchorMapReader = new MapFile.Reader(new Path(anchorMapPath + "/part-00000"), conf);
    MapFile.Reader dfMapReader = new MapFile.Reader(new Path(dfMapPath + "/part-00000"), conf);

    /*FileStatus[] status = fs.listStatus( new Path( dfMapPath ) ); // you need to pass in your hdfs path
    for( FileStatus fileStatus : status ) {
        if( !fileStatus.getPath().toString().contains( "part-" )) continue;
        MapFile.Reader dfMapReader = new MapFile.Reader( fileStatus.getPath(), conf );
    */

    Text akey = new Text();
    Text dkey = new Text();
    IntWritable df = new IntWritable();
    HMapSIW map = new HMapSIW();

    while (anchorMapReader.next(akey, map)) {
        // since they are both sorted we can just iterate over both
        // TODO if need be, artificially add a 0 count to unseen anchors
        dfMapReader.next(dkey, df);
        while (!akey.toString().equalsIgnoreCase(dkey.toString())) {
            //System.err.println("Mismatch: '" + akey + "' and '" + dkey + "'");
            anchorMapReader.next(akey, map);
        }
        String l = akey.toString();

        // while( dfMapReader.next( dkey, df ) ) {
        //     String l = dkey.toString();
        if (l.trim().length() < 2)
            continue;

        StringBuilder targets = new StringBuilder();
        int total = 0;
        for (String target : map.keySet()) {
            int count = map.get(target);
            total += count;
            String entity = URLEncoder.encode(target.replaceAll(" ", "_"), "UTF-8");
            targets.append(entity);
            targets.append(SEPARATOR);
            targets.append(Integer.toString(count));
            targets.append("\t");
        }

        if (StringUtils.isNumeric(l) && total < 2)
            continue;
        //System.err.println("targets " + targets);
        if (targets.length() < 2)
            continue;

        if (!ngram_output) {
            anchorsDataOut.write(l);
            anchorsDataOut.write(SEPARATOR);
            anchorsDataOut.write(Integer.toString(df.get()));
            anchorsDataOut.write(SEPARATOR);
            anchorsDataOut.write(Integer.toString(total));
            anchorsDataOut.write("\t");
            anchorsDataOut.write(targets.substring(0, targets.length() - 1));
            anchorsDataOut.write("\n");
            anchorsDataOut.flush();

            if (multiple_output) {
                for (String target : map.keySet()) {
                    int count = map.get(target);
                    String entity = URLEncoder.encode(target.replaceAll(" ", "_"), "UTF-8");
                    anchorsTSVOut.write(l);
                    anchorsTSVOut.write("\t");
                    anchorsTSVOut.write(Integer.toString(df.get()));
                    anchorsTSVOut.write("\t");
                    anchorsTSVOut.write(Integer.toString(total));
                    anchorsTSVOut.write("\t");
                    anchorsTSVOut.write(entity);
                    anchorsTSVOut.write("\t");
                    anchorsTSVOut.write(Integer.toString(count));
                    anchorsTSVOut.write("\n");
                    anchorsTSVOut.flush();
                }
            }
        } else {
            String parts[] = l.split("\\s+");
            for (int i = 0; i < parts.length; i++) {
                StringBuilder sb = new StringBuilder();
                for (int j = i; j < parts.length; j++) {
                    sb.append(parts[j]);
                    String ss = sb.toString();
                    anchorsDataOut.write(ss);
                    anchorsDataOut.write(SEPARATOR);
                    anchorsDataOut.write(Integer.toString(df.get()));
                    anchorsDataOut.write(SEPARATOR);
                    anchorsDataOut.write(Integer.toString(total));
                    anchorsDataOut.write("\t");
                    anchorsDataOut.write(targets.substring(0, targets.length() - 1));
                    anchorsDataOut.write("\n");
                    anchorsDataOut.flush();
                    if (multiple_output) {
                        for (String target : map.keySet()) {
                            int count = map.get(target);
                            String entity = URLEncoder.encode(target.replaceAll(" ", "_"), "UTF-8");
                            anchorsTSVOut.write(ss);
                            anchorsTSVOut.write("\t");
                            anchorsTSVOut.write(Integer.toString(df.get()));
                            anchorsTSVOut.write("\t");
                            anchorsTSVOut.write(Integer.toString(total));
                            anchorsTSVOut.write("\t");
                            anchorsTSVOut.write(entity);
                            anchorsTSVOut.write("\t");
                            anchorsTSVOut.write(Integer.toString(count));
                            anchorsTSVOut.write("\n");
                            anchorsTSVOut.flush();
                        }
                        sb.append(" ");
                    }
                }
            }
        }
    }
    dfMapReader.close();
    //}
    anchorsDataOut.close();
    if (multiple_output) {
        anchorsTSVOut.close();
    }
    //anchorMapReader.close();
    fs.close();
}
From source file:com.yosanai.tutorial.hadoop.hellohadoop.WordCountReducer.java
License:Open Source License
protected void reduce(Text key, Iterable<IntWritable> values, Context context)
        throws java.io.IOException, InterruptedException {
    int sum = 0;
    for (IntWritable intWritable : values) {
        sum += intWritable.get();
    }
    context.write(key, new IntWritable(sum));
}
From source file:com.yourcompany.hadoop.mapreduce.KoreanWordcountReducer.java
License:Apache License
@Override
protected void reduce(Text key, Iterable<IntWritable> values, Context context)
        throws IOException, InterruptedException {
    int sum = 0;
    for (IntWritable count : values) {
        sum += count.get();
    }
    context.getCounter("COUNT", "UNIQUE_WORD").increment(1);
    if (sum >= minSupport) {
        context.getCounter("COUNT", "OVER_THRESHOLD").increment(1);
        context.write(key, new IntWritable(sum));
    } else {
        context.getCounter("COUNT", "UNDER_THRESHOLD").increment(1);
    }
}
From source file:combiner.CombinerReducer.java
@Override
public void reduce(Text _key, Iterator<IntWritable> values, OutputCollector<Text, IntWritable> output,
        Reporter reporter) throws IOException {
    Text key = _key;
    int frequencyForYear = 0;
    while (values.hasNext()) {
        IntWritable value = values.next();
        frequencyForYear += value.get();
    }
    output.collect(key, new IntWritable(frequencyForYear));
}
From source file:computation.test.MockUtils.java
License:Apache License
/**
 * Prepare a mocked CentralizedServiceWorker.
 *
 * @param numOfPartitions The number of partitions
 * @return CentralizedServiceWorker
 */
public static CentralizedServiceWorker<IntWritable, IntWritable, IntWritable> mockServiceGetVertexPartitionOwner(
        final int numOfPartitions) {
    CentralizedServiceWorker<IntWritable, IntWritable, IntWritable> service = Mockito
            .mock(CentralizedServiceWorker.class);
    Answer<PartitionOwner> answer = new Answer<PartitionOwner>() {
        @Override
        public PartitionOwner answer(InvocationOnMock invocation) throws Throwable {
            IntWritable vertexId = (IntWritable) invocation.getArguments()[0];
            return new BasicPartitionOwner(vertexId.get() % numOfPartitions, null);
        }
    };
    Mockito.when(service.getVertexPartitionOwner(Mockito.any(IntWritable.class))).thenAnswer(answer);
    return service;
}
From source file:crunch.MaxTemperature.java
License:Apache License
@Override
public void reduce(Text key, Iterable<IntWritable> values, // XXX Iterable
        Context context) throws IOException, InterruptedException {
    int maxValue = Integer.MIN_VALUE;
    for (IntWritable value : values) {
        maxValue = Math.max(maxValue, value.get());
    }
    context.write(key, new IntWritable(maxValue));
}
From source file:crunch.MaxTemperature.java
License:Apache License
private void checkWalkthrough(IntWritable writable) throws IOException {
    // vv IntWritableTest-SerializedLength
    byte[] bytes = serialize(writable);
    assertThat(bytes.length, is(4));
    // ^^ IntWritableTest-SerializedLength

    // vv IntWritableTest-SerializedBytes
    assertThat(StringUtils.byteToHexString(bytes), is("000000a3"));
    // ^^ IntWritableTest-SerializedBytes

    // vv IntWritableTest-Deserialization
    IntWritable newWritable = new IntWritable();
    deserialize(newWritable, bytes);
    assertThat(newWritable.get(), is(163));
    // ^^ IntWritableTest-Deserialization
}
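The serialize and deserialize helpers used above are not part of this snippet. Assuming they follow the usual Writable round-trip pattern (a DataOutputStream over a byte array for writing, a DataInputStream for reading back; requires java.io.* and org.apache.hadoop.io.Writable imports), they might look roughly like this sketch:

public static byte[] serialize(Writable writable) throws IOException {
    ByteArrayOutputStream out = new ByteArrayOutputStream();
    DataOutputStream dataOut = new DataOutputStream(out);
    writable.write(dataOut);     // the Writable serializes itself to a DataOutput
    dataOut.close();
    return out.toByteArray();
}

public static byte[] deserialize(Writable writable, byte[] bytes) throws IOException {
    ByteArrayInputStream in = new ByteArrayInputStream(bytes);
    DataInputStream dataIn = new DataInputStream(in);
    writable.readFields(dataIn); // the Writable repopulates itself from a DataInput
    dataIn.close();
    return bytes;
}

With helpers like these, an IntWritable holding 163 serializes to the four bytes 00 00 00 a3, which is exactly what the assertions above check.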
From source file:crunch.MaxTemperature.java
License:Apache License
@Test
public void test() throws IOException {
    IntWritable src = new IntWritable(163);
    IntWritable dest = new IntWritable();
    assertThat(writeTo(src, dest), is("000000a3"));
    assertThat(dest.get(), is(src.get()));
}
From source file:crunch.MaxTemperature.java
License:Apache License
@Override
public void reduce(Text key, Iterable<IntWritable> values, Context context)
        throws IOException, InterruptedException {
    int maxValue = Integer.MIN_VALUE;
    for (IntWritable value : values) {
        maxValue = Math.max(maxValue, value.get());
    }
    context.write(key, new IntWritable(maxValue));
}
From source file:de.kp.core.arules.hadoop.IntArrayWritable.java
License:Open Source License
public int[] get() {
    Writable[] writableArray = intArrayWritable.get();
    int[] intArray = new int[writableArray.length];
    for (int i = 0; i < writableArray.length; i++) {
        IntWritable item = (IntWritable) writableArray[i];
        intArray[i] = item.get();
    }
    return intArray;
}
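The intArrayWritable field read here is not shown in the snippet. Assuming it is a standard org.apache.hadoop.io.ArrayWritable holding IntWritable elements, a self-contained sketch of the same unwrapping might look like this (class name, field initialization, and sample values are illustrative):

import org.apache.hadoop.io.ArrayWritable;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Writable;

public class IntArrayWritableSketch {
    // Hypothetical backing field: an ArrayWritable of IntWritable elements.
    private final ArrayWritable intArrayWritable = new ArrayWritable(IntWritable.class,
            new Writable[] { new IntWritable(1), new IntWritable(2), new IntWritable(3) });

    public int[] get() {
        Writable[] writableArray = intArrayWritable.get();
        int[] intArray = new int[writableArray.length];
        for (int i = 0; i < writableArray.length; i++) {
            intArray[i] = ((IntWritable) writableArray[i]).get(); // unwrap each element
        }
        return intArray; // {1, 2, 3} for the sample values above
    }
}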