List of usage examples for org.apache.hadoop.mapred.OutputCollector.collect
void collect(K key, V value) throws IOException;
From source file:crimeScoreMapper.java
public void map(LongWritable key, Text value, OutputCollector<Text, IntWritable> output, Reporter reporter) throws IOException { StringTokenizer tokenizer = new StringTokenizer(value.toString(), ","); if (tokenizer.countTokens() != 0) { String[] data = new String[tokenizer.countTokens()]; int i = 0; while (tokenizer.hasMoreTokens()) { data[i] = tokenizer.nextToken(); i++;//from w w w . j av a2s .c o m } String type = data[2]; type = type.trim(); int weight = 0; if (type.equalsIgnoreCase("arson")) { weight = 2; } else if (type.equalsIgnoreCase("theft")) { weight = 3; } else if (type.equalsIgnoreCase("assault")) { weight = 2; } else if (type.equalsIgnoreCase("battery")) { weight = 4; } else if (type.equalsIgnoreCase("robbery")) { weight = 3; } else if (type.equalsIgnoreCase("burglary")) { weight = 3; } else if (type.equalsIgnoreCase("gambling")) { weight = 1; } else if (type.equalsIgnoreCase("homicide")) { weight = 4; } else if (type.equalsIgnoreCase("stalking")) { weight = 1; } else if (type.equalsIgnoreCase("narcotics")) { weight = 2; } else if (type.equalsIgnoreCase("obscenity")) { weight = 1; } else if (type.equalsIgnoreCase("kidnapping")) { weight = 3; } else if (type.equalsIgnoreCase("sex offense")) { weight = 3; } else if (type.equalsIgnoreCase("intimidation")) { weight = 2; } else if (type.equalsIgnoreCase("non - criminal")) { weight = 1; } else if (type.equalsIgnoreCase("prostitution")) { weight = 2; } else if (type.equalsIgnoreCase("other offense")) { weight = 1; } else if (type.equalsIgnoreCase("non-criminal")) { weight = 1; } else if (type.equalsIgnoreCase("criminal damage")) { weight = 2; } else if (type.equalsIgnoreCase("public indecency")) { weight = 2; } else if (type.equalsIgnoreCase("criminal trespass")) { weight = 2; } else if (type.equalsIgnoreCase("human trafficking")) { weight = 3; } else if (type.equalsIgnoreCase("weapons violation")) { weight = 2; } else if (type.equalsIgnoreCase("deceptive practice")) { weight = 2; } else if 
(type.equalsIgnoreCase("crim sexual assault")) { weight = 4; } else if (type.equalsIgnoreCase("motor vehicle theft")) { weight = 2; } else if (type.equalsIgnoreCase("liquor law violation")) { weight = 1; } else if (type.equalsIgnoreCase("public peace violation")) { weight = 1; } else if (type.equalsIgnoreCase("other narcotic violation")) { weight = 1; } else if (type.equalsIgnoreCase("offense involving children")) { weight = 3; } else if (type.equalsIgnoreCase("interference with public officer")) { weight = 1; } else if (type.equalsIgnoreCase("concealed carry license violation")) { weight = 2; } if (data[3].trim().startsWith("60")) output.collect(new Text(data[3].trim()), new IntWritable(weight)); else output.collect(new Text(data[4].trim()), new IntWritable(weight)); } else { output.collect(new Text("ProBLEMMMMMMMMMMMMMMMMMMMMM"), new IntWritable(1)); } }
From source file:FriendsReducer.java
License:Apache License
public void reduce(Text anagramKey, Iterator<Text> anagramValues, OutputCollector<Text, Text> results, Reporter reporter) throws IOException { String output = ""; List<String> values1 = Arrays.asList(anagramValues.next().toString().split(",")); String[] values2 = anagramValues.next().toString().split(","); List<String> r = new ArrayList<String>(); for (String v2 : values2) { if (values1.contains(v2)) r.add(v2);/*from w ww . j av a 2 s .com*/ } //StringTokenizer outputTokenizer = new StringTokenizer(output,"~"); /* if(outputTokenizer.countTokens()>=2) {*/ // output = output.replace("~", ","); outputKey.set(anagramKey.toString()); outputValue.set(String.join(",", r)); results.collect(outputKey, outputValue); //} }
From source file:DistribCountingReducer.java
License:Apache License
@Override public void reduce(Text itemset, Iterator<IntWritable> values, OutputCollector<Text, Text> output, Reporter reporter) throws IOException { int sum = 0;// www . j av a2 s. c o m while (values.hasNext()) { sum += values.next().get(); } if (sum >= datasetSize * minFreqPercent / 100) { double freq = ((double) sum) / datasetSize; output.collect(itemset, new Text((new Double(freq)).toString())); } }
From source file:DataJoinMapperBase.java
License:Apache License
public void map(Object key, Object value, OutputCollector output, Reporter reporter) throws IOException { if (this.reporter == null) { this.reporter = reporter; }//from w w w . j a v a 2 s. co m addLongValue("totalCount", 1); TaggedMapOutput aRecord = generateTaggedMapOutput(value); if (aRecord == null) { addLongValue("discardedCount", 1); return; } Text groupKey = generateGroupKey(aRecord); if (groupKey == null) { addLongValue("nullGroupKeyCount", 1); return; } output.collect(groupKey, aRecord); addLongValue("collectedCount", 1); }
From source file:BinomialSamplerMapper.java
License:Apache License
@Override public void map(LongWritable lineNum, Text value, OutputCollector<IntWritable, Text> output, Reporter reporter) throws IOException { for (int i = 0; i < reducersNum; i++) { int sampledTimes = Binomial.staticNextInt(datasetSize / reducersNum, 1.0 / datasetSize); /**/*from www . java2s. co m*/ * XXX I assume there is a better way of doing * this, by only having one "message" sent to * reducer i, for example by making "value" an * object containing the fields "sampleTimes" * and "value". MR */ for (int j = 0; j < sampledTimes; j++) { output.collect(new IntWritable(i), value); } } }
From source file:DistribCountingMapper.java
License:Apache License
@Override public void map(LongWritable lineNum, Text value, OutputCollector<Text, IntWritable> output, Reporter reporter) throws IOException { IntWritable one = new IntWritable(1); HashSet<String> transactionItems = new HashSet<String>(); StringTokenizer st = new StringTokenizer(value.toString()); while (st.hasMoreTokens()) { transactionItems.add(st.nextToken()); }//w ww . j a v a 2s.co m Set<Set<String>> powerSet = Sets.powerSet(transactionItems); for (Set<String> itemset : powerSet) { if (itemset.size() > 0) { String[] itemsetArr = new String[itemset.size()]; itemset.toArray(itemsetArr); Arrays.sort(itemsetArr); String itemsetStr = ""; for (int i = 0; i < itemsetArr.length; i++) { itemsetStr += itemsetArr[i] + " "; } output.collect(new Text(itemsetStr), one); } } }
From source file:PartitionMapper.java
License:Apache License
/**
 * Forwards the input value under a reducer index drawn uniformly from
 * {@code [0, reducersNum)}.
 *
 * @param lineNum  input key; not read here
 * @param value    the record to forward
 * @param output   collector receiving (random reducer index, value)
 * @param reporter not used by this method
 * @throws IOException if the collector fails
 */
@Override
public void map(LongWritable lineNum, Text value, OutputCollector<IntWritable, Text> output,
        Reporter reporter) throws IOException {
    // A fresh generator is created on every call, exactly as before.
    Random generator = new Random();
    IntWritable destination = new IntWritable(generator.nextInt(reducersNum));
    output.collect(destination, value);
}
From source file:RandIntPartSamplerMapper.java
License:Apache License
@Override public void map(NullWritable lineNum, TextArrayWritable transactionsArrWr, OutputCollector<IntWritable, Text> output, Reporter reporter) throws IOException { reporter.incrCounter("FIMMapperStart", String.valueOf(id), System.currentTimeMillis()); Random rand = new Random(); Writable[] transactions = transactionsArrWr.get(); int transactionsNum = transactions.length; System.out.println("transactionsNum: " + transactionsNum); for (int i = 0; i < toSample; i++) { int sampledIndex = rand.nextInt(transactionsNum); output.collect(sampleDestinations[i], (Text) transactions[sampledIndex]); }//w w w . j a v a2 s .co m reporter.incrCounter("FIMMapperEnd", String.valueOf(id), System.currentTimeMillis()); }
From source file:CoinFlipSamplerMapper.java
License:Apache License
@Override public void map(LongWritable lineNum, Text value, OutputCollector<IntWritable, Text> output, Reporter reporter) throws IOException { Random rand = new Random(); for (int i = 0; i < reducersNum; i++) { double f = rand.nextDouble(); if (f <= 1.0 / datasetSize) { output.collect(new IntWritable(i), value); }//w w w . j a va2 s . co m } }
From source file:DistribCountingCombiner.java
License:Apache License
/**
 * Adds up the partial counts of an itemset and emits the total under the same key.
 *
 * @param itemset  the itemset key, forwarded unchanged
 * @param values   partial counts for the itemset
 * @param output   collector receiving (itemset, total)
 * @param reporter not used by this method
 * @throws IOException if the collector fails
 */
@Override
public void reduce(Text itemset, Iterator<IntWritable> values, OutputCollector<Text, IntWritable> output,
        Reporter reporter) throws IOException {
    int partialTotal = 0;
    for (; values.hasNext();) {
        IntWritable count = values.next();
        partialTotal += count.get();
    }
    IntWritable total = new IntWritable(partialTotal);
    output.collect(itemset, total);
}