List of usage examples for org.apache.hadoop.io IntWritable get
public int get()
From source file:com.mongodb.hadoop.examples.treasury.TreasuryYieldUpdateReducer.java
License:Apache License
@Override public void reduce(final IntWritable pKey, final Iterable<DoubleWritable> pValues, final Context pContext) throws IOException, InterruptedException { int count = 0; double sum = 0; for (final DoubleWritable value : pValues) { sum += value.get();//from w w w . j ava2s. c o m count++; } final double avg = sum / count; LOG.debug("Average 10 Year Treasury for " + pKey.get() + " was " + avg); BasicBSONObject query = new BasicBSONObject("_id", pKey.get()); BasicBSONObject modifiers = new BasicBSONObject(); modifiers.put("$set", BasicDBObjectBuilder.start().add("count", count).add("avg", avg).add("sum", sum).get()); modifiers.put("$push", new BasicBSONObject("calculatedAt", new Date())); modifiers.put("$inc", new BasicBSONObject("numCalculations", 1)); pContext.write(null, new MongoUpdateWritable(query, modifiers, true, false)); }
From source file:com.mongodb.hadoop.examples.TreasuryYieldReducer.java
License:Apache License
/**
 * Computes the mean of the values received for {@code key} and writes it
 * back out under the same key.
 *
 * @param key     key the values belong to; its int value is used in the log message
 * @param values  values to average; each one is logged at debug level
 * @param context context the (key, mean) pair is written to
 * @throws IOException          if writing to the context fails
 * @throws InterruptedException if writing to the context is interrupted
 */
public void reduce(IntWritable key, Iterable<DoubleWritable> values, Context context)
        throws IOException, InterruptedException {
    double total = 0;
    int samples = 0;

    for (final DoubleWritable value : values) {
        log.debug("Key: " + key + " Value: " + value);
        total = total + value.get();
        samples = samples + 1;
    }

    final double mean = total / samples;
    log.info("Average 10 Year Treasury for " + key.get() + " was " + mean);

    final DoubleWritable result = new DoubleWritable(mean);
    context.write(key, result);
}
From source file:com.mongodb.hadoop.examples.ufos.UfoSightingsReducer.java
License:Apache License
/**
 * Adds up every count received for {@code location} and writes the total
 * under that same location.
 *
 * @param location  key the counts belong to
 * @param sightings counts to add together; each one is logged at debug level
 * @param pContext  context the (location, total) pair is written to
 * @throws IOException          if writing to the context fails
 * @throws InterruptedException if writing to the context is interrupted
 */
@Override
public void reduce(final Text location, final Iterable<IntWritable> sightings, final Context pContext)
        throws IOException, InterruptedException {
    int total = 0;
    for (final IntWritable sighting : sightings) {
        LOG.debug("Location: " + location + " Value: " + sighting);
        total = total + sighting.get();
    }

    final IntWritable result = new IntWritable(total);
    pContext.write(location, result);
}
From source file:com.moz.fiji.mapreduce.lib.reduce.IntSumReducer.java
License:Apache License
/**
 * {@inheritDoc}
 *
 * <p>Sums the int values received for {@code key} and writes the total
 * through the {@code mValue} field.
 */
@Override
protected void reduce(K key, Iterable<IntWritable> values, Context context)
        throws IOException, InterruptedException {
    int total = 0;
    for (IntWritable addend : values) {
        total = total + addend.get();
    }

    // The output writable is a field that is overwritten on each call.
    mValue.set(total);
    context.write(key, mValue);
}
From source file:com.mozilla.grouperfish.mahout.clustering.display.kmeans.OriginalText.java
License:Apache License
public Map<Integer, Set<String>> getDocIds(double sampleRate) { Random rand = new Random(); Map<Integer, Set<String>> docIdMap = new HashMap<Integer, Set<String>>(); SequenceFileDirectoryReader pointsReader = null; try {//from ww w .java2 s.c o m IntWritable k = new IntWritable(); WeightedVectorWritable wvw = new WeightedVectorWritable(); pointsReader = new SequenceFileDirectoryReader(clusteredPointsPath); while (pointsReader.next(k, wvw)) { int clusterId = k.get(); Vector v = wvw.getVector(); if (v instanceof NamedVector) { if (rand.nextDouble() < sampleRate) { NamedVector nv = (NamedVector) v; nv.getName(); Set<String> curDocIds = docIdMap.get(clusterId); if (curDocIds == null) { curDocIds = new HashSet<String>(); } curDocIds.add(nv.getName()); docIdMap.put(clusterId, curDocIds); } } } } catch (IOException e) { LOG.error("IOException caught while reading clustered points", e); } finally { if (pointsReader != null) { pointsReader.close(); } } return docIdMap; }
From source file:com.mozilla.grouperfish.mahout.clustering.display.kmeans.WordCloud.java
License:Apache License
/**
 * Builds one tag cloud per cluster id from the clustered points.
 *
 * <p>For every (cluster id, vector) pair under {@code clusteredPointsPath}, each
 * non-zero vector element is added to that cluster's cloud as a tag. The tag
 * name is looked up in {@code invertedFeatureIndex} by element index and the
 * element value is passed along with it. An {@code IOException} while reading is
 * logged, and the clouds built so far are returned.
 *
 * @param template cloud passed to the {@code Cloud} constructor whenever a
 *                 cluster id is seen for the first time
 * @return map from cluster id to its cloud
 */
public Map<Integer, Cloud> getClouds(Cloud template) {
    Map<Integer, Cloud> clouds = new HashMap<Integer, Cloud>();
    SequenceFileDirectoryReader pointsReader = null;
    try {
        IntWritable clusterKey = new IntWritable();
        WeightedVectorWritable point = new WeightedVectorWritable();
        pointsReader = new SequenceFileDirectoryReader(clusteredPointsPath);
        while (pointsReader.next(clusterKey, point)) {
            int clusterId = clusterKey.get();

            Cloud cloud = clouds.get(clusterId);
            if (cloud == null) {
                cloud = new Cloud(template);
            }

            for (Iterator<Element> nonZeros = point.getVector().iterateNonZero(); nonZeros.hasNext();) {
                Element entry = nonZeros.next();
                String feature = invertedFeatureIndex.get(entry.index());
                Tag tag = new Tag(feature, entry.get());
                cloud.addTag(tag);
            }

            clouds.put(clusterId, cloud);
        }
    } catch (IOException e) {
        LOG.error("IOException caught while reading clustered points", e);
    } finally {
        if (pointsReader != null) {
            pointsReader.close();
        }
    }

    return clouds;
}
From source file:com.mozilla.grouperfish.transforms.coclustering.display.WriteCoClusteringOutput.java
License:Apache License
/**
 * Reads every clustered point under {@code clusteredPointsPath} and adds it to
 * its co-cluster.
 *
 * <p>Directories and files whose name starts with {@code "_"} are skipped. For
 * each remaining file, every (cluster id, vector) record is cast to a
 * {@code NamedVector} whose name is parsed as an int id. The vector is put into
 * the co-cluster with the {@code docIDMap} entry and {@code true} when the id is
 * a {@code docIDMap} key, or with the {@code featureIDMap} entry and
 * {@code false} when it is a {@code featureIDMap} key; otherwise an error is logged.
 *
 * <p>{@code IOException} and {@code ClassCastException} raised while reading are
 * logged, not rethrown. The file system held in {@code fs} is closed before returning.
 *
 * @throws IOException if closing the file system fails
 */
private void loadPoints() throws IOException {
    SequenceFile.Reader currReader = null;
    IntWritable clusterKey = new IntWritable();
    WeightedVectorWritable point = new WeightedVectorWritable();
    try {
        fs = FileSystem.get(clusteredPointsPath.toUri(), conf);
        for (FileStatus status : fs.listStatus(clusteredPointsPath)) {
            Path file = status.getPath();
            if (status.isDir() || file.getName().startsWith("_")) {
                continue;
            }
            try {
                currReader = new SequenceFile.Reader(fs, file, conf);
                while (currReader.next(clusterKey, point)) {
                    CoCluster cluster = coclusters.get(clusterKey.get());
                    NamedVector named = (NamedVector) point.getVector();
                    int vectorId = Integer.parseInt(named.getName());
                    if (docIDMap.containsKey(vectorId)) {
                        cluster.put(named, docIDMap.get(vectorId), true);
                    } else if (featureIDMap.containsKey(vectorId)) {
                        cluster.put(named, featureIDMap.get(vectorId), false);
                    } else {
                        LOG.error("Key not feature or document!");
                    }
                }
            } finally {
                if (currReader != null) {
                    IOUtils.closeStream(currReader);
                }
            }
        }
    } catch (IOException ie) {
        LOG.info("Error while reading points", ie);
    } catch (ClassCastException ce) {
        LOG.info("NamedVectors possibly not used", ce);
    } finally {
        // The last reader is closed a second time here, as in the original flow.
        if (currReader != null) {
            IOUtils.closeStream(currReader);
        }
        if (fs != null) {
            fs.close();
        }
    }
}
From source file:com.mozilla.grouperfish.transforms.coclustering.pig.storage.KMeansOutputLoader.java
License:Apache License
@Override public Tuple getNext() throws IOException { try {//from w ww . j av a 2 s . c om if (!this.reader.nextKeyValue()) { return null; } Tuple currRow = tupleFactory.newTuple(3); DataBag rowInfoBag = bagFactory.newDefaultBag(); IntWritable key = (IntWritable) reader.getCurrentKey(); int clusterID = key.get(); WeightedVectorWritable value = (WeightedVectorWritable) reader.getCurrentValue(); Vector rowInfo = value.getVector(); NamedVector nrowInfo = (NamedVector) rowInfo; int vectorID = Integer.parseInt(nrowInfo.getName()); for (Iterator<Vector.Element> itr = rowInfo.iterateNonZero(); itr.hasNext();) { Vector.Element elemInfo = itr.next(); Tuple currElement = tupleFactory.newTuple(2); currElement.set(0, elemInfo.index()); currElement.set(1, elemInfo.get()); rowInfoBag.add(currElement); } currRow.set(0, clusterID); currRow.set(1, vectorID); currRow.set(2, rowInfoBag); return currRow; } catch (InterruptedException ie) { LOG.error("Interrupted while reading", ie); throw new IOException(ie); } catch (NumberFormatException ne) { LOG.error("Possible use of non int values for NamedVector keys", ne); throw new IOException(ne); } catch (ClassCastException e) { LOG.error("Possible cast of normal Vector to NamedVector", e); throw new IOException(e); } }
From source file:com.mozilla.grouperfish.transforms.coclustering.pig.storage.MahoutVectorStorage.java
License:Apache License
/**
 * Reads the next record from the reader and converts it into a two-field tuple.
 *
 * <p>The first field is the row id: the int value of the record key, replaced
 * by the vector's name parsed as an int when the vector is a {@code NamedVector}.
 * The second field is a bag with one (index, value) tuple per non-zero vector element.
 *
 * @return the converted tuple, or {@code null} when the reader has no more records
 * @throws IOException if reading is interrupted or a {@code NamedVector} name is
 *                     not an int; the original exception is kept as the cause
 */
@Override
public Tuple getNext() throws IOException {
    try {
        if (!this.reader.nextKeyValue()) {
            return null;
        }

        Tuple row = tupleFactory.newTuple(2);
        DataBag cells = bagFactory.newDefaultBag();

        IntWritable key = reader.getCurrentKey();
        int rowId = key.get();

        VectorWritable value = reader.getCurrentValue();
        Vector vector = value.get();
        if (vector instanceof NamedVector) {
            // A named vector carries its own id, which replaces the key's value.
            rowId = Integer.parseInt(((NamedVector) vector).getName());
        }

        Iterator<Vector.Element> nonZeros = vector.iterateNonZero();
        while (nonZeros.hasNext()) {
            Vector.Element element = nonZeros.next();
            Tuple cell = tupleFactory.newTuple(2);
            cell.set(0, element.index());
            cell.set(1, element.get());
            cells.add(cell);
        }

        row.set(0, rowId);
        row.set(1, cells);
        return row;
    } catch (InterruptedException ie) {
        LOG.error("Interrupted while reading", ie);
        throw new IOException(ie);
    } catch (NumberFormatException ne) {
        LOG.error("Possible use of non int values for NamedVector keys", ne);
        throw new IOException(ne);
    }
}
From source file:com.mycompany.hadooptrain.WordCountReducer.java
public void reduce(Text text, Iterable<IntWritable> values, Context context) throws IOException, InterruptedException { int sum = 0;//from w ww .j a va2 s .c o m for (IntWritable value : values) { sum += value.get(); } context.write(text, new IntWritable(sum)); }