List of usage examples for org.apache.hadoop.io.IntWritable.get()
public int get()
From source file:edu.ub.ahstfg.io.document.ParsedDocument.java
License:Open Source License
/** * Builds and gets a vector with added keywords. * @return The keyword vector./*from w w w .j a va 2s .c o m*/ */ public long[] getTermFreqVector() { long[] ret = new long[terms.size()]; Text t; IntWritable v; int i = 0; for (Writable w : terms.keySet()) { t = (Text) w; v = (IntWritable) terms.get(t); ret[i] = v.get(); i++; } return ret; }
From source file:edu.ub.ahstfg.io.document.ParsedDocument.java
License:Open Source License
/**
 * Adds a keyword occurrence. If the keyword is already present its frequency
 * is increased by one; otherwise it is inserted with the value supplied by {@code ONE()}.
 *
 * @param keyword Keyword to add.
 */
public void addKeyword(Text keyword) {
    // Single lookup instead of containsKey() followed by get().
    IntWritable count = (IntWritable) keywords.get(keyword);
    if (count == null) {
        keywords.put(keyword, ONE());
    } else {
        count.set(count.get() + 1);
    }
}
From source file:edu.ub.ahstfg.io.document.ParsedDocument.java
License:Open Source License
/** * Gets the HashMap containing keyword frequency. * @return Keyword frequency HashMap./*from ww w. j a v a 2 s. c om*/ */ public HashMap<String, Short> getKeywordMap() { HashMap<String, Short> ret = new HashMap<String, Short>(); Text t; IntWritable value; for (Writable w : keywords.keySet()) { t = (Text) w; value = (IntWritable) keywords.get(w); ret.put(t.toString(), (short) value.get()); } return ret; }
From source file:edu.ub.ahstfg.io.WritableConverter.java
License:Open Source License
/** * Converts long ArrayWritable to long LinkedList. * @param input Long ArrayWritable to convert. * @return Converted long LinkedList./*from www . j a va2 s .com*/ */ public static LinkedList<Short> arrayWritable2LinkedListShort(ArrayWritable input) { LinkedList<Short> ret = new LinkedList<Short>(); Writable[] ws = input.get(); IntWritable l; for (Writable w : ws) { l = (IntWritable) w; ret.add((short) l.get()); } return ret; }
From source file:edu.ub.ahstfg.io.WritableConverter.java
License:Open Source License
/** * Converts Long ArrayWritable to long static array. * @param input Long ArrayWritable to convert. * @return Converted long static array./*from w ww . jav a 2s .co m*/ */ public static short[] arrayWritable2ShortArray(ArrayWritable input) { Writable[] ws = input.get(); short[] ret = new short[ws.length]; int i = 0; IntWritable t; for (Writable w : ws) { t = (IntWritable) w; ret[i] = (short) t.get(); i++; } return ret; }
From source file:edu.ub.ahstfg.kmeans.KmeansReducer.java
License:Open Source License
@Override public void reduce(IntWritable key, Iterator<DocumentDistance> values, OutputCollector<Text, IntWritable> output, Reporter reporter) throws IOException { reporter.incrCounter(REPORTER_GROUP, "Num reducers", 1); int nKeywords = job.getInt(ParamSet.NUM_KEYWORDS, 0); int nTerms = job.getInt(ParamSet.NUM_TERMS, 0); //boolean haveKeywords = nKeywords > 0; DocumentDistance d;/*from ww w.j a va2s . c o m*/ ArrayList<short[]> keys = new ArrayList<short[]>(); ArrayList<short[]> terms = new ArrayList<short[]>(); DocumentDescriptor doc; boolean stub = false, allStub = false; int nStub = 0, count = 0; while (values.hasNext()) { count++; d = values.next(); stub = d.isStub(); if (!stub) { doc = d.getDoc(); keys.add(doc.getKeyFreq()); terms.add(doc.getTermFreq()); output.collect(new Text(doc.getUrl()), key); } else { nStub++; } } if (nStub >= count) { allStub = true; } String centroidPath = Centroids.CENTROIDS_FILE_PREFIX + String.valueOf(key.get()); String oldPath = job.get(ParamSet.OLD_CENTROIDS_PATH) + centroidPath; DocumentCentroid oldCentroid = new DocumentCentroid(); oldCentroid.fromHDFS(new Path(oldPath)); DocumentCentroid newCentroid = null; if (!allStub) { newCentroid = DocumentCentroid.calculateCentroid(nKeywords, nTerms, keys, terms); } else { newCentroid = oldCentroid; reporter.incrCounter(REPORTER_GROUP, "Stub centroids", 1); } double distance = newCentroid.distance(oldCentroid, job.getFloat(job.get(ParamSet.WEIGHT_KEYWORDS), (float) 0.5), job.getFloat(job.get(ParamSet.WEIGHT_TERMS), (float) 0.5)); newCentroid.setDistance(distance); String newPath = job.get(ParamSet.NEW_CENTROIDS_PATH) + centroidPath; newCentroid.toHDFS(new Path(newPath)); }
From source file:edu.ubc.mirrors.holographs.mapreduce.InvokeMethodMapper.java
License:Open Source License
@Override public void map(IntWritable key, IntWritable value, OutputCollector<Text, IntWritable> output, Reporter reporter) throws IOException { int objectId = value.get(); reporter.setStatus("Invoking method on object " + objectId); // TODO-RS: Reporter -> ProgressListener adaptor try {//from w w w . java 2s . co m ObjectMirror mirror = HolographVMRegistry.getObjectMirror(snapshot, objectId, new VoidProgressListener()); String nameString = Reflection .getRealStringForMirror((InstanceMirror) method.invoke(thread, null, mirror)); name.set(nameString); output.collect(name, one); } catch (IllegalArgumentException e) { throw new RuntimeException(e); } catch (IllegalAccessException e) { throw new RuntimeException(e); } catch (MirrorInvocationTargetException e) { throw new RuntimeException(e); } catch (SnapshotException e) { throw new RuntimeException(e); } }
From source file:edu.uci.ics.asterix.external.indexing.dataflow.IndexingScheduler.java
License:Apache License
/** * Scan the splits once and build a popularity map * /*from w w w. j a v a 2 s .co m*/ * @param splits * the split array * @param locationToNumOfSplits * the map to be built * @throws IOException */ private void buildPopularityMap(InputSplit[] splits, Map<String, IntWritable> locationToNumOfSplits) throws IOException { for (InputSplit split : splits) { String[] locations = split.getLocations(); for (String loc : locations) { IntWritable locCount = locationToNumOfSplits.get(loc); if (locCount == null) { locCount = new IntWritable(0); locationToNumOfSplits.put(loc, locCount); } locCount.set(locCount.get() + 1); } } }
From source file:edu.uci.ics.fuzzyjoin.hadoop.ridpairs.token.ReduceVerifyListSelfJoin.java
License:Apache License
public void reduce(IntWritable key, Iterator<ValueSelfJoin> values, OutputCollector<Text, NullWritable> output, Reporter reporter) throws IOException { while (values.hasNext()) { ValueSelfJoin rec = values.next(); ValueSelfJoin recCopy = new ValueSelfJoin(rec); records.add(recCopy);// www .java 2s.co m } for (int i = 0; i < records.size(); i++) { ValueSelfJoin rec1 = records.get(i); for (int j = i + 1; j < records.size(); j++) { ValueSelfJoin rec2 = records.get(j); // reporter.incrCounter(Counters.PAIRS_BUILD, 1); int[] tokens1 = rec1.getTokens(); int[] tokens2 = rec2.getTokens(); if (!similarityFilters.passLengthFilter(tokens1.length, tokens2.length)) { // reporter.incrCounter(Counters.PAIRS_FILTERED, 1); continue; } PartialIntersect p = SimilarityMetric.getPartialIntersectSize(tokens1, tokens2, key.get()); if (!similarityFilters.passPositionFilter(p.intersectSize, p.posXStop, tokens1.length, p.posYStop, tokens2.length)) { continue; } if (!similarityFilters.passSuffixFilter(tokens1, p.posXStart, tokens2, p.posYStart)) { continue; } float similarity = similarityFilters.passSimilarityFilter(tokens1, p.posXStop + 1, tokens2, p.posYStop + 1, p.intersectSize); // similarityMetric.getSimilarity(tokens1, // tokens2); // reporter.incrCounter(Counters.PAIRS_VERIFIED, 1); if (similarity > 0 // = similarityThreshold ) { int ridA = rec1.getRID(); int ridB = rec2.getRID(); if (ridA < ridB) { int rid = ridA; ridA = ridB; ridB = rid; } text.set("" + ridA + FuzzyJoinConfig.RIDPAIRS_SEPARATOR + ridB + FuzzyJoinConfig.RIDPAIRS_SEPARATOR + similarity); output.collect(text, nullWritable); } } } records.clear(); }
From source file:edu.uci.ics.hyracks.hdfs.scheduler.IPProximityNcCollectionBuilder.java
License:Apache License
@Override public INcCollection build(Map<String, NodeControllerInfo> ncNameToNcInfos, final Map<String, List<String>> ipToNcMapping, final Map<String, Integer> ncNameToIndex, String[] NCs, final int[] workloads, final int slotLimit) { final TreeMap<BytesWritable, IntWritable> availableIpsToSlots = new TreeMap<BytesWritable, IntWritable>(); for (int i = 0; i < workloads.length; i++) { if (workloads[i] < slotLimit) { byte[] rawip; try { rawip = ncNameToNcInfos.get(NCs[i]).getNetworkAddress().lookupIpAddress(); } catch (UnknownHostException e) { // QQQ Should probably have a neater solution than this throw new RuntimeException(e); }/* ww w.j a v a2s .co m*/ BytesWritable ip = new BytesWritable(rawip); IntWritable availableSlot = availableIpsToSlots.get(ip); if (availableSlot == null) { availableSlot = new IntWritable(slotLimit - workloads[i]); availableIpsToSlots.put(ip, availableSlot); } else { availableSlot.set(slotLimit - workloads[i] + availableSlot.get()); } } } return new INcCollection() { @Override public String findNearestAvailableSlot(InputSplit split) { try { String[] locs = split.getLocations(); int minDistance = Integer.MAX_VALUE; BytesWritable currentCandidateIp = null; if (locs == null || locs.length > 0) { for (int j = 0; j < locs.length; j++) { /** * get all the IP addresses from the name */ InetAddress[] allIps = InetAddress.getAllByName(locs[j]); for (InetAddress ip : allIps) { BytesWritable splitIp = new BytesWritable(ip.getAddress()); /** * if the node controller exists */ BytesWritable candidateNcIp = availableIpsToSlots.floorKey(splitIp); if (candidateNcIp == null) { candidateNcIp = availableIpsToSlots.ceilingKey(splitIp); } if (candidateNcIp != null) { if (availableIpsToSlots.get(candidateNcIp).get() > 0) { byte[] candidateIP = candidateNcIp.getBytes(); byte[] splitIP = splitIp.getBytes(); int candidateInt = candidateIP[0] << 24 | (candidateIP[1] & 0xFF) << 16 | (candidateIP[2] & 0xFF) << 8 | (candidateIP[3] & 0xFF); int splitInt = splitIP[0] << 
24 | (splitIP[1] & 0xFF) << 16 | (splitIP[2] & 0xFF) << 8 | (splitIP[3] & 0xFF); int distance = Math.abs(candidateInt - splitInt); if (minDistance > distance) { minDistance = distance; currentCandidateIp = candidateNcIp; } } } } } } else { for (Entry<BytesWritable, IntWritable> entry : availableIpsToSlots.entrySet()) { if (entry.getValue().get() > 0) { currentCandidateIp = entry.getKey(); break; } } } if (currentCandidateIp != null) { /** * Update the entry of the selected IP */ IntWritable availableSlot = availableIpsToSlots.get(currentCandidateIp); availableSlot.set(availableSlot.get() - 1); if (availableSlot.get() == 0) { availableIpsToSlots.remove(currentCandidateIp); } /** * Update the entry of the selected NC */ List<String> dataLocations = ipToNcMapping .get(InetAddress.getByAddress(currentCandidateIp.getBytes()).getHostAddress()); for (String nc : dataLocations) { int ncIndex = ncNameToIndex.get(nc); if (workloads[ncIndex] < slotLimit) { return nc; } } } /** not scheduled */ return null; } catch (Exception e) { throw new IllegalStateException(e); } } @Override public int numAvailableSlots() { return availableIpsToSlots.size(); } }; }