List of usage examples for org.apache.hadoop.io.IntWritable.set
public void set(int value)
From source file:edu.ub.ahstfg.io.document.ParsedDocument.java
License:Open Source License
/**
 * Registers one occurrence of a term.
 * A term that is not yet present is stored with the value returned by
 * {@code ONE()}; a term that is already present has its stored counter
 * incremented by one.
 *
 * @param term Term to add.
 */
public void addTerm(Text term) {
    if (!terms.containsKey(term)) {
        // First occurrence: start the counter.
        terms.put(term, ONE());
        return;
    }
    final IntWritable frequency = (IntWritable) terms.get(term);
    frequency.set(frequency.get() + 1);
}
From source file:edu.ub.ahstfg.io.document.ParsedDocument.java
License:Open Source License
/**
 * Registers one occurrence of a keyword.
 * A keyword that is not yet present is stored with the value returned by
 * {@code ONE()}; a keyword that is already present has its stored counter
 * incremented by one.
 *
 * @param keyword Keyword to add.
 */
public void addKeyword(Text keyword) {
    if (!keywords.containsKey(keyword)) {
        // First occurrence: start the counter.
        keywords.put(keyword, ONE());
        return;
    }
    final IntWritable frequency = (IntWritable) keywords.get(keyword);
    frequency.set(frequency.get() + 1);
}
From source file:edu.uci.ics.asterix.external.indexing.dataflow.IndexingScheduler.java
License:Apache License
/** * Scan the splits once and build a popularity map * /* w ww . j a va 2 s . c o m*/ * @param splits * the split array * @param locationToNumOfSplits * the map to be built * @throws IOException */ private void buildPopularityMap(InputSplit[] splits, Map<String, IntWritable> locationToNumOfSplits) throws IOException { for (InputSplit split : splits) { String[] locations = split.getLocations(); for (String loc : locations) { IntWritable locCount = locationToNumOfSplits.get(loc); if (locCount == null) { locCount = new IntWritable(0); locationToNumOfSplits.put(loc, locCount); } locCount.set(locCount.get() + 1); } } }
From source file:edu.uci.ics.hyracks.hdfs.scheduler.IPProximityNcCollectionBuilder.java
License:Apache License
@Override public INcCollection build(Map<String, NodeControllerInfo> ncNameToNcInfos, final Map<String, List<String>> ipToNcMapping, final Map<String, Integer> ncNameToIndex, String[] NCs, final int[] workloads, final int slotLimit) { final TreeMap<BytesWritable, IntWritable> availableIpsToSlots = new TreeMap<BytesWritable, IntWritable>(); for (int i = 0; i < workloads.length; i++) { if (workloads[i] < slotLimit) { byte[] rawip; try { rawip = ncNameToNcInfos.get(NCs[i]).getNetworkAddress().lookupIpAddress(); } catch (UnknownHostException e) { // QQQ Should probably have a neater solution than this throw new RuntimeException(e); }// w w w.j a v a 2 s .c om BytesWritable ip = new BytesWritable(rawip); IntWritable availableSlot = availableIpsToSlots.get(ip); if (availableSlot == null) { availableSlot = new IntWritable(slotLimit - workloads[i]); availableIpsToSlots.put(ip, availableSlot); } else { availableSlot.set(slotLimit - workloads[i] + availableSlot.get()); } } } return new INcCollection() { @Override public String findNearestAvailableSlot(InputSplit split) { try { String[] locs = split.getLocations(); int minDistance = Integer.MAX_VALUE; BytesWritable currentCandidateIp = null; if (locs == null || locs.length > 0) { for (int j = 0; j < locs.length; j++) { /** * get all the IP addresses from the name */ InetAddress[] allIps = InetAddress.getAllByName(locs[j]); for (InetAddress ip : allIps) { BytesWritable splitIp = new BytesWritable(ip.getAddress()); /** * if the node controller exists */ BytesWritable candidateNcIp = availableIpsToSlots.floorKey(splitIp); if (candidateNcIp == null) { candidateNcIp = availableIpsToSlots.ceilingKey(splitIp); } if (candidateNcIp != null) { if (availableIpsToSlots.get(candidateNcIp).get() > 0) { byte[] candidateIP = candidateNcIp.getBytes(); byte[] splitIP = splitIp.getBytes(); int candidateInt = candidateIP[0] << 24 | (candidateIP[1] & 0xFF) << 16 | (candidateIP[2] & 0xFF) << 8 | (candidateIP[3] & 0xFF); int splitInt = splitIP[0] 
<< 24 | (splitIP[1] & 0xFF) << 16 | (splitIP[2] & 0xFF) << 8 | (splitIP[3] & 0xFF); int distance = Math.abs(candidateInt - splitInt); if (minDistance > distance) { minDistance = distance; currentCandidateIp = candidateNcIp; } } } } } } else { for (Entry<BytesWritable, IntWritable> entry : availableIpsToSlots.entrySet()) { if (entry.getValue().get() > 0) { currentCandidateIp = entry.getKey(); break; } } } if (currentCandidateIp != null) { /** * Update the entry of the selected IP */ IntWritable availableSlot = availableIpsToSlots.get(currentCandidateIp); availableSlot.set(availableSlot.get() - 1); if (availableSlot.get() == 0) { availableIpsToSlots.remove(currentCandidateIp); } /** * Update the entry of the selected NC */ List<String> dataLocations = ipToNcMapping .get(InetAddress.getByAddress(currentCandidateIp.getBytes()).getHostAddress()); for (String nc : dataLocations) { int ncIndex = ncNameToIndex.get(nc); if (workloads[ncIndex] < slotLimit) { return nc; } } } /** not scheduled */ return null; } catch (Exception e) { throw new IllegalStateException(e); } } @Override public int numAvailableSlots() { return availableIpsToSlots.size(); } }; }
From source file:edu.uci.ics.hyracks.hdfs.scheduler.RackAwareNcCollectionBuilder.java
License:Apache License
@Override public INcCollection build(Map<String, NodeControllerInfo> ncNameToNcInfos, final Map<String, List<String>> ipToNcMapping, final Map<String, Integer> ncNameToIndex, String[] NCs, final int[] workloads, final int slotLimit) { try {//from w w w. j av a 2s.c o m final Map<List<Integer>, List<String>> pathToNCs = new HashMap<List<Integer>, List<String>>(); for (int i = 0; i < NCs.length; i++) { List<Integer> path = new ArrayList<Integer>(); String ipAddress = InetAddress .getByAddress(ncNameToNcInfos.get(NCs[i]).getNetworkAddress().lookupIpAddress()) .getHostAddress(); topology.lookupNetworkTerminal(ipAddress, path); if (path.size() <= 0) { // if the hyracks nc is not in the defined cluster path.add(Integer.MIN_VALUE); LOGGER.info(NCs[i] + "'s IP address is not in the cluster toplogy file!"); } List<String> ncs = pathToNCs.get(path); if (ncs == null) { ncs = new ArrayList<String>(); pathToNCs.put(path, ncs); } ncs.add(NCs[i]); } final TreeMap<List<Integer>, IntWritable> availableIpsToSlots = new TreeMap<List<Integer>, IntWritable>( new Comparator<List<Integer>>() { @Override public int compare(List<Integer> l1, List<Integer> l2) { int commonLength = Math.min(l1.size(), l2.size()); for (int i = 0; i < commonLength; i++) { Integer value1 = l1.get(i); Integer value2 = l2.get(i); int cmp = value1 > value2 ? 1 : (value1 < value2 ? -1 : 0); if (cmp != 0) { return cmp; } } return l1.size() > l2.size() ? 1 : (l1.size() < l2.size() ? 
-1 : 0); } }); for (int i = 0; i < workloads.length; i++) { if (workloads[i] < slotLimit) { List<Integer> path = new ArrayList<Integer>(); String ipAddress = InetAddress .getByAddress(ncNameToNcInfos.get(NCs[i]).getNetworkAddress().lookupIpAddress()) .getHostAddress(); topology.lookupNetworkTerminal(ipAddress, path); if (path.size() <= 0) { // if the hyracks nc is not in the defined cluster path.add(Integer.MIN_VALUE); } IntWritable availableSlot = availableIpsToSlots.get(path); if (availableSlot == null) { availableSlot = new IntWritable(slotLimit - workloads[i]); availableIpsToSlots.put(path, availableSlot); } else { availableSlot.set(slotLimit - workloads[i] + availableSlot.get()); } } } return new INcCollection() { @Override public String findNearestAvailableSlot(InputSplit split) { try { String[] locs = split.getLocations(); int minDistance = Integer.MAX_VALUE; List<Integer> currentCandidatePath = null; if (locs == null || locs.length > 0) { for (int j = 0; j < locs.length; j++) { /** * get all the IP addresses from the name */ InetAddress[] allIps = InetAddress.getAllByName(locs[j]); boolean inTopology = false; for (InetAddress ip : allIps) { List<Integer> splitPath = new ArrayList<Integer>(); boolean inCluster = topology.lookupNetworkTerminal(ip.getHostAddress(), splitPath); if (!inCluster) { continue; } inTopology = true; /** * if the node controller exists */ List<Integer> candidatePath = availableIpsToSlots.floorKey(splitPath); if (candidatePath == null) { candidatePath = availableIpsToSlots.ceilingKey(splitPath); } if (candidatePath != null) { if (availableIpsToSlots.get(candidatePath).get() > 0) { int distance = distance(splitPath, candidatePath); if (minDistance > distance) { minDistance = distance; currentCandidatePath = candidatePath; } } } } if (!inTopology) { LOGGER.info(locs[j] + "'s IP address is not in the cluster toplogy file!"); /** * if the machine is not in the toplogy file */ List<Integer> candidatePath = null; for (Entry<List<Integer>, 
IntWritable> entry : availableIpsToSlots.entrySet()) { if (entry.getValue().get() > 0) { candidatePath = entry.getKey(); break; } } /** the split path is empty */ if (candidatePath != null) { if (availableIpsToSlots.get(candidatePath).get() > 0) { currentCandidatePath = candidatePath; } } } } } else { for (Entry<List<Integer>, IntWritable> entry : availableIpsToSlots.entrySet()) { if (entry.getValue().get() > 0) { currentCandidatePath = entry.getKey(); break; } } } if (currentCandidatePath != null && currentCandidatePath.size() > 0) { /** * Update the entry of the selected IP */ IntWritable availableSlot = availableIpsToSlots.get(currentCandidatePath); availableSlot.set(availableSlot.get() - 1); if (availableSlot.get() == 0) { availableIpsToSlots.remove(currentCandidatePath); } /** * Update the entry of the selected NC */ List<String> candidateNcs = pathToNCs.get(currentCandidatePath); for (String candidate : candidateNcs) { int ncIndex = ncNameToIndex.get(candidate); if (workloads[ncIndex] < slotLimit) { return candidate; } } } /** not scheduled */ return null; } catch (Exception e) { throw new IllegalStateException(e); } } @Override public int numAvailableSlots() { return availableIpsToSlots.size(); } private int distance(List<Integer> splitPath, List<Integer> candidatePath) { int commonLength = Math.min(splitPath.size(), candidatePath.size()); int distance = 0; for (int i = 0; i < commonLength; i++) { distance = distance * 100 + Math.abs(splitPath.get(i) - candidatePath.get(i)); } List<Integer> restElements = splitPath.size() > candidatePath.size() ? splitPath : candidatePath; for (int i = commonLength; i < restElements.size(); i++) { distance = distance * 100 + Math.abs(restElements.get(i)); } return distance; } }; } catch (Exception e) { throw new IllegalStateException(e); } }
From source file:edu.uci.ics.pregelix.core.optimizer.DynamicOptimizer.java
License:Apache License
/** * initialize the load-per-machine map//from w ww . j a v a2s . co m * * @return the degree of parallelism * @throws HyracksException */ private int initializeLoadPerMachine() throws HyracksException { machineToDegreeOfParallelism.clear(); String[] locationConstraints = ClusterConfig.getLocationConstraint(); for (String loc : locationConstraints) { machineToDegreeOfParallelism.put(loc, new IntWritable(0)); } dop = 0; for (Entry<String, IntWritable> entry : machineToDegreeOfParallelism.entrySet()) { String loc = entry.getKey(); //reserve one core for heartbeat int load = (int) counterContext.getCounter(Counters.NUM_PROCESSOR, false).get(); //load = load > 3 ? load - 2 : load; IntWritable count = machineToDegreeOfParallelism.get(loc); count.set(load); dop += load; } return dop; }
From source file:edu.umn.cs.spatialHadoop.core.RectangleNN.java
License:Open Source License
public static <S1 extends Shape, S2 extends Shape> int SpatialJoin_planeSweepFilterOnly(final List<S1> R, final List<S2> S, final ResultCollector2<S1, S2> output, Reporter reporter) throws IOException { LOG.debug("Start spatial join plan sweep algorithm !!!"); final RectangleID[] Rmbrs = new RectangleID[R.size()]; for (int i = 0; i < R.size(); i++) { Rmbrs[i] = new RectangleID(i, R.get(i).getMBR()); }/* w w w .j av a2 s . c om*/ final RectangleID[] Smbrs = new RectangleID[S.size()]; for (int i = 0; i < S.size(); i++) { Smbrs[i] = new RectangleID(i, S.get(i).getMBR()); } final IntWritable count = new IntWritable(); int filterCount = SpatialJoin_rectangles(Rmbrs, Smbrs, new OutputCollector<RectangleID, RectangleID>() { @Override public void collect(RectangleID r1, RectangleID r2) throws IOException { //if (R.get(r1.id).isIntersected(S.get(r2.id))) { if (output != null) output.collect(R.get(r1.id), S.get(r2.id)); count.set(count.get() + 1); //} } }, reporter); LOG.debug("Filtered result size " + filterCount + ", refined result size " + count.get()); return count.get(); }
From source file:edu.umn.cs.spatialHadoop.core.RectangleNN.java
License:Open Source License
/** * The general version of self join algorithm which works with arbitrary * shapes. First, it performs a filter step where it finds shapes with * overlapping MBRs. Second, an optional refine step can be executed to * return only shapes which actually overlap. * @param R - input set of shapes//from w w w . ja v a 2 s. co m * @param refine - Whether or not to run a refine step * @param output - output collector where the results are reported * @return - number of pairs returned by the planesweep algorithm * @throws IOException */ public static <S extends Shape> int SelfJoin_planeSweep(final S[] R, boolean refine, final OutputCollector<S, S> output, Progressable reporter) throws IOException { // Use a two-phase filter and refine approach // 1- Use MBRs as a first filter // 2- Use ConvexHull as a second filter // 3- Use the exact shape for refinement final RectangleID[] mbrs = new RectangleID[R.length]; for (int i = 0; i < R.length; i++) { mbrs[i] = new RectangleID(i, R[i].getMBR()); } if (refine) { final IntWritable count = new IntWritable(); int filterCount = SelfJoin_rectangles(mbrs, new OutputCollector<RectangleID, RectangleID>() { @Override public void collect(RectangleID r1, RectangleID r2) throws IOException { if (R[r1.id].isIntersected(R[r2.id])) { if (output != null) output.collect(R[r1.id], R[r2.id]); count.set(count.get() + 1); } } }, reporter); LOG.debug("Filtered result size " + filterCount + ", refined result size " + count.get()); return count.get(); } else { return SelfJoin_rectangles(mbrs, new OutputCollector<RectangleID, RectangleID>() { @Override public void collect(RectangleID r1, RectangleID r2) throws IOException { if (output != null) output.collect(R[r1.id], R[r2.id]); } }, reporter); } }
From source file:edu.umn.cs.spatialHadoop.indexing.Indexer.java
License:Open Source License
private static void indexLocal(Path inPath, final Path outPath, OperationsParams params) throws IOException, InterruptedException { Job job = Job.getInstance(params);//from w ww .j a v a2s. c o m final Configuration conf = job.getConfiguration(); final String sindex = conf.get("sindex"); // Start reading input file List<InputSplit> splits = new ArrayList<InputSplit>(); final SpatialInputFormat3<Rectangle, Shape> inputFormat = new SpatialInputFormat3<Rectangle, Shape>(); FileSystem inFs = inPath.getFileSystem(conf); FileStatus inFStatus = inFs.getFileStatus(inPath); if (inFStatus != null && !inFStatus.isDir()) { // One file, retrieve it immediately. // This is useful if the input is a hidden file which is automatically // skipped by FileInputFormat. We need to plot a hidden file for the case // of plotting partition boundaries of a spatial index splits.add(new FileSplit(inPath, 0, inFStatus.getLen(), new String[0])); } else { SpatialInputFormat3.setInputPaths(job, inPath); for (InputSplit s : inputFormat.getSplits(job)) splits.add(s); } // Copy splits to a final array to be used in parallel final FileSplit[] fsplits = splits.toArray(new FileSplit[splits.size()]); boolean replicate = PartitionerReplicate.get(sindex); // Set input file MBR if not already set Rectangle inputMBR = (Rectangle) OperationsParams.getShape(conf, "mbr"); if (inputMBR == null) { inputMBR = FileMBR.fileMBR(inPath, new OperationsParams(conf)); OperationsParams.setShape(conf, "mbr", inputMBR); } setLocalIndexer(conf, sindex); final Partitioner partitioner = createPartitioner(inPath, outPath, conf, sindex); final IndexRecordWriter<Shape> recordWriter = new IndexRecordWriter<Shape>(partitioner, replicate, sindex, outPath, conf); for (FileSplit fsplit : fsplits) { RecordReader<Rectangle, Iterable<Shape>> reader = inputFormat.createRecordReader(fsplit, null); if (reader instanceof SpatialRecordReader3) { ((SpatialRecordReader3) reader).initialize(fsplit, conf); } else if (reader instanceof 
RTreeRecordReader3) { ((RTreeRecordReader3) reader).initialize(fsplit, conf); } else if (reader instanceof HDFRecordReader) { ((HDFRecordReader) reader).initialize(fsplit, conf); } else { throw new RuntimeException("Unknown record reader"); } final IntWritable partitionID = new IntWritable(); while (reader.nextKeyValue()) { Iterable<Shape> shapes = reader.getCurrentValue(); if (replicate) { for (final Shape s : shapes) { partitioner.overlapPartitions(s, new ResultCollector<Integer>() { @Override public void collect(Integer id) { partitionID.set(id); try { recordWriter.write(partitionID, s); } catch (IOException e) { throw new RuntimeException(e); } } }); } } else { for (final Shape s : shapes) { int pid = partitioner.overlapPartition(s); if (pid != -1) { partitionID.set(pid); recordWriter.write(partitionID, s); } } } } reader.close(); } recordWriter.close(null); // Write the WKT formatted master file Path masterPath = new Path(outPath, "_master." + sindex); FileSystem outFs = outPath.getFileSystem(params); Path wktPath = new Path(outPath, "_" + sindex + ".wkt"); PrintStream wktOut = new PrintStream(outFs.create(wktPath)); wktOut.println("ID\tBoundaries\tRecord Count\tSize\tFile name"); Text tempLine = new Text2(); Partition tempPartition = new Partition(); LineReader in = new LineReader(outFs.open(masterPath)); while (in.readLine(tempLine) > 0) { tempPartition.fromText(tempLine); wktOut.println(tempPartition.toWKT()); } in.close(); wktOut.close(); }
From source file:edu.umn.cs.spatialHadoop.operations.Indexer.java
License:Open Source License
private static void indexLocal(Path inPath, Path outPath, OperationsParams params) throws IOException { JobConf job = new JobConf(params); String sindex = params.get("sindex"); Partitioner partitioner = createPartitioner(inPath, outPath, job, sindex); // Start reading input file Vector<InputSplit> splits = new Vector<InputSplit>(); final ShapeIterInputFormat inputFormat = new ShapeIterInputFormat(); FileSystem inFs = inPath.getFileSystem(params); FileStatus inFStatus = inFs.getFileStatus(inPath); if (inFStatus != null && !inFStatus.isDir()) { // One file, retrieve it immediately. // This is useful if the input is a hidden file which is automatically // skipped by FileInputFormat. We need to plot a hidden file for the case // of plotting partition boundaries of a spatial index splits.add(new FileSplit(inPath, 0, inFStatus.getLen(), new String[0])); } else {/*w ww .jav a2s . c om*/ ShapeIterInputFormat.addInputPath(job, inPath); for (InputSplit s : inputFormat.getSplits(job, 1)) splits.add(s); } // Copy splits to a final array to be used in parallel final FileSplit[] fsplits = splits.toArray(new FileSplit[splits.size()]); boolean replicate = job.getBoolean("replicate", false); final IndexRecordWriter<Shape> recordWriter = new IndexRecordWriter<Shape>(partitioner, replicate, sindex, outPath, params); for (FileSplit fsplit : fsplits) { RecordReader<Rectangle, Iterable<? extends Shape>> reader = inputFormat.getRecordReader(fsplit, job, null); Rectangle partitionMBR = reader.createKey(); Iterable<? 
extends Shape> shapes = reader.createValue(); final IntWritable partitionID = new IntWritable(); while (reader.next(partitionMBR, shapes)) { if (replicate) { // Replicate each shape to all overlapping partitions for (final Shape s : shapes) { partitioner.overlapPartitions(s, new ResultCollector<Integer>() { @Override public void collect(Integer id) { partitionID.set(id); try { recordWriter.write(partitionID, s); } catch (IOException e) { throw new RuntimeException(e); } } }); } } else { for (Shape s : shapes) { partitionID.set(partitioner.overlapPartition(s)); recordWriter.write(partitionID, s); } } } reader.close(); } recordWriter.close(null); }