List of usage examples for org.apache.hadoop.io IntWritable get
public int get()
From source file:edu.uci.ics.hyracks.hdfs.scheduler.RackAwareNcCollectionBuilder.java
License:Apache License
@Override public INcCollection build(Map<String, NodeControllerInfo> ncNameToNcInfos, final Map<String, List<String>> ipToNcMapping, final Map<String, Integer> ncNameToIndex, String[] NCs, final int[] workloads, final int slotLimit) { try {//from w ww. ja va 2 s . c o m final Map<List<Integer>, List<String>> pathToNCs = new HashMap<List<Integer>, List<String>>(); for (int i = 0; i < NCs.length; i++) { List<Integer> path = new ArrayList<Integer>(); String ipAddress = InetAddress .getByAddress(ncNameToNcInfos.get(NCs[i]).getNetworkAddress().lookupIpAddress()) .getHostAddress(); topology.lookupNetworkTerminal(ipAddress, path); if (path.size() <= 0) { // if the hyracks nc is not in the defined cluster path.add(Integer.MIN_VALUE); LOGGER.info(NCs[i] + "'s IP address is not in the cluster toplogy file!"); } List<String> ncs = pathToNCs.get(path); if (ncs == null) { ncs = new ArrayList<String>(); pathToNCs.put(path, ncs); } ncs.add(NCs[i]); } final TreeMap<List<Integer>, IntWritable> availableIpsToSlots = new TreeMap<List<Integer>, IntWritable>( new Comparator<List<Integer>>() { @Override public int compare(List<Integer> l1, List<Integer> l2) { int commonLength = Math.min(l1.size(), l2.size()); for (int i = 0; i < commonLength; i++) { Integer value1 = l1.get(i); Integer value2 = l2.get(i); int cmp = value1 > value2 ? 1 : (value1 < value2 ? -1 : 0); if (cmp != 0) { return cmp; } } return l1.size() > l2.size() ? 1 : (l1.size() < l2.size() ? 
-1 : 0); } }); for (int i = 0; i < workloads.length; i++) { if (workloads[i] < slotLimit) { List<Integer> path = new ArrayList<Integer>(); String ipAddress = InetAddress .getByAddress(ncNameToNcInfos.get(NCs[i]).getNetworkAddress().lookupIpAddress()) .getHostAddress(); topology.lookupNetworkTerminal(ipAddress, path); if (path.size() <= 0) { // if the hyracks nc is not in the defined cluster path.add(Integer.MIN_VALUE); } IntWritable availableSlot = availableIpsToSlots.get(path); if (availableSlot == null) { availableSlot = new IntWritable(slotLimit - workloads[i]); availableIpsToSlots.put(path, availableSlot); } else { availableSlot.set(slotLimit - workloads[i] + availableSlot.get()); } } } return new INcCollection() { @Override public String findNearestAvailableSlot(InputSplit split) { try { String[] locs = split.getLocations(); int minDistance = Integer.MAX_VALUE; List<Integer> currentCandidatePath = null; if (locs == null || locs.length > 0) { for (int j = 0; j < locs.length; j++) { /** * get all the IP addresses from the name */ InetAddress[] allIps = InetAddress.getAllByName(locs[j]); boolean inTopology = false; for (InetAddress ip : allIps) { List<Integer> splitPath = new ArrayList<Integer>(); boolean inCluster = topology.lookupNetworkTerminal(ip.getHostAddress(), splitPath); if (!inCluster) { continue; } inTopology = true; /** * if the node controller exists */ List<Integer> candidatePath = availableIpsToSlots.floorKey(splitPath); if (candidatePath == null) { candidatePath = availableIpsToSlots.ceilingKey(splitPath); } if (candidatePath != null) { if (availableIpsToSlots.get(candidatePath).get() > 0) { int distance = distance(splitPath, candidatePath); if (minDistance > distance) { minDistance = distance; currentCandidatePath = candidatePath; } } } } if (!inTopology) { LOGGER.info(locs[j] + "'s IP address is not in the cluster toplogy file!"); /** * if the machine is not in the toplogy file */ List<Integer> candidatePath = null; for (Entry<List<Integer>, 
IntWritable> entry : availableIpsToSlots.entrySet()) { if (entry.getValue().get() > 0) { candidatePath = entry.getKey(); break; } } /** the split path is empty */ if (candidatePath != null) { if (availableIpsToSlots.get(candidatePath).get() > 0) { currentCandidatePath = candidatePath; } } } } } else { for (Entry<List<Integer>, IntWritable> entry : availableIpsToSlots.entrySet()) { if (entry.getValue().get() > 0) { currentCandidatePath = entry.getKey(); break; } } } if (currentCandidatePath != null && currentCandidatePath.size() > 0) { /** * Update the entry of the selected IP */ IntWritable availableSlot = availableIpsToSlots.get(currentCandidatePath); availableSlot.set(availableSlot.get() - 1); if (availableSlot.get() == 0) { availableIpsToSlots.remove(currentCandidatePath); } /** * Update the entry of the selected NC */ List<String> candidateNcs = pathToNCs.get(currentCandidatePath); for (String candidate : candidateNcs) { int ncIndex = ncNameToIndex.get(candidate); if (workloads[ncIndex] < slotLimit) { return candidate; } } } /** not scheduled */ return null; } catch (Exception e) { throw new IllegalStateException(e); } } @Override public int numAvailableSlots() { return availableIpsToSlots.size(); } private int distance(List<Integer> splitPath, List<Integer> candidatePath) { int commonLength = Math.min(splitPath.size(), candidatePath.size()); int distance = 0; for (int i = 0; i < commonLength; i++) { distance = distance * 100 + Math.abs(splitPath.get(i) - candidatePath.get(i)); } List<Integer> restElements = splitPath.size() > candidatePath.size() ? splitPath : candidatePath; for (int i = commonLength; i < restElements.size(); i++) { distance = distance * 100 + Math.abs(restElements.get(i)); } return distance; } }; } catch (Exception e) { throw new IllegalStateException(e); } }
From source file:edu.uci.ics.pregelix.core.optimizer.DynamicOptimizer.java
License:Apache License
/**
 * Pins the operator to absolute locations: each machine name in
 * {@code machineToDegreeOfParallelism} is repeated as many times as its associated count, and the
 * resulting array (of length {@code dop}) is passed to
 * {@code PartitionConstraintHelper.addAbsoluteLocationConstraint}.
 *
 * @throws IllegalStateException wrapping any exception raised while building or applying the constraint
 */
@Override
public void setOptimizedLocationConstraints(JobSpecification spec, IOperatorDescriptor operator) {
    try {
        String[] locations = new String[dop];
        int next = 0;
        for (Entry<String, IntWritable> machine : machineToDegreeOfParallelism.entrySet()) {
            String ncName = machine.getKey();
            IntWritable parallelism = machine.getValue();
            int assigned = 0;
            while (assigned < parallelism.get()) {
                locations[next] = ncName;
                next++;
                assigned++;
            }
        }
        PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, operator, locations);
    } catch (Exception e) {
        throw new IllegalStateException(e);
    }
}
From source file:edu.uci.ics.pregelix.core.optimizer.DynamicOptimizer.java
License:Apache License
@Override public IFileSplitProvider getOptimizedFileSplitProvider(String jobId, String indexName) { FileSplit[] fileSplits = new FileSplit[dop]; String[] stores = ClusterConfig.getStores(); int splitIndex = 0; for (Entry<String, IntWritable> entry : machineToDegreeOfParallelism.entrySet()) { String ncName = entry.getKey(); IntWritable count = entry.getValue(); for (int j = 0; j < count.get(); j++) { //cycles stores, each machine has the number of stores = the number of cores int storeCursor = j % stores.length; String st = stores[storeCursor]; FileSplit split = new FileSplit(ncName, st + File.separator + ncName + "-data" + File.separator + jobId + File.separator + indexName + (j / stores.length)); fileSplits[splitIndex++] = split; }/* w w w . j a v a 2s. c o m*/ } return new ConstantFileSplitProvider(fileSplits); }
From source file:edu.umd.cloud9.collection.clue.ClueWarcForwardIndex.java
License:Apache License
@Override public ClueWarcRecord getDocument(int docno) { long start = System.currentTimeMillis(); // Trap invalid docnos. if (docno < getFirstDocno() || docno > getLastDocno()) { return null; }//from w ww.j a v a2 s . com int idx = Arrays.binarySearch(docnos, docno); if (idx < 0) { idx = -idx - 2; } DecimalFormat df = new DecimalFormat("00000"); String file = collectionPath + "/part-" + df.format(fileno[idx]); LOG.info("fetching docno " + docno + ": seeking to " + offsets[idx] + " at " + file); try { SequenceFile.Reader reader = new SequenceFile.Reader(conf, SequenceFile.Reader.file(new Path(file))); IntWritable key = new IntWritable(); ClueWarcRecord value = new ClueWarcRecord(); reader.seek(offsets[idx]); while (reader.next(key)) { if (key.get() == docno) { break; } } reader.getCurrentValue(value); reader.close(); long duration = System.currentTimeMillis() - start; LOG.info(" docno " + docno + " fetched in " + duration + "ms"); return value; } catch (IOException e) { e.printStackTrace(); } return null; }
From source file:edu.umd.cloud9.collection.clue.ClueWarcForwardIndex.java
License:Apache License
@Override public int getLastDocno() { if (lastDocno != -1) { return lastDocno; }/*from w w w . j a va 2s .c om*/ // Find the last entry, and then see all the way to the end of the collection. int idx = docnos.length - 1; String file = collectionPath + "/part-" + FORMAT5.format(fileno[idx]); try { SequenceFile.Reader reader = new SequenceFile.Reader(conf, SequenceFile.Reader.file(new Path(file))); IntWritable key = new IntWritable(); reader.seek(offsets[idx]); while (reader.next(key)) ; lastDocno = key.get(); reader.close(); } catch (IOException e) { e.printStackTrace(); } return lastDocno; }
From source file:edu.umd.cloud9.collection.clue.ScanBlockCompressedSequenceFile.java
License:Apache License
public static void main(String[] args) throws IOException { if (args.length != 1) { System.out.println("usage: [SequenceFile]"); System.exit(-1);//www. j av a 2 s. c o m } List<Long> seekPoints = Lists.newArrayList(); long pos = -1; long prevPos = -1; int prevDocno = 0; Path path = new Path(args[0]); Configuration config = new Configuration(); SequenceFile.Reader reader = new SequenceFile.Reader(config, SequenceFile.Reader.file(path)); IntWritable key = new IntWritable(); ClueWarcRecord value = new ClueWarcRecord(); pos = reader.getPosition(); int cnt = 0; while (reader.next(key, value)) { if (prevPos != -1 && prevPos != pos) { System.out.println("## beginning of block at " + prevPos + ", docno:" + prevDocno); seekPoints.add(prevPos); } System.out.println("offset:" + pos + "\tdocno:" + key + "\tdocid:" + value.getDocid()); prevPos = pos; pos = reader.getPosition(); prevDocno = key.get(); cnt++; if (cnt > Integer.MAX_VALUE) break; } reader.close(); reader = new SequenceFile.Reader(config, SequenceFile.Reader.file(path)); for (long p : seekPoints) { reader.seek(p); reader.next(key, value); System.out.println("seeking to pos " + p + "\tdocno:" + key + "\tdocid:" + value.getDocid()); } reader.close(); }
From source file:edu.umd.cloud9.collection.wikipedia.ExtractWikipediaAnchorTextWithWindow.java
License:Apache License
private void task3(String inputPath, String redirectPath, String outputPath) throws IOException { // caches/*from www. j a va 2s .c o m*/ IntWritable mapKey = new IntWritable(); HMapSIW mapVal = new HMapSIW(); HMapSIW tmpMap = new HMapSIW(); IntWritable target = new IntWritable(0); // read the redirect file MapFile.Reader redirectReader = null; MapFile.Writer mapWriter = null; MapFile.Reader mapReader = null; try { mapReader = new MapFile.Reader(new Path(inputPath + "/part-r-00000"), getConf()); redirectReader = new MapFile.Reader(new Path(redirectPath), getConf()); // TODO: Change code here mapWriter = new MapFile.Writer(getConf(), new Path(outputPath), MapFile.Writer.keyClass(IntWritable.class), MapFile.Writer.valueClass(HMapSIW.class)); while (mapReader.next(mapKey, mapVal)) { redirectReader.get(mapKey, target); if (target.get() > 0) { mapReader.get(target, tmpMap); if (!tmpMap.isEmpty()) { tmpMap.putAll(mapVal); mapWriter.append(target, tmpMap); } } else { mapWriter.append(mapKey, mapVal); } } } finally { if (mapWriter != null) mapWriter.close(); if (mapReader != null) mapReader.close(); if (redirectReader != null) redirectReader.close(); // Clean up intermediate data. FileSystem.get(getConf()).delete(new Path(inputPath), true); } }
From source file:edu.umd.cloud9.io.array.ArrayListOfIntsWritableTest.java
License:Apache License
@Test public void testIO() { ArrayListOfIntsWritable a1 = new ArrayListOfIntsWritable(); ArrayListOfIntsWritable a2 = new ArrayListOfIntsWritable(); ArrayListOfIntsWritable a3 = new ArrayListOfIntsWritable(); a1.add(1);// www . ja v a2 s. c o m a1.add(2); a2.add(3); SequenceFile.Writer w; Configuration conf = new Configuration(); try { w = SequenceFile.createWriter(conf, SequenceFile.Writer.file(new Path("tmp")), SequenceFile.Writer.keyClass(ArrayListOfIntsWritable.class), SequenceFile.Writer.valueClass(IntWritable.class)); w.append(a1, new IntWritable(1)); w.append(a2, new IntWritable(2)); w.append(a3, new IntWritable(3)); w.close(); } catch (IOException e) { e.printStackTrace(); assertTrue(false); } try { SequenceFile.Reader reader = new SequenceFile.Reader(conf, SequenceFile.Reader.file(new Path("tmp"))); ArrayListOfIntsWritable key = (ArrayListOfIntsWritable) reader.getKeyClass().newInstance(); IntWritable value = (IntWritable) reader.getValueClass().newInstance(); while (reader.next(key, value)) { int docno = value.get(); System.out.println(docno); System.out.println(key.toString()); System.out.println(key.size()); } reader.close(); } catch (IOException e) { e.printStackTrace(); assertTrue(false); } catch (InstantiationException e) { e.printStackTrace(); assertTrue(false); } catch (IllegalAccessException e) { assertTrue(false); e.printStackTrace(); } }
From source file:edu.umd.cloud9.io.array.ArrayListWritableTest.java
License:Apache License
@Test public void testSerialize3() throws IOException { //ArrayListWritable<Text> list = new ArrayListWritable<Text>(); ArrayListWritable<Writable> list = new ArrayListWritable<Writable>(); list.add(new Text("hi")); list.add(new IntWritable(1)); ByteArrayOutputStream bytesOut = new ByteArrayOutputStream(); DataOutputStream dataOut = new DataOutputStream(bytesOut); list.write(dataOut);/*from w w w . ja va 2 s . co m*/ ArrayListWritable<Writable> newList = new ArrayListWritable<Writable>(); newList.readFields(new DataInputStream(new ByteArrayInputStream(bytesOut.toByteArray()))); try { assertEquals(newList.get(0).toString(), "hi"); IntWritable i = (IntWritable) (newList.get(1)); assertEquals(i.get(), 1); assertTrue(false); } catch (Exception e) { } }
From source file:edu.umd.cloud9.io.HashMapWritableTest.java
License:Apache License
/**
 * Exercises put, get, size and remove on a {@code HashMapWritable<Text, IntWritable>} with two
 * entries.
 */
@Test
public void testBasic() throws IOException {
    HashMapWritable<Text, IntWritable> counts = new HashMapWritable<Text, IntWritable>();
    counts.put(new Text("hi"), new IntWritable(5));
    counts.put(new Text("there"), new IntWritable(22));

    assertEquals(counts.size(), 2);

    // Look up the first entry through a fresh, equal key.
    Text hi = new Text("hi");
    IntWritable hiValue = counts.get(hi);
    assertTrue(hiValue != null);
    assertEquals(hiValue.get(), 5);

    // Removing it leaves only the second entry.
    counts.remove(hi);
    assertEquals(counts.size(), 1);

    Text there = new Text("there");
    IntWritable thereValue = counts.get(there);
    assertTrue(thereValue != null);
    assertEquals(thereValue.get(), 22);
}