List of usage examples for com.google.common.collect.HashMultiset.create()
public static <E> HashMultiset<E> create()
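HashMultiset.create() returns a new, empty, mutable multiset backed by a HashMap: a collection that stores a count for each distinct element. Before the real-world examples below, here is a minimal, self-contained sketch of typical use (the class and element names are illustrative, not taken from the examples):

import com.google.common.collect.HashMultiset;
import com.google.common.collect.Multiset;

public class HashMultisetCreateExample {
  public static void main(String[] args) {
    // A multiset keeps a count per distinct element.
    Multiset<String> words = HashMultiset.create();
    words.add("apple");
    words.add("banana");
    words.add("apple");
    System.out.println(words.count("apple"));  // 2
    System.out.println(words.size());          // 3 (total occurrences)
    System.out.println(words.elementSet());    // distinct elements, unordered
  }
}

The examples below come from real projects; most of them use the multiset either to count occurrences of a key or to accumulate per-node or per-priority bookkeeping.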
From source file:org.apache.hadoop.mapred.NetCDFInputFormatPrunerByFileIndexMultiFileTwoDimensions.java
@Override
public InputSplit[] getSplits(JobConf job, int numSplits) throws IOException {
    FileStatus[] files = listStatus(job);
    LOG.info("[SAMAN][NetCDFInputFormatPrunerByFileIndex][getSplits] hive query is: "
            + job.get(HIVE_QUERY, "Kossher"));
    System.out.println("[SAMAN][NetCDFInputFormatPrunerByFileIndex][getSplits] hive query is: "
            + job.get(HIVE_QUERY, "Kossher"));

    /* Analyzing Query here */
    String hiveQuery = job.get(HIVE_QUERY, "Kossher");
    QueryType queryType = QueryType.NOLIMIT; // default mode
    /*
    if (hiveQuery.contains("where") || hiveQuery.contains("WHERE")) {
        if (hiveQuery.contains("time") || hiveQuery.contains("TIME")) {
            queryType = QueryType.TIME;
        } else if (hiveQuery.contains("lat") || hiveQuery.contains("LAT")) {
            queryType = QueryType.LAT;
        } else if (hiveQuery.contains("lon") || hiveQuery.contains("LON")) {
            queryType = QueryType.LON;
        }
    }
    */

    float latTopLimit = -1;
    float latBottomLimit = -1;
    float lonTopLimit = -1;
    float lonBottomLimit = -1;

    String[] querySplitted = hiveQuery.split(" ");
    for (int i = 0; i < querySplitted.length; i++) {
        if (querySplitted[i].equals("lat") || querySplitted[i].equals("LAT")) {
            if (querySplitted[i + 1].equals(">")) {
                latBottomLimit = Float.valueOf(querySplitted[i + 2]);
            } else if (querySplitted[i + 1].equals("<")) {
                latTopLimit = Float.valueOf(querySplitted[i + 2]);
            }
        }
        if (querySplitted[i].equals("lon") || querySplitted[i].equals("LON")) {
            if (querySplitted[i + 1].equals(">")) {
                lonBottomLimit = Float.valueOf(querySplitted[i + 2]);
            } else if (querySplitted[i + 1].equals("<")) {
                lonTopLimit = Float.valueOf(querySplitted[i + 2]);
            }
        }
    }
    System.out.println("[SAMAN][NetCDFInputFormatPrunerByFileIndex][getSplits] "
            + "latTopLimit=" + latTopLimit + ",latBottomLimit=" + latBottomLimit
            + ",lonTopLimit=" + lonTopLimit + ",lonBottomLimit=" + lonBottomLimit);
    System.out.println("[SAMANPruner] beginning of getSplits");

    job.setLong(NUM_INPUT_FILES, files.length);

    long totalSize = 0; // compute total size
    for (FileStatus file : files) { // check we have valid files
        if (file.isDir()) {
            throw new IOException("Not a file: " + file.getPath());
        }
        totalSize += file.getLen();
    }

    // generate splits
    ArrayList<NetCDFFileSplit> splits = new ArrayList<NetCDFFileSplit>(numSplits);
    ArrayList<NetCDFFileSplit> finalSplits = new ArrayList<NetCDFFileSplit>();
    NetworkTopology clusterMap = new NetworkTopology();
    for (FileStatus file : files) {
        Path path = file.getPath();
        int fileIndex = 0;
        int dimIndex = 0;
        String[] parts = path.getName().split("-");
        dimIndex = Integer.valueOf(parts[1]);
        //LOG.info("[SAMAN][NetCDFInputFormatPrunerByFileIndex][getSplits] File name is : " + path.getName());
        System.out.println("[SAMAN][NetCDFInputFormatPrunerByFileIndex][getSplits] File name is : " + path.getName());
        FileSystem fs = path.getFileSystem(job);
        long length = file.getLen();
        BlockLocation[] blkLocations = fs.getFileBlockLocations(file, 0, length);
        if ((length != 0) && isSplitable(fs, path)) {
            long blockSize = file.getBlockSize();
            netInfo = getNetCDFInfo(path, fs, job);

            // First decide which files should be considered as the base to be read
            int latTopTemp = -1;
            if (latTopLimit == -1) {
                latTopTemp = result.latLength;
            } else {
                latTopTemp = Math.min(result.latLength, (int) latTopLimit);
            }
            int latBottomTemp = -1;
            if (latBottomLimit == -1) {
                latBottomTemp = 0;
            } else {
                latBottomTemp = Math.max(0, (int) latBottomLimit);
            }
            int lonTopTemp = -1;
            if (lonTopLimit == -1) {
                lonTopTemp = result.lonLength;
            } else {
                lonTopTemp = Math.min(result.lonLength, (int) lonTopLimit);
            }
            int lonBottomTemp = -1;
            if (lonBottomLimit == -1) {
                lonBottomTemp = 0;
            } else {
                lonBottomTemp = Math.max(0, (int) lonBottomLimit);
            }

            // Choose the dimension that yields the smaller read volume.
            if ((latTopTemp - latBottomTemp) * 4 * result.lonLength * result.timeLength
                    < (lonTopTemp - lonBottomTemp) * 4 * result.latLength * result.timeLength) {
                chooseLat = true;
            } else {
                chooseLat = false;
            }
            System.out.println("[SAMAN][NetCDFInputFormat][getSplits] chooseLat = " + chooseLat);

            if (chooseLat) {
                if (!path.getName().contains("lat"))
                    continue;
            } else {
                if (!path.getName().contains("lon"))
                    continue;
            }

            long recStart = netInfo.recStart;
            long[] chunkStarts = netInfo.chunkStarts;
            long smallSize = netInfo.smallRecSize;
            long recSize = netInfo.recSize;
            long splitSize = 0;
            int chunkIndex = 0;
            long bytesRemaining = chunkStarts[chunkStarts.length - 1] + recSize - recStart - 2 * smallSize;
            long thisStart = recStart; // file position
            long thisChunk = 0;
            long blockNo = 1;
            while (bytesRemaining > 0) {
                while (chunkIndex < chunkStarts.length && chunkStarts[chunkIndex] < blockNo * blockSize) {
                    chunkIndex++;
                }
                long tempStart = thisStart;
                long endChunk;
                if (chunkIndex >= chunkStarts.length) {
                    splitSize = chunkStarts[chunkStarts.length - 1] + recSize - thisStart - smallSize;
                } else {
                    splitSize = chunkStarts[chunkIndex] - thisStart - smallSize;
                    thisStart = chunkStarts[chunkIndex];
                }
                endChunk = chunkIndex;
                blockNo++;
                //LOG.info("[SAMAN] NetCDFInputFormatPruner.getSplits => splitSize=" + splitSize + ", thisStart=" + thisStart
                //        + ", endChunk=" + endChunk + ", blockNo=" + blockNo);
                System.out.println("[SAMAN] NetCDFInputFormatPruner.getSplits => splitSize=" + splitSize
                        + ", thisStart=" + thisStart + ", endChunk=" + endChunk + ", blockNo=" + blockNo);
                String[] splitHosts = getSplitHosts(blkLocations, tempStart, splitSize, clusterMap);
                NetCDFFileSplit split = new NetCDFFileSplit(path, tempStart, splitSize, splitHosts);

                if (chooseLat) {
                    if (latTopTemp < thisChunk) {
                        bytesRemaining -= splitSize;
                        thisChunk = endChunk;
                        continue;
                    }
                    if (latBottomTemp > endChunk) {
                        bytesRemaining -= splitSize;
                        thisChunk = endChunk;
                        continue;
                    }
                    blockToNodes.put(split, splitHosts);
                    // Put the nodes with the specified split into the node to block set
                    for (int i = 0; i < splitHosts.length; i++) {
                        Set<NetCDFFileSplit> splitList = nodeToBlocks.get(splitHosts[i]);
                        if (splitList == null) {
                            splitList = new LinkedHashSet<NetCDFFileSplit>();
                            nodeToBlocks.put(splitHosts[i], splitList);
                        }
                        splitList.add(split);
                    }
                    // For the test, we would assign everything statically.
                    if (latBottomLimit > thisChunk) {
                        System.out.println(
                                "[SAMAN][NetCDFInputFormatPrunerByFileIndex][getSplits] startChunk = " + latBottomLimit);
                        split.getFileSplit().startChunk.add((long) latBottomLimit);
                    } else {
                        split.getFileSplit().startChunk.add(thisChunk);
                    }
                    if (latTopLimit < endChunk) {
                        System.out.println(
                                "[SAMAN][NetCDFInputFormatPrunerByFileIndex][getSplits] endChunk = " + latTopLimit);
                        split.getFileSplit().endChunk.add((long) latTopLimit);
                    } else {
                        split.getFileSplit().endChunk.add(endChunk);
                    }
                    split.getFileSplit().secondDimStartChunk.add((long) lonBottomTemp);
                    split.getFileSplit().secondDimEndChunk.add((long) lonTopTemp);
                }
                if (!chooseLat) {
                    if (lonTopTemp < thisChunk) {
                        bytesRemaining -= splitSize;
                        thisChunk = endChunk;
                        continue;
                    }
                    if (lonBottomTemp > endChunk) {
                        bytesRemaining -= splitSize;
                        thisChunk = endChunk;
                        continue;
                    }
                    blockToNodes.put(split, splitHosts);
                    // Put the nodes with the specified split into the node to block set
                    for (int i = 0; i < splitHosts.length; i++) {
                        Set<NetCDFFileSplit> splitList = nodeToBlocks.get(splitHosts[i]);
                        if (splitList == null) {
                            splitList = new LinkedHashSet<NetCDFFileSplit>();
                            nodeToBlocks.put(splitHosts[i], splitList);
                        }
                        splitList.add(split);
                    }
                    if (lonBottomLimit > thisChunk) {
                        System.out.println(
                                "[SAMAN][NetCDFInputFormatPrunerByFileIndex][getSplits] startChunk = " + lonBottomLimit);
                        split.getFileSplit().startChunk.add((long) lonBottomLimit);
                    } else {
                        split.getFileSplit().startChunk.add(thisChunk);
                    }
                    if (lonTopLimit < endChunk) {
                        System.out.println(
                                "[SAMAN][NetCDFInputFormatPrunerByFileIndex][getSplits] endChunk = " + lonTopLimit);
                        split.getFileSplit().endChunk.add((long) lonTopLimit);
                    } else {
                        split.getFileSplit().endChunk.add(endChunk);
                    }
                    split.getFileSplit().secondDimStartChunk.add((long) latBottomTemp);
                    split.getFileSplit().secondDimEndChunk.add((long) latTopTemp);
                }
                splits.add(split);
                bytesRemaining -= splitSize;
                thisChunk = endChunk;
                //LOG.info("[SAMAN] NetCDFInputFormatPruner.getSplits => bytesRemaining=" + bytesRemaining + ", thisChunk=" + thisChunk);
                //System.out.println("[SAMAN] NetCDFInputFormatPruner.getSplits => bytesRemaining=" + bytesRemaining + ", thisChunk=" + thisChunk);
            }
        } else if (length != 0) {
            String[] splitHosts = getSplitHosts(blkLocations, 0, length, clusterMap);
            //splits.add(new FileSplit(path, 0, length, splitHosts));
        } else {
            // Create empty hosts array for zero length files
            //splits.add(new FileSplit(path, 0, length, new String[0]));
        }
    }

    // Now it's time to merge non-complete splits.
    // Check if each split has enough space to include another split too
    Set<String> completedNodes = new HashSet<String>();
    ArrayList<NetCDFFileSplit> validBlocks = new ArrayList<NetCDFFileSplit>();
    long curSplitSize = 0;
    Multiset<String> splitsPerNode = HashMultiset.create();
    for (Iterator<Map.Entry<String, Set<NetCDFFileSplit>>> iter = nodeToBlocks.entrySet().iterator(); iter
            .hasNext();) {
        Map.Entry<String, Set<NetCDFFileSplit>> one = iter.next();
        String node = one.getKey();
        System.out.println("[SAMAN][NetCDFInputFormatPrunerByFileIndexMultiFile][getSplits] node is = " + node);

        // Skip the node if it has previously been marked as completed.
        if (completedNodes.contains(node)) {
            continue;
        }
        Set<NetCDFFileSplit> blocksInCurrentNode = one.getValue();
        // for each block, copy it into validBlocks. Delete it from
        // blockToNodes so that the same block does not appear in
        // two different splits.
        Iterator<NetCDFFileSplit> oneBlockIter = blocksInCurrentNode.iterator();
        while (oneBlockIter.hasNext()) {
            NetCDFFileSplit oneblock = oneBlockIter.next();
            System.out.println("[SAMAN][NetCDFInputFormatPrunerByFileIndexMultiFile][getSplits] "
                    + "split is: " + oneblock.getFileSplit().getPath());
            // Remove all blocks which may already have been assigned to other splits.
            if (!blockToNodes.containsKey(oneblock)) {
                oneBlockIter.remove();
                continue;
            }
            validBlocks.add(oneblock);
            if (chooseLat) {
                curSplitSize += (oneblock.getFileSplit().endChunk.get(0)
                        - oneblock.getFileSplit().startChunk.get(0)) * 4 * netInfo.lonLength * netInfo.timeLength;
            } else {
                curSplitSize += (oneblock.getFileSplit().endChunk.get(0)
                        - oneblock.getFileSplit().startChunk.get(0)) * 4 * netInfo.latLength * netInfo.timeLength;
            }
            blockToNodes.remove(oneblock);
            System.out.println("[SAMAN][NetCDFInputFormatPrunerByFileIndexMultiFile][getSplits] curSplitSize = "
                    + curSplitSize);
            //curSplitSize += singleSplitSize;
            System.out.println("[SAMAN][NetCDFInputFormatPrunerByFileIndexMultiFile][getSplits] "
                    + "Added to valid blocks!");

            // if the accumulated split size exceeds the maximum, then create this split.
            if (blockSize != 0 && curSplitSize >= blockSize) {
                // create an input split and add it to the splits array
                addCreatedSplit(finalSplits, Collections.singleton(node), validBlocks);
                //totalLength -= curSplitSize;
                System.out.println("[SAMAN][NetCDFInputFormatPrunerByFileIndexMultiFile][getSplits] "
                        + "addCreatedSplit called!");
                curSplitSize = 0;
                splitsPerNode.add(node);
                // Remove entries from blocksInNode so that we don't walk these again.
                //blocksInCurrentNode.removeAll(validBlocks);
                validBlocks.clear();
                // Done creating a single split for this node. Move on to the next
                // node so that splits are distributed across nodes.
                //break;
            }
        }
        if (!validBlocks.isEmpty()) {
            System.out.println(
                    "[SAMAN][NetCDFInputFormatPrunerByFileIndexMultiFile][getSplits] validBlocks not empty!");
            addCreatedSplit(finalSplits, Collections.singleton(node), validBlocks);
            curSplitSize = 0;
            splitsPerNode.add(node);
            blocksInCurrentNode.removeAll(validBlocks);
            validBlocks.clear();
        }
    }

    Set<NetCDFFileSplit> singleSplitsSet = blockToNodes.keySet();
    Iterator itrSingle = singleSplitsSet.iterator();
    while (itrSingle.hasNext()) {
        NetCDFFileSplit temp = (NetCDFFileSplit) itrSingle.next();
        addCreatedSingleSplit(finalSplits, temp.getLocations(), temp);
    }

    Iterator itr = finalSplits.iterator();
    while (itr.hasNext()) {
        NetCDFFileSplit temp = (NetCDFFileSplit) itr.next();
        String[] locations = temp.getFileSplit().getLocations();
        String locationsString = "";
        for (int i = 0; i < locations.length; i++)
            locationsString += locations[i];
        String pathsString = "";
        List<Path> paths = temp.getFileSplit().getPaths();
        for (Path path : paths)
            pathsString += path.getName() + ",";
        String startsString = "";
        List<Long> starts = temp.getFileSplit().startChunk;
        for (Long start : starts)
            startsString += (start + ",");
        String endsString = "";
        List<Long> ends = temp.getFileSplit().endChunk;
        for (Long end : ends)
            endsString += (end + ",");
        System.out.println("[SAMAN][NetCDFInputFormatPrunerByFileIndexMultiFile][getSplits] "
                + "locations=" + locationsString + "," + "paths=" + pathsString + ","
                + "starts=" + startsString + "," + "ends=" + endsString + ",");
    }
    return finalSplits.toArray(new NetCDFFileSplit[finalSplits.size()]);
}
From source file:io.hops.ha.common.FiCaSchedulerAppInfo.java
public void addSchedulingOppurtunity(Priority p, int count) {
    if (schedulingOpportunitiesToAdd == null) {
        schedulingOpportunitiesToAdd = HashMultiset.create();
    }
    schedulingOpportunitiesToAdd.setCount(p, count);
}
From source file:it.units.malelab.ege.distributed.master.UIRunnable.java
private void inc(String keyName, Object keyValue, JobInfo.Status status,
        Map<String, Map<Object, Multiset<JobInfo.Status>>> map) {
    Map<Object, Multiset<JobInfo.Status>> valueCounts = map.get(keyName);
    if (valueCounts == null) {
        valueCounts = new TreeMap<>();
        map.put(keyName, valueCounts);
    }
    Multiset<JobInfo.Status> statuses = valueCounts.get(keyValue);
    if (statuses == null) {
        statuses = HashMultiset.create();
        valueCounts.put(keyValue, statuses);
    }
    statuses.add(status);
}
From source file:io.hops.ha.common.FiCaSchedulerAppInfo.java
public void addReReservation(Priority p) {
    if (reReservations == null) {
        reReservations = HashMultiset.create();
    }
    reReservations.add(p);
}
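Note the two different mutation styles in these FiCaSchedulerAppInfo helpers: Multiset.setCount(element, count) overwrites whatever count the element had, while Multiset.add(element) increments its count by one. Both are standard Guava Multiset operations, and the lazy HashMultiset.create() keeps the field null until it is first needed.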
From source file:i5.las2peer.services.recommender.librec.data.FilmTrustDataDAO.java
/**
 * Print out distributions of the dataset <br/>
 *
 * <ul>
 * <li>#users (y) -- #ratings (x) (that are issued by each user)</li>
 * <li>#items (y) -- #ratings (x) (that are received by each item)</li>
 * </ul>
 */
public void printDistr(boolean isWriteOut) throws Exception {
    if (rateMatrix == null)
        readData();

    // count how many users give the same number of ratings
    Multiset<Integer> numURates = HashMultiset.create();
    // count how many items receive the same number of ratings
    Multiset<Integer> numIRates = HashMultiset.create();

    for (int r = 0, rm = rateMatrix.numRows; r < rm; r++) {
        int numRates = rateMatrix.rowSize(r);
        numURates.add(numRates);
    }
    for (int c = 0, cm = rateMatrix.numColumns; c < cm; c++) {
        int numRates = rateMatrix.columnSize(c);
        numIRates.add(numRates);
    }

    String ustrs = Strings.toString(numURates);
    String istrs = Strings.toString(numIRates);

    if (isWriteOut) {
        FileIO.writeString(FileIO.desktop + "user-distr.txt", ustrs);
        FileIO.writeString(FileIO.desktop + "item-distr.txt", istrs);
    } else {
        Logs.debug("#ratings (x) ~ #users (y): \n" + ustrs);
        Logs.debug("#ratings (x) ~ #items (y): \n" + istrs);
    }
    Logs.debug("Done!");
}
From source file:org.apache.hadoop.mapred.NetCDFInputFormatPrunerByFileIndexMultiFile.java
@Override
public InputSplit[] getSplits(JobConf job, int numSplits) throws IOException {
    FileStatus[] files = listStatus(job);
    LOG.info("[SAMAN][NetCDFInputFormatPrunerByFileIndex][getSplits] hive query is: "
            + job.get(HIVE_QUERY, "Kossher"));
    System.out.println("[SAMAN][NetCDFInputFormatPrunerByFileIndex][getSplits] hive query is: "
            + job.get(HIVE_QUERY, "Kossher"));

    /* Analyzing Query here */
    String hiveQuery = job.get(HIVE_QUERY, "Kossher");
    QueryType queryType = QueryType.NOLIMIT; // default mode
    if (hiveQuery.contains("where") || hiveQuery.contains("WHERE")) {
        if (hiveQuery.contains("time") || hiveQuery.contains("TIME")) {
            queryType = QueryType.TIME;
        } else if (hiveQuery.contains("lat") || hiveQuery.contains("LAT")) {
            queryType = QueryType.LAT;
        } else if (hiveQuery.contains("lon") || hiveQuery.contains("LON")) {
            queryType = QueryType.LON;
        }
    }
    float topLimit = -1;
    float bottomLimit = -1;
    if (queryType != QueryType.NOLIMIT) {
        if (hiveQuery.contains("<")) {
            String[] querySplitted = hiveQuery.split(" ");
            int i = Arrays.asList(querySplitted).indexOf("<");
            topLimit = Float.valueOf(querySplitted[i + 1]);
        }
        if (hiveQuery.contains(">")) {
            String[] querySplitted = hiveQuery.split(" ");
            int i = Arrays.asList(querySplitted).indexOf(">");
            bottomLimit = Float.valueOf(querySplitted[i + 1]);
        }
    }
    //System.out.println("[SAMAN][NetCDFInputFormatPrunerByFileIndex] QueryType = " + queryType.toString()
    //        + ", topLimit = " + topLimit + ", bottomLimit = " + bottomLimit);
    //LOG.info("[SAMAN][NetCDFInputFormatPrunerByFileIndex] QueryType = " + queryType.toString()
    //        + ", topLimit = " + topLimit + ", bottomLimit = " + bottomLimit);
    /* End Analyzing Query here */

    System.out.println("[SAMANPruner] beginning of getSplits");
    LOG.info("[SAMANPruner] beginning of getSplits");
    //System.out.println("[SAMAN] " + files.length);
    //LOG.info("[SAMAN] " + files.length);

    // Save the number of input files in the job-conf
    job.setLong(NUM_INPUT_FILES, files.length);

    long totalSize = 0; // compute total size
    for (FileStatus file : files) { // check we have valid files
        if (file.isDir()) {
            throw new IOException("Not a file: " + file.getPath());
        }
        totalSize += file.getLen();
    }
    //long minSize = Math.max(job.getLong("mapred.min.split.size", 1), minSplitSize);

    // generate splits
    ArrayList<NetCDFFileSplit> splits = new ArrayList<NetCDFFileSplit>(numSplits);
    ArrayList<NetCDFFileSplit> finalSplits = new ArrayList<NetCDFFileSplit>();
    NetworkTopology clusterMap = new NetworkTopology();
    for (FileStatus file : files) {
        Path path = file.getPath();
        int fileIndex = 0;
        int dimIndex = 0;
        if (queryType == QueryType.TIME || queryType == QueryType.NOLIMIT) {
            if (path.getName().contains("lat") || path.getName().contains("lon"))
                continue;
        } else if (queryType == QueryType.LAT) {
            if (!path.getName().contains("lat"))
                continue;
        } else if (queryType == QueryType.LON) {
            if (!path.getName().contains("lon"))
                continue;
        }
        if (queryType == QueryType.TIME) {
            String[] parts = path.getName().split("-");
            fileIndex = Integer.valueOf(parts[1]);
        } else if (queryType == QueryType.LAT || queryType == QueryType.LON) {
            if (path.getName().contains("_")) {
                String[] parts = path.getName().split("_");
                fileIndex = Integer.valueOf(parts[2]);
                dimIndex = Integer.valueOf(parts[0].substring(7));
            } else {
                //dimIndex = Integer.valueOf(path.getName().substring(7));
                String[] parts = path.getName().split("-");
                dimIndex = Integer.valueOf(parts[1]);
            }
        }
        //LOG.info("[SAMAN][NetCDFInputFormatPrunerByFileIndex][getSplits] File name is : " + path.getName());
        System.out.println("[SAMAN][NetCDFInputFormatPrunerByFileIndex][getSplits] File name is : " + path.getName());
        FileSystem fs = path.getFileSystem(job);
        long length = file.getLen();
        BlockLocation[] blkLocations = fs.getFileBlockLocations(file, 0, length);
        if ((length != 0) && isSplitable(fs, path)) {
            long blockSize = file.getBlockSize();
            netInfo = getNetCDFInfo(path, fs, job);
            long recStart = netInfo.recStart;
            long[] chunkStarts = netInfo.chunkStarts;
            long smallSize = netInfo.smallRecSize;
            long recSize = netInfo.recSize;
            long splitSize = 0;
            int chunkIndex = 0;
            long bytesRemaining = chunkStarts[chunkStarts.length - 1] + recSize - recStart - 2 * smallSize;
            long thisStart = recStart; // file position
            long thisChunk = 0;
            long blockNo = 1;
            long numChunksPerKey = 0;
            if (queryType == QueryType.LAT) {
                long chunkSize = netInfo.timeLength * netInfo.lonLength * 4;
                numChunksPerKey = blockSize / chunkSize;
            } else if (queryType == QueryType.LON) {
                long chunkSize = netInfo.timeLength * netInfo.latLength * 4;
                numChunksPerKey = blockSize / chunkSize;
            }
            System.out.println("[SAMAN][NetCDFInputFormat][getSplits] numChunksPerKey = " + numChunksPerKey);
            //LOG.info("[SAMAN] NetCDFInputFormatPruner.getSplits => recStart = " + recStart + ", chunkStarts = " + chunkStarts
            //        + ", smallSize = " + smallSize + ", recSize = " + recSize + ", bytesRemaining = " + bytesRemaining
            //        + ", thisStart = " + thisStart);
            //System.out.println("[SAMAN] NetCDFInputFormatPruner.getSplits => recStart = " + recStart + ", chunkStarts = " + chunkStarts
            //        + ", smallSize = " + smallSize + ", recSize = " + recSize + ", bytesRemaining = " + bytesRemaining
            //        + ", thisStart = " + thisStart);
            while (bytesRemaining > 0) {
                while (chunkIndex < chunkStarts.length && chunkStarts[chunkIndex] < blockNo * blockSize) {
                    chunkIndex++;
                }
                long tempStart = thisStart;
                long endChunk;
                if (chunkIndex >= chunkStarts.length) {
                    splitSize = chunkStarts[chunkStarts.length - 1] + recSize - thisStart - smallSize;
                    //bytesRemaining should be 0 after this round
                } else {
                    splitSize = chunkStarts[chunkIndex] - thisStart - smallSize;
                    thisStart = chunkStarts[chunkIndex];
                }
                endChunk = chunkIndex;
                blockNo++;
                //LOG.info("[SAMAN] NetCDFInputFormatPruner.getSplits => splitSize=" + splitSize + ", thisStart=" + thisStart
                //        + ", endChunk=" + endChunk + ", blockNo=" + blockNo);
                System.out.println("[SAMAN] NetCDFInputFormatPruner.getSplits => splitSize=" + splitSize
                        + ", thisStart=" + thisStart + ", endChunk=" + endChunk + ", blockNo=" + blockNo);
                String[] splitHosts = getSplitHosts(blkLocations, tempStart, splitSize, clusterMap);
                NetCDFFileSplit split = new NetCDFFileSplit(path, tempStart, splitSize, splitHosts);

                if (queryType == QueryType.TIME) {
                    if ((topLimit < thisChunk + (fileIndex * netInfo.timeLength)) && (topLimit != -1)) {
                        bytesRemaining -= splitSize;
                        thisChunk = endChunk;
                        continue;
                    }
                    if ((bottomLimit > endChunk + (fileIndex * netInfo.timeLength)) && (bottomLimit != -1)) {
                        bytesRemaining -= splitSize;
                        thisChunk = endChunk;
                        continue;
                    }
                    blockToNodes.put(split, splitHosts);
                    // Put the nodes with the specified split into the node to block set
                    System.out.println(
                            "[SAMAN][NetCDFInputFormat][getSplits] Put the nodes with the specified split into the node to block set");
                    for (int i = 0; i < splitHosts.length; i++) {
                        Set<NetCDFFileSplit> splitList = nodeToBlocks.get(splitHosts[i]);
                        if (splitList == null) {
                            splitList = new LinkedHashSet<NetCDFFileSplit>();
                            nodeToBlocks.put(splitHosts[i], splitList);
                        }
                        splitList.add(split);
                    }
                    System.out.println("[SAMAN][NetCDFInputFormat][getSplits] set start and end!");
                    split.getFileSplit().startChunk.add(thisChunk);
                    split.getFileSplit().endChunk.add(endChunk);
                } else if (queryType == QueryType.LAT || queryType == QueryType.LON) {
                    //System.out.println("[SAMAN][NetCDFInputFormat][getSplits] file = "
                    //        + path.getName() + ", topLimit = " + topLimit + ", bottomLimit = " + bottomLimit + ", dimIndex = " + dimIndex);
                    /*
                    if (topLimit < dimIndex * numChunksPerKey && (topLimit != -1)) {
                        bytesRemaining -= splitSize;
                        thisChunk = endChunk;
                        continue;
                    }
                    if (bottomLimit > dimIndex * numChunksPerKey && (bottomLimit != -1)) {
                        bytesRemaining -= splitSize;
                        thisChunk = endChunk;
                        continue;
                    }
                    */
                    if (topLimit < thisChunk && (topLimit != -1)) {
                        bytesRemaining -= splitSize;
                        thisChunk = endChunk;
                        continue;
                    }
                    if (bottomLimit > endChunk && (bottomLimit != -1)) {
                        bytesRemaining -= splitSize;
                        thisChunk = endChunk;
                        continue;
                    }
                    /*
                    if ((topLimit < thisChunk) && (topLimit != -1)) {
                        bytesRemaining -= splitSize;
                        thisChunk = endChunk;
                        continue;
                    }
                    if ((bottomLimit > endChunk) && (bottomLimit != -1)) {
                        bytesRemaining -= splitSize;
                        thisChunk = endChunk;
                        continue;
                    }
                    */
                    //split.getNetCDFFileSplit().endChunk = (long) topLimit;
                    /*
                    split.getFileSplit().startChunk.add(thisChunk);
                    split.getFileSplit().endChunk.add(endChunk);
                    */
                    // Put the block into the block to node set
                    blockToNodes.put(split, splitHosts);
                    // Put the nodes with the specified split into the node to block set
                    for (int i = 0; i < splitHosts.length; i++) {
                        Set<NetCDFFileSplit> splitList = nodeToBlocks.get(splitHosts[i]);
                        if (splitList == null) {
                            splitList = new LinkedHashSet<NetCDFFileSplit>();
                            nodeToBlocks.put(splitHosts[i], splitList);
                        }
                        splitList.add(split);
                    }
                    // For the test, we would assign everything statically.
                    if (bottomLimit > thisChunk && (bottomLimit != -1)) {
                        System.out.println(
                                "[SAMAN][NetCDFInputFormatPrunerByFileIndex][getSplits] startChunk = " + bottomLimit);
                        split.getFileSplit().startChunk.add((long) bottomLimit);
                    } else {
                        split.getFileSplit().startChunk.add(thisChunk);
                    }
                    if (topLimit < endChunk && (topLimit != -1)) {
                        System.out.println(
                                "[SAMAN][NetCDFInputFormatPrunerByFileIndex][getSplits] endChunk = " + endChunk);
                        split.getFileSplit().endChunk.add((long) topLimit);
                    } else {
                        split.getFileSplit().endChunk.add(endChunk);
                    }
                } else {
                    if ((topLimit < thisChunk) && (topLimit != -1)) {
                        bytesRemaining -= splitSize;
                        thisChunk = endChunk;
                        continue;
                    }
                    if ((bottomLimit > endChunk) && (bottomLimit != -1)) {
                        bytesRemaining -= splitSize;
                        thisChunk = endChunk;
                        continue;
                    }
                    blockToNodes.put(split, splitHosts);
                    // Put the nodes with the specified split into the node to block set
                    for (int i = 0; i < splitHosts.length; i++) {
                        Set<NetCDFFileSplit> splitList = nodeToBlocks.get(splitHosts[i]);
                        if (splitList == null) {
                            splitList = new LinkedHashSet<NetCDFFileSplit>();
                            nodeToBlocks.put(splitHosts[i], splitList);
                        }
                        splitList.add(split);
                    }
                    split.getFileSplit().startChunk.add(thisChunk);
                    split.getFileSplit().endChunk.add(endChunk);
                }
                splits.add(split);
                bytesRemaining -= splitSize;
                thisChunk = endChunk;
                //LOG.info("[SAMAN] NetCDFInputFormatPruner.getSplits => bytesRemaining=" + bytesRemaining + ", thisChunk=" + thisChunk);
                //System.out.println("[SAMAN] NetCDFInputFormatPruner.getSplits => bytesRemaining=" + bytesRemaining + ", thisChunk=" + thisChunk);
            }
        } else if (length != 0) {
            String[] splitHosts = getSplitHosts(blkLocations, 0, length, clusterMap);
            //splits.add(new FileSplit(path, 0, length, splitHosts));
        } else {
            // Create empty hosts array for zero length files
            //splits.add(new FileSplit(path, 0, length, new String[0]));
        }
    }

    // Now it's time to merge non-complete splits.
    // Check if each split has enough space to include another split too
    Set<String> completedNodes = new HashSet<String>();
    ArrayList<NetCDFFileSplit> validBlocks = new ArrayList<NetCDFFileSplit>();
    long curSplitSize = 0;
    Multiset<String> splitsPerNode = HashMultiset.create();
    for (Iterator<Map.Entry<String, Set<NetCDFFileSplit>>> iter = nodeToBlocks.entrySet().iterator(); iter
            .hasNext();) {
        Map.Entry<String, Set<NetCDFFileSplit>> one = iter.next();
        String node = one.getKey();
        System.out.println("[SAMAN][NetCDFInputFormatPrunerByFileIndexMultiFile][getSplits] node is = " + node);

        // Skip the node if it has previously been marked as completed.
        if (completedNodes.contains(node)) {
            continue;
        }
        Set<NetCDFFileSplit> blocksInCurrentNode = one.getValue();
        // for each block, copy it into validBlocks. Delete it from
        // blockToNodes so that the same block does not appear in
        // two different splits.
        Iterator<NetCDFFileSplit> oneBlockIter = blocksInCurrentNode.iterator();
        while (oneBlockIter.hasNext()) {
            NetCDFFileSplit oneblock = oneBlockIter.next();
            System.out.println("[SAMAN][NetCDFInputFormatPrunerByFileIndexMultiFile][getSplits] "
                    + "split is: " + oneblock.getFileSplit().getPath());
            // Remove all blocks which may already have been assigned to other splits.
            if (!blockToNodes.containsKey(oneblock)) {
                oneBlockIter.remove();
                continue;
            }
            validBlocks.add(oneblock);
            if (queryType == QueryType.LAT) {
                curSplitSize += (oneblock.getFileSplit().endChunk.get(0)
                        - oneblock.getFileSplit().startChunk.get(0)) * 4 * netInfo.lonLength * netInfo.timeLength;
            } else if (queryType == QueryType.LON) {
                curSplitSize += (oneblock.getFileSplit().endChunk.get(0)
                        - oneblock.getFileSplit().startChunk.get(0)) * 4 * netInfo.latLength * netInfo.timeLength;
            } else if (queryType == QueryType.TIME) {
                curSplitSize += (oneblock.getFileSplit().endChunk.get(0)
                        - oneblock.getFileSplit().startChunk.get(0)) * 4 * netInfo.latLength * netInfo.lonLength;
            } else {
                curSplitSize += (oneblock.getFileSplit().endChunk.get(0)
                        - oneblock.getFileSplit().startChunk.get(0)) * 4 * netInfo.latLength * netInfo.lonLength;
            }
            blockToNodes.remove(oneblock);
            System.out.println("[SAMAN][NetCDFInputFormatPrunerByFileIndexMultiFile][getSplits] curSplitSize = "
                    + curSplitSize);
            //curSplitSize += singleSplitSize;
            System.out.println("[SAMAN][NetCDFInputFormatPrunerByFileIndexMultiFile][getSplits] "
                    + "Added to valid blocks!");

            // if the accumulated split size exceeds the maximum, then create this split.
            if (blockSize != 0 && curSplitSize >= blockSize) {
                // create an input split and add it to the splits array
                addCreatedSplit(finalSplits, Collections.singleton(node), validBlocks);
                //totalLength -= curSplitSize;
                System.out.println("[SAMAN][NetCDFInputFormatPrunerByFileIndexMultiFile][getSplits] "
                        + "addCreatedSplit called!");
                curSplitSize = 0;
                splitsPerNode.add(node);
                // Remove entries from blocksInNode so that we don't walk these again.
                //blocksInCurrentNode.removeAll(validBlocks);
                validBlocks.clear();
                // Done creating a single split for this node. Move on to the next
                // node so that splits are distributed across nodes.
                //break;
            }
        }
        if (!validBlocks.isEmpty()) {
            System.out.println(
                    "[SAMAN][NetCDFInputFormatPrunerByFileIndexMultiFile][getSplits] validBlocks not empty!");
            addCreatedSplit(finalSplits, Collections.singleton(node), validBlocks);
            curSplitSize = 0;
            splitsPerNode.add(node);
            blocksInCurrentNode.removeAll(validBlocks);
            validBlocks.clear();
        }
    }

    Set<NetCDFFileSplit> singleSplitsSet = blockToNodes.keySet();
    Iterator itrSingle = singleSplitsSet.iterator();
    while (itrSingle.hasNext()) {
        NetCDFFileSplit temp = (NetCDFFileSplit) itrSingle.next();
        addCreatedSingleSplit(finalSplits, temp.getLocations(), temp);
    }

    Iterator itr = finalSplits.iterator();
    while (itr.hasNext()) {
        NetCDFFileSplit temp = (NetCDFFileSplit) itr.next();
        String[] locations = temp.getFileSplit().getLocations();
        String locationsString = "";
        for (int i = 0; i < locations.length; i++)
            locationsString += locations[i];
        String pathsString = "";
        List<Path> paths = temp.getFileSplit().getPaths();
        for (Path path : paths)
            pathsString += path.getName() + ",";
        String startsString = "";
        List<Long> starts = temp.getFileSplit().startChunk;
        for (Long start : starts)
            startsString += (start + ",");
        String endsString = "";
        List<Long> ends = temp.getFileSplit().endChunk;
        for (Long end : ends)
            endsString += (end + ",");
        System.out.println("[SAMAN][NetCDFInputFormatPrunerByFileIndexMultiFile][getSplits] "
                + "locations=" + locationsString + "," + "paths=" + pathsString + ","
                + "starts=" + startsString + "," + "ends=" + endsString + ",");
    }
    return finalSplits.toArray(new NetCDFFileSplit[finalSplits.size()]);
}
From source file:BibTex.IOmethods.java
public void writeJournalsPerCategories(Set<BibTexRef> refs) throws IOException {
    JournalAbbreviationsMapping jmap = new JournalAbbreviationsMapping();
    jmap.loadMap();

    BufferedWriter bw = new BufferedWriter(new FileWriter(folder + "journals per categories.csv"));
    StringBuilder sb = new StringBuilder();
    String sep = "|";

    // creation of 2 convenient data structures for I/O
    Map<String, Multiset<String>> categoriesToJournals = new TreeMap<>();
    List<String> categoryNames = new ArrayList<>();

    for (BibTexRef ref : refs) {
        Set<Category> categories = ref.getCategories();
        String title = ref.getJournal();
        if (title == null || title.isEmpty()) {
            continue;
        }
        title = title.toLowerCase();
        Set<String> abbrev = (Set<String>) jmap.getJournalsToAbbrev().get(title);
        if (abbrev == null || abbrev.isEmpty()) {
            abbrev = new HashSet<>();
            abbrev.add(title);
        }
        String abbreviation = abbrev.iterator().next();

        for (Category category : categories) {
            if (!categoryNames.contains(category.getCategoryName())) {
                categoryNames.add(category.getCategoryName());
            }
            if (categoriesToJournals.containsKey(category.getCategoryName())) {
                categoriesToJournals.get(category.getCategoryName()).add(abbreviation);
            } else {
                Multiset<String> journalsForOneCategory = HashMultiset.create();
                journalsForOneCategory.add(abbreviation);
                categoriesToJournals.put(category.getCategoryName(), journalsForOneCategory);
            }
        }
    }
    Collections.sort(categoryNames);

    // writing of the first line of the csv: headers of the categories.
    for (String categoryName : categoryNames) {
        sb.append(categoryName);
        sb.append(sep);
    }
    sb.append("\n");

    // writing of all subsequent lines: one per year
    int countCategoriesdone = 0;
    boolean continueLoop = true;
    while (continueLoop) {
        for (Iterator<String> it = categoriesToJournals.keySet().iterator(); it.hasNext();) {
            String category = it.next();
            Multiset<String> journalsForOneCategory = categoriesToJournals.get(category);
            Iterator<String> journalsIterator = Multisets.copyHighestCountFirst(journalsForOneCategory)
                    .elementSet().iterator();
            if (journalsIterator.hasNext()) {
                String journal = journalsIterator.next();
                sb.append(journal).append(" (").append(journalsForOneCategory.count(journal)).append(")")
                        .append(sep);
                journalsForOneCategory.remove(journal, journalsForOneCategory.count(journal));
            } else {
                sb.append(sep);
            }
        }
        sb.append("\n");

        for (String cat : categoriesToJournals.keySet()) {
            if (categoriesToJournals.get(cat).isEmpty()) {
                countCategoriesdone++;
            }
        }
        if (countCategoriesdone == categoryNames.size()) {
            continueLoop = false;
        } else {
            countCategoriesdone = 0;
        }
    }
    bw.write(sb.toString());
    bw.close();
}
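The descending-frequency output in this example relies on Multisets.copyHighestCountFirst(multiset), which returns an immutable copy of the multiset whose iteration order puts the highest-count elements first. Because each pass of the while loop then removes all occurrences of the journal it just wrote, every row of the CSV holds the next most frequent journal for each category.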
From source file:org.apache.hadoop.mapred.NetCDFInputFormatPartToMemoryMultiSplit.java
@Override
public InputSplit[] getSplits(JobConf job, int numSplits) throws IOException {
    FileStatus[] files = listStatus(job);
    LOG.info("[SAMAN][NetCDFInputFormatPartToMemoryMultiSplit][getSplits] hive query is: "
            + job.get(HIVE_QUERY, "Kossher"));
    System.out.println("[SAMAN][NetCDFInputFormatPartToMemoryMultiSplit][getSplits] hive query is: "
            + job.get(HIVE_QUERY, "Kossher"));

    /* Analyzing Query here */
    String hiveQuery = job.get(HIVE_QUERY, "Kossher");
    QueryType queryType = QueryType.NOLIMIT; // default mode
    if (hiveQuery.contains("where") || hiveQuery.contains("WHERE")) {
        if (hiveQuery.contains("time") || hiveQuery.contains("TIME")) {
            queryType = QueryType.TIME;
        } else if (hiveQuery.contains("lat") || hiveQuery.contains("LAT")) {
            queryType = QueryType.LAT;
        } else if (hiveQuery.contains("lon") || hiveQuery.contains("LON")) {
            queryType = QueryType.LON;
        }
    }
    float topLimit = -1;
    float bottomLimit = -1;
    if (queryType != QueryType.NOLIMIT) {
        if (hiveQuery.contains("<")) {
            String[] querySplitted = hiveQuery.split(" ");
            int i = Arrays.asList(querySplitted).indexOf("<");
            topLimit = Float.valueOf(querySplitted[i + 1]);
        }
        if (hiveQuery.contains(">")) {
            String[] querySplitted = hiveQuery.split(" ");
            int i = Arrays.asList(querySplitted).indexOf(">");
            bottomLimit = Float.valueOf(querySplitted[i + 1]);
        }
    }
    //System.out.println("[SAMAN][NetCDFInputFormatPrunerByFileIndex] QueryType = " + queryType.toString()
    //        + ", topLimit = " + topLimit + ", bottomLimit = " + bottomLimit);
    //LOG.info("[SAMAN][NetCDFInputFormatPrunerByFileIndex] QueryType = " + queryType.toString()
    //        + ", topLimit = " + topLimit + ", bottomLimit = " + bottomLimit);
    /* End Analyzing Query here */

    System.out.println("[SAMANPruner] beginning of getSplits");
    LOG.info("[SAMANPruner] beginning of getSplits");
    //System.out.println("[SAMAN] " + files.length);
    //LOG.info("[SAMAN] " + files.length);

    // Save the number of input files in the job-conf
    job.setLong(NUM_INPUT_FILES, files.length);

    long totalSize = 0; // compute total size
    for (FileStatus file : files) { // check we have valid files
        if (file.isDir()) {
            throw new IOException("Not a file: " + file.getPath());
        }
        totalSize += file.getLen();
    }
    //long minSize = Math.max(job.getLong("mapred.min.split.size", 1), minSplitSize);

    // generate splits
    ArrayList<NetCDFFileSplit> splits = new ArrayList<NetCDFFileSplit>(numSplits);
    ArrayList<NetCDFFileSplit> finalSplits = new ArrayList<NetCDFFileSplit>();
    NetworkTopology clusterMap = new NetworkTopology();
    for (FileStatus file : files) {
        Path path = file.getPath();
        int fileIndex = 0;
        int dimIndex = 0;
        //LOG.info("[SAMAN][NetCDFInputFormatPrunerByFileIndex][getSplits] File name is : " + path.getName());
        System.out.println("[SAMAN][NetCDFInputFormatPartToMemoryMultiSplit][getSplits] File name is : " + path.getName());
        FileSystem fs = path.getFileSystem(job);
        long length = file.getLen();
        BlockLocation[] blkLocations = fs.getFileBlockLocations(file, 0, length);
        if ((length != 0) && isSplitable(fs, path)) {
            long blockSize = file.getBlockSize();
            netInfo = getNetCDFInfo(path, fs, job);
            long recStart = netInfo.recStart;
            long[] chunkStarts = netInfo.chunkStarts;
            long smallSize = netInfo.smallRecSize;
            long recSize = netInfo.recSize;
            long splitSize = 0;
            int chunkIndex = 0;
            long bytesRemaining = chunkStarts[chunkStarts.length - 1] + recSize - recStart - 2 * smallSize;
            long thisStart = recStart; // file position
            long thisChunk = 0;
            long blockNo = 1;
            long numChunksPerKey = 0;
            if (queryType == QueryType.LAT) {
                long chunkSize = netInfo.timeLength * netInfo.lonLength * 4;
                numChunksPerKey = blockSize / chunkSize;
            } else if (queryType == QueryType.LON) {
                long chunkSize = netInfo.timeLength * netInfo.latLength * 4;
                numChunksPerKey = blockSize / chunkSize;
            }
            System.out.println("[SAMAN][NetCDFInputFormat][getSplits] numChunksPerKey = " + numChunksPerKey);
            //LOG.info("[SAMAN] NetCDFInputFormatPruner.getSplits => recStart = " + recStart + ", chunkStarts = " + chunkStarts
            //        + ", smallSize = " + smallSize + ", recSize = " + recSize + ", bytesRemaining = " + bytesRemaining
            //        + ", thisStart = " + thisStart);
            //System.out.println("[SAMAN] NetCDFInputFormatPruner.getSplits => recStart = " + recStart + ", chunkStarts = " + chunkStarts
            //        + ", smallSize = " + smallSize + ", recSize = " + recSize + ", bytesRemaining = " + bytesRemaining
            //        + ", thisStart = " + thisStart);
            while (bytesRemaining > 0) {
                while (chunkIndex < chunkStarts.length && chunkStarts[chunkIndex] < blockNo * blockSize) {
                    chunkIndex++;
                }
                long tempStart = thisStart;
                long endChunk;
                if (chunkIndex >= chunkStarts.length) {
                    splitSize = chunkStarts[chunkStarts.length - 1] + recSize - thisStart - smallSize;
                    //bytesRemaining should be 0 after this round
                } else {
                    splitSize = chunkStarts[chunkIndex] - thisStart - smallSize;
                    thisStart = chunkStarts[chunkIndex];
                }
                endChunk = chunkIndex;
                blockNo++;
                //LOG.info("[SAMAN] NetCDFInputFormatPruner.getSplits => splitSize=" + splitSize + ", thisStart=" + thisStart
                //        + ", endChunk=" + endChunk + ", blockNo=" + blockNo);
                System.out.println("[SAMAN] NetCDFInputFormatPruner.getSplits => splitSize=" + splitSize
                        + ", thisStart=" + thisStart + ", endChunk=" + endChunk + ", blockNo=" + blockNo);
                String[] splitHosts = getSplitHosts(blkLocations, tempStart, splitSize, clusterMap);
                NetCDFFileSplit split = new NetCDFFileSplit(path, tempStart, splitSize, splitHosts);

                split.getFileSplit().startChunk.add(thisChunk);
                split.getFileSplit().endChunk.add(endChunk);
                if (queryType == QueryType.TIME) {
                    split.getFileSplit().timeStartLimit.add((long) bottomLimit);
                    split.getFileSplit().timeEndLimit.add((long) topLimit);
                    split.getFileSplit().latStartLimit.add((long) -1);
                    split.getFileSplit().latEndLimit.add((long) -1);
                    split.getFileSplit().lonStartLimit.add((long) -1);
                    split.getFileSplit().lonEndLimit.add((long) -1);
                } else if (queryType == QueryType.LAT) {
                    split.getFileSplit().timeStartLimit.add((long) -1);
                    split.getFileSplit().timeEndLimit.add((long) -1);
                    split.getFileSplit().latStartLimit.add((long) bottomLimit);
                    split.getFileSplit().latEndLimit.add((long) topLimit);
                    split.getFileSplit().lonStartLimit.add((long) -1);
                    split.getFileSplit().lonEndLimit.add((long) -1);
                } else if (queryType == QueryType.LON) {
                    split.getFileSplit().timeStartLimit.add((long) -1);
                    split.getFileSplit().timeEndLimit.add((long) -1);
                    split.getFileSplit().latStartLimit.add((long) -1);
                    split.getFileSplit().latEndLimit.add((long) -1);
                    split.getFileSplit().lonStartLimit.add((long) bottomLimit);
                    split.getFileSplit().lonEndLimit.add((long) topLimit);
                }
                blockToNodes.put(split, splitHosts);
                System.out.println(
                        "[SAMAN][NetCDFInputFormat][getSplits] Put the nodes with the specified split into the node to block set");
                for (int i = 0; i < splitHosts.length; i++) {
                    Set<NetCDFFileSplit> splitList = nodeToBlocks.get(splitHosts[i]);
                    if (splitList == null) {
                        splitList = new LinkedHashSet<NetCDFFileSplit>();
                        nodeToBlocks.put(splitHosts[i], splitList);
                    }
                    splitList.add(split);
                }
                /*
                if (queryType == QueryType.TIME) {
                    if ((topLimit < thisChunk + (fileIndex * netInfo.timeLength)) && (topLimit != -1)) {
                        bytesRemaining -= splitSize;
                        thisChunk = endChunk;
                        continue;
                    }
                    if ((bottomLimit > endChunk + (fileIndex * netInfo.timeLength)) && (bottomLimit != -1)) {
                        bytesRemaining -= splitSize;
                        thisChunk = endChunk;
                        continue;
                    }
                    blockToNodes.put(split, splitHosts);
                    // Put the nodes with the specified split into the node to block set
                    System.out.println(
                            "[SAMAN][NetCDFInputFormat][getSplits] Put the nodes with the specified split into the node to block set");
                    for (int i = 0; i < splitHosts.length; i++) {
                        Set<NetCDFFileSplit> splitList = nodeToBlocks.get(splitHosts[i]);
                        if (splitList == null) {
                            splitList = new LinkedHashSet<NetCDFFileSplit>();
                            nodeToBlocks.put(splitHosts[i], splitList);
                        }
                        splitList.add(split);
                    }
                    System.out.println("[SAMAN][NetCDFInputFormat][getSplits] set start and end!");
                    split.getFileSplit().startChunk.add(thisChunk);
                    split.getFileSplit().endChunk.add(endChunk);
                } else if (queryType == QueryType.LAT || queryType == QueryType.LON) {
                    //System.out.println("[SAMAN][NetCDFInputFormat][getSplits] file = "
                    //        + path.getName() + ", topLimit = " + topLimit + ", bottomLimit = " + bottomLimit + ", dimIndex = " + dimIndex);
                */
                /*
                    if (topLimit < dimIndex * numChunksPerKey && (topLimit != -1)) {
                        bytesRemaining -= splitSize;
                        thisChunk = endChunk;
                        continue;
                    }
                    if (bottomLimit > dimIndex * numChunksPerKey && (bottomLimit != -1)) {
                        bytesRemaining -= splitSize;
                        thisChunk = endChunk;
                        continue;
                    }
                */
                /*
                    if (topLimit < thisChunk && (topLimit != -1)) {
                        bytesRemaining -= splitSize;
                        thisChunk = endChunk;
                        continue;
                    }
                    if (bottomLimit > endChunk && (bottomLimit != -1)) {
                        bytesRemaining -= splitSize;
                        thisChunk = endChunk;
                        continue;
                    }
                */
                /*
                    if ((topLimit < thisChunk) && (topLimit != -1)) {
                        bytesRemaining -= splitSize;
                        thisChunk = endChunk;
                        continue;
                    }
                    if ((bottomLimit > endChunk) && (bottomLimit != -1)) {
                        bytesRemaining -= splitSize;
                        thisChunk = endChunk;
                        continue;
                    }
                */
                //split.getNetCDFFileSplit().endChunk = (long) topLimit;
                /*
                    split.getFileSplit().startChunk.add(thisChunk);
                    split.getFileSplit().endChunk.add(endChunk);
                */
                // Put the block into the block to node set
                /*
                    blockToNodes.put(split, splitHosts);
                    // Put the nodes with the specified split into the node to block set
                    for (int i = 0; i < splitHosts.length; i++) {
                        Set<NetCDFFileSplit> splitList = nodeToBlocks.get(splitHosts[i]);
                        if (splitList == null) {
                            splitList = new LinkedHashSet<NetCDFFileSplit>();
                            nodeToBlocks.put(splitHosts[i], splitList);
                        }
                        splitList.add(split);
                    }
                    // For the test, we would assign everything statically.
                    if (bottomLimit > thisChunk && (bottomLimit != -1)) {
                        System.out.println(
                                "[SAMAN][NetCDFInputFormatPrunerByFileIndex][getSplits] startChunk = " + bottomLimit);
                        split.getFileSplit().startChunk.add((long) bottomLimit);
                    } else {
                        split.getFileSplit().startChunk.add(thisChunk);
                    }
                    if (topLimit < endChunk && (topLimit != -1)) {
                        System.out.println(
                                "[SAMAN][NetCDFInputFormatPrunerByFileIndex][getSplits] endChunk = " + endChunk);
                        split.getFileSplit().endChunk.add((long) topLimit);
                    } else {
                        split.getFileSplit().endChunk.add(endChunk);
                    }
                } else {
                    if ((topLimit < thisChunk) && (topLimit != -1)) {
                        bytesRemaining -= splitSize;
                        thisChunk = endChunk;
                        continue;
                    }
                    if ((bottomLimit > endChunk) && (bottomLimit != -1)) {
                        bytesRemaining -= splitSize;
                        thisChunk = endChunk;
                        continue;
                    }
                    blockToNodes.put(split, splitHosts);
                    // Put the nodes with the specified split into the node to block set
                    for (int i = 0; i < splitHosts.length; i++) {
                        Set<NetCDFFileSplit> splitList = nodeToBlocks.get(splitHosts[i]);
                        if (splitList == null) {
                            splitList = new LinkedHashSet<NetCDFFileSplit>();
                            nodeToBlocks.put(splitHosts[i], splitList);
                        }
                        splitList.add(split);
                    }
                    split.getFileSplit().startChunk.add(thisChunk);
                    split.getFileSplit().endChunk.add(endChunk);
                }
                */
                splits.add(split);
                bytesRemaining -= splitSize;
                thisChunk = endChunk;
                //LOG.info("[SAMAN] NetCDFInputFormatPruner.getSplits => bytesRemaining=" + bytesRemaining + ", thisChunk=" + thisChunk);
                //System.out.println("[SAMAN] NetCDFInputFormatPruner.getSplits => bytesRemaining=" + bytesRemaining + ", thisChunk=" + thisChunk);
            }
        } else if (length != 0) {
            String[] splitHosts = getSplitHosts(blkLocations, 0, length, clusterMap);
            //splits.add(new FileSplit(path, 0, length, splitHosts));
        } else {
            // Create empty hosts array for zero length files
            //splits.add(new FileSplit(path, 0, length, new String[0]));
        }
    }

    // Now it's time to merge non-complete splits.
    // Check if each split has enough space to include another split too
    Set<String> completedNodes = new HashSet<String>();
    ArrayList<NetCDFFileSplit> validBlocks = new ArrayList<NetCDFFileSplit>();
    long curSplitSize = 0;
    Multiset<String> splitsPerNode = HashMultiset.create();
    for (Iterator<Map.Entry<String, Set<NetCDFFileSplit>>> iter = nodeToBlocks.entrySet().iterator(); iter
            .hasNext();) {
        Map.Entry<String, Set<NetCDFFileSplit>> one = iter.next();
        String node = one.getKey();
        System.out.println("[SAMAN][NetCDFInputFormatPrunerByFileIndexMultiFile][getSplits] node is = " + node);

        // Skip the node if it has previously been marked as completed.
        if (completedNodes.contains(node)) {
            continue;
        }
        Set<NetCDFFileSplit> blocksInCurrentNode = one.getValue();
        // for each block, copy it into validBlocks. Delete it from
        // blockToNodes so that the same block does not appear in
        // two different splits.
        Iterator<NetCDFFileSplit> oneBlockIter = blocksInCurrentNode.iterator();
        while (oneBlockIter.hasNext()) {
            NetCDFFileSplit oneblock = oneBlockIter.next();
            System.out.println("[SAMAN][NetCDFInputFormatPrunerByFileIndexMultiFile][getSplits] "
                    + "split is: " + oneblock.getFileSplit().getPath());
            // Remove all blocks which may already have been assigned to other splits.
            if (!blockToNodes.containsKey(oneblock)) {
                oneBlockIter.remove();
                continue;
            }
            validBlocks.add(oneblock);
            if (queryType == QueryType.LAT) {
                curSplitSize += (oneblock.getFileSplit().latEndLimit.get(0)
                        - oneblock.getFileSplit().latStartLimit.get(0)) * 4 * netInfo.lonLength * netInfo.timeLength;
            } else if (queryType == QueryType.LON) {
                curSplitSize += (oneblock.getFileSplit().lonEndLimit.get(0)
                        - oneblock.getFileSplit().lonStartLimit.get(0)) * 4 * netInfo.latLength * netInfo.timeLength;
            } else if (queryType == QueryType.TIME) {
                curSplitSize += (oneblock.getFileSplit().timeEndLimit.get(0)
                        - oneblock.getFileSplit().timeStartLimit.get(0)) * 4 * netInfo.latLength * netInfo.lonLength;
            } else {
                curSplitSize += (oneblock.getFileSplit().endChunk.get(0)
                        - oneblock.getFileSplit().startChunk.get(0)) * 4 * netInfo.latLength * netInfo.lonLength;
            }
            blockToNodes.remove(oneblock);
            System.out.println("[SAMAN][NetCDFInputFormatPrunerByFileIndexMultiFile][getSplits] curSplitSize = "
                    + curSplitSize);
            //curSplitSize += singleSplitSize;
            //System.out.println("[SAMAN][NetCDFInputFormatPrunerByFileIndexMultiFile][getSplits] "
            //        + "Added to valid blocks!");

            // if the accumulated split size exceeds the maximum, then create this split.
            if (blockSize != 0 && curSplitSize >= blockSize) {
                // create an input split and add it to the splits array
                addCreatedSplit(finalSplits, Collections.singleton(node), validBlocks);
                //totalLength -= curSplitSize;
                //System.out.println("[SAMAN][NetCDFInputFormatPrunerByFileIndexMultiFile][getSplits] "
                //        + "addCreatedSplit called!");
                curSplitSize = 0;
                splitsPerNode.add(node);
                // Remove entries from blocksInNode so that we don't walk these again.
                //blocksInCurrentNode.removeAll(validBlocks);
                validBlocks.clear();
                // Done creating a single split for this node. Move on to the next
                // node so that splits are distributed across nodes.
                //break;
            }
        }
        if (!validBlocks.isEmpty()) {
            //System.out.println("[SAMAN][NetCDFInputFormatPrunerByFileIndexMultiFile][getSplits] validBlocks not empty!");
            addCreatedSplit(finalSplits, Collections.singleton(node), validBlocks);
            curSplitSize = 0;
            splitsPerNode.add(node);
            blocksInCurrentNode.removeAll(validBlocks);
            validBlocks.clear();
        }
    }

    Set<NetCDFFileSplit> singleSplitsSet = blockToNodes.keySet();
    Iterator itrSingle = singleSplitsSet.iterator();
    while (itrSingle.hasNext()) {
        NetCDFFileSplit temp = (NetCDFFileSplit) itrSingle.next();
        addCreatedSingleSplit(finalSplits, temp.getLocations(), temp);
    }

    Iterator itr = finalSplits.iterator();
    while (itr.hasNext()) {
        NetCDFFileSplit temp = (NetCDFFileSplit) itr.next();
        String[] locations = temp.getFileSplit().getLocations();
        String locationsString = "";
        for (int i = 0; i < locations.length; i++)
            locationsString += locations[i];
        String pathsString = "";
        List<Path> paths = temp.getFileSplit().getPaths();
        for (Path path : paths)
            pathsString += path.getName() + ",";
        String startsString = "";
        List<Long> starts = temp.getFileSplit().startChunk;
        for (Long start : starts)
            startsString += (start + ",");
        String endsString = "";
        List<Long> ends = temp.getFileSplit().endChunk;
        for (Long end : ends)
            endsString += (end + ",");
        System.out.println("[SAMAN][NetCDFInputFormatPrunerByFileIndexMultiFile][getSplits] "
                + "locations=" + locationsString + "," + "paths=" + pathsString + ","
                + "starts=" + startsString + "," + "ends=" + endsString + ",");
    }
    return finalSplits.toArray(new NetCDFFileSplit[finalSplits.size()]);
}
From source file:org.datanucleus.store.types.guava.wrappers.backed.Multiset.java
/**
 * The writeReplace method is called when ObjectOutputStream is preparing
 * to write the object to the stream. The ObjectOutputStream checks
 * whether the class defines the writeReplace method. If the method is
 * defined, the writeReplace method is called to allow the object to
 * designate its replacement in the stream. The object returned should be
 * either of the same type as the object passed in or an object that when
 * read and resolved will result in an object of a type that is compatible
 * with all references to the object.
 *
 * @return the replaced object
 * @throws ObjectStreamException if an error occurs
 */
protected Object writeReplace() throws ObjectStreamException {
    if (useCache) {
        loadFromStore();
        HashMultiset multi = HashMultiset.create();
        multi.addAll(delegate);
        return multi;
    }
    // TODO Cater for non-cached collection, load elements in a DB call.
    HashMultiset multi = HashMultiset.create();
    multi.addAll(delegate);
    return multi;
}
From source file:org.apache.twill.internal.appmaster.ApplicationMasterService.java
/**
 * Handles containers that have completed.
 */
private void handleCompleted(List<YarnContainerStatus> completedContainersStatuses) {
    Multiset<String> restartRunnables = HashMultiset.create();
    for (YarnContainerStatus status : completedContainersStatuses) {
        LOG.info("Container {} completed with {}:{}.", status.getContainerId(), status.getState(),
                status.getDiagnostics());
        runningContainers.handleCompleted(status, restartRunnables);
    }

    for (Multiset.Entry<String> entry : restartRunnables.entrySet()) {
        LOG.info("Re-request container for {} with {} instances.", entry.getElement(), entry.getCount());
        runnableContainerRequests.add(createRunnableContainerRequest(entry.getElement(), entry.getCount()));
    }

    // For all runnables that need to re-request containers, update the expected count timestamp
    // so that the EventHandler is triggered with the right expiration timestamp.
    expectedContainers.updateRequestTime(restartRunnables.elementSet());
}