List of usage examples for com.google.common.collect.HashMultiset.create()
public static <E> HashMultiset<E> create()
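HashMultiset.create() returns a new, empty, mutable multiset backed by a HashMap: a collection that stores a count for each distinct element. Before the real-world examples below, here is a minimal, self-contained sketch of typical use (the class and element names are illustrative, not taken from the examples):

import com.google.common.collect.HashMultiset;
import com.google.common.collect.Multiset;

public class HashMultisetCreateExample {
  public static void main(String[] args) {
    // A multiset keeps a count per distinct element.
    Multiset<String> words = HashMultiset.create();
    words.add("apple");
    words.add("banana");
    words.add("apple");
    System.out.println(words.count("apple"));  // 2
    System.out.println(words.size());          // 3 (total occurrences)
    System.out.println(words.elementSet());    // distinct elements, unordered
  }
}

The examples below come from real projects; most of them use the multiset either to count occurrences of a key or to accumulate per-node or per-priority bookkeeping.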
From source file:org.apache.hadoop.mapred.NetCDFInputFormatPrunerByFileIndexMultiFileTwoDimensions.java
@Override
public InputSplit[] getSplits(JobConf job, int numSplits) throws IOException {
    FileStatus[] files = listStatus(job);
    LOG.info("[SAMAN][NetCDFInputFormatPrunerByFileIndex][getSplits] hive query is: "
            + job.get(HIVE_QUERY, "Kossher"));
    System.out.println("[SAMAN][NetCDFInputFormatPrunerByFileIndex][getSplits] hive query is: "
            + job.get(HIVE_QUERY, "Kossher"));

    /* Analyzing Query here */
    String hiveQuery = job.get(HIVE_QUERY, "Kossher");
    QueryType queryType = QueryType.NOLIMIT; // default mode
    /*
    if (hiveQuery.contains("where") || hiveQuery.contains("WHERE")) {
        if (hiveQuery.contains("time") || hiveQuery.contains("TIME")) {
            queryType = QueryType.TIME;
        } else if (hiveQuery.contains("lat") || hiveQuery.contains("LAT")) {
            queryType = QueryType.LAT;
        } else if (hiveQuery.contains("lon") || hiveQuery.contains("LON")) {
            queryType = QueryType.LON;
        }
    }
    */

    float latTopLimit = -1;
    float latBottomLimit = -1;
    float lonTopLimit = -1;
    float lonBottomLimit = -1;

    String[] querySplitted = hiveQuery.split(" ");
    for (int i = 0; i < querySplitted.length; i++) {
        if (querySplitted[i].equals("lat") || querySplitted[i].equals("LAT")) {
            if (querySplitted[i + 1].equals(">")) {
                latBottomLimit = Float.valueOf(querySplitted[i + 2]);
            } else if (querySplitted[i + 1].equals("<")) {
                latTopLimit = Float.valueOf(querySplitted[i + 2]);
            }
        }
        if (querySplitted[i].equals("lon") || querySplitted[i].equals("LON")) {
            if (querySplitted[i + 1].equals(">")) {
                lonBottomLimit = Float.valueOf(querySplitted[i + 2]);
            } else if (querySplitted[i + 1].equals("<")) {
                lonTopLimit = Float.valueOf(querySplitted[i + 2]);
            }
        }
    }
    System.out.println("[SAMAN][NetCDFInputFormatPrunerByFileIndex][getSplits] "
            + "latTopLimit=" + latTopLimit + ",latBottomLimit=" + latBottomLimit
            + ",lonTopLimit=" + lonTopLimit + ",lonBottomLimit=" + lonBottomLimit);
    System.out.println("[SAMANPruner] beginning of getSplits");

    job.setLong(NUM_INPUT_FILES, files.length);

    long totalSize = 0; // compute total size
    for (FileStatus file : files) { // check we have valid files
        if (file.isDir()) {
            throw new IOException("Not a file: " + file.getPath());
        }
        totalSize += file.getLen();
    }

    // generate splits
    ArrayList<NetCDFFileSplit> splits = new ArrayList<NetCDFFileSplit>(numSplits);
    ArrayList<NetCDFFileSplit> finalSplits = new ArrayList<NetCDFFileSplit>();
    NetworkTopology clusterMap = new NetworkTopology();
    for (FileStatus file : files) {
        Path path = file.getPath();
        int fileIndex = 0;
        int dimIndex = 0;
        String[] parts = path.getName().split("-");
        dimIndex = Integer.valueOf(parts[1]);
        //LOG.info("[SAMAN][NetCDFInputFormatPrunerByFileIndex][getSplits] File name is : " + path.getName());
        System.out.println("[SAMAN][NetCDFInputFormatPrunerByFileIndex][getSplits] File name is : " + path.getName());
        FileSystem fs = path.getFileSystem(job);
        long length = file.getLen();
        BlockLocation[] blkLocations = fs.getFileBlockLocations(file, 0, length);
        if ((length != 0) && isSplitable(fs, path)) {
            long blockSize = file.getBlockSize();
            netInfo = getNetCDFInfo(path, fs, job);

            // First decide which files should be considered as the base to be read
            int latTopTemp = -1;
            if (latTopLimit == -1) {
                latTopTemp = result.latLength;
            } else {
                latTopTemp = Math.min(result.latLength, (int) latTopLimit);
            }
            int latBottomTemp = -1;
            if (latBottomLimit == -1) {
                latBottomTemp = 0;
            } else {
                latBottomTemp = Math.max(0, (int) latBottomLimit);
            }
            int lonTopTemp = -1;
            if (lonTopLimit == -1) {
                lonTopTemp = result.lonLength;
            } else {
                lonTopTemp = Math.min(result.lonLength, (int) lonTopLimit);
            }
            int lonBottomTemp = -1;
            if (lonBottomLimit == -1) {
                lonBottomTemp = 0;
            } else {
                lonBottomTemp = Math.max(0, (int) lonBottomLimit);
            }

            // Choose the dimension that yields the smaller read volume.
            if ((latTopTemp - latBottomTemp) * 4 * result.lonLength * result.timeLength
                    < (lonTopTemp - lonBottomTemp) * 4 * result.latLength * result.timeLength) {
                chooseLat = true;
            } else {
                chooseLat = false;
            }
            System.out.println("[SAMAN][NetCDFInputFormat][getSplits] chooseLat = " + chooseLat);

            if (chooseLat) {
                if (!path.getName().contains("lat"))
                    continue;
            } else {
                if (!path.getName().contains("lon"))
                    continue;
            }

            long recStart = netInfo.recStart;
            long[] chunkStarts = netInfo.chunkStarts;
            long smallSize = netInfo.smallRecSize;
            long recSize = netInfo.recSize;
            long splitSize = 0;
            int chunkIndex = 0;
            long bytesRemaining = chunkStarts[chunkStarts.length - 1] + recSize - recStart - 2 * smallSize;
            long thisStart = recStart; // file position
            long thisChunk = 0;
            long blockNo = 1;
            while (bytesRemaining > 0) {
                while (chunkIndex < chunkStarts.length && chunkStarts[chunkIndex] < blockNo * blockSize) {
                    chunkIndex++;
                }
                long tempStart = thisStart;
                long endChunk;
                if (chunkIndex >= chunkStarts.length) {
                    splitSize = chunkStarts[chunkStarts.length - 1] + recSize - thisStart - smallSize;
                } else {
                    splitSize = chunkStarts[chunkIndex] - thisStart - smallSize;
                    thisStart = chunkStarts[chunkIndex];
                }
                endChunk = chunkIndex;
                blockNo++;
                //LOG.info("[SAMAN] NetCDFInputFormatPruner.getSplits => splitSize=" + splitSize + ", thisStart=" + thisStart
                //        + ", endChunk=" + endChunk + ", blockNo=" + blockNo);
                System.out.println("[SAMAN] NetCDFInputFormatPruner.getSplits => splitSize=" + splitSize
                        + ", thisStart=" + thisStart + ", endChunk=" + endChunk + ", blockNo=" + blockNo);
                String[] splitHosts = getSplitHosts(blkLocations, tempStart, splitSize, clusterMap);
                NetCDFFileSplit split = new NetCDFFileSplit(path, tempStart, splitSize, splitHosts);

                if (chooseLat) {
                    if (latTopTemp < thisChunk) {
                        bytesRemaining -= splitSize;
                        thisChunk = endChunk;
                        continue;
                    }
                    if (latBottomTemp > endChunk) {
                        bytesRemaining -= splitSize;
                        thisChunk = endChunk;
                        continue;
                    }
                    blockToNodes.put(split, splitHosts);
                    // Put the nodes with the specified split into the node to block set
                    for (int i = 0; i < splitHosts.length; i++) {
                        Set<NetCDFFileSplit> splitList = nodeToBlocks.get(splitHosts[i]);
                        if (splitList == null) {
                            splitList = new LinkedHashSet<NetCDFFileSplit>();
                            nodeToBlocks.put(splitHosts[i], splitList);
                        }
                        splitList.add(split);
                    }
                    // For the test, we would assign everything statically.
                    if (latBottomLimit > thisChunk) {
                        System.out.println(
                                "[SAMAN][NetCDFInputFormatPrunerByFileIndex][getSplits] startChunk = " + latBottomLimit);
                        split.getFileSplit().startChunk.add((long) latBottomLimit);
                    } else {
                        split.getFileSplit().startChunk.add(thisChunk);
                    }
                    if (latTopLimit < endChunk) {
                        System.out.println(
                                "[SAMAN][NetCDFInputFormatPrunerByFileIndex][getSplits] endChunk = " + latTopLimit);
                        split.getFileSplit().endChunk.add((long) latTopLimit);
                    } else {
                        split.getFileSplit().endChunk.add(endChunk);
                    }
                    split.getFileSplit().secondDimStartChunk.add((long) lonBottomTemp);
                    split.getFileSplit().secondDimEndChunk.add((long) lonTopTemp);
                }
                if (!chooseLat) {
                    if (lonTopTemp < thisChunk) {
                        bytesRemaining -= splitSize;
                        thisChunk = endChunk;
                        continue;
                    }
                    if (lonBottomTemp > endChunk) {
                        bytesRemaining -= splitSize;
                        thisChunk = endChunk;
                        continue;
                    }
                    blockToNodes.put(split, splitHosts);
                    // Put the nodes with the specified split into the node to block set
                    for (int i = 0; i < splitHosts.length; i++) {
                        Set<NetCDFFileSplit> splitList = nodeToBlocks.get(splitHosts[i]);
                        if (splitList == null) {
                            splitList = new LinkedHashSet<NetCDFFileSplit>();
                            nodeToBlocks.put(splitHosts[i], splitList);
                        }
                        splitList.add(split);
                    }
                    if (lonBottomLimit > thisChunk) {
                        System.out.println(
                                "[SAMAN][NetCDFInputFormatPrunerByFileIndex][getSplits] startChunk = " + lonBottomLimit);
                        split.getFileSplit().startChunk.add((long) lonBottomLimit);
                    } else {
                        split.getFileSplit().startChunk.add(thisChunk);
                    }
                    if (lonTopLimit < endChunk) {
                        System.out.println(
                                "[SAMAN][NetCDFInputFormatPrunerByFileIndex][getSplits] endChunk = " + lonTopLimit);
                        split.getFileSplit().endChunk.add((long) lonTopLimit);
                    } else {
                        split.getFileSplit().endChunk.add(endChunk);
                    }
                    split.getFileSplit().secondDimStartChunk.add((long) latBottomTemp);
                    split.getFileSplit().secondDimEndChunk.add((long) latTopTemp);
                }
                splits.add(split);
                bytesRemaining -= splitSize;
                thisChunk = endChunk;
                //LOG.info("[SAMAN] NetCDFInputFormatPruner.getSplits => bytesRemaining=" + bytesRemaining + ", thisChunk=" + thisChunk);
                //System.out.println("[SAMAN] NetCDFInputFormatPruner.getSplits => bytesRemaining=" + bytesRemaining + ", thisChunk=" + thisChunk);
            }
        } else if (length != 0) {
            String[] splitHosts = getSplitHosts(blkLocations, 0, length, clusterMap);
            //splits.add(new FileSplit(path, 0, length, splitHosts));
        } else {
            // Create empty hosts array for zero length files
            //splits.add(new FileSplit(path, 0, length, new String[0]));
        }
    }

    // Now it's time to merge non-complete splits.
    // Check if each split has enough space to include another split too
    Set<String> completedNodes = new HashSet<String>();
    ArrayList<NetCDFFileSplit> validBlocks = new ArrayList<NetCDFFileSplit>();
    long curSplitSize = 0;
    Multiset<String> splitsPerNode = HashMultiset.create();
    for (Iterator<Map.Entry<String, Set<NetCDFFileSplit>>> iter = nodeToBlocks.entrySet().iterator(); iter
            .hasNext();) {
        Map.Entry<String, Set<NetCDFFileSplit>> one = iter.next();
        String node = one.getKey();
        System.out.println("[SAMAN][NetCDFInputFormatPrunerByFileIndexMultiFile][getSplits] node is = " + node);

        // Skip the node if it has previously been marked as completed.
        if (completedNodes.contains(node)) {
            continue;
        }
        Set<NetCDFFileSplit> blocksInCurrentNode = one.getValue();
        // for each block, copy it into validBlocks. Delete it from
        // blockToNodes so that the same block does not appear in
        // two different splits.
        Iterator<NetCDFFileSplit> oneBlockIter = blocksInCurrentNode.iterator();
        while (oneBlockIter.hasNext()) {
            NetCDFFileSplit oneblock = oneBlockIter.next();
            System.out.println("[SAMAN][NetCDFInputFormatPrunerByFileIndexMultiFile][getSplits] "
                    + "split is: " + oneblock.getFileSplit().getPath());
            // Remove all blocks which may already have been assigned to other splits.
            if (!blockToNodes.containsKey(oneblock)) {
                oneBlockIter.remove();
                continue;
            }
            validBlocks.add(oneblock);
            if (chooseLat) {
                curSplitSize += (oneblock.getFileSplit().endChunk.get(0)
                        - oneblock.getFileSplit().startChunk.get(0)) * 4 * netInfo.lonLength * netInfo.timeLength;
            } else {
                curSplitSize += (oneblock.getFileSplit().endChunk.get(0)
                        - oneblock.getFileSplit().startChunk.get(0)) * 4 * netInfo.latLength * netInfo.timeLength;
            }
            blockToNodes.remove(oneblock);
            System.out.println("[SAMAN][NetCDFInputFormatPrunerByFileIndexMultiFile][getSplits] curSplitSize = "
                    + curSplitSize);
            //curSplitSize += singleSplitSize;
            System.out.println("[SAMAN][NetCDFInputFormatPrunerByFileIndexMultiFile][getSplits] "
                    + "Added to valid blocks!");

            // if the accumulated split size exceeds the maximum, then create this split.
            if (blockSize != 0 && curSplitSize >= blockSize) {
                // create an input split and add it to the splits array
                addCreatedSplit(finalSplits, Collections.singleton(node), validBlocks);
                //totalLength -= curSplitSize;
                System.out.println("[SAMAN][NetCDFInputFormatPrunerByFileIndexMultiFile][getSplits] "
                        + "addCreatedSplit called!");
                curSplitSize = 0;
                splitsPerNode.add(node);
                // Remove entries from blocksInNode so that we don't walk these again.
                //blocksInCurrentNode.removeAll(validBlocks);
                validBlocks.clear();
                // Done creating a single split for this node. Move on to the next
                // node so that splits are distributed across nodes.
                //break;
            }
        }
        if (!validBlocks.isEmpty()) {
            System.out.println(
                    "[SAMAN][NetCDFInputFormatPrunerByFileIndexMultiFile][getSplits] validBlocks not empty!");
            addCreatedSplit(finalSplits, Collections.singleton(node), validBlocks);
            curSplitSize = 0;
            splitsPerNode.add(node);
            blocksInCurrentNode.removeAll(validBlocks);
            validBlocks.clear();
        }
    }

    Set<NetCDFFileSplit> singleSplitsSet = blockToNodes.keySet();
    Iterator itrSingle = singleSplitsSet.iterator();
    while (itrSingle.hasNext()) {
        NetCDFFileSplit temp = (NetCDFFileSplit) itrSingle.next();
        addCreatedSingleSplit(finalSplits, temp.getLocations(), temp);
    }

    Iterator itr = finalSplits.iterator();
    while (itr.hasNext()) {
        NetCDFFileSplit temp = (NetCDFFileSplit) itr.next();
        String[] locations = temp.getFileSplit().getLocations();
        String locationsString = "";
        for (int i = 0; i < locations.length; i++)
            locationsString += locations[i];
        String pathsString = "";
        List<Path> paths = temp.getFileSplit().getPaths();
        for (Path path : paths)
            pathsString += path.getName() + ",";
        String startsString = "";
        List<Long> starts = temp.getFileSplit().startChunk;
        for (Long start : starts)
            startsString += (start + ",");
        String endsString = "";
        List<Long> ends = temp.getFileSplit().endChunk;
        for (Long end : ends)
            endsString += (end + ",");
        System.out.println("[SAMAN][NetCDFInputFormatPrunerByFileIndexMultiFile][getSplits] "
                + "locations=" + locationsString + "," + "paths=" + pathsString + ","
                + "starts=" + startsString + "," + "ends=" + endsString + ",");
    }
    return finalSplits.toArray(new NetCDFFileSplit[finalSplits.size()]);
}
From source file:io.hops.ha.common.FiCaSchedulerAppInfo.java
public void addSchedulingOppurtunity(Priority p, int count) {
    if (schedulingOpportunitiesToAdd == null) {
        schedulingOpportunitiesToAdd = HashMultiset.create();
    }
    schedulingOpportunitiesToAdd.setCount(p, count);
}
From source file:it.units.malelab.ege.distributed.master.UIRunnable.java
private void inc(String keyName, Object keyValue, JobInfo.Status status,
        Map<String, Map<Object, Multiset<JobInfo.Status>>> map) {
    Map<Object, Multiset<JobInfo.Status>> valueCounts = map.get(keyName);
    if (valueCounts == null) {
        valueCounts = new TreeMap<>();
        map.put(keyName, valueCounts);
    }
    Multiset<JobInfo.Status> statuses = valueCounts.get(keyValue);
    if (statuses == null) {
        statuses = HashMultiset.create();
        valueCounts.put(keyValue, statuses);
    }
    statuses.add(status);
}
From source file:io.hops.ha.common.FiCaSchedulerAppInfo.java
public void addReReservation(Priority p) {
    if (reReservations == null) {
        reReservations = HashMultiset.create();
    }
    reReservations.add(p);
}
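Note the two different mutation styles in these FiCaSchedulerAppInfo helpers: Multiset.setCount(element, count) overwrites whatever count the element had, while Multiset.add(element) increments its count by one. Both are standard Guava Multiset operations, and the lazy HashMultiset.create() keeps the field null until it is first needed.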
From source file:i5.las2peer.services.recommender.librec.data.FilmTrustDataDAO.java
/**
 * Print out distributions of the dataset <br/>
 *
 * <ul>
 * <li>#users (y) -- #ratings (x) (that are issued by each user)</li>
 * <li>#items (y) -- #ratings (x) (that are received by each item)</li>
 * </ul>
 */
public void printDistr(boolean isWriteOut) throws Exception {
    if (rateMatrix == null)
        readData();

    // count how many users give the same number of ratings
    Multiset<Integer> numURates = HashMultiset.create();
    // count how many items receive the same number of ratings
    Multiset<Integer> numIRates = HashMultiset.create();

    for (int r = 0, rm = rateMatrix.numRows; r < rm; r++) {
        int numRates = rateMatrix.rowSize(r);
        numURates.add(numRates);
    }
    for (int c = 0, cm = rateMatrix.numColumns; c < cm; c++) {
        int numRates = rateMatrix.columnSize(c);
        numIRates.add(numRates);
    }

    String ustrs = Strings.toString(numURates);
    String istrs = Strings.toString(numIRates);

    if (isWriteOut) {
        FileIO.writeString(FileIO.desktop + "user-distr.txt", ustrs);
        FileIO.writeString(FileIO.desktop + "item-distr.txt", istrs);
    } else {
        Logs.debug("#ratings (x) ~ #users (y): \n" + ustrs);
        Logs.debug("#ratings (x) ~ #items (y): \n" + istrs);
    }
    Logs.debug("Done!");
}
From source file:org.apache.hadoop.mapred.NetCDFInputFormatPrunerByFileIndexMultiFile.java
@Override
public InputSplit[] getSplits(JobConf job, int numSplits) throws IOException {
    FileStatus[] files = listStatus(job);
    LOG.info("[SAMAN][NetCDFInputFormatPrunerByFileIndex][getSplits] hive query is: "
            + job.get(HIVE_QUERY, "Kossher"));
    System.out.println("[SAMAN][NetCDFInputFormatPrunerByFileIndex][getSplits] hive query is: "
            + job.get(HIVE_QUERY, "Kossher"));

    /* Analyzing Query here */
    String hiveQuery = job.get(HIVE_QUERY, "Kossher");
    QueryType queryType = QueryType.NOLIMIT; // default mode
    if (hiveQuery.contains("where") || hiveQuery.contains("WHERE")) {
        if (hiveQuery.contains("time") || hiveQuery.contains("TIME")) {
            queryType = QueryType.TIME;
        } else if (hiveQuery.contains("lat") || hiveQuery.contains("LAT")) {
            queryType = QueryType.LAT;
        } else if (hiveQuery.contains("lon") || hiveQuery.contains("LON")) {
            queryType = QueryType.LON;
        }
    }
    float topLimit = -1;
    float bottomLimit = -1;
    if (queryType != QueryType.NOLIMIT) {
        if (hiveQuery.contains("<")) {
            String[] querySplitted = hiveQuery.split(" ");
            int i = Arrays.asList(querySplitted).indexOf("<");
            topLimit = Float.valueOf(querySplitted[i + 1]);
        }
        if (hiveQuery.contains(">")) {
            String[] querySplitted = hiveQuery.split(" ");
            int i = Arrays.asList(querySplitted).indexOf(">");
            bottomLimit = Float.valueOf(querySplitted[i + 1]);
        }
    }
    //System.out.println("[SAMAN][NetCDFInputFormatPrunerByFileIndex] QueryType = " + queryType.toString()
    //        + ", topLimit = " + topLimit + ", bottomLimit = " + bottomLimit);
    //LOG.info("[SAMAN][NetCDFInputFormatPrunerByFileIndex] QueryType = " + queryType.toString()
    //        + ", topLimit = " + topLimit + ", bottomLimit = " + bottomLimit);
    /* End Analyzing Query here */

    System.out.println("[SAMANPruner] beginning of getSplits");
    LOG.info("[SAMANPruner] beginning of getSplits");
    //System.out.println("[SAMAN] " + files.length);
    //LOG.info("[SAMAN] " + files.length);

    // Save the number of input files in the job-conf
    job.setLong(NUM_INPUT_FILES, files.length);

    long totalSize = 0; // compute total size
    for (FileStatus file : files) { // check we have valid files
        if (file.isDir()) {
            throw new IOException("Not a file: " + file.getPath());
        }
        totalSize += file.getLen();
    }
    //long minSize = Math.max(job.getLong("mapred.min.split.size", 1), minSplitSize);

    // generate splits
    ArrayList<NetCDFFileSplit> splits = new ArrayList<NetCDFFileSplit>(numSplits);
    ArrayList<NetCDFFileSplit> finalSplits = new ArrayList<NetCDFFileSplit>();
    NetworkTopology clusterMap = new NetworkTopology();
    for (FileStatus file : files) {
        Path path = file.getPath();
        int fileIndex = 0;
        int dimIndex = 0;
        if (queryType == QueryType.TIME || queryType == QueryType.NOLIMIT) {
            if (path.getName().contains("lat") || path.getName().contains("lon"))
                continue;
        } else if (queryType == QueryType.LAT) {
            if (!path.getName().contains("lat"))
                continue;
        } else if (queryType == QueryType.LON) {
            if (!path.getName().contains("lon"))
                continue;
        }
        if (queryType == QueryType.TIME) {
            String[] parts = path.getName().split("-");
            fileIndex = Integer.valueOf(parts[1]);
        } else if (queryType == QueryType.LAT || queryType == QueryType.LON) {
            if (path.getName().contains("_")) {
                String[] parts = path.getName().split("_");
                fileIndex = Integer.valueOf(parts[2]);
                dimIndex = Integer.valueOf(parts[0].substring(7));
            } else {
                //dimIndex = Integer.valueOf(path.getName().substring(7));
                String[] parts = path.getName().split("-");
                dimIndex = Integer.valueOf(parts[1]);
            }
        }
        //LOG.info("[SAMAN][NetCDFInputFormatPrunerByFileIndex][getSplits] File name is : " + path.getName());
        System.out.println("[SAMAN][NetCDFInputFormatPrunerByFileIndex][getSplits] File name is : " + path.getName());
        FileSystem fs = path.getFileSystem(job);
        long length = file.getLen();
        BlockLocation[] blkLocations = fs.getFileBlockLocations(file, 0, length);
        if ((length != 0) && isSplitable(fs, path)) {
            long blockSize = file.getBlockSize();
            netInfo = getNetCDFInfo(path, fs, job);
            long recStart = netInfo.recStart;
            long[] chunkStarts = netInfo.chunkStarts;
            long smallSize = netInfo.smallRecSize;
            long recSize = netInfo.recSize;
            long splitSize = 0;
            int chunkIndex = 0;
            long bytesRemaining = chunkStarts[chunkStarts.length - 1] + recSize - recStart - 2 * smallSize;
            long thisStart = recStart; // file position
            long thisChunk = 0;
            long blockNo = 1;
            long numChunksPerKey = 0;
            if (queryType == QueryType.LAT) {
                long chunkSize = netInfo.timeLength * netInfo.lonLength * 4;
                numChunksPerKey = blockSize / chunkSize;
            } else if (queryType == QueryType.LON) {
                long chunkSize = netInfo.timeLength * netInfo.latLength * 4;
                numChunksPerKey = blockSize / chunkSize;
            }
            System.out.println("[SAMAN][NetCDFInputFormat][getSplits] numChunksPerKey = " + numChunksPerKey);
            //LOG.info("[SAMAN] NetCDFInputFormatPruner.getSplits => recStart = " + recStart + ", chunkStarts = " + chunkStarts
            //        + ", smallSize = " + smallSize + ", recSize = " + recSize + ", bytesRemaining = " + bytesRemaining
            //        + ", thisStart = " + thisStart);
            //System.out.println("[SAMAN] NetCDFInputFormatPruner.getSplits => recStart = " + recStart + ", chunkStarts = " + chunkStarts
            //        + ", smallSize = " + smallSize + ", recSize = " + recSize + ", bytesRemaining = " + bytesRemaining
            //        + ", thisStart = " + thisStart);
            while (bytesRemaining > 0) {
                while (chunkIndex < chunkStarts.length && chunkStarts[chunkIndex] < blockNo * blockSize) {
                    chunkIndex++;
                }
                long tempStart = thisStart;
                long endChunk;
                if (chunkIndex >= chunkStarts.length) {
                    splitSize = chunkStarts[chunkStarts.length - 1] + recSize - thisStart - smallSize;
                    //bytesRemaining should be 0 after this round
                } else {
                    splitSize = chunkStarts[chunkIndex] - thisStart - smallSize;
                    thisStart = chunkStarts[chunkIndex];
                }
                endChunk = chunkIndex;
                blockNo++;
                //LOG.info("[SAMAN] NetCDFInputFormatPruner.getSplits => splitSize=" + splitSize + ", thisStart=" + thisStart
                //        + ", endChunk=" + endChunk + ", blockNo=" + blockNo);
                System.out.println("[SAMAN] NetCDFInputFormatPruner.getSplits => splitSize=" + splitSize
                        + ", thisStart=" + thisStart + ", endChunk=" + endChunk + ", blockNo=" + blockNo);
                String[] splitHosts = getSplitHosts(blkLocations, tempStart, splitSize, clusterMap);
                NetCDFFileSplit split = new NetCDFFileSplit(path, tempStart, splitSize, splitHosts);

                if (queryType == QueryType.TIME) {
                    if ((topLimit < thisChunk + (fileIndex * netInfo.timeLength)) && (topLimit != -1)) {
                        bytesRemaining -= splitSize;
                        thisChunk = endChunk;
                        continue;
                    }
                    if ((bottomLimit > endChunk + (fileIndex * netInfo.timeLength)) && (bottomLimit != -1)) {
                        bytesRemaining -= splitSize;
                        thisChunk = endChunk;
                        continue;
                    }
                    blockToNodes.put(split, splitHosts);
                    // Put the nodes with the specified split into the node to block set
                    System.out.println(
                            "[SAMAN][NetCDFInputFormat][getSplits] Put the nodes with the specified split into the node to block set");
                    for (int i = 0; i < splitHosts.length; i++) {
                        Set<NetCDFFileSplit> splitList = nodeToBlocks.get(splitHosts[i]);
                        if (splitList == null) {
                            splitList = new LinkedHashSet<NetCDFFileSplit>();
                            nodeToBlocks.put(splitHosts[i], splitList);
                        }
                        splitList.add(split);
                    }
                    System.out.println("[SAMAN][NetCDFInputFormat][getSplits] set start and end!");
                    split.getFileSplit().startChunk.add(thisChunk);
                    split.getFileSplit().endChunk.add(endChunk);
                } else if (queryType == QueryType.LAT || queryType == QueryType.LON) {
                    //System.out.println("[SAMAN][NetCDFInputFormat][getSplits] file = "
                    //        + path.getName() + ", topLimit = " + topLimit + ", bottomLimit = " + bottomLimit + ", dimIndex = " + dimIndex);
                    /*
                    if (topLimit < dimIndex * numChunksPerKey && (topLimit != -1)) {
                        bytesRemaining -= splitSize;
                        thisChunk = endChunk;
                        continue;
                    }
                    if (bottomLimit > dimIndex * numChunksPerKey && (bottomLimit != -1)) {
                        bytesRemaining -= splitSize;
                        thisChunk = endChunk;
                        continue;
                    }
                    */
                    if (topLimit < thisChunk && (topLimit != -1)) {
                        bytesRemaining -= splitSize;
                        thisChunk = endChunk;
                        continue;
                    }
                    if (bottomLimit > endChunk && (bottomLimit != -1)) {
                        bytesRemaining -= splitSize;
                        thisChunk = endChunk;
                        continue;
                    }
                    /*
                    if ((topLimit < thisChunk) && (topLimit != -1)) {
                        bytesRemaining -= splitSize;
                        thisChunk = endChunk;
                        continue;
                    }
                    if ((bottomLimit > endChunk) && (bottomLimit != -1)) {
                        bytesRemaining -= splitSize;
                        thisChunk = endChunk;
                        continue;
                    }
                    */
                    //split.getNetCDFFileSplit().endChunk = (long) topLimit;
                    /*
                    split.getFileSplit().startChunk.add(thisChunk);
                    split.getFileSplit().endChunk.add(endChunk);
                    */
                    // Put the block into the block to node set
                    blockToNodes.put(split, splitHosts);
                    // Put the nodes with the specified split into the node to block set
                    for (int i = 0; i < splitHosts.length; i++) {
                        Set<NetCDFFileSplit> splitList = nodeToBlocks.get(splitHosts[i]);
                        if (splitList == null) {
                            splitList = new LinkedHashSet<NetCDFFileSplit>();
                            nodeToBlocks.put(splitHosts[i], splitList);
                        }
                        splitList.add(split);
                    }
                    // For the test, we would assign everything statically.
                    if (bottomLimit > thisChunk && (bottomLimit != -1)) {
                        System.out.println(
                                "[SAMAN][NetCDFInputFormatPrunerByFileIndex][getSplits] startChunk = " + bottomLimit);
                        split.getFileSplit().startChunk.add((long) bottomLimit);
                    } else {
                        split.getFileSplit().startChunk.add(thisChunk);
                    }
                    if (topLimit < endChunk && (topLimit != -1)) {
                        System.out.println(
                                "[SAMAN][NetCDFInputFormatPrunerByFileIndex][getSplits] endChunk = " + endChunk);
                        split.getFileSplit().endChunk.add((long) topLimit);
                    } else {
                        split.getFileSplit().endChunk.add(endChunk);
                    }
                } else {
                    if ((topLimit < thisChunk) && (topLimit != -1)) {
                        bytesRemaining -= splitSize;
                        thisChunk = endChunk;
                        continue;
                    }
                    if ((bottomLimit > endChunk) && (bottomLimit != -1)) {
                        bytesRemaining -= splitSize;
                        thisChunk = endChunk;
                        continue;
                    }
                    blockToNodes.put(split, splitHosts);
                    // Put the nodes with the specified split into the node to block set
                    for (int i = 0; i < splitHosts.length; i++) {
                        Set<NetCDFFileSplit> splitList = nodeToBlocks.get(splitHosts[i]);
                        if (splitList == null) {
                            splitList = new LinkedHashSet<NetCDFFileSplit>();
                            nodeToBlocks.put(splitHosts[i], splitList);
                        }
                        splitList.add(split);
                    }
                    split.getFileSplit().startChunk.add(thisChunk);
                    split.getFileSplit().endChunk.add(endChunk);
                }
                splits.add(split);
                bytesRemaining -= splitSize;
                thisChunk = endChunk;
                //LOG.info("[SAMAN] NetCDFInputFormatPruner.getSplits => bytesRemaining=" + bytesRemaining + ", thisChunk=" + thisChunk);
                //System.out.println("[SAMAN] NetCDFInputFormatPruner.getSplits => bytesRemaining=" + bytesRemaining + ", thisChunk=" + thisChunk);
            }
        } else if (length != 0) {
            String[] splitHosts = getSplitHosts(blkLocations, 0, length, clusterMap);
            //splits.add(new FileSplit(path, 0, length, splitHosts));
        } else {
            // Create empty hosts array for zero length files
            //splits.add(new FileSplit(path, 0, length, new String[0]));
        }
    }

    // Now it's time to merge non-complete splits.
    // Check if each split has enough space to include another split too
    Set<String> completedNodes = new HashSet<String>();
    ArrayList<NetCDFFileSplit> validBlocks = new ArrayList<NetCDFFileSplit>();
    long curSplitSize = 0;
    Multiset<String> splitsPerNode = HashMultiset.create();
    for (Iterator<Map.Entry<String, Set<NetCDFFileSplit>>> iter = nodeToBlocks.entrySet().iterator(); iter
            .hasNext();) {
        Map.Entry<String, Set<NetCDFFileSplit>> one = iter.next();
        String node = one.getKey();
        System.out.println("[SAMAN][NetCDFInputFormatPrunerByFileIndexMultiFile][getSplits] node is = " + node);

        // Skip the node if it has previously been marked as completed.
        if (completedNodes.contains(node)) {
            continue;
        }
        Set<NetCDFFileSplit> blocksInCurrentNode = one.getValue();
        // for each block, copy it into validBlocks. Delete it from
        // blockToNodes so that the same block does not appear in
        // two different splits.
        Iterator<NetCDFFileSplit> oneBlockIter = blocksInCurrentNode.iterator();
        while (oneBlockIter.hasNext()) {
            NetCDFFileSplit oneblock = oneBlockIter.next();
            System.out.println("[SAMAN][NetCDFInputFormatPrunerByFileIndexMultiFile][getSplits] "
                    + "split is: " + oneblock.getFileSplit().getPath());
            // Remove all blocks which may already have been assigned to other splits.
            if (!blockToNodes.containsKey(oneblock)) {
                oneBlockIter.remove();
                continue;
            }
            validBlocks.add(oneblock);
            if (queryType == QueryType.LAT) {
                curSplitSize += (oneblock.getFileSplit().endChunk.get(0)
                        - oneblock.getFileSplit().startChunk.get(0)) * 4 * netInfo.lonLength * netInfo.timeLength;
            } else if (queryType == QueryType.LON) {
                curSplitSize += (oneblock.getFileSplit().endChunk.get(0)
                        - oneblock.getFileSplit().startChunk.get(0)) * 4 * netInfo.latLength * netInfo.timeLength;
            } else if (queryType == QueryType.TIME) {
                curSplitSize += (oneblock.getFileSplit().endChunk.get(0)
                        - oneblock.getFileSplit().startChunk.get(0)) * 4 * netInfo.latLength * netInfo.lonLength;
            } else {
                curSplitSize += (oneblock.getFileSplit().endChunk.get(0)
                        - oneblock.getFileSplit().startChunk.get(0)) * 4 * netInfo.latLength * netInfo.lonLength;
            }
            blockToNodes.remove(oneblock);
            System.out.println("[SAMAN][NetCDFInputFormatPrunerByFileIndexMultiFile][getSplits] curSplitSize = "
                    + curSplitSize);
            //curSplitSize += singleSplitSize;
            System.out.println("[SAMAN][NetCDFInputFormatPrunerByFileIndexMultiFile][getSplits] "
                    + "Added to valid blocks!");

            // if the accumulated split size exceeds the maximum, then create this split.
            if (blockSize != 0 && curSplitSize >= blockSize) {
                // create an input split and add it to the splits array
                addCreatedSplit(finalSplits, Collections.singleton(node), validBlocks);
                //totalLength -= curSplitSize;
                System.out.println("[SAMAN][NetCDFInputFormatPrunerByFileIndexMultiFile][getSplits] "
                        + "addCreatedSplit called!");
                curSplitSize = 0;
                splitsPerNode.add(node);
                // Remove entries from blocksInNode so that we don't walk these again.
                //blocksInCurrentNode.removeAll(validBlocks);
                validBlocks.clear();
                // Done creating a single split for this node. Move on to the next
                // node so that splits are distributed across nodes.
                //break;
            }
        }
        if (!validBlocks.isEmpty()) {
            System.out.println(
                    "[SAMAN][NetCDFInputFormatPrunerByFileIndexMultiFile][getSplits] validBlocks not empty!");
            addCreatedSplit(finalSplits, Collections.singleton(node), validBlocks);
            curSplitSize = 0;
            splitsPerNode.add(node);
            blocksInCurrentNode.removeAll(validBlocks);
            validBlocks.clear();
        }
    }

    Set<NetCDFFileSplit> singleSplitsSet = blockToNodes.keySet();
    Iterator itrSingle = singleSplitsSet.iterator();
    while (itrSingle.hasNext()) {
        NetCDFFileSplit temp = (NetCDFFileSplit) itrSingle.next();
        addCreatedSingleSplit(finalSplits, temp.getLocations(), temp);
    }

    Iterator itr = finalSplits.iterator();
    while (itr.hasNext()) {
        NetCDFFileSplit temp = (NetCDFFileSplit) itr.next();
        String[] locations = temp.getFileSplit().getLocations();
        String locationsString = "";
        for (int i = 0; i < locations.length; i++)
            locationsString += locations[i];
        String pathsString = "";
        List<Path> paths = temp.getFileSplit().getPaths();
        for (Path path : paths)
            pathsString += path.getName() + ",";
        String startsString = "";
        List<Long> starts = temp.getFileSplit().startChunk;
        for (Long start : starts)
            startsString += (start + ",");
        String endsString = "";
        List<Long> ends = temp.getFileSplit().endChunk;
        for (Long end : ends)
            endsString += (end + ",");
        System.out.println("[SAMAN][NetCDFInputFormatPrunerByFileIndexMultiFile][getSplits] "
                + "locations=" + locationsString + "," + "paths=" + pathsString + ","
                + "starts=" + startsString + "," + "ends=" + endsString + ",");
    }
    return finalSplits.toArray(new NetCDFFileSplit[finalSplits.size()]);
}
From source file:BibTex.IOmethods.java
public void writeJournalsPerCategories(Set<BibTexRef> refs) throws IOException {
    JournalAbbreviationsMapping jmap = new JournalAbbreviationsMapping();
    jmap.loadMap();

    BufferedWriter bw = new BufferedWriter(new FileWriter(folder + "journals per categories.csv"));
    StringBuilder sb = new StringBuilder();
    String sep = "|";

    // creation of 2 convenient data structures for I/O
    Map<String, Multiset<String>> categoriesToJournals = new TreeMap<>();
    List<String> categoryNames = new ArrayList<>();

    for (BibTexRef ref : refs) {
        Set<Category> categories = ref.getCategories();
        String title = ref.getJournal();
        if (title == null || title.isEmpty()) {
            continue;
        }
        title = title.toLowerCase();
        Set<String> abbrev = (Set<String>) jmap.getJournalsToAbbrev().get(title);
        if (abbrev == null || abbrev.isEmpty()) {
            abbrev = new HashSet<>();
            abbrev.add(title);
        }
        String abbreviation = abbrev.iterator().next();

        for (Category category : categories) {
            if (!categoryNames.contains(category.getCategoryName())) {
                categoryNames.add(category.getCategoryName());
            }
            if (categoriesToJournals.containsKey(category.getCategoryName())) {
                categoriesToJournals.get(category.getCategoryName()).add(abbreviation);
            } else {
                Multiset<String> journalsForOneCategory = HashMultiset.create();
                journalsForOneCategory.add(abbreviation);
                categoriesToJournals.put(category.getCategoryName(), journalsForOneCategory);
            }
        }
    }
    Collections.sort(categoryNames);

    // writing of the first line of the csv: headers of the categories.
    for (String categoryName : categoryNames) {
        sb.append(categoryName);
        sb.append(sep);
    }
    sb.append("\n");

    // writing of all subsequent lines: one per year
    int countCategoriesdone = 0;
    boolean continueLoop = true;
    while (continueLoop) {
        for (Iterator<String> it = categoriesToJournals.keySet().iterator(); it.hasNext();) {
            String category = it.next();
            Multiset<String> journalsForOneCategory = categoriesToJournals.get(category);
            Iterator<String> journalsIterator = Multisets.copyHighestCountFirst(journalsForOneCategory)
                    .elementSet().iterator();
            if (journalsIterator.hasNext()) {
                String journal = journalsIterator.next();
                sb.append(journal).append(" (").append(journalsForOneCategory.count(journal)).append(")")
                        .append(sep);
                journalsForOneCategory.remove(journal, journalsForOneCategory.count(journal));
            } else {
                sb.append(sep);
            }
        }
        sb.append("\n");

        for (String cat : categoriesToJournals.keySet()) {
            if (categoriesToJournals.get(cat).isEmpty()) {
                countCategoriesdone++;
            }
        }
        if (countCategoriesdone == categoryNames.size()) {
            continueLoop = false;
        } else {
            countCategoriesdone = 0;
        }
    }
    bw.write(sb.toString());
    bw.close();
}
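The descending-frequency output in this example relies on Multisets.copyHighestCountFirst(multiset), which returns an immutable copy of the multiset whose iteration order puts the highest-count elements first. Because each pass of the while loop then removes all occurrences of the journal it just wrote, every row of the CSV holds the next most frequent journal for each category.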
From source file:org.apache.hadoop.mapred.NetCDFInputFormatPartToMemoryMultiSplit.java
@Override
public InputSplit[] getSplits(JobConf job, int numSplits) throws IOException {
    FileStatus[] files = listStatus(job);
    LOG.info("[SAMAN][NetCDFInputFormatPartToMemoryMultiSplit][getSplits] hive query is: "
            + job.get(HIVE_QUERY, "Kossher"));
    System.out.println("[SAMAN][NetCDFInputFormatPartToMemoryMultiSplit][getSplits] hive query is: "
            + job.get(HIVE_QUERY, "Kossher"));

    /* Analyzing Query here */
    String hiveQuery = job.get(HIVE_QUERY, "Kossher");
    QueryType queryType = QueryType.NOLIMIT; // default mode
    if (hiveQuery.contains("where") || hiveQuery.contains("WHERE")) {
        if (hiveQuery.contains("time") || hiveQuery.contains("TIME")) {
            queryType = QueryType.TIME;
        } else if (hiveQuery.contains("lat") || hiveQuery.contains("LAT")) {
            queryType = QueryType.LAT;
        } else if (hiveQuery.contains("lon") || hiveQuery.contains("LON")) {
            queryType = QueryType.LON;
        }
    }
    float topLimit = -1;
    float bottomLimit = -1;
    if (queryType != QueryType.NOLIMIT) {
        if (hiveQuery.contains("<")) {
            String[] querySplitted = hiveQuery.split(" ");
            int i = Arrays.asList(querySplitted).indexOf("<");
            topLimit = Float.valueOf(querySplitted[i + 1]);
        }
        if (hiveQuery.contains(">")) {
            String[] querySplitted = hiveQuery.split(" ");
            int i = Arrays.asList(querySplitted).indexOf(">");
            bottomLimit = Float.valueOf(querySplitted[i + 1]);
        }
    }
    //System.out.println("[SAMAN][NetCDFInputFormatPrunerByFileIndex] QueryType = " + queryType.toString()
    //        + ", topLimit = " + topLimit + ", bottomLimit = " + bottomLimit);
    //LOG.info("[SAMAN][NetCDFInputFormatPrunerByFileIndex] QueryType = " + queryType.toString()
    //        + ", topLimit = " + topLimit + ", bottomLimit = " + bottomLimit);
    /* End Analyzing Query here */

    System.out.println("[SAMANPruner] beginning of getSplits");
    LOG.info("[SAMANPruner] beginning of getSplits");
    //System.out.println("[SAMAN] " + files.length);
    //LOG.info("[SAMAN] " + files.length);

    // Save the number of input files in the job-conf
    job.setLong(NUM_INPUT_FILES, files.length);

    long totalSize = 0; // compute total size
    for (FileStatus file : files) { // check we have valid files
        if (file.isDir()) {
            throw new IOException("Not a file: " + file.getPath());
        }
        totalSize += file.getLen();
    }
    //long minSize = Math.max(job.getLong("mapred.min.split.size", 1), minSplitSize);

    // generate splits
    ArrayList<NetCDFFileSplit> splits = new ArrayList<NetCDFFileSplit>(numSplits);
    ArrayList<NetCDFFileSplit> finalSplits = new ArrayList<NetCDFFileSplit>();
    NetworkTopology clusterMap = new NetworkTopology();
    for (FileStatus file : files) {
        Path path = file.getPath();
        int fileIndex = 0;
        int dimIndex = 0;
        //LOG.info("[SAMAN][NetCDFInputFormatPrunerByFileIndex][getSplits] File name is : " + path.getName());
        System.out.println("[SAMAN][NetCDFInputFormatPartToMemoryMultiSplit][getSplits] File name is : " + path.getName());
        FileSystem fs = path.getFileSystem(job);
        long length = file.getLen();
        BlockLocation[] blkLocations = fs.getFileBlockLocations(file, 0, length);
        if ((length != 0) && isSplitable(fs, path)) {
            long blockSize = file.getBlockSize();
            netInfo = getNetCDFInfo(path, fs, job);
            long recStart = netInfo.recStart;
            long[] chunkStarts = netInfo.chunkStarts;
            long smallSize = netInfo.smallRecSize;
            long recSize = netInfo.recSize;
            long splitSize = 0;
            int chunkIndex = 0;
            long bytesRemaining = chunkStarts[chunkStarts.length - 1] + recSize - recStart - 2 * smallSize;
            long thisStart = recStart; // file position
            long thisChunk = 0;
            long blockNo = 1;
            long numChunksPerKey = 0;
            if (queryType == QueryType.LAT) {
                long chunkSize = netInfo.timeLength * netInfo.lonLength * 4;
                numChunksPerKey = blockSize / chunkSize;
            } else if (queryType == QueryType.LON) {
                long chunkSize = netInfo.timeLength * netInfo.latLength * 4;
                numChunksPerKey = blockSize / chunkSize;
            }
            System.out.println("[SAMAN][NetCDFInputFormat][getSplits] numChunksPerKey = " + numChunksPerKey);
            //LOG.info("[SAMAN] NetCDFInputFormatPruner.getSplits => recStart = " + recStart + ", chunkStarts = " + chunkStarts
            //        + ", smallSize = " + smallSize + ", recSize = " + recSize + ", bytesRemaining = " + bytesRemaining
            //        + ", thisStart = " + thisStart);
            //System.out.println("[SAMAN] NetCDFInputFormatPruner.getSplits => recStart = " + recStart + ", chunkStarts = " + chunkStarts
            //        + ", smallSize = " + smallSize + ", recSize = " + recSize + ", bytesRemaining = " + bytesRemaining
            //        + ", thisStart = " + thisStart);
            while (bytesRemaining > 0) {
                while (chunkIndex < chunkStarts.length && chunkStarts[chunkIndex] < blockNo * blockSize) {
                    chunkIndex++;
                }
                long tempStart = thisStart;
                long endChunk;
                if (chunkIndex >= chunkStarts.length) {
                    splitSize = chunkStarts[chunkStarts.length - 1] + recSize - thisStart - smallSize;
                    //bytesRemaining should be 0 after this round
                } else {
                    splitSize = chunkStarts[chunkIndex] - thisStart - smallSize;
                    thisStart = chunkStarts[chunkIndex];
                }
                endChunk = chunkIndex;
                blockNo++;
                //LOG.info("[SAMAN] NetCDFInputFormatPruner.getSplits => splitSize=" + splitSize + ", thisStart=" + thisStart
                //        + ", endChunk=" + endChunk + ", blockNo=" + blockNo);
                System.out.println("[SAMAN] NetCDFInputFormatPruner.getSplits => splitSize=" + splitSize
                        + ", thisStart=" + thisStart + ", endChunk=" + endChunk + ", blockNo=" + blockNo);
                String[] splitHosts = getSplitHosts(blkLocations, tempStart, splitSize, clusterMap);
                NetCDFFileSplit split = new NetCDFFileSplit(path, tempStart, splitSize, splitHosts);

                split.getFileSplit().startChunk.add(thisChunk);
                split.getFileSplit().endChunk.add(endChunk);
                if (queryType == QueryType.TIME) {
                    split.getFileSplit().timeStartLimit.add((long) bottomLimit);
                    split.getFileSplit().timeEndLimit.add((long) topLimit);
                    split.getFileSplit().latStartLimit.add((long) -1);
                    split.getFileSplit().latEndLimit.add((long) -1);
                    split.getFileSplit().lonStartLimit.add((long) -1);
                    split.getFileSplit().lonEndLimit.add((long) -1);
                } else if (queryType == QueryType.LAT) {
                    split.getFileSplit().timeStartLimit.add((long) -1);
                    split.getFileSplit().timeEndLimit.add((long) -1);
                    split.getFileSplit().latStartLimit.add((long) bottomLimit);
                    split.getFileSplit().latEndLimit.add((long) topLimit);
                    split.getFileSplit().lonStartLimit.add((long) -1);
                    split.getFileSplit().lonEndLimit.add((long) -1);
                } else if (queryType == QueryType.LON) {
                    split.getFileSplit().timeStartLimit.add((long) -1);
                    split.getFileSplit().timeEndLimit.add((long) -1);
                    split.getFileSplit().latStartLimit.add((long) -1);
                    split.getFileSplit().latEndLimit.add((long) -1);
                    split.getFileSplit().lonStartLimit.add((long) bottomLimit);
                    split.getFileSplit().lonEndLimit.add((long) topLimit);
                }
                blockToNodes.put(split, splitHosts);
                System.out.println(
                        "[SAMAN][NetCDFInputFormat][getSplits] Put the nodes with the specified split into the node to block set");
                for (int i = 0; i < splitHosts.length; i++) {
                    Set<NetCDFFileSplit> splitList = nodeToBlocks.get(splitHosts[i]);
                    if (splitList == null) {
                        splitList = new LinkedHashSet<NetCDFFileSplit>();
                        nodeToBlocks.put(splitHosts[i], splitList);
                    }
                    splitList.add(split);
                }
                /*
                if (queryType == QueryType.TIME) {
                    if ((topLimit < thisChunk + (fileIndex * netInfo.timeLength)) && (topLimit != -1)) {
                        bytesRemaining -= splitSize;
                        thisChunk = endChunk;
                        continue;
                    }
                    if ((bottomLimit > endChunk + (fileIndex * netInfo.timeLength)) && (bottomLimit != -1)) {
                        bytesRemaining -= splitSize;
                        thisChunk = endChunk;
                        continue;
                    }
                    blockToNodes.put(split, splitHosts);
                    // Put the nodes with the specified split into the node to block set
                    System.out.println(
                            "[SAMAN][NetCDFInputFormat][getSplits] Put the nodes with the specified split into the node to block set");
                    for (int i = 0; i < splitHosts.length; i++) {
                        Set<NetCDFFileSplit> splitList = nodeToBlocks.get(splitHosts[i]);
                        if (splitList == null) {
                            splitList = new LinkedHashSet<NetCDFFileSplit>();
                            nodeToBlocks.put(splitHosts[i], splitList);
                        }
                        splitList.add(split);
                    }
                    System.out.println("[SAMAN][NetCDFInputFormat][getSplits] set start and end!");
                    split.getFileSplit().startChunk.add(thisChunk);
                    split.getFileSplit().endChunk.add(endChunk);
                } else if (queryType == QueryType.LAT || queryType == QueryType.LON) {
                    //System.out.println("[SAMAN][NetCDFInputFormat][getSplits] file = "
                    //        + path.getName() + ", topLimit = " + topLimit + ", bottomLimit = " + bottomLimit + ", dimIndex = " + dimIndex);
                */
                /*
                    if (topLimit < dimIndex * numChunksPerKey && (topLimit != -1)) {
                        bytesRemaining -= splitSize;
                        thisChunk = endChunk;
                        continue;
                    }
                    if (bottomLimit > dimIndex * numChunksPerKey && (bottomLimit != -1)) {
                        bytesRemaining -= splitSize;
                        thisChunk = endChunk;
                        continue;
                    }
                */
                /*
                    if (topLimit < thisChunk && (topLimit != -1)) {
                        bytesRemaining -= splitSize;
                        thisChunk = endChunk;
                        continue;
                    }
                    if (bottomLimit > endChunk && (bottomLimit != -1)) {
                        bytesRemaining -= splitSize;
                        thisChunk = endChunk;
                        continue;
                    }
                */
                /*
                    if ((topLimit < thisChunk) && (topLimit != -1)) {
                        bytesRemaining -= splitSize;
                        thisChunk = endChunk;
                        continue;
                    }
                    if ((bottomLimit > endChunk) && (bottomLimit != -1)) {
                        bytesRemaining -= splitSize;
                        thisChunk = endChunk;
                        continue;
                    }
                */
                //split.getNetCDFFileSplit().endChunk = (long) topLimit;
                /*
                    split.getFileSplit().startChunk.add(thisChunk);
                    split.getFileSplit().endChunk.add(endChunk);
                */
                // Put the block into the block to node set
                /*
                    blockToNodes.put(split, splitHosts);
                    // Put the nodes with the specified split into the node to block set
                    for (int i = 0; i < splitHosts.length; i++) {
                        Set<NetCDFFileSplit> splitList = nodeToBlocks.get(splitHosts[i]);
                        if (splitList == null) {
                            splitList = new LinkedHashSet<NetCDFFileSplit>();
                            nodeToBlocks.put(splitHosts[i], splitList);
                        }
                        splitList.add(split);
                    }
                    // For the test, we would assign everything statically.
                    if (bottomLimit > thisChunk && (bottomLimit != -1)) {
                        System.out.println(
                                "[SAMAN][NetCDFInputFormatPrunerByFileIndex][getSplits] startChunk = " + bottomLimit);
                        split.getFileSplit().startChunk.add((long) bottomLimit);
                    } else {
                        split.getFileSplit().startChunk.add(thisChunk);
                    }
                    if (topLimit < endChunk && (topLimit != -1)) {
                        System.out.println(
                                "[SAMAN][NetCDFInputFormatPrunerByFileIndex][getSplits] endChunk = " + endChunk);
                        split.getFileSplit().endChunk.add((long) topLimit);
                    } else {
                        split.getFileSplit().endChunk.add(endChunk);
                    }
                } else {
                    if ((topLimit < thisChunk) && (topLimit != -1)) {
                        bytesRemaining -= splitSize;
                        thisChunk = endChunk;
                        continue;
                    }
                    if ((bottomLimit > endChunk) && (bottomLimit != -1)) {
                        bytesRemaining -= splitSize;
                        thisChunk = endChunk;
                        continue;
                    }
                    blockToNodes.put(split, splitHosts);
                    // Put the nodes with the specified split into the node to block set
                    for (int i = 0; i < splitHosts.length; i++) {
                        Set<NetCDFFileSplit> splitList = nodeToBlocks.get(splitHosts[i]);
                        if (splitList == null) {
                            splitList = new LinkedHashSet<NetCDFFileSplit>();
                            nodeToBlocks.put(splitHosts[i], splitList);
                        }
                        splitList.add(split);
                    }
                    split.getFileSplit().startChunk.add(thisChunk);
                    split.getFileSplit().endChunk.add(endChunk);
                }
                */
                splits.add(split);
                bytesRemaining -= splitSize;
                thisChunk = endChunk;
                //LOG.info("[SAMAN] NetCDFInputFormatPruner.getSplits => bytesRemaining=" + bytesRemaining + ", thisChunk=" + thisChunk);
                //System.out.println("[SAMAN] NetCDFInputFormatPruner.getSplits => bytesRemaining=" + bytesRemaining + ", thisChunk=" + thisChunk);
            }
        } else if (length != 0) {
            String[] splitHosts = getSplitHosts(blkLocations, 0, length, clusterMap);
            //splits.add(new FileSplit(path, 0, length, splitHosts));
        } else {
            // Create empty hosts array for zero length files
            //splits.add(new FileSplit(path, 0, length, new String[0]));
        }
    }

    // Now it's time to merge non-complete splits.
    // Check if each split has enough space to include another split too
    Set<String> completedNodes = new HashSet<String>();
    ArrayList<NetCDFFileSplit> validBlocks = new ArrayList<NetCDFFileSplit>();
    long curSplitSize = 0;
    Multiset<String> splitsPerNode = HashMultiset.create();
    for (Iterator<Map.Entry<String, Set<NetCDFFileSplit>>> iter = nodeToBlocks.entrySet().iterator(); iter
            .hasNext();) {
        Map.Entry<String, Set<NetCDFFileSplit>> one = iter.next();
        String node = one.getKey();
        System.out.println("[SAMAN][NetCDFInputFormatPrunerByFileIndexMultiFile][getSplits] node is = " + node);

        // Skip the node if it has previously been marked as completed.
        if (completedNodes.contains(node)) {
            continue;
        }
        Set<NetCDFFileSplit> blocksInCurrentNode = one.getValue();
        // for each block, copy it into validBlocks. Delete it from
        // blockToNodes so that the same block does not appear in
        // two different splits.
        Iterator<NetCDFFileSplit> oneBlockIter = blocksInCurrentNode.iterator();
        while (oneBlockIter.hasNext()) {
            NetCDFFileSplit oneblock = oneBlockIter.next();
            System.out.println("[SAMAN][NetCDFInputFormatPrunerByFileIndexMultiFile][getSplits] "
                    + "split is: " + oneblock.getFileSplit().getPath());
            // Remove all blocks which may already have been assigned to other splits.
            if (!blockToNodes.containsKey(oneblock)) {
                oneBlockIter.remove();
                continue;
            }
            validBlocks.add(oneblock);
            if (queryType == QueryType.LAT) {
                curSplitSize += (oneblock.getFileSplit().latEndLimit.get(0)
                        - oneblock.getFileSplit().latStartLimit.get(0)) * 4 * netInfo.lonLength * netInfo.timeLength;
            } else if (queryType == QueryType.LON) {
                curSplitSize += (oneblock.getFileSplit().lonEndLimit.get(0)
                        - oneblock.getFileSplit().lonStartLimit.get(0)) * 4 * netInfo.latLength * netInfo.timeLength;
            } else if (queryType == QueryType.TIME) {
                curSplitSize += (oneblock.getFileSplit().timeEndLimit.get(0)
                        - oneblock.getFileSplit().timeStartLimit.get(0)) * 4 * netInfo.latLength * netInfo.lonLength;
            } else {
                curSplitSize += (oneblock.getFileSplit().endChunk.get(0)
                        - oneblock.getFileSplit().startChunk.get(0)) * 4 * netInfo.latLength * netInfo.lonLength;
            }
            blockToNodes.remove(oneblock);
            System.out.println("[SAMAN][NetCDFInputFormatPrunerByFileIndexMultiFile][getSplits] curSplitSize = "
                    + curSplitSize);
            //curSplitSize += singleSplitSize;
            //System.out.println("[SAMAN][NetCDFInputFormatPrunerByFileIndexMultiFile][getSplits] "
            //        + "Added to valid blocks!");

            // if the accumulated split size exceeds the maximum, then create this split.
            if (blockSize != 0 && curSplitSize >= blockSize) {
                // create an input split and add it to the splits array
                addCreatedSplit(finalSplits, Collections.singleton(node), validBlocks);
                //totalLength -= curSplitSize;
                //System.out.println("[SAMAN][NetCDFInputFormatPrunerByFileIndexMultiFile][getSplits] "
                //        + "addCreatedSplit called!");
                curSplitSize = 0;
                splitsPerNode.add(node);
                // Remove entries from blocksInNode so that we don't walk these again.
                //blocksInCurrentNode.removeAll(validBlocks);
                validBlocks.clear();
                // Done creating a single split for this node. Move on to the next
                // node so that splits are distributed across nodes.
                //break;
            }
        }
        if (!validBlocks.isEmpty()) {
            //System.out.println("[SAMAN][NetCDFInputFormatPrunerByFileIndexMultiFile][getSplits] validBlocks not empty!");
            addCreatedSplit(finalSplits, Collections.singleton(node), validBlocks);
            curSplitSize = 0;
            splitsPerNode.add(node);
            blocksInCurrentNode.removeAll(validBlocks);
            validBlocks.clear();
        }
    }

    Set<NetCDFFileSplit> singleSplitsSet = blockToNodes.keySet();
    Iterator itrSingle = singleSplitsSet.iterator();
    while (itrSingle.hasNext()) {
        NetCDFFileSplit temp = (NetCDFFileSplit) itrSingle.next();
        addCreatedSingleSplit(finalSplits, temp.getLocations(), temp);
    }

    Iterator itr = finalSplits.iterator();
    while (itr.hasNext()) {
        NetCDFFileSplit temp = (NetCDFFileSplit) itr.next();
        String[] locations = temp.getFileSplit().getLocations();
        String locationsString = "";
        for (int i = 0; i < locations.length; i++)
            locationsString += locations[i];
        String pathsString = "";
        List<Path> paths = temp.getFileSplit().getPaths();
        for (Path path : paths)
            pathsString += path.getName() + ",";
        String startsString = "";
        List<Long> starts = temp.getFileSplit().startChunk;
        for (Long start : starts)
            startsString += (start + ",");
        String endsString = "";
        List<Long> ends = temp.getFileSplit().endChunk;
        for (Long end : ends)
            endsString += (end + ",");
        System.out.println("[SAMAN][NetCDFInputFormatPrunerByFileIndexMultiFile][getSplits] "
                + "locations=" + locationsString + "," + "paths=" + pathsString + ","
                + "starts=" + startsString + "," + "ends=" + endsString + ",");
    }
    return finalSplits.toArray(new NetCDFFileSplit[finalSplits.size()]);
}
From source file:org.datanucleus.store.types.guava.wrappers.backed.Multiset.java
/**
 * The writeReplace method is called when ObjectOutputStream is preparing
 * to write the object to the stream. The ObjectOutputStream checks
 * whether the class defines the writeReplace method. If the method is
 * defined, the writeReplace method is called to allow the object to
 * designate its replacement in the stream. The object returned should be
 * either of the same type as the object passed in or an object that when
 * read and resolved will result in an object of a type that is compatible
 * with all references to the object.
 *
 * @return the replaced object
 * @throws ObjectStreamException if an error occurs
 */
protected Object writeReplace() throws ObjectStreamException {
    if (useCache) {
        loadFromStore();
        HashMultiset multi = HashMultiset.create();
        multi.addAll(delegate);
        return multi;
    }
    // TODO Cater for non-cached collection, load elements in a DB call.
    HashMultiset multi = HashMultiset.create();
    multi.addAll(delegate);
    return multi;
}
From source file:org.apache.twill.internal.appmaster.ApplicationMasterService.java
/**
 * Handles containers that have completed.
 */
private void handleCompleted(List<YarnContainerStatus> completedContainersStatuses) {
    Multiset<String> restartRunnables = HashMultiset.create();
    for (YarnContainerStatus status : completedContainersStatuses) {
        LOG.info("Container {} completed with {}:{}.", status.getContainerId(), status.getState(),
                status.getDiagnostics());
        runningContainers.handleCompleted(status, restartRunnables);
    }

    for (Multiset.Entry<String> entry : restartRunnables.entrySet()) {
        LOG.info("Re-request container for {} with {} instances.", entry.getElement(), entry.getCount());
        runnableContainerRequests.add(createRunnableContainerRequest(entry.getElement(), entry.getCount()));
    }

    // For all runnables that need to re-request containers, update the expected count timestamp
    // so that the EventHandler is triggered with the right expiration timestamp.
    expectedContainers.updateRequestTime(restartRunnables.elementSet());
}