Example usage for com.google.common.collect HashMultiset create

List of usage examples for com.google.common.collect HashMultiset create

Introduction

On this page you can find example usage for com.google.common.collect HashMultiset create.

Prototype

public static <E> HashMultiset<E> create() 

Document

Creates a new, empty HashMultiset using the default initial capacity.
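
As a minimal, hypothetical sketch of the call itself (the class and variable names below are illustrative and not taken from the examples that follow):

import com.google.common.collect.HashMultiset;
import com.google.common.collect.Multiset;

public class WordCountExample {
    public static void main(String[] args) {
        // create() builds a new, empty HashMultiset with the default initial capacity.
        Multiset<String> words = HashMultiset.create();
        words.add("lat");
        words.add("lon");
        words.add("lat");
        // count(...) reports how many times an element has been added.
        System.out.println(words.count("lat"));  // 2
        System.out.println(words.count("time")); // 0
    }
}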

Usage

From source file: org.apache.hadoop.mapred.NetCDFInputFormatPrunerByFileIndexMultiFileTwoDimensions.java

@Override
public InputSplit[] getSplits(JobConf job, int numSplits) throws IOException {
    FileStatus[] files = listStatus(job);

    LOG.info("[SAMAN][NetCDFInputFormatPrunerByFileIndex][getSplits] hive query is: "
            + job.get(HIVE_QUERY, "Kossher"));
    System.out.println("[SAMAN][NetCDFInputFormatPrunerByFileIndex][getSplits] hive query is: "
            + job.get(HIVE_QUERY, "Kossher"));

    /* Analyzing Query here */
    String hiveQuery = job.get(HIVE_QUERY, "Kossher");
    QueryType queryType = QueryType.NOLIMIT; // default mode
    /*
    if(hiveQuery.contains("where") || hiveQuery.contains("WHERE")) {
    if (hiveQuery.contains("time") || hiveQuery.contains("TIME")) {
        queryType = QueryType.TIME;
    } else if (hiveQuery.contains("lat") || hiveQuery.contains("LAT")) {
        queryType = QueryType.LAT;
    } else if (hiveQuery.contains("lon") || hiveQuery.contains("LON")) {
        queryType = QueryType.LON;
    }
    }
    */

    float latTopLimit = -1;
    float latBottomLimit = -1;
    float lonTopLimit = -1;
    float lonBottomLimit = -1;

    String[] querySplitted = hiveQuery.split(" ");
    for (int i = 0; i < querySplitted.length; i++) {
        if (querySplitted[i].equals("lat") || querySplitted[i].equals("LAT")) {
            if (querySplitted[i + 1].equals(">")) {
                latBottomLimit = Float.valueOf(querySplitted[i + 2]);
            } else if (querySplitted[i + 1].equals("<")) {
                latTopLimit = Float.valueOf(querySplitted[i + 2]);
            }
        }
        if (querySplitted[i].equals("lon") || querySplitted[i].equals("LON")) {
            if (querySplitted[i + 1].equals(">")) {
                lonBottomLimit = Float.valueOf(querySplitted[i + 2]);
            } else if (querySplitted[i + 1].equals("<")) {
                lonTopLimit = Float.valueOf(querySplitted[i + 2]);
            }
        }
    }
    System.out.println("[SAMAN][NetCDFInputFormatPrunerByFileIndex][getSplits] " + "latTopLimit=" + latTopLimit
            + ",latBottomLimit=" + latBottomLimit + ",lonTopLimit=" + lonTopLimit + ",lonBottomLimit="
            + lonBottomLimit);

    System.out.println("[SAMANPruner] beginning of getSplits");

    job.setLong(NUM_INPUT_FILES, files.length);
    long totalSize = 0; // compute total size
    for (FileStatus file : files) { // check we have valid files
        if (file.isDir()) {
            throw new IOException("Not a file: " + file.getPath());
        }
        totalSize += file.getLen();
    }

    // generate splits
    ArrayList<NetCDFFileSplit> splits = new ArrayList<NetCDFFileSplit>(numSplits);
    ArrayList<NetCDFFileSplit> finalSplits = new ArrayList<NetCDFFileSplit>();
    NetworkTopology clusterMap = new NetworkTopology();
    for (FileStatus file : files) {
        Path path = file.getPath();
        int fileIndex = 0;
        int dimIndex = 0;
        String[] parts = path.getName().split("-");
        dimIndex = Integer.valueOf(parts[1]);

        //LOG.info("[SAMAN][NetCDFInputFormatPrunerByFileIndex][getSplits] File name is : " + path.getName());
        System.out.println(
                "[SAMAN][NetCDFInputFormatPrunerByFileIndex][getSplits] File name is : " + path.getName());
        FileSystem fs = path.getFileSystem(job);
        long length = file.getLen();
        BlockLocation[] blkLocations = fs.getFileBlockLocations(file, 0, length);
        if ((length != 0) && isSplitable(fs, path)) {
            long blockSize = file.getBlockSize();
            netInfo = getNetCDFInfo(path, fs, job);

            // First decide which files should be considered as the base to be read
            int latTopTemp = -1;
            if (latTopLimit == -1) {
                latTopTemp = result.latLength;
            } else {
                latTopTemp = Math.min(result.latLength, (int) latTopLimit);
            }

            int latBottomTemp = -1;
            if (latBottomLimit == -1) {
                latBottomTemp = 0;
            } else {
                latBottomTemp = Math.max(0, (int) latBottomLimit);
            }

            int lonTopTemp = -1;
            if (lonTopLimit == -1) {
                lonTopTemp = result.lonLength;
            } else {
                lonTopTemp = Math.min(result.lonLength, (int) lonTopLimit);
            }

            int lonBottomTemp = -1;
            if (lonBottomLimit == -1) {
                lonBottomTemp = 0;
            } else {
                lonBottomTemp = Math.max(0, (int) lonBottomLimit);
            }

            if ((latTopTemp - latBottomTemp) * 4 * result.lonLength
                    * result.timeLength < (lonTopTemp - lonBottomTemp) * 4 * result.latLength
                            * result.timeLength) {
                chooseLat = true;
            } else {
                chooseLat = false;
            }

            System.out.println("[SAMAN][NetCDFInputFormat][getSplits] chooseLat = " + chooseLat);

            if (chooseLat) {
                if (!path.getName().contains("lat"))
                    continue;
            } else {
                if (!path.getName().contains("lon"))
                    continue;
            }

            long recStart = netInfo.recStart;
            long[] chunkStarts = netInfo.chunkStarts;
            long smallSize = netInfo.smallRecSize;
            long recSize = netInfo.recSize;
            long splitSize = 0;
            int chunkIndex = 0;
            long bytesRemaining = chunkStarts[chunkStarts.length - 1] + recSize - recStart - 2 * smallSize;
            long thisStart = recStart; // file position
            long thisChunk = 0;
            long blockNo = 1;

            while (bytesRemaining > 0) {
                while (chunkIndex < chunkStarts.length && chunkStarts[chunkIndex] < blockNo * blockSize) {
                    chunkIndex++;
                }
                long tempStart = thisStart;
                long endChunk;
                if (chunkIndex >= chunkStarts.length) {
                    splitSize = chunkStarts[chunkStarts.length - 1] + recSize - thisStart - smallSize;

                } else {
                    splitSize = chunkStarts[chunkIndex] - thisStart - smallSize;
                    thisStart = chunkStarts[chunkIndex];
                }
                endChunk = chunkIndex;
                blockNo++;
                //LOG.info( "[SAMAN] NetCDFInputFormatPruner.getSplits => splitSize="+splitSize+", thisStart="+thisStart+
                //        ", endChunk="+endChunk+", blockNo="+blockNo);
                System.out.println("[SAMAN] NetCDFInputFormatPruner.getSplits => splitSize=" + splitSize
                        + ", thisStart=" + thisStart + ", endChunk=" + endChunk + ", blockNo=" + blockNo);
                String[] splitHosts = getSplitHosts(blkLocations, tempStart, splitSize, clusterMap);
                NetCDFFileSplit split = new NetCDFFileSplit(path, tempStart, splitSize, splitHosts);

                if (chooseLat) {
                    if (latTopTemp < thisChunk) {
                        bytesRemaining -= splitSize;
                        thisChunk = endChunk;
                        continue;
                    }
                    if (latBottomTemp > endChunk) {
                        bytesRemaining -= splitSize;
                        thisChunk = endChunk;
                        continue;
                    }

                    blockToNodes.put(split, splitHosts);

                    // Put the nodes with the specified split into the node to block set
                    for (int i = 0; i < splitHosts.length; i++) {
                        Set<NetCDFFileSplit> splitList = nodeToBlocks.get(splitHosts[i]);
                        if (splitList == null) {
                            splitList = new LinkedHashSet<NetCDFFileSplit>();
                            nodeToBlocks.put(splitHosts[i], splitList);
                        }
                        splitList.add(split);
                    }

                    // For the test, we would assign everything statically.
                    if (latBottomLimit > thisChunk) {
                        System.out
                                .println("[SAMAN][NetCDFInputFormatPrunerByFileIndex][getSplits] startChunk = "
                                        + latBottomLimit);
                        split.getFileSplit().startChunk.add((long) latBottomLimit);
                    } else {
                        split.getFileSplit().startChunk.add(thisChunk);
                    }
                    if (latTopLimit < endChunk) {
                        System.out.println("[SAMAN][NetCDFInputFormatPrunerByFileIndex][getSplits] endChunk = "
                                + latTopLimit);
                        split.getFileSplit().endChunk.add((long) latTopLimit);
                    } else {
                        split.getFileSplit().endChunk.add(endChunk);
                    }

                    split.getFileSplit().secondDimStartChunk.add((long) lonBottomTemp);
                    split.getFileSplit().secondDimEndChunk.add((long) lonTopTemp);
                }
                if (!chooseLat) {
                    if (lonTopTemp < thisChunk) {
                        bytesRemaining -= splitSize;
                        thisChunk = endChunk;
                        continue;
                    }
                    if (lonBottomTemp > endChunk) {
                        bytesRemaining -= splitSize;
                        thisChunk = endChunk;
                        continue;
                    }

                    blockToNodes.put(split, splitHosts);

                    // Put the nodes with the specified split into the node to block set
                    for (int i = 0; i < splitHosts.length; i++) {
                        Set<NetCDFFileSplit> splitList = nodeToBlocks.get(splitHosts[i]);
                        if (splitList == null) {
                            splitList = new LinkedHashSet<NetCDFFileSplit>();
                            nodeToBlocks.put(splitHosts[i], splitList);
                        }
                        splitList.add(split);
                    }

                    if (lonBottomLimit > thisChunk) {
                        System.out
                                .println("[SAMAN][NetCDFInputFormatPrunerByFileIndex][getSplits] startChunk = "
                                        + lonBottomLimit);
                        split.getFileSplit().startChunk.add((long) lonBottomLimit);
                    } else {
                        split.getFileSplit().startChunk.add(thisChunk);
                    }
                    if (lonTopLimit < endChunk) {
                        System.out.println("[SAMAN][NetCDFInputFormatPrunerByFileIndex][getSplits] endChunk = "
                                + lonTopLimit);
                        split.getFileSplit().endChunk.add((long) lonTopLimit);
                    } else {
                        split.getFileSplit().endChunk.add(endChunk);
                    }

                    split.getFileSplit().secondDimStartChunk.add((long) latBottomTemp);
                    split.getFileSplit().secondDimEndChunk.add((long) latTopTemp);
                }

                splits.add(split);

                bytesRemaining -= splitSize;
                thisChunk = endChunk;
                //LOG.info( "[SAMAN] NetCDFInputFormatPruner.getSplits => bytesRemaining="+bytesRemaining+", thisChunk="+thisChunk );
                //System.out.println( "[SAMAN] NetCDFInputFormatPruner.getSplits => bytesRemaining="+bytesRemaining+", thisChunk="+thisChunk );
            }

        } else if (length != 0) {
            String[] splitHosts = getSplitHosts(blkLocations, 0, length, clusterMap);
            //splits.add(new FileSplit(path, 0, length, splitHosts));
        } else {
            //Create empty hosts array for zero length files
            //splits.add(new FileSplit(path, 0, length, new String[0]));
        }
    }

    // Now it's time to merge non-complete splits.
    // Check if each split has enough space to include another split too

    Set<String> completedNodes = new HashSet<String>();
    ArrayList<NetCDFFileSplit> validBlocks = new ArrayList<NetCDFFileSplit>();
    long curSplitSize = 0;
    Multiset<String> splitsPerNode = HashMultiset.create();

    for (Iterator<Map.Entry<String, Set<NetCDFFileSplit>>> iter = nodeToBlocks.entrySet().iterator(); iter
            .hasNext();) {
        Map.Entry<String, Set<NetCDFFileSplit>> one = iter.next();
        String node = one.getKey();

        System.out.println("[SAMAN][NetCDFInputFormatPrunerByFileIndexMultiFile][getSplits] node is = " + node);

        // Skip the node if it has previously been marked as completed.
        if (completedNodes.contains(node)) {
            continue;
        }

        Set<NetCDFFileSplit> blocksInCurrentNode = one.getValue();

        // for each block, copy it into validBlocks. Delete it from
        // blockToNodes so that the same block does not appear in
        // two different splits.
        Iterator<NetCDFFileSplit> oneBlockIter = blocksInCurrentNode.iterator();
        while (oneBlockIter.hasNext()) {
            NetCDFFileSplit oneblock = oneBlockIter.next();

            System.out.println("[SAMAN][NetCDFInputFormatPrunerByFileIndexMultiFile][getSplits] " + "split is: "
                    + oneblock.getFileSplit().getPath());

            // Remove all blocks which may already have been assigned to other
            // splits.
            if (!blockToNodes.containsKey(oneblock)) {
                oneBlockIter.remove();
                continue;
            }

            validBlocks.add(oneblock);
            if (chooseLat) {
                curSplitSize += (oneblock.getFileSplit().endChunk.get(0)
                        - oneblock.getFileSplit().startChunk.get(0)) * 4 * netInfo.lonLength
                        * netInfo.timeLength;
            } else {
                curSplitSize += (oneblock.getFileSplit().endChunk.get(0)
                        - oneblock.getFileSplit().startChunk.get(0)) * 4 * netInfo.latLength
                        * netInfo.timeLength;
            }
            blockToNodes.remove(oneblock);
            System.out.println("[SAMAN][NetCDFInputFormatPrunerByFileIndexMultiFile][getSplits] curSplitSize = "
                    + curSplitSize);

            //curSplitSize += singleSplitSize;

            System.out.println("[SAMAN][NetCDFInputFormatPrunerByFileIndexMultiFile][getSplits] "
                    + "Added to valid blocks!");

            // if the accumulated split size exceeds the maximum, then
            // create this split.
            if (blockSize != 0 && curSplitSize >= blockSize) {
                // create an input split and add it to the splits array
                addCreatedSplit(finalSplits, Collections.singleton(node), validBlocks);
                //totalLength -= curSplitSize;

                System.out.println("[SAMAN][NetCDFInputFormatPrunerByFileIndexMultiFile][getSplits] "
                        + "addCreatedSplit called!");

                curSplitSize = 0;
                splitsPerNode.add(node);

                // Remove entries from blocksInNode so that we don't walk these
                // again.
                //blocksInCurrentNode.removeAll(validBlocks);
                validBlocks.clear();

                // Done creating a single split for this node. Move on to the next
                // node so that splits are distributed across nodes.
                //break;
            }

        }
        if (!validBlocks.isEmpty()) {
            System.out.println(
                    "[SAMAN][NetCDFInputFormatPrunerByFileIndexMultiFile][getSplits] validBlocks not empty!");
            addCreatedSplit(finalSplits, Collections.singleton(node), validBlocks);
            curSplitSize = 0;
            splitsPerNode.add(node);
            blocksInCurrentNode.removeAll(validBlocks);
            validBlocks.clear();
        }
    }

    Set<NetCDFFileSplit> singleSplitsSet = blockToNodes.keySet();
    Iterator itrSingle = singleSplitsSet.iterator();
    while (itrSingle.hasNext()) {
        NetCDFFileSplit temp = (NetCDFFileSplit) itrSingle.next();
        addCreatedSingleSplit(finalSplits, temp.getLocations(), temp);
    }

    Iterator itr = finalSplits.iterator();
    while (itr.hasNext()) {

        NetCDFFileSplit temp = (NetCDFFileSplit) itr.next();

        String[] locations = temp.getFileSplit().getLocations();
        String locationsString = "";
        for (int i = 0; i < locations.length; i++)
            locationsString += locations[i];

        String pathsString = "";
        List<Path> paths = temp.getFileSplit().getPaths();
        for (Path path : paths)
            pathsString += path.getName() + ",";

        String startsString = "";
        List<Long> starts = temp.getFileSplit().startChunk;
        for (Long start : starts)
            startsString += (start + ",");

        String endsString = "";
        List<Long> ends = temp.getFileSplit().endChunk;
        for (Long end : ends)
            endsString += (end + ",");

        System.out.println("[SAMAN][NetCDFInputFormatPrunerByFileIndexMultiFile][getSplits] " + "locations="
                + locationsString + "," + "paths=" + pathsString + "," + "starts=" + startsString + ","
                + "ends=" + endsString + ",");
    }

    return finalSplits.toArray(new NetCDFFileSplit[finalSplits.size()]);

}
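
In the merge phase above, splitsPerNode is only used as a per-node tally of how many combined splits have been created. A minimal, stand-alone sketch of that counting pattern, with made-up host names:

import com.google.common.collect.HashMultiset;
import com.google.common.collect.Multiset;

public class SplitsPerNodeSketch {
    public static void main(String[] args) {
        Multiset<String> splitsPerNode = HashMultiset.create();
        // Record one occurrence per split finalized on a node.
        splitsPerNode.add("datanode-01");
        splitsPerNode.add("datanode-02");
        splitsPerNode.add("datanode-01");
        // count(...) gives the number of splits assigned to a node so far.
        System.out.println(splitsPerNode.count("datanode-01")); // 2
    }
}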

From source file: io.hops.ha.common.FiCaSchedulerAppInfo.java

public void addSchedulingOppurtunity(Priority p, int count) {
    if (schedulingOpportunitiesToAdd == null) {
        schedulingOpportunitiesToAdd = HashMultiset.create();
    }
    schedulingOpportunitiesToAdd.setCount(p, count);
}
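
Note that setCount overwrites whatever value was previously recorded for the priority, whereas the addReReservation method shown further down uses add, which increments the tally by one. A minimal sketch of the difference, using String keys in place of the YARN Priority type so the example stays self-contained:

import com.google.common.collect.HashMultiset;
import com.google.common.collect.Multiset;

public class SetCountVersusAdd {
    public static void main(String[] args) {
        Multiset<String> counts = HashMultiset.create();
        counts.add("priority-1");           // count is now 1
        counts.add("priority-1");           // count is now 2
        counts.setCount("priority-1", 5);   // count is now 5; the old value is discarded
        System.out.println(counts.count("priority-1")); // 5
    }
}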

From source file: it.units.malelab.ege.distributed.master.UIRunnable.java

private void inc(String keyName, Object keyValue, JobInfo.Status status,
        Map<String, Map<Object, Multiset<JobInfo.Status>>> map) {
    Map<Object, Multiset<JobInfo.Status>> valueCounts = map.get(keyName);
    if (valueCounts == null) {
        valueCounts = new TreeMap<>();
        map.put(keyName, valueCounts);
    }
    Multiset<JobInfo.Status> statuses = valueCounts.get(keyValue);
    if (statuses == null) {
        statuses = HashMultiset.create();
        valueCounts.put(keyValue, statuses);
    }
    statuses.add(status);
}

From source file: io.hops.ha.common.FiCaSchedulerAppInfo.java

public void addReReservation(Priority p) {
    if (reReservations == null) {
        reReservations = HashMultiset.create();
    }
    reReservations.add(p);
}

From source file: i5.las2peer.services.recommender.librec.data.FilmTrustDataDAO.java

/**
 * print out distributions of the dataset <br/>
 *
 * <ul>
 * <li>#users (y) -- #ratings (x) (that are issued by each user)</li>
 * <li>#items (y) -- #ratings (x) (that received by each item)</li>
 * </ul>
 */
public void printDistr(boolean isWriteOut) throws Exception {
    if (rateMatrix == null)
        readData();

    // count how many users give the same number of ratings
    Multiset<Integer> numURates = HashMultiset.create();

    // count how many items receive the same number of ratings
    Multiset<Integer> numIRates = HashMultiset.create();

    for (int r = 0, rm = rateMatrix.numRows; r < rm; r++) {
        int numRates = rateMatrix.rowSize(r);
        numURates.add(numRates);
    }

    for (int c = 0, cm = rateMatrix.numColumns; c < cm; c++) {
        int numRates = rateMatrix.columnSize(c);
        numIRates.add(numRates);
    }

    String ustrs = Strings.toString(numURates);
    String istrs = Strings.toString(numIRates);

    if (isWriteOut) {
        FileIO.writeString(FileIO.desktop + "user-distr.txt", ustrs);
        FileIO.writeString(FileIO.desktop + "item-distr.txt", istrs);
    } else {
        Logs.debug("#ratings (x) ~ #users (y): \n" + ustrs);
        Logs.debug("#ratings (x) ~ #items (y): \n" + istrs);
    }

    Logs.debug("Done!");

}
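
The two multisets above act as histograms: each element is a rating count, and its multiplicity is the number of users (or items) that issued or received exactly that many ratings. A minimal, self-contained sketch of reading such a histogram back through entrySet(), with invented values:

import com.google.common.collect.HashMultiset;
import com.google.common.collect.Multiset;

public class RatingDistributionSketch {
    public static void main(String[] args) {
        Multiset<Integer> numURates = HashMultiset.create();
        numURates.add(3);  // one user issued 3 ratings
        numURates.add(3);  // a second user also issued 3 ratings
        numURates.add(10); // one user issued 10 ratings
        // entrySet() exposes each distinct element together with its count.
        for (Multiset.Entry<Integer> e : numURates.entrySet()) {
            System.out.println("#ratings=" + e.getElement() + " -> #users=" + e.getCount());
        }
    }
}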

From source file: org.apache.hadoop.mapred.NetCDFInputFormatPrunerByFileIndexMultiFile.java

@Override
public InputSplit[] getSplits(JobConf job, int numSplits) throws IOException {
    FileStatus[] files = listStatus(job);

    LOG.info("[SAMAN][NetCDFInputFormatPrunerByFileIndex][getSplits] hive query is: "
            + job.get(HIVE_QUERY, "Kossher"));
    System.out.println("[SAMAN][NetCDFInputFormatPrunerByFileIndex][getSplits] hive query is: "
            + job.get(HIVE_QUERY, "Kossher"));

    /* Analyzing Query here */
    String hiveQuery = job.get(HIVE_QUERY, "Kossher");
    QueryType queryType = QueryType.NOLIMIT; // default mode
    if (hiveQuery.contains("where") || hiveQuery.contains("WHERE")) {
        if (hiveQuery.contains("time") || hiveQuery.contains("TIME")) {
            queryType = QueryType.TIME;
        } else if (hiveQuery.contains("lat") || hiveQuery.contains("LAT")) {
            queryType = QueryType.LAT;
        } else if (hiveQuery.contains("lon") || hiveQuery.contains("LON")) {
            queryType = QueryType.LON;
        }
    }

    float topLimit = -1;
    float bottomLimit = -1;

    if (queryType != QueryType.NOLIMIT) {
        if (hiveQuery.contains("<")) {
            String[] querySplitted = hiveQuery.split(" ");
            int i = Arrays.asList(querySplitted).indexOf("<");
            topLimit = Float.valueOf(querySplitted[i + 1]);
        }
        if (hiveQuery.contains(">")) {
            String[] querySplitted = hiveQuery.split(" ");
            int i = Arrays.asList(querySplitted).indexOf(">");
            bottomLimit = Float.valueOf(querySplitted[i + 1]);
        }
    }

    //System.out.println( "[SAMAN][NetCDFInputFormatPrunerByFileIndex] QueryType = " + queryType.toString()
    //        +", topLimit = " + topLimit + ", bottomLimit = " + bottomLimit );
    //LOG.info("[SAMAN][NetCDFInputFormatPrunerByFileIndex] QueryType = " + queryType.toString()
    //        + ", topLimit = " + topLimit + ", bottomLimit = " + bottomLimit);
    /* End Analyzing Query here */

    System.out.println("[SAMANPruner] beginning of getSplits");
    LOG.info("[SAMANPruner] beginning of getSplits");
    //System.out.println( "[SAMAN] " + files.length );
    //LOG.info( "[SAMAN] " + files.length );
    // Save the number of input files in the job-conf
    job.setLong(NUM_INPUT_FILES, files.length);
    long totalSize = 0; // compute total size
    for (FileStatus file : files) { // check we have valid files
        if (file.isDir()) {
            throw new IOException("Not a file: " + file.getPath());
        }
        totalSize += file.getLen();
    }
    //long minSize = Math.max(job.getLong("mapred.min.split.size", 1),
    //                       minSplitSize);

    // generate splits
    ArrayList<NetCDFFileSplit> splits = new ArrayList<NetCDFFileSplit>(numSplits);
    ArrayList<NetCDFFileSplit> finalSplits = new ArrayList<NetCDFFileSplit>();
    NetworkTopology clusterMap = new NetworkTopology();
    for (FileStatus file : files) {
        Path path = file.getPath();
        int fileIndex = 0;
        int dimIndex = 0;
        if (queryType == QueryType.TIME || queryType == QueryType.NOLIMIT) {
            if (path.getName().contains("lat") || path.getName().contains("lon"))
                continue;
        } else if (queryType == QueryType.LAT) {
            if (!path.getName().contains("lat"))
                continue;
        } else if (queryType == QueryType.LON) {
            if (!path.getName().contains("lon"))
                continue;
        }
        if (queryType == QueryType.TIME) {
            String[] parts = path.getName().split("-");
            fileIndex = Integer.valueOf(parts[1]);
        } else if (queryType == QueryType.LAT || queryType == QueryType.LON) {
            if (path.getName().contains("_")) {
                String[] parts = path.getName().split("_");
                fileIndex = Integer.valueOf(parts[2]);
                dimIndex = Integer.valueOf(parts[0].substring(7));
            } else {
                //dimIndex = Integer.valueOf(path.getName().substring(7));
                String[] parts = path.getName().split("-");
                dimIndex = Integer.valueOf(parts[1]);
            }
        }

        //LOG.info("[SAMAN][NetCDFInputFormatPrunerByFileIndex][getSplits] File name is : " + path.getName());
        System.out.println(
                "[SAMAN][NetCDFInputFormatPrunerByFileIndex][getSplits] File name is : " + path.getName());
        FileSystem fs = path.getFileSystem(job);
        long length = file.getLen();
        BlockLocation[] blkLocations = fs.getFileBlockLocations(file, 0, length);
        if ((length != 0) && isSplitable(fs, path)) {
            long blockSize = file.getBlockSize();
            netInfo = getNetCDFInfo(path, fs, job);
            long recStart = netInfo.recStart;
            long[] chunkStarts = netInfo.chunkStarts;
            long smallSize = netInfo.smallRecSize;
            long recSize = netInfo.recSize;
            long splitSize = 0;
            int chunkIndex = 0;
            long bytesRemaining = chunkStarts[chunkStarts.length - 1] + recSize - recStart - 2 * smallSize;
            long thisStart = recStart; // file position
            long thisChunk = 0;
            long blockNo = 1;
            long numChunksPerKey = 0;
            if (queryType == QueryType.LAT) {
                long chunkSize = netInfo.timeLength * netInfo.lonLength * 4;
                numChunksPerKey = blockSize / chunkSize;
            } else if (queryType == QueryType.LON) {
                long chunkSize = netInfo.timeLength * netInfo.latLength * 4;
                numChunksPerKey = blockSize / chunkSize;
            }

            System.out.println("[SAMAN][NetCDFInputFormat][getSplits] numChunksPerKey = " + numChunksPerKey);

            //LOG.info( "[SAMAN] NetCDFInputFormatPruner.getSplits => recStart = " + recStart + ", chunkStarts = " + chunkStarts +
            //        ", smallSize = " + smallSize + ", recSize = " + recSize + ", bytesRemaining = " + bytesRemaining +
            //        ", thisStart = " + thisStart);
            //System.out.println( "[SAMAN] NetCDFInputFormatPruner.getSplits => recStart = " + recStart + ", chunkStarts = " + chunkStarts +
            //        ", smallSize = " + smallSize + ", recSize = " + recSize + ", bytesRemaining = " + bytesRemaining +
            //        ", thisStart = " + thisStart);
            while (bytesRemaining > 0) {
                while (chunkIndex < chunkStarts.length && chunkStarts[chunkIndex] < blockNo * blockSize) {
                    chunkIndex++;
                }
                long tempStart = thisStart;
                long endChunk;
                if (chunkIndex >= chunkStarts.length) {
                    splitSize = chunkStarts[chunkStarts.length - 1] + recSize - thisStart - smallSize;

                    //bytesRemaining should be 0 after this round
                } else {
                    splitSize = chunkStarts[chunkIndex] - thisStart - smallSize;
                    thisStart = chunkStarts[chunkIndex];
                }
                endChunk = chunkIndex;
                blockNo++;
                //LOG.info( "[SAMAN] NetCDFInputFormatPruner.getSplits => splitSize="+splitSize+", thisStart="+thisStart+
                //        ", endChunk="+endChunk+", blockNo="+blockNo);
                System.out.println("[SAMAN] NetCDFInputFormatPruner.getSplits => splitSize=" + splitSize
                        + ", thisStart=" + thisStart + ", endChunk=" + endChunk + ", blockNo=" + blockNo);
                String[] splitHosts = getSplitHosts(blkLocations, tempStart, splitSize, clusterMap);
                NetCDFFileSplit split = new NetCDFFileSplit(path, tempStart, splitSize, splitHosts);

                if (queryType == QueryType.TIME) {
                    if ((topLimit < thisChunk + (fileIndex * netInfo.timeLength)) && (topLimit != -1)) {
                        bytesRemaining -= splitSize;
                        thisChunk = endChunk;
                        continue;
                    }
                    if ((bottomLimit > endChunk + (fileIndex * netInfo.timeLength)) && (bottomLimit != -1)) {
                        bytesRemaining -= splitSize;
                        thisChunk = endChunk;
                        continue;
                    }

                    blockToNodes.put(split, splitHosts);

                    // Put the nodes with the specified split into the node to block set
                    System.out.println(
                            "[SAMAN][NetCDFInputFormat][getSplits] Put the nodes with the specified split into the node to block set");
                    for (int i = 0; i < splitHosts.length; i++) {
                        Set<NetCDFFileSplit> splitList = nodeToBlocks.get(splitHosts[i]);
                        if (splitList == null) {
                            splitList = new LinkedHashSet<NetCDFFileSplit>();
                            nodeToBlocks.put(splitHosts[i], splitList);
                        }
                        splitList.add(split);
                    }

                    System.out.println("[SAMAN][NetCDFInputFormat][getSplits] set start and end!");

                    split.getFileSplit().startChunk.add(thisChunk);
                    split.getFileSplit().endChunk.add(endChunk);
                } else if (queryType == QueryType.LAT || queryType == QueryType.LON) {
                    //System.out.println( "[SAMAN][NetCDFInputFormat][getSplits] file = "
                    //        + path.getName() + ", topLimit = " + topLimit + ", bottomLimit = " + bottomLimit + ", dimIndex = " + dimIndex );
                    /*
                    if( topLimit < dimIndex*numChunksPerKey && (topLimit != -1) ){
                    bytesRemaining -= splitSize;
                    thisChunk = endChunk;
                    continue;
                    }
                    if( bottomLimit > dimIndex*numChunksPerKey && (bottomLimit != -1) ){
                    bytesRemaining -= splitSize;
                    thisChunk = endChunk;
                    continue;
                    }*/
                    if (topLimit < thisChunk && (topLimit != -1)) {
                        bytesRemaining -= splitSize;
                        thisChunk = endChunk;
                        continue;
                    }
                    if (bottomLimit > endChunk && (bottomLimit != -1)) {
                        bytesRemaining -= splitSize;
                        thisChunk = endChunk;
                        continue;
                    }
                    /*
                    if ((topLimit < thisChunk) && (topLimit != -1)) {
                    bytesRemaining -= splitSize;
                    thisChunk = endChunk;
                    continue;
                    }
                    if ((bottomLimit > endChunk) && (bottomLimit != -1)) {
                    bytesRemaining -= splitSize;
                    thisChunk = endChunk;
                    continue;
                    }
                    */
                    //split.getNetCDFFileSplit().endChunk = (long)topLimit;
                    /*
                    split.getFileSplit().startChunk.add(thisChunk);
                    split.getFileSplit().endChunk.add(endChunk);
                    */
                    // Put the block into the block to node set
                    blockToNodes.put(split, splitHosts);

                    // Put the nodes with the specified split into the node to block set
                    for (int i = 0; i < splitHosts.length; i++) {
                        Set<NetCDFFileSplit> splitList = nodeToBlocks.get(splitHosts[i]);
                        if (splitList == null) {
                            splitList = new LinkedHashSet<NetCDFFileSplit>();
                            nodeToBlocks.put(splitHosts[i], splitList);
                        }
                        splitList.add(split);
                    }

                    // For the test, we would assign everything statically.
                    if (bottomLimit > thisChunk && (bottomLimit != -1)) {
                        System.out
                                .println("[SAMAN][NetCDFInputFormatPrunerByFileIndex][getSplits] startChunk = "
                                        + bottomLimit);
                        split.getFileSplit().startChunk.add((long) bottomLimit);
                    } else {
                        split.getFileSplit().startChunk.add(thisChunk);
                    }
                    if (topLimit < endChunk && (topLimit != -1)) {
                        System.out.println("[SAMAN][NetCDFInputFormatPrunerByFileIndex][getSplits] endChunk = "
                                + endChunk);
                        split.getFileSplit().endChunk.add((long) topLimit);
                    } else {
                        split.getFileSplit().endChunk.add(endChunk);
                    }
                } else {
                    if ((topLimit < thisChunk) && (topLimit != -1)) {
                        bytesRemaining -= splitSize;
                        thisChunk = endChunk;
                        continue;
                    }
                    if ((bottomLimit > endChunk) && (bottomLimit != -1)) {
                        bytesRemaining -= splitSize;
                        thisChunk = endChunk;
                        continue;
                    }

                    blockToNodes.put(split, splitHosts);

                    // Put the nodes with the specified split into the node to block set
                    for (int i = 0; i < splitHosts.length; i++) {
                        Set<NetCDFFileSplit> splitList = nodeToBlocks.get(splitHosts[i]);
                        if (splitList == null) {
                            splitList = new LinkedHashSet<NetCDFFileSplit>();
                            nodeToBlocks.put(splitHosts[i], splitList);
                        }
                        splitList.add(split);
                    }

                    split.getFileSplit().startChunk.add(thisChunk);
                    split.getFileSplit().endChunk.add(endChunk);
                }

                splits.add(split);

                bytesRemaining -= splitSize;
                thisChunk = endChunk;
                //LOG.info( "[SAMAN] NetCDFInputFormatPruner.getSplits => bytesRemaining="+bytesRemaining+", thisChunk="+thisChunk );
                //System.out.println( "[SAMAN] NetCDFInputFormatPruner.getSplits => bytesRemaining="+bytesRemaining+", thisChunk="+thisChunk );
            }

        } else if (length != 0) {
            String[] splitHosts = getSplitHosts(blkLocations, 0, length, clusterMap);
            //splits.add(new FileSplit(path, 0, length, splitHosts));
        } else {
            //Create empty hosts array for zero length files
            //splits.add(new FileSplit(path, 0, length, new String[0]));
        }
    }

    // Now it's time to merge non-complete splits.
    // Check if each split has enough space to include another split too

    Set<String> completedNodes = new HashSet<String>();
    ArrayList<NetCDFFileSplit> validBlocks = new ArrayList<NetCDFFileSplit>();
    long curSplitSize = 0;
    Multiset<String> splitsPerNode = HashMultiset.create();

    for (Iterator<Map.Entry<String, Set<NetCDFFileSplit>>> iter = nodeToBlocks.entrySet().iterator(); iter
            .hasNext();) {
        Map.Entry<String, Set<NetCDFFileSplit>> one = iter.next();
        String node = one.getKey();

        System.out.println("[SAMAN][NetCDFInputFormatPrunerByFileIndexMultiFile][getSplits] node is = " + node);

        // Skip the node if it has previously been marked as completed.
        if (completedNodes.contains(node)) {
            continue;
        }

        Set<NetCDFFileSplit> blocksInCurrentNode = one.getValue();

        // for each block, copy it into validBlocks. Delete it from
        // blockToNodes so that the same block does not appear in
        // two different splits.
        Iterator<NetCDFFileSplit> oneBlockIter = blocksInCurrentNode.iterator();
        while (oneBlockIter.hasNext()) {
            NetCDFFileSplit oneblock = oneBlockIter.next();

            System.out.println("[SAMAN][NetCDFInputFormatPrunerByFileIndexMultiFile][getSplits] " + "split is: "
                    + oneblock.getFileSplit().getPath());

            // Remove all blocks which may already have been assigned to other
            // splits.
            if (!blockToNodes.containsKey(oneblock)) {
                oneBlockIter.remove();
                continue;
            }

            validBlocks.add(oneblock);
            if (queryType == QueryType.LAT) {
                curSplitSize += (oneblock.getFileSplit().endChunk.get(0)
                        - oneblock.getFileSplit().startChunk.get(0)) * 4 * netInfo.lonLength
                        * netInfo.timeLength;
            } else if (queryType == QueryType.LON) {
                curSplitSize += (oneblock.getFileSplit().endChunk.get(0)
                        - oneblock.getFileSplit().startChunk.get(0)) * 4 * netInfo.latLength
                        * netInfo.timeLength;
            } else if (queryType == QueryType.TIME) {
                curSplitSize += (oneblock.getFileSplit().endChunk.get(0)
                        - oneblock.getFileSplit().startChunk.get(0)) * 4 * netInfo.latLength
                        * netInfo.lonLength;
            } else {
                curSplitSize += (oneblock.getFileSplit().endChunk.get(0)
                        - oneblock.getFileSplit().startChunk.get(0)) * 4 * netInfo.latLength
                        * netInfo.lonLength;
            }
            blockToNodes.remove(oneblock);
            System.out.println("[SAMAN][NetCDFInputFormatPrunerByFileIndexMultiFile][getSplits] curSplitSize = "
                    + curSplitSize);

            //curSplitSize += singleSplitSize;

            System.out.println("[SAMAN][NetCDFInputFormatPrunerByFileIndexMultiFile][getSplits] "
                    + "Added to valid blocks!");

            // if the accumulated split size exceeds the maximum, then
            // create this split.
            if (blockSize != 0 && curSplitSize >= blockSize) {
                // create an input split and add it to the splits array
                addCreatedSplit(finalSplits, Collections.singleton(node), validBlocks);
                //totalLength -= curSplitSize;

                System.out.println("[SAMAN][NetCDFInputFormatPrunerByFileIndexMultiFile][getSplits] "
                        + "addCreatedSplit called!");

                curSplitSize = 0;
                splitsPerNode.add(node);

                // Remove entries from blocksInNode so that we don't walk these
                // again.
                //blocksInCurrentNode.removeAll(validBlocks);
                validBlocks.clear();

                // Done creating a single split for this node. Move on to the next
                // node so that splits are distributed across nodes.
                //break;
            }

        }
        if (!validBlocks.isEmpty()) {
            System.out.println(
                    "[SAMAN][NetCDFInputFormatPrunerByFileIndexMultiFile][getSplits] validBlocks not empty!");
            addCreatedSplit(finalSplits, Collections.singleton(node), validBlocks);
            curSplitSize = 0;
            splitsPerNode.add(node);
            blocksInCurrentNode.removeAll(validBlocks);
            validBlocks.clear();
        }
    }

    Set<NetCDFFileSplit> singleSplitsSet = blockToNodes.keySet();
    Iterator itrSingle = singleSplitsSet.iterator();
    while (itrSingle.hasNext()) {
        NetCDFFileSplit temp = (NetCDFFileSplit) itrSingle.next();
        addCreatedSingleSplit(finalSplits, temp.getLocations(), temp);
    }

    Iterator itr = finalSplits.iterator();
    while (itr.hasNext()) {

        NetCDFFileSplit temp = (NetCDFFileSplit) itr.next();

        String[] locations = temp.getFileSplit().getLocations();
        String locationsString = "";
        for (int i = 0; i < locations.length; i++)
            locationsString += locations[i];

        String pathsString = "";
        List<Path> paths = temp.getFileSplit().getPaths();
        for (Path path : paths)
            pathsString += path.getName() + ",";

        String startsString = "";
        List<Long> starts = temp.getFileSplit().startChunk;
        for (Long start : starts)
            startsString += (start + ",");

        String endsString = "";
        List<Long> ends = temp.getFileSplit().endChunk;
        for (Long end : ends)
            endsString += (end + ",");

        System.out.println("[SAMAN][NetCDFInputFormatPrunerByFileIndexMultiFile][getSplits] " + "locations="
                + locationsString + "," + "paths=" + pathsString + "," + "starts=" + startsString + ","
                + "ends=" + endsString + ",");
    }

    return finalSplits.toArray(new NetCDFFileSplit[finalSplits.size()]);

}

From source file: BibTex.IOmethods.java

public void writeJournalsPerCategories(Set<BibTexRef> refs) throws IOException {
    JournalAbbreviationsMapping jmap = new JournalAbbreviationsMapping();
    jmap.loadMap();

    BufferedWriter bw = new BufferedWriter(new FileWriter(folder + "journals per categories.csv"));

    StringBuilder sb = new StringBuilder();
    String sep = "|";

    //creation of 2 convenient data structures for I/O
    Map<String, Multiset<String>> categoriesToJournals = new TreeMap();
    List<String> categoryNames = new ArrayList();

    for (BibTexRef ref : refs) {
        Set<Category> categories = ref.getCategories();

        String title = ref.getJournal();
        if (title == null || title.isEmpty()) {
            continue;
        }
        title = title.toLowerCase();

        Set<String> abbrev = (Set<String>) jmap.getJournalsToAbbrev().get(title);
        if (abbrev == null || abbrev.isEmpty()) {
            abbrev = new HashSet();
            abbrev.add(title);
        }

        String abbreviation = abbrev.iterator().next();

        for (Category category : categories) {
            if (!categoryNames.contains(category.getCategoryName())) {
                categoryNames.add(category.getCategoryName());
            }
            if (categoriesToJournals.containsKey(category.getCategoryName())) {
                categoriesToJournals.get(category.getCategoryName()).add(abbreviation);
            } else {
                Multiset<String> journalsForOneCategory = HashMultiset.create();
                journalsForOneCategory.add(abbreviation);

                categoriesToJournals.put(category.getCategoryName(), journalsForOneCategory);
            }
        }

    }
    Collections.sort(categoryNames);

    //writing of the first line of the csv: headers of the categories.
    for (String categoryName : categoryNames) {
        sb.append(categoryName);
        sb.append(sep);
    }
    sb.append("\n");

    //writing of all subsequent lines: one row per journal rank (most frequent first)
    int countCategoriesdone = 0;
    boolean continueLoop = true;
    while (continueLoop) {

        for (Iterator<String> it = categoriesToJournals.keySet().iterator(); it.hasNext();) {
            String category = it.next();
            Multiset<String> journalsForOneCategory = categoriesToJournals.get(category);

            Iterator<String> journalsIterator = Multisets.copyHighestCountFirst(journalsForOneCategory)
                    .elementSet().iterator();
            if (journalsIterator.hasNext()) {
                String journal = journalsIterator.next();
                sb.append(journal).append(" (").append(journalsForOneCategory.count(journal)).append(")")
                        .append(sep);
                journalsForOneCategory.remove(journal, journalsForOneCategory.count(journal));
            } else {
                sb.append(sep);
            }
        }
        sb.append("\n");

        for (String cat : categoriesToJournals.keySet()) {
            if (categoriesToJournals.get(cat).isEmpty()) {
                countCategoriesdone++;
            }
        }
        if (countCategoriesdone == categoryNames.size()) {
            continueLoop = false;
        } else {
            countCategoriesdone = 0;
        }

    }

    bw.write(sb.toString());
    bw.close();

}
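
The output loop above repeatedly takes the most frequent remaining journal of each category by wrapping the multiset with Multisets.copyHighestCountFirst, which returns an immutable copy whose iteration order is highest count first. A minimal sketch of that call in isolation, with made-up journal names:

import com.google.common.collect.HashMultiset;
import com.google.common.collect.Multiset;
import com.google.common.collect.Multisets;

public class HighestCountFirstSketch {
    public static void main(String[] args) {
        Multiset<String> journals = HashMultiset.create();
        journals.add("nature", 3);    // add(element, occurrences) records several copies at once
        journals.add("science", 1);
        journals.add("plos one", 2);
        // copyHighestCountFirst orders elements by descending count.
        for (String j : Multisets.copyHighestCountFirst(journals).elementSet()) {
            System.out.println(j + " (" + journals.count(j) + ")");
        }
    }
}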

From source file: org.apache.hadoop.mapred.NetCDFInputFormatPartToMemoryMultiSplit.java

@Override
public InputSplit[] getSplits(JobConf job, int numSplits) throws IOException {
    FileStatus[] files = listStatus(job);

    LOG.info("[SAMAN][NetCDFInputFormatPartToMemoryMultiSplit][getSplits] hive query is: "
            + job.get(HIVE_QUERY, "Kossher"));
    System.out.println("[SAMAN][NetCDFInputFormatPartToMemoryMultiSplit][getSplits] hive query is: "
            + job.get(HIVE_QUERY, "Kossher"));

    /* Analyzing Query here */
    String hiveQuery = job.get(HIVE_QUERY, "Kossher");
    QueryType queryType = QueryType.NOLIMIT; // default mode
    if (hiveQuery.contains("where") || hiveQuery.contains("WHERE")) {
        if (hiveQuery.contains("time") || hiveQuery.contains("TIME")) {
            queryType = QueryType.TIME;
        } else if (hiveQuery.contains("lat") || hiveQuery.contains("LAT")) {
            queryType = QueryType.LAT;
        } else if (hiveQuery.contains("lon") || hiveQuery.contains("LON")) {
            queryType = QueryType.LON;
        }
    }

    float topLimit = -1;
    float bottomLimit = -1;

    if (queryType != QueryType.NOLIMIT) {
        if (hiveQuery.contains("<")) {
            String[] querySplitted = hiveQuery.split(" ");
            int i = Arrays.asList(querySplitted).indexOf("<");
            topLimit = Float.valueOf(querySplitted[i + 1]);
        }
        if (hiveQuery.contains(">")) {
            String[] querySplitted = hiveQuery.split(" ");
            int i = Arrays.asList(querySplitted).indexOf(">");
            bottomLimit = Float.valueOf(querySplitted[i + 1]);
        }
    }

    //System.out.println( "[SAMAN][NetCDFInputFormatPrunerByFileIndex] QueryType = " + queryType.toString()
    //        +", topLimit = " + topLimit + ", bottomLimit = " + bottomLimit );
    //LOG.info("[SAMAN][NetCDFInputFormatPrunerByFileIndex] QueryType = " + queryType.toString()
    //        + ", topLimit = " + topLimit + ", bottomLimit = " + bottomLimit);
    /* End Analyzing Query here */

    System.out.println("[SAMANPruner] beginning of getSplits");
    LOG.info("[SAMANPruner] beginning of getSplits");
    //System.out.println( "[SAMAN] " + files.length );
    //LOG.info( "[SAMAN] " + files.length );
    // Save the number of input files in the job-conf
    job.setLong(NUM_INPUT_FILES, files.length);
    long totalSize = 0; // compute total size
    for (FileStatus file : files) { // check we have valid files
        if (file.isDir()) {
            throw new IOException("Not a file: " + file.getPath());
        }
        totalSize += file.getLen();
    }
    //long minSize = Math.max(job.getLong("mapred.min.split.size", 1),
    //                       minSplitSize);

    // generate splits
    ArrayList<NetCDFFileSplit> splits = new ArrayList<NetCDFFileSplit>(numSplits);
    ArrayList<NetCDFFileSplit> finalSplits = new ArrayList<NetCDFFileSplit>();
    NetworkTopology clusterMap = new NetworkTopology();
    for (FileStatus file : files) {
        Path path = file.getPath();
        int fileIndex = 0;
        int dimIndex = 0;

        //LOG.info("[SAMAN][NetCDFInputFormatPrunerByFileIndex][getSplits] File name is : " + path.getName());
        System.out.println(
                "[SAMAN][NetCDFInputFormatPartToMemoryMultiSplit][getSplits] File name is : " + path.getName());
        FileSystem fs = path.getFileSystem(job);
        long length = file.getLen();
        BlockLocation[] blkLocations = fs.getFileBlockLocations(file, 0, length);
        if ((length != 0) && isSplitable(fs, path)) {
            long blockSize = file.getBlockSize();
            netInfo = getNetCDFInfo(path, fs, job);
            long recStart = netInfo.recStart;
            long[] chunkStarts = netInfo.chunkStarts;
            long smallSize = netInfo.smallRecSize;
            long recSize = netInfo.recSize;
            long splitSize = 0;
            int chunkIndex = 0;
            long bytesRemaining = chunkStarts[chunkStarts.length - 1] + recSize - recStart - 2 * smallSize;
            long thisStart = recStart; // file position
            long thisChunk = 0;
            long blockNo = 1;
            long numChunksPerKey = 0;
            if (queryType == QueryType.LAT) {
                long chunkSize = netInfo.timeLength * netInfo.lonLength * 4;
                numChunksPerKey = blockSize / chunkSize;
            } else if (queryType == QueryType.LON) {
                long chunkSize = netInfo.timeLength * netInfo.latLength * 4;
                numChunksPerKey = blockSize / chunkSize;
            }

            System.out.println("[SAMAN][NetCDFInputFormat][getSplits] numChunksPerKey = " + numChunksPerKey);

            //LOG.info( "[SAMAN] NetCDFInputFormatPruner.getSplits => recStart = " + recStart + ", chunkStarts = " + chunkStarts +
            //        ", smallSize = " + smallSize + ", recSize = " + recSize + ", bytesRemaining = " + bytesRemaining +
            //        ", thisStart = " + thisStart);
            //System.out.println( "[SAMAN] NetCDFInputFormatPruner.getSplits => recStart = " + recStart + ", chunkStarts = " + chunkStarts +
            //        ", smallSize = " + smallSize + ", recSize = " + recSize + ", bytesRemaining = " + bytesRemaining +
            //        ", thisStart = " + thisStart);
            while (bytesRemaining > 0) {
                while (chunkIndex < chunkStarts.length && chunkStarts[chunkIndex] < blockNo * blockSize) {
                    chunkIndex++;
                }
                long tempStart = thisStart;
                long endChunk;
                if (chunkIndex >= chunkStarts.length) {
                    splitSize = chunkStarts[chunkStarts.length - 1] + recSize - thisStart - smallSize;

                    //bytesRemaining should be 0 after this round
                } else {
                    splitSize = chunkStarts[chunkIndex] - thisStart - smallSize;
                    thisStart = chunkStarts[chunkIndex];
                }
                endChunk = chunkIndex;
                blockNo++;
                //LOG.info( "[SAMAN] NetCDFInputFormatPruner.getSplits => splitSize="+splitSize+", thisStart="+thisStart+
                //        ", endChunk="+endChunk+", blockNo="+blockNo);
                System.out.println("[SAMAN] NetCDFInputFormatPruner.getSplits => splitSize=" + splitSize
                        + ", thisStart=" + thisStart + ", endChunk=" + endChunk + ", blockNo=" + blockNo);
                String[] splitHosts = getSplitHosts(blkLocations, tempStart, splitSize, clusterMap);
                NetCDFFileSplit split = new NetCDFFileSplit(path, tempStart, splitSize, splitHosts);

                split.getFileSplit().startChunk.add(thisChunk);
                split.getFileSplit().endChunk.add(endChunk);

                if (queryType == QueryType.TIME) {
                    split.getFileSplit().timeStartLimit.add((long) bottomLimit);
                    split.getFileSplit().timeEndLimit.add((long) topLimit);
                    split.getFileSplit().latStartLimit.add((long) -1);
                    split.getFileSplit().latEndLimit.add((long) -1);
                    split.getFileSplit().lonStartLimit.add((long) -1);
                    split.getFileSplit().lonEndLimit.add((long) -1);
                } else if (queryType == QueryType.LAT) {
                    split.getFileSplit().timeStartLimit.add((long) -1);
                    split.getFileSplit().timeEndLimit.add((long) -1);
                    split.getFileSplit().latStartLimit.add((long) bottomLimit);
                    split.getFileSplit().latEndLimit.add((long) topLimit);
                    split.getFileSplit().lonStartLimit.add((long) -1);
                    split.getFileSplit().lonEndLimit.add((long) -1);
                } else if (queryType == QueryType.LON) {
                    split.getFileSplit().timeStartLimit.add((long) -1);
                    split.getFileSplit().timeEndLimit.add((long) -1);
                    split.getFileSplit().latStartLimit.add((long) -1);
                    split.getFileSplit().latEndLimit.add((long) -1);
                    split.getFileSplit().lonStartLimit.add((long) bottomLimit);
                    split.getFileSplit().lonEndLimit.add((long) topLimit);
                }

                blockToNodes.put(split, splitHosts);

                System.out.println(
                        "[SAMAN][NetCDFInputFormat][getSplits] Put the nodes with the specified split into the node to block set");
                for (int i = 0; i < splitHosts.length; i++) {
                    Set<NetCDFFileSplit> splitList = nodeToBlocks.get(splitHosts[i]);
                    if (splitList == null) {
                        splitList = new LinkedHashSet<NetCDFFileSplit>();
                        nodeToBlocks.put(splitHosts[i], splitList);
                    }
                    splitList.add(split);
                }

                /*
                if( queryType == QueryType.TIME ) {
                if ((topLimit < thisChunk + (fileIndex*netInfo.timeLength)) && (topLimit != -1)) {
                    bytesRemaining -= splitSize;
                    thisChunk = endChunk;
                    continue;
                }
                if ((bottomLimit > endChunk + (fileIndex*netInfo.timeLength)) && (bottomLimit != -1)) {
                    bytesRemaining -= splitSize;
                    thisChunk = endChunk;
                    continue;
                }
                        
                blockToNodes.put( split, splitHosts );
                        
                // Put the nodes with the specified split into the node to block set
                System.out.println( "[SAMAN][NetCDFInputFormat][getSplits] Put the nodes with the specified split into the node to block set" );
                for( int i = 0; i < splitHosts.length; i++ ){
                    Set<NetCDFFileSplit> splitList = nodeToBlocks.get(splitHosts[i]);
                    if( splitList == null ){
                        splitList = new LinkedHashSet<NetCDFFileSplit>();
                        nodeToBlocks.put( splitHosts[i], splitList );
                    }
                    splitList.add( split );
                }
                        
                System.out.println("[SAMAN][NetCDFInputFormat][getSplits] set start and end!" );
                        
                split.getFileSplit().startChunk.add(thisChunk);
                split.getFileSplit().endChunk.add(endChunk);
                } else if( queryType == QueryType.LAT || queryType == QueryType.LON ){
                //System.out.println( "[SAMAN][NetCDFInputFormat][getSplits] file = "
                //        + path.getName() + ", topLimit = " + topLimit + ", bottomLimit = " + bottomLimit + ", dimIndex = " + dimIndex );
                */
                /*
                if( topLimit < dimIndex*numChunksPerKey && (topLimit != -1) ){
                    bytesRemaining -= splitSize;
                    thisChunk = endChunk;
                    continue;
                }
                if( bottomLimit > dimIndex*numChunksPerKey && (bottomLimit != -1) ){
                    bytesRemaining -= splitSize;
                    thisChunk = endChunk;
                    continue;
                }*/
                /*
                if (topLimit < thisChunk && (topLimit != -1)) {
                    bytesRemaining -= splitSize;
                    thisChunk = endChunk;
                    continue;
                }
                if (bottomLimit > endChunk && (bottomLimit != -1)) {
                    bytesRemaining -= splitSize;
                    thisChunk = endChunk;
                    continue;
                }
                */
                /*
                if ((topLimit < thisChunk) && (topLimit != -1)) {
                    bytesRemaining -= splitSize;
                    thisChunk = endChunk;
                    continue;
                }
                if ((bottomLimit > endChunk) && (bottomLimit != -1)) {
                    bytesRemaining -= splitSize;
                    thisChunk = endChunk;
                    continue;
                }
                */
                //split.getNetCDFFileSplit().endChunk = (long)topLimit;
                /*
                split.getFileSplit().startChunk.add(thisChunk);
                split.getFileSplit().endChunk.add(endChunk);
                */
                // Put the block into the block to node set
                /*
                        
                blockToNodes.put( split, splitHosts );
                        
                // Put the nodes with the specified split into the node to block set
                for( int i = 0; i < splitHosts.length; i++ ){
                    Set<NetCDFFileSplit> splitList = nodeToBlocks.get(splitHosts[i]);
                    if( splitList == null ){
                        splitList = new LinkedHashSet<NetCDFFileSplit>();
                        nodeToBlocks.put( splitHosts[i], splitList );
                    }
                    splitList.add( split );
                }
                        
                // For the test, we would assign everything statically.
                if( bottomLimit > thisChunk && (bottomLimit != -1) ){
                    System.out.println( "[SAMAN][NetCDFInputFormatPrunerByFileIndex][getSplits] startChunk = "
                            + bottomLimit );
                    split.getFileSplit().startChunk.add((long)bottomLimit);
                }else{
                    split.getFileSplit().startChunk.add(thisChunk);
                }
                if( topLimit < endChunk && (topLimit != -1) ){
                    System.out.println( "[SAMAN][NetCDFInputFormatPrunerByFileIndex][getSplits] endChunk = "
                            + endChunk );
                    split.getFileSplit().endChunk.add((long)topLimit);
                }else{
                    split.getFileSplit().endChunk.add(endChunk);
                }
                } else {
                if ((topLimit < thisChunk) && (topLimit != -1)) {
                    bytesRemaining -= splitSize;
                    thisChunk = endChunk;
                    continue;
                }
                if ((bottomLimit > endChunk) && (bottomLimit != -1)) {
                    bytesRemaining -= splitSize;
                    thisChunk = endChunk;
                    continue;
                }
                        
                blockToNodes.put( split, splitHosts );
                        
                // Put the nodes with the specified split into the node to block set
                for( int i = 0; i < splitHosts.length; i++ ){
                    Set<NetCDFFileSplit> splitList = nodeToBlocks.get(splitHosts[i]);
                    if( splitList == null ){
                        splitList = new LinkedHashSet<NetCDFFileSplit>();
                        nodeToBlocks.put( splitHosts[i], splitList );
                    }
                    splitList.add( split );
                }
                        
                split.getFileSplit().startChunk.add(thisChunk);
                split.getFileSplit().endChunk.add(endChunk);
                }
                */

                splits.add(split);

                bytesRemaining -= splitSize;
                thisChunk = endChunk;
                //LOG.info( "[SAMAN] NetCDFInputFormatPruner.getSplits => bytesRemaining="+bytesRemaining+", thisChunk="+thisChunk );
                //System.out.println( "[SAMAN] NetCDFInputFormatPruner.getSplits => bytesRemaining="+bytesRemaining+", thisChunk="+thisChunk );
            }

        } else if (length != 0) {
            String[] splitHosts = getSplitHosts(blkLocations, 0, length, clusterMap);
            //splits.add(new FileSplit(path, 0, length, splitHosts));
        } else {
            //Create empty hosts array for zero length files
            //splits.add(new FileSplit(path, 0, length, new String[0]));
        }
    }

    // Now merge the incomplete splits: check whether each split still has
    // room to absorb another split.

    Set<String> completedNodes = new HashSet<String>();
    ArrayList<NetCDFFileSplit> validBlocks = new ArrayList<NetCDFFileSplit>();
    long curSplitSize = 0;
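    // Tally of how many merged splits have been created on each host so far.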
    Multiset<String> splitsPerNode = HashMultiset.create();

    for (Iterator<Map.Entry<String, Set<NetCDFFileSplit>>> iter = nodeToBlocks.entrySet().iterator(); iter
            .hasNext();) {
        Map.Entry<String, Set<NetCDFFileSplit>> one = iter.next();
        String node = one.getKey();

        System.out.println("[SAMAN][NetCDFInputFormatPrunerByFileIndexMultiFile][getSplits] node is = " + node);

        // Skip the node if it has previously been marked as completed.
        if (completedNodes.contains(node)) {
            continue;
        }

        Set<NetCDFFileSplit> blocksInCurrentNode = one.getValue();

        // for each block, copy it into validBlocks. Delete it from
        // blockToNodes so that the same block does not appear in
        // two different splits.
        Iterator<NetCDFFileSplit> oneBlockIter = blocksInCurrentNode.iterator();
        while (oneBlockIter.hasNext()) {
            NetCDFFileSplit oneblock = oneBlockIter.next();

            System.out.println("[SAMAN][NetCDFInputFormatPrunerByFileIndexMultiFile][getSplits] " + "split is: "
                    + oneblock.getFileSplit().getPath());

            // Remove all blocks which may already have been assigned to other
            // splits.
            if (!blockToNodes.containsKey(oneblock)) {
                oneBlockIter.remove();
                continue;
            }

            validBlocks.add(oneblock);
            if (queryType == QueryType.LAT) {
                curSplitSize += (oneblock.getFileSplit().latEndLimit.get(0)
                        - oneblock.getFileSplit().latStartLimit.get(0)) * 4 * netInfo.lonLength
                        * netInfo.timeLength;
            } else if (queryType == QueryType.LON) {
                curSplitSize += (oneblock.getFileSplit().lonEndLimit.get(0)
                        - oneblock.getFileSplit().lonStartLimit.get(0)) * 4 * netInfo.latLength
                        * netInfo.timeLength;
            } else if (queryType == QueryType.TIME) {
                curSplitSize += (oneblock.getFileSplit().timeEndLimit.get(0)
                        - oneblock.getFileSplit().timeStartLimit.get(0)) * 4 * netInfo.latLength
                        * netInfo.lonLength;
            } else {
                curSplitSize += (oneblock.getFileSplit().endChunk.get(0)
                        - oneblock.getFileSplit().startChunk.get(0)) * 4 * netInfo.latLength
                        * netInfo.lonLength;
            }
            blockToNodes.remove(oneblock);
            System.out.println("[SAMAN][NetCDFInputFormatPrunerByFileIndexMultiFile][getSplits] curSplitSize = "
                    + curSplitSize);

            //curSplitSize += singleSplitSize;

            //System.out.println( "[SAMAN][NetCDFInputFormatPrunerByFileIndexMultiFile][getSplits] " +
            //        "Added to valid blocks!" );

            // if the accumulated split size exceeds the maximum, then
            // create this split.
            if (blockSize != 0 && curSplitSize >= blockSize) {
                // create an input split and add it to the splits array
                addCreatedSplit(finalSplits, Collections.singleton(node), validBlocks);
                //totalLength -= curSplitSize;

                //System.out.println( "[SAMAN][NetCDFInputFormatPrunerByFileIndexMultiFile][getSplits] " +
                //        "addCreatedSplit called!" );

                curSplitSize = 0;
                splitsPerNode.add(node);

                // Remove entries from blocksInNode so that we don't walk these
                // again.
                //blocksInCurrentNode.removeAll(validBlocks);
                validBlocks.clear();

                // Done creating a single split for this node. Move on to the next
                // node so that splits are distributed across nodes.
                //break;
            }

        }
        if (!validBlocks.isEmpty()) {
            //System.out.println( "[SAMAN][NetCDFInputFormatPrunerByFileIndexMultiFile][getSplits] validBlocks not empty!" );
            addCreatedSplit(finalSplits, Collections.singleton(node), validBlocks);
            curSplitSize = 0;
            splitsPerNode.add(node);
            blocksInCurrentNode.removeAll(validBlocks);
            validBlocks.clear();
        }
    }

    Set<NetCDFFileSplit> singleSplitsSet = blockToNodes.keySet();
    Iterator<NetCDFFileSplit> itrSingle = singleSplitsSet.iterator();
    while (itrSingle.hasNext()) {
        NetCDFFileSplit temp = itrSingle.next();
        addCreatedSingleSplit(finalSplits, temp.getLocations(), temp);
    }

    Iterator<NetCDFFileSplit> itr = finalSplits.iterator();
    while (itr.hasNext()) {

        NetCDFFileSplit temp = itr.next();

        String[] locations = temp.getFileSplit().getLocations();
        String locationsString = "";
        for (int i = 0; i < locations.length; i++)
            locationsString += locations[i] + ",";

        String pathsString = "";
        List<Path> paths = temp.getFileSplit().getPaths();
        for (Path path : paths)
            pathsString += path.getName() + ",";

        String startsString = "";
        List<Long> starts = temp.getFileSplit().startChunk;
        for (Long start : starts)
            startsString += (start + ",");

        String endsString = "";
        List<Long> ends = temp.getFileSplit().endChunk;
        for (Long end : ends)
            endsString += (end + ",");

        System.out.println("[SAMAN][NetCDFInputFormatPrunerByFileIndexMultiFile][getSplits] " + "locations="
                + locationsString + "," + "paths=" + pathsString + "," + "starts=" + startsString + ","
                + "ends=" + endsString + ",");
    }

    return finalSplits.toArray(new NetCDFFileSplit[finalSplits.size()]);

}
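In the example above, HashMultiset.create() backs the splitsPerNode tally: each call to splitsPerNode.add(node) bumps a per-host counter without any manual Map<String, Integer> bookkeeping. The following minimal, self-contained sketch isolates that counting idiom; the SplitsPerNodeSketch class and the host names are hypothetical and stand in for the hosts returned by getSplitHosts().

import com.google.common.collect.HashMultiset;
import com.google.common.collect.Multiset;

import java.util.Arrays;
import java.util.List;

public class SplitsPerNodeSketch {
    public static void main(String[] args) {
        // Hypothetical hosts on which merged splits were created, one entry per split.
        List<String> hostsOfCreatedSplits = Arrays.asList("node1", "node2", "node1", "node3", "node1");

        // Same idiom as splitsPerNode in getSplits(): add(host) once per created split.
        Multiset<String> splitsPerNode = HashMultiset.create();
        for (String host : hostsOfCreatedSplits) {
            splitsPerNode.add(host);
        }

        // count() then answers "how many splits landed on this node?".
        System.out.println("node1 -> " + splitsPerNode.count("node1")); // 3
        System.out.println("node2 -> " + splitsPerNode.count("node2")); // 1
        System.out.println("node4 -> " + splitsPerNode.count("node4")); // 0 for unseen hosts
    }
}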

From source file:org.datanucleus.store.types.guava.wrappers.backed.Multiset.java

/**
 * The writeReplace method is called when ObjectOutputStream is preparing
 * to write the object to the stream. The ObjectOutputStream checks
 * whether the class defines the writeReplace method. If the method is
 * defined, the writeReplace method is called to allow the object to
 * designate its replacement in the stream. The object returned should be
 * either of the same type as the object passed in or an object that when
 * read and resolved will result in an object of a type that is compatible
 * with all references to the object.
 * 
 * @return the replaced object
 * @throws ObjectStreamException if an error occurs
 */
protected Object writeReplace() throws ObjectStreamException {
    if (useCache) {
        loadFromStore();
        HashMultiset multi = HashMultiset.create();
        multi.addAll(delegate);
        return multi;
    }

    // TODO Cater for non-cached collection, load elements in a DB call.
    HashMultiset multi = HashMultiset.create();
    multi.addAll(delegate);
    return multi;
}
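The point of this writeReplace() is that the serialized form is a plain, detached HashMultiset rather than the store-backed wrapper itself. The sketch below shows that copy-on-serialize pattern in isolation; the DetachedMultisetHolder class and its String delegate are hypothetical and only illustrate the replacement idea.

import com.google.common.collect.HashMultiset;

import java.io.ObjectStreamException;
import java.io.Serializable;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;

public class DetachedMultisetHolder implements Serializable {
    // Stand-in for the backed delegate that normally talks to the datastore.
    private final Collection<String> delegate = new ArrayList<>(Arrays.asList("a", "b", "b"));

    // On serialization, hand the stream a plain HashMultiset copy instead of this wrapper.
    protected Object writeReplace() throws ObjectStreamException {
        HashMultiset<String> multi = HashMultiset.create();
        multi.addAll(delegate);
        return multi;
    }
}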

From source file:org.apache.twill.internal.appmaster.ApplicationMasterService.java

/**
 * Handles containers that have completed.
 */
private void handleCompleted(List<YarnContainerStatus> completedContainersStatuses) {
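    // Per-runnable tally of completed containers whose instances need to be re-requested.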
    Multiset<String> restartRunnables = HashMultiset.create();
    for (YarnContainerStatus status : completedContainersStatuses) {
        LOG.info("Container {} completed with {}:{}.", status.getContainerId(), status.getState(),
                status.getDiagnostics());
        runningContainers.handleCompleted(status, restartRunnables);
    }

    for (Multiset.Entry<String> entry : restartRunnables.entrySet()) {
        LOG.info("Re-request container for {} with {} instances.", entry.getElement(), entry.getCount());
        runnableContainerRequests.add(createRunnableContainerRequest(entry.getElement(), entry.getCount()));
    }

    // For all runnables that need to re-request containers, update the expected count timestamp
    // so that the EventHandler is triggered with the right expiration timestamp.
    expectedContainers.updateRequestTime(restartRunnables.elementSet());
}