List of usage examples for com.google.common.collect.Multiset.size()
int size();
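Multiset.size() returns the total number of occurrences of all elements, duplicates included; the number of distinct elements is elementSet().size(). A minimal, self-contained sketch (class name and values are illustrative, not taken from any of the examples below):

import com.google.common.collect.HashMultiset;
import com.google.common.collect.Multiset;

public class MultisetSizeDemo {
    public static void main(String[] args) {
        Multiset<String> words = HashMultiset.create();
        words.add("apple");
        words.add("apple");
        words.add("banana");

        System.out.println(words.size());              // 3 - total occurrences, duplicates included
        System.out.println(words.elementSet().size()); // 2 - distinct elements
        System.out.println(words.count("apple"));      // 2 - occurrences of a single element
    }
}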
From source file: io.prestosql.verifier.Validator.java
public String getResultsComparison(int precision) {
    List<List<Object>> controlResults = controlResult.getResults();
    List<List<Object>> testResults = testResult.getResults();

    if (valid() || (controlResults == null) || (testResults == null)) {
        return "";
    }

    Multiset<List<Object>> control = ImmutableSortedMultiset.copyOf(rowComparator(precision), controlResults);
    Multiset<List<Object>> test = ImmutableSortedMultiset.copyOf(rowComparator(precision), testResults);

    try {
        Iterable<ChangedRow> diff = ImmutableSortedMultiset.<ChangedRow>naturalOrder()
                .addAll(Iterables.transform(Multisets.difference(control, test),
                        row -> new ChangedRow(Changed.REMOVED, row, precision)))
                .addAll(Iterables.transform(Multisets.difference(test, control),
                        row -> new ChangedRow(Changed.ADDED, row, precision)))
                .build();
        diff = Iterables.limit(diff, 100);

        StringBuilder sb = new StringBuilder();
        sb.append(format("Control %s rows, Test %s rows%n", control.size(), test.size()));
        if (verboseResultsComparison) {
            Joiner.on("\n").appendTo(sb, diff);
        } else {
            sb.append("RESULTS DO NOT MATCH\n");
        }
        return sb.toString();
    } catch (TypesDoNotMatchException e) {
        return e.getMessage();
    }
}
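The size() calls above only feed the summary line "Control %s rows, Test %s rows"; the row comparison itself relies on Multisets.difference(). A reduced sketch of the same pattern, assuming plain String rows with natural ordering instead of the verifier's rowComparator and ChangedRow types:

import com.google.common.collect.ImmutableSortedMultiset;
import com.google.common.collect.Multiset;
import com.google.common.collect.Multisets;

public class RowDiffSketch {
    public static void main(String[] args) {
        Multiset<String> control = ImmutableSortedMultiset.of("a", "a", "b");
        Multiset<String> test = ImmutableSortedMultiset.of("a", "b", "c");

        // size() reports total row counts, duplicates included
        System.out.printf("Control %d rows, Test %d rows%n", control.size(), test.size());

        // Rows present on one side but not the other, respecting multiplicity
        System.out.println("removed: " + Multisets.difference(control, test)); // [a]
        System.out.println("added:   " + Multisets.difference(test, control)); // [c]
    }
}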
From source file: edu.umd.cs.submitServer.servlets.AnalyzeArchives.java
@Override
public void doGet(HttpServletRequest request, HttpServletResponse response) throws ServletException, IOException {
    Connection conn = null;
    response.setContentType("text/plain");
    PrintWriter writer = response.getWriter();
    Project project = (Project) request.getAttribute("project");
    Course course = (Course) request.getAttribute("course");
    long totalArchiveSpace = 0;
    long totalDistinctArchiveSpace = 0;
    HashSet<Integer> seen = new HashSet<Integer>();
    HashMap<String, FileContents> archiveContents = new HashMap<String, FileContents>();
    Multiset<String> files = HashMultiset.create();
    Multiset<String> checksums = HashMultiset.create();
    try {
        conn = getConnection();
        List<Integer> archives = Submission.getAllArchivesForProject(project.getProjectPK(), conn);
        writer.printf("Analyzing %d submissions for %s project %s%n", archives.size(), course.getCourseName(),
                project.getProjectNumber());
        for (Integer archivePK : archives) {
            byte[] bytes = Archive.downloadBytesFromArchive((String) Submission.SUBMISSION_ARCHIVES,
                    (Integer) archivePK, (Connection) conn);
            totalArchiveSpace += bytes.length;
            if (!seen.add(archivePK))
                continue;
            totalDistinctArchiveSpace += bytes.length;
            TreeMap<String, byte[]> contents = Archive.unzip(new ByteArrayInputStream(bytes));
            for (Map.Entry<String, byte[]> e : contents.entrySet()) {
                byte[] archiveBytes = e.getValue();
                String checksum = Checksums.getChecksum(archiveBytes);
                String name = e.getKey();
                files.add(name);
                checksums.add(checksum);
                FileContents info = archiveContents.get(checksum);
                if (info == null) {
                    info = new FileContents(name, TextUtilities.isText(TextUtilities.simpleName(name)),
                            archiveBytes.length, checksum, null);
                    archiveContents.put(checksum, info);
                }
            }
        }
    } catch (SQLException e) {
        throw new ServletException(e);
    } finally {
        releaseConnection(conn);
    }
    long totalSize = 0;
    TreeSet<FileContents> ordered = new TreeSet<FileContents>(archiveContents.values());
    writer.printf("%5s %9s %s%n", "#", "size", "name");
    String prevName = null;
    for (FileContents info : ordered) {
        if (prevName == null || !prevName.equals(info.name)) {
            if (prevName != null)
                writer.println();
            writer.printf("%5d %9s %s%n", files.count(info.name), " ", info.name);
            prevName = info.name;
        }
        int count = checksums.count(info.checksum);
        writer.printf("%5d %9d %s%n", count, info.size, info.name);
        totalSize += info.size;
    }
    writer.printf("%n");
    writer.printf("%d distinct archives%n", seen.size());
    writer.printf("%d distinct files%n", files.elementSet().size());
    writer.printf("%d total files%n", files.size());
    writer.printf("%d bytes in distinct archives%n", totalDistinctArchiveSpace);
    writer.printf("%d bytes in repeated archives%n", totalArchiveSpace);
    writer.printf("%d bytes as files%n", totalSize);
}
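Here the multisets are plain counters: files.size() is the total number of archive entries across all submissions, files.elementSet().size() is the number of distinct file names, and count() gives the repetition of a single name or checksum. A self-contained sketch of that counting pattern (the file names are made up):

import com.google.common.collect.HashMultiset;
import com.google.common.collect.Multiset;

public class ArchiveCountSketch {
    public static void main(String[] args) {
        Multiset<String> files = HashMultiset.create();
        files.add("Main.java");
        files.add("Main.java");   // same file name seen in a second submission
        files.add("Util.java");

        // size() counts every occurrence; elementSet().size() counts unique names
        System.out.printf("%d total files%n", files.size());                     // 3
        System.out.printf("%d distinct files%n", files.elementSet().size());     // 2
        System.out.printf("%d copies of Main.java%n", files.count("Main.java")); // 2
    }
}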
From source file: de.andreasschoknecht.LS3.DocumentCollection.java
/** * Insert a model to a model collection. This means that the underlying Term-Document Matrix has to be updated. * * @param modelPath the path to the model to be inserted. *///from w w w . j a v a 2 s .c o m public void insertModel(String modelPath) { // Make sure file name is correct if (!modelPath.endsWith(".pnml")) modelPath = modelPath + ".pnml"; // Create new LS3Document object and add it to the document collection list of documents System.out.println("------------------------"); System.out.println("Model to insert:"); System.out.println("------------------------"); System.out.println(modelPath.substring(modelPath.lastIndexOf(File.separator) + 1)); System.out.println("------------------------"); System.out.println("Models in list:"); System.out.println("------------------------"); String[] updatedFileList = new String[fileList.length + 1]; for (int i = 0; i <= fileList.length; i++) { if (i != fileList.length) updatedFileList[i] = fileList[i]; else updatedFileList[i] = modelPath.substring(modelPath.lastIndexOf(File.separator) + 1); System.out.println(updatedFileList[i]); } documentNumber++; LS3Document newDocument = new LS3Document(modelPath); PNMLReader pnmlReader = new PNMLReader(); try { pnmlReader.processDocument(newDocument); } catch (JDOMException | IOException e) { // TODO Auto-generated catch block e.printStackTrace(); } System.out.println("------------------------"); System.out.println("New LS3Document data:"); System.out.println("------------------------"); System.out.println(newDocument.getPNMLPath()); System.out.println("Amount of terms = " + newDocument.getAmountTerms()); for (String term : newDocument.getTermCollection()) { System.out.println(term); } // Add new column to the Term-Document Matrix int t = tdMatrix.getRowNumber(); double[] termFrequencies = new double[t]; String[] termCollectionArray = new String[termCollection.size()]; termCollection.toArray(termCollectionArray); Multiset<String> termsM = newDocument.getTermCollection(); for (int i = 0; i < t; i++) { termFrequencies[i] = termsM.count(termCollectionArray[i]); termsM.remove(termCollectionArray[i]); } System.out.println("------------------------"); System.out.println("Term frequencies:"); System.out.println("------------------------"); System.out.println(Arrays.toString(termFrequencies)); System.out.println("------------------------"); System.out.println("Old TD Matrix:"); System.out.println("------------------------"); for (int i = 0, k = tdMatrix.getRowNumber(); i < k; i++) { System.out.print(tdMatrix.getTermArray()[i] + " "); for (int j = 0, l = tdMatrix.getColumnNumber(); j < l; j++) { System.out.print(" " + tdMatrix.getMatrix()[i][j] + " "); } System.out.println(""); } System.out.println("---------------------\r\n\r\n"); tdMatrix.addColumn(termFrequencies); System.out.println("------------------------"); System.out.println("New TD Matrix:"); System.out.println("------------------------"); for (int i = 0, k = tdMatrix.getRowNumber(); i < k; i++) { System.out.print(tdMatrix.getTermArray()[i] + " "); for (int j = 0, l = tdMatrix.getColumnNumber(); j < l; j++) { System.out.print(" " + tdMatrix.getMatrix()[i][j] + " "); } System.out.println(""); } System.out.println("---------------------\r\n\r\n"); // Add new terms of the new model to the term list of the document collection System.out.println("------------------------"); System.out.println("Old term collection:"); System.out.println("------------------------"); for (String term : termCollection) { System.out.println(term); } 
System.out.println("------------------------"); System.out.println("Terms remaining in insertion model:"); System.out.println("------------------------"); System.out.println(Arrays.toString(termsM.toArray(new String[termsM.size()]))); Set<String> termSet = termsM.elementSet(); String[] newTerms = termSet.toArray(new String[termSet.size()]); for (String term : newTerms) { termCollection.add(term); } System.out.println("------------------------"); System.out.println("New term collection:"); System.out.println("------------------------"); for (String term : termCollection) { System.out.println(term); } System.out.println("------------------------"); System.out.println("New term collection TD Matrix:"); System.out.println("------------------------"); for (String term : tdMatrix.getTermArray()) { System.out.println(term); } // Add one row for each new term and add the corresponding Term-Document Matrix entries double[] newTermsFrequencies = new double[newTerms.length]; for (int i = 0; i < newTerms.length; i++) { newTermsFrequencies[i] = termsM.count(newTerms[i]); } System.out.println("------------------------"); System.out.println("New term frequencies:"); System.out.println("------------------------"); System.out.println(Arrays.toString(newTermsFrequencies)); int n = tdMatrix.getColumnNumber(); for (int i = 0; i < newTermsFrequencies.length; i++) { double[] newRow = new double[n]; for (int j = 0; j < n - 2; j++) newRow[j] = 0; newRow[n - 1] = newTermsFrequencies[i]; tdMatrix.addRow(newRow); } // Update term list of TDMatrix object tdMatrix.setTermArray(termCollection.toArray(new String[0])); System.out.println("------------------------"); System.out.println("Final TD Matrix:"); System.out.println("------------------------"); for (int i = 0, k = tdMatrix.getRowNumber(); i < k; i++) { System.out.print(tdMatrix.getTermArray()[i] + " "); for (int j = 0, l = tdMatrix.getColumnNumber(); j < l; j++) { System.out.print(" " + tdMatrix.getMatrix()[i][j] + " "); } System.out.println(""); } System.out.println("---------------------\r\n\r\n"); }
From source file: com.wasteofplastic.askyblock.LevelCalcByChunk.java
/** * Calculates the level of an island// w ww. ja v a 2 s. c o m * @param plugin * @param targetPlayer - UUID of island owner or team member * @param sender - requester of the level calculation, if anyone * @param silent - if true, calculation will be done silently * @param report - provide a report to the asker */ public LevelCalcByChunk(final ASkyBlock plugin, final UUID targetPlayer, final CommandSender sender, final boolean report) { if (report && plugin.getServer().getVersion().contains("(MC: 1.7")) { sender.sendMessage(ChatColor.RED + "This option is not available in V1.7 servers, sorry."); return; } //plugin.getLogger().info("DEBUG: running level calc " + silent); // Get player's island final Island island = plugin.getGrid().getIsland(targetPlayer, ((Player) sender).getWorld().getEnvironment()); if (island != null) { // Get the permission multiplier if it is available Player player = plugin.getServer().getPlayer(targetPlayer); int multiplier = 1; if (player != null) { // Get permission multiplier for (PermissionAttachmentInfo perms : player.getEffectivePermissions()) { if (perms.getPermission().startsWith(Settings.PERMPREFIX + "island.multiplier.")) { // Get the max value should there be more than one multiplier = Math.max(multiplier, Integer.valueOf( perms.getPermission().split(Settings.PERMPREFIX + "island.multiplier.")[1])); } // Do some sanity checking if (multiplier < 1) { multiplier = 1; } } } final int levelMultiplier = multiplier; // Get the handicap final int levelHandicap = island.getLevelHandicap(); // Get the death handicap int deaths = plugin.getPlayers().getDeaths(targetPlayer); if (plugin.getPlayers().inTeam(targetPlayer)) { // Get the team leader's deaths deaths = plugin.getPlayers().getDeaths(plugin.getPlayers().getTeamLeader(targetPlayer)); if (Settings.sumTeamDeaths) { deaths = 0; //plugin.getLogger().info("DEBUG: player is in team"); for (UUID member : plugin.getPlayers().getMembers(targetPlayer)) { deaths += plugin.getPlayers().getDeaths(member); } } } final int deathHandicap = deaths; // Check if player's island world is the nether or overworld and adjust accordingly final World world = plugin.getPlayers().getIslandLocation(targetPlayer).getWorld(); // Get the chunks //long nano = System.nanoTime(); Set<ChunkSnapshot> chunkSnapshot = new HashSet<ChunkSnapshot>(); for (int x = island.getMinProtectedX(); x < (island.getMinProtectedX() + island.getProtectionSize() + 16); x += 16) { for (int z = island.getMinProtectedZ(); z < (island.getMinProtectedZ() + island.getProtectionSize() + 16); z += 16) { if (!world.getBlockAt(x, 0, z).getChunk().isLoaded()) { world.getBlockAt(x, 0, z).getChunk().load(); chunkSnapshot.add(world.getBlockAt(x, 0, z).getChunk().getChunkSnapshot()); world.getBlockAt(x, 0, z).getChunk().unload(); } else { chunkSnapshot.add(world.getBlockAt(x, 0, z).getChunk().getChunkSnapshot()); } //plugin.getLogger().info("DEBUG: getting chunk at " + x + ", " + z); } } //plugin.getLogger().info("DEBUG: time = " + (System.nanoTime() - nano) / 1000000 + " ms"); //plugin.getLogger().info("DEBUG: size of chunk ss = " + chunkSnapshot.size()); final Set<ChunkSnapshot> finalChunk = chunkSnapshot; final int worldHeight = world.getMaxHeight(); //plugin.getLogger().info("DEBUG:world height = " +worldHeight); plugin.getServer().getScheduler().runTaskAsynchronously(plugin, new Runnable() { @SuppressWarnings("deprecation") @Override public void run() { // Logging File log = null; PrintWriter out = null; List<MaterialData> mdLog = null; List<MaterialData> uwLog = null; 
List<MaterialData> noCountLog = null; List<MaterialData> overflowLog = null; if (Settings.levelLogging) { log = new File(plugin.getDataFolder(), "level.log"); try { if (log.exists()) { out = new PrintWriter(new FileWriter(log, true)); } else { out = new PrintWriter(log); } } catch (FileNotFoundException e) { System.out.println("Level log (level.log) could not be opened..."); e.printStackTrace(); } catch (IOException e) { System.out.println("Level log (level.log) could not be opened..."); e.printStackTrace(); } } if (Settings.levelLogging || report) { mdLog = new ArrayList<MaterialData>(); uwLog = new ArrayList<MaterialData>(); noCountLog = new ArrayList<MaterialData>(); overflowLog = new ArrayList<MaterialData>(); } // Copy the limits hashmap HashMap<MaterialData, Integer> limitCount = new HashMap<MaterialData, Integer>( Settings.blockLimits); // Calculate the island score int blockCount = 0; int underWaterBlockCount = 0; for (ChunkSnapshot chunk : finalChunk) { for (int x = 0; x < 16; x++) { // Check if the block coord is inside the protection zone and if not, don't count it if (chunk.getX() * 16 + x < island.getMinProtectedX() || chunk.getX() * 16 + x >= island.getMinProtectedX() + island.getProtectionSize()) { //plugin.getLogger().info("Block is outside protected area - x = " + (chunk.getX() * 16 + x)); continue; } for (int z = 0; z < 16; z++) { // Check if the block coord is inside the protection zone and if not, don't count it if (chunk.getZ() * 16 + z < island.getMinProtectedZ() || chunk.getZ() * 16 + z >= island.getMinProtectedZ() + island.getProtectionSize()) { //plugin.getLogger().info("Block is outside protected area - z = " + (chunk.getZ() * 16 + z)); continue; } for (int y = 0; y < worldHeight; y++) { int type = chunk.getBlockTypeId(x, y, z); int data = chunk.getBlockData(x, y, z); MaterialData md = new MaterialData(type, (byte) data); MaterialData generic = new MaterialData(type); if (type != 0) { // AIR if (limitCount.containsKey(md) && Settings.blockValues.containsKey(md)) { int count = limitCount.get(md); //plugin.getLogger().info("DEBUG: Count for non-generic " + md + " is " + count); if (count > 0) { limitCount.put(md, --count); if (Settings.sea_level > 0 && y <= Settings.sea_level) { underWaterBlockCount += Settings.blockValues.get(md); if (Settings.levelLogging || report) { uwLog.add(md); } } else { blockCount += Settings.blockValues.get(md); if (Settings.levelLogging || report) { mdLog.add(md); } } } else if (Settings.levelLogging || report) { overflowLog.add(md); } } else if (limitCount.containsKey(generic) && Settings.blockValues.containsKey(generic)) { int count = limitCount.get(generic); //plugin.getLogger().info("DEBUG: Count for generic " + generic + " is " + count); if (count > 0) { limitCount.put(generic, --count); if (Settings.sea_level > 0 && y <= Settings.sea_level) { underWaterBlockCount += Settings.blockValues.get(generic); if (Settings.levelLogging || report) { uwLog.add(md); } } else { blockCount += Settings.blockValues.get(generic); if (Settings.levelLogging || report) { mdLog.add(md); } } } else if (Settings.levelLogging || report) { overflowLog.add(md); } } else if (Settings.blockValues.containsKey(md)) { //plugin.getLogger().info("DEBUG: Adding " + md + " = " + Settings.blockValues.get(md)); if (Settings.sea_level > 0 && y <= Settings.sea_level) { underWaterBlockCount += Settings.blockValues.get(md); if (Settings.levelLogging || report) { uwLog.add(md); } } else { blockCount += Settings.blockValues.get(md); if (Settings.levelLogging || report) { 
mdLog.add(md); } } } else if (Settings.blockValues.containsKey(generic)) { //plugin.getLogger().info("DEBUG: Adding " + generic + " = " + Settings.blockValues.get(generic)); if (Settings.sea_level > 0 && y <= Settings.sea_level) { underWaterBlockCount += Settings.blockValues.get(generic); if (Settings.levelLogging || report) { uwLog.add(md); } } else { blockCount += Settings.blockValues.get(generic); if (Settings.levelLogging || report) { mdLog.add(md); } } } else if (Settings.levelLogging || report) { noCountLog.add(md); } } } } } } blockCount += (int) ((double) underWaterBlockCount * Settings.underWaterMultiplier); //System.out.println("block count = "+blockCount); final int score = (((blockCount * levelMultiplier) - (deathHandicap * Settings.deathpenalty)) / Settings.levelCost) - levelHandicap; // Logging or report if (Settings.levelLogging || report) { // provide counts Multiset<MaterialData> uwCount = HashMultiset.create(uwLog); Multiset<MaterialData> mdCount = HashMultiset.create(mdLog); Multiset<MaterialData> ncCount = HashMultiset.create(noCountLog); Multiset<MaterialData> ofCount = HashMultiset.create(overflowLog); reportLines.add("Level Log for island at " + island.getCenter()); if (sender instanceof Player) { reportLines.add("Asker is " + sender.getName() + " (" + ((Player) sender).getUniqueId().toString() + ")"); } else { reportLines.add("Asker is console"); } reportLines.add("Target player UUID = " + targetPlayer.toString()); reportLines.add("Total block value count = " + String.format("%,d", blockCount)); reportLines.add("Level cost = " + Settings.levelCost); reportLines.add("Level multiplier = " + levelMultiplier + " (Player must be online to get a permission multiplier)"); reportLines.add("Schematic level handicap = " + levelHandicap + " (level is reduced by this amount)"); reportLines.add("Deaths handicap = " + (deathHandicap * Settings.deathpenalty) + " (" + deathHandicap + " deaths)"); reportLines.add("Level calculated = " + score); reportLines.add("=================================="); int total = 0; if (!uwCount.isEmpty()) { reportLines.add("Underwater block count (Multiplier = x" + Settings.underWaterMultiplier + ") value"); reportLines.add( "Total number of underwater blocks = " + String.format("%,d", uwCount.size())); Iterable<Multiset.Entry<MaterialData>> entriesSortedByCount = Multisets .copyHighestCountFirst(uwCount).entrySet(); Iterator<Entry<MaterialData>> it = entriesSortedByCount.iterator(); while (it.hasNext()) { Entry<MaterialData> type = it.next(); int value = 0; if (Settings.blockValues.containsKey(type)) { // Specific value = Settings.blockValues.get(type); } else if (Settings.blockValues .containsKey(new MaterialData(type.getElement().getItemType()))) { // Generic value = Settings.blockValues .get(new MaterialData(type.getElement().getItemType())); } if (value > 0) { reportLines.add(type.getElement().toString() + ":" + String.format("%,d", type.getCount()) + " blocks x " + value + " = " + (value * type.getCount())); total += (value * type.getCount()); } } reportLines.add("Subtotal = " + total); reportLines.add("=================================="); } reportLines.add("Regular block count"); reportLines.add("Total number of blocks = " + String.format("%,d", mdCount.size())); Iterable<Multiset.Entry<MaterialData>> entriesSortedByCount = Multisets .copyHighestCountFirst(mdCount).entrySet(); Iterator<Entry<MaterialData>> it = entriesSortedByCount.iterator(); while (it.hasNext()) { Entry<MaterialData> type = it.next(); int value = 0; if 
(Settings.blockValues.containsKey(type)) { // Specific value = Settings.blockValues.get(type); } else if (Settings.blockValues .containsKey(new MaterialData(type.getElement().getItemType()))) { // Generic value = Settings.blockValues.get(new MaterialData(type.getElement().getItemType())); } if (value > 0) { reportLines.add( type.getElement().toString() + ":" + String.format("%,d", type.getCount()) + " blocks x " + value + " = " + (value * type.getCount())); total += (value * type.getCount()); } } reportLines.add("Total = " + total); reportLines.add("=================================="); reportLines.add("Blocks not counted because they exceeded limits: " + String.format("%,d", ofCount.size())); entriesSortedByCount = Multisets.copyHighestCountFirst(ofCount).entrySet(); it = entriesSortedByCount.iterator(); while (it.hasNext()) { Entry<MaterialData> type = it.next(); Integer limit = Settings.blockLimits.get(type.getElement()); String explain = ")"; if (limit == null) { MaterialData generic = new MaterialData(type.getElement().getItemType()); limit = Settings.blockLimits.get(generic); explain = " - All types)"; } reportLines.add(type.getElement().toString() + ": " + String.format("%,d", type.getCount()) + " blocks (max " + limit + explain); } reportLines.add("=================================="); reportLines.add("Blocks on island that are not in blockvalues.yml"); reportLines.add("Total number = " + String.format("%,d", ncCount.size())); entriesSortedByCount = Multisets.copyHighestCountFirst(ncCount).entrySet(); it = entriesSortedByCount.iterator(); while (it.hasNext()) { Entry<MaterialData> type = it.next(); reportLines.add(type.getElement().toString() + ": " + String.format("%,d", type.getCount()) + " blocks"); } reportLines.add("================================="); } if (out != null) { // Write to file for (String line : reportLines) { out.println(line); } System.out.println("Finished writing level log."); out.close(); } // Return to main thread plugin.getServer().getScheduler().runTask(plugin, new Runnable() { @Override public void run() { //plugin.getLogger().info("DEBUG: updating player"); int oldLevel = plugin.getPlayers().getIslandLevel(targetPlayer); // Update player and team mates plugin.getPlayers().setIslandLevel(targetPlayer, score); //plugin.getLogger().info("DEBUG: set island level, now trying to save player"); plugin.getPlayers().save(targetPlayer); //plugin.getLogger().info("DEBUG: save player, now looking at team members"); // Update any team members too if (plugin.getPlayers().inTeam(targetPlayer)) { //plugin.getLogger().info("DEBUG: player is in team"); for (UUID member : plugin.getPlayers().getMembers(targetPlayer)) { //plugin.getLogger().info("DEBUG: updating team member level too"); plugin.getPlayers().setIslandLevel(member, score); plugin.getPlayers().save(member); } } //plugin.getLogger().info("DEBUG: finished team member saving"); if (sender != null) { if (!(sender instanceof Player)) { // Console if (!report) { sender.sendMessage(ChatColor.GREEN + plugin.myLocale().islandislandLevelis + " " + ChatColor.WHITE + plugin.getPlayers().getIslandLevel(targetPlayer)); } else { for (String line : reportLines) { sender.sendMessage(line); } } } else { if (!report) { // Tell offline team members the island level increased. 
if (plugin.getPlayers().getIslandLevel(targetPlayer) > oldLevel) { //plugin.getLogger().info("DEBUG: telling offline players"); plugin.getMessages().tellOfflineTeam(targetPlayer, ChatColor.GREEN + plugin.myLocale(targetPlayer).islandislandLevelis + " " + ChatColor.WHITE + plugin.getPlayers().getIslandLevel(targetPlayer)); } if (sender instanceof Player && ((Player) sender).isOnline()) { String message = ChatColor.GREEN + plugin.myLocale( ((Player) sender).getUniqueId()).islandislandLevelis + " " + ChatColor.WHITE + plugin.getPlayers().getIslandLevel(targetPlayer); if (Settings.deathpenalty != 0) { message += " " + plugin .myLocale(((Player) sender).getUniqueId()).levelDeaths .replace("[number]", String.valueOf(plugin .getPlayers().getDeaths(targetPlayer))); } sender.sendMessage(message); } } else { if (((Player) sender).isOnline()) { for (String line : reportLines) { sender.sendMessage(line); } } } } } //plugin.getLogger().info("DEBUG: updating top ten"); // Only update top ten if the asker doesn't have this permission if (!(sender instanceof Player)) { return; } if (!(((Player) sender).getUniqueId().equals(targetPlayer) && sender.hasPermission(Settings.PERMPREFIX + "excludetopten"))) { if (plugin.getPlayers().inTeam(targetPlayer)) { UUID leader = plugin.getPlayers().getTeamLeader(targetPlayer); if (leader != null) { TopTen.topTenAddEntry(leader, score); } } else { TopTen.topTenAddEntry(targetPlayer, score); } } // Fire the level event Island island = plugin.getGrid().getIsland(targetPlayer, ((Player) sender).getWorld().getEnvironment()); final IslandLevelEvent event = new IslandLevelEvent(targetPlayer, island, score); plugin.getServer().getPluginManager().callEvent(event); } }); } }); } }
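For the report, the raw per-block log lists are turned into counts with HashMultiset.create(list); uwCount.size() and mdCount.size() are then the total numbers of logged blocks (not the number of material types), and Multisets.copyHighestCountFirst() orders each report section by frequency. A sketch of that reporting step, using String material names instead of Bukkit's MaterialData so it stays self-contained:

import com.google.common.collect.HashMultiset;
import com.google.common.collect.Multiset;
import com.google.common.collect.Multisets;
import java.util.Arrays;
import java.util.List;

public class BlockReportSketch {
    public static void main(String[] args) {
        // Stand-in for the per-block log lists (mdLog, uwLog, ...) built during the chunk scan
        List<String> mdLog = Arrays.asList("STONE", "STONE", "STONE", "DIAMOND_BLOCK", "DIRT", "DIRT");

        // HashMultiset.create(iterable) turns the raw log into per-material counts
        Multiset<String> mdCount = HashMultiset.create(mdLog);

        // size() is the total number of logged blocks, not the number of material types
        System.out.println("Total number of blocks = " + mdCount.size()); // 6

        // copyHighestCountFirst orders the report by frequency, most common material first
        for (Multiset.Entry<String> entry : Multisets.copyHighestCountFirst(mdCount).entrySet()) {
            System.out.println(entry.getElement() + ": " + entry.getCount() + " blocks");
        }
    }
}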
From source file: tufts.vue.ds.DataTree.java
private void addMissingRowsToMap(final LWMap map) { // todo: we'll want to merge some of this code w/DropHandler code, as // this is somewhat of a special case of doing a drop final List<DataRow> newRows = new ArrayList(); for (DataNode n : mAllRowsNode.getChildren()) { if (!n.isMapPresent()) { //Log.debug("ADDING TO MAP: " + n); newRows.add(n.getRow());//from w w w . ja v a2s . co m } } final List<LWComponent> newRowNodes = DataAction.makeRowNodes(mSchema, newRows); Multiset<LWComponent> targetsUsed = null; List<LWLink> linksAdded = Collections.EMPTY_LIST; try { final Object[] result = DataAction.addDataLinksForNodes(map, newRowNodes, (Field) null); targetsUsed = (Multiset) result[0]; linksAdded = (List) result[1]; } catch (Throwable t) { Log.error("problem creating links on " + map + " for new nodes: " + Util.tags(newRowNodes), t); } if (DEBUG.Enabled && targetsUsed != null) { final Set entries = targetsUsed.entrySet(); Log.debug("TARGETS USED: " + targetsUsed.size() + " / " + entries.size()); Util.dump(entries); } if (newRowNodes.size() > 0) { // we cannot run setXYByClustering before adding to the map w/out refactoring projectNodes // (or for that matter, centroidCluster, which also uses projectNodes). E.g. -- we // can't use this as an initial fallback/failsafe. //tufts.vue.VueUtil.setXYByClustering(map, nodes); //----------------------------------------------------------------------------- // add all the "missing" / newly-arrived rows to the map //----------------------------------------------------------------------------- map.getOrCreateLayer("New Data Nodes").addChildren(newRowNodes); // PROBLEM/BUG: the above add to a special layer appears to be failing (to // the user) somtimes and the nodes wind up in the same layer as the // relating nodes -- this is when ArrangeAction.clusterLinked is then used // below. It does some reparenting which it needs to do in case nodes had // been collected as children, but in some cases, it doesn't need doing and // ends up just pulling the nodes right back out of the "New Data Nodes" // layer after we just moved them there. // ----------------------------------------------------------------------------- if (newRowNodes.size() > NEW_ROW_NODE_MAP_REORG_THRESHOLD) { if (targetsUsed.size() > 0) { // Note: won't currently trigger for cross-schema joins, as targesUsed aren't reported //------------------------------------------------------- // RE-CLUSTER THE ENTIRE MAP //------------------------------------------------------- // If there is was more than one value-node link per row-node created (e.g., // multiple sets of value nodes are already on the map), prioritizing those // targets with the most first spreads the nodes out the most as the targets // with the fewest links would are at least be guaranteed to get some of the // row nodes. Using the push-method in this case would be far too slow -- we'd // have to push based on every row node. 
final List<Multiset.Entry<LWComponent>> ordered = ByDecreasingFrequency .sortedCopy(targetsUsed.entrySet()); for (Multiset.Entry<LWComponent> e : ordered) { tufts.vue.Actions.ArrangeAction.clusterLinked(e.getElement()); } // note: if we wished, we could also decide here // what to cluster on based on what targets are // selected (currently have the selection bit set) } else { // fallback: randomly layout anything that isn't first XY clustered: tufts.vue.LayoutAction.random.act(tufts.vue.VueUtil.setXYByClustering(newRowNodes)); } } else { //------------------------------------------------------- // Centroid cluster //------------------------------------------------------- DataAction.centroidCluster(map, newRowNodes, true); //------------------------------------------------------- } VUE.getSelection().setTo(newRowNodes); } map.getUndoManager().mark("Add New Data Nodes"); }
From source file: it.cnr.isti.hpc.dexter.disambiguation.TurkishEntityDisambiguator.java
@Override public EntityMatchList disambiguate(DexterLocalParams localParams, SpotMatchList sml) { entityScoreMap = new HashMap<String, EntityScores>(); selectedEntities = new HashSet<String>(); Multiset<String> entityFrequencyMultiset = HashMultiset.create(); EntityMatchList entities = sml.getEntities(); String inputText = localParams.getParams().get("text"); String algorithm = Property.getInstance().get("algorithm"); String ambigious = Property.getInstance().get("algorithm.ambigious"); List<Token> inputTokens = Zemberek.getInstance().disambiguateFindTokens(inputText, false, true); List<Double> documentVector = DescriptionEmbeddingAverage.getAverageVectorList(inputText); Multiset<String> inputTokensMultiset = HashMultiset.create(); for (Token token : inputTokens) { inputTokensMultiset.add(token.getMorphText()); }/* w w w . jav a 2s . c om*/ Multiset<String> domainMultiset = HashMultiset.create(); Multiset<String> typeMultiset = HashMultiset.create(); HashMap<String, Double> entitySimMap = new HashMap<String, Double>(); // if (printCandidateEntities) { // printEntities(entities); // } HashSet<String> words = new HashSet<String>(); Multiset<String> leskWords = HashMultiset.create(); // first pass for finding number of types and domains for (int i = 0; i < entities.size(); i++) { EntityMatch em = entities.get(i); String id = em.getId(); if (!entityFrequencyMultiset.contains(id)) { entityFrequencyMultiset.add(id); Entity entity = em.getEntity(); words.add(entity.getShingle().getText()); String type = entity.getPage().getType(); if (type != null && type.length() > 0) { typeMultiset.add(type); } String domain = entity.getPage().getDomain(); if (domain != null && domain.length() > 0) { domainMultiset.add(domain); } String desc = entity.getPage().getDescription(); List<Token> tokens = Zemberek.getInstance().disambiguateFindTokens(desc, false, true); for (Token token : tokens) { leskWords.add(token.getMorphText()); } } else { entityFrequencyMultiset.add(id); } } int maxDomainCount = 0; for (String domain : Multisets.copyHighestCountFirst(domainMultiset).elementSet()) { maxDomainCount = domainMultiset.count(domain); break; } int maxTypeCount = 0; for (String type : Multisets.copyHighestCountFirst(typeMultiset).elementSet()) { maxTypeCount = typeMultiset.count(type); break; } double maxSuffixScore = 0, maxLeskScore = 0, maxSimpleLeskScore = 0, maxLinkScore = 0, maxHashInfoboxScore = 0, maxwordvecDescriptionLocalScore = 0, maxHashDescriptionScore = 0, maxPopularityScore = 0, maxWordvectorAverage = 0, maxWordvecLinksScore = 0; // second pass compute similarities between entities in a window int currentSpotIndex = -1; SpotMatch currentSpot = null; for (int i = 0; i < entities.size(); i++) { EntityMatch em = entities.get(i); SpotMatch spot = em.getSpot(); if (currentSpot == null || spot != currentSpot) { currentSpotIndex++; currentSpot = spot; } String id = em.getId(); Entity entity = entities.get(i).getEntity(); EntityPage page = entities.get(i).getEntity().getPage(); String domain = page.getDomain(); String type = page.getType(); Shingle shingle = entity.getShingle(); /* windowing algorithms stars */ int left = currentSpotIndex - window; int right = currentSpotIndex + window; if (left < 0) { right -= left; left = 0; } if (right > sml.size()) { left += (sml.size()) - right; right = sml.size(); if (left < 0) { left = 0; } } double linkScore = 0, hashInfoboxScore = 0, wordvecDescriptionLocalScore = 0, hashDescriptionScore = 0, wordvecLinksScore = 0; for (int j = left; j < right; j++) { SpotMatch sm2 = 
sml.get(j); EntityMatchList entities2 = sm2.getEntities(); for (EntityMatch em2 : entities2) { String id2 = em2.getId(); EntityPage page2 = em2.getEntity().getPage(); int counter = 0; if (!ambigious.equals("true")) { for (EntityMatch entityMatch : entities2) { if (entityMatch.getId().startsWith("w")) { counter++; } } } if ((ambigious.equals("true") || counter == 1) && em.getSpot() != em2.getSpot() && !id.equals(id2)) { // Link Similarity calculation starts double linkSim = 0; if (id.startsWith("w") && id2.startsWith("w")) { if (entitySimMap.containsKey("link" + id + id2)) { linkSim = entitySimMap.get("link" + id + id2); } else { HashSet<String> set1 = Sets.newHashSet(page.getLinks().split(" ")); HashSet<String> set2 = Sets.newHashSet(page2.getLinks().split(" ")); linkSim = JaccardCalculator.calculateSimilarity(set1, set2); entitySimMap.put("link" + id + id2, linkSim); } linkScore += linkSim; // Link Similarity calculation ends } // Entity embedding similarity calculation starts double eeSim = 0; if (id.startsWith("w") && id2.startsWith("w")) { if (entitySimMap.containsKey("ee" + id + id2)) { eeSim = entitySimMap.get("ee" + id + id2); } else { eeSim = EntityEmbeddingSimilarity.getInstance().getSimilarity(page, page2); entitySimMap.put("ee" + id + id2, eeSim); } hashInfoboxScore += eeSim; } double w2veclinksSim = 0; if (id.startsWith("w") && id2.startsWith("w")) { if (entitySimMap.containsKey("wl" + id + id2)) { w2veclinksSim = entitySimMap.get("wl" + id + id2); } else { w2veclinksSim = AveragePooling.getInstance().getSimilarity(page.getWord2vec(), page2.getWord2vec()); entitySimMap.put("wl" + id + id2, w2veclinksSim); } wordvecLinksScore += w2veclinksSim; } // Entity embedding similarity calculation ends // Description word2vec similarity calculation // starts double word2vecSim = 0; if (entitySimMap.containsKey("w2v" + id + id2)) { word2vecSim = entitySimMap.get("w2v" + id + id2); } else { word2vecSim = AveragePooling.getInstance().getSimilarity(page2.getDword2vec(), page.getDword2vec()); entitySimMap.put("w2v" + id + id2, word2vecSim); } wordvecDescriptionLocalScore += word2vecSim; // Description word2vec similarity calculation ends // Description autoencoder similarity calculation // starts double autoVecSim = 0; if (entitySimMap.containsKey("a2v" + id + id2)) { autoVecSim = entitySimMap.get("a2v" + id + id2); } else { autoVecSim = AveragePooling.getInstance().getSimilarity(page2.getDautoencoder(), page.getDautoencoder()); entitySimMap.put("a2v" + id + id2, autoVecSim); } hashDescriptionScore += autoVecSim; // Description autoencoder similarity calculation // ends } } } if (linkScore > maxLinkScore) { maxLinkScore = linkScore; } if (hashInfoboxScore > maxHashInfoboxScore) { maxHashInfoboxScore = hashInfoboxScore; } if (wordvecDescriptionLocalScore > maxwordvecDescriptionLocalScore) { maxwordvecDescriptionLocalScore = wordvecDescriptionLocalScore; } if (hashDescriptionScore > maxHashDescriptionScore) { maxHashDescriptionScore = hashDescriptionScore; } if (wordvecLinksScore > maxWordvecLinksScore) { maxWordvecLinksScore = wordvecLinksScore; } /* windowing algorithms ends */ double domainScore = 0; if (domainMultiset.size() > 0 && maxDomainCount > 1 && domainMultiset.count(domain) > 1) { domainScore = (double) domainMultiset.count(domain) / maxDomainCount; } double typeScore = 0; if (typeMultiset.size() > 0 && maxTypeCount > 1 && typeMultiset.count(type) > 1) { typeScore = (double) typeMultiset.count(type) / maxTypeCount; } if (typeBlackList.contains(type)) { typeScore /= 10; } double 
typeContentScore = 0; if (type.length() > 0 && StringUtils.containsIgnoreCase(words.toString(), type)) { typeContentScore = 1; } double typeClassifierScore = TypeClassifier.getInstance().predict(page, page.getTitle(), page.getType(), entity.getShingle().getSentence()); double wordvecDescriptionScore = AveragePooling.getInstance().getSimilarity(documentVector, page.getDword2vec()); if (wordvecDescriptionScore > maxWordvectorAverage) { maxWordvectorAverage = wordvecDescriptionScore; } double suffixScore = 0; if (type != null && type.length() > 0) { Set<String> suffixes = new HashSet<String>(); String t = entity.getTitle().toLowerCase(new Locale("tr", "TR")); for (int x = 0; x < entities.size(); x++) { EntityMatch e2 = entities.get(x); if (e2.getId().equals(entity.getId())) { suffixes.add(e2.getMention()); } } suffixes.remove(t); suffixes.remove(entity.getTitle()); // String inputTextLower = inputText.toLowerCase(new // Locale("tr", // "TR")); // while (inputTextLower.contains(t)) { // int start = inputTextLower.indexOf(t); // int end = inputTextLower.indexOf(" ", start + t.length()); // if (end > start) { // String suffix = inputTextLower.substring(start, end); // // .replaceAll("\\W", ""); // if (suffix.contains("'") // || (Zemberek.getInstance().hasMorph(suffix) // && !suffix.equals(t) && suffix.length() > 4)) { // suffixes.add(suffix); // } // inputTextLower = inputTextLower.substring(end); // } else { // break; // } // } if (suffixes.size() >= minSuffix) { for (String suffix : suffixes) { double sim = gd.calculateSimilarity(suffix, type); suffixScore += sim; } } } // String entitySuffix = page.getSuffix(); // String[] inputSuffix = shingle.getSuffix().split(" "); // for (int j = 0; j < inputSuffix.length; j++) { // if (entitySuffix.contains(inputSuffix[j])) { // suffixScore += 0.25f; // } // } if (suffixScore > maxSuffixScore) { maxSuffixScore = suffixScore; } // if (id.equals("w691538")) { // LOGGER.info(""); // } double letterCaseScore = 0; int lc = page.getLetterCase(); if (StringUtils.isAllLowerCase(em.getMention()) && lc == 0 && id.startsWith("t")) { letterCaseScore = 1; } else if (StringUtils.isAllUpperCase(em.getMention()) && lc == 1 && id.startsWith("w")) { letterCaseScore = 1; } else if (Character.isUpperCase(em.getMention().charAt(0)) && lc == 2 && id.startsWith("w")) { letterCaseScore = 1; } else if (StringUtils.isAllLowerCase(em.getMention()) && id.startsWith("t")) { letterCaseScore = 1; } double nameScore = 1 - LevenshteinDistanceCalculator.calculateDistance(page.getTitle(), Zemberek.removeAfterSpostrophe(em.getMention())); double popularityScore = page.getRank(); if (id.startsWith("w")) { popularityScore = Math.log10(popularityScore + 1); if (popularityScore > maxPopularityScore) { maxPopularityScore = popularityScore; } } double leskScore = 0, simpleLeskScore = 0; String desc = em.getEntity().getPage().getDescription(); if (desc != null) { List<Token> tokens = Zemberek.getInstance().disambiguateFindTokens(desc, false, true); for (Token token : tokens) { if (inputTokensMultiset.contains(token.getMorphText()) && !TurkishNLP.isStopWord(token.getMorphText())) { simpleLeskScore += inputTokensMultiset.count(token.getMorphText()); } if (leskWords.contains(token.getMorphText()) && !TurkishNLP.isStopWord(token.getMorphText())) { leskScore += leskWords.count(token.getMorphText()); } } leskScore /= Math.log(tokens.size() + 1); simpleLeskScore /= Math.log(tokens.size() + 1); if (leskScore > maxLeskScore) { maxLeskScore = leskScore; } if (simpleLeskScore > maxSimpleLeskScore) { 
maxSimpleLeskScore = simpleLeskScore; } if (!entityScoreMap.containsKey(id)) { EntityScores scores = new EntityScores(em, id, popularityScore, nameScore, letterCaseScore, suffixScore, wordvecDescriptionScore, typeContentScore, typeScore, domainScore, hashDescriptionScore, wordvecDescriptionLocalScore, hashInfoboxScore, linkScore, wordvecLinksScore, leskScore, simpleLeskScore, typeClassifierScore); entityScoreMap.put(id, scores); } else { EntityScores entityScores = entityScoreMap.get(id); entityScores.setHashInfoboxScore((entityScores.getHashInfoboxScore() + hashInfoboxScore) / 2); entityScores.setHashDescriptionScore( (entityScores.getHashInfoboxScore() + hashDescriptionScore) / 2); entityScores.setLinkScore((entityScores.getLinkScore() + linkScore) / 2); entityScores.setWordvecDescriptionLocalScore( (entityScores.getWordvecDescriptionLocalScore() + wordvecDescriptionLocalScore) / 2); entityScores .setWordvecLinksScore((entityScores.getWordvecLinksScore() + wordvecLinksScore) / 2); entityScores.setLeskScore((entityScores.getLeskScore() + leskScore) / 2); } } } /* normalization and total score calculation starts */ Set<String> set = new HashSet<String>(); for (int i = 0; i < entities.size(); i++) { EntityMatch em = entities.get(i); String id = em.getId(); EntityScores entityScores = entityScoreMap.get(id); if (set.contains(id)) { continue; } if (id.startsWith("w")) { if (maxLinkScore > 0 && entityScores.getLinkScore() > 0) { entityScores.setLinkScore(entityScores.getLinkScore() / maxLinkScore); } if (maxHashInfoboxScore > 0 && entityScores.getHashInfoboxScore() > 0) { entityScores.setHashInfoboxScore(entityScores.getHashInfoboxScore() / maxHashInfoboxScore); } if (maxWordvecLinksScore > 0 && entityScores.getWordvecLinksScore() > 0) { entityScores.setWordvecLinksScore(entityScores.getWordvecLinksScore() / maxWordvecLinksScore); } if (maxPopularityScore > 0 && entityScores.getPopularityScore() > 0) { entityScores.setPopularityScore(entityScores.getPopularityScore() / maxPopularityScore); } } if (maxwordvecDescriptionLocalScore > 0 && entityScores.getWordvecDescriptionLocalScore() > 0) { entityScores.setWordvecDescriptionLocalScore( entityScores.getWordvecDescriptionLocalScore() / maxwordvecDescriptionLocalScore); } if (maxHashDescriptionScore > 0 && entityScores.getHashDescriptionScore() > 0) { entityScores .setHashDescriptionScore(entityScores.getHashDescriptionScore() / maxHashDescriptionScore); } if (maxWordvectorAverage > 0 && entityScores.getWordvecDescriptionScore() > 0) { entityScores.setWordvecDescriptionScore( entityScores.getWordvecDescriptionScore() / maxWordvectorAverage); } if (maxLeskScore > 0 && entityScores.getLeskScore() > 0) { entityScores.setLeskScore(entityScores.getLeskScore() / maxLeskScore); } if (maxSimpleLeskScore > 0 && entityScores.getSimpleLeskScore() > 0) { entityScores.setSimpleLeskScore(entityScores.getSimpleLeskScore() / maxSimpleLeskScore); } if (maxSuffixScore > 0 && entityScores.getSuffixScore() > 0) { entityScores.setSuffixScore(entityScores.getSuffixScore() / maxSuffixScore); } set.add(id); } LOGGER.info("\t" + "id\tTitle\tURL\tScore\tPopularity\tName\tLesk\tSimpeLesk\tCase\tNoun\tSuffix\tTypeContent\tType\tDomain\twordvecDescription\twordvecDescriptionLocal\thashDescription\thashInfobox\tword2vecLinks\tLink\t\ttypeClassifier\tDescription"); for (int i = 0; i < entities.size(); i++) { EntityMatch em = entities.get(i); String id = em.getId(); EntityScores e = entityScoreMap.get(id); double wikiScore = 0; if (id.startsWith("w") && 
Character.isUpperCase(em.getMention().charAt(0))) { wikiScore = wikiWeight; } else if (id.startsWith("t") && Character.isLowerCase(em.getMention().charAt(0))) { wikiScore = wikiWeight; } // if(id.equals("w508792")){ // LOGGER.info(""); // } double totalScore = wikiScore + e.getPopularityScore() * popularityWeight + e.getNameScore() * nameWeight + e.getLeskScore() * leskWeight + e.getSimpleLeskScore() * simpleLeskWeight + e.getLetterCaseScore() * letterCaseWeight + e.getSuffixScore() * suffixWeight + e.getTypeContentScore() * typeContentWeight + e.getTypeScore() * typeWeight + e.getDomainScore() * domainWeight + e.getWordvecDescriptionScore() * wordvecDescriptionWeight + e.getWordvecDescriptionLocalScore() * wordvecDescriptionLocalWeight + e.getHashDescriptionScore() * hashDescriptionWeight + e.getHashInfoboxScore() * hashInfoboxWeight + e.getWordvecLinksScore() * word2vecLinksWeight + e.getLinkScore() * linkWeight + e.getTypeClassifierkScore() * typeClassifierkWeight; if (ranklib == true) { totalScore = RankLib.getInstance().score(e); } if (em.getEntity().getPage().getUrlTitle().contains("(")) { totalScore /= 2; } em.setScore(totalScore); e.setScore(totalScore); LOGGER.info("\t" + id + "\t" + em.getEntity().getPage().getTitle() + "\t" + em.getEntity().getPage().getUrlTitle() + "\t" + em.getScore() + "\t" + e.getPopularityScore() * popularityWeight + "\t" + e.getNameScore() * nameWeight + "\t" + e.getLeskScore() * leskWeight + "\t" + e.getSimpleLeskScore() * simpleLeskWeight + "\t" + e.getLetterCaseScore() * letterCaseWeight + "\t" + e.getSuffixScore() * suffixWeight + "\t" + e.getTypeContentScore() * typeContentWeight + "\t" + e.getTypeScore() * typeWeight + "\t" + e.getDomainScore() * domainWeight + "\t" + e.getWordvecDescriptionScore() * wordvecDescriptionWeight + "\t" + e.getWordvecDescriptionLocalScore() * wordvecDescriptionLocalWeight + "\t" + e.getHashDescriptionScore() * hashDescriptionWeight + "\t" + e.getHashInfoboxScore() * hashInfoboxWeight + "\t" + e.getWordvecLinksScore() * word2vecLinksWeight + "\t" + e.getLinkScore() * linkWeight + "\t" + e.getTypeClassifierkScore() * typeClassifierkWeight + "\t" + em.getEntity().getPage().getDescription()); } // if (annotateEntities) { // annotateEntities(localParams.getParams().get("originalText"), sml); // } EntityMatchList eml = new EntityMatchList(); for (SpotMatch match : sml) { EntityMatchList list = match.getEntities(); if (!list.isEmpty()) { list.sort(); eml.add(list.get(0)); selectedEntities.add(list.get(0).getId()); } } return eml; }
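Within the disambiguator, the multisets drive frequency-based scores: size() > 0 guards against empty domain/type multisets before normalising, and the maximum count is read from the first element of Multisets.copyHighestCountFirst(...).elementSet(). A small sketch of that scoring step with invented domain values:

import com.google.common.collect.HashMultiset;
import com.google.common.collect.Multiset;
import com.google.common.collect.Multisets;

public class DomainScoreSketch {
    public static void main(String[] args) {
        Multiset<String> domainMultiset = HashMultiset.create();
        domainMultiset.add("sport");
        domainMultiset.add("sport");
        domainMultiset.add("sport");
        domainMultiset.add("politics");

        // copyHighestCountFirst iterates in decreasing count order,
        // so the first element of elementSet() carries the maximum count
        int maxDomainCount = 0;
        for (String domain : Multisets.copyHighestCountFirst(domainMultiset).elementSet()) {
            maxDomainCount = domainMultiset.count(domain);
            break;
        }

        // size() > 0 guards against an empty multiset before normalising by the max count
        String candidateDomain = "politics";
        double domainScore = 0;
        if (domainMultiset.size() > 0 && maxDomainCount > 1 && domainMultiset.count(candidateDomain) > 1) {
            domainScore = (double) domainMultiset.count(candidateDomain) / maxDomainCount;
        }
        System.out.println(domainScore); // 0.0 here, since "politics" occurs only once
    }
}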
From source file: org.opencb.opencga.storage.mongodb.variant.VariantMongoDBAdaptor.java
/** * Two steps insertion:/* www . jav a 2 s .com*/ * First check that the variant and study exists making an update. * For those who doesn't exist, pushes a study with the file and genotype information * <p> * The documents that throw a "dup key" exception are those variants that exist and have the study. * Then, only for those variants, make a second update. * <p> * *An interesting idea would be to invert this actions depending on the number of already inserted variants. * * @param data Variants to insert * @param fileId File ID * @param variantConverter Variant converter to be used * @param variantSourceEntryConverter Variant source converter to be used * @param studyConfiguration Configuration for the study * @param loadedSampleIds Other loaded sampleIds EXCEPT those that are going to be loaded * @return QueryResult object */ QueryResult<MongoDBVariantWriteResult> insert(List<Variant> data, int fileId, DocumentToVariantConverter variantConverter, DocumentToStudyVariantEntryConverter variantSourceEntryConverter, StudyConfiguration studyConfiguration, List<Integer> loadedSampleIds) { MongoDBVariantWriteResult writeResult = new MongoDBVariantWriteResult(); long startTime = System.currentTimeMillis(); if (data.isEmpty()) { return new QueryResult<>("insertVariants", 0, 1, 1, "", "", Collections.singletonList(writeResult)); } List<Bson> queries = new ArrayList<>(data.size()); List<Bson> updates = new ArrayList<>(data.size()); // Use a multiset instead of a normal set, to keep tracking of duplicated variants Multiset<String> nonInsertedVariants = HashMultiset.create(); String fileIdStr = Integer.toString(fileId); // List<String> extraFields = studyConfiguration.getAttributes().getAsStringList(VariantStorageManager.Options.EXTRA_GENOTYPE_FIELDS // .key()); boolean excludeGenotypes = studyConfiguration.getAttributes().getBoolean( VariantStorageManager.Options.EXCLUDE_GENOTYPES.key(), VariantStorageManager.Options.EXCLUDE_GENOTYPES.defaultValue()); long nanoTime = System.nanoTime(); Map missingSamples = Collections.emptyMap(); String defaultGenotype = studyConfiguration.getAttributes().getString(DEFAULT_GENOTYPE.key(), ""); if (defaultGenotype.equals(DocumentToSamplesConverter.UNKNOWN_GENOTYPE)) { logger.debug("Do not need fill gaps. DefaultGenotype is UNKNOWN_GENOTYPE({}).", DocumentToSamplesConverter.UNKNOWN_GENOTYPE); } else if (excludeGenotypes) { logger.debug("Do not need fill gaps. Excluding genotypes."); } else if (!loadedSampleIds.isEmpty()) { missingSamples = new Document(DocumentToSamplesConverter.UNKNOWN_GENOTYPE, loadedSampleIds); // ?/? 
} // List<Object> missingOtherValues = new ArrayList<>(loadedSampleIds.size()); // for (int i = 0; i < loadedSampleIds.size(); i++) { // missingOtherValues.add(DBObjectToSamplesConverter.UNKNOWN_FIELD); // } for (Variant variant : data) { if (variant.getType().equals(VariantType.NO_VARIATION)) { //Storage-MongoDB is not able to store NON VARIANTS writeResult.setSkippedVariants(writeResult.getSkippedVariants() + 1); continue; } else if (variant.getType().equals(VariantType.SYMBOLIC)) { logger.warn("Skip symbolic variant " + variant.toString()); writeResult.setSkippedVariants(writeResult.getSkippedVariants() + 1); continue; } String id = variantConverter.buildStorageId(variant); for (StudyEntry studyEntry : variant.getStudies()) { if (studyEntry.getFiles().size() == 0 || !studyEntry.getFiles().get(0).getFileId().equals(fileIdStr)) { continue; } int studyId = studyConfiguration.getStudyId(); Document study = variantSourceEntryConverter.convertToStorageType(studyEntry); Document genotypes = study.get(DocumentToStudyVariantEntryConverter.GENOTYPES_FIELD, Document.class); if (genotypes != null) { //If genotypes is null, genotypes are not suppose to be loaded genotypes.putAll(missingSamples); //Add missing samples // for (String extraField : extraFields) { // List<Object> otherFieldValues = (List<Object>) study.get(extraField.toLowerCase()); // otherFieldValues.addAll(0, missingOtherValues); // } } Document push = new Document(DocumentToVariantConverter.STUDIES_FIELD, study); Document update = new Document().append("$push", push).append("$setOnInsert", variantConverter.convertToStorageType(variant)); if (variant.getIds() != null && !variant.getIds().isEmpty() && !variant.getIds().iterator().next().isEmpty()) { update.put("$addToSet", new Document(DocumentToVariantConverter.IDS_FIELD, new Document("$each", variant.getIds()))); } // { _id: <variant_id>, "studies.sid": {$ne: <studyId> } } //If the variant exists and contains the study, this find will fail, will try to do the upsert, and throw a // duplicated key exception. queries.add(new Document("_id", id).append( DocumentToVariantConverter.STUDIES_FIELD + "." 
+ DocumentToStudyVariantEntryConverter.STUDYID_FIELD, new Document("$ne", studyId))); updates.add(update); } } // if (!queries.isEmpty()) { QueryOptions options = new QueryOptions(UPSERT, true); options.put(MULTI, false); int newDocuments; int updatedObjects; try { BulkWriteResult bulkWriteResult; bulkWriteResult = variantsCollection.update(queries, updates, options).first(); newDocuments = bulkWriteResult.getUpserts().size(); updatedObjects = bulkWriteResult.getModifiedCount(); } catch (MongoBulkWriteException e) { BulkWriteResult bulkWriteResult; bulkWriteResult = e.getWriteResult(); newDocuments = bulkWriteResult.getUpserts().size(); updatedObjects = bulkWriteResult.getModifiedCount(); for (BulkWriteError writeError : e.getWriteErrors()) { if (writeError.getCode() == 11000) { //Dup Key error code Matcher matcher = writeResultErrorPattern.matcher(writeError.getMessage()); if (matcher.find()) { String id = matcher.group(1); nonInsertedVariants.add(id); } else { throw e; } } else { throw e; } } } writeResult.setNewVariants(newDocuments); writeResult.setUpdatedVariants(updatedObjects); // writeResult.setNewDocuments(data.size() - nonInsertedVariants.size() - writeResult.getSkippedVariants()); queries.clear(); updates.clear(); } writeResult.setNewVariantsNanoTime(System.nanoTime() - nanoTime); nanoTime = System.nanoTime(); for (Variant variant : data) { variant.setAnnotation(null); String id = variantConverter.buildStorageId(variant); if (nonInsertedVariants != null && !nonInsertedVariants.contains(id)) { continue; //Already inserted variant } for (StudyEntry studyEntry : variant.getStudies()) { if (studyEntry.getFiles().size() == 0 || !studyEntry.getFiles().get(0).getFileId().equals(fileIdStr)) { continue; } Document studyObject = variantSourceEntryConverter.convertToStorageType(studyEntry); Document genotypes = studyObject.get(DocumentToStudyVariantEntryConverter.GENOTYPES_FIELD, Document.class); Document push = new Document(); if (!excludeGenotypes) { if (genotypes != null) { //If genotypes is null, genotypes are not suppose to be loaded for (String genotype : genotypes.keySet()) { push.put( DocumentToVariantConverter.STUDIES_FIELD + ".$." + DocumentToStudyVariantEntryConverter.GENOTYPES_FIELD + "." + genotype, new Document("$each", genotypes.get(genotype))); } // for (String extraField : extraFields) { // List values = (List) studyObject.get(extraField.toLowerCase()); // push.put(DBObjectToVariantConverter.STUDIES_FIELD + ".$." + extraField.toLowerCase(), // new Document("$each", values).append("$position", loadedSampleIds.size())); // } } else { push.put( DocumentToVariantConverter.STUDIES_FIELD + ".$." + DocumentToStudyVariantEntryConverter.GENOTYPES_FIELD, Collections.emptyMap()); } } push.put( DocumentToVariantConverter.STUDIES_FIELD + ".$." + DocumentToStudyVariantEntryConverter.FILES_FIELD, ((List) studyObject.get(DocumentToStudyVariantEntryConverter.FILES_FIELD)).get(0)); Document update = new Document(new Document("$push", push)); queries.add(new Document("_id", id) .append(DocumentToVariantConverter.STUDIES_FIELD + '.' + DocumentToStudyVariantEntryConverter.STUDYID_FIELD, studyConfiguration.getStudyId()) .append(DocumentToVariantConverter.STUDIES_FIELD + '.' + DocumentToStudyVariantEntryConverter.FILES_FIELD + '.' 
+ DocumentToStudyVariantEntryConverter.FILEID_FIELD, new Document("$ne", fileId))); updates.add(update); } } writeResult.setExistingVariantsNanoTime(System.nanoTime() - nanoTime); if (!queries.isEmpty()) { QueryOptions options = new QueryOptions(UPSERT, false); options.put(MULTI, false); QueryResult<BulkWriteResult> update = variantsCollection.update(queries, updates, options); // Can happen that nonInsertedVariantsNum != queries.size() != nonInsertedVariants.size() if there was // a duplicated variant. writeResult.setNonInsertedVariants(nonInsertedVariants.size() - update.first().getMatchedCount()); writeResult.setUpdatedVariants(writeResult.getUpdatedVariants() + update.first().getModifiedCount()); } return new QueryResult<>("insertVariants", ((int) (System.currentTimeMillis() - startTime)), 1, 1, "", "", Collections.singletonList(writeResult)); }
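nonInsertedVariants is deliberately a multiset rather than a set, so a variant duplicated within the same batch is counted once per failed insert; at the end, nonInsertedVariants.size() minus the matched count of the second update gives the number of variants that could not be inserted at all. A sketch of that bookkeeping, with illustrative variant ids and counts:

import com.google.common.collect.HashMultiset;
import com.google.common.collect.Multiset;

public class NonInsertedCountSketch {
    public static void main(String[] args) {
        // Variant ids that hit a duplicate-key error during the first bulk update.
        // A multiset keeps track of ids that failed more than once,
        // i.e. genuinely duplicated variants within the same batch.
        Multiset<String> nonInsertedVariants = HashMultiset.create();
        nonInsertedVariants.add("1:1000:A:T");
        nonInsertedVariants.add("1:2000:C:G");
        nonInsertedVariants.add("1:2000:C:G"); // duplicated variant in the input

        // Matched count reported by the second, per-existing-variant update (illustrative value)
        long matchedCount = 2;

        // Same arithmetic as writeResult.setNonInsertedVariants(...):
        // failures the second update could not match are counted as non-inserted
        long nonInserted = nonInsertedVariants.size() - matchedCount;
        System.out.println("non-inserted variants: " + nonInserted); // 1
    }
}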
From source file: org.opencb.opencga.storage.mongodb.variant.adaptors.VariantMongoDBAdaptor.java
    /**
     * Two steps insertion:
     * First, check whether the variant and the study already exist by making an update.
     * For those that do not exist, push a study with the file and genotype information.
     * <p>
     * The documents that throw a "dup key" exception are those variants that already exist and have the study.
     * Then, only for those variants, make a second update.
     * <p>
     * An interesting idea would be to invert these actions depending on the number of already inserted variants.
     *
     * @param data                        Variants to insert
     * @param fileId                      File ID
     * @param variantConverter            Variant converter to be used
     * @param variantSourceEntryConverter Variant source converter to be used
     * @param studyConfiguration          Configuration for the study
     * @param loadedSampleIds             Other loaded sampleIds EXCEPT those that are going to be loaded
     * @return QueryResult object
     */
    QueryResult<MongoDBVariantWriteResult> insert(List<Variant> data, int fileId,
            DocumentToVariantConverter variantConverter,
            DocumentToStudyVariantEntryConverter variantSourceEntryConverter,
            StudyConfiguration studyConfiguration, List<Integer> loadedSampleIds) {

        MongoDBVariantWriteResult writeResult = new MongoDBVariantWriteResult();
        long startTime = System.currentTimeMillis();
        if (data.isEmpty()) {
            return new QueryResult<>("insertVariants", 0, 1, 1, "", "", Collections.singletonList(writeResult));
        }
        List<Bson> queries = new ArrayList<>(data.size());
        List<Bson> updates = new ArrayList<>(data.size());
        // Use a multiset instead of a normal set, to keep track of duplicated variants
        Multiset<String> nonInsertedVariants = HashMultiset.create();
        String fileIdStr = Integer.toString(fileId);

        // List<String> extraFields = studyConfiguration.getAttributes()
        //         .getAsStringList(VariantStorageEngine.Options.EXTRA_GENOTYPE_FIELDS.key());
        boolean excludeGenotypes = studyConfiguration.getAttributes().getBoolean(
                VariantStorageEngine.Options.EXCLUDE_GENOTYPES.key(),
                VariantStorageEngine.Options.EXCLUDE_GENOTYPES.defaultValue());

        long nanoTime = System.nanoTime();
        Map missingSamples = Collections.emptyMap();
        String defaultGenotype = studyConfiguration.getAttributes().getString(DEFAULT_GENOTYPE.key(), "");
        if (defaultGenotype.equals(DocumentToSamplesConverter.UNKNOWN_GENOTYPE)) {
            logger.debug("Do not need fill gaps. DefaultGenotype is UNKNOWN_GENOTYPE({}).",
                    DocumentToSamplesConverter.UNKNOWN_GENOTYPE);
        } else if (excludeGenotypes) {
            logger.debug("Do not need fill gaps. Excluding genotypes.");
        } else if (!loadedSampleIds.isEmpty()) {
            missingSamples = new Document(DocumentToSamplesConverter.UNKNOWN_GENOTYPE, loadedSampleIds); // ?/?
        }
        // List<Object> missingOtherValues = new ArrayList<>(loadedSampleIds.size());
        // for (int i = 0; i < loadedSampleIds.size(); i++) {
        //     missingOtherValues.add(DBObjectToSamplesConverter.UNKNOWN_FIELD);
        // }

        // First pass: try to upsert every variant, adding the study to variants that do not contain it yet
        for (Variant variant : data) {
            if (variant.getType().equals(VariantType.NO_VARIATION)) {
                // Storage-MongoDB is not able to store NON VARIANTS
                writeResult.setSkippedVariants(writeResult.getSkippedVariants() + 1);
                continue;
            } else if (variant.getType().equals(VariantType.SYMBOLIC)) {
                logger.warn("Skip symbolic variant " + variant.toString());
                writeResult.setSkippedVariants(writeResult.getSkippedVariants() + 1);
                continue;
            }
            String id = variantConverter.buildStorageId(variant);
            for (StudyEntry studyEntry : variant.getStudies()) {
                if (studyEntry.getFiles().size() == 0
                        || !studyEntry.getFiles().get(0).getFileId().equals(fileIdStr)) {
                    continue;
                }
                int studyId = studyConfiguration.getStudyId();
                Document study = variantSourceEntryConverter.convertToStorageType(variant, studyEntry);
                Document genotypes = study.get(DocumentToStudyVariantEntryConverter.GENOTYPES_FIELD, Document.class);
                if (genotypes != null) { // If genotypes is null, genotypes are not supposed to be loaded
                    genotypes.putAll(missingSamples); // Add missing samples
                    // for (String extraField : extraFields) {
                    //     List<Object> otherFieldValues = (List<Object>) study.get(extraField.toLowerCase());
                    //     otherFieldValues.addAll(0, missingOtherValues);
                    // }
                }
                Document push = new Document(DocumentToVariantConverter.STUDIES_FIELD, study);
                Document update = new Document()
                        .append("$push", push)
                        .append("$setOnInsert", variantConverter.convertToStorageType(variant));
                if (variant.getIds() != null && !variant.getIds().isEmpty()
                        && !variant.getIds().iterator().next().isEmpty()) {
                    update.put("$addToSet", new Document(DocumentToVariantConverter.IDS_FIELD,
                            new Document("$each", variant.getIds())));
                }
                // { _id: <variant_id>, "studies.sid": { $ne: <studyId> } }
                // If the variant exists and contains the study, this find will fail, will try to do the upsert,
                // and throw a duplicated key exception.
                queries.add(new Document("_id", id).append(
                        DocumentToVariantConverter.STUDIES_FIELD + "."
                                + DocumentToStudyVariantEntryConverter.STUDYID_FIELD, new Document("$ne", studyId)));
                updates.add(update);
            }
        }

        // if (!queries.isEmpty()) {
        {
            QueryOptions options = new QueryOptions(UPSERT, true);
            options.put(MULTI, false);
            int newDocuments;
            int updatedObjects;
            try {
                BulkWriteResult bulkWriteResult;
                bulkWriteResult = variantsCollection.update(queries, updates, options).first();
                newDocuments = bulkWriteResult.getUpserts().size();
                updatedObjects = bulkWriteResult.getModifiedCount();
            } catch (MongoBulkWriteException e) {
                BulkWriteResult bulkWriteResult;
                bulkWriteResult = e.getWriteResult();
                newDocuments = bulkWriteResult.getUpserts().size();
                updatedObjects = bulkWriteResult.getModifiedCount();
                for (BulkWriteError writeError : e.getWriteErrors()) {
                    if (writeError.getCode() == 11000) { // Duplicate key error code
                        Matcher matcher = writeResultErrorPattern.matcher(writeError.getMessage());
                        if (matcher.find()) {
                            String id = matcher.group(1);
                            nonInsertedVariants.add(id);
                        } else {
                            throw e;
                        }
                    } else {
                        throw e;
                    }
                }
            }
            writeResult.setNewVariants(newDocuments);
            writeResult.setUpdatedVariants(updatedObjects);
            // writeResult.setNewDocuments(data.size() - nonInsertedVariants.size() - writeResult.getSkippedVariants());
            queries.clear();
            updates.clear();
        }
        writeResult.setNewVariantsNanoTime(System.nanoTime() - nanoTime);
        nanoTime = System.nanoTime();

        // Second pass: only for the variants that hit a duplicate-key error, push the file (and genotypes)
        // into the study document that already exists
        for (Variant variant : data) {
            variant.setAnnotation(null);
            String id = variantConverter.buildStorageId(variant);
            if (nonInsertedVariants != null && !nonInsertedVariants.contains(id)) {
                continue; // Already inserted variant
            }
            for (StudyEntry studyEntry : variant.getStudies()) {
                if (studyEntry.getFiles().size() == 0
                        || !studyEntry.getFiles().get(0).getFileId().equals(fileIdStr)) {
                    continue;
                }
                Document studyObject = variantSourceEntryConverter.convertToStorageType(variant, studyEntry);
                Document genotypes = studyObject.get(DocumentToStudyVariantEntryConverter.GENOTYPES_FIELD,
                        Document.class);
                Document push = new Document();
                if (!excludeGenotypes) {
                    if (genotypes != null) { // If genotypes is null, genotypes are not supposed to be loaded
                        for (String genotype : genotypes.keySet()) {
                            push.put(DocumentToVariantConverter.STUDIES_FIELD + ".$."
                                    + DocumentToStudyVariantEntryConverter.GENOTYPES_FIELD + "." + genotype,
                                    new Document("$each", genotypes.get(genotype)));
                        }
                        // for (String extraField : extraFields) {
                        //     List values = (List) studyObject.get(extraField.toLowerCase());
                        //     push.put(DBObjectToVariantConverter.STUDIES_FIELD + ".$." + extraField.toLowerCase(),
                        //             new Document("$each", values).append("$position", loadedSampleIds.size()));
                        // }
                    } else {
                        push.put(DocumentToVariantConverter.STUDIES_FIELD + ".$."
                                + DocumentToStudyVariantEntryConverter.GENOTYPES_FIELD, Collections.emptyMap());
                    }
                }
                push.put(DocumentToVariantConverter.STUDIES_FIELD + ".$."
                        + DocumentToStudyVariantEntryConverter.FILES_FIELD,
                        ((List) studyObject.get(DocumentToStudyVariantEntryConverter.FILES_FIELD)).get(0));
                Document update = new Document(new Document("$push", push));
                queries.add(new Document("_id", id)
                        .append(DocumentToVariantConverter.STUDIES_FIELD + '.'
                                + DocumentToStudyVariantEntryConverter.STUDYID_FIELD, studyConfiguration.getStudyId())
                        .append(DocumentToVariantConverter.STUDIES_FIELD + '.'
                                + DocumentToStudyVariantEntryConverter.FILES_FIELD + '.'
                                + DocumentToStudyVariantEntryConverter.FILEID_FIELD, new Document("$ne", fileId)));
                updates.add(update);
            }
        }
        writeResult.setExistingVariantsNanoTime(System.nanoTime() - nanoTime);

        if (!queries.isEmpty()) {
            QueryOptions options = new QueryOptions(UPSERT, false);
            options.put(MULTI, false);
            QueryResult<BulkWriteResult> update = variantsCollection.update(queries, updates, options);
            // Can happen that nonInsertedVariantsNum != queries.size() != nonInsertedVariants.size() if there was
            // a duplicated variant.
            writeResult.setNonInsertedVariants(nonInsertedVariants.size() - update.first().getMatchedCount());
            writeResult.setUpdatedVariants(writeResult.getUpdatedVariants() + update.first().getModifiedCount());
        }

        return new QueryResult<>("insertVariants", ((int) (System.currentTimeMillis() - startTime)), 1, 1, "", "",
                Collections.singletonList(writeResult));
    }
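The final bookkeeping in this method is plain arithmetic on those counts: each distinct duplicated id should be matched once by the second bulk update, so whatever remains of nonInsertedVariants.size() after subtracting getMatchedCount() is the number of truly duplicated rows that could not be written. A hedged sketch of that reconciliation, with a hypothetical matched count standing in for the MongoDB bulk-write result:

import com.google.common.collect.HashMultiset;
import com.google.common.collect.Multiset;

public class WriteAccountingSketch {
    public static void main(String[] args) {
        // Ids that failed the first (upsert) pass with a duplicate-key error
        Multiset<String> nonInsertedVariants = HashMultiset.create();
        nonInsertedVariants.add("1:1000:A:T");
        nonInsertedVariants.add("1:1000:A:T"); // same variant present twice in the input batch
        nonInsertedVariants.add("2:2000:C:G");

        // Hypothetical matched count of the second (push) pass: each distinct id matches one document
        long matchedCount = nonInsertedVariants.elementSet().size();

        // Occurrences that could not be matched again are true duplicates within the batch
        long trulyNonInserted = nonInsertedVariants.size() - matchedCount;
        System.out.println(trulyNonInserted); // 1
    }
}

This is only an illustration of the subtraction; in the real method the matched count comes from the BulkWriteResult returned by variantsCollection.update, not from the multiset itself.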