List of usage examples for com.google.common.collect.Multiset#add
@Override
boolean add(E element);
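Multiset.add(E) records one additional occurrence of the element and, per the java.util.Collection contract, always returns true for the mutable Guava implementations used in the examples below (e.g. HashMultiset). Before the real-world examples, here is a minimal self-contained sketch of that contract; the class name MultisetAddDemo and the sample values are illustrative only:

import com.google.common.collect.HashMultiset;
import com.google.common.collect.Multiset;

public class MultisetAddDemo {
    public static void main(String[] args) {
        Multiset<String> fruits = HashMultiset.create();

        // add(E) records one more occurrence and always returns true
        boolean changed = fruits.add("apple"); // count("apple") == 1
        fruits.add("apple");                   // count("apple") == 2
        fruits.add("banana");

        System.out.println(changed);               // true
        System.out.println(fruits.count("apple")); // 2
        System.out.println(fruits.size());         // 3 (total occurrences, duplicates included)

        // The overload add(E, int) adds several occurrences at once and
        // returns the count *before* the addition.
        int before = fruits.add("apple", 3); // before == 2, count is now 5
        System.out.println(before + " -> " + fruits.count("apple"));
    }
}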
From source file:buildcraft.transport.Pipe.java
private void resolveActions() {
    if (!hasGate())
        return;

    boolean oldBroadcastRedstone = broadcastRedstone;
    boolean[] oldBroadcastSignal = broadcastSignal;

    broadcastRedstone = false;
    broadcastSignal = new boolean[] { false, false, false, false };

    // Tell the gate to prepare for resolving actions. (Disable pulser)
    gate.startResolution();

    HashMap<Integer, Boolean> actions = new HashMap<Integer, Boolean>();
    Multiset<Integer> actionCount = HashMultiset.create();

    // Computes the actions depending on the triggers
    for (int it = 0; it < 8; ++it) {
        ITrigger trigger = activatedTriggers[it];
        IAction action = activatedActions[it];
        ITriggerParameter parameter = triggerParameters[it];

        if (trigger != null && action != null) {
            actionCount.add(action.getId());
            if (!actions.containsKey(action.getId())) {
                actions.put(action.getId(), isNearbyTriggerActive(trigger, parameter));
            } else if (gate.getConditional() == GateConditional.AND) {
                actions.put(action.getId(),
                        actions.get(action.getId()) && isNearbyTriggerActive(trigger, parameter));
            } else {
                actions.put(action.getId(),
                        actions.get(action.getId()) || isNearbyTriggerActive(trigger, parameter));
            }
        }
    }

    // Activate the actions
    for (Integer i : actions.keySet())
        if (actions.get(i)) {
            // Custom gate actions take precedence over defaults.
            if (gate.resolveAction(ActionManager.actions[i], actionCount.count(i))) {
                continue;
            }

            if (ActionManager.actions[i] instanceof ActionRedstoneOutput) {
                broadcastRedstone = true;
            } else if (ActionManager.actions[i] instanceof ActionSignalOutput) {
                broadcastSignal[((ActionSignalOutput) ActionManager.actions[i]).color.ordinal()] = true;
            } else {
                for (int a = 0; a < container.tileBuffer.length; ++a)
                    if (container.tileBuffer[a].getTile() instanceof IActionReceptor) {
                        IActionReceptor recept = (IActionReceptor) container.tileBuffer[a].getTile();
                        recept.actionActivated(ActionManager.actions[i]);
                    }
            }
        }

    actionsActivated(actions);

    if (oldBroadcastRedstone != broadcastRedstone) {
        container.scheduleRenderUpdate();
        updateNeighbors(true);
    }

    for (int i = 0; i < oldBroadcastSignal.length; ++i)
        if (oldBroadcastSignal[i] != broadcastSignal[i]) {
            // worldObj.markBlockNeedsUpdate(xCoord, yCoord, zCoord);
            container.scheduleRenderUpdate();
            updateSignalState();
            break;
        }
}
From source file:edu.uci.ics.sourcerer.tools.java.metrics.db.NumberOfClassChildrenCalculator.java
@Override
public void calculate(QueryExecutor exec, Integer projectID, ProjectMetricModel metrics, TypeModel model) {
    TaskProgressLogger task = TaskProgressLogger.get();

    Pattern anon = Pattern.compile(".*\\$\\d+$");

    task.start("Computing NumberOfClassChildren");
    Multiset<ModeledDeclaredType> parents = HashMultiset.create();
    Multiset<ModeledDeclaredType> directParents = HashMultiset.create();

    task.start("Processing classes", "classes processed");
    for (ModeledEntity entity : model.getEntities()) {
        if (projectID.equals(entity.getProjectID()) && entity.getType() == Entity.CLASS
                && !anon.matcher(entity.getFqn()).matches()) {
            ModeledDeclaredType dec = (ModeledDeclaredType) entity;
            ModeledEntity sup = dec.getSuperclass();
            boolean first = true;
            while (sup != null) {
                if (sup.getType() == Entity.PARAMETERIZED_TYPE) {
                    sup = ((ModeledParametrizedType) sup).getBaseType();
                } else if (projectID.equals(sup.getProjectID())) {
                    if (sup instanceof ModeledDeclaredType) {
                        ModeledDeclaredType ssup = (ModeledDeclaredType) sup;
                        if (first) {
                            directParents.add(ssup);
                            first = false;
                        }
                        parents.add(ssup);
                        sup = ssup.getSuperclass();
                    } else {
                        logger.severe("Declared type expected: " + sup);
                        sup = null;
                    }
                } else {
                    sup = null;
                }
            }
            task.progress();
        }
    }
    task.finish();

    Averager<Double> avgNoc = Averager.create();
    Averager<Double> avgDnoc = Averager.create();
    for (ModeledEntity entity : model.getEntities()) {
        if (projectID.equals(entity.getProjectID()) && entity.getType() == Entity.CLASS
                && !anon.matcher(entity.getFqn()).matches()) {
            ModeledDeclaredType dec = (ModeledDeclaredType) entity;
            Double value = metrics.getEntityValue(dec.getEntityID(), Metric.NUMBER_OF_CLASS_CHILDREN);
            if (value == null) {
                value = (double) parents.count(dec);
                metrics.setEntityValue(dec.getEntityID(), dec.getFileID(), Metric.NUMBER_OF_CLASS_CHILDREN,
                        value);
                exec.insert(EntityMetricsTable.createInsert(projectID, dec.getFileID(), dec.getEntityID(),
                        Metric.NUMBER_OF_CLASS_CHILDREN, value));
            }
            avgNoc.addValue(value);

            value = metrics.getEntityValue(dec.getEntityID(), Metric.NUMBER_OF_DIRECT_CLASS_CHILDREN);
            if (value == null) {
                value = (double) directParents.count(dec);
                metrics.setEntityValue(dec.getEntityID(), dec.getFileID(),
                        Metric.NUMBER_OF_DIRECT_CLASS_CHILDREN, value);
                exec.insert(EntityMetricsTable.createInsert(projectID, dec.getFileID(), dec.getEntityID(),
                        Metric.NUMBER_OF_DIRECT_CLASS_CHILDREN, value));
            }
            avgDnoc.addValue(value);
        }
    }

    if (metrics.missingValue(Metric.NUMBER_OF_CLASS_CHILDREN)) {
        metrics.setValue(Metric.NUMBER_OF_CLASS_CHILDREN, avgNoc);
        exec.insert(ProjectMetricsTable.createInsert(projectID, Metric.NUMBER_OF_CLASS_CHILDREN, avgNoc));
    }
    if (metrics.missingValue(Metric.NUMBER_OF_DIRECT_CLASS_CHILDREN)) {
        metrics.setValue(Metric.NUMBER_OF_DIRECT_CLASS_CHILDREN, avgDnoc);
        exec.insert(
                ProjectMetricsTable.createInsert(projectID, Metric.NUMBER_OF_DIRECT_CLASS_CHILDREN, avgDnoc));
    }
    task.finish();
}
From source file:com.b2international.snowowl.snomed.datastore.request.SnomedConceptCreateRequest.java
private void convertDescriptions(TransactionContext context, final String conceptId) {
    final Set<String> requiredDescriptionTypes = newHashSet(Concepts.FULLY_SPECIFIED_NAME,
            Concepts.REFSET_DESCRIPTION_ACCEPTABILITY_PREFERRED);
    final Multiset<String> preferredLanguageRefSetIds = HashMultiset.create();
    final Set<String> synonymAndDescendantIds = context.service(Synonyms.class).get();

    for (final SnomedDescriptionCreateRequest descriptionRequest : descriptions) {
        descriptionRequest.setConceptId(conceptId);

        if (null == descriptionRequest.getModuleId()) {
            descriptionRequest.setModuleId(getModuleId());
        }

        descriptionRequest.execute(context);

        final String typeId = descriptionRequest.getTypeId();

        if (synonymAndDescendantIds.contains(typeId)) {
            for (final Entry<String, Acceptability> acceptability : descriptionRequest.getAcceptability()
                    .entrySet()) {
                if (Acceptability.PREFERRED.equals(acceptability.getValue())) {
                    preferredLanguageRefSetIds.add(acceptability.getKey());
                    requiredDescriptionTypes.remove(Concepts.REFSET_DESCRIPTION_ACCEPTABILITY_PREFERRED);
                }
            }
        }

        requiredDescriptionTypes.remove(typeId);
    }

    if (!requiredDescriptionTypes.isEmpty()) {
        throw new BadRequestException(
                "At least one fully specified name and one preferred term must be supplied with the concept.");
    }

    for (final com.google.common.collect.Multiset.Entry<String> languageRefSetIdOccurence : preferredLanguageRefSetIds
            .entrySet()) {
        if (languageRefSetIdOccurence.getCount() > 1) {
            throw new BadRequestException(
                    "More than one preferred term has been added for language reference set %s.",
                    languageRefSetIdOccurence.getElement());
        }
    }
}
From source file:org.bridgedb.tools.qc.PatternChecker.java
public void run(File f) throws SQLException, IDMapperException {
    String database = "" + f;
    //TODO: we can use the new Iterator interface here...
    DBConnector con = new DataDerby();
    Connection sqlcon = null;
    sqlcon = con.createConnection(database, 0);

    Multimap<DataSource, String> missExamples = HashMultimap.create();
    Multiset<DataSource> misses = HashMultiset.create();
    Multiset<DataSource> totals = HashMultiset.create();
    Map<DataSource, Pattern> patterns = DataSourcePatterns.getPatterns();

    // String url = "jdbc:derby:jar:(" + f + ")database";
    // IDMapperRdb gdb = SimpleGdbFactory.createInstance("" + f, url);

    Statement st = sqlcon.createStatement();
    ResultSet rs = st.executeQuery("select id, code from datanode");
    while (rs.next()) {
        String id = rs.getString(1);
        String syscode = rs.getString(2);
        if (DataSource.systemCodeExists(syscode)) {
            DataSource ds = DataSource.getExistingBySystemCode(syscode);
            if (patterns.get(ds) == null)
                continue; // skip if there is no pattern defined.

            Set<DataSource> matches = DataSourcePatterns.getDataSourceMatches(id);
            if (!matches.contains(ds)) {
                if (missExamples.get(ds).size() < 10)
                    missExamples.put(ds, id);
                misses.add(ds);
            }
            totals.add(ds);
        }
    }
    // String code = rs.getString (2);
    // System.out.println (id + "\t" + code);

    for (DataSource ds : totals.elementSet()) {
        int miss = misses.count(ds);
        int total = totals.count(ds);

        if (miss > 0) {
            String severity = miss < (total / 25) ? "WARNING" : "ERROR";
            System.out.println(severity + ": " + miss + "/" + total + " (" + miss * 100 / total
                    + "%) ids do not match expected pattern for " + ds);
            System.out.println(severity + ": expected pattern is '" + patterns.get(ds) + "'");
            boolean first = true;
            for (String id : missExamples.get(ds)) {
                System.out.print(first ? severity + ": aberrant ids are e.g. " : ", ");
                first = false;
                System.out.print("'" + id + "'");
            }
            System.out.println();
        }
    }

    allMisses.addAll(misses);
    allTotals.addAll(totals);
}
From source file:webreduce.indexing.luceneSearcher.java
public List<Dataset> search() throws IOException {
    List<Dataset> resultList;
    resultList = new ArrayList<>();

    BooleanQuery.Builder finalQueryBuilder = new BooleanQuery.Builder();
    BooleanQuery.Builder entityQueryBuilder = new BooleanQuery.Builder();
    BooleanQuery.Builder attributeQueryBuilder = new BooleanQuery.Builder();

    //gives me queries
    QueryParser qpa = new QueryParser(ATTRIBUTES_FIELD, new CustomAnalyzer());
    QueryParser qpe = new QueryParser(ENTITIES_FIELD, new CustomAnalyzer());

    //QueryWrapperFilter queryFilter = new QueryWrapperFilter(query);
    //CachingWrapperFilter cachingFilter = new CachingWrapperFilter(queryFilter);
    //CachingWrapperQuery typeFilterR = new CachingWrapperFilter(new TermsFilter(new Term(TABLE_TYPE_FIELD, "RELATION")));

    IndexReader reader = DirectoryReader.open(FSDirectory.open(Paths.get(indexDir)));
    IndexSearcher searcher = new IndexSearcher(reader);
    QueryBuilder queryBuilder = new QueryBuilder(new CustomAnalyzer());

    System.out.println("Attributes: \n" + Arrays.deepToString(attributes));
    System.out.println("Entities: \n" + Arrays.deepToString(entities));

    //add attributes one by one
    for (String a : attributes) {
        Query qa;
        try {
            qa = qpa.parse("\"" + a + "\"");
            attributeQueryBuilder.add(qa, BooleanClause.Occur.SHOULD);
        } catch (ParseException ex) {
        }
    } //end of for loop

    //remove null
    HashSet<String> entitySet;
    entitySet = new HashSet<>(Arrays.asList(entities));
    entitySet.remove(null);
    entities = entitySet.toArray(new String[entitySet.size()]);
    System.out.println("Entities after null removal \n" + Arrays.deepToString(entities));

    Multiset<Integer> docNoCount;
    docNoCount = HashMultiset.create();

    //Take only top 50 entities
    //(note: this arraycopy throws if fewer than 50 entities remain after null removal)
    String[] entities50 = new String[50];
    System.arraycopy(entities, 0, entities50, 0, 50);
    System.out.println(Arrays.deepToString(entities50));

    for (String e : entities50) {
        System.out.println(e);
        if (e == null) {
            continue;
        }
        Query qe;
        try {
            qe = qpe.parse(QueryParserBase.escape(e));
            //Query qeph = qpe.parse("\"" + QueryParserBase.escape(e) + "\"");
            finalQueryBuilder.add(qe, BooleanClause.Occur.MUST); //add entities boolean query
            finalQueryBuilder.add(attributeQueryBuilder.build(), BooleanClause.Occur.MUST); //add attributes query

            TopDocs td = searcher.search(finalQueryBuilder.build(), numberOfResults * 10);
            for (ScoreDoc sd : td.scoreDocs) {
                int docNo = sd.doc;
                docNoCount.add(docNo);
            }
        } catch (ParseException ex) {
        }
        System.out.println("Top Doc id: \n"
                + Multisets.copyHighestCountFirst(docNoCount).entrySet().iterator().next().getElement());
    }

    //Sort the returned docs by their frequency and store it in docNoSorted
    ImmutableMultiset<Integer> docNoSorted = Multisets.copyHighestCountFirst(docNoCount);
    //Get the entry set of the frequency ordered document set
    ImmutableSet<Multiset.Entry<Integer>> entrySet = Multisets.copyHighestCountFirst(docNoCount).entrySet();
    //Get the iterator for the sorted entry set
    UnmodifiableIterator<Multiset.Entry<Integer>> iterator = entrySet.iterator();

    int bestDocId = iterator.next().getElement();
    //note: this second next() advances the iterator, so it prints the second entry
    System.out.println("first count" + iterator.next());

    Set<Integer> elementSet = docNoSorted.elementSet();
    Integer next = elementSet.iterator().next();
    System.out.println("Most frequent document id: " + next);

    int resultSetSize;
    resultSetSize = docNoSorted.elementSet().size();
    System.out.println("Entry Set Size: " + resultSetSize + " Cardinality: " + docNoSorted.size());

    Set<Integer> elementSet1 = Multisets.copyHighestCountFirst(docNoSorted).elementSet();
    List<Integer> t = new ArrayList<Integer>(elementSet1);
    List<Integer> subList = t.subList(0, numberOfResults);
    //ArrayList subArrayList = new ArrayList(subList);
    Iterator<Integer> subListIterator = subList.iterator();

    //we have all the web table doc IDs
    //We should take
    while (subListIterator.hasNext()) {
        int docID = subListIterator.next();
        Document doc;
        doc = searcher.doc(docID);
        String jsonString = doc.get("full_result");
        Dataset er = Dataset.fromJson(jsonString);
        resultList.add(er);
    }
    return resultList;
}
From source file:com.cisco.oss.foundation.logging.structured.AbstractFoundationLoggingMarker.java
protected void populateUserFieldMap() {
    Class<? extends AbstractFoundationLoggingMarker> clazz = this.getClass();
    Multiset<Field> fieldList = markerClassFields.get(clazz);
    if (fieldList == null) {
        fieldList = ConcurrentHashMultiset.create();
        Class<?> cls = clazz;
        while (AbstractFoundationLoggingMarker.class.isAssignableFrom(cls)) {
            Field[] declaredFields = cls.getDeclaredFields();
            for (Field field : declaredFields) {
                if (field.isAnnotationPresent(UserField.class)) {
                    field.setAccessible(true);
                    fieldList.add(field);
                }
            }
            markerClassFields.put(clazz, fieldList);
            cls = cls.getSuperclass();
        }
    }

    for (Field field : fieldList) {
        try {
            Object value = field.get(this);
            UserField userField = field.getAnnotation(UserField.class);
            if (value == null && userField.suppressNull()) {
                value = FoundationLoggingMarker.NO_OPERATION;
            }
            userFields.put(field.getName(), value == null ? "null" : value);
        } catch (IllegalAccessException e) {
            throw new IllegalArgumentException(e);
        }
    }
}
From source file:com.continuuity.weave.internal.appmaster.RunningContainers.java
/**
 * Handle completion of container.
 * @param status The completion status.
 * @param restartRunnables Set of runnable names that require restart.
 */
void handleCompleted(YarnContainerStatus status, Multiset<String> restartRunnables) {
    containerLock.lock();
    String containerId = status.getContainerId();
    int exitStatus = status.getExitStatus();
    ContainerState state = status.getState();
    try {
        Map<String, WeaveContainerController> lookup = containers.column(containerId);
        if (lookup.isEmpty()) {
            // It's OK because if a container is stopped through removeLast, this would be empty.
            return;
        }

        if (lookup.size() != 1) {
            LOG.warn("More than one controller found for container {}", containerId);
        }

        if (exitStatus != 0) {
            LOG.warn("Container {} exited abnormally with state {}, exit code {}. Re-request the container.",
                    containerId, state, exitStatus);
            restartRunnables.add(lookup.keySet().iterator().next());
        } else {
            LOG.info("Container {} exited normally with state {}", containerId, state);
        }

        for (Map.Entry<String, WeaveContainerController> completedEntry : lookup.entrySet()) {
            String runnableName = completedEntry.getKey();
            WeaveContainerController controller = completedEntry.getValue();
            controller.completed(exitStatus);

            removeInstanceId(runnableName, getInstanceId(controller.getRunId()));
            resourceReport.removeRunnableResources(runnableName, containerId);
        }

        lookup.clear();
        containerChange.signalAll();
    } finally {
        containerLock.unlock();
    }
}
From source file:it.cnr.isti.hpc.dexter.disambiguation.TurkishEntityDisambiguator.java
@Override
public EntityMatchList disambiguate(DexterLocalParams localParams, SpotMatchList sml) {
    entityScoreMap = new HashMap<String, EntityScores>();
    selectedEntities = new HashSet<String>();
    Multiset<String> entityFrequencyMultiset = HashMultiset.create();
    EntityMatchList entities = sml.getEntities();
    String inputText = localParams.getParams().get("text");
    String algorithm = Property.getInstance().get("algorithm");
    String ambigious = Property.getInstance().get("algorithm.ambigious");
    List<Token> inputTokens = Zemberek.getInstance().disambiguateFindTokens(inputText, false, true);
    List<Double> documentVector = DescriptionEmbeddingAverage.getAverageVectorList(inputText);
    Multiset<String> inputTokensMultiset = HashMultiset.create();
    for (Token token : inputTokens) {
        inputTokensMultiset.add(token.getMorphText());
    }
    Multiset<String> domainMultiset = HashMultiset.create();
    Multiset<String> typeMultiset = HashMultiset.create();
    HashMap<String, Double> entitySimMap = new HashMap<String, Double>();
    // if (printCandidateEntities) {
    //     printEntities(entities);
    // }
    HashSet<String> words = new HashSet<String>();
    Multiset<String> leskWords = HashMultiset.create();
    // first pass for finding number of types and domains
    for (int i = 0; i < entities.size(); i++) {
        EntityMatch em = entities.get(i);
        String id = em.getId();
        if (!entityFrequencyMultiset.contains(id)) {
            entityFrequencyMultiset.add(id);
            Entity entity = em.getEntity();
            words.add(entity.getShingle().getText());
            String type = entity.getPage().getType();
            if (type != null && type.length() > 0) {
                typeMultiset.add(type);
            }
            String domain = entity.getPage().getDomain();
            if (domain != null && domain.length() > 0) {
                domainMultiset.add(domain);
            }
            String desc = entity.getPage().getDescription();
            List<Token> tokens = Zemberek.getInstance().disambiguateFindTokens(desc, false, true);
            for (Token token : tokens) {
                leskWords.add(token.getMorphText());
            }
        } else {
            entityFrequencyMultiset.add(id);
        }
    }
    int maxDomainCount = 0;
    for (String domain : Multisets.copyHighestCountFirst(domainMultiset).elementSet()) {
        maxDomainCount = domainMultiset.count(domain);
        break;
    }
    int maxTypeCount = 0;
    for (String type : Multisets.copyHighestCountFirst(typeMultiset).elementSet()) {
        maxTypeCount = typeMultiset.count(type);
        break;
    }
    double maxSuffixScore = 0, maxLeskScore = 0, maxSimpleLeskScore = 0, maxLinkScore = 0,
            maxHashInfoboxScore = 0, maxwordvecDescriptionLocalScore = 0, maxHashDescriptionScore = 0,
            maxPopularityScore = 0, maxWordvectorAverage = 0, maxWordvecLinksScore = 0;
    // second pass: compute similarities between entities in a window
    int currentSpotIndex = -1;
    SpotMatch currentSpot = null;
    for (int i = 0; i < entities.size(); i++) {
        EntityMatch em = entities.get(i);
        SpotMatch spot = em.getSpot();
        if (currentSpot == null || spot != currentSpot) {
            currentSpotIndex++;
            currentSpot = spot;
        }
        String id = em.getId();
        Entity entity = entities.get(i).getEntity();
        EntityPage page = entities.get(i).getEntity().getPage();
        String domain = page.getDomain();
        String type = page.getType();
        Shingle shingle = entity.getShingle();
        /* windowing algorithm starts */
        int left = currentSpotIndex - window;
        int right = currentSpotIndex + window;
        if (left < 0) {
            right -= left;
            left = 0;
        }
        if (right > sml.size()) {
            left += (sml.size()) - right;
            right = sml.size();
            if (left < 0) {
                left = 0;
            }
        }
        double linkScore = 0, hashInfoboxScore = 0, wordvecDescriptionLocalScore = 0,
                hashDescriptionScore = 0, wordvecLinksScore = 0;
        for (int j = left; j < right; j++) {
            SpotMatch sm2 = sml.get(j);
            EntityMatchList entities2 = sm2.getEntities();
            for (EntityMatch em2 : entities2) {
                String id2 = em2.getId();
                EntityPage page2 = em2.getEntity().getPage();
                int counter = 0;
                if (!ambigious.equals("true")) {
                    for (EntityMatch entityMatch : entities2) {
                        if (entityMatch.getId().startsWith("w")) {
                            counter++;
                        }
                    }
                }
                if ((ambigious.equals("true") || counter == 1) && em.getSpot() != em2.getSpot()
                        && !id.equals(id2)) {
                    // Link similarity calculation starts
                    double linkSim = 0;
                    if (id.startsWith("w") && id2.startsWith("w")) {
                        if (entitySimMap.containsKey("link" + id + id2)) {
                            linkSim = entitySimMap.get("link" + id + id2);
                        } else {
                            HashSet<String> set1 = Sets.newHashSet(page.getLinks().split(" "));
                            HashSet<String> set2 = Sets.newHashSet(page2.getLinks().split(" "));
                            linkSim = JaccardCalculator.calculateSimilarity(set1, set2);
                            entitySimMap.put("link" + id + id2, linkSim);
                        }
                        linkScore += linkSim;
                        // Link similarity calculation ends
                    }
                    // Entity embedding similarity calculation starts
                    double eeSim = 0;
                    if (id.startsWith("w") && id2.startsWith("w")) {
                        if (entitySimMap.containsKey("ee" + id + id2)) {
                            eeSim = entitySimMap.get("ee" + id + id2);
                        } else {
                            eeSim = EntityEmbeddingSimilarity.getInstance().getSimilarity(page, page2);
                            entitySimMap.put("ee" + id + id2, eeSim);
                        }
                        hashInfoboxScore += eeSim;
                    }
                    double w2veclinksSim = 0;
                    if (id.startsWith("w") && id2.startsWith("w")) {
                        if (entitySimMap.containsKey("wl" + id + id2)) {
                            w2veclinksSim = entitySimMap.get("wl" + id + id2);
                        } else {
                            w2veclinksSim = AveragePooling.getInstance().getSimilarity(page.getWord2vec(),
                                    page2.getWord2vec());
                            entitySimMap.put("wl" + id + id2, w2veclinksSim);
                        }
                        wordvecLinksScore += w2veclinksSim;
                    }
                    // Entity embedding similarity calculation ends
                    // Description word2vec similarity calculation starts
                    double word2vecSim = 0;
                    if (entitySimMap.containsKey("w2v" + id + id2)) {
                        word2vecSim = entitySimMap.get("w2v" + id + id2);
                    } else {
                        word2vecSim = AveragePooling.getInstance().getSimilarity(page2.getDword2vec(),
                                page.getDword2vec());
                        entitySimMap.put("w2v" + id + id2, word2vecSim);
                    }
                    wordvecDescriptionLocalScore += word2vecSim;
                    // Description word2vec similarity calculation ends
                    // Description autoencoder similarity calculation starts
                    double autoVecSim = 0;
                    if (entitySimMap.containsKey("a2v" + id + id2)) {
                        autoVecSim = entitySimMap.get("a2v" + id + id2);
                    } else {
                        autoVecSim = AveragePooling.getInstance().getSimilarity(page2.getDautoencoder(),
                                page.getDautoencoder());
                        entitySimMap.put("a2v" + id + id2, autoVecSim);
                    }
                    hashDescriptionScore += autoVecSim;
                    // Description autoencoder similarity calculation ends
                }
            }
        }
        if (linkScore > maxLinkScore) {
            maxLinkScore = linkScore;
        }
        if (hashInfoboxScore > maxHashInfoboxScore) {
            maxHashInfoboxScore = hashInfoboxScore;
        }
        if (wordvecDescriptionLocalScore > maxwordvecDescriptionLocalScore) {
            maxwordvecDescriptionLocalScore = wordvecDescriptionLocalScore;
        }
        if (hashDescriptionScore > maxHashDescriptionScore) {
            maxHashDescriptionScore = hashDescriptionScore;
        }
        if (wordvecLinksScore > maxWordvecLinksScore) {
            maxWordvecLinksScore = wordvecLinksScore;
        }
        /* windowing algorithm ends */
        double domainScore = 0;
        if (domainMultiset.size() > 0 && maxDomainCount > 1 && domainMultiset.count(domain) > 1) {
            domainScore = (double) domainMultiset.count(domain) / maxDomainCount;
        }
        double typeScore = 0;
        if (typeMultiset.size() > 0 && maxTypeCount > 1 && typeMultiset.count(type) > 1) {
            typeScore = (double) typeMultiset.count(type) / maxTypeCount;
        }
        if (typeBlackList.contains(type)) {
            typeScore /= 10;
        }
        double typeContentScore = 0;
        if (type.length() > 0 && StringUtils.containsIgnoreCase(words.toString(), type)) {
            typeContentScore = 1;
        }
        double typeClassifierScore = TypeClassifier.getInstance().predict(page, page.getTitle(),
                page.getType(), entity.getShingle().getSentence());
        double wordvecDescriptionScore = AveragePooling.getInstance().getSimilarity(documentVector,
                page.getDword2vec());
        if (wordvecDescriptionScore > maxWordvectorAverage) {
            maxWordvectorAverage = wordvecDescriptionScore;
        }
        double suffixScore = 0;
        if (type != null && type.length() > 0) {
            Set<String> suffixes = new HashSet<String>();
            String t = entity.getTitle().toLowerCase(new Locale("tr", "TR"));
            for (int x = 0; x < entities.size(); x++) {
                EntityMatch e2 = entities.get(x);
                if (e2.getId().equals(entity.getId())) {
                    suffixes.add(e2.getMention());
                }
            }
            suffixes.remove(t);
            suffixes.remove(entity.getTitle());
            // String inputTextLower = inputText.toLowerCase(new Locale("tr", "TR"));
            // while (inputTextLower.contains(t)) {
            //     int start = inputTextLower.indexOf(t);
            //     int end = inputTextLower.indexOf(" ", start + t.length());
            //     if (end > start) {
            //         String suffix = inputTextLower.substring(start, end);
            //         // .replaceAll("\\W", "");
            //         if (suffix.contains("'")
            //                 || (Zemberek.getInstance().hasMorph(suffix)
            //                         && !suffix.equals(t) && suffix.length() > 4)) {
            //             suffixes.add(suffix);
            //         }
            //         inputTextLower = inputTextLower.substring(end);
            //     } else {
            //         break;
            //     }
            // }
            if (suffixes.size() >= minSuffix) {
                for (String suffix : suffixes) {
                    double sim = gd.calculateSimilarity(suffix, type);
                    suffixScore += sim;
                }
            }
        }
        // String entitySuffix = page.getSuffix();
        // String[] inputSuffix = shingle.getSuffix().split(" ");
        // for (int j = 0; j < inputSuffix.length; j++) {
        //     if (entitySuffix.contains(inputSuffix[j])) {
        //         suffixScore += 0.25f;
        //     }
        // }
        if (suffixScore > maxSuffixScore) {
            maxSuffixScore = suffixScore;
        }
        // if (id.equals("w691538")) {
        //     LOGGER.info("");
        // }
        double letterCaseScore = 0;
        int lc = page.getLetterCase();
        if (StringUtils.isAllLowerCase(em.getMention()) && lc == 0 && id.startsWith("t")) {
            letterCaseScore = 1;
        } else if (StringUtils.isAllUpperCase(em.getMention()) && lc == 1 && id.startsWith("w")) {
            letterCaseScore = 1;
        } else if (Character.isUpperCase(em.getMention().charAt(0)) && lc == 2 && id.startsWith("w")) {
            letterCaseScore = 1;
        } else if (StringUtils.isAllLowerCase(em.getMention()) && id.startsWith("t")) {
            letterCaseScore = 1;
        }
        double nameScore = 1 - LevenshteinDistanceCalculator.calculateDistance(page.getTitle(),
                Zemberek.removeAfterSpostrophe(em.getMention()));
        double popularityScore = page.getRank();
        if (id.startsWith("w")) {
            popularityScore = Math.log10(popularityScore + 1);
            if (popularityScore > maxPopularityScore) {
                maxPopularityScore = popularityScore;
            }
        }
        double leskScore = 0, simpleLeskScore = 0;
        String desc = em.getEntity().getPage().getDescription();
        if (desc != null) {
            List<Token> tokens = Zemberek.getInstance().disambiguateFindTokens(desc, false, true);
            for (Token token : tokens) {
                if (inputTokensMultiset.contains(token.getMorphText())
                        && !TurkishNLP.isStopWord(token.getMorphText())) {
                    simpleLeskScore += inputTokensMultiset.count(token.getMorphText());
                }
                if (leskWords.contains(token.getMorphText())
                        && !TurkishNLP.isStopWord(token.getMorphText())) {
                    leskScore += leskWords.count(token.getMorphText());
                }
            }
            leskScore /= Math.log(tokens.size() + 1);
            simpleLeskScore /= Math.log(tokens.size() + 1);
            if (leskScore > maxLeskScore) {
                maxLeskScore = leskScore;
            }
            if (simpleLeskScore > maxSimpleLeskScore) {
                maxSimpleLeskScore = simpleLeskScore;
            }
            if (!entityScoreMap.containsKey(id)) {
                EntityScores scores = new EntityScores(em, id, popularityScore, nameScore, letterCaseScore,
                        suffixScore, wordvecDescriptionScore, typeContentScore, typeScore, domainScore,
                        hashDescriptionScore, wordvecDescriptionLocalScore, hashInfoboxScore, linkScore,
                        wordvecLinksScore, leskScore, simpleLeskScore, typeClassifierScore);
                entityScoreMap.put(id, scores);
            } else {
                EntityScores entityScores = entityScoreMap.get(id);
                entityScores.setHashInfoboxScore((entityScores.getHashInfoboxScore() + hashInfoboxScore) / 2);
                entityScores.setHashDescriptionScore(
                        (entityScores.getHashInfoboxScore() + hashDescriptionScore) / 2);
                entityScores.setLinkScore((entityScores.getLinkScore() + linkScore) / 2);
                entityScores.setWordvecDescriptionLocalScore(
                        (entityScores.getWordvecDescriptionLocalScore() + wordvecDescriptionLocalScore) / 2);
                entityScores
                        .setWordvecLinksScore((entityScores.getWordvecLinksScore() + wordvecLinksScore) / 2);
                entityScores.setLeskScore((entityScores.getLeskScore() + leskScore) / 2);
            }
        }
    }
    /* normalization and total score calculation starts */
    Set<String> set = new HashSet<String>();
    for (int i = 0; i < entities.size(); i++) {
        EntityMatch em = entities.get(i);
        String id = em.getId();
        EntityScores entityScores = entityScoreMap.get(id);
        if (set.contains(id)) {
            continue;
        }
        if (id.startsWith("w")) {
            if (maxLinkScore > 0 && entityScores.getLinkScore() > 0) {
                entityScores.setLinkScore(entityScores.getLinkScore() / maxLinkScore);
            }
            if (maxHashInfoboxScore > 0 && entityScores.getHashInfoboxScore() > 0) {
                entityScores.setHashInfoboxScore(entityScores.getHashInfoboxScore() / maxHashInfoboxScore);
            }
            if (maxWordvecLinksScore > 0 && entityScores.getWordvecLinksScore() > 0) {
                entityScores.setWordvecLinksScore(entityScores.getWordvecLinksScore() / maxWordvecLinksScore);
            }
            if (maxPopularityScore > 0 && entityScores.getPopularityScore() > 0) {
                entityScores.setPopularityScore(entityScores.getPopularityScore() / maxPopularityScore);
            }
        }
        if (maxwordvecDescriptionLocalScore > 0 && entityScores.getWordvecDescriptionLocalScore() > 0) {
            entityScores.setWordvecDescriptionLocalScore(
                    entityScores.getWordvecDescriptionLocalScore() / maxwordvecDescriptionLocalScore);
        }
        if (maxHashDescriptionScore > 0 && entityScores.getHashDescriptionScore() > 0) {
            entityScores
                    .setHashDescriptionScore(entityScores.getHashDescriptionScore() / maxHashDescriptionScore);
        }
        if (maxWordvectorAverage > 0 && entityScores.getWordvecDescriptionScore() > 0) {
            entityScores.setWordvecDescriptionScore(
                    entityScores.getWordvecDescriptionScore() / maxWordvectorAverage);
        }
        if (maxLeskScore > 0 && entityScores.getLeskScore() > 0) {
            entityScores.setLeskScore(entityScores.getLeskScore() / maxLeskScore);
        }
        if (maxSimpleLeskScore > 0 && entityScores.getSimpleLeskScore() > 0) {
            entityScores.setSimpleLeskScore(entityScores.getSimpleLeskScore() / maxSimpleLeskScore);
        }
        if (maxSuffixScore > 0 && entityScores.getSuffixScore() > 0) {
            entityScores.setSuffixScore(entityScores.getSuffixScore() / maxSuffixScore);
        }
        set.add(id);
    }
    LOGGER.info("\t"
            + "id\tTitle\tURL\tScore\tPopularity\tName\tLesk\tSimpeLesk\tCase\tNoun\tSuffix\tTypeContent\tType\tDomain\twordvecDescription\twordvecDescriptionLocal\thashDescription\thashInfobox\tword2vecLinks\tLink\t\ttypeClassifier\tDescription");
    for (int i = 0; i < entities.size(); i++) {
        EntityMatch em = entities.get(i);
        String id = em.getId();
        EntityScores e = entityScoreMap.get(id);
        double wikiScore = 0;
        if (id.startsWith("w") && Character.isUpperCase(em.getMention().charAt(0))) {
            wikiScore = wikiWeight;
        } else if (id.startsWith("t") && Character.isLowerCase(em.getMention().charAt(0))) {
            wikiScore = wikiWeight;
        }
        // if (id.equals("w508792")) {
        //     LOGGER.info("");
        // }
        double totalScore = wikiScore + e.getPopularityScore() * popularityWeight
                + e.getNameScore() * nameWeight + e.getLeskScore() * leskWeight
                + e.getSimpleLeskScore() * simpleLeskWeight + e.getLetterCaseScore() * letterCaseWeight
                + e.getSuffixScore() * suffixWeight + e.getTypeContentScore() * typeContentWeight
                + e.getTypeScore() * typeWeight + e.getDomainScore() * domainWeight
                + e.getWordvecDescriptionScore() * wordvecDescriptionWeight
                + e.getWordvecDescriptionLocalScore() * wordvecDescriptionLocalWeight
                + e.getHashDescriptionScore() * hashDescriptionWeight
                + e.getHashInfoboxScore() * hashInfoboxWeight
                + e.getWordvecLinksScore() * word2vecLinksWeight + e.getLinkScore() * linkWeight
                + e.getTypeClassifierkScore() * typeClassifierkWeight;
        if (ranklib == true) {
            totalScore = RankLib.getInstance().score(e);
        }
        if (em.getEntity().getPage().getUrlTitle().contains("(")) {
            totalScore /= 2;
        }
        em.setScore(totalScore);
        e.setScore(totalScore);
        LOGGER.info("\t" + id + "\t" + em.getEntity().getPage().getTitle() + "\t"
                + em.getEntity().getPage().getUrlTitle() + "\t" + em.getScore() + "\t"
                + e.getPopularityScore() * popularityWeight + "\t" + e.getNameScore() * nameWeight + "\t"
                + e.getLeskScore() * leskWeight + "\t" + e.getSimpleLeskScore() * simpleLeskWeight + "\t"
                + e.getLetterCaseScore() * letterCaseWeight + "\t" + e.getSuffixScore() * suffixWeight + "\t"
                + e.getTypeContentScore() * typeContentWeight + "\t" + e.getTypeScore() * typeWeight + "\t"
                + e.getDomainScore() * domainWeight + "\t"
                + e.getWordvecDescriptionScore() * wordvecDescriptionWeight + "\t"
                + e.getWordvecDescriptionLocalScore() * wordvecDescriptionLocalWeight + "\t"
                + e.getHashDescriptionScore() * hashDescriptionWeight + "\t"
                + e.getHashInfoboxScore() * hashInfoboxWeight + "\t"
                + e.getWordvecLinksScore() * word2vecLinksWeight + "\t" + e.getLinkScore() * linkWeight + "\t"
                + e.getTypeClassifierkScore() * typeClassifierkWeight + "\t"
                + em.getEntity().getPage().getDescription());
    }
    // if (annotateEntities) {
    //     annotateEntities(localParams.getParams().get("originalText"), sml);
    // }
    EntityMatchList eml = new EntityMatchList();
    for (SpotMatch match : sml) {
        EntityMatchList list = match.getEntities();
        if (!list.isEmpty()) {
            list.sort();
            eml.add(list.get(0));
            selectedEntities.add(list.get(0).getId());
        }
    }
    return eml;
}
From source file:edu.cmu.lti.oaqa.baseqa.concept.rerank.scorers.LuceneConceptScorer.java
private void search(String queryString, String conf) throws RuntimeException {
    if (queryString.trim().isEmpty())
        return;

    ScoreDoc[] results;
    try {
        Query query = parser.parse(queryString);
        results = searcher.search(query, hits).scoreDocs;
    } catch (ParseException | IOException e) {
        throw new RuntimeException(e);
    }

    Multiset<String> sourceCounts = HashMultiset.create();
    for (int i = 0; i < results.length; i++) {
        try {
            Document doc = reader.document(results[i].doc);
            String source = sourceFieldName == null ? null : doc.get(sourceFieldName);
            String uri = uriPrefix.get(source) + (idFieldName == null ? null : doc.get(idFieldName));
            String gconf = conf + "/global";
            if (!uri2conf2rank.contains(uri, gconf) || uri2conf2rank.get(uri, gconf) > i) {
                uri2conf2rank.put(uri, gconf, i);
            }
            double score = results[i].score;
            if (!uri2conf2score.contains(uri, gconf) || uri2conf2score.get(uri, gconf) < score) {
                uri2conf2score.put(uri, gconf, score);
            }
            String sconf = conf + source;
            int sourceRank = sourceCounts.count(source);
            sourceCounts.add(source);
            if (!uri2conf2rank.contains(uri, sconf) || uri2conf2rank.get(uri, sconf) > sourceRank) {
                synchronizedPut(uri2conf2rank, uri, sconf, sourceRank);
            }
            if (!uri2conf2score.contains(uri, sconf) || uri2conf2score.get(uri, sconf) < score) {
                synchronizedPut(uri2conf2score, uri, sconf, score);
            }
        } catch (IOException e) {
            throw new RuntimeException(e);
        }
    }
}
From source file:edu.cmu.lti.oaqa.baseqa.evidence.concept.ConceptMerger.java
@SuppressWarnings("unchecked") @Override/*from ww w . j ava 2 s .c o m*/ public void process(JCas jcas) throws AnalysisEngineProcessException { // create views and get all concepts in the views List<JCas> views = new ArrayList<>(); if (includeDefaultView) { views.add(jcas); } views.addAll(ViewType.listViews(jcas, viewNamePrefix)); List<Concept> concepts = views.stream().map(TypeUtil::getConcepts).flatMap(Collection::stream) .collect(toList()); // preserve concept fields Set<String> uuids = new HashSet<>(); SetMultimap<String, String> uuid2ids = HashMultimap.create(); SetMultimap<String, String> uuid2names = HashMultimap.create(); SetMultimap<String, String> uuid2uris = HashMultimap.create(); SetMultimap<String, ConceptMention> uuid2mentions = HashMultimap.create(); SetMultimap<String, List<String>> uuid2types = HashMultimap.create(); for (Concept concept : concepts) { String uuid = UUID_PREFIX + UUID.randomUUID().toString(); uuids.add(uuid); uuid2ids.putAll(uuid, TypeUtil.getConceptIds(concept)); uuid2names.putAll(uuid, TypeUtil.getConceptNames(concept)); uuid2uris.putAll(uuid, TypeUtil.getConceptUris(concept)); uuid2mentions.putAll(uuid, TypeUtil.getConceptMentions(concept)); // also remove duplicated concept type entries TypeUtil.getConceptTypes(concept).forEach(type -> uuid2types.put(uuid, toTypeList(type))); } // connectivity detection for merging UndirectedGraph<String, DefaultEdge> graph = new SimpleGraph<>(DefaultEdge.class); uuids.forEach(graph::addVertex); uuid2ids.values().forEach(graph::addVertex); uuid2ids.entries().forEach(entry -> graph.addEdge(entry.getKey(), entry.getValue())); if (useName) { uuid2names.values().stream().map(ConceptMerger::nameKey).forEach(graph::addVertex); uuid2names.entries().forEach(entry -> graph.addEdge(entry.getKey(), nameKey(entry.getValue()))); } views.forEach(view -> view.removeAllIncludingSubtypes(Concept.type)); ConnectivityInspector<String, DefaultEdge> ci = new ConnectivityInspector<>(graph); Multiset<Integer> mergedSizes = HashMultiset.create(); List<Concept> mergedConcepts = ci.connectedSets().stream().map(subgraph -> { Set<String> cuuids = subgraph.stream().filter(str -> str.startsWith(UUID_PREFIX)).collect(toSet()); List<String> ids = cuuids.stream().map(uuid2ids::get).flatMap(Set::stream).filter(Objects::nonNull) .distinct().collect(toList()); List<String> names = cuuids.stream().map(uuid2names::get).flatMap(Set::stream).filter(Objects::nonNull) .distinct().collect(toList()); List<String> uris = cuuids.stream().map(uuid2uris::get).flatMap(Set::stream).filter(Objects::nonNull) .distinct().collect(toList()); List<ConceptType> types = cuuids.stream().map(uuid2types::get).flatMap(Set::stream) .filter(Objects::nonNull).distinct().map(type -> parseTypeList(jcas, type)).collect(toList()); List<ConceptMention> mentions = cuuids.stream().map(uuid2mentions::get).flatMap(Set::stream) .filter(Objects::nonNull).collect(toList()); mergedSizes.add(cuuids.size()); return TypeFactory.createConcept(jcas, names, uris, ImmutableList.copyOf(ids), mentions, types); }).collect(toList()); mergedConcepts.forEach(Concept::addToIndexes); LOG.info("Merged concepts from {} concepts.", mergedSizes); if (LOG.isDebugEnabled()) { mergedConcepts.stream().map(TypeUtil::toString).forEachOrdered(c -> LOG.debug(" - {}", c)); } }