Example usage for com.google.common.collect Multiset add

Introduction

On this page you can find example usage for com.google.common.collect Multiset add.

Prototype

@Override
boolean add(E element);

Document

Adds a single occurrence of the specified element to this multiset.
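
Per the Guava documentation, this form of add always returns true, since a multiset may contain any number of occurrences of an element; use count(Object) to read the tally, or the add(element, occurrences) overload to add several occurrences at once. A minimal standalone sketch of these semantics (not taken from the source files below):

Multiset<String> tags = HashMultiset.create();
tags.add("java");      // returns true
tags.add("java");      // returns true again; count("java") is now 2
tags.add("guava", 3);  // overload: adds three occurrences in one call
System.out.println(tags.count("java"));  // 2
System.out.println(tags.size());         // 5 -- total number of occurrences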

Usage

From source file:buildcraft.transport.Pipe.java

private void resolveActions() {
    if (!hasGate())
        return;

    boolean oldBroadcastRedstone = broadcastRedstone;
    boolean[] oldBroadcastSignal = broadcastSignal;

    broadcastRedstone = false;
    broadcastSignal = new boolean[] { false, false, false, false };

    // Tell the gate to prepare for resolving actions. (Disable pulser)
    gate.startResolution();

    HashMap<Integer, Boolean> actions = new HashMap<Integer, Boolean>();
    Multiset<Integer> actionCount = HashMultiset.create();

    // Computes the actions depending on the triggers
    for (int it = 0; it < 8; ++it) {
        ITrigger trigger = activatedTriggers[it];
        IAction action = activatedActions[it];
        ITriggerParameter parameter = triggerParameters[it];

        if (trigger != null && action != null) {
            actionCount.add(action.getId());
            if (!actions.containsKey(action.getId())) {
                actions.put(action.getId(), isNearbyTriggerActive(trigger, parameter));
            } else if (gate.getConditional() == GateConditional.AND) {
                actions.put(action.getId(),
                        actions.get(action.getId()) && isNearbyTriggerActive(trigger, parameter));
            } else {
                actions.put(action.getId(),
                        actions.get(action.getId()) || isNearbyTriggerActive(trigger, parameter));
            }
        }
    }

    // Activate the actions
    for (Integer i : actions.keySet())
        if (actions.get(i)) {

            // Custom gate actions take precedence over defaults.
            if (gate.resolveAction(ActionManager.actions[i], actionCount.count(i))) {
                continue;
            }

            if (ActionManager.actions[i] instanceof ActionRedstoneOutput) {
                broadcastRedstone = true;
            } else if (ActionManager.actions[i] instanceof ActionSignalOutput) {
                broadcastSignal[((ActionSignalOutput) ActionManager.actions[i]).color.ordinal()] = true;
            } else {
                for (int a = 0; a < container.tileBuffer.length; ++a)
                    if (container.tileBuffer[a].getTile() instanceof IActionReceptor) {
                        IActionReceptor recept = (IActionReceptor) container.tileBuffer[a].getTile();
                        recept.actionActivated(ActionManager.actions[i]);
                    }
            }
        }

    actionsActivated(actions);

    if (oldBroadcastRedstone != broadcastRedstone) {
        container.scheduleRenderUpdate();
        updateNeighbors(true);
    }

    for (int i = 0; i < oldBroadcastSignal.length; ++i)
        if (oldBroadcastSignal[i] != broadcastSignal[i]) {
            // worldObj.markBlockNeedsUpdate(xCoord, yCoord, zCoord);
            container.scheduleRenderUpdate();
            updateSignalState();
            break;
        }
}

From source file:edu.uci.ics.sourcerer.tools.java.metrics.db.NumberOfClassChildrenCalculator.java

@Override
public void calculate(QueryExecutor exec, Integer projectID, ProjectMetricModel metrics, TypeModel model) {
    TaskProgressLogger task = TaskProgressLogger.get();

    Pattern anon = Pattern.compile(".*\\$\\d+$");

    task.start("Computing NumberOfClassChildren");
    Multiset<ModeledDeclaredType> parents = HashMultiset.create();
    Multiset<ModeledDeclaredType> directParents = HashMultiset.create();

    task.start("Processing classes", "classes processed");
    for (ModeledEntity entity : model.getEntities()) {
        if (projectID.equals(entity.getProjectID()) && entity.getType() == Entity.CLASS
                && !anon.matcher(entity.getFqn()).matches()) {
            ModeledDeclaredType dec = (ModeledDeclaredType) entity;
            ModeledEntity sup = dec.getSuperclass();
            boolean first = true;
            while (sup != null) {
                if (sup.getType() == Entity.PARAMETERIZED_TYPE) {
                    sup = ((ModeledParametrizedType) sup).getBaseType();
                } else if (projectID.equals(sup.getProjectID())) {
                    if (sup instanceof ModeledDeclaredType) {
                        ModeledDeclaredType ssup = (ModeledDeclaredType) sup;
                        if (first) {
                            directParents.add(ssup);
                            first = false;
                        }
                        parents.add(ssup);
                        sup = ssup.getSuperclass();
                    } else {
                        logger.severe("Declared type expected: " + sup);
                        sup = null;
                    }
                } else {
                    sup = null;
                }
            }
            task.progress();
        }
    }
    task.finish();
    Averager<Double> avgNoc = Averager.create();
    Averager<Double> avgDnoc = Averager.create();
    for (ModeledEntity entity : model.getEntities()) {
        if (projectID.equals(entity.getProjectID()) && entity.getType() == Entity.CLASS
                && !anon.matcher(entity.getFqn()).matches()) {
            ModeledDeclaredType dec = (ModeledDeclaredType) entity;
            Double value = metrics.getEntityValue(dec.getEntityID(), Metric.NUMBER_OF_CLASS_CHILDREN);
            if (value == null) {
                value = (double) parents.count(dec);
                metrics.setEntityValue(dec.getEntityID(), dec.getFileID(), Metric.NUMBER_OF_CLASS_CHILDREN,
                        value);
                exec.insert(EntityMetricsTable.createInsert(projectID, dec.getFileID(), dec.getEntityID(),
                        Metric.NUMBER_OF_CLASS_CHILDREN, value));
            }
            avgNoc.addValue(value);

            value = metrics.getEntityValue(dec.getEntityID(), Metric.NUMBER_OF_DIRECT_CLASS_CHILDREN);
            if (value == null) {
                value = (double) directParents.count(dec);
                metrics.setEntityValue(dec.getEntityID(), dec.getFileID(),
                        Metric.NUMBER_OF_DIRECT_CLASS_CHILDREN, value);
                exec.insert(EntityMetricsTable.createInsert(projectID, dec.getFileID(), dec.getEntityID(),
                        Metric.NUMBER_OF_DIRECT_CLASS_CHILDREN, value));
            }
            avgDnoc.addValue(value);
        }
    }
    if (metrics.missingValue(Metric.NUMBER_OF_CLASS_CHILDREN)) {
        metrics.setValue(Metric.NUMBER_OF_CLASS_CHILDREN, avgNoc);
        exec.insert(ProjectMetricsTable.createInsert(projectID, Metric.NUMBER_OF_CLASS_CHILDREN, avgNoc));
    }
    if (metrics.missingValue(Metric.NUMBER_OF_DIRECT_CLASS_CHILDREN)) {
        metrics.setValue(Metric.NUMBER_OF_DIRECT_CLASS_CHILDREN, avgDnoc);
        exec.insert(
                ProjectMetricsTable.createInsert(projectID, Metric.NUMBER_OF_DIRECT_CLASS_CHILDREN, avgDnoc));
    }
    task.finish();
}

From source file:com.b2international.snowowl.snomed.datastore.request.SnomedConceptCreateRequest.java

private void convertDescriptions(TransactionContext context, final String conceptId) {
    final Set<String> requiredDescriptionTypes = newHashSet(Concepts.FULLY_SPECIFIED_NAME,
            Concepts.REFSET_DESCRIPTION_ACCEPTABILITY_PREFERRED);
    final Multiset<String> preferredLanguageRefSetIds = HashMultiset.create();
    final Set<String> synonymAndDescendantIds = context.service(Synonyms.class).get();

    for (final SnomedDescriptionCreateRequest descriptionRequest : descriptions) {

        descriptionRequest.setConceptId(conceptId);

        if (null == descriptionRequest.getModuleId()) {
            descriptionRequest.setModuleId(getModuleId());
        }

        descriptionRequest.execute(context);

        final String typeId = descriptionRequest.getTypeId();

        if (synonymAndDescendantIds.contains(typeId)) {
            for (final Entry<String, Acceptability> acceptability : descriptionRequest.getAcceptability()
                    .entrySet()) {
                if (Acceptability.PREFERRED.equals(acceptability.getValue())) {
                    preferredLanguageRefSetIds.add(acceptability.getKey());
                    requiredDescriptionTypes.remove(Concepts.REFSET_DESCRIPTION_ACCEPTABILITY_PREFERRED);
                }
            }
        }

        requiredDescriptionTypes.remove(typeId);
    }

    if (!requiredDescriptionTypes.isEmpty()) {
        throw new BadRequestException(
                "At least one fully specified name and one preferred term must be supplied with the concept.");
    }

    for (final com.google.common.collect.Multiset.Entry<String> languageRefSetIdOccurence : preferredLanguageRefSetIds
            .entrySet()) {
        if (languageRefSetIdOccurence.getCount() > 1) {
            throw new BadRequestException(
                    "More than one preferred term has been added for language reference set %s.",
                    languageRefSetIdOccurence.getElement());
        }
    }
}
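
The entrySet() loop above is the standard multiset idiom for detecting duplicates: each entry pairs a distinct element with its occurrence count. A minimal standalone sketch of that check, using hypothetical language reference set ids:

Multiset<String> refSetIds = HashMultiset.create();
refSetIds.add("en-GB");
refSetIds.add("en-US");
refSetIds.add("en-GB");  // a second preferred term for the same refset
for (Multiset.Entry<String> entry : refSetIds.entrySet()) {
    if (entry.getCount() > 1) {
        System.out.println("Duplicate: " + entry.getElement());  // prints en-GB
    }
}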

From source file:org.bridgedb.tools.qc.PatternChecker.java

public void run(File f) throws SQLException, IDMapperException {
    String database = "" + f;
    //TODO: we can use the new Iterator interface here...
    DBConnector con = new DataDerby();
    Connection sqlcon = null;
    sqlcon = con.createConnection(database, 0);

    Multimap<DataSource, String> missExamples = HashMultimap.create();
    Multiset<DataSource> misses = HashMultiset.create();
    Multiset<DataSource> totals = HashMultiset.create();
    Map<DataSource, Pattern> patterns = DataSourcePatterns.getPatterns();

    //      String url = "jdbc:derby:jar:(" + f + ")database";
    //      IDMapperRdb gdb = SimpleGdbFactory.createInstance("" + f, url);

    Statement st = sqlcon.createStatement();
    ResultSet rs = st.executeQuery("select id, code from datanode");

    while (rs.next()) {
        String id = rs.getString(1);
        String syscode = rs.getString(2);
        if (DataSource.systemCodeExists(syscode)) {
            DataSource ds = DataSource.getExistingBySystemCode(syscode);
            if (patterns.get(ds) == null)
                continue; // skip if there is no pattern defined.

            Set<DataSource> matches = DataSourcePatterns.getDataSourceMatches(id);
            if (!matches.contains(ds)) {
                if (missExamples.get(ds).size() < 10)
                    missExamples.put(ds, id);
                misses.add(ds);
            }
            totals.add(ds);
        }
    }

    //         String code = rs.getString (2);
    //System.out.println (id + "\t" + code);

    for (DataSource ds : totals.elementSet()) {
        int miss = misses.count(ds);
        int total = totals.count(ds);

        if (miss > 0) {
            String severity = miss < (total / 25) ? "WARNING" : "ERROR";
            System.out.println(severity + ": " + miss + "/" + total + " (" + miss * 100 / total
                    + "%) ids do not match expected pattern for " + ds);
            System.out.println(severity + ": expected pattern is '" + patterns.get(ds) + "'");
            boolean first = true;
            for (String id : missExamples.get(ds)) {
                System.out.print(first ? severity + ": aberrant ids are e.g. " : ", ");
                first = false;
                System.out.print("'" + id + "'");
            }
            System.out.println();
        }
    }

    allMisses.addAll(misses);
    allTotals.addAll(totals);
}
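
Note the closing addAll calls: because a multiset stores occurrences, addAll merges counts, letting the per-database misses and totals accumulate into the run-wide allMisses and allTotals. A minimal standalone sketch of that merging behavior, with a hypothetical system code:

Multiset<String> total = HashMultiset.create();
Multiset<String> batch = HashMultiset.create();
total.add("En", 2);
batch.add("En", 3);
total.addAll(batch);  // occurrence counts are summed
System.out.println(total.count("En"));  // 5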

From source file:webreduce.indexing.luceneSearcher.java

public List<Dataset> search() throws IOException {

    List<Dataset> resultList;
    resultList = new ArrayList<>();

    BooleanQuery.Builder finalQueryBuilder = new BooleanQuery.Builder();
    BooleanQuery.Builder entityQueryBuilder = new BooleanQuery.Builder();
    BooleanQuery.Builder attributeQueryBuilder = new BooleanQuery.Builder();

    //gives me queries
    QueryParser qpa = new QueryParser(ATTRIBUTES_FIELD, new CustomAnalyzer());

    QueryParser qpe = new QueryParser(ENTITIES_FIELD, new CustomAnalyzer());

    //QueryWrapperFilter queryFilter = new QueryWrapperFilter(query);
    //CachingWrapperFilter cachingFilter = new CachingWrapperFilter(queryFilter);

    //CachingWrapperQuery typeFilterR = new CachingWrapperFilter(new TermsFilter(new Term(TABLE_TYPE_FIELD, "RELATION")));

    IndexReader reader = DirectoryReader.open(FSDirectory.open(Paths.get(indexDir)));

    IndexSearcher searcher = new IndexSearcher(reader);

    QueryBuilder queryBuilder = new QueryBuilder(new CustomAnalyzer());

    System.out.println("Attributes: \n" + Arrays.deepToString(attributes));
    System.out.println("Entities: \n" + Arrays.deepToString(entities));

    //add attributes one by one
    for (String a : attributes) {

        Query qa;
        try {
            qa = qpa.parse("\"" + a + "\"");
            attributeQueryBuilder.add(qa, BooleanClause.Occur.SHOULD);

        } catch (ParseException ex) {
            // skip attributes that fail to parse
        }
    } //end of for loop
      //remove null

    HashSet<String> entitySet;
    entitySet = new HashSet<>(Arrays.asList(entities));
    entitySet.remove(null);

    entities = entitySet.toArray(new String[entitySet.size()]);

    System.out.println("Entities after null removal \n" + Arrays.deepToString(entities));

    Multiset<Integer> docNoCount;
    docNoCount = HashMultiset.create();

    //Take only the top 50 entities (or fewer, if the array is shorter)
    String[] entities50 = new String[50];
    System.arraycopy(entities, 0, entities50, 0, Math.min(entities.length, 50));

    System.out.println(Arrays.deepToString(entities50));

    for (String e : entities50) {
        System.out.println(e);
        if (e == null) {
            continue;
        }
        Query qe;
        try {
            qe = qpe.parse(QueryParserBase.escape(e));
            //Query qeph = qpe.parse("\"" + QueryParserBase.escape(e) + "\"");
            finalQueryBuilder.add(qe, BooleanClause.Occur.MUST); //add entities boolean query
            finalQueryBuilder.add(attributeQueryBuilder.build(), BooleanClause.Occur.MUST); //add attributes query

            TopDocs td = searcher.search(finalQueryBuilder.build(), numberOfResults * 10);
            for (ScoreDoc sd : td.scoreDocs) {
                int docNo = sd.doc;
                docNoCount.add(docNo);
            }
        } catch (ParseException ex) {
            // skip entities whose queries fail to parse
        }

        System.out.println("Top Doc id: \n"
                + Multisets.copyHighestCountFirst(docNoCount).entrySet().iterator().next().getElement());

    }

    //Sort the returned docs by their frequency and store it in docNoSorted
    ImmutableMultiset<Integer> docNoSorted = Multisets.copyHighestCountFirst(docNoCount);
    //Get the entry set of the frequency ordered document set
    ImmutableSet<Multiset.Entry<Integer>> entrySet = Multisets.copyHighestCountFirst(docNoCount).entrySet();
    //Get the iterator for the sorted entry set
    UnmodifiableIterator<Multiset.Entry<Integer>> iterator = entrySet.iterator();

    Multiset.Entry<Integer> bestEntry = iterator.next();
    int bestDocId = bestEntry.getElement();
    System.out.println("Best doc id: " + bestDocId + ", count: " + bestEntry.getCount());

    //
    Set<Integer> elementSet = docNoSorted.elementSet();
    Integer next = elementSet.iterator().next();
    System.out.println("Most frequent document id: " + next);
    int resultSetSize;
    resultSetSize = docNoSorted.elementSet().size();

    System.out.println("Entry Set Size: " + resultSetSize + " Cardinality: " + docNoSorted.size());

    Set<Integer> elementSet1 = Multisets.copyHighestCountFirst(docNoSorted).elementSet();

    List<Integer> t = new ArrayList<Integer>(elementSet1);

    List<Integer> subList = t.subList(0, Math.min(numberOfResults, t.size()));
    //ArrayList subArrayList = new ArrayList(subList);
    Iterator<Integer> subListIterator = subList.iterator();

    //we have all the web table doc IDs;
    //fetch each document and convert it to a Dataset
    while (subListIterator.hasNext()) {
        int docID = subListIterator.next();
        Document doc;
        doc = searcher.doc(docID);
        String jsonString = doc.get("full_result");
        Dataset er = Dataset.fromJson(jsonString);
        resultList.add(er);
    }
    return resultList;
}
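
The frequency-ranking logic above boils down to a single call: Multisets.copyHighestCountFirst returns an ImmutableMultiset whose iteration order runs from highest count to lowest. A minimal standalone sketch of that idiom (not part of the source file):

Multiset<Integer> docCounts = HashMultiset.create();
docCounts.add(7);
docCounts.add(7);
docCounts.add(3);
ImmutableMultiset<Integer> ranked = Multisets.copyHighestCountFirst(docCounts);
Multiset.Entry<Integer> best = ranked.entrySet().iterator().next();
System.out.println(best.getElement() + " x " + best.getCount());  // 7 x 2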

From source file:com.cisco.oss.foundation.logging.structured.AbstractFoundationLoggingMarker.java

protected void populateUserFieldMap() {

    Class<? extends AbstractFoundationLoggingMarker> clazz = this.getClass();
    Multiset<Field> fieldList = markerClassFields.get(clazz);

    if (fieldList == null) {

        fieldList = ConcurrentHashMultiset.create();

        Class<?> cls = clazz;

        while (AbstractFoundationLoggingMarker.class.isAssignableFrom(cls)) {
            Field[] declaredFields = cls.getDeclaredFields();

            for (Field field : declaredFields) {

                if (field.isAnnotationPresent(UserField.class)) {
                    field.setAccessible(true);
                    fieldList.add(field);
                }
            }
            markerClassFields.put(clazz, fieldList);
            cls = cls.getSuperclass();
        }

    }

    for (Field field : fieldList) {

        try {

            Object value = field.get(this);

            UserField userField = field.getAnnotation(UserField.class);
            if (value == null && userField.suppressNull()) {
                value = FoundationLoggingMarker.NO_OPERATION;
            }
            userFields.put(field.getName(), value == null ? "null" : value);
        } catch (IllegalAccessException e) {
            throw new IllegalArgumentException(e);
        }
    }

}

From source file:com.continuuity.weave.internal.appmaster.RunningContainers.java

/**
 * Handle completion of a container.
 * @param status The completion status.
 * @param restartRunnables Set of runnable names that requires restart.
 */
void handleCompleted(YarnContainerStatus status, Multiset<String> restartRunnables) {
    containerLock.lock();
    String containerId = status.getContainerId();
    int exitStatus = status.getExitStatus();
    ContainerState state = status.getState();

    try {
        Map<String, WeaveContainerController> lookup = containers.column(containerId);
        if (lookup.isEmpty()) {
            // It's OK because if a container is stopped through removeLast, this would be empty.
            return;
        }

        if (lookup.size() != 1) {
            LOG.warn("More than one controller found for container {}", containerId);
        }

        if (exitStatus != 0) {
            LOG.warn("Container {} exited abnormally with state {}, exit code {}. Re-request the container.",
                    containerId, state, exitStatus);
            restartRunnables.add(lookup.keySet().iterator().next());
        } else {
            LOG.info("Container {} exited normally with state {}", containerId, state);
        }

        for (Map.Entry<String, WeaveContainerController> completedEntry : lookup.entrySet()) {
            String runnableName = completedEntry.getKey();
            WeaveContainerController controller = completedEntry.getValue();
            controller.completed(exitStatus);

            removeInstanceId(runnableName, getInstanceId(controller.getRunId()));
            resourceReport.removeRunnableResources(runnableName, containerId);
        }

        lookup.clear();
        containerChange.signalAll();
    } finally {
        containerLock.unlock();
    }
}

From source file:it.cnr.isti.hpc.dexter.disambiguation.TurkishEntityDisambiguator.java

@Override
public EntityMatchList disambiguate(DexterLocalParams localParams, SpotMatchList sml) {
    entityScoreMap = new HashMap<String, EntityScores>();
    selectedEntities = new HashSet<String>();
    Multiset<String> entityFrequencyMultiset = HashMultiset.create();

    EntityMatchList entities = sml.getEntities();
    String inputText = localParams.getParams().get("text");
    String algorithm = Property.getInstance().get("algorithm");

    String ambigious = Property.getInstance().get("algorithm.ambigious");

    List<Token> inputTokens = Zemberek.getInstance().disambiguateFindTokens(inputText, false, true);
    List<Double> documentVector = DescriptionEmbeddingAverage.getAverageVectorList(inputText);
    Multiset<String> inputTokensMultiset = HashMultiset.create();
    for (Token token : inputTokens) {
        inputTokensMultiset.add(token.getMorphText());
    }

    Multiset<String> domainMultiset = HashMultiset.create();
    Multiset<String> typeMultiset = HashMultiset.create();
    HashMap<String, Double> entitySimMap = new HashMap<String, Double>();
    // if (printCandidateEntities) {
    // printEntities(entities);
    // }
    HashSet<String> words = new HashSet<String>();
    Multiset<String> leskWords = HashMultiset.create();

    // first pass for finding number of types and domains
    for (int i = 0; i < entities.size(); i++) {
        EntityMatch em = entities.get(i);
        String id = em.getId();
        if (!entityFrequencyMultiset.contains(id)) {
            entityFrequencyMultiset.add(id);
            Entity entity = em.getEntity();
            words.add(entity.getShingle().getText());
            String type = entity.getPage().getType();
            if (type != null && type.length() > 0) {
                typeMultiset.add(type);
            }
            String domain = entity.getPage().getDomain();
            if (domain != null && domain.length() > 0) {
                domainMultiset.add(domain);
            }

            String desc = entity.getPage().getDescription();
            List<Token> tokens = Zemberek.getInstance().disambiguateFindTokens(desc, false, true);
            for (Token token : tokens) {
                leskWords.add(token.getMorphText());
            }

        } else {
            entityFrequencyMultiset.add(id);
        }
    }

    int maxDomainCount = 0;
    for (String domain : Multisets.copyHighestCountFirst(domainMultiset).elementSet()) {
        maxDomainCount = domainMultiset.count(domain);
        break;
    }
    int maxTypeCount = 0;
    for (String type : Multisets.copyHighestCountFirst(typeMultiset).elementSet()) {
        maxTypeCount = typeMultiset.count(type);
        break;
    }

    double maxSuffixScore = 0, maxLeskScore = 0, maxSimpleLeskScore = 0, maxLinkScore = 0,
            maxHashInfoboxScore = 0, maxwordvecDescriptionLocalScore = 0, maxHashDescriptionScore = 0,
            maxPopularityScore = 0, maxWordvectorAverage = 0, maxWordvecLinksScore = 0;
    // second pass compute similarities between entities in a window
    int currentSpotIndex = -1;
    SpotMatch currentSpot = null;
    for (int i = 0; i < entities.size(); i++) {
        EntityMatch em = entities.get(i);
        SpotMatch spot = em.getSpot();
        if (currentSpot == null || spot != currentSpot) {
            currentSpotIndex++;
            currentSpot = spot;
        }

        String id = em.getId();
        Entity entity = entities.get(i).getEntity();
        EntityPage page = entities.get(i).getEntity().getPage();
        String domain = page.getDomain();
        String type = page.getType();
        Shingle shingle = entity.getShingle();

        /* windowing algorithm starts */
        int left = currentSpotIndex - window;
        int right = currentSpotIndex + window;
        if (left < 0) {
            right -= left;
            left = 0;
        }
        if (right > sml.size()) {
            left += (sml.size()) - right;
            right = sml.size();
            if (left < 0) {
                left = 0;
            }
        }

        double linkScore = 0, hashInfoboxScore = 0, wordvecDescriptionLocalScore = 0, hashDescriptionScore = 0,
                wordvecLinksScore = 0;
        for (int j = left; j < right; j++) {
            SpotMatch sm2 = sml.get(j);
            EntityMatchList entities2 = sm2.getEntities();
            for (EntityMatch em2 : entities2) {
                String id2 = em2.getId();
                EntityPage page2 = em2.getEntity().getPage();
                int counter = 0;
                if (!ambigious.equals("true")) {
                    for (EntityMatch entityMatch : entities2) {
                        if (entityMatch.getId().startsWith("w")) {
                            counter++;
                        }
                    }
                }

                if ((ambigious.equals("true") || counter == 1) && em.getSpot() != em2.getSpot()
                        && !id.equals(id2)) {
                    // Link Similarity calculation starts
                    double linkSim = 0;
                    if (id.startsWith("w") && id2.startsWith("w")) {
                        if (entitySimMap.containsKey("link" + id + id2)) {
                            linkSim = entitySimMap.get("link" + id + id2);
                        } else {
                            HashSet<String> set1 = Sets.newHashSet(page.getLinks().split(" "));
                            HashSet<String> set2 = Sets.newHashSet(page2.getLinks().split(" "));
                            linkSim = JaccardCalculator.calculateSimilarity(set1, set2);
                            entitySimMap.put("link" + id + id2, linkSim);
                        }
                        linkScore += linkSim;
                        // Link Similarity calculation ends
                    }
                    // Entity embedding similarity calculation starts
                    double eeSim = 0;
                    if (id.startsWith("w") && id2.startsWith("w")) {
                        if (entitySimMap.containsKey("ee" + id + id2)) {
                            eeSim = entitySimMap.get("ee" + id + id2);
                        } else {
                            eeSim = EntityEmbeddingSimilarity.getInstance().getSimilarity(page, page2);
                            entitySimMap.put("ee" + id + id2, eeSim);
                        }
                        hashInfoboxScore += eeSim;
                    }
                    double w2veclinksSim = 0;
                    if (id.startsWith("w") && id2.startsWith("w")) {
                        if (entitySimMap.containsKey("wl" + id + id2)) {
                            w2veclinksSim = entitySimMap.get("wl" + id + id2);
                        } else {
                            w2veclinksSim = AveragePooling.getInstance().getSimilarity(page.getWord2vec(),
                                    page2.getWord2vec());
                            entitySimMap.put("wl" + id + id2, w2veclinksSim);
                        }
                        wordvecLinksScore += w2veclinksSim;
                    }

                    // Entity embedding similarity calculation ends

                    // Description word2vec similarity calculation
                    // starts
                    double word2vecSim = 0;

                    if (entitySimMap.containsKey("w2v" + id + id2)) {
                        word2vecSim = entitySimMap.get("w2v" + id + id2);
                    } else {
                        word2vecSim = AveragePooling.getInstance().getSimilarity(page2.getDword2vec(),
                                page.getDword2vec());
                        entitySimMap.put("w2v" + id + id2, word2vecSim);
                    }
                    wordvecDescriptionLocalScore += word2vecSim;
                    // Description word2vec similarity calculation ends

                    // Description autoencoder similarity calculation
                    // starts
                    double autoVecSim = 0;

                    if (entitySimMap.containsKey("a2v" + id + id2)) {
                        autoVecSim = entitySimMap.get("a2v" + id + id2);
                    } else {
                        autoVecSim = AveragePooling.getInstance().getSimilarity(page2.getDautoencoder(),
                                page.getDautoencoder());
                        entitySimMap.put("a2v" + id + id2, autoVecSim);
                    }
                    hashDescriptionScore += autoVecSim;
                    // Description autoencoder similarity calculation
                    // ends

                }
            }
        }
        if (linkScore > maxLinkScore) {
            maxLinkScore = linkScore;
        }
        if (hashInfoboxScore > maxHashInfoboxScore) {
            maxHashInfoboxScore = hashInfoboxScore;
        }
        if (wordvecDescriptionLocalScore > maxwordvecDescriptionLocalScore) {
            maxwordvecDescriptionLocalScore = wordvecDescriptionLocalScore;
        }
        if (hashDescriptionScore > maxHashDescriptionScore) {
            maxHashDescriptionScore = hashDescriptionScore;
        }
        if (wordvecLinksScore > maxWordvecLinksScore) {
            maxWordvecLinksScore = wordvecLinksScore;
        }

        /* windowing algorithm ends */

        double domainScore = 0;
        if (domainMultiset.size() > 0 && maxDomainCount > 1 && domainMultiset.count(domain) > 1) {
            domainScore = (double) domainMultiset.count(domain) / maxDomainCount;
        }
        double typeScore = 0;
        if (typeMultiset.size() > 0 && maxTypeCount > 1 && typeMultiset.count(type) > 1) {
            typeScore = (double) typeMultiset.count(type) / maxTypeCount;
        }
        if (typeBlackList.contains(type)) {
            typeScore /= 10;
        }

        double typeContentScore = 0;
        if (type.length() > 0 && StringUtils.containsIgnoreCase(words.toString(), type)) {
            typeContentScore = 1;
        }

        double typeClassifierScore = TypeClassifier.getInstance().predict(page, page.getTitle(), page.getType(),
                entity.getShingle().getSentence());

        double wordvecDescriptionScore = AveragePooling.getInstance().getSimilarity(documentVector,
                page.getDword2vec());
        if (wordvecDescriptionScore > maxWordvectorAverage) {
            maxWordvectorAverage = wordvecDescriptionScore;
        }

        double suffixScore = 0;

        if (type != null && type.length() > 0) {
            Set<String> suffixes = new HashSet<String>();
            String t = entity.getTitle().toLowerCase(new Locale("tr", "TR"));

            for (int x = 0; x < entities.size(); x++) {
                EntityMatch e2 = entities.get(x);
                if (e2.getId().equals(entity.getId())) {
                    suffixes.add(e2.getMention());
                }
            }
            suffixes.remove(t);
            suffixes.remove(entity.getTitle());
            // String inputTextLower = inputText.toLowerCase(new
            // Locale("tr",
            // "TR"));
            // while (inputTextLower.contains(t)) {
            // int start = inputTextLower.indexOf(t);
            // int end = inputTextLower.indexOf(" ", start + t.length());
            // if (end > start) {
            // String suffix = inputTextLower.substring(start, end);
            // // .replaceAll("\\W", "");
            // if (suffix.contains("'")
            // || (Zemberek.getInstance().hasMorph(suffix)
            // && !suffix.equals(t) && suffix.length() > 4)) {
            // suffixes.add(suffix);
            // }
            // inputTextLower = inputTextLower.substring(end);
            // } else {
            // break;
            // }
            // }
            if (suffixes.size() >= minSuffix) {
                for (String suffix : suffixes) {
                    double sim = gd.calculateSimilarity(suffix, type);
                    suffixScore += sim;
                }
            }
        }

        // String entitySuffix = page.getSuffix();
        // String[] inputSuffix = shingle.getSuffix().split(" ");
        // for (int j = 0; j < inputSuffix.length; j++) {
        // if (entitySuffix.contains(inputSuffix[j])) {
        // suffixScore += 0.25f;
        // }
        // }

        if (suffixScore > maxSuffixScore) {
            maxSuffixScore = suffixScore;
        }
        // if (id.equals("w691538")) {
        // LOGGER.info("");
        // }
        double letterCaseScore = 0;
        int lc = page.getLetterCase();
        if (StringUtils.isAllLowerCase(em.getMention()) && lc == 0 && id.startsWith("t")) {
            letterCaseScore = 1;
        } else if (StringUtils.isAllUpperCase(em.getMention()) && lc == 1 && id.startsWith("w")) {
            letterCaseScore = 1;
        } else if (Character.isUpperCase(em.getMention().charAt(0)) && lc == 2 && id.startsWith("w")) {
            letterCaseScore = 1;
        } else if (StringUtils.isAllLowerCase(em.getMention()) && id.startsWith("t")) {
            letterCaseScore = 1;
        }

        double nameScore = 1 - LevenshteinDistanceCalculator.calculateDistance(page.getTitle(),
                Zemberek.removeAfterSpostrophe(em.getMention()));

        double popularityScore = page.getRank();
        if (id.startsWith("w")) {
            popularityScore = Math.log10(popularityScore + 1);
            if (popularityScore > maxPopularityScore) {
                maxPopularityScore = popularityScore;
            }
        }

        double leskScore = 0, simpleLeskScore = 0;

        String desc = em.getEntity().getPage().getDescription();
        if (desc != null) {
            List<Token> tokens = Zemberek.getInstance().disambiguateFindTokens(desc, false, true);
            for (Token token : tokens) {
                if (inputTokensMultiset.contains(token.getMorphText())
                        && !TurkishNLP.isStopWord(token.getMorphText())) {
                    simpleLeskScore += inputTokensMultiset.count(token.getMorphText());
                }
                if (leskWords.contains(token.getMorphText()) && !TurkishNLP.isStopWord(token.getMorphText())) {
                    leskScore += leskWords.count(token.getMorphText());
                }

            }
            leskScore /= Math.log(tokens.size() + 1);
            simpleLeskScore /= Math.log(tokens.size() + 1);
            if (leskScore > maxLeskScore) {
                maxLeskScore = leskScore;
            }
            if (simpleLeskScore > maxSimpleLeskScore) {
                maxSimpleLeskScore = simpleLeskScore;
            }

            if (!entityScoreMap.containsKey(id)) {
                EntityScores scores = new EntityScores(em, id, popularityScore, nameScore, letterCaseScore,
                        suffixScore, wordvecDescriptionScore, typeContentScore, typeScore, domainScore,
                        hashDescriptionScore, wordvecDescriptionLocalScore, hashInfoboxScore, linkScore,
                        wordvecLinksScore, leskScore, simpleLeskScore, typeClassifierScore);
                entityScoreMap.put(id, scores);
            } else {
                EntityScores entityScores = entityScoreMap.get(id);
                entityScores.setHashInfoboxScore((entityScores.getHashInfoboxScore() + hashInfoboxScore) / 2);
                entityScores.setHashDescriptionScore(
                        (entityScores.getHashDescriptionScore() + hashDescriptionScore) / 2);
                entityScores.setLinkScore((entityScores.getLinkScore() + linkScore) / 2);
                entityScores.setWordvecDescriptionLocalScore(
                        (entityScores.getWordvecDescriptionLocalScore() + wordvecDescriptionLocalScore) / 2);
                entityScores
                        .setWordvecLinksScore((entityScores.getWordvecLinksScore() + wordvecLinksScore) / 2);
                entityScores.setLeskScore((entityScores.getLeskScore() + leskScore) / 2);

            }

        }
    }
    /* normalization and total score calculation starts */
    Set<String> set = new HashSet<String>();
    for (int i = 0; i < entities.size(); i++) {
        EntityMatch em = entities.get(i);
        String id = em.getId();
        EntityScores entityScores = entityScoreMap.get(id);
        if (set.contains(id)) {
            continue;
        }
        if (id.startsWith("w")) {
            if (maxLinkScore > 0 && entityScores.getLinkScore() > 0) {
                entityScores.setLinkScore(entityScores.getLinkScore() / maxLinkScore);
            }
            if (maxHashInfoboxScore > 0 && entityScores.getHashInfoboxScore() > 0) {
                entityScores.setHashInfoboxScore(entityScores.getHashInfoboxScore() / maxHashInfoboxScore);
            }
            if (maxWordvecLinksScore > 0 && entityScores.getWordvecLinksScore() > 0) {
                entityScores.setWordvecLinksScore(entityScores.getWordvecLinksScore() / maxWordvecLinksScore);
            }
            if (maxPopularityScore > 0 && entityScores.getPopularityScore() > 0) {
                entityScores.setPopularityScore(entityScores.getPopularityScore() / maxPopularityScore);
            }
        }
        if (maxwordvecDescriptionLocalScore > 0 && entityScores.getWordvecDescriptionLocalScore() > 0) {
            entityScores.setWordvecDescriptionLocalScore(
                    entityScores.getWordvecDescriptionLocalScore() / maxwordvecDescriptionLocalScore);
        }
        if (maxHashDescriptionScore > 0 && entityScores.getHashDescriptionScore() > 0) {
            entityScores
                    .setHashDescriptionScore(entityScores.getHashDescriptionScore() / maxHashDescriptionScore);
        }
        if (maxWordvectorAverage > 0 && entityScores.getWordvecDescriptionScore() > 0) {
            entityScores.setWordvecDescriptionScore(
                    entityScores.getWordvecDescriptionScore() / maxWordvectorAverage);
        }
        if (maxLeskScore > 0 && entityScores.getLeskScore() > 0) {
            entityScores.setLeskScore(entityScores.getLeskScore() / maxLeskScore);
        }
        if (maxSimpleLeskScore > 0 && entityScores.getSimpleLeskScore() > 0) {
            entityScores.setSimpleLeskScore(entityScores.getSimpleLeskScore() / maxSimpleLeskScore);
        }
        if (maxSuffixScore > 0 && entityScores.getSuffixScore() > 0) {
            entityScores.setSuffixScore(entityScores.getSuffixScore() / maxSuffixScore);
        }
        set.add(id);
    }

    LOGGER.info("\t"
            + "id\tTitle\tURL\tScore\tPopularity\tName\tLesk\tSimpeLesk\tCase\tNoun\tSuffix\tTypeContent\tType\tDomain\twordvecDescription\twordvecDescriptionLocal\thashDescription\thashInfobox\tword2vecLinks\tLink\t\ttypeClassifier\tDescription");
    for (int i = 0; i < entities.size(); i++) {
        EntityMatch em = entities.get(i);
        String id = em.getId();
        EntityScores e = entityScoreMap.get(id);
        double wikiScore = 0;
        if (id.startsWith("w") && Character.isUpperCase(em.getMention().charAt(0))) {
            wikiScore = wikiWeight;
        } else if (id.startsWith("t") && Character.isLowerCase(em.getMention().charAt(0))) {
            wikiScore = wikiWeight;
        }
        // if(id.equals("w508792")){
        // LOGGER.info("");
        // }
        double totalScore = wikiScore + e.getPopularityScore() * popularityWeight
                + e.getNameScore() * nameWeight + e.getLeskScore() * leskWeight
                + e.getSimpleLeskScore() * simpleLeskWeight + e.getLetterCaseScore() * letterCaseWeight
                + e.getSuffixScore() * suffixWeight + e.getTypeContentScore() * typeContentWeight
                + e.getTypeScore() * typeWeight + e.getDomainScore() * domainWeight
                + e.getWordvecDescriptionScore() * wordvecDescriptionWeight
                + e.getWordvecDescriptionLocalScore() * wordvecDescriptionLocalWeight
                + e.getHashDescriptionScore() * hashDescriptionWeight
                + e.getHashInfoboxScore() * hashInfoboxWeight + e.getWordvecLinksScore() * word2vecLinksWeight
                + e.getLinkScore() * linkWeight + e.getTypeClassifierkScore() * typeClassifierkWeight;
        if (ranklib) {
            totalScore = RankLib.getInstance().score(e);
        }

        if (em.getEntity().getPage().getUrlTitle().contains("(")) {
            totalScore /= 2;
        }
        em.setScore(totalScore);
        e.setScore(totalScore);

        LOGGER.info("\t" + id + "\t" + em.getEntity().getPage().getTitle() + "\t"
                + em.getEntity().getPage().getUrlTitle() + "\t" + em.getScore() + "\t"
                + e.getPopularityScore() * popularityWeight + "\t" + e.getNameScore() * nameWeight + "\t"
                + e.getLeskScore() * leskWeight + "\t" + e.getSimpleLeskScore() * simpleLeskWeight + "\t"
                + e.getLetterCaseScore() * letterCaseWeight + "\t" + e.getSuffixScore() * suffixWeight + "\t"
                + e.getTypeContentScore() * typeContentWeight + "\t" + e.getTypeScore() * typeWeight + "\t"
                + e.getDomainScore() * domainWeight + "\t"
                + e.getWordvecDescriptionScore() * wordvecDescriptionWeight + "\t"
                + e.getWordvecDescriptionLocalScore() * wordvecDescriptionLocalWeight + "\t"
                + e.getHashDescriptionScore() * hashDescriptionWeight + "\t"
                + e.getHashInfoboxScore() * hashInfoboxWeight + "\t"
                + e.getWordvecLinksScore() * word2vecLinksWeight + "\t" + e.getLinkScore() * linkWeight + "\t"
                + e.getTypeClassifierkScore() * typeClassifierkWeight + "\t"
                + em.getEntity().getPage().getDescription());
    }

    // if (annotateEntities) {
    // annotateEntities(localParams.getParams().get("originalText"), sml);
    // }

    EntityMatchList eml = new EntityMatchList();
    for (SpotMatch match : sml) {
        EntityMatchList list = match.getEntities();
        if (!list.isEmpty()) {
            list.sort();
            eml.add(list.get(0));
            selectedEntities.add(list.get(0).getId());
        }
    }
    return eml;
}

From source file:edu.cmu.lti.oaqa.baseqa.concept.rerank.scorers.LuceneConceptScorer.java

private void search(String queryString, String conf) throws RuntimeException {
    if (queryString.trim().isEmpty())
        return;
    ScoreDoc[] results;
    try {
        Query query = parser.parse(queryString);
        results = searcher.search(query, hits).scoreDocs;
    } catch (ParseException | IOException e) {
        throw new RuntimeException(e);
    }
    Multiset<String> sourceCounts = HashMultiset.create();
    for (int i = 0; i < results.length; i++) {
        try {
            Document doc = reader.document(results[i].doc);
            String source = sourceFieldName == null ? null : doc.get(sourceFieldName);
            String uri = uriPrefix.get(source) + (idFieldName == null ? null : doc.get(idFieldName));
            String gconf = conf + "/global";
            if (!uri2conf2rank.contains(uri, gconf) || uri2conf2rank.get(uri, gconf) > i) {
                uri2conf2rank.put(uri, gconf, i);
            }
            double score = results[i].score;
            if (!uri2conf2score.contains(uri, gconf) || uri2conf2score.get(uri, gconf) < score) {
                uri2conf2score.put(uri, gconf, score);
            }
            String sconf = conf + source;
            int sourceRank = sourceCounts.count(source);
            sourceCounts.add(source);
            if (!uri2conf2rank.contains(uri, sconf) || uri2conf2rank.get(uri, sconf) > sourceRank) {
                synchronizedPut(uri2conf2rank, uri, sconf, sourceRank);
            }
            if (!uri2conf2score.contains(uri, sconf) || uri2conf2score.get(uri, sconf) < score) {
                synchronizedPut(uri2conf2score, uri, sconf, score);
            }
        } catch (IOException e) {
            throw new RuntimeException(e);
        }
    }
}
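
One detail worth noting: reading sourceCounts.count(source) immediately before sourceCounts.add(source) yields a zero-based running index per source, which the method uses as the per-source rank. A minimal standalone sketch of that count-then-add idiom, with hypothetical source names:

Multiset<String> seen = HashMultiset.create();
for (String source : new String[] { "mesh", "go", "mesh", "mesh" }) {
    int rank = seen.count(source);  // occurrences so far: 0, 0, 1, 2
    seen.add(source);
    System.out.println(source + " -> rank " + rank);
}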

From source file:edu.cmu.lti.oaqa.baseqa.evidence.concept.ConceptMerger.java

@SuppressWarnings("unchecked")
@Override
public void process(JCas jcas) throws AnalysisEngineProcessException {
    // create views and get all concepts in the views
    List<JCas> views = new ArrayList<>();
    if (includeDefaultView) {
        views.add(jcas);
    }
    views.addAll(ViewType.listViews(jcas, viewNamePrefix));
    List<Concept> concepts = views.stream().map(TypeUtil::getConcepts).flatMap(Collection::stream)
            .collect(toList());
    // preserve concept fields
    Set<String> uuids = new HashSet<>();
    SetMultimap<String, String> uuid2ids = HashMultimap.create();
    SetMultimap<String, String> uuid2names = HashMultimap.create();
    SetMultimap<String, String> uuid2uris = HashMultimap.create();
    SetMultimap<String, ConceptMention> uuid2mentions = HashMultimap.create();
    SetMultimap<String, List<String>> uuid2types = HashMultimap.create();
    for (Concept concept : concepts) {
        String uuid = UUID_PREFIX + UUID.randomUUID().toString();
        uuids.add(uuid);
        uuid2ids.putAll(uuid, TypeUtil.getConceptIds(concept));
        uuid2names.putAll(uuid, TypeUtil.getConceptNames(concept));
        uuid2uris.putAll(uuid, TypeUtil.getConceptUris(concept));
        uuid2mentions.putAll(uuid, TypeUtil.getConceptMentions(concept));
        // also remove duplicated concept type entries
        TypeUtil.getConceptTypes(concept).forEach(type -> uuid2types.put(uuid, toTypeList(type)));
    }
    // connectivity detection for merging
    UndirectedGraph<String, DefaultEdge> graph = new SimpleGraph<>(DefaultEdge.class);
    uuids.forEach(graph::addVertex);
    uuid2ids.values().forEach(graph::addVertex);
    uuid2ids.entries().forEach(entry -> graph.addEdge(entry.getKey(), entry.getValue()));
    if (useName) {
        uuid2names.values().stream().map(ConceptMerger::nameKey).forEach(graph::addVertex);
        uuid2names.entries().forEach(entry -> graph.addEdge(entry.getKey(), nameKey(entry.getValue())));
    }
    views.forEach(view -> view.removeAllIncludingSubtypes(Concept.type));
    ConnectivityInspector<String, DefaultEdge> ci = new ConnectivityInspector<>(graph);
    Multiset<Integer> mergedSizes = HashMultiset.create();
    List<Concept> mergedConcepts = ci.connectedSets().stream().map(subgraph -> {
        Set<String> cuuids = subgraph.stream().filter(str -> str.startsWith(UUID_PREFIX)).collect(toSet());
        List<String> ids = cuuids.stream().map(uuid2ids::get).flatMap(Set::stream).filter(Objects::nonNull)
                .distinct().collect(toList());
        List<String> names = cuuids.stream().map(uuid2names::get).flatMap(Set::stream).filter(Objects::nonNull)
                .distinct().collect(toList());
        List<String> uris = cuuids.stream().map(uuid2uris::get).flatMap(Set::stream).filter(Objects::nonNull)
                .distinct().collect(toList());
        List<ConceptType> types = cuuids.stream().map(uuid2types::get).flatMap(Set::stream)
                .filter(Objects::nonNull).distinct().map(type -> parseTypeList(jcas, type)).collect(toList());
        List<ConceptMention> mentions = cuuids.stream().map(uuid2mentions::get).flatMap(Set::stream)
                .filter(Objects::nonNull).collect(toList());
        mergedSizes.add(cuuids.size());
        return TypeFactory.createConcept(jcas, names, uris, ImmutableList.copyOf(ids), mentions, types);
    }).collect(toList());
    mergedConcepts.forEach(Concept::addToIndexes);
    LOG.info("Merged concepts from {} concepts.", mergedSizes);
    if (LOG.isDebugEnabled()) {
        mergedConcepts.stream().map(TypeUtil::toString).forEachOrdered(c -> LOG.debug(" - {}", c));
    }
}
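
Here mergedSizes acts as a lightweight histogram: each add records how many raw concepts were folded into one merged concept, and logging the multiset prints each distinct size with its count. A minimal standalone sketch of that use:

Multiset<Integer> sizes = HashMultiset.create();
sizes.add(1);
sizes.add(1);
sizes.add(3);
System.out.println(sizes);  // prints each element with its count, e.g. [1 x 2, 3]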