Example usage for org.apache.lucene.search BooleanQuery BooleanQuery

List of usage examples for org.apache.lucene.search BooleanQuery BooleanQuery

Introduction

This page collects example usages of the org.apache.lucene.search.BooleanQuery constructor, BooleanQuery().

Prototype

BooleanQuery()

Usage
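
Before the examples collected from real projects below, here is a minimal, self-contained sketch of the basic pattern: create an empty query with the no-argument BooleanQuery() constructor and combine sub-queries with BooleanClause.Occur flags. This mutable constructor belongs to the Lucene 4.x-era API used throughout this page (Lucene 5 and later replaced it with BooleanQuery.Builder); the field names "title" and "body" are illustrative assumptions, not taken from any of the indexes below.

import org.apache.lucene.index.Term;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.WildcardQuery;

public class BooleanQueryExample {
    // Minimal sketch (Lucene 4.x API); field names are hypothetical.
    public static BooleanQuery buildQuery(String titleTerm, String bodyPrefix) {
        BooleanQuery query = new BooleanQuery();
        // MUST: the document has to contain the exact term in the "title" field
        query.add(new TermQuery(new Term("title", titleTerm)), BooleanClause.Occur.MUST);
        // SHOULD: a wildcard match in "body" boosts the score but is not required
        query.add(new WildcardQuery(new Term("body", bodyPrefix + "*")), BooleanClause.Occur.SHOULD);
        return query;
    }
}

With at least one MUST clause present, SHOULD clauses only influence ranking; if a BooleanQuery contains only SHOULD clauses, at least one of them must match.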

From source file:edu.rpi.tw.linkipedia.search.similarity.ValidationRankSurface.java

License:Open Source License

private boolean isDirectRelated(String url1, String url2, Hashtable<String, Float> dir_cache) {
    try {
        if (dir_cache.containsKey(url2 + " " + url1)) {
            if (debug)
                System.out.println("direct relate cache found: " + url2 + " " + url1 + " "
                        + dir_cache.get(url2 + " " + url1));
            return dir_cache.get(url2 + " " + url1) == 1;
        }
        //         if(cache.containsKey(url1+" "+url2)){
        //            System.out.println("cache found 2: "+url2+" "+url1);
        //            return cache.get(url1+" "+url2);
        //         }

        BooleanQuery luceneQuery = new BooleanQuery();

        TermQuery termQuery = new TermQuery(new Term("defaultLabel", url1));
        //Query labelQuery = wquery.getLabelQuery(url1);

        Term contextTerm = new Term("related_object", url2);
        TermQuery relateQuery = new TermQuery(contextTerm);

        luceneQuery.add(termQuery, BooleanClause.Occur.MUST);
        luceneQuery.add(relateQuery, BooleanClause.Occur.MUST);

        TopDocs docs = searcher.search(luceneQuery, 1);

        dir_cache.put(url1 + " " + url2, (float) docs.scoreDocs.length);
        dir_cache.put(url2 + " " + url1, (float) docs.scoreDocs.length);
        if (debug)
            System.out.println("direct: " + url1 + " | " + url2 + " : " + docs.scoreDocs.length);
        return docs.scoreDocs.length == 1;
    } catch (Exception e) {
        e.printStackTrace();
    }
    return false;
}

From source file:edu.sdsc.scigraph.vocabulary.VocabularyNeo4jImpl.java

License:Apache License

void addCommonConstraints(BooleanQuery indexQuery, Query query) {
    BooleanQuery categoryQueries = new BooleanQuery();
    for (String category : query.getCategories()) {
        categoryQueries.add(new TermQuery(new Term(Concept.CATEGORY, category)), Occur.SHOULD);
    }
    if (!query.getCategories().isEmpty()) {
        indexQuery.add(new BooleanClause(categoryQueries, Occur.MUST));
    }

    BooleanQuery prefixQueries = new BooleanQuery();
    for (String curie : query.getPrefixes()) {
        String prefix = curieUtil.getExpansion(curie);
        prefixQueries.add(new WildcardQuery(new Term(CommonProperties.URI, prefix + "*")), Occur.SHOULD);
    }
    if (!query.getPrefixes().isEmpty()) {
        indexQuery.add(new BooleanClause(prefixQueries, Occur.MUST));
    }
}

From source file:edu.sdsc.scigraph.vocabulary.VocabularyNeo4jImpl.java

License:Apache License

@Override
public List<Concept> getConceptsFromPrefix(Query query) {
    QueryParser parser = getQueryParser();
    BooleanQuery finalQuery = new BooleanQuery();
    try {
        BooleanQuery subQuery = new BooleanQuery();
        subQuery.add(parser.parse(
                formatQuery("%s%s:%s*", NodeProperties.LABEL, LuceneUtils.EXACT_SUFFIX, query.getInput())),
                Occur.SHOULD);
        Optional<String> fullUri = curieUtil.getIri(query.getInput());
        if (fullUri.isPresent()) {
            subQuery.add(parser.parse(formatQuery("%s:%s*", NodeProperties.URI, (fullUri.get()))),
                    Occur.SHOULD);
        }
        subQuery.add(parser.parse(formatQuery("%s:%s*", NodeProperties.FRAGMENT, query.getInput())),
                Occur.SHOULD);

        if (query.isIncludeSynonyms()) {
            subQuery.add(parser.parse(
                    formatQuery("%s%s:%s*", Concept.SYNONYM, LuceneUtils.EXACT_SUFFIX, query.getInput())),
                    Occur.SHOULD);
        }
        if (query.isIncludeAbbreviations()) {
            subQuery.add(parser.parse(
                    formatQuery("%s%s:%s*", Concept.ABREVIATION, LuceneUtils.EXACT_SUFFIX, query.getInput())),
                    Occur.SHOULD);
        }
        if (query.isIncludeAcronyms()) {
            subQuery.add(parser.parse(
                    formatQuery("%s%s:%s*", Concept.ACRONYM, LuceneUtils.EXACT_SUFFIX, query.getInput())),
                    Occur.SHOULD);
        }

        finalQuery.add(subQuery, Occur.MUST);
    } catch (ParseException e) {
        logger.log(Level.WARNING, "Failed to parse query", e);
    }
    addCommonConstraints(finalQuery, query);
    IndexHits<Node> hits = null;
    try (Transaction tx = graph.beginTx()) {
        hits = graph.index().getNodeAutoIndexer().getAutoIndex().query(finalQuery);
        tx.success();
    }
    return limitHits(hits, query);

}

From source file:edu.sdsc.scigraph.vocabulary.VocabularyNeo4jImpl.java

License:Apache License

@Override
public List<Concept> searchConcepts(Query query) {
    QueryParser parser = getQueryParser();
    BooleanQuery finalQuery = new BooleanQuery();
    try {
        if (query.isIncludeSynonyms() || query.isIncludeAbbreviations() || query.isIncludeAcronyms()) {
            BooleanQuery subQuery = new BooleanQuery();
            subQuery.add(LuceneUtils.getBoostedQuery(parser, query.getInput(), 10.0f), Occur.SHOULD);
            String escapedQuery = QueryParser.escape(query.getInput());
            if (query.isIncludeSynonyms()) {
                subQuery.add(parser.parse(Concept.SYNONYM + ":" + escapedQuery), Occur.SHOULD);
            }
            if (query.isIncludeAbbreviations()) {
                subQuery.add(parser.parse(Concept.ABREVIATION + ":" + escapedQuery), Occur.SHOULD);
            }
            if (query.isIncludeAcronyms()) {
                subQuery.add(parser.parse(Concept.ACRONYM + ":" + escapedQuery), Occur.SHOULD);
            }
            finalQuery.add(subQuery, Occur.MUST);
        } else {
            finalQuery.add(parser.parse(query.getInput()), Occur.MUST);
        }
    } catch (ParseException e) {
        logger.log(Level.WARNING, "Failed to parse query", e);
    }
    addCommonConstraints(finalQuery, query);
    IndexHits<Node> hits = null;
    try (Transaction tx = graph.beginTx()) {
        hits = graph.index().getNodeAutoIndexer().getAutoIndex().query(finalQuery);
        tx.success();
    }
    return limitHits(hits, query);
}

From source file:edu.sdsc.scigraph.vocabulary.VocabularyNeo4jImpl.java

License:Apache License

@Override
public List<Concept> getConceptsFromTerm(Query query) {
    QueryParser parser = getQueryParser();
    String exactQuery = String.format("\"\\^ %s $\"", query.getInput());
    BooleanQuery finalQuery = new BooleanQuery();
    try {
        if (query.isIncludeSynonyms() || query.isIncludeAbbreviations() || query.isIncludeAcronyms()) {
            BooleanQuery subQuery = new BooleanQuery();
            subQuery.add(LuceneUtils.getBoostedQuery(parser, exactQuery, 10.0f), Occur.SHOULD);
            if (query.isIncludeSynonyms()) {
                subQuery.add(parser.parse(Concept.SYNONYM + ":" + exactQuery), Occur.SHOULD);
            }
            if (query.isIncludeAbbreviations()) {
                subQuery.add(parser.parse(Concept.ABREVIATION + ":" + exactQuery), Occur.SHOULD);
            }
            if (query.isIncludeAcronyms()) {
                subQuery.add(parser.parse(Concept.ACRONYM + ":" + exactQuery), Occur.SHOULD);
            }
            finalQuery.add(subQuery, Occur.MUST);
        } else {
            finalQuery.add(parser.parse(exactQuery), Occur.MUST);
        }
    } catch (ParseException e) {
        logger.log(Level.WARNING, "Failed to parse query", e);
    }
    addCommonConstraints(finalQuery, query);
    logger.finest(finalQuery.toString());
    try (Transaction tx = graph.beginTx()) {
        IndexHits<Node> hits = graph.index().getNodeAutoIndexer().getAutoIndex().query(finalQuery);
        tx.success();
        return limitHits(hits, query);
    }
}

From source file:edu.stanford.muse.index.Indexer.java

License:Apache License

/**
 * Returns the collection of docIds of the Lucene docs that hit at least
 * threshold times.
 * Warning: only looks up the body field, no others.
 * Caution: this code is not to be touched unless something is being optimised;
 * introducing changes here can seriously affect search times.
 */
private Pair<Collection<String>, Integer> luceneLookupAsDocIdsWithTotalHits(String q, int threshold,
        IndexSearcher searcher, QueryType qt, int lt)
        throws IOException, ParseException, GeneralSecurityException, ClassNotFoundException {
    Collection<String> result = new ArrayList<String>();

    //   String escaped_q = escapeRegex(q); // to mimic built-in regex support
    //TODO: There should also be a general query type that takes any query with field param, i.e. without parser
    Query query;
    if (qt == QueryType.ORIGINAL)
        query = parserOriginal.parse(q);
    else if (qt == QueryType.SUBJECT)
        query = parserSubject.parse(q);
    else if (qt == QueryType.CORRESPONDENTS)
        query = parserCorrespondents.parse(q);
    else if (qt == QueryType.REGEX) {
        query = new BooleanQuery();
        /**
         * Note: this is not a spanning regexp (i.e. it doesn't search over more
         * than one token); for spanning regexps use the body_unanlyzed and
         * title_unanlyzed fields instead
         */
        Query query1 = new RegexpQuery(new Term("body", q), RegExp.ALL);
        Query query2 = new RegexpQuery(new Term("title", q), RegExp.ALL);
        ((BooleanQuery) query).add(query1, org.apache.lucene.search.BooleanClause.Occur.SHOULD);
        ((BooleanQuery) query).add(query2, org.apache.lucene.search.BooleanClause.Occur.SHOULD);
    } else /* if (qt == QueryType.PRESET_REGEX) {
           query = new BooleanQuery();
           if(presetQueries != null) {
           for (String pq : presetQueries) {
             Query query1 = new RegexpQuery(new Term("body", pq), RegExp.ALL);
             Query query2 = new RegexpQuery(new Term("title", pq), RegExp.ALL);
             ((BooleanQuery) query).add(query1, org.apache.lucene.search.BooleanClause.Occur.SHOULD);
             ((BooleanQuery) query).add(query2, org.apache.lucene.search.BooleanClause.Occur.SHOULD);
           }
           log.info("Doing a preset regex search");
           }else{
           log.warn("Preset queries is not initialised");
           }
           } else */ if (qt == QueryType.META) {
        query = parserMeta.parse(q);
    } else
        query = parser.parse(q);

    //      query = convertRegex(query);
    long st = System.currentTimeMillis();
    int totalHits = 0;
    ScoreDoc[] hits = null;
    if (query != null) {
        TopDocs tds = searcher.search(query, null, lt);
        log.info("Took: " + (System.currentTimeMillis() - st) + "ms for query:" + query);
        hits = tds.scoreDocs;
        totalHits = tds.totalHits;
    } else {
        log.error("Query is null!!");
    }
    // this logging causes a 50% overhead on the query -- maybe enable it only for debugging
    // log.info (hits.length + " hits for query " + Util.ellipsize(q, 30) + " => " + Util.ellipsize(escaped_q, 30) + " = " + Util.ellipsize(query.toString(), 30) + " :");

    // Iterate through the results:

    // TODO: not very pretty code here to determine dir_name which selects the cache to use
    Util.softAssert(searcher == isearcher || searcher == isearcher_blob);
    String dir_name = searcher == isearcher ? INDEX_NAME_EMAILS : INDEX_NAME_ATTACHMENTS;

    Map<Integer, String> map = dirNameToDocIdMap.get(dir_name);
    if (map == null) {
        map = new LinkedHashMap<Integer, String>();
        dirNameToDocIdMap.put(dir_name, map);
        log.info("Adding new entry for dir name to docIdMap");
    } else {
        log.info("Existing entry for dir name to docIdMap");
    }

    int n_added = 0;
    log.info("Found: " + hits.length + " hits for query: " + q);
    for (int i = 0; i < hits.length; i++) {
        int ldocId = hits[i].doc; // this is the lucene doc id, we need to map it to our doc id.

        String docId = null; // this will be our doc id

        // try to use the new fieldcache id's
        // if this works, we can get rid of the dirNameToDocIdMap
        try {
            docId = (searcher == isearcher) ? contentDocIds.get(ldocId) : blobDocIds.get(ldocId);
        } catch (Exception e) {
            Util.print_exception(e, log);
            continue;
        }

        if (threshold <= 1) {
            // common case: threshold is 1.
            result.add(docId);
            n_added++;
        } else {
            // more expensive, do it only if threshold is > 1
            Explanation expl = searcher.explain(query, hits[i].doc);
            Explanation[] details = expl.getDetails();
            // NB: a catch here is that details.length doesn't reflect the actual # of hits for the query.
            // sometimes, for a single hit, there are 2 entries, a ComplexExplanation and an Explanation.
            // not sure why, but is somewhat corroborated by the code:
            // http://massapi.com/class/ex/Explanation.html
            // showing a single hit creating both a C.E and an E.
            // a more robust approach might be to look for the summary to end with product of: , sum of: etc.
            // e.g. http://www.gossamer-threads.com/lists/lucene/java-dev/49706
            // but for now, we'll count only the number of ComplexExplanation and check if its above threshold
            //            log.info("doc id " + hits[i].toString() + " #details = " + details.length);

            // HORRIBLE HACK! - because we don't know a better way to find the threshold
            outer: for (Explanation detail : details) {
                // log.info(detail.getClass().getName());

                if (detail instanceof ComplexExplanation) {
                    ComplexExplanation ce = (ComplexExplanation) detail;
                    String s = ce.toString();
                    int total_tf = 0;
                    while (true) {
                        int idx = s.indexOf("tf(termFreq(");
                        if (idx < 0)
                            break outer;
                        s = s.substring(idx);
                        idx = s.indexOf("=");
                        if (idx < 0)
                            break outer;
                        s = s.substring(idx + 1);
                        int idx1 = s.indexOf(")");
                        if (idx1 < 0)
                            break outer;
                        String num_str = s.substring(0, idx1);
                        int num = 0;
                        try {
                            num = Integer.parseInt(num_str);
                        } catch (Exception e) {
                            log.warn("ERROR parsing complex expl: " + num_str);
                        }
                        total_tf += num;
                        if (total_tf >= threshold) {
                            result.add(docId);
                            n_added++;
                            break outer;
                        }
                    }
                }
            }
        }
    }
    log.info(n_added + " docs added to docIdMap cache");
    return new Pair<Collection<String>, Integer>(result, totalHits);
}

From source file:edu.uci.ics.searcher.SearchFiles.java

License:Apache License

private static Query myBooleanQuery(String query_string) throws Exception {
    Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_40);
    BooleanQuery bq = new BooleanQuery();

    String[] query_tokens = query_string.trim().split(" ");
    PhraseQuery contents_phrase_query = new PhraseQuery();
    PhraseQuery title_phrase_query = new PhraseQuery();
    for (int i = 0; i < query_tokens.length; i++) {
        contents_phrase_query.add(new Term("contents", query_tokens[i]), i);
        title_phrase_query.add(new Term("title", query_tokens[i]), i);
    }
    contents_phrase_query.setBoost(2.0f);
    title_phrase_query.setBoost(2.0f);
    bq.add(contents_phrase_query, BooleanClause.Occur.SHOULD);
    bq.add(title_phrase_query, BooleanClause.Occur.SHOULD);

    TermQuery contents_term_query = new TermQuery(new Term("contents", query_string));
    //contents_term_query.setBoost(1.5f);
    bq.add(contents_term_query, BooleanClause.Occur.SHOULD);
    TermQuery title_term_query = new TermQuery(new Term("title", query_string));
    bq.add(title_term_query, BooleanClause.Occur.SHOULD);

    //TermQuery pub_query = new TermQuery(new Term("contents", "publications"));
    //bq.add(pub_query, BooleanClause.Occur.SHOULD);

    //FuzzyQuery url_query = new FuzzyQuery(new Term("url", "www.ics.uci.edu"), 2);
    //bq.add(url_query, BooleanClause.Occur.SHOULD);

    Query query = new QueryParser(Version.LUCENE_40, "contents", analyzer).parse(bq.toString());
    return query;
}

From source file:edu.ucla.cs.scai.canali.core.index.TokenIndex.java

public ArrayList<IndexedToken> getTokenElements(String search, String domainsOfProperty[],
        String rangesOfProperty[], String[] propertyDomains, int maxResults, String... tokenClasses) {

    System.out.println("SEARCH = " + search);
    for (String t : tokenClasses) {
        System.out.println("TC = " + t);
    }

    ArrayList<IndexedToken> res = new ArrayList<>();
    if (search == null) {
        search = "";
    }
    boolean classFound = false;
    boolean classAcceptable = false;
    HashSet<String> admissableLiterals = getAdmissableLiterals(rangesOfProperty);
    try {
        BooleanQuery globalQuery = new BooleanQuery();
        BooleanQuery typeQuery = new BooleanQuery();
        if (tokenClasses != null && tokenClasses.length > 0) {
            for (int i = 0; i < tokenClasses.length; i++) {
                BooleanQuery subTypeQuery = new BooleanQuery();
                subTypeQuery.add(new TermQuery(new Term("type", tokenClasses[i])), BooleanClause.Occur.MUST);
                switch (tokenClasses[i]) {
                case IndexedToken.PROPERTY:
                    if (domainsOfProperty != null && domainsOfProperty.length > 0) {
                        BooleanQuery domainOfQuery = new BooleanQuery();
                        for (String domainOfProperty : domainsOfProperty) {
                            domainOfQuery.add(
                                    new TermQuery(
                                            new Term("domainOfProperty", QueryParser.escape(domainOfProperty))),
                                    BooleanClause.Occur.SHOULD);
                        }
                        subTypeQuery.add(domainOfQuery, BooleanClause.Occur.MUST);
                    }
                    if (rangesOfProperty != null && rangesOfProperty.length > 0) {
                        BooleanQuery rangeOfQuery = new BooleanQuery();
                        for (String rangeOfProperty : rangesOfProperty) {
                            rangeOfQuery.add(
                                    new TermQuery(
                                            new Term("rangeOfProperty", QueryParser.escape(rangeOfProperty))),
                                    BooleanClause.Occur.SHOULD);
                        }
                        subTypeQuery.add(rangeOfQuery, BooleanClause.Occur.MUST);
                    }
                    if (propertyDomains != null && propertyDomains.length > 0) {
                        BooleanQuery domainQuery = new BooleanQuery();
                        for (String propertyDomain : propertyDomains) {
                            domainQuery.add(
                                    new TermQuery(
                                            new Term("propertyDomain", QueryParser.escape(propertyDomain))),
                                    BooleanClause.Occur.SHOULD);
                        }
                        subTypeQuery.add(domainQuery, BooleanClause.Occur.MUST);
                    }
                    break;
                case IndexedToken.ENTITY:
                    if (domainsOfProperty != null && domainsOfProperty.length > 0) {
                        BooleanQuery domainOfQuery = new BooleanQuery();
                        for (String domainOfProperty : domainsOfProperty) {
                            domainOfQuery.add(
                                    new TermQuery(
                                            new Term("domainOfProperty", QueryParser.escape(domainOfProperty))),
                                    BooleanClause.Occur.SHOULD);
                        }
                        subTypeQuery.add(domainOfQuery, BooleanClause.Occur.MUST);
                    }
                    if (rangesOfProperty != null && rangesOfProperty.length > 0) {
                        BooleanQuery rangeOfQuery = new BooleanQuery();
                        for (String rangeOfProperty : rangesOfProperty) {
                            rangeOfQuery.add(
                                    new TermQuery(
                                            new Term("rangeOfProperty", QueryParser.escape(rangeOfProperty))),
                                    BooleanClause.Occur.SHOULD);
                        }
                        subTypeQuery.add(rangeOfQuery, BooleanClause.Occur.MUST);
                    }
                    break;
                case IndexedToken.CLASS:
                    classAcceptable = true;
                    if (domainsOfProperty != null && domainsOfProperty.length > 0) {
                        BooleanQuery domainOfQuery = new BooleanQuery();
                        for (String domainOfProperty : domainsOfProperty) {
                            domainOfQuery.add(
                                    new TermQuery(
                                            new Term("domainOfProperty", QueryParser.escape(domainOfProperty))),
                                    BooleanClause.Occur.SHOULD);
                        }
                        subTypeQuery.add(domainOfQuery, BooleanClause.Occur.MUST);
                    }
                    if (rangesOfProperty != null && rangesOfProperty.length > 0) {
                        BooleanQuery rangeOfQuery = new BooleanQuery();
                        for (String rangeOfProperty : rangesOfProperty) {
                            rangeOfQuery.add(
                                    new TermQuery(
                                            new Term("rangeOfProperty", QueryParser.escape(rangeOfProperty))),
                                    BooleanClause.Occur.SHOULD);
                        }
                        subTypeQuery.add(rangeOfQuery, BooleanClause.Occur.MUST);
                    }
                    break;
                }
                typeQuery.add(subTypeQuery, BooleanClause.Occur.SHOULD);
            }
            if (tokenClasses.length > 1) {
                //typeQuery.setMinimumNumberShouldMatch(1);
            }
            globalQuery.add(typeQuery, BooleanClause.Occur.MUST);
        }

        BooleanQuery searchQuery = new BooleanQuery();
        String[] ss = search.split(" ");
        for (String s : ss) {
            if (!s.equals("")) // Modified to avoid query in lucene with empty label: 
                searchQuery.add(new TermQuery(new Term("label", QueryParser.escape(s))),
                        BooleanClause.Occur.SHOULD);
        }
        //searchQuery.setMinimumNumberShouldMatch(1);
        globalQuery.add(searchQuery, BooleanClause.Occur.MUST);
        QueryParser parser = new QueryParser("", analyzer);
        try (IndexReader reader = DirectoryReader.open(directory)) {
            IndexSearcher searcher = new IndexSearcher(reader);
            String queryString = globalQuery.toString(); // needed because the parser behaves differently for different search features; see its definition
            System.out.println("QUERY = " + queryString);
            ScoreDoc[] hits = searcher.search(parser.parse(queryString), maxResults * 5).scoreDocs;
            for (ScoreDoc r : hits) {
                Document doc = searcher.doc(r.doc);
                IndexedToken element = elements.get(doc.getField("id").numericValue().intValue());
                if (element instanceof DirectBinaryOperatorToken
                        || element instanceof IndirectBinaryOperatorToken) {
                    String op = ((OperatorToken) element).getSymbol();
                    if (op.startsWith("year") || op.startsWith("month")) {
                        if (admissableLiterals.contains(DATE)) {
                            res.add(element);
                        }
                    } else if (op.equals("=") || !admissableLiterals.isEmpty()) {
                        res.add(element);
                    }
                } else {
                    res.add(element);
                    if (element instanceof ClassToken) {
                        String fullText = search.toLowerCase();
                        boolean isPrefix = true;
                        if (fullText.endsWith(".")) {
                            fullText = fullText.substring(0, fullText.length() - 1);
                            isPrefix = false;
                        } else if (fullText.endsWith("?")) {
                            fullText = fullText.substring(0, fullText.length() - 1);
                            isPrefix = false;
                        } else if (fullText.endsWith(" and having")) {
                            fullText = fullText.substring(0, fullText.length() - 11);
                            isPrefix = false;
                        } else if (fullText.endsWith(" and with")) {
                            fullText = fullText.substring(0, fullText.length() - 9);
                            isPrefix = false;
                        } else if (fullText.endsWith(" having")) {
                            fullText = fullText.substring(0, fullText.length() - 7);
                            isPrefix = false;
                        } else if (fullText.endsWith(" with")) {
                            fullText = fullText.substring(0, fullText.length() - 5);
                            isPrefix = false;
                        }
                        fullText = fullText.trim();
                        classFound = true;
                        ClassToken ct = (ClassToken) element;
                        HashSet<String> searchWords = new HashSet(Arrays.asList(fullText.split(" ")));
                        HashSet<String> classWords = new HashSet(Arrays.asList((ct).getText().split(" "))); //this does not work with plural forms                            
                        searchWords.removeAll(classWords);
                        if (!searchWords.isEmpty()) {
                            AugmentedClassToken act = new AugmentedClassToken(ct, searchWords, isPrefix);
                            res.add(act);
                        }
                    }
                }
                if (res.size() == maxResults) {
                    //break;
                }
            } // here???
        }
    } catch (Exception ex) {
        Logger.getLogger(TokenIndex.class.getName()).log(Level.SEVERE, null, ex);
    }
    if (classAcceptable && !classFound) {
        // System.out.println("Try class + keywords for " + search);
    }
    return res;
}

From source file:edu.ucla.cs.scai.canali.core.index.TokenIndex.java

public HashSet<String>[][] describeProperty(String label, int limit) {
    HashSet<String>[][] res = new HashSet[2][];
    res[0] = new HashSet[2];
    res[1] = new HashSet[3];
    Integer idA = ontologyElementsIdByUri.get(label);
    if (idA == null) {
        return res;
    }
    IndexedToken e = elements.get(idA);
    if (e == null || !(e instanceof PropertyToken)) {
        return res;
    }
    PropertyToken a = (PropertyToken) e;

    BooleanQuery globalQuery = new BooleanQuery();
    BooleanQuery typeQuery = new BooleanQuery();
    BooleanQuery subTypeQuery = new BooleanQuery();
    subTypeQuery.add(new TermQuery(new Term("type", IndexedToken.CLASS)), BooleanClause.Occur.MUST);
    typeQuery.add(subTypeQuery, BooleanClause.Occur.MUST);
    subTypeQuery = new BooleanQuery();
    subTypeQuery.add(new TermQuery(new Term("type", IndexedToken.PROPERTY)), BooleanClause.Occur.MUST);
    typeQuery.add(subTypeQuery, BooleanClause.Occur.MUST);
    globalQuery.add(typeQuery, BooleanClause.Occur.MUST);
    globalQuery.add(new TermQuery(new Term("domainOfProperty", QueryParser.escape(label))),
            BooleanClause.Occur.MUST);

    res[0][0] = new HashSet<>();
    res[0][1] = new HashSet<>();
    QueryParser parser = new QueryParser("", analyzer);
    try (IndexReader reader = DirectoryReader.open(directory)) {
        IndexSearcher searcher = new IndexSearcher(reader);
        String queryString = globalQuery.toString(); // needed because the parser behaves differently for different search features; see its definition
        ScoreDoc[] hits = searcher.search(parser.parse(queryString), 1000).scoreDocs;
        for (ScoreDoc r : hits) {
            Document doc = searcher.doc(r.doc);
            IndexedToken element = elements.get(doc.getField("id").numericValue().intValue());
            if (element instanceof PropertyToken) {
                res[0][1].add(((PropertyToken) element).uri);
            } else {
                res[0][0].add(((ClassToken) element).uri);
            }
        }
    } catch (Exception ex) {
        Logger.getLogger(TokenIndex.class.getName()).log(Level.SEVERE, null, ex);
    }

    globalQuery = new BooleanQuery();
    typeQuery = new BooleanQuery();
    subTypeQuery = new BooleanQuery();
    subTypeQuery.add(new TermQuery(new Term("type", IndexedToken.CLASS)), BooleanClause.Occur.MUST);
    typeQuery.add(subTypeQuery, BooleanClause.Occur.MUST);
    subTypeQuery = new BooleanQuery();
    subTypeQuery.add(new TermQuery(new Term("type", IndexedToken.PROPERTY)), BooleanClause.Occur.MUST);
    typeQuery.add(subTypeQuery, BooleanClause.Occur.MUST);
    globalQuery.add(typeQuery, BooleanClause.Occur.MUST);
    globalQuery.add(new TermQuery(new Term("rangeOfProperty", QueryParser.escape(label))),
            BooleanClause.Occur.MUST);

    res[1][0] = new HashSet<>();
    res[1][1] = new HashSet<>();
    try (IndexReader reader = DirectoryReader.open(directory)) {
        IndexSearcher searcher = new IndexSearcher(reader);
        String queryString = globalQuery.toString(); // needed because the parser behaves differently for different search features; see its definition
        ScoreDoc[] hits = searcher.search(parser.parse(queryString), 1000).scoreDocs;
        for (ScoreDoc r : hits) {
            Document doc = searcher.doc(r.doc);
            IndexedToken element = elements.get(doc.getField("id").numericValue().intValue());
            if (element instanceof PropertyToken) {
                res[1][1].add(((PropertyToken) element).uri);
            } else {
                res[1][0].add(((ClassToken) element).uri);
            }
        }
    } catch (Exception ex) {
        Logger.getLogger(TokenIndex.class.getName()).log(Level.SEVERE, null, ex);
    }

    String[] atts = new String[1];
    atts[0] = label;
    res[1][2] = new HashSet<>();
    for (String l : getAdmissableLiterals(atts)) {
        res[1][2].add(l);
    }
    return res;
}

From source file:edu.ucla.cs.scai.linkedspending.index.QueryIndexWithLucene.java

public ArrayList<WeightedDataSet> queryDataset(String query) throws Exception {
    BooleanQuery globalQuery = new BooleanQuery();
    BooleanQuery typeQuery = new BooleanQuery();
    typeQuery.add(new TermQuery(new Term("type", "dataset")), BooleanClause.Occur.MUST);
    globalQuery.add(typeQuery, BooleanClause.Occur.MUST);
    BooleanQuery searchQuery = new BooleanQuery();
    for (String s : keywordExtractor.normalizeWords(query)) {
        searchQuery.add(new TermQuery(new Term("label", QueryParser.escape(s))), BooleanClause.Occur.SHOULD);
    }
    globalQuery.add(searchQuery, BooleanClause.Occur.MUST);
    QueryParser parser = new QueryParser("", analyzer);
    ArrayList<WeightedDataSet> res = new ArrayList<>();
    try (IndexReader reader = DirectoryReader.open(directory)) {
        IndexSearcher searcher = new IndexSearcher(reader);
        String queryString = globalQuery.toString(); // needed because the parser behaves differently for different search features; see its definition
        ScoreDoc[] hits = searcher.search(parser.parse(queryString), 50).scoreDocs;
        for (ScoreDoc r : hits) {
            Document doc = searcher.doc(r.doc);
            res.add(new WeightedDataSet(doc.getField("uri").stringValue(), r.score));
        }
        return res;
    } catch (Exception ex) {
        Logger.getLogger(QueryIndexWithLucene.class.getName()).log(Level.SEVERE, null, ex);
    }
    return res;
}