List of usage examples for the org.apache.lucene.search.BooleanQuery constructor (BooleanQuery()).
From source file:de.mirkosertic.easydav.index.QueryParser.java
License:Open Source License
public Query parse(String aQuery, String aSearchField) { BooleanQuery theResult = new BooleanQuery(); boolean isStringMode = false; boolean isNegated = false; StringBuilder theCurrentTerm = new StringBuilder(); for (int i = 0; i < aQuery.length(); i++) { char theCurrentChar = Character.toLowerCase(aQuery.charAt(i)); if (theCurrentChar == '\"') { isStringMode = !isStringMode; } else {//from w ww. ja va 2s . c om if (!isStringMode) { switch (theCurrentChar) { case '-': { if (theCurrentTerm.length() == 0) { isNegated = true; } else { theCurrentTerm.append(theCurrentChar); } break; } case '+': if (theCurrentTerm.length() == 0) { isNegated = false; } else { theCurrentTerm.append(theCurrentChar); } break; case ' ': { addSubQuery(theResult, theCurrentTerm.toString(), isNegated, aSearchField); theCurrentTerm = new StringBuilder(); isNegated = false; break; } default: { theCurrentTerm.append(theCurrentChar); break; } } } else { theCurrentTerm.append(theCurrentChar); } } } if (theCurrentTerm.length() > 0) { addSubQuery(theResult, theCurrentTerm.toString(), isNegated, aSearchField); } return theResult; }
From source file:de.powerstaff.business.service.impl.ProfileSearchServiceImpl.java
License:Open Source License
private Query getRealQuery(SavedProfileSearch aRequest, Analyzer aAnalyzer) throws IOException, ParseException { BooleanQuery theQuery = new BooleanQuery(); GoogleStyleQueryParser theParser = new GoogleStyleQueryParser(null); theQuery.add(theParser.parseQuery(aRequest.getProfileContent(), aAnalyzer, ProfileIndexerService.CONTENT), Occur.MUST);//from w w w . ja v a 2s. com if (!StringUtils.isEmpty(aRequest.getPlz())) { theQuery.add( new WildcardQuery(new Term(ProfileIndexerService.PLZ, aRequest.getPlz().replace("%", "*"))), Occur.MUST); } return theQuery; }
From source file:de.powerstaff.business.service.impl.ProfileSearchServiceImpl.java
License:Open Source License
@Override public DataPage<ProfileSearchEntry> findProfileDataPage(SavedProfileSearch aRequest, int startRow, int pageSize) throws Exception { if (aRequest.getId() == null) { // Kann passieren, wenn die Suche das erste mal aufgerufen wird return new DataPage<ProfileSearchEntry>(0, 0, new ArrayList<ProfileSearchEntry>()); }/* w ww .j a v a 2 s. c om*/ Analyzer theAnalyzer = ProfileAnalyzerFactory.createAnalyzer(); FullTextSession theSession = Search.getFullTextSession(sessionFactory.getCurrentSession()); Query theQuery = getRealQuery(aRequest, theAnalyzer); LOGGER.info("Search query is " + theQuery + " from " + startRow + " with pagesize " + pageSize); Highlighter theHighlighter = new Highlighter(new SpanGradientFormatter(1, "#000000", "#0000FF", null, null), new QueryScorer(theQuery)); BooleanQuery theRealQuery = new BooleanQuery(); theRealQuery.add(theQuery, Occur.MUST); if (aRequest != null) { for (String theId : aRequest.getProfilesToIgnore()) { theRealQuery.add(new TermQuery(new Term(ProfileIndexerService.UNIQUE_ID, theId)), Occur.MUST_NOT); } } LOGGER.info("Query with ignore is " + theRealQuery); Sort theSort = null; if (!StringUtils.isEmpty(aRequest.getSortierung())) { int theSortType = SortField.STRING; boolean theReverse = false; String theSortField = aRequest.getSortierungField(); if (ProfileIndexerService.STUNDENSATZ.equals(theSortField)) { theSortType = SortField.LONG; } if (ProfileIndexerService.VERFUEGBARKEIT.equals(theSortField)) { theReverse = true; } if (ProfileIndexerService.LETZTERKONTAKT.equals(theSortField)) { theReverse = true; } if (aRequest.isSortierungReverse()) { theReverse = !theReverse; } theSort = new Sort(new SortField(theSortField, theSortType, theReverse)); } List<Filter> theFilterList = new ArrayList<Filter>(); TermsFilter theContactForbidden = new TermsFilter(); theContactForbidden.addTerm(new Term(ProfileIndexerService.KONTAKTSPERRE, "false")); theFilterList.add(theContactForbidden); if (aRequest.getStundensatzVon() != null || 
aRequest.getStundensatzBis() != null) { if (aRequest.getStundensatzVon() != null) { theFilterList.add(NumericRangeFilter.newLongRange(ProfileIndexerService.STUNDENSATZ, aRequest.getStundensatzVon(), Long.MAX_VALUE, true, true)); } if (aRequest.getStundensatzBis() != null) { theFilterList.add(NumericRangeFilter.newLongRange(ProfileIndexerService.STUNDENSATZ, 0l, aRequest.getStundensatzBis(), true, true)); } } Filter theFilter = new ChainedFilter(theFilterList.toArray(new Filter[theFilterList.size()]), ChainedFilter.AND); int theEnd = startRow + pageSize; FullTextQuery theHibernateQuery = theSession.createFullTextQuery(theRealQuery, Freelancer.class); if (theFilter != null) { theHibernateQuery.setFilter(theFilter); } if (theSort != null) { theHibernateQuery.setSort(theSort); } theHibernateQuery.setFirstResult(startRow); theHibernateQuery.setMaxResults(theEnd - startRow); theHibernateQuery.setProjection(FullTextQuery.THIS, FullTextQuery.DOCUMENT); List<ProfileSearchEntry> theResult = new ArrayList<ProfileSearchEntry>(); for (Object theSingleEntity : theHibernateQuery.list()) { Object[] theRow = (Object[]) theSingleEntity; Freelancer theFreelancer = (Freelancer) theRow[0]; Document theDocument = (Document) theRow[1]; ProfileSearchEntry theEntry = createResultEntry(theAnalyzer, theQuery, theHighlighter, theFreelancer, theDocument); theResult.add(theEntry); } return new DataPage<ProfileSearchEntry>(theHibernateQuery.getResultSize(), startRow, theResult); }
From source file:de.spartusch.nasfvi.server.NQuery.java
License:Apache License
/**
 * Interprets the grammatical tense and extends the {@code query} accordingly.
 *
 * @param tense grammatical tense to interpret
 * @param query query to extend
 * @return the given query combined (MUST + MUST) with a tense-specific
 *         semester restriction
 */
private static Query interpretTense(final Grammar.Tense tense, final Query query) {
    final Semester now = new Semester();
    final Query tenseRestriction;
    switch (tense) {
    case pqperf:
        // Range upper bound is built from last calendar year plus a fixed
        // month/day suffix — presumably a semester-end cutoff; confirm
        // against the Semester date format.
        final int lastYear = new GregorianCalendar().get(GregorianCalendar.YEAR) - 1;
        tenseRestriction = new TermRangeQuery("semester_end", "19700101", Integer.toString(lastYear) + "0221",
                true, false);
        break;
    case perf:
        tenseRestriction = new TermRangeQuery("semester_beg", "19700101", now.getBegin(), true, false);
        break;
    case praet:
        tenseRestriction = new TermRangeQuery("semester_beg", "19700101", now.getBegin(), true, true);
        break;
    case praes:
        tenseRestriction = new TermQuery(new Term("semester", now.getCanonical()));
        break;
    case fut1:
        tenseRestriction = new TermRangeQuery("semester_end", now.getEnd(), "29991231", false, true);
        break;
    default:
        // All enum constants are handled above.
        throw new AssertionError();
    }
    final BooleanQuery combined = new BooleanQuery();
    combined.add(query, BooleanClause.Occur.MUST);
    combined.add(tenseRestriction, BooleanClause.Occur.MUST);
    return combined;
}
From source file:de.spartusch.nasfvi.server.NSearcher.java
License:Apache License
/** * Searches the index using a {@link NQuery}. * @param nquery Query to search for/*from ww w . j a v a2 s. c o m*/ * @param offset Offset to use for the search * @return Matching documents * @throws IOException if there is an IOException when accessing the index */ public final TopDocs search(final NQuery nquery, final int offset) throws IOException { Query q = nquery.getQuery(); if (nquery.hasSimilarityQuery()) { Query similQuery = nquery.getSimilarityQuery(); TopDocs similDocs = search(similQuery, 1); if (similDocs.totalHits == 0) { return new TopDocs(0, new ScoreDoc[0], 0f); } int similDocNum = similDocs.scoreDocs[0].doc; String similId = searcher.doc(similDocNum).get("id"); Query exclude = new TermQuery(new Term("id", similId)); // exclude the document compared to MoreLikeThis mlt = new MoreLikeThis(searcher.getIndexReader()); mlt.setFieldNames(SIMILARITY_FIELDS); Query moreLikeQuery = mlt.like(similDocNum); BooleanQuery booleanQuery = new BooleanQuery(); booleanQuery.add(q, BooleanClause.Occur.MUST); booleanQuery.add(moreLikeQuery, BooleanClause.Occur.MUST); booleanQuery.add(exclude, BooleanClause.Occur.MUST_NOT); q = booleanQuery; } return search(q, offset + 5); // return top 5 results }
From source file:de.tudarmstadt.ukp.dkpro.core.decompounding.web1t.Finder.java
License:Apache License
/** * Find all n-grams containing these tokens in order but optionally with words between them. * /*ww w. j a v a2 s . co m*/ * @param aToken * A list of tokens * @return the n-grams. */ @SuppressWarnings("unchecked") public List<NGramModel> find(String[] aToken) { BooleanQuery q = new BooleanQuery(); PhraseQuery pq = new PhraseQuery(); pq.setSlop((5 - aToken.length) >= 0 ? (5 - aToken.length) : 0); // max 5-grams in the web1t for (String t : aToken) { pq.add(new Term("gram", t.toLowerCase())); // q.add(new TermQuery(new Term("gram", t.toLowerCase())), Occur.MUST); } q.add(pq, Occur.MUST); String cacheKey = q.toString(); if (ngramCache.containsKey(cacheKey)) { List<NGramModel> list = (List<NGramModel>) ngramCache.get(cacheKey); return list; } try { // System.out.printf("Searching [%s]... ", cacheKey); NGramCollector collector = new NGramCollector(); // long start = System.currentTimeMillis(); searcher.search(q, collector); List<NGramModel> ngrams = collector.getNgrams(); ngramCache.put(cacheKey, ngrams); // long now = System.currentTimeMillis(); // System.out.printf(" (%d in %dms)%n", ngrams.size(), now - start); // for (NGram ng : ngrams) { // System.out.printf(" %s%n", ng); // } return ngrams; } catch (IOException e) { throw new IllegalStateException(e); } }
From source file:de.tudarmstadt.ukp.teaching.uima.nounDecompounding.web1t.Finder.java
License:Open Source License
/** * Find all n-grams in the index.//from w ww.j a va 2s . c o m * @param token A list of tokens * @return */ public List<NGram> find(String[] token) { List<NGram> ngrams = new ArrayList<NGram>(); BooleanQuery q = new BooleanQuery(); for (String t : token) { q.add(new TermQuery(new Term("gram", t.toLowerCase())), Occur.MUST); } if (cache.containsKey(q.toString())) { return cache.get(q.toString()); } try { ScoreDoc[] results = searcher.search(q, 100).scoreDocs; Document doc; for (ScoreDoc scoreDoc : results) { doc = searcher.doc(scoreDoc.doc); ngrams.add(new NGram(doc.get("gram"), Integer.valueOf(doc.get("freq")))); } } catch (IOException e) { // TODO: handle exception e.printStackTrace(); } cache.put(q.toString(), ngrams); return ngrams; }
From source file:de.twitterlivesearch.analysis.Searcher.java
License:Apache License
/** * This is the same as// w w w.java 2s . c om * {@link de.twitterlivesearch.analysis.Searcher#searchForTweets(String) * searchForTweets(String)}, but the search is limited to the tweet with the * given id. This can for example be used to analyze the latest incoming * tweet. * * @param id * @param queryString * @return */ public List<Document> searchForTweets(Integer id, String queryString) { if (queryString.isEmpty()) { return Collections.emptyList(); } AbstractConfiguration config = ConfigurationHolder.getConfiguration(); try { if (!DirectoryReader.indexExists(directory)) { return null; } } catch (IOException e) { log.fatal("Error when trying to check if directory exists!", e); return new ArrayList<>(); } DirectoryReader ireader; try { ireader = DirectoryReader.open(directory); } catch (IOException e) { log.fatal("Error when trying to open directory!", e); return null; } IndexSearcher isearcher = new IndexSearcher(ireader); Query textQuery = null; QueryParser parser = new QueryParser(FieldNames.TEXT.getField(), AnalyzerMapping.getInstance().ANALYZER_FOR_DELIMITER); parser.setDefaultOperator(config.getDefaultOperator()); BooleanQuery query = new BooleanQuery(); try { textQuery = parser.parse(queryString); } catch (ParseException e) { log.fatal("Error while parsing query: " + queryString, e); } // if id does not equal null only the query with the given id will be // searched // this can be used to search the latest element only if (id != null) { Query idQuery = NumericRangeQuery.newIntRange(FieldNames.ID.getField(), id.intValue(), id.intValue(), true, true); query.add(idQuery, Occur.MUST); } query.add(textQuery, Occur.MUST); ScoreDoc[] hits = null; try { hits = isearcher.search(query, 1000).scoreDocs; } catch (IOException e) { log.fatal("Error while trying to search!", e); } List<Document> result = new ArrayList<>(); for (int i = 0; i < hits.length; i++) { try { result.add(isearcher.doc(hits[i].doc)); log.info("Found result for query \"" + queryString + "\"."); } 
catch (IOException e) { log.fatal("Error when getting document!", e); } } return result; }
From source file:de.unihildesheim.iw.cli.DumpIPCs.java
License:Open Source License
/**
 * Dumps all IPC codes found in the index, optionally restricted to codes
 * matching a single IPC given on the command line. For each matching code
 * the parsed record, its formatted form and its regular-expression form are
 * printed to stdout; match/skip counts are logged at the end.
 */
private void runMain(final String... args) throws IOException, BuildException {
    // NOTE(review): the result of this allocation is discarded — presumably
    // parseWithHelp below performs the actual parsing; confirm whether this
    // line is needed at all.
    new CmdLineParser(this.cliParams);
    parseWithHelp(this.cliParams, args);
    // check, if files and directories are sane
    this.cliParams.check();
    assert this.cliParams.idxReader != null;
    final int maxDoc = this.cliParams.idxReader.maxDoc();
    if (maxDoc == 0) {
        LOG.error("Empty index.");
        return;
    }
    // Parser configured from the CLI flags (separator char, zero padding).
    final Parser ipcParser = new Parser();
    ipcParser.separatorChar(this.cliParams.sep);
    ipcParser.allowZeroPad(this.cliParams.zeroPad);
    final DirectoryReader reader = DirectoryReader.open(FSDirectory.open(this.cliParams.idxDir.toPath()));
    final Builder idxReaderBuilder = new Builder(reader);
    // rx_ipc stays null when no IPC restriction was given; in that case
    // every code in the index is dumped.
    Pattern rx_ipc = null;
    if (this.cliParams.ipc != null) {
        final IPCRecord ipc = ipcParser.parse(this.cliParams.ipc);
        final BooleanQuery bq = new BooleanQuery();
        rx_ipc = Pattern.compile(ipc.toRegExpString(this.cliParams.sep));
        if (LOG.isDebugEnabled()) {
            LOG.debug("IPC regExp: rx={} pat={}", ipc.toRegExpString(this.cliParams.sep), rx_ipc);
        }
        // Restrict the reader to documents matching both the IPC class query
        // and the sloppy-match field filter.
        bq.add(new QueryWrapperFilter(IPCClassQuery.get(ipc, this.cliParams.sep)), Occur.MUST);
        bq.add(new QueryWrapperFilter(
                new IPCFieldFilter(new IPCFieldFilterFunctions.SloppyMatch(ipc), ipcParser)), Occur.MUST);
        idxReaderBuilder.queryFilter(new QueryWrapperFilter(bq));
    }
    final IndexReader idxReader = idxReaderBuilder.build();
    if (idxReader.numDocs() > 0) {
        // Walk every term of the IPC field and print those matching the
        // optional regular expression.
        final Terms terms = MultiFields.getTerms(idxReader, LUCENE_CONF.FLD_IPC);
        TermsEnum termsEnum = TermsEnum.EMPTY;
        BytesRef term;
        if (terms != null) {
            termsEnum = terms.iterator(termsEnum);
            term = termsEnum.next();
            final int[] count = { 0, 0 }; // match, exclude
            while (term != null) {
                final String code = term.utf8ToString();
                if (rx_ipc == null || (rx_ipc.matcher(code).matches())) {
                    final IPCRecord record = ipcParser.parse(code);
                    try {
                        System.out.println(code + ' ' + record + " (" + record.toFormattedString() + ") " + '['
                                + record.toRegExpString('-') + ']');
                    } catch (final IllegalArgumentException e) {
                        // Record could not be rendered — flag it but keep going.
                        System.out.println(code + ' ' + "INVALID (" + code + ')');
                    }
                    count[0]++;
                } else {
                    if (LOG.isDebugEnabled()) {
                        LOG.debug("Skip non matching IPC: {}", code);
                    }
                    count[1]++;
                }
                term = termsEnum.next();
            }
            LOG.info("match={} skip={}", count[0], count[1]);
        }
    } else {
        LOG.info("No documents left after filtering.");
    }
}
From source file:de.unihildesheim.iw.lucene.query.RelaxableCommonTermsQuery.java
License:Open Source License
/**
 * New instance using settings from the supplied {@link Builder} instance.
 *
 * Tokenizes the query string, splits the unique terms per field into
 * low-frequency and high-frequency sub-queries based on the builder's
 * maximum term frequency, and combines the per-field queries into a
 * {@link DisjunctionMaxQuery} (or uses the single field query directly).
 *
 * @param builder {@link Builder} Instance builder
 * @throws IOException Thrown on low-level i/o-errors
 */
@SuppressWarnings({ "ObjectAllocationInLoop", "ObjectEquality" })
RelaxableCommonTermsQuery(@NotNull final Builder builder) throws IOException {
    // get all query terms
    assert builder.queryStr != null;
    assert builder.analyzer != null;
    this.queryTerms = QueryUtils.tokenizeQueryString(builder.queryStr, builder.analyzer);
    // list of unique terms contained in the query (stopped, analyzed)
    final String[] uniqueQueryTerms = this.queryTerms.stream().distinct().toArray(String[]::new);
    final int uniqueTermsCount = uniqueQueryTerms.length;
    // heavily based on code from org.apache.lucene.queries.CommonTermsQuery
    assert builder.reader != null;
    final List<LeafReaderContext> leaves = builder.reader.leaves();
    final int maxDoc = builder.reader.maxDoc();
    TermsEnum termsEnum = null;
    final List<Query> subQueries = new ArrayList<>(10);
    assert builder.fields != null;
    for (final String field : builder.fields) {
        // One TermContext per unique term; stays null if the term does not
        // occur in this field at all.
        final TermContext[] tcArray = new TermContext[uniqueTermsCount];
        final BooleanQuery lowFreq = new BooleanQuery();
        final BooleanQuery highFreq = new BooleanQuery();
        // collect term statistics
        for (int i = 0; i < uniqueTermsCount; i++) {
            final Term term = new Term(field, uniqueQueryTerms[i]);
            // Aggregate per-segment statistics for this term.
            for (final LeafReaderContext context : leaves) {
                final TermContext termContext = tcArray[i];
                final Fields fields = context.reader().fields();
                final Terms terms = fields.terms(field);
                if (terms != null) { // only, if field exists
                    termsEnum = terms.iterator(termsEnum);
                    if (termsEnum != TermsEnum.EMPTY) {
                        if (termsEnum.seekExact(term.bytes())) {
                            if (termContext == null) {
                                // First segment containing the term.
                                tcArray[i] = new TermContext(builder.reader.getContext(), termsEnum.termState(),
                                        context.ord, termsEnum.docFreq(), termsEnum.totalTermFreq());
                            } else {
                                // Merge stats from a further segment.
                                termContext.register(termsEnum.termState(), context.ord, termsEnum.docFreq(),
                                        termsEnum.totalTermFreq());
                            }
                        }
                    }
                }
            }
            // build query
            if (tcArray[i] == null) {
                // Term unseen in this field — treat as low frequency.
                lowFreq.add(new TermQuery(term), builder.lowFreqOccur);
            } else {
                // maxTermFrequency >= 1 is an absolute doc-freq threshold,
                // otherwise it is a fraction of maxDoc.
                if ((builder.maxTermFrequency >= 1f && (float) tcArray[i].docFreq() > builder.maxTermFrequency)
                        || (tcArray[i].docFreq() > (int) Math
                                .ceil((double) (builder.maxTermFrequency * (float) maxDoc)))) {
                    highFreq.add(new TermQuery(term, tcArray[i]), builder.highFreqOccur);
                } else {
                    lowFreq.add(new TermQuery(term, tcArray[i]), builder.lowFreqOccur);
                }
            }
            // NOTE(review): these min-should-match updates run on every loop
            // iteration; Lucene's CommonTermsQuery applies them after the
            // term loop. The last iteration leaves the same final value, so
            // behavior is unchanged, but confirm this placement is intended.
            final int numLowFreqClauses = lowFreq.clauses().size();
            final int numHighFreqClauses = highFreq.clauses().size();
            if (builder.lowFreqOccur == Occur.SHOULD && numLowFreqClauses > 0) {
                lowFreq.setMinimumNumberShouldMatch(numLowFreqClauses);
            }
            if (builder.highFreqOccur == Occur.SHOULD && numHighFreqClauses > 0) {
                highFreq.setMinimumNumberShouldMatch(numHighFreqClauses);
            }
        }
        if (LOG.isDebugEnabled()) {
            LOG.debug("qLF={}", lowFreq);
            LOG.debug("qHF={}", highFreq);
        }
        // Combine the two frequency buckets for this field; skip empty ones.
        if (lowFreq.clauses().isEmpty()) {
            subQueries.add(highFreq);
        } else if (highFreq.clauses().isEmpty()) {
            subQueries.add(lowFreq);
        } else {
            final BooleanQuery query = new BooleanQuery(true); // final query
            query.add(highFreq, Occur.SHOULD);
            query.add(lowFreq, Occur.MUST);
            subQueries.add(query);
        }
    }
    if (LOG.isDebugEnabled()) {
        LOG.debug("qList={}", subQueries);
    }
    // Single field: use its query directly; otherwise take the best-scoring
    // field with a small tie-breaker multiplier.
    this.query = subQueries.size() == 1 ? subQueries.get(0) : new DisjunctionMaxQuery(subQueries, 0.1f);
    if (LOG.isDebugEnabled()) {
        LOG.debug("RCTQ {} uQt={}", this.query, uniqueQueryTerms);
    }
}