Example usage for org.apache.lucene.search BooleanQuery BooleanQuery

List of usage examples for org.apache.lucene.search BooleanQuery BooleanQuery

Introduction

In this page you can find the example usage for org.apache.lucene.search BooleanQuery BooleanQuery.

Prototype

public BooleanQuery()

Source Link

Usage

From source file:de.mirkosertic.easydav.index.QueryParser.java

License:Open Source License

/**
 * Parses a simple query language into a Lucene {@link Query}: whitespace
 * separates terms, a leading "-" negates the following term, a leading "+"
 * resets negation, and double quotes group characters (including spaces
 * and +/-) into a single term. All input is lowercased.
 *
 * @param aQuery the raw query string
 * @param aSearchField the index field the sub-queries are built against
 * @return the assembled boolean query
 */
public Query parse(String aQuery, String aSearchField) {

    BooleanQuery query = new BooleanQuery();

    boolean insideQuotes = false;
    boolean negated = false;
    StringBuilder term = new StringBuilder();

    for (int position = 0; position < aQuery.length(); position++) {
        char current = Character.toLowerCase(aQuery.charAt(position));

        if (current == '\"') {
            // Quotes are never part of a term; they only toggle quoting mode.
            insideQuotes = !insideQuotes;
            continue;
        }
        if (insideQuotes) {
            // Inside quotes every character is literal, including +, - and space.
            term.append(current);
            continue;
        }
        if (current == '-' && term.length() == 0) {
            // A leading "-" marks the upcoming term as negated.
            negated = true;
        } else if (current == '+' && term.length() == 0) {
            // A leading "+" explicitly clears negation.
            negated = false;
        } else if (current == ' ') {
            // Term boundary: flush the collected term and reset state.
            addSubQuery(query, term.toString(), negated, aSearchField);
            term = new StringBuilder();
            negated = false;
        } else {
            // Mid-term "+"/"-" and ordinary characters are literal.
            term.append(current);
        }
    }

    // Flush the trailing term, if any.
    if (term.length() > 0) {
        addSubQuery(query, term.toString(), negated, aSearchField);
    }

    return query;
}

From source file:de.powerstaff.business.service.impl.ProfileSearchServiceImpl.java

License:Open Source License

/**
 * Builds the core search query for a saved profile search: the parsed
 * profile-content query, optionally AND-ed with a postcode wildcard match.
 *
 * @param aRequest the saved search holding content and optional postcode
 * @param aAnalyzer the analyzer used to parse the content query
 * @return the combined boolean query
 * @throws IOException on index access errors while parsing
 * @throws ParseException if the content query cannot be parsed
 */
private Query getRealQuery(SavedProfileSearch aRequest, Analyzer aAnalyzer) throws IOException, ParseException {

    BooleanQuery result = new BooleanQuery();

    GoogleStyleQueryParser parser = new GoogleStyleQueryParser(null);
    Query contentQuery = parser.parseQuery(aRequest.getProfileContent(), aAnalyzer,
            ProfileIndexerService.CONTENT);
    result.add(contentQuery, Occur.MUST);

    String plz = aRequest.getPlz();
    if (!StringUtils.isEmpty(plz)) {
        // SQL-style "%" wildcards in the postcode are mapped to Lucene "*".
        Term plzTerm = new Term(ProfileIndexerService.PLZ, plz.replace("%", "*"));
        result.add(new WildcardQuery(plzTerm), Occur.MUST);
    }

    return result;
}

From source file:de.powerstaff.business.service.impl.ProfileSearchServiceImpl.java

License:Open Source License

/**
 * Executes a profile search and returns one page of results, including
 * highlighted excerpts.
 *
 * <p>The real query is wrapped so that explicitly ignored profiles are
 * excluded, profiles with a contact ban are always filtered out, and an
 * optional hourly-rate range plus sort order from the request are applied.
 *
 * @param aRequest the saved search request (must not be {@code null})
 * @param startRow index of the first result row to return
 * @param pageSize maximum number of rows in the returned page
 * @return a page of search entries together with the total result size
 * @throws Exception on query parsing or index access failures
 */
@Override
public DataPage<ProfileSearchEntry> findProfileDataPage(SavedProfileSearch aRequest, int startRow, int pageSize)
        throws Exception {

    if (aRequest.getId() == null) {
        // Can happen when the search is invoked for the very first time.
        return new DataPage<ProfileSearchEntry>(0, 0, new ArrayList<ProfileSearchEntry>());
    }

    Analyzer theAnalyzer = ProfileAnalyzerFactory.createAnalyzer();

    FullTextSession theSession = Search.getFullTextSession(sessionFactory.getCurrentSession());

    Query theQuery = getRealQuery(aRequest, theAnalyzer);

    LOGGER.info("Search query is " + theQuery + " from " + startRow + " with pagesize " + pageSize);

    Highlighter theHighlighter = new Highlighter(new SpanGradientFormatter(1, "#000000", "#0000FF", null, null),
            new QueryScorer(theQuery));

    // Wrap the real query so ignored profiles can be excluded via MUST_NOT.
    BooleanQuery theRealQuery = new BooleanQuery();
    theRealQuery.add(theQuery, Occur.MUST);

    // FIX: removed the dead "aRequest != null" guard here - aRequest is
    // already dereferenced unconditionally at the top of this method.
    for (String theId : aRequest.getProfilesToIgnore()) {
        theRealQuery.add(new TermQuery(new Term(ProfileIndexerService.UNIQUE_ID, theId)), Occur.MUST_NOT);
    }

    LOGGER.info("Query with ignore is " + theRealQuery);

    Sort theSort = null;
    if (!StringUtils.isEmpty(aRequest.getSortierung())) {
        int theSortType = SortField.STRING;
        boolean theReverse = false;

        String theSortField = aRequest.getSortierungField();

        // Hourly rate is indexed as a numeric long field.
        if (ProfileIndexerService.STUNDENSATZ.equals(theSortField)) {
            theSortType = SortField.LONG;
        }
        // Availability and last-contact sort newest-first by default.
        if (ProfileIndexerService.VERFUEGBARKEIT.equals(theSortField)) {
            theReverse = true;
        }
        if (ProfileIndexerService.LETZTERKONTAKT.equals(theSortField)) {
            theReverse = true;
        }

        if (aRequest.isSortierungReverse()) {
            theReverse = !theReverse;
        }

        theSort = new Sort(new SortField(theSortField, theSortType, theReverse));
    }

    // Profiles with a contact ban are always excluded.
    List<Filter> theFilterList = new ArrayList<Filter>();
    TermsFilter theContactForbidden = new TermsFilter();
    theContactForbidden.addTerm(new Term(ProfileIndexerService.KONTAKTSPERRE, "false"));
    theFilterList.add(theContactForbidden);

    // Optional hourly-rate bounds (FIX: dropped the redundant outer
    // "von != null || bis != null" wrapper - the inner checks suffice).
    if (aRequest.getStundensatzVon() != null) {
        theFilterList.add(NumericRangeFilter.newLongRange(ProfileIndexerService.STUNDENSATZ,
                aRequest.getStundensatzVon(), Long.MAX_VALUE, true, true));
    }
    if (aRequest.getStundensatzBis() != null) {
        theFilterList.add(NumericRangeFilter.newLongRange(ProfileIndexerService.STUNDENSATZ, 0l,
                aRequest.getStundensatzBis(), true, true));
    }

    Filter theFilter = new ChainedFilter(theFilterList.toArray(new Filter[theFilterList.size()]),
            ChainedFilter.AND);

    FullTextQuery theHibernateQuery = theSession.createFullTextQuery(theRealQuery, Freelancer.class);
    // FIX: removed the dead "theFilter != null" check - theFilter is always
    // constructed right above.
    theHibernateQuery.setFilter(theFilter);
    if (theSort != null) {
        theHibernateQuery.setSort(theSort);
    }
    theHibernateQuery.setFirstResult(startRow);
    theHibernateQuery.setMaxResults(pageSize);
    theHibernateQuery.setProjection(FullTextQuery.THIS, FullTextQuery.DOCUMENT);

    List<ProfileSearchEntry> theResult = new ArrayList<ProfileSearchEntry>();

    // Each projected row holds the entity plus its Lucene document; both are
    // needed to build the highlighted result entry.
    for (Object theSingleEntity : theHibernateQuery.list()) {
        Object[] theRow = (Object[]) theSingleEntity;
        Freelancer theFreelancer = (Freelancer) theRow[0];
        Document theDocument = (Document) theRow[1];
        ProfileSearchEntry theEntry = createResultEntry(theAnalyzer, theQuery, theHighlighter, theFreelancer,
                theDocument);

        theResult.add(theEntry);
    }

    return new DataPage<ProfileSearchEntry>(theHibernateQuery.getResultSize(), startRow, theResult);
}

From source file:de.spartusch.nasfvi.server.NQuery.java

License:Apache License

/**
 * Extends the given query with a semester-date restriction derived from the
 * grammatical tense of the question.
 *
 * @param tense Grammatical tense to interpret
 * @param query Query to extend
 * @return The query AND-ed with the tense-specific date range
 */
private static Query interpretTense(final Grammar.Tense tense, final Query query) {
    final Semester now = new Semester();
    final Query tenseQuery;

    switch (tense) {
    case pqperf:
        // Past perfect: semesters that ended before Feb 21 of last year.
        final int year = new GregorianCalendar().get(GregorianCalendar.YEAR) - 1;
        tenseQuery = new TermRangeQuery("semester_end", "19700101", Integer.toString(year) + "0221", true,
                false);
        break;
    case perf:
        // Perfect: semesters that began strictly before the current one.
        tenseQuery = new TermRangeQuery("semester_beg", "19700101", now.getBegin(), true, false);
        break;
    case praet:
        // Preterite: like perfect, but the current semester is included.
        tenseQuery = new TermRangeQuery("semester_beg", "19700101", now.getBegin(), true, true);
        break;
    case praes:
        // Present: exactly the current semester.
        tenseQuery = new TermQuery(new Term("semester", now.getCanonical()));
        break;
    case fut1:
        // Future I: semesters ending strictly after the current one.
        tenseQuery = new TermRangeQuery("semester_end", now.getEnd(), "29991231", false, true);
        break;
    default:
        // All Grammar.Tense constants must be handled above.
        throw new AssertionError();
    }

    final BooleanQuery combined = new BooleanQuery();
    combined.add(query, BooleanClause.Occur.MUST);
    combined.add(tenseQuery, BooleanClause.Occur.MUST);

    return combined;
}

From source file:de.spartusch.nasfvi.server.NSearcher.java

License:Apache License

/**
 * Searches the index using a {@link NQuery}.
 * @param nquery Query to search for
 * @param offset Offset to use for the search
 * @return Matching documents
 * @throws IOException if there is an IOException when accessing the index
 */
public final TopDocs search(final NQuery nquery, final int offset) throws IOException {
    Query q = nquery.getQuery();

    if (nquery.hasSimilarityQuery()) {
        // "More like this" mode: resolve the similarity query to its single
        // best match first, then search for documents resembling that one.
        Query similQuery = nquery.getSimilarityQuery();
        TopDocs similDocs = search(similQuery, 1);

        if (similDocs.totalHits == 0) {
            // Nothing to be similar to - return an empty result set.
            return new TopDocs(0, new ScoreDoc[0], 0f);
        }

        int similDocNum = similDocs.scoreDocs[0].doc;
        String similId = searcher.doc(similDocNum).get("id");
        Query exclude = new TermQuery(new Term("id", similId));
        // exclude the document compared to

        MoreLikeThis mlt = new MoreLikeThis(searcher.getIndexReader());
        mlt.setFieldNames(SIMILARITY_FIELDS);
        Query moreLikeQuery = mlt.like(similDocNum);

        // Original query AND similarity terms, minus the reference document.
        BooleanQuery booleanQuery = new BooleanQuery();
        booleanQuery.add(q, BooleanClause.Occur.MUST);
        booleanQuery.add(moreLikeQuery, BooleanClause.Occur.MUST);
        booleanQuery.add(exclude, BooleanClause.Occur.MUST_NOT);

        q = booleanQuery;
    }

    // NOTE(review): the second argument of the search(Query, int) overload is
    // not visible here; "offset + 5" presumably requests up to 5 hits past the
    // offset (a 5-result window) - confirm against that overload.
    return search(q, offset + 5);
}

From source file:de.tudarmstadt.ukp.dkpro.core.decompounding.web1t.Finder.java

License:Apache License

/**
 * Find all n-grams containing these tokens in order but optionally with words between them.
 * /*ww  w. j a  v a2 s  .  co m*/
 * @param aToken
 *            A list of tokens
 * @return the n-grams.
 */
@SuppressWarnings("unchecked")
public List<NGramModel> find(String[] aToken) {
    BooleanQuery q = new BooleanQuery();
    PhraseQuery pq = new PhraseQuery();
    pq.setSlop((5 - aToken.length) >= 0 ? (5 - aToken.length) : 0); // max 5-grams in the web1t
    for (String t : aToken) {
        pq.add(new Term("gram", t.toLowerCase()));
        // q.add(new TermQuery(new Term("gram", t.toLowerCase())), Occur.MUST);
    }
    q.add(pq, Occur.MUST);

    String cacheKey = q.toString();

    if (ngramCache.containsKey(cacheKey)) {
        List<NGramModel> list = (List<NGramModel>) ngramCache.get(cacheKey);
        return list;
    }

    try {
        // System.out.printf("Searching [%s]... ", cacheKey);

        NGramCollector collector = new NGramCollector();
        // long start = System.currentTimeMillis();
        searcher.search(q, collector);
        List<NGramModel> ngrams = collector.getNgrams();

        ngramCache.put(cacheKey, ngrams);

        // long now = System.currentTimeMillis();
        // System.out.printf(" (%d in %dms)%n", ngrams.size(), now - start);
        // for (NGram ng : ngrams) {
        // System.out.printf("   %s%n", ng);
        // }

        return ngrams;
    } catch (IOException e) {
        throw new IllegalStateException(e);
    }
}

From source file:de.tudarmstadt.ukp.teaching.uima.nounDecompounding.web1t.Finder.java

License:Open Source License

/**
 * Find all n-grams in the index that contain every given token.
 * Successful lookups are memoized using the rendered query as key.
 *
 * @param token A list of tokens; each must occur in a matching n-gram
 * @return the matching n-grams (at most 100), possibly empty
 */
public List<NGram> find(String[] token) {
    List<NGram> ngrams = new ArrayList<NGram>();

    // One MUST clause per token: all tokens have to occur in the n-gram.
    BooleanQuery q = new BooleanQuery();
    for (String t : token) {
        q.add(new TermQuery(new Term("gram", t.toLowerCase())), Occur.MUST);
    }

    String cacheKey = q.toString();
    if (cache.containsKey(cacheKey)) {
        return cache.get(cacheKey);
    }

    try {
        ScoreDoc[] results = searcher.search(q, 100).scoreDocs;
        Document doc;

        for (ScoreDoc scoreDoc : results) {
            doc = searcher.doc(scoreDoc.doc);
            ngrams.add(new NGram(doc.get("gram"), Integer.valueOf(doc.get("freq"))));
        }

        // FIX: only cache on success. Previously the cache was populated
        // even after an IOException, permanently pinning an empty (wrong)
        // result for this query.
        cache.put(cacheKey, ngrams);
    } catch (IOException e) {
        // Best-effort: log the failure and return what was collected so far.
        e.printStackTrace();
    }

    return ngrams;
}

From source file:de.twitterlivesearch.analysis.Searcher.java

License:Apache License

/**
 * This is the same as
 * {@link de.twitterlivesearch.analysis.Searcher#searchForTweets(String)
 * searchForTweets(String)}, but the search is limited to the tweet with the
 * given id. This can for example be used to analyze the latest incoming
 * tweet.
 *
 * @param id tweet id to restrict the search to, or {@code null} for no limit
 * @param queryString the user query; an empty string yields an empty result
 * @return matching documents; empty list on recoverable errors,
 *         {@code null} when the index does not exist or cannot be opened
 *         (kept for backward compatibility with existing callers)
 */
public List<Document> searchForTweets(Integer id, String queryString) {
    if (queryString.isEmpty()) {
        return Collections.emptyList();
    }

    AbstractConfiguration config = ConfigurationHolder.getConfiguration();
    try {
        if (!DirectoryReader.indexExists(directory)) {
            return null;
        }
    } catch (IOException e) {
        log.fatal("Error when trying to check if directory exists!", e);
        return new ArrayList<>();
    }
    DirectoryReader ireader;
    try {
        ireader = DirectoryReader.open(directory);
    } catch (IOException e) {
        log.fatal("Error when trying to open directory!", e);
        return null;
    }

    try {
        IndexSearcher isearcher = new IndexSearcher(ireader);
        QueryParser parser = new QueryParser(FieldNames.TEXT.getField(),
                AnalyzerMapping.getInstance().ANALYZER_FOR_DELIMITER);
        parser.setDefaultOperator(config.getDefaultOperator());

        Query textQuery;
        try {
            textQuery = parser.parse(queryString);
        } catch (ParseException e) {
            log.fatal("Error while parsing query: " + queryString, e);
            // FIX: previously execution fell through with a null textQuery,
            // causing a NullPointerException in query.add() below.
            return new ArrayList<>();
        }

        BooleanQuery query = new BooleanQuery();

        // if id does not equal null only the query with the given id will be
        // searched; this can be used to search the latest element only
        if (id != null) {
            Query idQuery = NumericRangeQuery.newIntRange(FieldNames.ID.getField(), id.intValue(),
                    id.intValue(), true, true);
            query.add(idQuery, Occur.MUST);
        }
        query.add(textQuery, Occur.MUST);

        ScoreDoc[] hits;
        try {
            hits = isearcher.search(query, 1000).scoreDocs;
        } catch (IOException e) {
            log.fatal("Error while trying to search!", e);
            // FIX: previously hits stayed null and the loop below NPE'd.
            return new ArrayList<>();
        }

        List<Document> result = new ArrayList<>();
        for (int i = 0; i < hits.length; i++) {
            try {
                result.add(isearcher.doc(hits[i].doc));
                log.info("Found result for query \"" + queryString + "\".");
            } catch (IOException e) {
                log.fatal("Error when getting document!", e);
            }
        }
        return result;
    } finally {
        // FIX: the DirectoryReader was never closed, leaking a file handle
        // on every call.
        try {
            ireader.close();
        } catch (IOException e) {
            log.fatal("Error when closing index reader!", e);
        }
    }
}

From source file:de.unihildesheim.iw.cli.DumpIPCs.java

License:Open Source License

/**
 * Dumps the IPC codes stored in the index. If an IPC record was supplied on
 * the command line, the index is filtered to matching documents and only
 * codes matching the derived regular expression are printed; non-matching
 * codes are counted as skipped.
 *
 * @param args command-line arguments
 * @throws IOException on low-level i/o-errors while reading the index
 * @throws BuildException if building the filtered index reader fails
 */
private void runMain(final String... args) throws IOException, BuildException {
    // NOTE(review): this CmdLineParser instance is discarded immediately;
    // parsing appears to happen inside parseWithHelp() below - confirm
    // whether this line is needed at all.
    new CmdLineParser(this.cliParams);
    parseWithHelp(this.cliParams, args);

    // check, if files and directories are sane
    this.cliParams.check();

    assert this.cliParams.idxReader != null;
    final int maxDoc = this.cliParams.idxReader.maxDoc();
    if (maxDoc == 0) {
        LOG.error("Empty index.");
        return;
    }

    // Parser for IPC (International Patent Classification) codes.
    final Parser ipcParser = new Parser();
    ipcParser.separatorChar(this.cliParams.sep);
    ipcParser.allowZeroPad(this.cliParams.zeroPad);

    // NOTE(review): this reader is never closed in this method - verify the
    // builder/built reader takes ownership, otherwise this leaks a handle.
    final DirectoryReader reader = DirectoryReader.open(FSDirectory.open(this.cliParams.idxDir.toPath()));
    final Builder idxReaderBuilder = new Builder(reader);

    Pattern rx_ipc = null;

    if (this.cliParams.ipc != null) {
        // Restrict the reader to documents matching the requested IPC code
        // and build a regex used below to post-filter individual terms.
        final IPCRecord ipc = ipcParser.parse(this.cliParams.ipc);
        final BooleanQuery bq = new BooleanQuery();
        rx_ipc = Pattern.compile(ipc.toRegExpString(this.cliParams.sep));
        if (LOG.isDebugEnabled()) {
            LOG.debug("IPC regExp: rx={} pat={}", ipc.toRegExpString(this.cliParams.sep), rx_ipc);
        }

        bq.add(new QueryWrapperFilter(IPCClassQuery.get(ipc, this.cliParams.sep)), Occur.MUST);
        bq.add(new QueryWrapperFilter(
                new IPCFieldFilter(new IPCFieldFilterFunctions.SloppyMatch(ipc), ipcParser)), Occur.MUST);
        idxReaderBuilder.queryFilter(new QueryWrapperFilter(bq));
    }

    final IndexReader idxReader = idxReaderBuilder.build();

    if (idxReader.numDocs() > 0) {
        // Walk every term of the IPC field and print matching codes.
        final Terms terms = MultiFields.getTerms(idxReader, LUCENE_CONF.FLD_IPC);
        TermsEnum termsEnum = TermsEnum.EMPTY;
        BytesRef term;
        if (terms != null) {
            termsEnum = terms.iterator(termsEnum);
            term = termsEnum.next();

            final int[] count = { 0, 0 }; // match, exclude
            while (term != null) {
                final String code = term.utf8ToString();
                if (rx_ipc == null || (rx_ipc.matcher(code).matches())) {
                    // Print the code in several formats; fall back to a
                    // plain "INVALID" line if it cannot be formatted.
                    final IPCRecord record = ipcParser.parse(code);
                    try {
                        System.out.println(code + ' ' + record + " (" + record.toFormattedString() + ") " + '['
                                + record.toRegExpString('-') + ']');
                    } catch (final IllegalArgumentException e) {
                        System.out.println(code + ' ' + "INVALID (" + code + ')');
                    }
                    count[0]++;
                } else {
                    if (LOG.isDebugEnabled()) {
                        LOG.debug("Skip non matching IPC: {}", code);
                    }
                    count[1]++;
                }
                term = termsEnum.next();
            }
            LOG.info("match={} skip={}", count[0], count[1]);
        }
    } else {
        LOG.info("No documents left after filtering.");
    }
}

From source file:de.unihildesheim.iw.lucene.query.RelaxableCommonTermsQuery.java

License:Open Source License

/**
 * New instance using settings from the supplied {@link Builder} instance.
 *
 * <p>Per field, query terms are split into low- and high-frequency
 * {@code BooleanQuery} clauses based on their document frequency (heavily
 * based on code from {@code org.apache.lucene.queries.CommonTermsQuery});
 * the per-field queries are then combined into a single
 * {@code DisjunctionMaxQuery}.
 *
 * @param builder {@link Builder} Instance builder
 * @throws IOException Thrown on low-level i/o-errors
 */
@SuppressWarnings({ "ObjectAllocationInLoop", "ObjectEquality" })
RelaxableCommonTermsQuery(@NotNull final Builder builder) throws IOException {
    // get all query terms (stopped, analyzed)
    assert builder.queryStr != null;
    assert builder.analyzer != null;
    this.queryTerms = QueryUtils.tokenizeQueryString(builder.queryStr, builder.analyzer);

    // list of unique terms contained in the query
    final String[] uniqueQueryTerms = this.queryTerms.stream().distinct().toArray(String[]::new);
    final int uniqueTermsCount = uniqueQueryTerms.length;

    assert builder.reader != null;
    final List<LeafReaderContext> leaves = builder.reader.leaves();
    final int maxDoc = builder.reader.maxDoc();
    TermsEnum termsEnum = null;
    final List<Query> subQueries = new ArrayList<>(10);

    assert builder.fields != null;
    for (final String field : builder.fields) {
        final TermContext[] tcArray = new TermContext[uniqueTermsCount];
        final BooleanQuery lowFreq = new BooleanQuery();
        final BooleanQuery highFreq = new BooleanQuery();

        for (int i = 0; i < uniqueTermsCount; i++) {
            final Term term = new Term(field, uniqueQueryTerms[i]);

            // collect term statistics across all index segments
            for (final LeafReaderContext context : leaves) {
                final TermContext termContext = tcArray[i];
                final Fields fields = context.reader().fields();
                final Terms terms = fields.terms(field);
                if (terms != null) {
                    // only, if field exists
                    termsEnum = terms.iterator(termsEnum);
                    if (termsEnum != TermsEnum.EMPTY) {
                        if (termsEnum.seekExact(term.bytes())) {
                            if (termContext == null) {
                                tcArray[i] = new TermContext(builder.reader.getContext(), termsEnum.termState(),
                                        context.ord, termsEnum.docFreq(), termsEnum.totalTermFreq());
                            } else {
                                termContext.register(termsEnum.termState(), context.ord, termsEnum.docFreq(),
                                        termsEnum.totalTermFreq());
                            }
                        }
                    }
                }
            }

            // sort the term into the low- or high-frequency sub-query
            if (tcArray[i] == null) {
                // term does not occur in the index at all
                lowFreq.add(new TermQuery(term), builder.lowFreqOccur);
            } else {
                if ((builder.maxTermFrequency >= 1f && (float) tcArray[i].docFreq() > builder.maxTermFrequency)
                        || (tcArray[i].docFreq() > (int) Math
                                .ceil((double) (builder.maxTermFrequency * (float) maxDoc)))) {
                    highFreq.add(new TermQuery(term, tcArray[i]), builder.highFreqOccur);
                } else {
                    lowFreq.add(new TermQuery(term, tcArray[i]), builder.lowFreqOccur);
                }
            }
        }

        // FIX: hoisted out of the per-term loop. The minimum-should-match
        // values depend only on the final clause counts, so recomputing them
        // on every iteration was redundant (the last iteration always won);
        // the final query state is identical.
        final int numLowFreqClauses = lowFreq.clauses().size();
        final int numHighFreqClauses = highFreq.clauses().size();
        if (builder.lowFreqOccur == Occur.SHOULD && numLowFreqClauses > 0) {
            lowFreq.setMinimumNumberShouldMatch(numLowFreqClauses);
        }
        if (builder.highFreqOccur == Occur.SHOULD && numHighFreqClauses > 0) {
            highFreq.setMinimumNumberShouldMatch(numHighFreqClauses);
        }

        if (LOG.isDebugEnabled()) {
            LOG.debug("qLF={}", lowFreq);
            LOG.debug("qHF={}", highFreq);
        }

        // keep only non-empty sub-queries; combine both when necessary
        if (lowFreq.clauses().isEmpty()) {
            subQueries.add(highFreq);
        } else if (highFreq.clauses().isEmpty()) {
            subQueries.add(lowFreq);
        } else {
            final BooleanQuery query = new BooleanQuery(true); // final query
            query.add(highFreq, Occur.SHOULD);
            query.add(lowFreq, Occur.MUST);
            subQueries.add(query);
        }
    }
    if (LOG.isDebugEnabled()) {
        LOG.debug("qList={}", subQueries);
    }

    // a single field needs no disjunction wrapper
    this.query = subQueries.size() == 1 ? subQueries.get(0) : new DisjunctionMaxQuery(subQueries, 0.1f);

    if (LOG.isDebugEnabled()) {
        LOG.debug("RCTQ {} uQt={}", this.query, uniqueQueryTerms);
    }
}