List of usage examples for the org.apache.lucene.search.BooleanQuery constructor (BooleanQuery()).
From source file:de.mirkosertic.easydav.index.QueryParser.java
License:Open Source License
public Query parse(String aQuery, String aSearchField) { BooleanQuery theResult = new BooleanQuery(); boolean isStringMode = false; boolean isNegated = false; StringBuilder theCurrentTerm = new StringBuilder(); for (int i = 0; i < aQuery.length(); i++) { char theCurrentChar = Character.toLowerCase(aQuery.charAt(i)); if (theCurrentChar == '\"') { isStringMode = !isStringMode; } else {//from w ww. ja va 2s . c om if (!isStringMode) { switch (theCurrentChar) { case '-': { if (theCurrentTerm.length() == 0) { isNegated = true; } else { theCurrentTerm.append(theCurrentChar); } break; } case '+': if (theCurrentTerm.length() == 0) { isNegated = false; } else { theCurrentTerm.append(theCurrentChar); } break; case ' ': { addSubQuery(theResult, theCurrentTerm.toString(), isNegated, aSearchField); theCurrentTerm = new StringBuilder(); isNegated = false; break; } default: { theCurrentTerm.append(theCurrentChar); break; } } } else { theCurrentTerm.append(theCurrentChar); } } } if (theCurrentTerm.length() > 0) { addSubQuery(theResult, theCurrentTerm.toString(), isNegated, aSearchField); } return theResult; }
From source file:de.powerstaff.business.service.impl.ProfileSearchServiceImpl.java
License:Open Source License
private Query getRealQuery(SavedProfileSearch aRequest, Analyzer aAnalyzer) throws IOException, ParseException { BooleanQuery theQuery = new BooleanQuery(); GoogleStyleQueryParser theParser = new GoogleStyleQueryParser(null); theQuery.add(theParser.parseQuery(aRequest.getProfileContent(), aAnalyzer, ProfileIndexerService.CONTENT), Occur.MUST);//from w w w . ja v a 2s. com if (!StringUtils.isEmpty(aRequest.getPlz())) { theQuery.add( new WildcardQuery(new Term(ProfileIndexerService.PLZ, aRequest.getPlz().replace("%", "*"))), Occur.MUST); } return theQuery; }
From source file:de.powerstaff.business.service.impl.ProfileSearchServiceImpl.java
License:Open Source License
@Override public DataPage<ProfileSearchEntry> findProfileDataPage(SavedProfileSearch aRequest, int startRow, int pageSize) throws Exception { if (aRequest.getId() == null) { // Kann passieren, wenn die Suche das erste mal aufgerufen wird return new DataPage<ProfileSearchEntry>(0, 0, new ArrayList<ProfileSearchEntry>()); }/* w ww .j a v a 2 s. c om*/ Analyzer theAnalyzer = ProfileAnalyzerFactory.createAnalyzer(); FullTextSession theSession = Search.getFullTextSession(sessionFactory.getCurrentSession()); Query theQuery = getRealQuery(aRequest, theAnalyzer); LOGGER.info("Search query is " + theQuery + " from " + startRow + " with pagesize " + pageSize); Highlighter theHighlighter = new Highlighter(new SpanGradientFormatter(1, "#000000", "#0000FF", null, null), new QueryScorer(theQuery)); BooleanQuery theRealQuery = new BooleanQuery(); theRealQuery.add(theQuery, Occur.MUST); if (aRequest != null) { for (String theId : aRequest.getProfilesToIgnore()) { theRealQuery.add(new TermQuery(new Term(ProfileIndexerService.UNIQUE_ID, theId)), Occur.MUST_NOT); } } LOGGER.info("Query with ignore is " + theRealQuery); Sort theSort = null; if (!StringUtils.isEmpty(aRequest.getSortierung())) { int theSortType = SortField.STRING; boolean theReverse = false; String theSortField = aRequest.getSortierungField(); if (ProfileIndexerService.STUNDENSATZ.equals(theSortField)) { theSortType = SortField.LONG; } if (ProfileIndexerService.VERFUEGBARKEIT.equals(theSortField)) { theReverse = true; } if (ProfileIndexerService.LETZTERKONTAKT.equals(theSortField)) { theReverse = true; } if (aRequest.isSortierungReverse()) { theReverse = !theReverse; } theSort = new Sort(new SortField(theSortField, theSortType, theReverse)); } List<Filter> theFilterList = new ArrayList<Filter>(); TermsFilter theContactForbidden = new TermsFilter(); theContactForbidden.addTerm(new Term(ProfileIndexerService.KONTAKTSPERRE, "false")); theFilterList.add(theContactForbidden); if (aRequest.getStundensatzVon() != null || 
aRequest.getStundensatzBis() != null) { if (aRequest.getStundensatzVon() != null) { theFilterList.add(NumericRangeFilter.newLongRange(ProfileIndexerService.STUNDENSATZ, aRequest.getStundensatzVon(), Long.MAX_VALUE, true, true)); } if (aRequest.getStundensatzBis() != null) { theFilterList.add(NumericRangeFilter.newLongRange(ProfileIndexerService.STUNDENSATZ, 0l, aRequest.getStundensatzBis(), true, true)); } } Filter theFilter = new ChainedFilter(theFilterList.toArray(new Filter[theFilterList.size()]), ChainedFilter.AND); int theEnd = startRow + pageSize; FullTextQuery theHibernateQuery = theSession.createFullTextQuery(theRealQuery, Freelancer.class); if (theFilter != null) { theHibernateQuery.setFilter(theFilter); } if (theSort != null) { theHibernateQuery.setSort(theSort); } theHibernateQuery.setFirstResult(startRow); theHibernateQuery.setMaxResults(theEnd - startRow); theHibernateQuery.setProjection(FullTextQuery.THIS, FullTextQuery.DOCUMENT); List<ProfileSearchEntry> theResult = new ArrayList<ProfileSearchEntry>(); for (Object theSingleEntity : theHibernateQuery.list()) { Object[] theRow = (Object[]) theSingleEntity; Freelancer theFreelancer = (Freelancer) theRow[0]; Document theDocument = (Document) theRow[1]; ProfileSearchEntry theEntry = createResultEntry(theAnalyzer, theQuery, theHighlighter, theFreelancer, theDocument); theResult.add(theEntry); } return new DataPage<ProfileSearchEntry>(theHibernateQuery.getResultSize(), startRow, theResult); }
From source file:de.spartusch.nasfvi.server.NQuery.java
License:Apache License
/**
 * Interprets the grammatical tense and extends the {@code query} accordingly.
 *
 * @param tense grammatical tense to interpret
 * @param query query to extend
 * @return the given query combined (MUST + MUST) with a tense-specific
 *         semester restriction
 */
private static Query interpretTense(final Grammar.Tense tense, final Query query) {
    final Semester now = new Semester();
    final Query tenseRestriction;
    switch (tense) {
    case pqperf:
        // Range upper bound is built from last calendar year plus a fixed
        // month/day suffix — presumably a semester-end cutoff; confirm
        // against the Semester date format.
        final int lastYear = new GregorianCalendar().get(GregorianCalendar.YEAR) - 1;
        tenseRestriction = new TermRangeQuery("semester_end", "19700101", Integer.toString(lastYear) + "0221",
                true, false);
        break;
    case perf:
        tenseRestriction = new TermRangeQuery("semester_beg", "19700101", now.getBegin(), true, false);
        break;
    case praet:
        tenseRestriction = new TermRangeQuery("semester_beg", "19700101", now.getBegin(), true, true);
        break;
    case praes:
        tenseRestriction = new TermQuery(new Term("semester", now.getCanonical()));
        break;
    case fut1:
        tenseRestriction = new TermRangeQuery("semester_end", now.getEnd(), "29991231", false, true);
        break;
    default:
        // All enum constants are handled above.
        throw new AssertionError();
    }
    final BooleanQuery combined = new BooleanQuery();
    combined.add(query, BooleanClause.Occur.MUST);
    combined.add(tenseRestriction, BooleanClause.Occur.MUST);
    return combined;
}
From source file:de.spartusch.nasfvi.server.NSearcher.java
License:Apache License
/** * Searches the index using a {@link NQuery}. * @param nquery Query to search for/*from ww w . j a v a2 s. c o m*/ * @param offset Offset to use for the search * @return Matching documents * @throws IOException if there is an IOException when accessing the index */ public final TopDocs search(final NQuery nquery, final int offset) throws IOException { Query q = nquery.getQuery(); if (nquery.hasSimilarityQuery()) { Query similQuery = nquery.getSimilarityQuery(); TopDocs similDocs = search(similQuery, 1); if (similDocs.totalHits == 0) { return new TopDocs(0, new ScoreDoc[0], 0f); } int similDocNum = similDocs.scoreDocs[0].doc; String similId = searcher.doc(similDocNum).get("id"); Query exclude = new TermQuery(new Term("id", similId)); // exclude the document compared to MoreLikeThis mlt = new MoreLikeThis(searcher.getIndexReader()); mlt.setFieldNames(SIMILARITY_FIELDS); Query moreLikeQuery = mlt.like(similDocNum); BooleanQuery booleanQuery = new BooleanQuery(); booleanQuery.add(q, BooleanClause.Occur.MUST); booleanQuery.add(moreLikeQuery, BooleanClause.Occur.MUST); booleanQuery.add(exclude, BooleanClause.Occur.MUST_NOT); q = booleanQuery; } return search(q, offset + 5); // return top 5 results }
From source file:de.tudarmstadt.ukp.dkpro.core.decompounding.web1t.Finder.java
License:Apache License
/** * Find all n-grams containing these tokens in order but optionally with words between them. * /*ww w. j a v a2 s . co m*/ * @param aToken * A list of tokens * @return the n-grams. */ @SuppressWarnings("unchecked") public List<NGramModel> find(String[] aToken) { BooleanQuery q = new BooleanQuery(); PhraseQuery pq = new PhraseQuery(); pq.setSlop((5 - aToken.length) >= 0 ? (5 - aToken.length) : 0); // max 5-grams in the web1t for (String t : aToken) { pq.add(new Term("gram", t.toLowerCase())); // q.add(new TermQuery(new Term("gram", t.toLowerCase())), Occur.MUST); } q.add(pq, Occur.MUST); String cacheKey = q.toString(); if (ngramCache.containsKey(cacheKey)) { List<NGramModel> list = (List<NGramModel>) ngramCache.get(cacheKey); return list; } try { // System.out.printf("Searching [%s]... ", cacheKey); NGramCollector collector = new NGramCollector(); // long start = System.currentTimeMillis(); searcher.search(q, collector); List<NGramModel> ngrams = collector.getNgrams(); ngramCache.put(cacheKey, ngrams); // long now = System.currentTimeMillis(); // System.out.printf(" (%d in %dms)%n", ngrams.size(), now - start); // for (NGram ng : ngrams) { // System.out.printf(" %s%n", ng); // } return ngrams; } catch (IOException e) { throw new IllegalStateException(e); } }
From source file:de.tudarmstadt.ukp.teaching.uima.nounDecompounding.web1t.Finder.java
License:Open Source License
/** * Find all n-grams in the index.//from w ww.j a va 2s . c o m * @param token A list of tokens * @return */ public List<NGram> find(String[] token) { List<NGram> ngrams = new ArrayList<NGram>(); BooleanQuery q = new BooleanQuery(); for (String t : token) { q.add(new TermQuery(new Term("gram", t.toLowerCase())), Occur.MUST); } if (cache.containsKey(q.toString())) { return cache.get(q.toString()); } try { ScoreDoc[] results = searcher.search(q, 100).scoreDocs; Document doc; for (ScoreDoc scoreDoc : results) { doc = searcher.doc(scoreDoc.doc); ngrams.add(new NGram(doc.get("gram"), Integer.valueOf(doc.get("freq")))); } } catch (IOException e) { // TODO: handle exception e.printStackTrace(); } cache.put(q.toString(), ngrams); return ngrams; }
From source file:de.twitterlivesearch.analysis.Searcher.java
License:Apache License
/** * This is the same as// w w w.java 2s . c om * {@link de.twitterlivesearch.analysis.Searcher#searchForTweets(String) * searchForTweets(String)}, but the search is limited to the tweet with the * given id. This can for example be used to analyze the latest incoming * tweet. * * @param id * @param queryString * @return */ public List<Document> searchForTweets(Integer id, String queryString) { if (queryString.isEmpty()) { return Collections.emptyList(); } AbstractConfiguration config = ConfigurationHolder.getConfiguration(); try { if (!DirectoryReader.indexExists(directory)) { return null; } } catch (IOException e) { log.fatal("Error when trying to check if directory exists!", e); return new ArrayList<>(); } DirectoryReader ireader; try { ireader = DirectoryReader.open(directory); } catch (IOException e) { log.fatal("Error when trying to open directory!", e); return null; } IndexSearcher isearcher = new IndexSearcher(ireader); Query textQuery = null; QueryParser parser = new QueryParser(FieldNames.TEXT.getField(), AnalyzerMapping.getInstance().ANALYZER_FOR_DELIMITER); parser.setDefaultOperator(config.getDefaultOperator()); BooleanQuery query = new BooleanQuery(); try { textQuery = parser.parse(queryString); } catch (ParseException e) { log.fatal("Error while parsing query: " + queryString, e); } // if id does not equal null only the query with the given id will be // searched // this can be used to search the latest element only if (id != null) { Query idQuery = NumericRangeQuery.newIntRange(FieldNames.ID.getField(), id.intValue(), id.intValue(), true, true); query.add(idQuery, Occur.MUST); } query.add(textQuery, Occur.MUST); ScoreDoc[] hits = null; try { hits = isearcher.search(query, 1000).scoreDocs; } catch (IOException e) { log.fatal("Error while trying to search!", e); } List<Document> result = new ArrayList<>(); for (int i = 0; i < hits.length; i++) { try { result.add(isearcher.doc(hits[i].doc)); log.info("Found result for query \"" + queryString + "\"."); } 
catch (IOException e) { log.fatal("Error when getting document!", e); } } return result; }
From source file:de.unihildesheim.iw.cli.DumpIPCs.java
License:Open Source License
/**
 * Dumps all IPC codes found in the index, optionally restricted to codes
 * matching a single IPC given on the command line. For each matching code
 * the parsed record, its formatted form and its regular-expression form are
 * printed to stdout; match/skip counts are logged at the end.
 */
private void runMain(final String... args) throws IOException, BuildException {
    // NOTE(review): the result of this allocation is discarded — presumably
    // parseWithHelp below performs the actual parsing; confirm whether this
    // line is needed at all.
    new CmdLineParser(this.cliParams);
    parseWithHelp(this.cliParams, args);
    // check, if files and directories are sane
    this.cliParams.check();
    assert this.cliParams.idxReader != null;
    final int maxDoc = this.cliParams.idxReader.maxDoc();
    if (maxDoc == 0) {
        LOG.error("Empty index.");
        return;
    }
    // Parser configured from the CLI flags (separator char, zero padding).
    final Parser ipcParser = new Parser();
    ipcParser.separatorChar(this.cliParams.sep);
    ipcParser.allowZeroPad(this.cliParams.zeroPad);
    final DirectoryReader reader = DirectoryReader.open(FSDirectory.open(this.cliParams.idxDir.toPath()));
    final Builder idxReaderBuilder = new Builder(reader);
    // rx_ipc stays null when no IPC restriction was given; in that case
    // every code in the index is dumped.
    Pattern rx_ipc = null;
    if (this.cliParams.ipc != null) {
        final IPCRecord ipc = ipcParser.parse(this.cliParams.ipc);
        final BooleanQuery bq = new BooleanQuery();
        rx_ipc = Pattern.compile(ipc.toRegExpString(this.cliParams.sep));
        if (LOG.isDebugEnabled()) {
            LOG.debug("IPC regExp: rx={} pat={}", ipc.toRegExpString(this.cliParams.sep), rx_ipc);
        }
        // Restrict the reader to documents matching both the IPC class query
        // and the sloppy-match field filter.
        bq.add(new QueryWrapperFilter(IPCClassQuery.get(ipc, this.cliParams.sep)), Occur.MUST);
        bq.add(new QueryWrapperFilter(
                new IPCFieldFilter(new IPCFieldFilterFunctions.SloppyMatch(ipc), ipcParser)), Occur.MUST);
        idxReaderBuilder.queryFilter(new QueryWrapperFilter(bq));
    }
    final IndexReader idxReader = idxReaderBuilder.build();
    if (idxReader.numDocs() > 0) {
        // Walk every term of the IPC field and print those matching the
        // optional regular expression.
        final Terms terms = MultiFields.getTerms(idxReader, LUCENE_CONF.FLD_IPC);
        TermsEnum termsEnum = TermsEnum.EMPTY;
        BytesRef term;
        if (terms != null) {
            termsEnum = terms.iterator(termsEnum);
            term = termsEnum.next();
            final int[] count = { 0, 0 }; // match, exclude
            while (term != null) {
                final String code = term.utf8ToString();
                if (rx_ipc == null || (rx_ipc.matcher(code).matches())) {
                    final IPCRecord record = ipcParser.parse(code);
                    try {
                        System.out.println(code + ' ' + record + " (" + record.toFormattedString() + ") " + '['
                                + record.toRegExpString('-') + ']');
                    } catch (final IllegalArgumentException e) {
                        // Record could not be rendered — flag it but keep going.
                        System.out.println(code + ' ' + "INVALID (" + code + ')');
                    }
                    count[0]++;
                } else {
                    if (LOG.isDebugEnabled()) {
                        LOG.debug("Skip non matching IPC: {}", code);
                    }
                    count[1]++;
                }
                term = termsEnum.next();
            }
            LOG.info("match={} skip={}", count[0], count[1]);
        }
    } else {
        LOG.info("No documents left after filtering.");
    }
}
From source file:de.unihildesheim.iw.lucene.query.RelaxableCommonTermsQuery.java
License:Open Source License
/**
 * New instance using settings from the supplied {@link Builder} instance.
 *
 * Tokenizes the query string, splits the unique terms per field into
 * low-frequency and high-frequency sub-queries based on the builder's
 * maximum term frequency, and combines the per-field queries into a
 * {@link DisjunctionMaxQuery} (or uses the single field query directly).
 *
 * @param builder {@link Builder} Instance builder
 * @throws IOException Thrown on low-level i/o-errors
 */
@SuppressWarnings({ "ObjectAllocationInLoop", "ObjectEquality" })
RelaxableCommonTermsQuery(@NotNull final Builder builder) throws IOException {
    // get all query terms
    assert builder.queryStr != null;
    assert builder.analyzer != null;
    this.queryTerms = QueryUtils.tokenizeQueryString(builder.queryStr, builder.analyzer);
    // list of unique terms contained in the query (stopped, analyzed)
    final String[] uniqueQueryTerms = this.queryTerms.stream().distinct().toArray(String[]::new);
    final int uniqueTermsCount = uniqueQueryTerms.length;
    // heavily based on code from org.apache.lucene.queries.CommonTermsQuery
    assert builder.reader != null;
    final List<LeafReaderContext> leaves = builder.reader.leaves();
    final int maxDoc = builder.reader.maxDoc();
    TermsEnum termsEnum = null;
    final List<Query> subQueries = new ArrayList<>(10);
    assert builder.fields != null;
    for (final String field : builder.fields) {
        // One TermContext per unique term; stays null if the term does not
        // occur in this field at all.
        final TermContext[] tcArray = new TermContext[uniqueTermsCount];
        final BooleanQuery lowFreq = new BooleanQuery();
        final BooleanQuery highFreq = new BooleanQuery();
        // collect term statistics
        for (int i = 0; i < uniqueTermsCount; i++) {
            final Term term = new Term(field, uniqueQueryTerms[i]);
            // Aggregate per-segment statistics for this term.
            for (final LeafReaderContext context : leaves) {
                final TermContext termContext = tcArray[i];
                final Fields fields = context.reader().fields();
                final Terms terms = fields.terms(field);
                if (terms != null) { // only, if field exists
                    termsEnum = terms.iterator(termsEnum);
                    if (termsEnum != TermsEnum.EMPTY) {
                        if (termsEnum.seekExact(term.bytes())) {
                            if (termContext == null) {
                                // First segment containing the term.
                                tcArray[i] = new TermContext(builder.reader.getContext(), termsEnum.termState(),
                                        context.ord, termsEnum.docFreq(), termsEnum.totalTermFreq());
                            } else {
                                // Merge stats from a further segment.
                                termContext.register(termsEnum.termState(), context.ord, termsEnum.docFreq(),
                                        termsEnum.totalTermFreq());
                            }
                        }
                    }
                }
            }
            // build query
            if (tcArray[i] == null) {
                // Term unseen in this field — treat as low frequency.
                lowFreq.add(new TermQuery(term), builder.lowFreqOccur);
            } else {
                // maxTermFrequency >= 1 is an absolute doc-freq threshold,
                // otherwise it is a fraction of maxDoc.
                if ((builder.maxTermFrequency >= 1f && (float) tcArray[i].docFreq() > builder.maxTermFrequency)
                        || (tcArray[i].docFreq() > (int) Math
                                .ceil((double) (builder.maxTermFrequency * (float) maxDoc)))) {
                    highFreq.add(new TermQuery(term, tcArray[i]), builder.highFreqOccur);
                } else {
                    lowFreq.add(new TermQuery(term, tcArray[i]), builder.lowFreqOccur);
                }
            }
            // NOTE(review): these min-should-match updates run on every loop
            // iteration; Lucene's CommonTermsQuery applies them after the
            // term loop. The last iteration leaves the same final value, so
            // behavior is unchanged, but confirm this placement is intended.
            final int numLowFreqClauses = lowFreq.clauses().size();
            final int numHighFreqClauses = highFreq.clauses().size();
            if (builder.lowFreqOccur == Occur.SHOULD && numLowFreqClauses > 0) {
                lowFreq.setMinimumNumberShouldMatch(numLowFreqClauses);
            }
            if (builder.highFreqOccur == Occur.SHOULD && numHighFreqClauses > 0) {
                highFreq.setMinimumNumberShouldMatch(numHighFreqClauses);
            }
        }
        if (LOG.isDebugEnabled()) {
            LOG.debug("qLF={}", lowFreq);
            LOG.debug("qHF={}", highFreq);
        }
        // Combine the two frequency buckets for this field; skip empty ones.
        if (lowFreq.clauses().isEmpty()) {
            subQueries.add(highFreq);
        } else if (highFreq.clauses().isEmpty()) {
            subQueries.add(lowFreq);
        } else {
            final BooleanQuery query = new BooleanQuery(true); // final query
            query.add(highFreq, Occur.SHOULD);
            query.add(lowFreq, Occur.MUST);
            subQueries.add(query);
        }
    }
    if (LOG.isDebugEnabled()) {
        LOG.debug("qList={}", subQueries);
    }
    // Single field: use its query directly; otherwise take the best-scoring
    // field with a small tie-breaker multiplier.
    this.query = subQueries.size() == 1 ? subQueries.get(0) : new DisjunctionMaxQuery(subQueries, 0.1f);
    if (LOG.isDebugEnabled()) {
        LOG.debug("RCTQ {} uQt={}", this.query, uniqueQueryTerms);
    }
}