List of usage examples for org.apache.lucene.queryparser.classic QueryParserBase escape
public static String escape(String s)
From source file:com.berico.clavin.resolver.impl.lucene.LuceneLocationNameIndex.java
License:Apache License
/** * Return a list of Resolved Locations that best match the Location Occurrence * found in a document./*from www . j av a 2s . c o m*/ * @param occurrence The Location Occurrence. * @param options Options for the index. * @return List of Resolved Locations matching the occurrence. */ @Override public List<ResolvedLocation> search(LocationOccurrence occurrence, Options options) throws Exception { options = (options == null) ? new Options() : options; // Get the max number of records to return. int limit = options.getInt(KEY_DEFAULT_LIMIT, DEFAULT_LIMIT); // Get whether fuzzy matching is enabled. boolean useFuzzy = options.getBoolean(KEY_DEFAULT_USE_FUZZY, DEFAULT_USE_FUZZY); IndexSearcher searcher = lucene.getSearcherManager().acquire(); boolean usedFuzzy = false; // We need to sanitize the name so it doesn't have unescaped Lucene syntax that // would throw off the search index. String escapedName = QueryParserBase.escape(occurrence.getText().toLowerCase()); // Try an exact query Query query = getExactQuery(escapedName); // Gather the results. TopDocs results = searcher.search(query, null, limit, DEFAULT_SORTER); // If there are no results, and a fuzzy query was requested if (results.scoreDocs.length == 0 && useFuzzy) { usedFuzzy = true; // Attempt a fuzzy query query = getFuzzyQuery(escapedName); // Gather the results results = searcher.search(query, null, limit, DEFAULT_SORTER); } if (results.scoreDocs.length == 0) logger.info("Found no results for {}.", escapedName); return LuceneUtils.convertToLocations(occurrence, searcher, results, usedFuzzy); }
From source file:controllers.SkosToElasticsearch.java
License:Open Source License
/**
 * Runs a query-string query against Elasticsearch and renders the hits as JSON.
 *
 * @param q the q string will be passed to elasticsearch as a queryStringQuery
 * @param index the index to search in
 * @return an array of documents
 */
public CompletionStage<Result> search(String q, String index) {
    response().setHeader("content-type", "application/json");
    // Escape Lucene query syntax so user input cannot break the query string.
    String escaped_q = QueryParserBase.escape(q);
    SearchHits hits = esb.getInstance().query(index, escaped_q, 0, 10);
    List<Map<String, Object>> hitMap = new ArrayList<>();
    for (SearchHit hit : hits.getHits()) {
        Map<String, Object> m = hit.getSource();
        // Expose the document id under "primaryTopic" alongside the source fields.
        m.put("primaryTopic", hit.getId());
        hitMap.add(m);
    }
    // The work above is entirely synchronous; completedFuture is the idiomatic
    // replacement for manually constructing and completing a CompletableFuture.
    return CompletableFuture.completedFuture(ok(SkosToElasticsearch.json(hitMap)));
}
From source file:de.unihildesheim.iw.lucene.query.TryExactTermsQuery.java
License:Open Source License
/** * New instance using the supplied query. * * @param analyzer Query analyzer/*from w w w . j a va 2 s. co m*/ * @param queryStr Query string * @param fields Fields to query * @throws ParseException Thrown, if the query could not be parsed */ public TryExactTermsQuery(@NotNull final Analyzer analyzer, @NotNull final String queryStr, @NotNull final String... fields) throws ParseException { if (fields.length == 0) { throw new IllegalArgumentException("Empty fields list."); } if (StringUtils.isStrippedEmpty(queryStr)) { throw new IllegalArgumentException("Empty query."); } this.queryTerms = QueryUtils.tokenizeQueryString(queryStr, analyzer); final QueryParser qParser = new MultiFieldQueryParser(fields, analyzer); this.query = new BooleanQuery(); this.uniqueQueryTerms = new HashSet<>(this.queryTerms); for (final String term : this.uniqueQueryTerms) { @SuppressWarnings("ObjectAllocationInLoop") final BooleanClause bc = new BooleanClause(qParser.parse(QueryParserBase.escape(term)), Occur.SHOULD); this.query.add(bc); } this.query.setMinimumNumberShouldMatch(this.uniqueQueryTerms.size()); if (LOG.isDebugEnabled()) { LOG.debug("TEQ {} uQt={}", this.query, this.uniqueQueryTerms); } }
From source file:ie.cmrc.smtx.skos.index.lucene.LuceneSKOSIndex.java
License:Apache License
/** * Constructs a query string for the provided keyword and search field * @param keyword A (string keyword, language code) pair * @param field Search field/*from w ww .j a v a 2 s .c o m*/ * @return Lucene query representation of the provided keyword and search field */ private String getQueryString(Term keyword, IndexField.Searchable field) { if (keyword != null) { String kw = keyword.getString(); if (kw != null && (kw = kw.trim()).length() >= this.minKeywordLength) { String queryString; kw = QueryParserBase.escape(kw); String language = keyword.getLanguage(); if (language == null) { /*queryString = "("+IndexField.Property.NAME.fieldName()+":\""+kw+"\") OR (" +IndexField.Property.URI.fieldName()+":\""+kw+"\")";*/ // Search in all languages queryString = ""; int i = 0; for (String lang : this.languages) { if (i > 0) queryString += " OR "; queryString += this.getQueryString(kw, lang, field); i++; } } else { queryString = this.getQueryString(kw, language, field); } return queryString; } } return null; }
From source file:it.unipd.dei.ims.lucene.clef.parser.ClefQQParser.java
License:Apache License
@Override public Query parse(QualityQuery qq) throws ParseException { QueryParser qp = queryParser.get();//ww w . j a va2 s. c o m if (qp == null) { Analyzer analyzer = AnalyzerFactory.createAnalyzer(language, stemmer, stopset); qp = new QueryParser(fieldToSearch, analyzer); queryParser.set(qp); } BooleanQuery bq = new BooleanQuery(); for (int i = 0; i < qqFields.length; i++) bq.add(qp.parse(QueryParserBase.escape(qq.getValue(qqFields[i]))), BooleanClause.Occur.SHOULD); return bq; }
From source file:org.apache.ctakes.dictionary.lookup.lucene.LuceneDictionaryImpl.java
License:Apache License
/** * {@inheritDoc}// ww w. j a v a 2s .c o m */ @Override public Collection<MetaDataHit> getEntries(final String text) throws DictionaryException { final Set<MetaDataHit> metaDataHitSet = new HashSet<>(); try { Query q = null; TopDocs topDoc = null; if (text.indexOf('-') == -1) { q = new TermQuery(new Term(iv_lookupFieldName, text)); topDoc = iv_searcher.search(q, iv_maxHits); } else { // needed the KeyworkAnalyzer for situations where the hypen was included in the f-word final QueryParser query = new QueryParser(Version.LUCENE_40, iv_lookupFieldName, new KeywordAnalyzer()); try { //CTAKES-63 - I believe all of the chars in the str token should be escaped to avoid issues such as a token ending with ']' //topDoc = iv_searcher.search(query.parse(text.replace('-', ' ')), iv_maxHits); final String escaped = QueryParserBase.escape(text.replace('-', ' ')); topDoc = iv_searcher.search(query.parse(escaped), iv_maxHits); } catch (ParseException e) { // thrown by QueryParser.parse() // TODO Auto-generated catch block e.printStackTrace(); } } if (topDoc == null) { // avoids possible NPE on topDoc.scoreDocs 12-26-2012 SPF iv_logger.warn(getClass().getName() + " getEntries(..) 
topDoc is null, returning empty collection"); return Collections.emptySet(); } if (iv_maxHits == 0) { iv_maxHits = Integer.MAX_VALUE; iv_logger.warn("iv_maxHits was 0, using Integer.MAX_VALUE instead"); } final ScoreDoc[] hits = topDoc.scoreDocs; if (hits.length == iv_maxHits) { iv_logger.warn( "'iv_maxHits' equals the list length returned by the lucene query (" + hits.length + ")."); iv_logger.warn( "You may want to consider setting a higher value, since there may be more entries not being returned in the event greater than " + iv_maxHits + " exist."); } for (ScoreDoc scoreDoc : hits) { final Document luceneDoc = iv_searcher.doc(scoreDoc.doc); final MetaDataHit mdh = new LuceneDocumentMetaDataHitImpl(luceneDoc); metaDataHitSet.add(mdh); } return metaDataHitSet; } catch (IOException ioe) { // thrown by IndexSearcher.search(), IndexSearcher.doc() throw new DictionaryException(ioe); } }
From source file:org.apache.jackrabbit.oak.plugins.index.lucene.LucenePropertyIndex.java
License:Apache License
// NOTE(review): large cursor-producing query method, left byte-identical; only
// review comments added. It builds a lazy iterator that pages Lucene results in
// exponentially growing batches, plus branches for spellcheck and suggest
// pseudo-queries, and a SizeEstimator that counts total hits. The physical line
// breaks below come from the source extraction and split statements (and in one
// place a string literal) mid-token — reformat with care before editing logic.
@Override public Cursor query(final IndexPlan plan, NodeState rootState) { final Filter filter = plan.getFilter(); final Sort sort = getSort(plan); final PlanResult pr = getPlanResult(plan); QueryEngineSettings settings = filter.getQueryEngineSettings(); Iterator<LuceneResultRow> itr = new AbstractIterator<LuceneResultRow>() { private final Deque<LuceneResultRow> queue = Queues.newArrayDeque(); private final Set<String> seenPaths = Sets.newHashSet(); private ScoreDoc lastDoc; private int nextBatchSize = LUCENE_QUERY_BATCH_SIZE; private boolean noDocs = false; private long lastSearchIndexerVersion; @Override protected LuceneResultRow computeNext() { while (!queue.isEmpty() || loadDocs()) { return queue.remove(); } return endOfData(); } private LuceneResultRow convertToRow(ScoreDoc doc, IndexSearcher searcher, String excerpt, Facets facets, String explanation) throws IOException { IndexReader reader = searcher.getIndexReader(); //TODO Look into usage of field cache for retrieving the path //instead of reading via reader if no of docs in index are limited PathStoredFieldVisitor visitor = new PathStoredFieldVisitor(); reader.document(doc.doc, visitor); String path = visitor.getPath(); if (path != null) { if ("".equals(path)) { path = "/"; } if (pr.isPathTransformed()) { String originalPath = path; path = pr.transformPath(path); if (path == null) { LOG.trace("Ignoring path {} : Transformation returned null", originalPath); return null; } // avoid duplicate entries if (seenPaths.contains(path)) { LOG.trace("Ignoring path {} : Duplicate post transformation", originalPath); return null; } seenPaths.add(path); } LOG.trace("Matched path {}", path); return new LuceneResultRow(path, doc.score, excerpt, facets, explanation); } return null; } /** * Loads the lucene documents in batches * @return true if any document is loaded */ private boolean loadDocs() { if (noDocs) { return false; } ScoreDoc lastDocToRecord = null; final IndexNode indexNode = 
// (review) loadDocs(): acquires the IndexNode, dispatches on the request type
// (plain Query vs. spellcheck vs. suggest), and enqueues result rows.
acquireIndexNode(plan); checkState(indexNode != null); try { IndexSearcher searcher = indexNode.getSearcher(); LuceneRequestFacade luceneRequestFacade = getLuceneRequest(plan, augmentorFactory, searcher.getIndexReader()); if (luceneRequestFacade.getLuceneRequest() instanceof Query) { Query query = (Query) luceneRequestFacade.getLuceneRequest(); CustomScoreQuery customScoreQuery = getCustomScoreQuery(plan, query); if (customScoreQuery != null) { query = customScoreQuery; } checkForIndexVersionChange(searcher); TopDocs docs; long start = PERF_LOGGER.start(); while (true) { if (lastDoc != null) { LOG.debug("loading the next {} entries for query {}", nextBatchSize, query); if (sort == null) { docs = searcher.searchAfter(lastDoc, query, nextBatchSize); } else { docs = searcher.searchAfter(lastDoc, query, nextBatchSize, sort); } } else { LOG.debug("loading the first {} entries for query {}", nextBatchSize, query); if (sort == null) { docs = searcher.search(query, nextBatchSize); } else { docs = searcher.search(query, nextBatchSize, sort); } } PERF_LOGGER.end(start, -1, "{} ...", docs.scoreDocs.length); nextBatchSize = (int) Math.min(nextBatchSize * 2L, 100000); long f = PERF_LOGGER.start(); Facets facets = FacetHelper.getFacets(searcher, query, docs, plan, indexNode.getDefinition().isSecureFacets()); PERF_LOGGER.end(f, -1, "facets retrieved"); PropertyRestriction restriction = filter.getPropertyRestriction(QueryImpl.REP_EXCERPT); boolean addExcerpt = restriction != null && restriction.isNotNullRestriction(); restriction = filter.getPropertyRestriction(QueryImpl.OAK_SCORE_EXPLANATION); boolean addExplain = restriction != null && restriction.isNotNullRestriction(); Analyzer analyzer = indexNode.getDefinition().getAnalyzer(); FieldInfos mergedFieldInfos = null; if (addExcerpt) { // setup highlighter QueryScorer scorer = new QueryScorer(query); scorer.setExpandMultiTermQuery(true); highlighter.setFragmentScorer(scorer); mergedFieldInfos = 
// (review) excerpt support needs the merged field infos for highlighting.
MultiFields.getMergedFieldInfos(searcher.getIndexReader()); } for (ScoreDoc doc : docs.scoreDocs) { String excerpt = null; if (addExcerpt) { excerpt = getExcerpt(query, analyzer, searcher, doc, mergedFieldInfos); } String explanation = null; if (addExplain) { explanation = searcher.explain(query, doc.doc).toString(); } LuceneResultRow row = convertToRow(doc, searcher, excerpt, facets, explanation); if (row != null) { queue.add(row); } lastDocToRecord = doc; } if (queue.isEmpty() && docs.scoreDocs.length > 0) { //queue is still empty but more results can be fetched //from Lucene so still continue lastDoc = lastDocToRecord; } else { break; } } } else if (luceneRequestFacade.getLuceneRequest() instanceof SpellcheckHelper.SpellcheckQuery) { String aclCheckField = indexNode.getDefinition().isFullTextEnabled() ? FieldNames.FULLTEXT : FieldNames.SPELLCHECK; noDocs = true; SpellcheckHelper.SpellcheckQuery spellcheckQuery = (SpellcheckHelper.SpellcheckQuery) luceneRequestFacade .getLuceneRequest(); SuggestWord[] suggestWords = SpellcheckHelper.getSpellcheck(spellcheckQuery); // ACL filter spellchecks QueryParser qp = new QueryParser(Version.LUCENE_47, aclCheckField, indexNode.getDefinition().getAnalyzer()); for (SuggestWord suggestion : suggestWords) { Query query = qp.createPhraseQuery(aclCheckField, QueryParserBase.escape(suggestion.string)); query = addDescendantClauseIfRequired(query, plan); TopDocs topDocs = searcher.search(query, 100); if (topDocs.totalHits > 0) { for (ScoreDoc doc : topDocs.scoreDocs) { Document retrievedDoc = searcher.doc(doc.doc); String prefix = filter.getPath(); if (prefix.length() == 1) { prefix = ""; } if (filter.isAccessible(prefix + retrievedDoc.get(FieldNames.PATH))) { queue.add(new LuceneResultRow(suggestion.string)); break; } } } } } else if (luceneRequestFacade.getLuceneRequest() instanceof SuggestHelper.SuggestQuery) { SuggestHelper.SuggestQuery suggestQuery = (SuggestHelper.SuggestQuery) luceneRequestFacade .getLuceneRequest(); noDocs = 
// (review) suggest branch: each suggestion is escaped, parsed as a phrase
// query, and kept only if at least one matching document is ACL-accessible.
true; List<Lookup.LookupResult> lookupResults = SuggestHelper .getSuggestions(indexNode.getLookup(), suggestQuery); QueryParser qp = new QueryParser(Version.LUCENE_47, FieldNames.SUGGEST, indexNode.getDefinition().isSuggestAnalyzed() ? indexNode.getDefinition().getAnalyzer() : SuggestHelper.getAnalyzer()); // ACL filter suggestions for (Lookup.LookupResult suggestion : lookupResults) { Query query = qp.parse("\"" + QueryParserBase.escape(suggestion.key.toString()) + "\""); query = addDescendantClauseIfRequired(query, plan); TopDocs topDocs = searcher.search(query, 100); if (topDocs.totalHits > 0) { for (ScoreDoc doc : topDocs.scoreDocs) { Document retrievedDoc = searcher.doc(doc.doc); String prefix = filter.getPath(); if (prefix.length() == 1) { prefix = ""; } if (filter.isAccessible(prefix + retrievedDoc.get(FieldNames.PATH))) { queue.add(new LuceneResultRow(suggestion.key.toString(), suggestion.value)); break; } } } } } } catch (Exception e) { LOG.warn("query via {} failed.", LucenePropertyIndex.this, e); } finally { indexNode.release(); } if (lastDocToRecord != null) { this.lastDoc = lastDocToRecord; } return !queue.isEmpty(); } private void checkForIndexVersionChange(IndexSearcher searcher) { long currentVersion = getVersion(searcher); if (currentVersion != lastSearchIndexerVersion && lastDoc != null) { lastDoc = null; LOG.debug("Change in index version detected {} => {}. 
Query would be performed without " + "offset", currentVersion, lastSearchIndexerVersion); } this.lastSearchIndexerVersion = currentVersion; } }; SizeEstimator sizeEstimator = new SizeEstimator() { @Override public long getSize() { IndexNode indexNode = acquireIndexNode(plan); checkState(indexNode != null); try { IndexSearcher searcher = indexNode.getSearcher(); LuceneRequestFacade luceneRequestFacade = getLuceneRequest(plan, augmentorFactory, searcher.getIndexReader()); if (luceneRequestFacade.getLuceneRequest() instanceof Query) { Query query = (Query) luceneRequestFacade.getLuceneRequest(); TotalHitCountCollector collector = new TotalHitCountCollector(); searcher.search(query, collector); int totalHits = collector.getTotalHits(); LOG.debug("Estimated size for query {} is {}", query, totalHits); return totalHits; } LOG.debug("estimate size: not a Query: {}", luceneRequestFacade.getLuceneRequest()); } catch (IOException e) { LOG.warn("query via {} failed.", LucenePropertyIndex.this, e); } finally { indexNode.release(); } return -1; } }; return new LucenePathCursor(itr, plan, settings, sizeEstimator); }
From source file:org.apache.jena.query.text.DatasetGraphText.java
License:Apache License
/** Search the text index on the text field associated with the predicate within graph */ public Iterator<TextHit> search(String queryString, Node predicate, String graphURI, String lang, int limit) { queryString = QueryParserBase.escape(queryString); if (predicate != null) { String f = textIndex.getDocDef().getField(predicate); queryString = f + ":" + queryString; }//w w w . ja v a2s . c o m List<TextHit> results = textIndex.query(predicate, queryString, graphURI, lang, limit); return results.iterator(); }
From source file:org.apache.jena.query.text.es.TextIndexES.java
License:Apache License
private String parse(String fieldName, String qs, String lang) { //Escape special characters if any in the query string qs = QueryParserBase.escape(qs); if (fieldName != null && !fieldName.isEmpty()) { if (lang != null && !lang.equals("none")) { if (!ASTERISK.equals(lang)) { fieldName = fieldName + UNDERSCORE + lang.replaceAll(DASH, UNDERSCORE); qs = fieldName + COLON + qs; } else { if (!qs.contains("\\*")) { fieldName = fieldName + ASTERISK; qs = fieldName + COLON + qs; }//from w ww . ja v a2s . c o m } } else { //Lang is null, but field name is not null qs = fieldName + COLON + qs; } } //We do this to enable wild card search return qs.replaceAll("\\*", "\\\\*"); }
From source file:org.apache.jena.query.text.TextIndexLucene.java
License:Apache License
private List<Map<String, Node>> get$(IndexReader indexReader, String uri) throws ParseException, IOException { String escaped = QueryParserBase.escape(uri); String qs = docDef.getEntityField() + ":" + escaped; Query query = parseQuery(qs, queryAnalyzer); IndexSearcher indexSearcher = new IndexSearcher(indexReader); ScoreDoc[] sDocs = indexSearcher.search(query, 1).scoreDocs; List<Map<String, Node>> records = new ArrayList<>(); for (ScoreDoc sd : sDocs) { Document doc = indexSearcher.doc(sd.doc); String[] x = doc.getValues(docDef.getEntityField()); if (x.length != 1) { }/*from w w w.j av a 2 s. c o m*/ String uriStr = x[0]; Map<String, Node> record = new HashMap<>(); Node entity = NodeFactory.createURI(uriStr); record.put(docDef.getEntityField(), entity); for (String f : docDef.fields()) { // log.info("Field: "+f) ; String[] values = doc.getValues(f); for (String v : values) { Node n = entryToNode(v); record.put(f, n); } records.add(record); } } return records; }