Example usage for org.apache.lucene.queryparser.classic QueryParserBase escape

List of usage examples for org.apache.lucene.queryparser.classic QueryParserBase escape

Introduction

On this page you can find usage examples for the escape method of org.apache.lucene.queryparser.classic.QueryParserBase.

Prototype

public static String escape(String s) 

Source Link

Document

Returns a String where those characters that QueryParser expects to be escaped are escaped by a preceding \.

Usage

From source file:com.berico.clavin.resolver.impl.lucene.LuceneLocationNameIndex.java

License:Apache License

/**
 * Return a list of Resolved Locations that best match the Location Occurrence
 * found in a document./*from www . j av a  2s .  c  o  m*/
 * @param occurrence The Location Occurrence.
 * @param options Options for the index.
 * @return List of Resolved Locations matching the occurrence.
 */
@Override
public List<ResolvedLocation> search(LocationOccurrence occurrence, Options options) throws Exception {

    options = (options == null) ? new Options() : options;

    // Get the max number of records to return.
    int limit = options.getInt(KEY_DEFAULT_LIMIT, DEFAULT_LIMIT);

    // Get whether fuzzy matching is enabled.
    boolean useFuzzy = options.getBoolean(KEY_DEFAULT_USE_FUZZY, DEFAULT_USE_FUZZY);

    IndexSearcher searcher = lucene.getSearcherManager().acquire();

    boolean usedFuzzy = false;

    // We need to sanitize the name so it doesn't have unescaped Lucene syntax that
    // would throw off the search index.
    String escapedName = QueryParserBase.escape(occurrence.getText().toLowerCase());

    // Try an exact query
    Query query = getExactQuery(escapedName);

    // Gather the results.
    TopDocs results = searcher.search(query, null, limit, DEFAULT_SORTER);

    // If there are no results, and a fuzzy query was requested
    if (results.scoreDocs.length == 0 && useFuzzy) {

        usedFuzzy = true;

        // Attempt a fuzzy query
        query = getFuzzyQuery(escapedName);

        // Gather the results
        results = searcher.search(query, null, limit, DEFAULT_SORTER);
    }

    if (results.scoreDocs.length == 0)
        logger.info("Found no results for {}.", escapedName);

    return LuceneUtils.convertToLocations(occurrence, searcher, results, usedFuzzy);
}

From source file:controllers.SkosToElasticsearch.java

License:Open Source License

/**
 * Escapes the query string, runs it against the given index (first 10 hits)
 * and returns the hit sources as JSON, each enriched with a
 * {@code primaryTopic} entry holding the hit id.
 *
 * @param q the query string; it is escaped and passed on to elasticsearch
 * @param index the index to search in
 * @return an already completed stage carrying the JSON result
 */
public CompletionStage<Result> search(String q, String index) {
    response().setHeader("content-type", "application/json");

    SearchHits hits = esb.getInstance().query(index, QueryParserBase.escape(q), 0, 10);

    List<Map<String, Object>> documents = new ArrayList<>();
    for (SearchHit hit : hits.getHits()) {
        Map<String, Object> source = hit.getSource();
        source.put("primaryTopic", hit.getId());
        documents.add(source);
    }

    return CompletableFuture.<Result>completedFuture(ok(SkosToElasticsearch.json(documents)));
}

From source file:de.unihildesheim.iw.lucene.query.TryExactTermsQuery.java

License:Open Source License

/**
 * New instance using the supplied query.
 *
 * <p>The query string is tokenized with the analyzer; every unique term is
 * escaped, parsed against all given fields and added as a SHOULD clause. The
 * minimum number of matching clauses is set to the number of unique terms.
 *
 * @param analyzer Query analyzer
 * @param queryStr Query string
 * @param fields Fields to query
 * @throws ParseException Thrown, if the query could not be parsed
 * @throws IllegalArgumentException Thrown, if no fields are given or the
 * query string is empty after stripping
 */
public TryExactTermsQuery(@NotNull final Analyzer analyzer, @NotNull final String queryStr,
        @NotNull final String... fields) throws ParseException {
    if (fields.length == 0) {
        throw new IllegalArgumentException("Empty fields list.");
    }
    if (StringUtils.isStrippedEmpty(queryStr)) {
        throw new IllegalArgumentException("Empty query.");
    }

    this.queryTerms = QueryUtils.tokenizeQueryString(queryStr, analyzer);

    final QueryParser qParser = new MultiFieldQueryParser(fields, analyzer);

    this.query = new BooleanQuery();
    this.uniqueQueryTerms = new HashSet<>(this.queryTerms);
    for (final String term : this.uniqueQueryTerms) {
        // Escape each term so query-syntax characters in it are taken literally.
        @SuppressWarnings("ObjectAllocationInLoop")
        final BooleanClause bc = new BooleanClause(qParser.parse(QueryParserBase.escape(term)), Occur.SHOULD);
        this.query.add(bc);
    }
    this.query.setMinimumNumberShouldMatch(this.uniqueQueryTerms.size());
    if (LOG.isDebugEnabled()) {
        LOG.debug("TEQ {} uQt={}", this.query, this.uniqueQueryTerms);
    }
}

From source file:ie.cmrc.smtx.skos.index.lucene.LuceneSKOSIndex.java

License:Apache License

/**
 * Constructs a query string for the provided keyword and search field
 * @param keyword A (string keyword, language code) pair
 * @param field Search field/*from   w ww .j  a  v  a 2 s  .c o m*/
 * @return Lucene query representation of the provided keyword and search field
 */
private String getQueryString(Term keyword, IndexField.Searchable field) {
    if (keyword != null) {
        String kw = keyword.getString();
        if (kw != null && (kw = kw.trim()).length() >= this.minKeywordLength) {

            String queryString;

            kw = QueryParserBase.escape(kw);
            String language = keyword.getLanguage();

            if (language == null) {
                /*queryString = "("+IndexField.Property.NAME.fieldName()+":\""+kw+"\") OR ("
                    +IndexField.Property.URI.fieldName()+":\""+kw+"\")";*/

                // Search in all languages
                queryString = "";
                int i = 0;
                for (String lang : this.languages) {
                    if (i > 0)
                        queryString += " OR ";
                    queryString += this.getQueryString(kw, lang, field);
                    i++;
                }
            } else {
                queryString = this.getQueryString(kw, language, field);
            }

            return queryString;
        }
    }
    return null;
}

From source file:it.unipd.dei.ims.lucene.clef.parser.ClefQQParser.java

License:Apache License

/**
 * Builds a boolean query with one SHOULD clause per configured quality-query
 * field. The query parser is fetched from {@code queryParser} and created and
 * stored there on first use.
 *
 * @param qq the quality query whose field values are parsed
 * @return the combined boolean query
 * @throws ParseException if one of the escaped field values cannot be parsed
 */
@Override
public Query parse(QualityQuery qq) throws ParseException {
    QueryParser parser = queryParser.get();
    if (parser == null) {
        Analyzer analyzer = AnalyzerFactory.createAnalyzer(language, stemmer, stopset);
        parser = new QueryParser(fieldToSearch, analyzer);
        queryParser.set(parser);
    }

    BooleanQuery combined = new BooleanQuery();
    for (String fieldName : qqFields) {
        String escapedValue = QueryParserBase.escape(qq.getValue(fieldName));
        combined.add(parser.parse(escapedValue), BooleanClause.Occur.SHOULD);
    }
    return combined;
}

From source file:org.apache.ctakes.dictionary.lookup.lucene.LuceneDictionaryImpl.java

License:Apache License

/**
 * {@inheritDoc}
 *
 * <p>Text without a hyphen is looked up with a plain term query. Text that
 * contains a hyphen has its hyphens replaced by spaces, is escaped, and is
 * parsed with a keyword analyzer. If that parse fails, the failure is logged
 * and an empty collection is returned.
 *
 * @throws DictionaryException if the underlying index search or document
 *         retrieval fails with an I/O error
 */
@Override
public Collection<MetaDataHit> getEntries(final String text) throws DictionaryException {
    final Set<MetaDataHit> metaDataHitSet = new HashSet<>();

    // Normalise the hit limit BEFORE searching: the searches below use
    // iv_maxHits, so correcting a 0 only afterwards would come too late.
    if (iv_maxHits == 0) {
        iv_maxHits = Integer.MAX_VALUE;
        iv_logger.warn("iv_maxHits was 0, using Integer.MAX_VALUE instead");
    }

    try {
        TopDocs topDoc = null;
        if (text.indexOf('-') == -1) {
            final Query q = new TermQuery(new Term(iv_lookupFieldName, text));
            topDoc = iv_searcher.search(q, iv_maxHits);
        } else { // needed the KeywordAnalyzer for situations where the hyphen was included in the f-word
            final QueryParser query = new QueryParser(Version.LUCENE_40, iv_lookupFieldName,
                    new KeywordAnalyzer());
            try {
                // CTAKES-63 - all of the chars in the str token should be escaped to avoid
                // issues such as a token ending with ']'
                final String escaped = QueryParserBase.escape(text.replace('-', ' '));
                topDoc = iv_searcher.search(query.parse(escaped), iv_maxHits);
            } catch (ParseException e) {
                // thrown by QueryParser.parse(); best effort: report through the logger and
                // fall through to the "topDoc is null" handling below
                iv_logger.warn(getClass().getName() + " getEntries(..) could not parse lookup text", e);
            }
        }
        if (topDoc == null) {
            // avoids possible NPE on topDoc.scoreDocs 12-26-2012 SPF
            iv_logger.warn(getClass().getName() + " getEntries(..) topDoc is null, returning empty collection");
            return Collections.emptySet();
        }
        final ScoreDoc[] hits = topDoc.scoreDocs;
        if (hits.length == iv_maxHits) {
            iv_logger.warn(
                    "'iv_maxHits' equals the list length returned by the lucene query (" + hits.length + ").");
            iv_logger.warn(
                    "You may want to consider setting a higher value, since there may be more entries not being returned in the event greater than "
                            + iv_maxHits + " exist.");
        }
        for (ScoreDoc scoreDoc : hits) {
            final Document luceneDoc = iv_searcher.doc(scoreDoc.doc);
            final MetaDataHit mdh = new LuceneDocumentMetaDataHitImpl(luceneDoc);
            metaDataHitSet.add(mdh);
        }
        return metaDataHitSet;
    } catch (IOException ioe) {
        // thrown by IndexSearcher.search(), IndexSearcher.doc()
        throw new DictionaryException(ioe);
    }
}

From source file:org.apache.jackrabbit.oak.plugins.index.lucene.LucenePropertyIndex.java

License:Apache License

/**
 * Builds a cursor over the Lucene results for the given index plan.
 *
 * <p>Two anonymous helpers are created: an iterator that lazily loads result
 * rows in batches (see {@code loadDocs()}), and a size estimator that counts
 * the total hits of the query. Both are handed to a new
 * {@code LucenePathCursor}.
 *
 * @param plan the index plan providing the filter, sort and plan result
 * @param rootState not referenced in this method body
 * @return a cursor backed by the lazy result iterator
 */
@Override
public Cursor query(final IndexPlan plan, NodeState rootState) {
    final Filter filter = plan.getFilter();
    final Sort sort = getSort(plan);
    final PlanResult pr = getPlanResult(plan);
    QueryEngineSettings settings = filter.getQueryEngineSettings();
    Iterator<LuceneResultRow> itr = new AbstractIterator<LuceneResultRow>() {
        // Rows already loaded but not yet handed out.
        private final Deque<LuceneResultRow> queue = Queues.newArrayDeque();
        // Transformed paths already emitted; only used when the plan transforms paths.
        private final Set<String> seenPaths = Sets.newHashSet();
        // Last document of the previous batch; passed to searchAfter() for the next batch.
        private ScoreDoc lastDoc;
        private int nextBatchSize = LUCENE_QUERY_BATCH_SIZE;
        // Set for spellcheck/suggest requests so that loadDocs() does no work on later calls.
        private boolean noDocs = false;
        private long lastSearchIndexerVersion;

        @Override
        protected LuceneResultRow computeNext() {
            // The body returns immediately, so this "while" behaves like an "if".
            while (!queue.isEmpty() || loadDocs()) {
                return queue.remove();
            }
            return endOfData();
        }

        /**
         * Converts a Lucene hit into a result row.
         *
         * @return the row, or null if the document has no path, the path
         *         transformation returned null, or the transformed path was
         *         already seen
         */
        private LuceneResultRow convertToRow(ScoreDoc doc, IndexSearcher searcher, String excerpt,
                Facets facets, String explanation) throws IOException {
            IndexReader reader = searcher.getIndexReader();
            //TODO Look into usage of field cache for retrieving the path
            //instead of reading via reader if no of docs in index are limited
            PathStoredFieldVisitor visitor = new PathStoredFieldVisitor();
            reader.document(doc.doc, visitor);
            String path = visitor.getPath();
            if (path != null) {
                // An empty stored path is mapped to "/".
                if ("".equals(path)) {
                    path = "/";
                }
                if (pr.isPathTransformed()) {
                    String originalPath = path;
                    path = pr.transformPath(path);

                    if (path == null) {
                        LOG.trace("Ignoring path {} : Transformation returned null", originalPath);
                        return null;
                    }

                    // avoid duplicate entries
                    if (seenPaths.contains(path)) {
                        LOG.trace("Ignoring path {} : Duplicate post transformation", originalPath);
                        return null;
                    }
                    seenPaths.add(path);
                }

                LOG.trace("Matched path {}", path);
                return new LuceneResultRow(path, doc.score, excerpt, facets, explanation);
            }
            return null;
        }

        /**
         * Loads the lucene documents in batches
         * @return true if any document is loaded
         */
        private boolean loadDocs() {

            if (noDocs) {
                return false;
            }

            ScoreDoc lastDocToRecord = null;

            final IndexNode indexNode = acquireIndexNode(plan);
            checkState(indexNode != null);
            try {
                IndexSearcher searcher = indexNode.getSearcher();
                LuceneRequestFacade luceneRequestFacade = getLuceneRequest(plan, augmentorFactory,
                        searcher.getIndexReader());
                // Three request kinds are handled: a regular Query, a spellcheck
                // query and a suggest query.
                if (luceneRequestFacade.getLuceneRequest() instanceof Query) {
                    Query query = (Query) luceneRequestFacade.getLuceneRequest();

                    CustomScoreQuery customScoreQuery = getCustomScoreQuery(plan, query);

                    if (customScoreQuery != null) {
                        query = customScoreQuery;
                    }

                    // May reset lastDoc, in which case the search below starts from the beginning.
                    checkForIndexVersionChange(searcher);

                    TopDocs docs;
                    long start = PERF_LOGGER.start();
                    // Keep fetching batches until at least one row was queued or
                    // Lucene returns no more documents.
                    while (true) {
                        if (lastDoc != null) {
                            LOG.debug("loading the next {} entries for query {}", nextBatchSize, query);
                            if (sort == null) {
                                docs = searcher.searchAfter(lastDoc, query, nextBatchSize);
                            } else {
                                docs = searcher.searchAfter(lastDoc, query, nextBatchSize, sort);
                            }
                        } else {
                            LOG.debug("loading the first {} entries for query {}", nextBatchSize, query);
                            if (sort == null) {
                                docs = searcher.search(query, nextBatchSize);
                            } else {
                                docs = searcher.search(query, nextBatchSize, sort);
                            }
                        }
                        PERF_LOGGER.end(start, -1, "{} ...", docs.scoreDocs.length);
                        // Double the batch size for the next round, capped at 100000;
                        // the long multiplication avoids int overflow.
                        nextBatchSize = (int) Math.min(nextBatchSize * 2L, 100000);

                        long f = PERF_LOGGER.start();
                        Facets facets = FacetHelper.getFacets(searcher, query, docs, plan,
                                indexNode.getDefinition().isSecureFacets());
                        PERF_LOGGER.end(f, -1, "facets retrieved");

                        // Excerpts and score explanations are only computed when the
                        // filter carries a not-null restriction on the respective property.
                        PropertyRestriction restriction = filter.getPropertyRestriction(QueryImpl.REP_EXCERPT);
                        boolean addExcerpt = restriction != null && restriction.isNotNullRestriction();

                        restriction = filter.getPropertyRestriction(QueryImpl.OAK_SCORE_EXPLANATION);
                        boolean addExplain = restriction != null && restriction.isNotNullRestriction();

                        Analyzer analyzer = indexNode.getDefinition().getAnalyzer();

                        FieldInfos mergedFieldInfos = null;
                        if (addExcerpt) {
                            // setup highlighter
                            QueryScorer scorer = new QueryScorer(query);
                            scorer.setExpandMultiTermQuery(true);
                            highlighter.setFragmentScorer(scorer);
                            mergedFieldInfos = MultiFields.getMergedFieldInfos(searcher.getIndexReader());
                        }

                        for (ScoreDoc doc : docs.scoreDocs) {
                            String excerpt = null;
                            if (addExcerpt) {
                                excerpt = getExcerpt(query, analyzer, searcher, doc, mergedFieldInfos);
                            }

                            String explanation = null;
                            if (addExplain) {
                                explanation = searcher.explain(query, doc.doc).toString();
                            }

                            LuceneResultRow row = convertToRow(doc, searcher, excerpt, facets, explanation);
                            if (row != null) {
                                queue.add(row);
                            }
                            // Recorded even when the row was dropped, so paging continues after it.
                            lastDocToRecord = doc;
                        }

                        if (queue.isEmpty() && docs.scoreDocs.length > 0) {
                            //queue is still empty but more results can be fetched
                            //from Lucene so still continue
                            lastDoc = lastDocToRecord;
                        } else {
                            break;
                        }
                    }
                } else if (luceneRequestFacade.getLuceneRequest() instanceof SpellcheckHelper.SpellcheckQuery) {
                    String aclCheckField = indexNode.getDefinition().isFullTextEnabled() ? FieldNames.FULLTEXT
                            : FieldNames.SPELLCHECK;
                    // Spellcheck results are produced in a single pass.
                    noDocs = true;
                    SpellcheckHelper.SpellcheckQuery spellcheckQuery = (SpellcheckHelper.SpellcheckQuery) luceneRequestFacade
                            .getLuceneRequest();
                    SuggestWord[] suggestWords = SpellcheckHelper.getSpellcheck(spellcheckQuery);

                    // ACL filter spellchecks
                    QueryParser qp = new QueryParser(Version.LUCENE_47, aclCheckField,
                            indexNode.getDefinition().getAnalyzer());
                    for (SuggestWord suggestion : suggestWords) {
                        Query query = qp.createPhraseQuery(aclCheckField,
                                QueryParserBase.escape(suggestion.string));

                        query = addDescendantClauseIfRequired(query, plan);

                        // A suggestion is emitted once, as soon as one of up to 100
                        // matching documents is accessible through the filter.
                        TopDocs topDocs = searcher.search(query, 100);
                        if (topDocs.totalHits > 0) {
                            for (ScoreDoc doc : topDocs.scoreDocs) {
                                Document retrievedDoc = searcher.doc(doc.doc);
                                String prefix = filter.getPath();
                                // A one-character filter path is replaced by an empty prefix.
                                if (prefix.length() == 1) {
                                    prefix = "";
                                }
                                if (filter.isAccessible(prefix + retrievedDoc.get(FieldNames.PATH))) {
                                    queue.add(new LuceneResultRow(suggestion.string));
                                    break;
                                }
                            }
                        }
                    }

                } else if (luceneRequestFacade.getLuceneRequest() instanceof SuggestHelper.SuggestQuery) {
                    SuggestHelper.SuggestQuery suggestQuery = (SuggestHelper.SuggestQuery) luceneRequestFacade
                            .getLuceneRequest();
                    // Suggestions are produced in a single pass.
                    noDocs = true;

                    List<Lookup.LookupResult> lookupResults = SuggestHelper
                            .getSuggestions(indexNode.getLookup(), suggestQuery);

                    QueryParser qp = new QueryParser(Version.LUCENE_47, FieldNames.SUGGEST,
                            indexNode.getDefinition().isSuggestAnalyzed()
                                    ? indexNode.getDefinition().getAnalyzer()
                                    : SuggestHelper.getAnalyzer());

                    // ACL filter suggestions
                    for (Lookup.LookupResult suggestion : lookupResults) {
                        // The escaped key is wrapped in double quotes before parsing.
                        Query query = qp.parse("\"" + QueryParserBase.escape(suggestion.key.toString()) + "\"");

                        query = addDescendantClauseIfRequired(query, plan);

                        // Same accessibility check as in the spellcheck branch above.
                        TopDocs topDocs = searcher.search(query, 100);
                        if (topDocs.totalHits > 0) {
                            for (ScoreDoc doc : topDocs.scoreDocs) {
                                Document retrievedDoc = searcher.doc(doc.doc);
                                String prefix = filter.getPath();
                                if (prefix.length() == 1) {
                                    prefix = "";
                                }
                                if (filter.isAccessible(prefix + retrievedDoc.get(FieldNames.PATH))) {
                                    queue.add(new LuceneResultRow(suggestion.key.toString(), suggestion.value));
                                    break;
                                }
                            }
                        }
                    }
                }
            } catch (Exception e) {
                // Any failure is logged and swallowed; the return value below then
                // depends only on what was queued before the failure.
                LOG.warn("query via {} failed.", LucenePropertyIndex.this, e);
            } finally {
                indexNode.release();
            }

            // Remember where this call stopped so the next call pages on from there.
            if (lastDocToRecord != null) {
                this.lastDoc = lastDocToRecord;
            }

            return !queue.isEmpty();
        }

        /**
         * Resets the paging offset (lastDoc) when the version obtained for the
         * searcher differs from the one recorded on the previous call, then
         * records the current version.
         */
        private void checkForIndexVersionChange(IndexSearcher searcher) {
            long currentVersion = getVersion(searcher);
            if (currentVersion != lastSearchIndexerVersion && lastDoc != null) {
                lastDoc = null;
                // NOTE(review): the arguments are passed as (current, previous), so the
                // message prints "current => previous" - confirm this order is intended.
                LOG.debug("Change in index version detected {} => {}. Query would be performed without "
                        + "offset", currentVersion, lastSearchIndexerVersion);
            }
            this.lastSearchIndexerVersion = currentVersion;
        }
    };
    SizeEstimator sizeEstimator = new SizeEstimator() {
        /**
         * Counts the total hits of the plan's query.
         *
         * @return the total hit count, or -1 if the request is not a Query
         *         or the search fails with an IOException
         */
        @Override
        public long getSize() {
            IndexNode indexNode = acquireIndexNode(plan);
            checkState(indexNode != null);
            try {
                IndexSearcher searcher = indexNode.getSearcher();
                LuceneRequestFacade luceneRequestFacade = getLuceneRequest(plan, augmentorFactory,
                        searcher.getIndexReader());
                if (luceneRequestFacade.getLuceneRequest() instanceof Query) {
                    Query query = (Query) luceneRequestFacade.getLuceneRequest();
                    TotalHitCountCollector collector = new TotalHitCountCollector();
                    searcher.search(query, collector);
                    int totalHits = collector.getTotalHits();
                    LOG.debug("Estimated size for query {} is {}", query, totalHits);
                    return totalHits;
                }
                LOG.debug("estimate size: not a Query: {}", luceneRequestFacade.getLuceneRequest());
            } catch (IOException e) {
                LOG.warn("query via {} failed.", LucenePropertyIndex.this, e);
            } finally {
                indexNode.release();
            }
            return -1;
        }
    };
    return new LucenePathCursor(itr, plan, settings, sizeEstimator);
}

From source file:org.apache.jena.query.text.DatasetGraphText.java

License:Apache License

/**
 * Search the text index on the text field associated with the predicate within graph.
 * The query string is escaped first; when a predicate is given, the query is
 * prefixed with the field that the document definition maps to that predicate.
 */
public Iterator<TextHit> search(String queryString, Node predicate, String graphURI, String lang, int limit) {
    String luceneQuery = QueryParserBase.escape(queryString);
    if (predicate != null) {
        String field = textIndex.getDocDef().getField(predicate);
        luceneQuery = field + ":" + luceneQuery;
    }
    return textIndex.query(predicate, luceneQuery, graphURI, lang, limit).iterator();
}

From source file:org.apache.jena.query.text.es.TextIndexES.java

License:Apache License

/**
 * Builds the query string sent to the index: escapes the raw query and, when
 * a field name is given, prefixes it with the (optionally language-specific)
 * field.
 *
 * @param fieldName field to search in; may be null or empty for no prefix
 * @param qs raw query string
 * @param lang language tag, "none", the asterisk constant, or null
 * @return the query string after the final asterisk replacement
 */
private String parse(String fieldName, String qs, String lang) {
    // Escape special characters if any in the query string
    String query = QueryParserBase.escape(qs);

    boolean hasField = fieldName != null && !fieldName.isEmpty();
    if (hasField) {
        boolean hasLang = lang != null && !lang.equals("none");
        if (!hasLang) {
            // No usable language, but a field name is present
            query = fieldName + COLON + query;
        } else if (!ASTERISK.equals(lang)) {
            // Concrete language: field name gets the language as suffix
            String langField = fieldName + UNDERSCORE + lang.replaceAll(DASH, UNDERSCORE);
            query = langField + COLON + query;
        } else if (!query.contains("\\*")) {
            // Any-language search; only prefixed when the escaped query holds no "\*"
            String anyLangField = fieldName + ASTERISK;
            query = anyLangField + COLON + query;
        }
    }

    // We do this to enable wild card search
    return query.replaceAll("\\*", "\\\\*");
}

From source file:org.apache.jena.query.text.TextIndexLucene.java

License:Apache License

/**
 * Looks up the index entry for the given entity URI and returns one record
 * per matching document. The search is limited to the single best hit.
 *
 * <p>Each record maps the entity field to a URI node and every field of the
 * document definition to a node built from its stored value. When a field has
 * several values, the last one wins.
 *
 * @param indexReader reader to search with
 * @param uri entity URI; it is escaped before being placed in the query
 * @return the records found, possibly empty
 * @throws ParseException if the generated query string cannot be parsed
 * @throws IOException if reading the index fails
 */
private List<Map<String, Node>> get$(IndexReader indexReader, String uri) throws ParseException, IOException {
    String entityField = docDef.getEntityField();
    String qs = entityField + ":" + QueryParserBase.escape(uri);
    Query query = parseQuery(qs, queryAnalyzer);
    IndexSearcher indexSearcher = new IndexSearcher(indexReader);
    ScoreDoc[] sDocs = indexSearcher.search(query, 1).scoreDocs;
    List<Map<String, Node>> records = new ArrayList<>();

    for (ScoreDoc sd : sDocs) {
        Document doc = indexSearcher.doc(sd.doc);
        String[] x = doc.getValues(entityField);
        if (x.length == 0) {
            // No stored entity value: nothing to build a record from.
            continue;
        }
        String uriStr = x[0];
        Map<String, Node> record = new HashMap<>();
        record.put(entityField, NodeFactory.createURI(uriStr));

        for (String f : docDef.fields()) {
            for (String v : doc.getValues(f)) {
                record.put(f, entryToNode(v));
            }
        }
        // Add the record once per document, not once per field.
        records.add(record);
    }
    return records;
}