Example usage for org.apache.lucene.search RegexpQuery RegexpQuery

List of usage examples for org.apache.lucene.search RegexpQuery RegexpQuery

Introduction

In this page you can find the example usage for org.apache.lucene.search RegexpQuery RegexpQuery.

Prototype

public RegexpQuery(Term term, int flags) 

Source Link

Document

Constructs a query for terms matching term.

Usage

From source file:edu.stanford.muse.index.Indexer.java

License:Apache License

/**
 * Returns the collection of doc ids of the Lucene docs that hit the query at
 * least {@code threshold} times, together with the total hit count.
 * Warning: only looks up the body field, no others.
 * Caution: this code is performance-sensitive; changes here can seriously
 * affect search times.
 *
 * @param q         the query string (interpretation depends on {@code qt})
 * @param threshold minimum number of term occurrences a doc must have to be included
 * @param searcher  must be either {@code isearcher} (emails) or {@code isearcher_blob} (attachments)
 * @param qt        selects which parser / field set interprets {@code q}
 * @param lt        maximum number of hits to retrieve from Lucene
 * @return pair of (matching doc ids, total hit count reported by Lucene)
 */
private Pair<Collection<String>, Integer> luceneLookupAsDocIdsWithTotalHits(String q, int threshold,
        IndexSearcher searcher, QueryType qt, int lt)
        throws IOException, ParseException, GeneralSecurityException, ClassNotFoundException {
    Collection<String> result = new ArrayList<String>();

    //TODO: There should also be a general query type that takes any query with field param, i.e. without parser
    Query query;
    if (qt == QueryType.ORIGINAL)
        query = parserOriginal.parse(q);
    else if (qt == QueryType.SUBJECT)
        query = parserSubject.parse(q);
    else if (qt == QueryType.CORRESPONDENTS)
        query = parserCorrespondents.parse(q);
    else if (qt == QueryType.REGEX) {
        // Note: this is not a spanning (i.e. doesn't search over more than one
        // token) regexp; for spanning regexps use the body_unanalyzed and
        // title_unanalyzed fields instead.
        BooleanQuery bq = new BooleanQuery();
        bq.add(new RegexpQuery(new Term("body", q), RegExp.ALL),
                org.apache.lucene.search.BooleanClause.Occur.SHOULD);
        bq.add(new RegexpQuery(new Term("title", q), RegExp.ALL),
                org.apache.lucene.search.BooleanClause.Occur.SHOULD);
        query = bq;
    } else if (qt == QueryType.META) {
        query = parserMeta.parse(q);
    } else
        query = parser.parse(q);

    long st = System.currentTimeMillis();
    int totalHits = 0;
    // empty default (instead of null) so the hits.length access below cannot NPE
    // when the query could not be built
    ScoreDoc[] hits = new ScoreDoc[0];
    if (query != null) {
        TopDocs tds = searcher.search(query, null, lt);
        log.info("Took: " + (System.currentTimeMillis() - st) + "ms for query:" + query);
        hits = tds.scoreDocs;
        totalHits = tds.totalHits;
    } else {
        log.error("Query is null!!");
    }
    // this logging causes a 50% overhead on the query -- maybe enable it only for debugging
    // log.info (hits.length + " hits for query " + Util.ellipsize(q, 30) + " = " + Util.ellipsize(query.toString(), 30) + " :");

    // Iterate through the results:

    // TODO: not very pretty code here to determine dir_name which selects the cache to use
    Util.softAssert(searcher == isearcher || searcher == isearcher_blob);
    String dir_name = searcher == isearcher ? INDEX_NAME_EMAILS : INDEX_NAME_ATTACHMENTS;

    Map<Integer, String> map = dirNameToDocIdMap.get(dir_name);
    if (map == null) {
        map = new LinkedHashMap<Integer, String>();
        dirNameToDocIdMap.put(dir_name, map);
        log.info("Adding new entry for dir name to docIdMap");
    } else {
        log.info("Existing entry for dir name to docIdMap");
    }

    int n_added = 0;
    log.info("Found: " + hits.length + " hits for query: " + q);
    for (ScoreDoc hit : hits) {
        int ldocId = hit.doc; // this is the lucene doc id, we need to map it to our doc id

        String docId; // this will be our doc id

        // try to use the new fieldcache ids;
        // if this works, we can get rid of the dirNameToDocIdMap
        try {
            docId = (searcher == isearcher) ? contentDocIds.get(ldocId) : blobDocIds.get(ldocId);
        } catch (Exception e) {
            Util.print_exception(e, log);
            continue;
        }

        if (threshold <= 1) {
            // common case: threshold is 1
            result.add(docId);
            n_added++;
        } else {
            // more expensive, do it only if threshold is > 1:
            // count term frequencies from the hit's explanation text
            Explanation expl = searcher.explain(query, hit.doc);
            if (explanationMeetsThreshold(expl.getDetails(), threshold)) {
                result.add(docId);
                n_added++;
            }
        }
    }
    log.info(n_added + " docs added to docIdMap cache");
    return new Pair<Collection<String>, Integer>(result, totalHits);
}

/**
 * HORRIBLE HACK: we don't know a better way to check the hit-count threshold,
 * so we parse "tf(termFreq(...))=N" occurrences out of the explanation text.
 * NB: details.length doesn't reflect the actual # of hits for the query;
 * sometimes a single hit produces both a ComplexExplanation and an Explanation
 * (somewhat corroborated by http://massapi.com/class/ex/Explanation.html).
 * A more robust approach might look for summaries ending with "product of:",
 * "sum of:", etc. (see http://www.gossamer-threads.com/lists/lucene/java-dev/49706),
 * but for now we only sum the tf counts found inside ComplexExplanations.
 *
 * @param details   explanation details for one hit
 * @param threshold minimum summed term frequency required
 * @return true iff the summed term frequencies reach {@code threshold}
 */
private boolean explanationMeetsThreshold(Explanation[] details, int threshold) {
    for (Explanation detail : details) {
        if (!(detail instanceof ComplexExplanation))
            continue;
        String s = detail.toString();
        int total_tf = 0;
        while (true) {
            int idx = s.indexOf("tf(termFreq(");
            if (idx < 0)
                return false; // no more tf entries; give up on this hit entirely
            s = s.substring(idx);
            idx = s.indexOf("=");
            if (idx < 0)
                return false;
            s = s.substring(idx + 1);
            int closeIdx = s.indexOf(")");
            // bug fix: the original tested the stale "=" index here, so a missing ")"
            // would make substring(0, -1) throw instead of terminating the scan
            if (closeIdx < 0)
                return false;
            String num_str = s.substring(0, closeIdx);
            try {
                total_tf += Integer.parseInt(num_str);
            } catch (NumberFormatException e) {
                log.warn("ERROR parsing complex expl: " + num_str);
            }
            if (total_tf >= threshold)
                return true;
        }
    }
    return false;
}

From source file:org.elasticsearch.index.mapper.core.AbstractFieldMapper.java

License:Apache License

/**
 * Builds a {@link RegexpQuery} over this field's index name for the given value.
 * The value is first converted via {@code indexedValueForSearch}; if a rewrite
 * method is supplied, it is applied to the resulting query.
 */
@Override
public Query regexpQuery(Object value, int flags, @Nullable MultiTermQuery.RewriteMethod method,
        @Nullable QueryParseContext context) {
    Term indexTerm = names().createIndexNameTerm(indexedValueForSearch(value));
    RegexpQuery regexpQuery = new RegexpQuery(indexTerm, flags);
    if (method != null) {
        regexpQuery.setRewriteMethod(method);
    }
    return regexpQuery;
}

From source file:org.elasticsearch.index.mapper.internal.IdFieldMapper.java

License:Apache License

/**
 * Builds a regexp query against the _id field. When the field is indexed (or no
 * parse context is available), delegates to the default handling; otherwise the
 * query is rewritten against the _uid field, expanding to one clause per query
 * type when more than one type is in scope.
 */
@Override
public Query regexpQuery(Object value, int flags, @Nullable MultiTermQuery.RewriteMethod method,
        @Nullable QueryParseContext context) {
    if (fieldType.indexed() || context == null) {
        // default per-field regexp handling
        return super.regexpQuery(value, flags, method, context);
    }
    Collection<String> queryTypes = context.queryTypes();
    if (queryTypes.size() == 1) {
        // single type in scope: one regexp query over the corresponding uid
        String onlyType = Iterables.getFirst(queryTypes, null);
        RegexpQuery singleTypeQuery = new RegexpQuery(new Term(UidFieldMapper.NAME,
                Uid.createUidAsBytes(onlyType, BytesRefs.toBytesRef(value))), flags);
        if (method != null) {
            singleTypeQuery.setRewriteMethod(method);
        }
        return singleTypeQuery;
    }
    // multiple types: match any of the per-type uid regexps
    BooleanQuery anyTypeQuery = new BooleanQuery();
    for (String queryType : queryTypes) {
        RegexpQuery perTypeQuery = new RegexpQuery(
                new Term(UidFieldMapper.NAME, Uid.createUidAsBytes(queryType, BytesRefs.toBytesRef(value))),
                flags);
        if (method != null) {
            perTypeQuery.setRewriteMethod(method);
        }
        anyTypeQuery.add(perTypeQuery, BooleanClause.Occur.SHOULD);
    }
    return anyTypeQuery;
}

From source file:org.elasticsearch.index.query.RegexpQueryParser.java

License:Apache License

/**
 * Parses a [regexp] query from the request body.
 * Accepts either the short form {@code {"field": "pattern"}} or the long form
 * {@code {"field": {"value": ..., "boost": ..., "rewrite": ..., "flags"/"flags_value": ..., "_name": ...}}}.
 * The field's mapper (if any) builds the query so type-specific handling applies;
 * otherwise a plain RegexpQuery over the raw field name is used.
 */
@Override
public Query parse(QueryParseContext parseContext) throws IOException, QueryParsingException {
    XContentParser parser = parseContext.parser();

    // the first token must be the field name the regexp applies to
    XContentParser.Token token = parser.nextToken();
    if (token != XContentParser.Token.FIELD_NAME) {
        throw new QueryParsingException(parseContext.index(), "[regexp] query malformed, no field");
    }
    String fieldName = parser.currentName();
    String rewriteMethod = null;

    Object value = null;
    float boost = 1.0f;
    int flagsValue = -1; // -1 = not specified; resolved to a default downstream
    String queryName = null;
    token = parser.nextToken();
    if (token == XContentParser.Token.START_OBJECT) {
        // long form: read the option object one token at a time
        String currentFieldName = null;
        while ((token = parser.nextToken()) != XContentParser.Token.END_OBJECT) {
            if (token == XContentParser.Token.FIELD_NAME) {
                currentFieldName = parser.currentName();
            } else if (token.isValue()) {
                if ("value".equals(currentFieldName)) {
                    value = parser.objectBytes();
                } else if ("boost".equals(currentFieldName)) {
                    boost = parser.floatValue();
                } else if ("rewrite".equals(currentFieldName)) {
                    rewriteMethod = parser.textOrNull();
                } else if ("flags".equals(currentFieldName)) {
                    // symbolic flags string, e.g. "INTERSECTION|COMPLEMENT"
                    String flags = parser.textOrNull();
                    flagsValue = RegexpFlag.resolveValue(flags);
                } else if ("flags_value".equals(currentFieldName)) {
                    // numeric flags; negative values are normalized to ALL
                    flagsValue = parser.intValue();
                    if (flagsValue < 0) {
                        flagsValue = RegExp.ALL;
                    }
                } else if ("_name".equals(currentFieldName)) {
                    queryName = parser.text();
                }
            } else {
                throw new QueryParsingException(parseContext.index(),
                        "[regexp] query does not support [" + currentFieldName + "]");
            }
        }
        parser.nextToken(); // consume the END_OBJECT
    } else {
        // short form: the token itself is the pattern value
        value = parser.objectBytes();
        parser.nextToken();
    }

    if (value == null) {
        throw new QueryParsingException(parseContext.index(), "No value specified for regexp query");
    }

    MultiTermQuery.RewriteMethod method = QueryParsers.parseRewriteMethod(rewriteMethod, null);

    Query query = null;
    MapperService.SmartNameFieldMappers smartNameFieldMappers = parseContext.smartFieldMappers(fieldName);
    if (smartNameFieldMappers != null && smartNameFieldMappers.hasMapper()) {
        if (smartNameFieldMappers.explicitTypeInNameWithDocMapper()) {
            // field name carries an explicit type prefix: scope the type context
            // around the mapper call and always restore the previous types
            String[] previousTypes = QueryParseContext
                    .setTypesWithPrevious(new String[] { smartNameFieldMappers.docMapper().type() });
            try {
                query = smartNameFieldMappers.mapper().regexpQuery(value, flagsValue, method, parseContext);
            } finally {
                QueryParseContext.setTypes(previousTypes);
            }
        } else {
            query = smartNameFieldMappers.mapper().regexpQuery(value, flagsValue, method, parseContext);
        }
    }
    if (query == null) {
        // no mapper produced a query: fall back to a plain regexp over the raw field name
        RegexpQuery regexpQuery = new RegexpQuery(new Term(fieldName, BytesRefs.toBytesRef(value)), flagsValue);
        if (method != null) {
            regexpQuery.setRewriteMethod(method);
        }
        query = regexpQuery;
    }
    query.setBoost(boost);
    query = wrapSmartNameQuery(query, smartNameFieldMappers, parseContext);
    if (queryName != null) {
        parseContext.addNamedQuery(queryName, query);
    }
    return query;
}

From source file:org.kie.workbench.common.services.refactoring.backend.server.query.builder.AbstractQueryBuilder.java

License:Apache License

/**
 * Creates the Lucene query matching the given value term, honouring its
 * search type (prefix, wildcard, regexp, or exact term).
 *
 * @param valueTerm the indexed term plus the search type to apply
 * @return the corresponding Lucene query
 * @throws UnsupportedOperationException if the search type is not recognized
 */
public Query getQuery(ValueIndexTerm valueTerm) {
    final Term term = new Term(valueTerm.getTerm(), getText(valueTerm));
    switch (valueTerm.getSearchType()) {
    case PREFIX:
        return new PrefixQuery(term);
    case WILDCARD:
        return new WildcardQuery(term);
    case REGEXP:
        // NONE until there's a specific reason to use extended regex syntax
        return new RegexpQuery(term, RegExp.NONE);
    case NORMAL:
        return new TermQuery(term);
    default:
        throw new UnsupportedOperationException(ValueIndexTerm.TermSearchType.class.getSimpleName() + " value "
                + valueTerm.getSearchType().toString() + " is unsupported!");
    }
}

From source file:org.modeshape.jcr.index.lucene.query.LuceneQueryFactory.java

License:Apache License

/**
 * Builds a Lucene query for a path-valued field under the given comparison
 * operator. For every operator except LIKE, the value is coerced to a Path and
 * delegated to the Compare*Query helpers. LIKE expressions are translated into
 * either a string-LIKE query or, when they contain the SNS wildcard "[%]",
 * a RegexpQuery.
 */
protected Query pathFieldQuery(String field, Operator operator, Object value, CaseOperation caseOperation) {
    Path path = null;
    if (operator != Operator.LIKE) {
        path = !(value instanceof Path) ? pathFactory.create(value) : (Path) value;
    }
    if (caseOperation == null) {
        caseOperation = CaseOperations.AS_IS;
    }
    switch (operator) {
    case EQUAL_TO:
        return CompareStringQuery.createQueryForNodesWithFieldEqualTo(stringFactory.create(path), field,
                factories, caseOperation);
    case NOT_EQUAL_TO:
        return not(CompareStringQuery.createQueryForNodesWithFieldEqualTo(stringFactory.create(path), field,
                factories, caseOperation));
    case LIKE:
        String likeExpression = stringFactory.create(value);
        // the paths are stored in the index via stringFactory.create, which doesn't have the "1" index for SNS...
        likeExpression = likeExpression.replaceAll("\\[1\\]", "");
        if (likeExpression.contains("[%]")) {
            // We can't use '[%]' because we only want to match digits,
            // so handle this using a regex ...
            // !!! LUCENE Regexp is not the same as Java's. See the javadoc RegExp
            String regex = likeExpression;
            // "[%]" matches any (optional) SNS index, e.g. "[3]"
            regex = regex.replace("[%]", "(\\[[0-9]+\\])?");
            // normalize literal SNS indexes like "[3]" to the digit-class form
            regex = regex.replaceAll("\\[\\d+\\]", "\\[[0-9]+\\]");
            // translate LIKE wildcards into regex equivalents
            regex = regex.replace("*", ".*");
            regex = regex.replace("%", ".*").replace("_", ".");
            // Now create a regex query ...
            // NOTE(review): Pattern.CASE_INSENSITIVE is a java.util.regex match
            // flag, but RegexpQuery(Term, int) expects Lucene RegExp *syntax*
            // flags — confirm this int has the intended effect in this Lucene version.
            int flags = caseOperation == CaseOperations.AS_IS ? 0 : Pattern.CASE_INSENSITIVE;
            return new RegexpQuery(new Term(field, regex), flags);
        } else {
            return CompareStringQuery.createQueryForNodesWithFieldLike(likeExpression, field, factories,
                    caseOperation);
        }
    case GREATER_THAN:
        return ComparePathQuery.createQueryForNodesWithPathGreaterThan(path, field, factories, caseOperation);
    case GREATER_THAN_OR_EQUAL_TO:
        return ComparePathQuery.createQueryForNodesWithPathGreaterThanOrEqualTo(path, field, factories,
                caseOperation);
    case LESS_THAN:
        return ComparePathQuery.createQueryForNodesWithPathLessThan(path, field, factories, caseOperation);
    case LESS_THAN_OR_EQUAL_TO:
        return ComparePathQuery.createQueryForNodesWithPathLessThanOrEqualTo(path, field, factories,
                caseOperation);
    default: {
        throw new IllegalArgumentException("Unknown operator:" + operator);
    }
    }
}