Example usage for org.apache.lucene.util.automaton RegExp ALL

List of usage examples for org.apache.lucene.util.automaton RegExp ALL

Introduction

In this page you can find the example usage for org.apache.lucene.util.automaton RegExp ALL.

Prototype

int ALL

To view the source code for org.apache.lucene.util.automaton RegExp ALL, click the Source Link below.

Click Source Link

Document

Syntax flag, enables all optional regexp syntax.

Usage

From source file:com.qwazr.search.query.RegexpQuery.java

License:Apache License

@Override
final public Query getQuery(QueryContext queryContext) throws IOException {
    // Fall back to the full optional regexp syntax and Lucene's default
    // determinization limit whenever the corresponding settings are absent.
    final int effectiveFlags = flags == null ? RegExp.ALL : flags;
    final int maxStates = max_determinized_states == null
            ? Operations.DEFAULT_MAX_DETERMINIZED_STATES
            : max_determinized_states;
    return new org.apache.lucene.search.RegexpQuery(new Term(field, text), effectiveFlags, maxStates);
}

From source file:com.sindicetech.siren.search.node.NodeRegexpQuery.java

License:Open Source License

/**
 * Constructs a query for terms matching <code>term</code>.
 * <p>
 * By default, all regular expression features are enabled.
 * </p>
 *
 * @param term regular expression.
 */
public NodeRegexpQuery(final Term term) {
    this(term, RegExp.ALL);
}

From source file:com.sindicetech.siren.search.node.TestNodeRegexpQuery.java

License:Open Source License

/**
 * Verifies that a custom {@link AutomatonProvider} can be plugged into a
 * {@link NodeRegexpQuery}: the named automaton referenced as
 * {@code <quickBrown>} matches "quick", "brown" or "bob", and the search
 * is expected to produce exactly one hit.
 */
public void testCustomProvider() throws IOException {
    final AutomatonProvider myProvider = new AutomatonProvider() {
        // automaton that matches quick, brown or bob
        private final Automaton quickBrownAutomaton = BasicOperations
                .union(Arrays.asList(BasicAutomata.makeString("quick"), BasicAutomata.makeString("brown"),
                        BasicAutomata.makeString("bob")));

        public Automaton getAutomaton(final String name) {
            // Only the name "quickBrown" is resolved; any other name is unknown.
            if (name.equals("quickBrown"))
                return quickBrownAutomaton;
            else
                return null;
        }
    };
    // RegExp.ALL enables the <name> automaton-reference syntax used in the term.
    final NodeRegexpQuery query = new NodeRegexpQuery(this.newTerm("<quickBrown>"), RegExp.ALL, myProvider);
    assertEquals(1, searcher.search(dq(query), 5).totalHits);
}

From source file:edu.stanford.muse.index.Indexer.java

License:Apache License

/**
 * returns collection of docIds of the Lucene docs that hit, at least
 * threshold times./*from w  w w  .  j a  v  a2  s  .c o  m*/
 * warning! only looks up body field, no others
  * Caution: This code is not to be touched, unless something is being optimised
  * Introducing something here can seriously affect the search times.
 */
private Pair<Collection<String>, Integer> luceneLookupAsDocIdsWithTotalHits(String q, int threshold,
        IndexSearcher searcher, QueryType qt, int lt)
        throws IOException, ParseException, GeneralSecurityException, ClassNotFoundException {
    Collection<String> result = new ArrayList<String>();

    //   String escaped_q = escapeRegex(q); // to mimic built-in regex support
    //TODO: There should also be a general query type that takes any query with field param, i.e. without parser
    Query query;
    if (qt == QueryType.ORIGINAL)
        query = parserOriginal.parse(q);
    else if (qt == QueryType.SUBJECT)
        query = parserSubject.parse(q);
    else if (qt == QueryType.CORRESPONDENTS)
        query = parserCorrespondents.parse(q);
    else if (qt == QueryType.REGEX) {
        query = new BooleanQuery();
        /**
         * Note: this is not a spanning (i.e. doesn't search over more than
         * one token) regexp, for spanning regexp use: body_unanlyzed and
         * title_unanlyzed fields instead
         */
        Query query1 = new RegexpQuery(new Term("body", q), RegExp.ALL);
        Query query2 = new RegexpQuery(new Term("title", q), RegExp.ALL);
        ((BooleanQuery) query).add(query1, org.apache.lucene.search.BooleanClause.Occur.SHOULD);
        ((BooleanQuery) query).add(query2, org.apache.lucene.search.BooleanClause.Occur.SHOULD);
    } else /* if (qt == QueryType.PRESET_REGEX) {
           query = new BooleanQuery();
           if(presetQueries != null) {
           for (String pq : presetQueries) {
             Query query1 = new RegexpQuery(new Term("body", pq), RegExp.ALL);
             Query query2 = new RegexpQuery(new Term("title", pq), RegExp.ALL);
             ((BooleanQuery) query).add(query1, org.apache.lucene.search.BooleanClause.Occur.SHOULD);
             ((BooleanQuery) query).add(query2, org.apache.lucene.search.BooleanClause.Occur.SHOULD);
           }
           log.info("Doing a preset regex search");
           }else{
           log.warn("Preset queries is not initialised");
           }
           } else */ if (qt == QueryType.META) {
        query = parserMeta.parse(q);
    } else
        query = parser.parse(q);

    //      query = convertRegex(query);
    long st = System.currentTimeMillis();
    int totalHits = 0;
    ScoreDoc[] hits = null;
    if (query != null) {
        TopDocs tds = searcher.search(query, null, lt);
        log.info("Took: " + (System.currentTimeMillis() - st) + "ms for query:" + query);
        hits = tds.scoreDocs;
        totalHits = tds.totalHits;
    } else {
        log.error("Query is null!!");
    }
    // this logging causes a 50% overhead on the query -- maybe enable it only for debugging
    // log.info (hits.length + " hits for query " + Util.ellipsize(q, 30) + " => " + Util.ellipsize(escaped_q, 30) + " = " + Util.ellipsize(query.toString(), 30) + " :");

    // Iterate through the results:

    // TODO: not very pretty code here to determine dir_name which selects the cache to use
    Util.softAssert(searcher == isearcher || searcher == isearcher_blob);
    String dir_name = searcher == isearcher ? INDEX_NAME_EMAILS : INDEX_NAME_ATTACHMENTS;

    Map<Integer, String> map = dirNameToDocIdMap.get(dir_name);
    if (map == null) {
        map = new LinkedHashMap<Integer, String>();
        dirNameToDocIdMap.put(dir_name, map);
        log.info("Adding new entry for dir name to docIdMap");
    } else {
        log.info("Existing entry for dir name to docIdMap");
    }

    int n_added = 0;
    log.info("Found: " + hits.length + " hits for query: " + q);
    for (int i = 0; i < hits.length; i++) {
        int ldocId = hits[i].doc; // this is the lucene doc id, we need to map it to our doc id.

        String docId = null; // this will be our doc id

        // try to use the new fieldcache id's
        // if this works, we can get rid of the dirNameToDocIdMap
        try {
            docId = (searcher == isearcher) ? contentDocIds.get(ldocId) : blobDocIds.get(ldocId);
        } catch (Exception e) {
            Util.print_exception(e, log);
            continue;
        }

        if (threshold <= 1) {
            // common case: threshold is 1.
            result.add(docId);
            n_added++;
        } else {
            // more expensive, do it only if threshold is > 1
            Explanation expl = searcher.explain(query, hits[i].doc);
            Explanation[] details = expl.getDetails();
            // NB: a catch here is that details.length doesn't reflect the actual # of hits for the query.
            // sometimes, for a single hit, there are 2 entries, a ComplexExplanation and an Explanation.
            // not sure why, but is somewhat corroborated by the code:
            // http://massapi.com/class/ex/Explanation.html
            // showing a single hit creating both a C.E and an E.
            // a more robust approach might be to look for the summary to end with product of: , sum of: etc.
            // e.g. http://www.gossamer-threads.com/lists/lucene/java-dev/49706
            // but for now, we'll count only the number of ComplexExplanation and check if its above threshold
            //            log.info("doc id " + hits[i].toString() + " #details = " + details.length);

            // HORRIBLE HACK! - because we don't know a better way to find the threshold
            outer: for (Explanation detail : details) {
                // log.info(detail.getClass().getName());

                if (detail instanceof ComplexExplanation) {
                    ComplexExplanation ce = (ComplexExplanation) detail;
                    String s = ce.toString();
                    int total_tf = 0;
                    while (true) {
                        int idx = s.indexOf("tf(termFreq(");
                        if (idx < 0)
                            break outer;
                        s = s.substring(idx);
                        idx = s.indexOf("=");
                        if (idx < 0)
                            break outer;
                        s = s.substring(idx + 1);
                        int idx1 = s.indexOf(")");
                        if (idx < 0)
                            break outer;
                        String num_str = s.substring(0, idx1);
                        int num = 0;
                        try {
                            num = Integer.parseInt(num_str);
                        } catch (Exception e) {
                            log.warn("ERROR parsing complex expl: " + num_str);
                        }
                        total_tf += num;
                        if (total_tf >= threshold) {
                            result.add(docId);
                            n_added++;
                            break outer;
                        }
                    }
                }
            }
        }
    }
    log.info(n_added + " docs added to docIdMap cache");
    return new Pair<Collection<String>, Integer>(result, totalHits);
}

From source file:jp.scaleout.elasticsearch.plugins.queryparser.classic.MapperQueryParser.java

License:Apache License

/**
 * Builds a regexp query for a single field, delegating to the mapped field
 * type when one exists (optionally switching to its search analyzer), and
 * otherwise falling back to the default query-parser implementation.
 * Returns null on failure when lenient parsing is enabled.
 */
private Query getRegexpQuerySingle(String field, String termStr) throws ParseException {
    currentFieldType = null;
    Analyzer previousAnalyzer = getAnalyzer();
    try {
        currentFieldType = parseContext.fieldMapper(field);
        if (currentFieldType == null) {
            // No mapping for this field: use the standard parser behaviour.
            return super.getRegexpQuery(field, termStr);
        }
        if (!forcedAnalyzer) {
            // Temporarily adopt the field's search analyzer (restored in finally).
            setAnalyzer(parseContext.getSearchAnalyzer(currentFieldType));
        }
        Query fieldQuery = null;
        if (currentFieldType.useTermQueryWithQueryString()) {
            fieldQuery = currentFieldType.regexpQuery(termStr, RegExp.ALL, maxDeterminizedStates,
                    multiTermRewriteMethod, parseContext);
        }
        return fieldQuery != null ? fieldQuery : super.getRegexpQuery(field, termStr);
    } catch (RuntimeException e) {
        // In lenient mode, swallow the failure and produce no query.
        if (settings.lenient()) {
            return null;
        }
        throw e;
    } finally {
        setAnalyzer(previousAnalyzer);
    }
}

From source file:jp.scaleout.elasticsearch.plugins.queryparser.classic.QueryParserBase.java

License:Apache License

/**
 * Builds a new RegexpQuery instance with all optional regexp syntax enabled
 * and the configured determinization limit and rewrite method applied.
 *
 * @param regexp Regexp term
 * @return new RegexpQuery instance
 */
protected Query newRegexpQuery(Term regexp) {
    final RegexpQuery regexpQuery = new RegexpQuery(regexp, RegExp.ALL, maxDeterminizedStates);
    regexpQuery.setRewriteMethod(multiTermRewriteMethod);
    return regexpQuery;
}

From source file:org.codelibs.elasticsearch.index.query.RegexpFlag.java

License:Apache License

/**
 * Resolves the combined OR'ed value for the given list of regular expression flags. The given flags must follow the
 * following syntax://www  .  j ava 2 s . c o  m
 * <p>
 * <tt>flag_name</tt>(|<tt>flag_name</tt>)*
 * <p>
 * Where <tt>flag_name</tt> is one of the following:
 * <ul>
 *     <li>INTERSECTION</li>
 *     <li>COMPLEMENT</li>
 *     <li>EMPTY</li>
 *     <li>ANYSTRING</li>
 *     <li>INTERVAL</li>
 *     <li>NONE</li>
 *     <li>ALL</li>
 * </ul>
 * <p>
 * Example: <tt>INTERSECTION|COMPLEMENT|EMPTY</tt>
 *
 * @param flags A string representing a list of regular expression flags
 * @return The combined OR'ed value for all the flags
 */
public static int resolveValue(String flags) {
    // No flags supplied: default to the full optional syntax.
    if (flags == null || flags.isEmpty()) {
        return RegExp.ALL;
    }
    int magic = RegExp.NONE;
    for (String s : Strings.delimitedListToStringArray(flags, "|")) {
        if (s.isEmpty()) {
            continue;
        }
        try {
            RegexpFlag flag = RegexpFlag.valueOf(s.toUpperCase(Locale.ROOT));
            if (flag == RegexpFlag.NONE) {
                // NONE contributes nothing to the OR'ed value.
                continue;
            }
            if (flag == RegexpFlag.ALL) {
                // ALL subsumes every other flag; no need to look further.
                return flag.value();
            }
            magic |= flag.value();
        } catch (IllegalArgumentException iae) {
            // Preserve the original exception as the cause instead of dropping it,
            // so the invalid-enum-name stack trace is not lost.
            throw new IllegalArgumentException("Unknown regexp flag [" + s + "]", iae);
        }
    }
    return magic;
}

From source file:org.elasticsearch.common.lucene.search.RegexpFilter.java

License:Apache License

/**
 * Constructs a filter for terms matching the given regular expression term,
 * with all optional RegExp syntax features enabled ({@code RegExp.ALL}).
 *
 * @param term term holding the field name and the regular expression
 */
public RegexpFilter(Term term) {
    this(term, RegExp.ALL);
}

From source file:org.elasticsearch.index.mapper.completion.CompletionFieldMapperTests.java

License:Apache License

/**
 * Checks that a regexp query built from a completion field's type is
 * rewritten to a {@link RegexCompletionQuery}.
 */
public void testRegexQueryType() throws Exception {
    // Minimal mapping containing a single completion field named "completion".
    String mapping = jsonBuilder().startObject().startObject("type1").startObject("properties")
            .startObject("completion").field("type", "completion").endObject().endObject().endObject()
            .endObject().string();

    DocumentMapper defaultMapper = createIndex("test").mapperService().documentMapperParser().parse(mapping);
    FieldMapper fieldMapper = defaultMapper.mappers().getMapper("completion");
    CompletionFieldMapper completionFieldMapper = (CompletionFieldMapper) fieldMapper;
    // RegExp.ALL enables the full optional regexp syntax for the query.
    Query prefixQuery = completionFieldMapper.fieldType().regexpQuery(new BytesRef("co"), RegExp.ALL,
            Operations.DEFAULT_MAX_DETERMINIZED_STATES);
    assertThat(prefixQuery, instanceOf(RegexCompletionQuery.class));
}

From source file:org.elasticsearch.index.mapper.CompletionFieldMapperTests.java

License:Apache License

/**
 * Checks that a regexp query built from a completion field's type is
 * rewritten to a {@link RegexCompletionQuery}.
 */
public void testRegexQueryType() throws Exception {
    // Minimal mapping containing a single completion field named "completion".
    String mapping = jsonBuilder().startObject().startObject("type1").startObject("properties")
            .startObject("completion").field("type", "completion").endObject().endObject().endObject()
            .endObject().string();

    DocumentMapper defaultMapper = createIndex("test").mapperService().documentMapperParser().parse("type1",
            new CompressedXContent(mapping));
    FieldMapper fieldMapper = defaultMapper.mappers().getMapper("completion");
    CompletionFieldMapper completionFieldMapper = (CompletionFieldMapper) fieldMapper;
    // RegExp.ALL enables the full optional regexp syntax for the query.
    Query prefixQuery = completionFieldMapper.fieldType().regexpQuery(new BytesRef("co"), RegExp.ALL,
            Operations.DEFAULT_MAX_DETERMINIZED_STATES);
    assertThat(prefixQuery, instanceOf(RegexCompletionQuery.class));
}