Example usage of the org.apache.lucene.util.automaton.CompiledAutomaton constructor CompiledAutomaton(Automaton)

List of usage examples for the org.apache.lucene.util.automaton.CompiledAutomaton constructor

Introduction

On this page you can find example usages of the org.apache.lucene.util.automaton.CompiledAutomaton constructor CompiledAutomaton(Automaton).

Prototype

public CompiledAutomaton(Automaton automaton) 

Source Link

Document

Creates a CompiledAutomaton, passing simplify=true and finite=null, so that the constructor tries to simplify the automaton and determines whether it is finite.

Usage

From source file:com.github.flaxsearch.resources.TermsResource.java

License:Apache License

/**
 * Returns an enumeration over the given terms, optionally restricted by a regular expression.
 *
 * @param terms  the terms to enumerate
 * @param filter a {@link RegExp} pattern used to restrict the enumeration, or {@code null}
 *               to enumerate every term
 * @return {@code terms.iterator()} when {@code filter} is {@code null}; otherwise the enum
 *         obtained from the automaton compiled from {@code filter}
 * @throws IOException if accessing the terms fails
 */
private TermsEnum getTermsEnum(Terms terms, String filter) throws IOException {
    if (filter != null) {
        // Compile the pattern once and let the compiled automaton pick the enum.
        RegExp pattern = new RegExp(filter);
        CompiledAutomaton compiled = new CompiledAutomaton(pattern.toAutomaton());
        return compiled.getTermsEnum(terms);
    }
    return terms.iterator();
}

From source file:com.rocana.lucene.codec.v1.TestBlockPostingsFormat3.java

License:Apache License

/**
 * Asserts that two {@link Terms} instances are equivalent: statistics, full enumeration,
 * seeking and, when {@code deep} is set, intersection with random regular-expression automata.
 *
 * @param leftTerms  first terms instance; may be {@code null}, in which case
 *                   {@code rightTerms} must be {@code null} as well
 * @param rightTerms second terms instance
 * @param deep       whether to additionally compare automaton intersections
 * @throws Exception if any of the underlying checks fails with an exception
 */
public void assertTerms(Terms leftTerms, Terms rightTerms, boolean deep) throws Exception {
    if (leftTerms == null || rightTerms == null) {
        // One side is missing, so both sides must be missing.
        assertNull(leftTerms);
        assertNull(rightTerms);
        return;
    }
    assertTermsStatistics(leftTerms, rightTerms);

    // NOTE: hasOffsets/hasPositions/hasPayloads are not asserted because they are allowed to differ
    final boolean bothHavePositions = leftTerms.hasPositions() && rightTerms.hasPositions();
    assertTermsEnum(leftTerms.iterator(), rightTerms.iterator(), true, bothHavePositions);

    assertTermsSeeking(leftTerms, rightTerms);

    if (!deep) {
        return;
    }

    final int rounds = atLeast(3);
    for (int round = 0; round < rounds; round++) {
        RegExp regExp = new RegExp(AutomatonTestUtil.randomRegexp(random()), RegExp.NONE);
        CompiledAutomaton compiled = new CompiledAutomaton(regExp.toAutomaton());
        if (compiled.type != CompiledAutomaton.AUTOMATON_TYPE.NORMAL) {
            // Only NORMAL automata are compared via intersect().
            continue;
        }
        // TODO: test start term too
        TermsEnum leftIntersection = leftTerms.intersect(compiled, null);
        TermsEnum rightIntersection = rightTerms.intersect(compiled, null);
        assertTermsEnum(leftIntersection, rightIntersection, rarely(), bothHavePositions);
    }
}

From source file:com.sindicetech.siren.search.node.NodeAutomatonQuery.java

License:Open Source License

/**
 * Builds a node query backed by the supplied {@link Automaton}.
 *
 * <p>The automaton is compiled eagerly into a {@link CompiledAutomaton} at construction time.
 *
 * @param term      term supplying the field (and possibly some pattern structure); its
 *                  text is ignored
 * @param automaton automaton to run; terms it accepts are considered a match
 */
public NodeAutomatonQuery(final Term term, final Automaton automaton) {
    super(term.field());
    this.automaton = automaton;
    this.compiled = new CompiledAutomaton(automaton);
    this.term = term;
}

From source file:org.exist.indexing.lucene.XMLToQuery.java

License:Open Source License

/**
 * Expands a wildcard query string into the concrete index terms it matches.
 *
 * <p>The query string is turned into a wildcard automaton, which is then run against the
 * terms of {@code field} in every leaf of the index reader.
 *
 * @param field    name of the field whose terms are inspected
 * @param queryStr wildcard expression to expand
 * @return all matching terms, in the order they were encountered; empty if nothing matches
 * @throws XPathException if reading the index fails with an {@link IOException}
 */
private Term[] expandTerms(String field, String queryStr) throws XPathException {
    final List<Term> termList = new ArrayList<>(8);
    final CompiledAutomaton compiled =
            new CompiledAutomaton(WildcardQuery.toAutomaton(new Term(field, queryStr)));
    IndexReader reader = null;
    try {
        reader = index.getReader();

        for (AtomicReaderContext leaf : reader.leaves()) {
            final Terms leafTerms = leaf.reader().terms(field);
            if (leafTerms == null) {
                // This leaf has no terms for the field.
                continue;
            }
            final TermsEnum matches = compiled.getTermsEnum(leafTerms);
            for (BytesRef data = matches.next(); data != null; data = matches.next()) {
                termList.add(new Term(field, data.utf8ToString()));
            }
        }
    } catch (IOException e) {
        throw new XPathException("Lucene index error while creating query: " + e.getMessage(), e);
    } finally {
        // Always hand the reader back, whether or not the expansion succeeded.
        index.releaseReader(reader);
    }
    return termList.toArray(new Term[termList.size()]);
}

From source file:org.opengrok.suggest.query.SuggesterRangeQuery.java

License:Open Source License

/**
 * {@inheritDoc}
 *
 * <p>Returns {@link TermsEnum#EMPTY} when {@code terms} is {@code null}, when no prefix is
 * available, or when obtaining the enum fails with an {@link IOException} (which is logged
 * at WARNING level).
 */
@Override
public TermsEnum getTermsEnumForSuggestions(final Terms terms) {
    if (terms == null) {
        return TermsEnum.EMPTY;
    }

    final BytesRef prefix = getPrefix();
    if (prefix == null) {
        return TermsEnum.EMPTY;
    }

    final Automaton prefixAutomaton = PrefixQuery.toAutomaton(prefix);

    final Automaton restricted;
    if (suggestPosition == SuggestPosition.LOWER) {
        // Keep only prefix matches that lie inside the [lower, upper] interval.
        final Automaton range = Automata.makeBinaryInterval(
                getLowerTerm(), includesLower(), getUpperTerm(), includesUpper());
        restricted = Operations.intersection(range, prefixAutomaton);
    } else {
        // Remove from the prefix matches everything up to the lower term.
        final Automaton belowLower = Automata.makeBinaryInterval(
                null, true, getLowerTerm(), !includesLower());
        restricted = Operations.minus(prefixAutomaton, belowLower, Integer.MIN_VALUE);
    }

    final CompiledAutomaton compiled = new CompiledAutomaton(restricted);
    try {
        return compiled.getTermsEnum(terms);
    } catch (IOException e) {
        logger.log(Level.WARNING, "Could not compile automaton for range suggestions", e);
        return TermsEnum.EMPTY;
    }
}

From source file:org.pageseeder.flint.lucene.search.Terms.java

License:Apache License

/**
 * Collects the text of the indexed terms that start with the given term's value.
 *
 * <p>Does nothing when the reader exposes no fields or the term's field has no terms.
 *
 * @param reader  Index reader to use.
 * @param values  The list that receives the matching term values.
 * @param term    The term whose field is searched and whose bytes are used as the prefix.
 *
 * @throws IOException If an error is thrown by the prefix term enumeration.
 */
public static void prefix(IndexReader reader, List<String> values, Term term) throws IOException {
    Fields fields = MultiFields.getFields(reader);
    if (fields == null) {
        return;
    }
    org.apache.lucene.index.Terms fieldTerms = fields.terms(term.field());
    if (fieldTerms == null) {
        return;
    }
    CompiledAutomaton prefixAutomaton = new CompiledAutomaton(PrefixQuery.toAutomaton(term.bytes()));
    TermsEnum matches = fieldTerms.intersect(prefixAutomaton, null);
    for (BytesRef val = matches.next(); val != null; val = matches.next()) {
        values.add(val.utf8ToString());
    }
}

From source file:org.pageseeder.flint.lucene.search.Terms.java

License:Apache License

/**
 * Collects the indexed terms that start with the given term's value into a bucket, each
 * added together with its document frequency.
 *
 * <p>Does nothing when the reader exposes no fields or the term's field has no terms.
 *
 * <p>NOTE(review): the term's own bytes are passed as the start term of the intersection,
 * whereas the {@code List<String>} overload passes {@code null}. Per the Lucene
 * {@code Terms.intersect} contract this presumably skips a term exactly equal to the
 * prefix; confirm this is intended.
 *
 * @param reader  Index reader to use.
 * @param bucket  Where to store the terms.
 * @param term    The term whose field is searched and whose bytes are used as the prefix.
 *
 * @throws IOException If an error is thrown by the prefix term enumeration.
 */
public static void prefix(IndexReader reader, Bucket<Term> bucket, Term term) throws IOException {
    Fields fields = MultiFields.getFields(reader);
    if (fields == null) {
        return;
    }
    org.apache.lucene.index.Terms fieldTerms = fields.terms(term.field());
    if (fieldTerms == null) {
        return;
    }
    CompiledAutomaton prefixAutomaton = new CompiledAutomaton(PrefixQuery.toAutomaton(term.bytes()));
    TermsEnum matches = fieldTerms.intersect(prefixAutomaton, term.bytes());
    for (BytesRef val = matches.next(); val != null; val = matches.next()) {
        // Deep copy: the enum's BytesRef is not kept beyond this iteration.
        Term found = new Term(term.field(), BytesRef.deepCopyOf(val));
        bucket.add(found, reader.docFreq(found));
    }
}

From source file:org.zenoss.zep.index.impl.lucene.LuceneQueryBuilder.java

License:Open Source License

/**
 * Resolves a possibly wildcarded value into the concrete index terms it matches.
 *
 * <p>If {@code value} contains neither {@code '*'} nor {@code '?'}, no index lookup is
 * performed and a single term for the literal value is returned. Otherwise the value is
 * compiled into a wildcard automaton and run against the terms of {@code fieldName}.
 *
 * @param fieldName name of the field to search
 * @param reader    index reader to look terms up in
 * @param value     literal or wildcard expression
 * @return the matching terms; an empty array when the wildcard matches nothing or the
 *         field has no terms in the index
 * @throws ZepException if reading the index fails with an {@link IOException}
 */
private static Term[] getMatchingTerms(String fieldName, IndexReader reader, String value) throws ZepException {
    // Don't search for matches if text doesn't contain wildcards
    if (value.indexOf('*') == -1 && value.indexOf('?') == -1) {
        return new Term[] { new Term(fieldName, value) };
    }

    logger.debug("getMatchingTerms: field={}, value={}", fieldName, value);
    List<Term> matches = new ArrayList<Term>();
    Automaton automaton = WildcardQuery.toAutomaton(new Term(fieldName, value));
    CompiledAutomaton compiled = new CompiledAutomaton(automaton);
    try {
        Terms terms = SlowCompositeReaderWrapper.wrap(reader).terms(fieldName);
        if (terms == null) {
            // The field is not present in the index, so nothing can match. Without this
            // guard the compiled automaton would be handed a null Terms instance.
            return new Term[0];
        }
        TermsEnum wildcardTermEnum = compiled.getTermsEnum(terms);
        BytesRef match;
        while ((match = wildcardTermEnum.next()) != null) {
            logger.debug("Match: {}", match);
            matches.add(new Term(fieldName, match.utf8ToString()));
        }
        return matches.toArray(new Term[matches.size()]);
    } catch (IOException e) {
        throw new ZepException(e.getLocalizedMessage(), e);
    }
}