List of usage examples for the org.apache.lucene.util.automaton.CompiledAutomaton constructor CompiledAutomaton(Automaton)
public CompiledAutomaton(Automaton automaton)
From source file:com.github.flaxsearch.resources.TermsResource.java
License:Apache License
/**
 * Returns an enumeration over the given terms, optionally narrowed by a regular expression.
 *
 * @param terms  the terms to enumerate
 * @param filter a Lucene {@link RegExp} pattern, or {@code null} to enumerate every term
 * @return an enumeration of all terms when {@code filter} is {@code null}, otherwise the
 *         enumeration produced by the automaton compiled from {@code filter}
 * @throws IOException if the underlying terms cannot be read
 */
private TermsEnum getTermsEnum(Terms terms, String filter) throws IOException {
    if (filter != null) {
        RegExp pattern = new RegExp(filter);
        CompiledAutomaton compiled = new CompiledAutomaton(pattern.toAutomaton());
        return compiled.getTermsEnum(terms);
    }
    // No filter supplied: walk the complete term dictionary.
    return terms.iterator();
}
From source file:com.rocana.lucene.codec.v1.TestBlockPostingsFormat3.java
License:Apache License
public void assertTerms(Terms leftTerms, Terms rightTerms, boolean deep) throws Exception { if (leftTerms == null || rightTerms == null) { assertNull(leftTerms);//from w ww .j a v a 2s . c om assertNull(rightTerms); return; } assertTermsStatistics(leftTerms, rightTerms); // NOTE: we don't assert hasOffsets/hasPositions/hasPayloads because they are allowed to be different boolean bothHavePositions = leftTerms.hasPositions() && rightTerms.hasPositions(); TermsEnum leftTermsEnum = leftTerms.iterator(); TermsEnum rightTermsEnum = rightTerms.iterator(); assertTermsEnum(leftTermsEnum, rightTermsEnum, true, bothHavePositions); assertTermsSeeking(leftTerms, rightTerms); if (deep) { int numIntersections = atLeast(3); for (int i = 0; i < numIntersections; i++) { String re = AutomatonTestUtil.randomRegexp(random()); CompiledAutomaton automaton = new CompiledAutomaton(new RegExp(re, RegExp.NONE).toAutomaton()); if (automaton.type == CompiledAutomaton.AUTOMATON_TYPE.NORMAL) { // TODO: test start term too TermsEnum leftIntersection = leftTerms.intersect(automaton, null); TermsEnum rightIntersection = rightTerms.intersect(automaton, null); assertTermsEnum(leftIntersection, rightIntersection, rarely(), bothHavePositions); } } } }
From source file:com.sindicetech.siren.search.node.NodeAutomatonQuery.java
License:Open Source License
/**
 * Creates a new query backed by the given {@link Automaton}.
 *
 * @param term      term supplying the field (and possibly some pattern structure); the
 *                  term text itself is ignored
 * @param automaton automaton to run; every term it accepts is considered a match
 */
public NodeAutomatonQuery(final Term term, final Automaton automaton) {
    super(term.field());
    this.automaton = automaton;
    // Compile once up front so the compiled form is available alongside the raw automaton.
    this.compiled = new CompiledAutomaton(automaton);
    this.term = term;
}
From source file:org.exist.indexing.lucene.XMLToQuery.java
License:Open Source License
/**
 * Expands a wildcard expression into the concrete index terms it matches.
 *
 * @param field    name of the field whose terms are inspected
 * @param queryStr wildcard pattern, interpreted by {@link WildcardQuery#toAutomaton}
 * @return every term of {@code field}, across all reader leaves, accepted by the pattern
 * @throws XPathException if reading the index fails
 */
private Term[] expandTerms(String field, String queryStr) throws XPathException {
    final CompiledAutomaton compiled =
            new CompiledAutomaton(WildcardQuery.toAutomaton(new Term(field, queryStr)));
    final List<Term> expanded = new ArrayList<>(8);

    IndexReader reader = null;
    try {
        reader = index.getReader();
        for (AtomicReaderContext leaf : reader.leaves()) {
            final Terms leafTerms = leaf.reader().terms(field);
            if (leafTerms == null) {
                // This leaf has no terms for the field.
                continue;
            }
            final TermsEnum hits = compiled.getTermsEnum(leafTerms);
            for (BytesRef bytes = hits.next(); bytes != null; bytes = hits.next()) {
                expanded.add(new Term(field, bytes.utf8ToString()));
            }
        }
    } catch (IOException e) {
        throw new XPathException("Lucene index error while creating query: " + e.getMessage(), e);
    } finally {
        // Runs even when getReader() failed, in which case reader is still null.
        index.releaseReader(reader);
    }

    return expanded.toArray(new Term[expanded.size()]);
}
From source file:org.opengrok.suggest.query.SuggesterRangeQuery.java
License:Open Source License
/** {@inheritDoc} */ @Override// www.ja v a2 s .co m public TermsEnum getTermsEnumForSuggestions(final Terms terms) { if (terms == null) { return TermsEnum.EMPTY; } BytesRef prefix = getPrefix(); if (prefix != null) { Automaton prefixAutomaton = PrefixQuery.toAutomaton(prefix); Automaton finalAutomaton; if (suggestPosition == SuggestPosition.LOWER) { Automaton binaryInt = Automata.makeBinaryInterval(getLowerTerm(), includesLower(), getUpperTerm(), includesUpper()); finalAutomaton = Operations.intersection(binaryInt, prefixAutomaton); } else { Automaton binaryInt = Automata.makeBinaryInterval(null, true, getLowerTerm(), !includesLower()); finalAutomaton = Operations.minus(prefixAutomaton, binaryInt, Integer.MIN_VALUE); } CompiledAutomaton compiledAutomaton = new CompiledAutomaton(finalAutomaton); try { return compiledAutomaton.getTermsEnum(terms); } catch (IOException e) { logger.log(Level.WARNING, "Could not compile automaton for range suggestions", e); } } return TermsEnum.EMPTY; }
From source file:org.pageseeder.flint.lucene.search.Terms.java
License:Apache License
/**
 * Loads all the terms starting with the given prefix into the list of values.
 *
 * <p>Nothing is added when the reader has no fields or no terms for the field of
 * {@code term}.
 *
 * @param reader Index reader to use.
 * @param values The list of values to load.
 * @param term   The term to use; its field selects the terms and its bytes are the prefix.
 *
 * @throws IOException If an error is thrown by the prefix term enumeration.
 */
public static void prefix(IndexReader reader, List<String> values, Term term) throws IOException {
    Fields fields = MultiFields.getFields(reader);
    org.apache.lucene.index.Terms terms = fields == null ? null : fields.terms(term.field());
    if (terms == null) {
        return;
    }

    // PrefixQuery.toAutomaton builds a byte-level automaton, so it must be compiled with
    // isBinary=true; the single-argument constructor would treat the byte labels as Unicode
    // code points and UTF-8 encode them a second time, missing non-ASCII terms.
    CompiledAutomaton compiled = new CompiledAutomaton(
            PrefixQuery.toAutomaton(term.bytes()), null, true,
            org.apache.lucene.util.automaton.Operations.DEFAULT_MAX_DETERMINIZED_STATES, true);

    // getTermsEnum copes with every automaton type (e.g. an empty prefix that accepts all
    // terms), whereas Terms.intersect only accepts automata of type NORMAL.
    TermsEnum prefixes = compiled.getTermsEnum(terms);
    BytesRef val;
    while ((val = prefixes.next()) != null) {
        values.add(val.utf8ToString());
    }
}
From source file:org.pageseeder.flint.lucene.search.Terms.java
License:Apache License
/**
 * Loads all the terms starting with the given prefix into the bucket, each one counted
 * with its document frequency.
 *
 * <p>Nothing is added when the reader has no fields or no terms for the field of
 * {@code term}.
 *
 * @param reader Index reader to use.
 * @param bucket Where to store the terms.
 * @param term   The term to use; its field selects the terms and its bytes are both the
 *               prefix and the start term of the enumeration.
 *
 * @throws IOException If an error is thrown by the prefix term enumeration.
 */
public static void prefix(IndexReader reader, Bucket<Term> bucket, Term term) throws IOException {
    Fields fields = MultiFields.getFields(reader);
    org.apache.lucene.index.Terms terms = fields == null ? null : fields.terms(term.field());
    if (terms == null) {
        return;
    }

    // PrefixQuery.toAutomaton builds a byte-level automaton, so it must be compiled with
    // isBinary=true; the single-argument constructor would treat the byte labels as Unicode
    // code points and UTF-8 encode them a second time, missing non-ASCII terms.
    // simplify=false keeps the compiled type NORMAL, which Terms.intersect requires.
    CompiledAutomaton compiled = new CompiledAutomaton(
            PrefixQuery.toAutomaton(term.bytes()), null, false,
            org.apache.lucene.util.automaton.Operations.DEFAULT_MAX_DETERMINIZED_STATES, true);

    // NOTE(review): the start term makes the enumeration begin after the prefix itself, so a
    // term exactly equal to the prefix is presumably not reported - confirm this is intended.
    TermsEnum prefixes = terms.intersect(compiled, term.bytes());
    BytesRef val;
    while ((val = prefixes.next()) != null) {
        // The enum may reuse its BytesRef, so copy the bytes before keeping them.
        Term t = new Term(term.field(), BytesRef.deepCopyOf(val));
        bucket.add(t, reader.docFreq(t));
    }
}
From source file:org.zenoss.zep.index.impl.lucene.LuceneQueryBuilder.java
License:Open Source License
private static Term[] getMatchingTerms(String fieldName, IndexReader reader, String value) throws ZepException { // Don't search for matches if text doesn't contain wildcards if (value.indexOf('*') == -1 && value.indexOf('?') == -1) return new Term[] { new Term(fieldName, value) }; logger.debug("getMatchingTerms: field={}, value={}", fieldName, value); List<Term> matches = new ArrayList<Term>(); Automaton automaton = WildcardQuery.toAutomaton(new Term(fieldName, value)); CompiledAutomaton compiled = new CompiledAutomaton(automaton); try {/*from w w w .j a va2s.c o m*/ Terms terms = SlowCompositeReaderWrapper.wrap(reader).terms(fieldName); TermsEnum wildcardTermEnum = compiled.getTermsEnum(terms); BytesRef match; while (wildcardTermEnum.next() != null) { match = wildcardTermEnum.term(); logger.debug("Match: {}", match); matches.add(new Term(fieldName, match.utf8ToString())); } return matches.toArray(new Term[matches.size()]); } catch (IOException e) { throw new ZepException(e.getLocalizedMessage(), e); } }