List of usage examples for org.apache.lucene.util.automaton RegExp ALL
int ALL
To view the source code for org.apache.lucene.util.automaton RegExp ALL, click the Source link below each example.
From source file:com.qwazr.search.query.RegexpQuery.java
License:Apache License
/**
 * Builds the Lucene {@link org.apache.lucene.search.RegexpQuery} for this query node.
 * Optional settings fall back to permissive defaults: all regexp syntax flags
 * ({@code RegExp.ALL}) and Lucene's default determinized-state limit.
 */
@Override
final public Query getQuery(QueryContext queryContext) throws IOException {
    // Resolve optional (nullable) settings before constructing the query.
    final int syntaxFlags = flags == null ? RegExp.ALL : flags;
    final int maxStates = max_determinized_states == null ? Operations.DEFAULT_MAX_DETERMINIZED_STATES
            : max_determinized_states;
    return new org.apache.lucene.search.RegexpQuery(new Term(field, text), syntaxFlags, maxStates);
}
From source file:com.sindicetech.siren.search.node.NodeRegexpQuery.java
License:Open Source License
/**
 * Constructs a query for terms matching <code>term</code>.
 * <p>
 * By default, all regular expression features are enabled.
 * </p>
 *
 * @param term regular expression.
 */
public NodeRegexpQuery(final Term term) {
    // Delegate with RegExp.ALL so every regexp syntax flag is enabled.
    this(term, RegExp.ALL);
}
From source file:com.sindicetech.siren.search.node.TestNodeRegexpQuery.java
License:Open Source License
/**
 * Verifies that a custom {@link AutomatonProvider} is consulted when the regexp
 * references a named automaton via the <code>&lt;name&gt;</code> syntax:
 * searching for <code>&lt;quickBrown&gt;</code> should match a document through
 * the provider's hand-built automaton.
 */
public void testCustomProvider() throws IOException {
    final AutomatonProvider myProvider = new AutomatonProvider() {
        // automaton that matches quick or brown (or bob)
        private final Automaton quickBrownAutomaton = BasicOperations
                .union(Arrays.asList(BasicAutomata.makeString("quick"), BasicAutomata.makeString("brown"),
                        BasicAutomata.makeString("bob")));

        public Automaton getAutomaton(final String name) {
            // Only the name used in the query below is recognised.
            if (name.equals("quickBrown"))
                return quickBrownAutomaton;
            else
                return null;
        }
    };
    final NodeRegexpQuery query = new NodeRegexpQuery(this.newTerm("<quickBrown>"), RegExp.ALL, myProvider);
    assertEquals(1, searcher.search(dq(query), 5).totalHits);
}
From source file:edu.stanford.muse.index.Indexer.java
License:Apache License
/** * returns collection of docIds of the Lucene docs that hit, at least * threshold times./*from w w w . j a v a2 s .c o m*/ * warning! only looks up body field, no others * Caution: This code is not to be touched, unless something is being optimised * Introducing something here can seriously affect the search times. */ private Pair<Collection<String>, Integer> luceneLookupAsDocIdsWithTotalHits(String q, int threshold, IndexSearcher searcher, QueryType qt, int lt) throws IOException, ParseException, GeneralSecurityException, ClassNotFoundException { Collection<String> result = new ArrayList<String>(); // String escaped_q = escapeRegex(q); // to mimic built-in regex support //TODO: There should also be a general query type that takes any query with field param, i.e. without parser Query query; if (qt == QueryType.ORIGINAL) query = parserOriginal.parse(q); else if (qt == QueryType.SUBJECT) query = parserSubject.parse(q); else if (qt == QueryType.CORRESPONDENTS) query = parserCorrespondents.parse(q); else if (qt == QueryType.REGEX) { query = new BooleanQuery(); /** * Note: this is not a spanning (i.e. 
doesn't search over more than * one token) regexp, for spanning regexp use: body_unanlyzed and * title_unanlyzed fields instead */ Query query1 = new RegexpQuery(new Term("body", q), RegExp.ALL); Query query2 = new RegexpQuery(new Term("title", q), RegExp.ALL); ((BooleanQuery) query).add(query1, org.apache.lucene.search.BooleanClause.Occur.SHOULD); ((BooleanQuery) query).add(query2, org.apache.lucene.search.BooleanClause.Occur.SHOULD); } else /* if (qt == QueryType.PRESET_REGEX) { query = new BooleanQuery(); if(presetQueries != null) { for (String pq : presetQueries) { Query query1 = new RegexpQuery(new Term("body", pq), RegExp.ALL); Query query2 = new RegexpQuery(new Term("title", pq), RegExp.ALL); ((BooleanQuery) query).add(query1, org.apache.lucene.search.BooleanClause.Occur.SHOULD); ((BooleanQuery) query).add(query2, org.apache.lucene.search.BooleanClause.Occur.SHOULD); } log.info("Doing a preset regex search"); }else{ log.warn("Preset queries is not initialised"); } } else */ if (qt == QueryType.META) { query = parserMeta.parse(q); } else query = parser.parse(q); // query = convertRegex(query); long st = System.currentTimeMillis(); int totalHits = 0; ScoreDoc[] hits = null; if (query != null) { TopDocs tds = searcher.search(query, null, lt); log.info("Took: " + (System.currentTimeMillis() - st) + "ms for query:" + query); hits = tds.scoreDocs; totalHits = tds.totalHits; } else { log.error("Query is null!!"); } // this logging causes a 50% overhead on the query -- maybe enable it only for debugging // log.info (hits.length + " hits for query " + Util.ellipsize(q, 30) + " => " + Util.ellipsize(escaped_q, 30) + " = " + Util.ellipsize(query.toString(), 30) + " :"); // Iterate through the results: // TODO: not very pretty code here to determine dir_name which selects the cache to use Util.softAssert(searcher == isearcher || searcher == isearcher_blob); String dir_name = searcher == isearcher ? 
INDEX_NAME_EMAILS : INDEX_NAME_ATTACHMENTS; Map<Integer, String> map = dirNameToDocIdMap.get(dir_name); if (map == null) { map = new LinkedHashMap<Integer, String>(); dirNameToDocIdMap.put(dir_name, map); log.info("Adding new entry for dir name to docIdMap"); } else { log.info("Existing entry for dir name to docIdMap"); } int n_added = 0; log.info("Found: " + hits.length + " hits for query: " + q); for (int i = 0; i < hits.length; i++) { int ldocId = hits[i].doc; // this is the lucene doc id, we need to map it to our doc id. String docId = null; // this will be our doc id // try to use the new fieldcache id's // if this works, we can get rid of the dirNameToDocIdMap try { docId = (searcher == isearcher) ? contentDocIds.get(ldocId) : blobDocIds.get(ldocId); } catch (Exception e) { Util.print_exception(e, log); continue; } if (threshold <= 1) { // common case: threshold is 1. result.add(docId); n_added++; } else { // more expensive, do it only if threshold is > 1 Explanation expl = searcher.explain(query, hits[i].doc); Explanation[] details = expl.getDetails(); // NB: a catch here is that details.length doesn't reflect the actual # of hits for the query. // sometimes, for a single hit, there are 2 entries, a ComplexExplanation and an Explanation. // not sure why, but is somewhat corroborated by the code: // http://massapi.com/class/ex/Explanation.html // showing a single hit creating both a C.E and an E. // a more robust approach might be to look for the summary to end with product of: , sum of: etc. // e.g. http://www.gossamer-threads.com/lists/lucene/java-dev/49706 // but for now, we'll count only the number of ComplexExplanation and check if its above threshold // log.info("doc id " + hits[i].toString() + " #details = " + details.length); // HORRIBLE HACK! 
- because we don't know a better way to find the threshold outer: for (Explanation detail : details) { // log.info(detail.getClass().getName()); if (detail instanceof ComplexExplanation) { ComplexExplanation ce = (ComplexExplanation) detail; String s = ce.toString(); int total_tf = 0; while (true) { int idx = s.indexOf("tf(termFreq("); if (idx < 0) break outer; s = s.substring(idx); idx = s.indexOf("="); if (idx < 0) break outer; s = s.substring(idx + 1); int idx1 = s.indexOf(")"); if (idx < 0) break outer; String num_str = s.substring(0, idx1); int num = 0; try { num = Integer.parseInt(num_str); } catch (Exception e) { log.warn("ERROR parsing complex expl: " + num_str); } total_tf += num; if (total_tf >= threshold) { result.add(docId); n_added++; break outer; } } } } } } log.info(n_added + " docs added to docIdMap cache"); return new Pair<Collection<String>, Integer>(result, totalHits); }
From source file:jp.scaleout.elasticsearch.plugins.queryparser.classic.MapperQueryParser.java
License:Apache License
/**
 * Builds a regexp query for a single field, preferring the field mapper's own
 * regexpQuery implementation when available and falling back to the classic
 * parser's default otherwise.
 *
 * The current analyzer is saved up front and restored in the finally block, so
 * any analyzer override applied for this field never leaks to later terms.
 * When lenient mode is on, runtime failures are swallowed and null is returned.
 */
private Query getRegexpQuerySingle(String field, String termStr) throws ParseException {
    currentFieldType = null;
    Analyzer oldAnalyzer = getAnalyzer();
    try {
        currentFieldType = parseContext.fieldMapper(field);
        if (currentFieldType != null) {
            if (!forcedAnalyzer) {
                // use the field's own search analyzer unless one was forced on the parser
                setAnalyzer(parseContext.getSearchAnalyzer(currentFieldType));
            }
            Query query = null;
            if (currentFieldType.useTermQueryWithQueryString()) {
                // all regexp syntax flags enabled (RegExp.ALL)
                query = currentFieldType.regexpQuery(termStr, RegExp.ALL, maxDeterminizedStates,
                        multiTermRewriteMethod, parseContext);
            }
            if (query == null) {
                query = super.getRegexpQuery(field, termStr);
            }
            return query;
        }
        return super.getRegexpQuery(field, termStr);
    } catch (RuntimeException e) {
        // lenient mode: treat an unparseable/unbuildable regexp as "no query"
        if (settings.lenient()) {
            return null;
        }
        throw e;
    } finally {
        // always restore the analyzer saved above
        setAnalyzer(oldAnalyzer);
    }
}
From source file:jp.scaleout.elasticsearch.plugins.queryparser.classic.QueryParserBase.java
License:Apache License
/**
 * Builds a new RegexpQuery instance over the given term, with every regexp
 * syntax flag enabled and the parser's configured rewrite method applied.
 *
 * @param regexp Regexp term
 * @return new RegexpQuery instance
 */
protected Query newRegexpQuery(Term regexp) {
    final RegexpQuery regexpQuery = new RegexpQuery(regexp, RegExp.ALL, maxDeterminizedStates);
    regexpQuery.setRewriteMethod(multiTermRewriteMethod);
    return regexpQuery;
}
From source file:org.codelibs.elasticsearch.index.query.RegexpFlag.java
License:Apache License
/**
 * Resolves the combined OR'ed value for the given list of regular expression flags. The given flags must follow the
 * following syntax:
 * <p>
 * <tt>flag_name</tt>(|<tt>flag_name</tt>)*
 * <p>
 * Where <tt>flag_name</tt> is one of the following:
 * <ul>
 * <li>INTERSECTION</li>
 * <li>COMPLEMENT</li>
 * <li>EMPTY</li>
 * <li>ANYSTRING</li>
 * <li>INTERVAL</li>
 * <li>NONE</li>
 * <li>ALL</li>
 * </ul>
 * <p>
 * Example: <tt>INTERSECTION|COMPLEMENT|EMPTY</tt>
 *
 * @param flags A string representing a list of regular expression flags
 * @return The combined OR'ed value for all the flags
 * @throws IllegalArgumentException if any flag name is not a known {@link RegexpFlag}
 */
public static int resolveValue(String flags) {
    // null/empty means "everything enabled"
    if (flags == null || flags.isEmpty()) {
        return RegExp.ALL;
    }
    int magic = RegExp.NONE;
    for (String s : Strings.delimitedListToStringArray(flags, "|")) {
        if (s.isEmpty()) {
            continue;
        }
        try {
            RegexpFlag flag = RegexpFlag.valueOf(s.toUpperCase(Locale.ROOT));
            if (flag == RegexpFlag.NONE) {
                continue;
            }
            if (flag == RegexpFlag.ALL) {
                // ALL subsumes every other flag; no point OR-ing further
                return flag.value();
            }
            magic |= flag.value();
        } catch (IllegalArgumentException iae) {
            // FIX: chain the original exception so the root cause isn't lost
            throw new IllegalArgumentException("Unknown regexp flag [" + s + "]", iae);
        }
    }
    return magic;
}
From source file:org.elasticsearch.common.lucene.search.RegexpFilter.java
License:Apache License
/**
 * Creates a filter for the given term with all regular expression syntax
 * features enabled ({@code RegExp.ALL}).
 */
public RegexpFilter(Term term) {
    this(term, RegExp.ALL);
}
From source file:org.elasticsearch.index.mapper.completion.CompletionFieldMapperTests.java
License:Apache License
/**
 * Verifies that a regexpQuery on a completion field type produces a
 * {@link RegexCompletionQuery} (not a plain regexp query).
 */
public void testRegexQueryType() throws Exception {
    // minimal mapping with a single "completion"-typed field
    String mapping = jsonBuilder().startObject().startObject("type1").startObject("properties")
            .startObject("completion").field("type", "completion").endObject().endObject().endObject()
            .endObject().string();

    DocumentMapper defaultMapper = createIndex("test").mapperService().documentMapperParser().parse(mapping);
    FieldMapper fieldMapper = defaultMapper.mappers().getMapper("completion");
    CompletionFieldMapper completionFieldMapper = (CompletionFieldMapper) fieldMapper;
    Query prefixQuery = completionFieldMapper.fieldType().regexpQuery(new BytesRef("co"), RegExp.ALL,
            Operations.DEFAULT_MAX_DETERMINIZED_STATES);
    assertThat(prefixQuery, instanceOf(RegexCompletionQuery.class));
}
From source file:org.elasticsearch.index.mapper.CompletionFieldMapperTests.java
License:Apache License
/**
 * Verifies that a regexpQuery on a completion field type produces a
 * {@link RegexCompletionQuery}; this variant parses the mapping through
 * {@link CompressedXContent} with an explicit type name.
 */
public void testRegexQueryType() throws Exception {
    // minimal mapping with a single "completion"-typed field
    String mapping = jsonBuilder().startObject().startObject("type1").startObject("properties")
            .startObject("completion").field("type", "completion").endObject().endObject().endObject()
            .endObject().string();

    DocumentMapper defaultMapper = createIndex("test").mapperService().documentMapperParser().parse("type1",
            new CompressedXContent(mapping));
    FieldMapper fieldMapper = defaultMapper.mappers().getMapper("completion");
    CompletionFieldMapper completionFieldMapper = (CompletionFieldMapper) fieldMapper;
    Query prefixQuery = completionFieldMapper.fieldType().regexpQuery(new BytesRef("co"), RegExp.ALL,
            Operations.DEFAULT_MAX_DETERMINIZED_STATES);
    assertThat(prefixQuery, instanceOf(RegexCompletionQuery.class));
}