List of usage examples for org.apache.lucene.search RegexpQuery RegexpQuery
public RegexpQuery(Term term, int flags)
From source file: edu.stanford.muse.index.Indexer.java
License:Apache License
/** * returns collection of docIds of the Lucene docs that hit, at least * threshold times.//from ww w . j a va2 s . c o m * warning! only looks up body field, no others * Caution: This code is not to be touched, unless something is being optimised * Introducing something here can seriously affect the search times. */ private Pair<Collection<String>, Integer> luceneLookupAsDocIdsWithTotalHits(String q, int threshold, IndexSearcher searcher, QueryType qt, int lt) throws IOException, ParseException, GeneralSecurityException, ClassNotFoundException { Collection<String> result = new ArrayList<String>(); // String escaped_q = escapeRegex(q); // to mimic built-in regex support //TODO: There should also be a general query type that takes any query with field param, i.e. without parser Query query; if (qt == QueryType.ORIGINAL) query = parserOriginal.parse(q); else if (qt == QueryType.SUBJECT) query = parserSubject.parse(q); else if (qt == QueryType.CORRESPONDENTS) query = parserCorrespondents.parse(q); else if (qt == QueryType.REGEX) { query = new BooleanQuery(); /** * Note: this is not a spanning (i.e. 
doesn't search over more than * one token) regexp, for spanning regexp use: body_unanlyzed and * title_unanlyzed fields instead */ Query query1 = new RegexpQuery(new Term("body", q), RegExp.ALL); Query query2 = new RegexpQuery(new Term("title", q), RegExp.ALL); ((BooleanQuery) query).add(query1, org.apache.lucene.search.BooleanClause.Occur.SHOULD); ((BooleanQuery) query).add(query2, org.apache.lucene.search.BooleanClause.Occur.SHOULD); } else /* if (qt == QueryType.PRESET_REGEX) { query = new BooleanQuery(); if(presetQueries != null) { for (String pq : presetQueries) { Query query1 = new RegexpQuery(new Term("body", pq), RegExp.ALL); Query query2 = new RegexpQuery(new Term("title", pq), RegExp.ALL); ((BooleanQuery) query).add(query1, org.apache.lucene.search.BooleanClause.Occur.SHOULD); ((BooleanQuery) query).add(query2, org.apache.lucene.search.BooleanClause.Occur.SHOULD); } log.info("Doing a preset regex search"); }else{ log.warn("Preset queries is not initialised"); } } else */ if (qt == QueryType.META) { query = parserMeta.parse(q); } else query = parser.parse(q); // query = convertRegex(query); long st = System.currentTimeMillis(); int totalHits = 0; ScoreDoc[] hits = null; if (query != null) { TopDocs tds = searcher.search(query, null, lt); log.info("Took: " + (System.currentTimeMillis() - st) + "ms for query:" + query); hits = tds.scoreDocs; totalHits = tds.totalHits; } else { log.error("Query is null!!"); } // this logging causes a 50% overhead on the query -- maybe enable it only for debugging // log.info (hits.length + " hits for query " + Util.ellipsize(q, 30) + " => " + Util.ellipsize(escaped_q, 30) + " = " + Util.ellipsize(query.toString(), 30) + " :"); // Iterate through the results: // TODO: not very pretty code here to determine dir_name which selects the cache to use Util.softAssert(searcher == isearcher || searcher == isearcher_blob); String dir_name = searcher == isearcher ? 
INDEX_NAME_EMAILS : INDEX_NAME_ATTACHMENTS; Map<Integer, String> map = dirNameToDocIdMap.get(dir_name); if (map == null) { map = new LinkedHashMap<Integer, String>(); dirNameToDocIdMap.put(dir_name, map); log.info("Adding new entry for dir name to docIdMap"); } else { log.info("Existing entry for dir name to docIdMap"); } int n_added = 0; log.info("Found: " + hits.length + " hits for query: " + q); for (int i = 0; i < hits.length; i++) { int ldocId = hits[i].doc; // this is the lucene doc id, we need to map it to our doc id. String docId = null; // this will be our doc id // try to use the new fieldcache id's // if this works, we can get rid of the dirNameToDocIdMap try { docId = (searcher == isearcher) ? contentDocIds.get(ldocId) : blobDocIds.get(ldocId); } catch (Exception e) { Util.print_exception(e, log); continue; } if (threshold <= 1) { // common case: threshold is 1. result.add(docId); n_added++; } else { // more expensive, do it only if threshold is > 1 Explanation expl = searcher.explain(query, hits[i].doc); Explanation[] details = expl.getDetails(); // NB: a catch here is that details.length doesn't reflect the actual # of hits for the query. // sometimes, for a single hit, there are 2 entries, a ComplexExplanation and an Explanation. // not sure why, but is somewhat corroborated by the code: // http://massapi.com/class/ex/Explanation.html // showing a single hit creating both a C.E and an E. // a more robust approach might be to look for the summary to end with product of: , sum of: etc. // e.g. http://www.gossamer-threads.com/lists/lucene/java-dev/49706 // but for now, we'll count only the number of ComplexExplanation and check if its above threshold // log.info("doc id " + hits[i].toString() + " #details = " + details.length); // HORRIBLE HACK! 
- because we don't know a better way to find the threshold outer: for (Explanation detail : details) { // log.info(detail.getClass().getName()); if (detail instanceof ComplexExplanation) { ComplexExplanation ce = (ComplexExplanation) detail; String s = ce.toString(); int total_tf = 0; while (true) { int idx = s.indexOf("tf(termFreq("); if (idx < 0) break outer; s = s.substring(idx); idx = s.indexOf("="); if (idx < 0) break outer; s = s.substring(idx + 1); int idx1 = s.indexOf(")"); if (idx < 0) break outer; String num_str = s.substring(0, idx1); int num = 0; try { num = Integer.parseInt(num_str); } catch (Exception e) { log.warn("ERROR parsing complex expl: " + num_str); } total_tf += num; if (total_tf >= threshold) { result.add(docId); n_added++; break outer; } } } } } } log.info(n_added + " docs added to docIdMap cache"); return new Pair<Collection<String>, Integer>(result, totalHits); }
From source file:org.elasticsearch.index.mapper.core.AbstractFieldMapper.java
License:Apache License
@Override public Query regexpQuery(Object value, int flags, @Nullable MultiTermQuery.RewriteMethod method, @Nullable QueryParseContext context) { RegexpQuery query = new RegexpQuery(names().createIndexNameTerm(indexedValueForSearch(value)), flags); if (method != null) { query.setRewriteMethod(method);//from w ww .j ava2 s. c o m } return query; }
From source file:org.elasticsearch.index.mapper.internal.IdFieldMapper.java
License:Apache License
/**
 * Regexp query against the _id field. Since _id may not be indexed, the regexp is
 * run against the _uid field instead, expanding to one clause per query type.
 */
@Override
public Query regexpQuery(Object value, int flags, @Nullable MultiTermQuery.RewriteMethod method,
        @Nullable QueryParseContext context) {
    // If _id is actually indexed (or we have no context to resolve types), the
    // default field-based implementation works as-is.
    if (fieldType.indexed() || context == null) {
        return super.regexpQuery(value, flags, method, context);
    }
    Collection<String> queryTypes = context.queryTypes();
    // Single type: query _uid directly, no boolean wrapper needed.
    if (queryTypes.size() == 1) {
        return uidRegexpQuery(Iterables.getFirst(queryTypes, null), value, flags, method);
    }
    // Multiple types: OR together one _uid regexp per type.
    BooleanQuery query = new BooleanQuery();
    for (String queryType : queryTypes) {
        query.add(uidRegexpQuery(queryType, value, flags, method), BooleanClause.Occur.SHOULD);
    }
    return query;
}

/** Creates a RegexpQuery over the _uid field for the given type/id, applying the optional rewrite method. */
private RegexpQuery uidRegexpQuery(String queryType, Object value, int flags,
        @Nullable MultiTermQuery.RewriteMethod method) {
    RegexpQuery regexpQuery = new RegexpQuery(
            new Term(UidFieldMapper.NAME, Uid.createUidAsBytes(queryType, BytesRefs.toBytesRef(value))), flags);
    if (method != null) {
        regexpQuery.setRewriteMethod(method);
    }
    return regexpQuery;
}
From source file:org.elasticsearch.index.query.RegexpQueryParser.java
License:Apache License
/**
 * Parses a "regexp" query from the request XContent into a Lucene query.
 *
 * Accepts either the short form {field: "pattern"} or the object form
 * {field: {value: ..., boost: ..., rewrite: ..., flags: ..., flags_value: ..., _name: ...}}.
 * Delegates query construction to the field's mapper when one is registered,
 * otherwise builds a plain {@link RegexpQuery} on the raw field name.
 *
 * @throws QueryParsingException if no field name or no value is supplied, or an
 *                               unsupported object key is encountered
 */
@Override
public Query parse(QueryParseContext parseContext) throws IOException, QueryParsingException {
    XContentParser parser = parseContext.parser();

    XContentParser.Token token = parser.nextToken();
    if (token != XContentParser.Token.FIELD_NAME) {
        throw new QueryParsingException(parseContext.index(), "[regexp] query malformed, no field");
    }
    String fieldName = parser.currentName();
    String rewriteMethod = null;

    Object value = null;
    float boost = 1.0f;
    // -1 means "not specified"; may be overridden by "flags" or "flags_value" below.
    int flagsValue = -1;
    String queryName = null;
    token = parser.nextToken();
    if (token == XContentParser.Token.START_OBJECT) {
        // Long form: iterate the inner object's key/value pairs.
        String currentFieldName = null;
        while ((token = parser.nextToken()) != XContentParser.Token.END_OBJECT) {
            if (token == XContentParser.Token.FIELD_NAME) {
                currentFieldName = parser.currentName();
            } else if (token.isValue()) {
                if ("value".equals(currentFieldName)) {
                    value = parser.objectBytes();
                } else if ("boost".equals(currentFieldName)) {
                    boost = parser.floatValue();
                } else if ("rewrite".equals(currentFieldName)) {
                    rewriteMethod = parser.textOrNull();
                } else if ("flags".equals(currentFieldName)) {
                    // Symbolic flag names, e.g. "INTERSECTION|COMPLEMENT".
                    String flags = parser.textOrNull();
                    flagsValue = RegexpFlag.resolveValue(flags);
                } else if ("flags_value".equals(currentFieldName)) {
                    // Raw numeric flags; negative values fall back to RegExp.ALL.
                    flagsValue = parser.intValue();
                    if (flagsValue < 0) {
                        flagsValue = RegExp.ALL;
                    }
                } else if ("_name".equals(currentFieldName)) {
                    queryName = parser.text();
                }
            } else {
                throw new QueryParsingException(parseContext.index(),
                        "[regexp] query does not support [" + currentFieldName + "]");
            }
        }
        parser.nextToken(); // consume END_OBJECT
    } else {
        // Short form: the token itself is the pattern value.
        value = parser.objectBytes();
        parser.nextToken();
    }

    if (value == null) {
        throw new QueryParsingException(parseContext.index(), "No value specified for regexp query");
    }

    MultiTermQuery.RewriteMethod method = QueryParsers.parseRewriteMethod(rewriteMethod, null);

    Query query = null;
    // Prefer the field mapper's own regexpQuery implementation when available.
    MapperService.SmartNameFieldMappers smartNameFieldMappers = parseContext.smartFieldMappers(fieldName);
    if (smartNameFieldMappers != null && smartNameFieldMappers.hasMapper()) {
        if (smartNameFieldMappers.explicitTypeInNameWithDocMapper()) {
            // Field name carried an explicit type prefix: scope the type context while
            // building the query, restoring the previous types afterwards.
            String[] previousTypes = QueryParseContext
                    .setTypesWithPrevious(new String[] { smartNameFieldMappers.docMapper().type() });
            try {
                query = smartNameFieldMappers.mapper().regexpQuery(value, flagsValue, method, parseContext);
            } finally {
                QueryParseContext.setTypes(previousTypes);
            }
        } else {
            query = smartNameFieldMappers.mapper().regexpQuery(value, flagsValue, method, parseContext);
        }
    }
    if (query == null) {
        // No mapper: query the raw field name directly.
        RegexpQuery regexpQuery = new RegexpQuery(new Term(fieldName, BytesRefs.toBytesRef(value)), flagsValue);
        if (method != null) {
            regexpQuery.setRewriteMethod(method);
        }
        query = regexpQuery;
    }
    query.setBoost(boost);
    query = wrapSmartNameQuery(query, smartNameFieldMappers, parseContext);
    if (queryName != null) {
        parseContext.addNamedQuery(queryName, query);
    }
    return query;
}
From source file:org.kie.workbench.common.services.refactoring.backend.server.query.builder.AbstractQueryBuilder.java
License:Apache License
public Query getQuery(ValueIndexTerm valueTerm) { final String text = getText(valueTerm); Term term = new Term(valueTerm.getTerm(), text); Query termQuery;//w w w . jav a2 s .c o m switch (valueTerm.getSearchType()) { case PREFIX: termQuery = new PrefixQuery(term); break; case WILDCARD: termQuery = new WildcardQuery(term); break; case REGEXP: termQuery = new RegexpQuery(term, RegExp.NONE); // NONE until there's a specific reason to use extend regex syntax break; case NORMAL: termQuery = new TermQuery(term); break; default: throw new UnsupportedOperationException(ValueIndexTerm.TermSearchType.class.getSimpleName() + " value " + valueTerm.getSearchType().toString() + " is unsupported!"); } return termQuery; }
From source file:org.modeshape.jcr.index.lucene.query.LuceneQueryFactory.java
License:Apache License
/**
 * Builds a Lucene query that compares the path field against a value with the
 * given operator. For LIKE, a wildcard expression containing "[%]" (any
 * same-name-sibling index) is translated into a Lucene regexp; all other
 * operators delegate to the Compare*Query helpers.
 *
 * @param field         the index field holding the node path
 * @param operator      the comparison operator
 * @param value         the comparison value; coerced to a Path except for LIKE
 * @param caseOperation case handling; null defaults to AS_IS
 * @throws IllegalArgumentException for an unknown operator
 */
protected Query pathFieldQuery(String field, Operator operator, Object value, CaseOperation caseOperation) {
    Path path = null;
    // LIKE compares against a pattern string, not a concrete path.
    if (operator != Operator.LIKE) {
        path = !(value instanceof Path) ? pathFactory.create(value) : (Path) value;
    }
    if (caseOperation == null) {
        caseOperation = CaseOperations.AS_IS;
    }
    switch (operator) {
    case EQUAL_TO:
        return CompareStringQuery.createQueryForNodesWithFieldEqualTo(stringFactory.create(path), field,
                factories, caseOperation);
    case NOT_EQUAL_TO:
        return not(CompareStringQuery.createQueryForNodesWithFieldEqualTo(stringFactory.create(path), field,
                factories, caseOperation));
    case LIKE:
        String likeExpression = stringFactory.create(value);
        // the paths are stored in the index via stringFactory.create, which doesn't have the "1" index for SNS...
        likeExpression = likeExpression.replaceAll("\\[1\\]", "");
        if (likeExpression.contains("[%]")) {
            // We can't use '[%]' because we only want to match digits,
            // so handle this using a regex ...
            // !!! LUCENE Regexp is not the same as Java's. See the javadoc RegExp
            String regex = likeExpression;
            regex = regex.replace("[%]", "(\\[[0-9]+\\])?");
            regex = regex.replaceAll("\\[\\d+\\]", "\\[[0-9]+\\]");
            //regex = regex.replace("]", "\\]");
            regex = regex.replace("*", ".*");
            regex = regex.replace("%", ".*").replace("_", ".");
            // Now create a regex query ...
            // NOTE(review): Pattern.CASE_INSENSITIVE is a java.util.regex flag, but
            // RegexpQuery expects Lucene RegExp syntax flags -- these constants are not
            // interchangeable; verify the intended case-insensitive behavior.
            int flags = caseOperation == CaseOperations.AS_IS ? 0 : Pattern.CASE_INSENSITIVE;
            return new RegexpQuery(new Term(field, regex), flags);
        } else {
            return CompareStringQuery.createQueryForNodesWithFieldLike(likeExpression, field, factories,
                    caseOperation);
        }
    case GREATER_THAN:
        return ComparePathQuery.createQueryForNodesWithPathGreaterThan(path, field, factories, caseOperation);
    case GREATER_THAN_OR_EQUAL_TO:
        return ComparePathQuery.createQueryForNodesWithPathGreaterThanOrEqualTo(path, field, factories,
                caseOperation);
    case LESS_THAN:
        return ComparePathQuery.createQueryForNodesWithPathLessThan(path, field, factories, caseOperation);
    case LESS_THAN_OR_EQUAL_TO:
        return ComparePathQuery.createQueryForNodesWithPathLessThanOrEqualTo(path, field, factories,
                caseOperation);
    default: {
        throw new IllegalArgumentException("Unknown operator:" + operator);
    }
    }
}