// Java tutorial
package org.apache.maven.index; /* * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information * regarding copyright ownership. The ASF licenses this file * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY * KIND, either express or implied. See the License for the * specific language governing permissions and limitations * under the License. */ import javax.inject.Named; import javax.inject.Singleton; import java.io.IOException; import java.io.StringReader; import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.index.Term; import org.apache.lucene.queryparser.classic.ParseException; import org.apache.lucene.queryparser.classic.QueryParser; import org.apache.lucene.queryparser.classic.QueryParser.Operator; import org.apache.lucene.search.BooleanClause.Occur; import org.apache.lucene.search.BooleanQuery; import org.apache.lucene.search.PrefixQuery; import org.apache.lucene.search.Query; import org.apache.lucene.search.TermQuery; import org.apache.lucene.search.WildcardQuery; import org.apache.maven.index.context.NexusAnalyzer; import org.apache.maven.index.creator.JarFileContentsIndexCreator; import org.apache.maven.index.creator.MinimalArtifactInfoIndexCreator; import org.apache.maven.index.expr.SearchExpression; import org.apache.maven.index.expr.SearchTyped; import org.slf4j.Logger; import org.slf4j.LoggerFactory; /** * A default {@link QueryCreator} constructs Lucene query for provided query text. 
* <p> * By default wildcards are created such as query text matches beginning of the field value or beginning of the * class/package name segment for {@link ArtifactInfo#NAMES NAMES} field. But it can be controlled by using special * markers: * <ul> * <li>* - any character</li> * <li>'^' - beginning of the text</li> * <li>'$' or '<' or ' ' end of the text</li> * </ul> * For example: * <ul> * <li>junit - matches junit and junit-foo, but not foo-junit</li> * <li>*junit - matches junit, junit-foo and foo-junit</li> * <li>^junit$ - matches junit, but not junit-foo, nor foo-junit</li> * </ul> * * @author Eugene Kuleshov */ @Singleton @Named public class DefaultQueryCreator implements QueryCreator { private final Logger logger = LoggerFactory.getLogger(getClass()); protected Logger getLogger() { return logger; } // == public IndexerField selectIndexerField(final Field field, final SearchType type) { IndexerField lastField = null; for (IndexerField indexerField : field.getIndexerFields()) { lastField = indexerField; if (type.matchesIndexerField(indexerField)) { return indexerField; } } return lastField; } public Query constructQuery(final Field field, final SearchExpression expression) throws ParseException { SearchType searchType = SearchType.SCORED; if (expression instanceof SearchTyped) { searchType = ((SearchTyped) expression).getSearchType(); } return constructQuery(field, expression.getStringValue(), searchType); } public Query constructQuery(final Field field, final String query, final SearchType type) throws ParseException { if (type == null) { throw new NullPointerException("Cannot construct query with type of \"null\"!"); } if (field == null) { throw new NullPointerException("Cannot construct query for field \"null\"!"); } else { return constructQuery(field, selectIndexerField(field, type), query, type); } } @Deprecated public Query constructQuery(String field, String query) { Query result = null; if 
(MinimalArtifactInfoIndexCreator.FLD_GROUP_ID_KW.getKey().equals(field) || MinimalArtifactInfoIndexCreator.FLD_ARTIFACT_ID_KW.getKey().equals(field) || MinimalArtifactInfoIndexCreator.FLD_VERSION_KW.getKey().equals(field) || JarFileContentsIndexCreator.FLD_CLASSNAMES_KW.getKey().equals(field)) { // these are special untokenized fields, kept for use cases like TreeView is (exact matching). result = legacyConstructQuery(field, query); } else { QueryParser qp = new QueryParser(field, new NexusAnalyzer()); // small cheap trick // if a query is not "expert" (does not contain field:val kind of expression) // but it contains star and/or punctuation chars, example: "common-log*" if (!query.contains(":")) { if (query.contains("*") && query.matches(".*(\\.|-|_).*")) { query = query.toLowerCase().replaceAll("\\*", "X").replaceAll("\\.|-|_", " ").replaceAll("X", "*"); } } try { result = qp.parse(query); } catch (ParseException e) { getLogger().debug("Query parsing with \"legacy\" method, we got ParseException from QueryParser: " + e.getMessage()); result = legacyConstructQuery(field, query); } } if (getLogger().isDebugEnabled()) { getLogger().debug("Query parsed as: " + result.toString()); } return result; } // == public Query constructQuery(final Field field, final IndexerField indexerField, final String query, final SearchType type) throws ParseException { if (indexerField == null) { getLogger().warn("Querying for field \"" + field.toString() + "\" without any indexer field was tried. " + "Please review your code, and consider adding this field to index!"); return null; } if (!indexerField.isIndexed()) { getLogger().warn("Querying for non-indexed field " + field.toString() + " was tried. 
Please review your code or consider adding this field to index!"); return null; } if (Field.NOT_PRESENT.equals(query)) { return new WildcardQuery(new Term(indexerField.getKey(), "*")); } if (SearchType.EXACT.equals(type)) { if (indexerField.isKeyword()) { // no tokenization should happen against the field! if (query.contains("*") || query.contains("?")) { return new WildcardQuery(new Term(indexerField.getKey(), query)); } else { // exactly what callee wants return new TermQuery(new Term(indexerField.getKey(), query)); } } else if (!indexerField.isKeyword() && indexerField.isStored()) { // TODO: resolve this better! Decouple QueryCreator and IndexCreators! // This is a hack/workaround here if (JarFileContentsIndexCreator.FLD_CLASSNAMES_KW.equals(indexerField)) { if (query.startsWith("/")) { return new TermQuery( new Term(indexerField.getKey(), query.toLowerCase().replaceAll("\\.", "/"))); } else { return new TermQuery( new Term(indexerField.getKey(), "/" + query.toLowerCase().replaceAll("\\.", "/"))); } } else { getLogger().warn(type.toString() + " type of querying for non-keyword (but stored) field " + indexerField.getOntology().toString() + " was tried. Please review your code, or indexCreator involved, " + "since this type of querying of this field is currently unsupported."); // will never succeed (unless we supply him "filter" too, but that would kill performance) // and is possible with stored fields only return null; } } else { getLogger().warn(type.toString() + " type of querying for non-keyword (and not stored) field " + indexerField.getOntology().toString() + " was tried. Please review your code, or indexCreator involved, " + "since this type of querying of this field is impossible."); // not a keyword indexerField, nor stored. No hope at all. 
Impossible even with "filtering" return null; } } else if (SearchType.SCORED.equals(type)) { if (JarFileContentsIndexCreator.FLD_CLASSNAMES.equals(indexerField)) { String qpQuery = query.toLowerCase().replaceAll("\\.", " ").replaceAll("/", " "); // tokenization should happen against the field! QueryParser qp = new QueryParser(indexerField.getKey(), new NexusAnalyzer()); qp.setDefaultOperator(Operator.AND); return qp.parse(qpQuery); } else if (indexerField.isKeyword()) { // no tokenization should happen against the field! if (query.contains("*") || query.contains("?")) { return new WildcardQuery(new Term(indexerField.getKey(), query)); } else { BooleanQuery bq = new BooleanQuery(); Term t = new Term(indexerField.getKey(), query); bq.add(new TermQuery(t), Occur.SHOULD); PrefixQuery pq = new PrefixQuery(t); pq.setBoost(0.8f); bq.add(pq, Occur.SHOULD); return bq; } } else { // to save "original" query String qpQuery = query; // tokenization should happen against the field! QueryParser qp = new QueryParser(indexerField.getKey(), new NexusAnalyzer()); qp.setDefaultOperator(Operator.AND); // small cheap trick // if a query is not "expert" (does not contain field:val kind of expression) // but it contains star and/or punctuation chars, example: "common-log*" // since Lucene does not support multi-terms WITH wildcards. // So, here, we "mimic" NexusAnalyzer (this should be fixed!) // but do this with PRESERVING original query! 
if (qpQuery.matches(".*(\\.|-|_|/).*")) { qpQuery = qpQuery.toLowerCase().replaceAll("\\*", "X").replaceAll("\\.|-|_|/", " ") .replaceAll("X", "*").replaceAll(" \\* ", "").replaceAll("^\\* ", "") .replaceAll(" \\*$", ""); } // "fix" it with trailing "*" if not there, but only if it not ends with a space if (!qpQuery.endsWith("*") && !qpQuery.endsWith(" ")) { qpQuery += "*"; } try { // qpQuery = "\"" + qpQuery + "\""; BooleanQuery q1 = new BooleanQuery(); q1.add(qp.parse(qpQuery), Occur.SHOULD); if (qpQuery.contains(" ")) { q1.add(qp.parse("\"" + qpQuery + "\""), Occur.SHOULD); } Query q2 = null; int termCount = countTerms(indexerField, query); // try with KW only if the processed query in qpQuery does not have spaces! if (!query.contains(" ") && termCount > 1) { // get the KW field IndexerField keywordField = selectIndexerField(indexerField.getOntology(), SearchType.EXACT); if (keywordField.isKeyword()) { q2 = constructQuery(indexerField.getOntology(), keywordField, query, type); } } if (q2 == null) { return q1; } else { BooleanQuery bq = new BooleanQuery(); // trick with order bq.add(q2, Occur.SHOULD); bq.add(q1, Occur.SHOULD); return bq; } } catch (ParseException e) { // TODO: we are not falling back anymore to legacy! throw e; // getLogger().debug( // "Query parsing with \"legacy\" method, we got ParseException from QueryParser: " // + e.getMessage() ); // // return legacyConstructQuery( indexerField.getKey(), query ); } } } else { // what search type is this? 
return null; } } public Query legacyConstructQuery(String field, String query) { if (query == null || query.length() == 0) { getLogger().info("Empty or null query for field:" + field); return null; } String q = query.toLowerCase(); char h = query.charAt(0); if (JarFileContentsIndexCreator.FLD_CLASSNAMES_KW.getKey().equals(field) || JarFileContentsIndexCreator.FLD_CLASSNAMES.getKey().equals(field)) { q = q.replaceAll("\\.", "/"); if (h == '^') { q = q.substring(1); if (q.charAt(0) != '/') { q = '/' + q; } } else if (h != '*') { q = "*/" + q; } } else { if (h == '^') { q = q.substring(1); } else if (h != '*') { q = "*" + q; } } int l = q.length() - 1; char c = q.charAt(l); if (c == ' ' || c == '<' || c == '$') { q = q.substring(0, q.length() - 1); } else if (c != '*') { q += "*"; } int n = q.indexOf('*'); if (n == -1) { return new TermQuery(new Term(field, q)); } else if (n > 0 && n == q.length() - 1) { return new PrefixQuery(new Term(field, q.substring(0, q.length() - 1))); } return new WildcardQuery(new Term(field, q)); } // == private NexusAnalyzer nexusAnalyzer = new NexusAnalyzer(); protected int countTerms(final IndexerField indexerField, final String query) { try { TokenStream ts = nexusAnalyzer.tokenStream(indexerField.getKey(), new StringReader(query)); ts.reset(); int result = 0; while (ts.incrementToken()) { result++; } ts.end(); ts.close(); return result; } catch (IOException e) { // will not happen return 1; } } }