Example usage for org.apache.lucene.analysis.Analyzer.normalize

List of usage examples for org.apache.lucene.analysis.Analyzer.normalize

Introduction

On this page you can find example usages of the normalize method of org.apache.lucene.analysis.Analyzer.

Prototype

public final BytesRef normalize(final String fieldName, final String text) 

Source Link

Document

Normalize a string down to the representation that it would have in the index.

Usage

From source file:org.elasticsearch.index.search.QueryStringQueryParser.java

License:Apache License

/**
 * Builds a range query against a single field.
 *
 * <p>The field type is looked up through {@code context} and stored in
 * {@code currentFieldType}. Each non-null bound is normalized with
 * {@code forceAnalyzer} when one is set, otherwise with the search analyzer
 * resolved for the field type, and the normalized bounds are handed to the
 * field type's {@code rangeQuery}.
 *
 * @param field          name of the field to query
 * @param part1          first bound of the range, or {@code null}; a {@code null} bound is passed on as {@code null}
 * @param part2          second bound of the range, or {@code null}; a {@code null} bound is passed on as {@code null}
 * @param startInclusive forwarded unchanged to the field type's {@code rangeQuery}
 * @param endInclusive   forwarded unchanged to the field type's {@code rangeQuery}
 * @param context        shard context used to resolve the field type and forwarded to {@code rangeQuery}
 * @return the range query; the result of {@code newUnmappedFieldQuery} when the field has no mapping;
 *         or the result of {@code newLenientFieldQuery} when a {@link RuntimeException} occurs and
 *         {@code lenient} is set
 * @throws RuntimeException rethrown as-is when {@code lenient} is not set
 */
private Query getRangeQuerySingle(String field, String part1, String part2, boolean startInclusive,
        boolean endInclusive, QueryShardContext context) {
    currentFieldType = context.fieldMapper(field);
    if (currentFieldType == null) {
        return newUnmappedFieldQuery(field);
    }
    try {
        // An explicitly forced analyzer takes precedence over the field's search analyzer.
        final Analyzer boundNormalizer;
        if (forceAnalyzer != null) {
            boundNormalizer = forceAnalyzer;
        } else {
            boundNormalizer = queryBuilder.context.getSearchAnalyzer(currentFieldType);
        }

        BytesRef firstBound = null;
        if (part1 != null) {
            firstBound = boundNormalizer.normalize(field, part1);
        }
        BytesRef secondBound = null;
        if (part2 != null) {
            secondBound = boundNormalizer.normalize(field, part2);
        }

        return currentFieldType.rangeQuery(firstBound, secondBound, startInclusive, endInclusive,
                null, timeZone, null, context);
    } catch (RuntimeException failure) {
        if (!lenient) {
            throw failure;
        }
        return newLenientFieldQuery(field, failure);
    }
}

From source file:org.elasticsearch.index.search.QueryStringQueryParser.java

License:Apache License

/**
 * Builds a fuzzy query against a single field.
 *
 * <p>The field type is looked up through {@code context} and stored in
 * {@code currentFieldType}. A non-null term is normalized with
 * {@code forceAnalyzer} when one is set, otherwise with the search analyzer
 * resolved for the field type, and then passed to the field type's
 * {@code fuzzyQuery}.
 *
 * @param field         name of the field to query
 * @param termStr       raw term text, or {@code null}; a {@code null} term is passed on as {@code null}
 * @param minSimilarity truncated to an {@code int} and converted with {@code Fuzziness.fromEdits}
 * @return the fuzzy query; the result of {@code newUnmappedFieldQuery} when the field has no mapping;
 *         or the result of {@code newLenientFieldQuery} when a {@link RuntimeException} occurs and
 *         {@code lenient} is set
 * @throws ParseException declared by the signature; nothing in this body throws it directly
 */
private Query getFuzzyQuerySingle(String field, String termStr, float minSimilarity) throws ParseException {
    currentFieldType = context.fieldMapper(field);
    if (currentFieldType == null) {
        return newUnmappedFieldQuery(field);
    }
    try {
        // An explicitly forced analyzer takes precedence over the field's search analyzer.
        final Analyzer termNormalizer;
        if (forceAnalyzer != null) {
            termNormalizer = forceAnalyzer;
        } else {
            termNormalizer = queryBuilder.context.getSearchAnalyzer(currentFieldType);
        }

        BytesRef normalizedTerm = null;
        if (termStr != null) {
            normalizedTerm = termNormalizer.normalize(field, termStr);
        }

        return currentFieldType.fuzzyQuery(normalizedTerm, Fuzziness.fromEdits((int) minSimilarity),
                getFuzzyPrefixLength(), fuzzyMaxExpansions, fuzzyTranspositions);
    } catch (RuntimeException failure) {
        if (!lenient) {
            throw failure;
        }
        return newLenientFieldQuery(field, failure);
    }
}

From source file:org.elasticsearch.index.search.SimpleQueryStringQueryParser.java

License:Apache License

/**
 * Runs {@code termStr} through {@code analyzer} and turns the resulting tokens into a query.
 *
 * <p>Tokens are grouped by position: a token with a positive position increment starts a new
 * group (once the current group is non-empty), otherwise it joins the current group. Every
 * token is passed through {@code analyzer.normalize} before being stored.
 *
 * <ul>
 *   <li>No groups: returns {@code null}.</li>
 *   <li>Exactly one group holding one token: returns a {@code PrefixQuery} on that token.</li>
 *   <li>Otherwise: returns a {@code BooleanQuery} with one clause per group, combined with
 *       {@code getDefaultOperator()}. Only the last group uses {@code PrefixQuery}s; earlier
 *       groups use a term query (single token) or a {@code SynonymQuery} (several tokens).</li>
 * </ul>
 *
 * @param field    name of the field the terms belong to
 * @param termStr  raw text to analyze
 * @param analyzer analyzer used both to tokenize and to normalize the tokens
 * @return the query described above; a {@code PrefixQuery} on the raw {@code termStr} if an
 *         {@link IOException} escapes the token stream setup or close; or {@code null} when no
 *         token was collected
 */
private Query newPossiblyAnalyzedQuery(String field, String termStr, Analyzer analyzer) {
    final List<List<BytesRef>> positions = new ArrayList<>();
    try (TokenStream stream = analyzer.tokenStream(field, termStr)) {
        stream.reset();
        List<BytesRef> bucket = new ArrayList<>();
        final CharTermAttribute termAttribute = stream.addAttribute(CharTermAttribute.class);
        final PositionIncrementAttribute incrementAttribute = stream
                .addAttribute(PositionIncrementAttribute.class);

        try {
            while (stream.incrementToken()) {
                // A positive increment closes the current position, unless nothing was collected yet.
                if (!bucket.isEmpty() && incrementAttribute.getPositionIncrement() > 0) {
                    positions.add(bucket);
                    bucket = new ArrayList<>();
                }
                bucket.add(analyzer.normalize(field, termAttribute.toString()));
            }
            if (!bucket.isEmpty()) {
                positions.add(bucket);
            }
        } catch (IOException ignored) {
            // Deliberately ignored: positions completed so far are kept, the in-progress one is dropped.
            // TODO: we should not ignore the exception and return a prefix query with the original term ?
        }
    } catch (IOException e) {
        // Bail on any exceptions, going with a regular prefix query
        return new PrefixQuery(new Term(field, termStr));
    }

    if (positions.isEmpty()) {
        return null;
    }

    final List<BytesRef> firstPosition = positions.get(0);
    if (positions.size() == 1 && firstPosition.size() == 1) {
        return new PrefixQuery(new Term(field, firstPosition.get(0)));
    }

    // Build a boolean query where only the last position is matched by prefix.
    final BooleanQuery.Builder combined = new BooleanQuery.Builder();
    final int lastIndex = positions.size() - 1;
    for (int index = 0; index <= lastIndex; index++) {
        final List<BytesRef> tokens = positions.get(index);
        final boolean atLastPosition = index == lastIndex;
        final Query clauseQuery;
        if (tokens.size() == 1) {
            final Term single = new Term(field, tokens.get(0));
            clauseQuery = atLastPosition ? new PrefixQuery(single) : newTermQuery(single);
        } else if (atLastPosition) {
            // Several tokens at the last position: any of their prefixes may match.
            final BooleanQuery.Builder prefixes = new BooleanQuery.Builder();
            for (BytesRef token : tokens) {
                final Query prefix = new PrefixQuery(new Term(field, token));
                prefixes.add(new BooleanClause(prefix, BooleanClause.Occur.SHOULD));
            }
            clauseQuery = prefixes.build();
        } else {
            // Several tokens sharing a non-final position are treated as synonyms.
            final Term[] synonyms = new Term[tokens.size()];
            int slot = 0;
            for (BytesRef token : tokens) {
                synonyms[slot++] = new Term(field, token);
            }
            clauseQuery = new SynonymQuery(synonyms);
        }
        combined.add(new BooleanClause(clauseQuery, getDefaultOperator()));
    }
    return combined.build();
}

From source file:org.tallison.solr.search.SolrSpanQueryParser.java

License:Apache License

/**
 * Normalizes a multi-term query term for the given field.
 *
 * <p>When {@code getMultiTermAnalyzer} yields an analyzer for the field, the term is run
 * through that analyzer's {@code normalize}; otherwise the term is wrapped unchanged in a
 * new {@code BytesRef}.
 *
 * @param fieldName name of the field the term belongs to
 * @param term      raw term text
 * @return the normalized term bytes, or the raw term as a {@code BytesRef} when no multi-term
 *         analyzer is available
 */
@Override
protected BytesRef normalizeMultiTerm(String fieldName, String term) {
    final Analyzer analyzer = getMultiTermAnalyzer(fieldName);
    return analyzer != null ? analyzer.normalize(fieldName, term) : new BytesRef(term);
}