Example usage for org.apache.lucene.analysis.pattern PatternReplaceCharFilter PatternReplaceCharFilter

List of usage examples for the org.apache.lucene.analysis.pattern.PatternReplaceCharFilter constructor

Introduction

On this page you can find example usages of the org.apache.lucene.analysis.pattern.PatternReplaceCharFilter constructor.

Prototype

public PatternReplaceCharFilter(Pattern pattern, String replacement, Reader in) 

Source Link

Usage

From source file:it.cnr.isti.hpc.dexter.analysis.DexterAnalyzer.java

License:Apache License

/**
 * Punctuation characters stripped from the input before tokenization.
 * Inside the character class, '-', '[' and ']' are escaped so they are read as
 * literals: an unescaped "*-!" is a reversed (illegal) range and an unescaped
 * '[' opens a nested class, both of which make Pattern.compile throw.
 * Compiled once because Pattern instances are immutable and reusable.
 */
private static final Pattern PUNCTUATION_TO_STRIP =
        Pattern.compile("[*\\-!`{}~\\[\\]='<>:/;.&%|+_]");

/**
 * Builds the analysis chain for a field: punctuation removal and HTML
 * stripping on the character stream, then standard tokenization, lower-casing,
 * ASCII folding and, when {@code shingles} is set, shingling with a maximum
 * shingle size of 5.
 *
 * @param fieldName name of the field being analyzed (not used by this chain)
 * @param reader    source of the text to analyze
 * @return the tokenizer together with the end of the filter chain
 */
@Override
protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
    // NOTE(review): the char filters below wrap only the reader passed to this
    // call. If the Analyzer reuses these components and later feeds new readers
    // through setReader, those readers would bypass the char filters; moving
    // them to initReader may be the safer place - confirm against the Lucene
    // version in use.
    CharFilter cf = new PatternReplaceCharFilter(PUNCTUATION_TO_STRIP, "", reader);
    cf = new HTMLStripCharFilter(cf);

    final StandardTokenizer tokenizer = new StandardTokenizer(Version.LUCENE_41, cf);
    TokenStream tok = new StandardFilter(Version.LUCENE_41, tokenizer);
    tok = new LowerCaseFilter(Version.LUCENE_41, tok);
    tok = new ASCIIFoldingFilter(tok);
    if (shingles) {
        tok = new ShingleFilter(tok, 5);
    }
    // The former anonymous subclass only delegated setReader to super, so the
    // plain TokenStreamComponents behaves the same.
    return new TokenStreamComponents(tokenizer, tok);
}

From source file:it.cnr.isti.hpc.dexter.analysis.SpotAnalyzer.java

License:Apache License

/**
 * Wraps the incoming reader in a chain of character filters that normalize the
 * text before tokenization: leading articles, punctuation, trailing
 * parenthesised or '#' suffixes, "sr"/"jr" markers and dotted initials are
 * removed or blanked, and finally HTML markup is stripped.
 *
 * @param fieldName name of the field being analyzed (not used here)
 * @param reader    source of the raw text
 * @return a reader that applies every filter in the chain to {@code reader}
 */
@Override
protected Reader initReader(String fieldName, Reader reader) {
    // Drop a leading "the" (with optional leading spaces).
    CharFilter cf = new PatternReplaceCharFilter(Pattern.compile("^[ ]*the +(.*)"), "$1", reader);

    // Replace each listed punctuation character with a space.
    cf = new PatternReplaceCharFilter(Pattern.compile("[*!`{}~='<>:/%|=+_]"), " ", cf);

    // Drop a leading "a" (with optional leading spaces).
    cf = new PatternReplaceCharFilter(Pattern.compile("^[ ]*a +(.*)"), "$1", cf);
    // Drop a trailing " (...)" group.
    cf = new PatternReplaceCharFilter(Pattern.compile("^(.*) \\(.*\\)$"), "$1", cf);

    // Drop a '#' and the text that follows it.
    cf = new PatternReplaceCharFilter(Pattern.compile("^(.*)#.*$"), "$1", cf);

    // Remove "sr"/"jr" (any case, optional dot) plus preceding commas/spaces.
    cf = new PatternReplaceCharFilter(Pattern.compile("[, ]*[sjSJ][rR][.]?"), "", cf);

    // Collapse dotted capital initials (e.g. " J. K. ") into a single space.
    cf = new PatternReplaceCharFilter(Pattern.compile(" ([A-Z][.] ?)+"), " ", cf);
    cf = new PatternReplaceCharFilter(Pattern.compile("^([A-Z][.] ?)+ "), " ", cf);
    cf = new PatternReplaceCharFilter(Pattern.compile(" [A-Z][.]$"), " ", cf);

    // Wrap the chain built so far. Wrapping the raw reader here would silently
    // discard every pattern filter above.
    cf = new HTMLStripCharFilter(cf);
    return cf;
}

From source file:org.elasticsearch.analysis.common.PatternReplaceCharFilterFactory.java

License:Apache License

/**
 * Wraps the given reader in a {@link PatternReplaceCharFilter} built from this
 * factory's {@code pattern} and {@code replacement}.
 *
 * @param tokenStream the reader to filter
 * @return a reader that applies the pattern replacement to {@code tokenStream}
 */
@Override
public Reader create(Reader tokenStream) {
    final Reader filtered = new PatternReplaceCharFilter(pattern, replacement, tokenStream);
    return filtered;
}