List of usage examples for the org.apache.lucene.analysis.pattern.PatternReplaceCharFilter constructor
public PatternReplaceCharFilter(Pattern pattern, String replacement, Reader in)
From source file: it.cnr.isti.hpc.dexter.analysis.DexterAnalyzer.java
License:Apache License
/**
 * Punctuation deleted from the text before tokenization.
 *
 * <p>The hyphen and both square brackets are escaped on purpose. Unescaped, {@code *-!} is read
 * as a descending character range and {@code [} opens a nested character class, so the pattern
 * fails to compile with a {@link java.util.regex.PatternSyntaxException}.
 */
private static final Pattern DEXTER_PUNCTUATION =
        Pattern.compile("[*\\-!`{}~\\[\\]='<>:/;.&%|+_]");

/**
 * Wraps {@code reader} in the character filters used by this analyzer.
 *
 * <p>HTML is stripped first and punctuation second. Deleting {@code <}, {@code >}, {@code /},
 * {@code &} and {@code ;} before the HTML filter runs would leave it no markup to recognise.
 *
 * @param reader the raw character stream
 * @return the filtered character stream to hand to the tokenizer
 */
private static Reader stripMarkupAndPunctuation(Reader reader) {
    final CharFilter withoutMarkup = new HTMLStripCharFilter(reader);
    return new PatternReplaceCharFilter(DEXTER_PUNCTUATION, "", withoutMarkup);
}

/**
 * Builds the analysis chain: char filters, {@link StandardTokenizer}, {@link StandardFilter},
 * {@link LowerCaseFilter}, {@link ASCIIFoldingFilter} and, when the {@code shingles} flag is
 * set, a {@link ShingleFilter} constructed with 5 as its size argument.
 *
 * @param fieldName the field being analyzed (not used to vary the chain)
 * @param reader the character stream to analyze
 * @return the tokenizer together with the end of the filter chain
 */
@Override
protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
    final StandardTokenizer tokenizer =
            new StandardTokenizer(Version.LUCENE_41, stripMarkupAndPunctuation(reader));

    TokenStream tok = new StandardFilter(Version.LUCENE_41, tokenizer);
    tok = new LowerCaseFilter(Version.LUCENE_41, tok);
    tok = new ASCIIFoldingFilter(tok);
    if (shingles) {
        tok = new ShingleFilter(tok, 5);
    }

    return new TokenStreamComponents(tokenizer, tok) {
        @Override
        protected void setReader(final Reader reader) throws IOException {
            // The char filters built above are bound to the first reader only. When these
            // components are reused for another reader, it has to be wrapped again, otherwise
            // the tokenizer would read unfiltered text.
            super.setReader(stripMarkupAndPunctuation(reader));
        }
    };
}
From source file: it.cnr.isti.hpc.dexter.analysis.SpotAnalyzer.java
License:Apache License
/** Leading "the" (optionally preceded by spaces); group 1 is the remainder. */
private static final Pattern LEADING_THE = Pattern.compile("^[ ]*the +(.*)");

/** Punctuation that is replaced by a single space. */
private static final Pattern SPOT_PUNCTUATION = Pattern.compile("[*!`{}~='<>:/%|=+_]");

/** Leading "a" (optionally preceded by spaces); group 1 is the remainder. */
private static final Pattern LEADING_A = Pattern.compile("^[ ]*a +(.*)");

/** Trailing " (...)" suffix; group 1 is the text before it. */
private static final Pattern TRAILING_PARENTHETICAL = Pattern.compile("^(.*) \\(.*\\)$");

/** Everything from the last '#' onwards; group 1 is the text before it. */
private static final Pattern TRAILING_FRAGMENT = Pattern.compile("^(.*)#.*$");

/**
 * "Jr"/"Sr" with optional preceding commas/spaces and optional trailing dot.
 *
 * <p>The word boundaries keep the pattern from firing inside ordinary words: without them the
 * letters "sr" in e.g. "Israel" would be deleted.
 */
private static final Pattern JUNIOR_SENIOR = Pattern.compile("[, ]*\\b[sjSJ][rR]\\b[.]?");

/** One or more dotted capital initials following a space. */
private static final Pattern INNER_INITIALS = Pattern.compile(" ([A-Z][.] ?)+");

/** One or more dotted capital initials at the start of the text. */
private static final Pattern LEADING_INITIALS = Pattern.compile("^([A-Z][.] ?)+ ");

/** A single dotted capital initial at the end of the text. */
private static final Pattern TRAILING_INITIAL = Pattern.compile(" [A-Z][.]$");

/**
 * Wraps the field's reader in the character filters that normalise a spot before tokenization.
 *
 * <p>The previous version finished with {@code cf = new HTMLStripCharFilter(reader)}, which
 * wrapped the raw reader and so discarded every pattern filter built before it. HTML stripping
 * is now the first stage, ahead of the stage that blanks {@code <}, {@code >} and {@code /},
 * and every later stage wraps the previous one so that all of them take effect.
 *
 * @param fieldName the field being analyzed (not used to vary the chain)
 * @param reader the raw character stream
 * @return the reader wrapped in the full char-filter chain
 */
@Override
protected Reader initReader(String fieldName, Reader reader) {
    CharFilter cf = new HTMLStripCharFilter(reader);
    cf = new PatternReplaceCharFilter(LEADING_THE, "$1", cf);
    cf = new PatternReplaceCharFilter(SPOT_PUNCTUATION, " ", cf);
    cf = new PatternReplaceCharFilter(LEADING_A, "$1", cf);
    cf = new PatternReplaceCharFilter(TRAILING_PARENTHETICAL, "$1", cf);
    cf = new PatternReplaceCharFilter(TRAILING_FRAGMENT, "$1", cf);
    cf = new PatternReplaceCharFilter(JUNIOR_SENIOR, "", cf);
    cf = new PatternReplaceCharFilter(INNER_INITIALS, " ", cf);
    cf = new PatternReplaceCharFilter(LEADING_INITIALS, " ", cf);
    cf = new PatternReplaceCharFilter(TRAILING_INITIAL, " ", cf);
    return cf;
}
From source file: org.elasticsearch.analysis.common.PatternReplaceCharFilterFactory.java
License:Apache License
/**
 * Wraps the given reader in a {@link PatternReplaceCharFilter} built from this factory's
 * {@code pattern} and {@code replacement}.
 *
 * @param tokenStream the character stream to filter (a {@link Reader}, despite the name)
 * @return a new filter reading from {@code tokenStream}
 */
@Override
public Reader create(Reader tokenStream) {
    final Reader filtered = new PatternReplaceCharFilter(pattern, replacement, tokenStream);
    return filtered;
}