List of usage examples for org.apache.lucene.analysis.util TokenFilterFactory create
public abstract TokenStream create(TokenStream input);
From source file:com.qwazr.search.analysis.CustomAnalyzer.java
License:Apache License
/**
 * Builds this analyzer's components: the configured tokenizer, optionally
 * wrapped by each configured token filter in order.
 *
 * @param fieldName the field being analyzed (not used by this implementation)
 * @return the tokenizer alone when no filters are configured, otherwise the
 *         tokenizer plus the fully wrapped filter chain
 */
@Override
protected TokenStreamComponents createComponents(String fieldName) {
    final Tokenizer source = tokenizerFactory.create();
    if (tokenFilterFactories == null)
        return new TokenStreamComponents(source);
    TokenStream result = source;
    // The early return above already guarantees tokenFilterFactories is
    // non-null here; the original re-checked it redundantly.
    for (TokenFilterFactory tokenFilterFactory : tokenFilterFactories)
        result = tokenFilterFactory.create(result);
    return new TokenStreamComponents(source, result);
}
From source file:org.apache.jackrabbit.oak.plugins.index.lucene.util.TokenizerChain.java
License:Apache License
/**
 * Assembles the analysis chain for a field: the configured tokenizer reads
 * from {@code reader}, and every token filter factory wraps the stream
 * produced by the stage before it.
 *
 * @param fieldName the field being analyzed (unused here)
 * @param reader    the character source to tokenize
 * @return components pairing the tokenizer with the final filtered stream
 */
@Override
protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
    final Tokenizer source = tokenizer.create(reader);
    TokenStream stream = source;
    for (TokenFilterFactory factory : filters) {
        stream = factory.create(stream);
    }
    return new TokenStreamComponents(source, stream);
}
From source file:org.apache.solr.analysis.TokenizerChain.java
License:Apache License
@Override protected TokenStreamComponents createComponents(String fieldName, Reader aReader) { Tokenizer tk = tokenizer.create(aReader); TokenStream ts = tk;//from ww w . java 2s.c o m for (TokenFilterFactory filter : filters) { ts = filter.create(ts); } return new TokenStreamComponents(tk, ts); }
From source file:org.apache.solr.handler.AnalysisRequestHandlerBase.java
License:Apache License
/**
 * Analyzes the given value using the given Analyzer.
 *
 * @param value   Value to analyze
 * @param context The {@link AnalysisContext analysis context}.
 *
 * @return NamedList containing the tokens produced by analyzing the given value
 */
protected NamedList<? extends Object> analyzeValue(String value, AnalysisContext context) {

    Analyzer analyzer = context.getAnalyzer();

    // A non-TokenizerChain analyzer cannot be decomposed into stages: analyze
    // in one shot and report a single token list keyed by the stream's class name.
    if (!TokenizerChain.class.isInstance(analyzer)) {

        TokenStream tokenStream = null;
        try {
            tokenStream = analyzer.tokenStream(context.getFieldName(), value);
            NamedList<List<NamedList>> namedList = new NamedList<List<NamedList>>();
            namedList.add(tokenStream.getClass().getName(),
                    convertTokensToNamedLists(analyzeTokenStream(tokenStream), context));
            return namedList;
        } catch (IOException e) {
            // Surface analysis failures as a client error on the request.
            throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, e);
        } finally {
            // Always release the stream, even on the exceptional path.
            IOUtils.closeWhileHandlingException(tokenStream);
        }
    }

    // TokenizerChain: replay the pipeline stage by stage so that each char
    // filter, the tokenizer, and every token filter contributes its own entry
    // to the response.
    TokenizerChain tokenizerChain = (TokenizerChain) analyzer;
    CharFilterFactory[] cfiltfacs = tokenizerChain.getCharFilterFactories();
    TokenizerFactory tfac = tokenizerChain.getTokenizerFactory();
    TokenFilterFactory[] filtfacs = tokenizerChain.getTokenFilterFactories();

    NamedList<Object> namedList = new NamedList<Object>();

    if (cfiltfacs != null) {
        String source = value;
        // Feed each char filter the previous stage's text; writeCharStream
        // records the stage's output into the response and returns it.
        for (CharFilterFactory cfiltfac : cfiltfacs) {
            Reader reader = new StringReader(source);
            reader = cfiltfac.create(reader);
            source = writeCharStream(namedList, reader);
        }
    }

    // NOTE(review): the tokenizer is fed the ORIGINAL value via initReader,
    // not the char-filtered `source` built above — presumably initReader
    // re-applies the char filters; confirm against TokenizerChain.initReader.
    TokenStream tokenStream = tfac.create(tokenizerChain.initReader(null, new StringReader(value)));
    List<AttributeSource> tokens = analyzeTokenStream(tokenStream);

    namedList.add(tokenStream.getClass().getName(),
            convertTokensToNamedLists(tokens, context));

    ListBasedTokenStream listBasedTokenStream = new ListBasedTokenStream(tokens);

    for (TokenFilterFactory tokenFilterFactory : filtfacs) {
        // Freeze the tracking stage on every token from the previous stage
        // before the next filter runs.
        for (final AttributeSource tok : tokens) {
            tok.getAttribute(TokenTrackingAttribute.class).freezeStage();
        }
        tokenStream = tokenFilterFactory.create(listBasedTokenStream);
        tokens = analyzeTokenStream(tokenStream);
        namedList.add(tokenStream.getClass().getName(),
                convertTokensToNamedLists(tokens, context));
        // The next filter consumes the tokens this filter just produced.
        listBasedTokenStream = new ListBasedTokenStream(tokens);
    }

    return namedList;
}
From source file:org.elasticsearch.plugin.ingest.kuromoji_part_of_speech_extract.KuromojiPartOfSpeechExtractProcessor.java
License:Apache License
/**
 * Builds a Japanese analyzer whose stream keeps only tokens matching the
 * requested part-of-speech tags: a NORMAL-mode {@code JapaneseTokenizer}
 * followed by a part-of-speech keep filter.
 *
 * @param posTags the part-of-speech tags to keep
 * @return an analyzer wired with the tokenizer and the keep filter
 */
private Analyzer loadAnalyzer(List<String> posTags) {
    Map<String, String> tokenizerOptions = new HashMap<>();
    tokenizerOptions.put("mode", JapaneseTokenizer.Mode.NORMAL.toString());
    TokenizerFactory tokenizerFactory = new JapaneseTokenizerFactory(tokenizerOptions);

    TokenFilterFactory[] tokenFilterFactories = new TokenFilterFactory[1];
    // BUG FIX: the original ignored the posTags parameter and always used
    // this.posTags, making the argument dead. Use the argument so callers
    // control which tags are kept.
    tokenFilterFactories[0] = new JapanesePartOfSpeechKeepFilterFactory(new HashMap<>(), posTags);

    Analyzer analyzer = new Analyzer() {
        @Override
        protected TokenStreamComponents createComponents(String s) {
            Tokenizer tokenizer = tokenizerFactory.create();
            TokenStream tokenStream = tokenizer;
            for (TokenFilterFactory tokenFilterFactory : tokenFilterFactories) {
                tokenStream = tokenFilterFactory.create(tokenStream);
            }
            return new TokenStreamComponents(tokenizer, tokenStream);
        }
    };
    return analyzer;
}
From source file:org.hibernate.search.engine.impl.TokenizerChain.java
License:LGPL
@Override protected TokenStreamComponents createComponents(String fieldName) { Tokenizer tk = tokenizer.create();//from w ww . j a v a 2 s .com TokenStream ts = tk; for (TokenFilterFactory filter : filters) { ts = filter.create(ts); } return new TokenStreamComponents(tk, ts); }
From source file:org.tallison.gramreaper.ingest.schema.MyTokenizerChain.java
License:Apache License
/**
 * Creates the analysis components for a field: starts from the configured
 * tokenizer and wraps it with each token filter, preserving filter order.
 *
 * @param fieldName the field being analyzed (unused here)
 * @return the tokenizer together with the final wrapped stream
 */
@Override
protected TokenStreamComponents createComponents(String fieldName) {
    final Tokenizer root = tokenizer.create();
    TokenStream wrapped = root;
    for (TokenFilterFactory f : filters) {
        wrapped = f.create(wrapped);
    }
    return new TokenStreamComponents(root, wrapped);
}