Usage examples for org.apache.lucene.analysis.util.TokenizerFactory#create
/** Creates a new {@link Tokenizer} using the given {@link AttributeFactory}. */
abstract public Tokenizer create(AttributeFactory factory);
From source file: org.apache.solr.handler.AnalysisRequestHandlerBase.java
License: Apache License
/** * Analyzes the given value using the given Analyzer. * * @param value Value to analyze/* w ww. j ava 2 s . com*/ * @param context The {@link AnalysisContext analysis context}. * * @return NamedList containing the tokens produced by analyzing the given value */ protected NamedList<? extends Object> analyzeValue(String value, AnalysisContext context) { Analyzer analyzer = context.getAnalyzer(); if (!TokenizerChain.class.isInstance(analyzer)) { TokenStream tokenStream = null; try { tokenStream = analyzer.tokenStream(context.getFieldName(), value); NamedList<List<NamedList>> namedList = new NamedList<List<NamedList>>(); namedList.add(tokenStream.getClass().getName(), convertTokensToNamedLists(analyzeTokenStream(tokenStream), context)); return namedList; } catch (IOException e) { throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, e); } finally { IOUtils.closeWhileHandlingException(tokenStream); } } TokenizerChain tokenizerChain = (TokenizerChain) analyzer; CharFilterFactory[] cfiltfacs = tokenizerChain.getCharFilterFactories(); TokenizerFactory tfac = tokenizerChain.getTokenizerFactory(); TokenFilterFactory[] filtfacs = tokenizerChain.getTokenFilterFactories(); NamedList<Object> namedList = new NamedList<Object>(); if (cfiltfacs != null) { String source = value; for (CharFilterFactory cfiltfac : cfiltfacs) { Reader reader = new StringReader(source); reader = cfiltfac.create(reader); source = writeCharStream(namedList, reader); } } TokenStream tokenStream = tfac.create(tokenizerChain.initReader(null, new StringReader(value))); List<AttributeSource> tokens = analyzeTokenStream(tokenStream); namedList.add(tokenStream.getClass().getName(), convertTokensToNamedLists(tokens, context)); ListBasedTokenStream listBasedTokenStream = new ListBasedTokenStream(tokens); for (TokenFilterFactory tokenFilterFactory : filtfacs) { for (final AttributeSource tok : tokens) { tok.getAttribute(TokenTrackingAttribute.class).freezeStage(); } tokenStream = 
tokenFilterFactory.create(listBasedTokenStream); tokens = analyzeTokenStream(tokenStream); namedList.add(tokenStream.getClass().getName(), convertTokensToNamedLists(tokens, context)); listBasedTokenStream = new ListBasedTokenStream(tokens); } return namedList; }
From source file: org.apache.solr.rest.schema.analysis.FSTSynonymFilterFactory.java
License: Apache License
@Override public void inform(ResourceLoader loader) throws IOException { final TokenizerFactory factory = tokenizerFactory == null ? null : loadTokenizerFactory(loader, tokenizerFactory); Analyzer analyzer = new Analyzer() { @Override//from www.jav a 2s. c om protected TokenStreamComponents createComponents(String fieldName, Reader reader) { Tokenizer tokenizer = factory == null ? new WhitespaceTokenizer(Version.LUCENE_CURRENT, reader) : factory.create(reader); TokenStream stream = ignoreCase ? new LowerCaseFilter(Version.LUCENE_CURRENT, tokenizer) : tokenizer; return new TokenStreamComponents(tokenizer, stream); } }; try { String formatClass = format; if (format == null || format.equals("solr")) { formatClass = SolrSynonymParser.class.getName(); } else if (format.equals("wordnet")) { formatClass = WordnetSynonymParser.class.getName(); } // TODO: expose dedup as a parameter? map = loadSynonyms(loader, formatClass, true, analyzer); } catch (ParseException e) { throw new IOException("Error parsing synonyms file:", e); } }
From source file: pl.litwiniuk.rowicki.modsynonyms.ModificatedFSTSynonymFilterFactory.java
License: Apache License
@Override public void inform(ResourceLoader loader) throws IOException { final TokenizerFactory factory = tokenizerFactory == null ? null : loadTokenizerFactory(loader, tokenizerFactory); Analyzer analyzer = new Analyzer() { @Override//from ww w. j a v a2 s. co m protected TokenStreamComponents createComponents(String fieldName, Reader reader) { Tokenizer tokenizer = factory == null ? new WhitespaceTokenizer(Version.LUCENE_43, reader) : factory.create(reader); TokenStream stream = ignoreCase ? new LowerCaseFilter(Version.LUCENE_43, tokenizer) : tokenizer; return new TokenStreamComponents(tokenizer, stream); } }; try { if (format == null || format.equals("solr")) { // TODO: expose dedup as a parameter? map = loadSolrSynonyms(loader, true, analyzer); } else if (format.equals("wordnet")) { map = loadWordnetSynonyms(loader, true, analyzer); } else { // TODO: somehow make this more pluggable throw new IllegalArgumentException("Unrecognized synonyms format: " + format); } } catch (ParseException e) { throw new IOException("Error parsing synonyms file:", e); } }
From source file: pl.litwiniuk.rowicki.modsynonyms.SlowSynonymFilterFactory.java
License: Apache License
/**
 * Builds a TokenStream by applying the given tokenizer factory to the
 * given character source.
 */
private static TokenStream loadTokenizer(TokenizerFactory factory, Reader input) {
    return factory.create(input);
}