List of usage examples for the org.apache.lucene.analysis.Analyzer constructor
public Analyzer()
From source file:brightsolid.solr.plugins.TestTargetPositionQuerySynonyms.java
License:Apache License
@Override public void setUp() throws Exception { super.setUp(); String testFile = "one, uno, un\n" + "two, dos, too\n" + "three, free, tres"; SolrSynonymParser parser = new SolrSynonymParser(true, true, new MockAnalyzer(random())); parser.parse(new StringReader(testFile)); final SynonymMap map = parser.build(); Analyzer analyzer = new Analyzer() { @Override//from www . j av a 2 s . c o m protected TokenStreamComponents createComponents(String fieldName, Reader reader) { Tokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.WHITESPACE, true); return new TokenStreamComponents(tokenizer, new SynonymFilter(tokenizer, map, false)); } }; directory = newDirectory(); RandomIndexWriter iw = new RandomIndexWriter(random(), directory, analyzer); Document doc = new Document(); FieldType newType = new FieldType(org.apache.lucene.document.TextField.TYPE_STORED); newType.setOmitNorms(true); Field field = newField("field", "", newType); field.fieldType().setOmitNorms(true); doc.add(field); field.setStringValue("one two three"); iw.addDocument(doc); field.setStringValue("two three one"); iw.addDocument(doc); field.setStringValue("three one two"); iw.addDocument(doc); reader = iw.getReader(); iw.close(); searcher = newSearcher(reader); }
From source file:com.basistech.IndexFiles.java
License:Open Source License
static Analyzer setupAnalyzer(final boolean filters) { Map<String, String> tokenizerFactoryArgs = Maps.newHashMap(); tokenizerFactoryArgs.put("rlpContext", "rlp-context.xml"); tokenizerFactoryArgs.put("lang", "kor"); tokenizerFactoryArgs.put("postLemma", "true"); tokenizerFactoryArgs.put("postCompoundComponents", "true"); tokenizerFactoryArgs.put("postPartOfSpeech", "true"); final RLPTokenizerFactory tokenizerFactory = new RLPTokenizerFactory(tokenizerFactoryArgs); Map<String, String> emptyOptions = Maps.newHashMap(); final HTMLStripCharFilterFactory charFilterFactory = new HTMLStripCharFilterFactory(emptyOptions); final ICUFoldingFilterFactory foldingFilterFactory = new ICUFoldingFilterFactory(emptyOptions); final ReversedWildcardFilterFactory reversedWildcardFilterFactory = new ReversedWildcardFilterFactory( emptyOptions);//from w w w .j a v a 2 s .co m final RemoveDuplicatesTokenFilterFactory removeDuplicatesTokenFilterFactory = new RemoveDuplicatesTokenFilterFactory( emptyOptions); return new Analyzer() { @Override protected TokenStreamComponents createComponents(String fieldName, Reader reader) { final Tokenizer source = tokenizerFactory.create(reader); if (filters) { TokenStream filter = foldingFilterFactory.create(source); filter = reversedWildcardFilterFactory.create(filter); filter = removeDuplicatesTokenFilterFactory.create(filter); return new TokenStreamComponents(source, filter); } else { return new TokenStreamComponents(source); } } @Override protected Reader initReader(String fieldName, Reader reader) { if (filters) { return charFilterFactory.create(reader); } else { return reader; } } }; }
From source file:com.bigdata.search.ConfigurableAnalyzerFactory.java
License:Open Source License
/**
 * Returns the delegate's analyzer for the given language, wrapped — only when
 * debug logging is enabled — in a logging analyzer that records the raw input
 * and every token produced, tagged with a per-call id.
 */
@Override
public Analyzer getAnalyzer(final String languageCode, boolean filterStopwords) {
    final Analyzer plain = delegate.getAnalyzer(languageCode, filterStopwords);
    if (!log.isDebugEnabled()) {
        return plain;
    }
    return new Analyzer() {
        @Override
        public TokenStream tokenStream(final String fieldName, final Reader reader) {
            final int id = loggerIdCounter++;
            // NOTE(review): assumes the reader is always a StringReader — confirm callers.
            final String term = TermCompletionAnalyzer.getStringReaderContents((StringReader) reader);
            log.debug(id + " " + languageCode + " **" + term + "**");
            return new TokenFilter(plain.tokenStream(fieldName, reader)) {
                TermAttribute attr = addAttribute(TermAttribute.class);

                @Override
                public boolean incrementToken() throws IOException {
                    if (!input.incrementToken()) {
                        return false;
                    }
                    log.debug(id + " |" + attr.term() + "|");
                    return true;
                }
            };
        }
    };
}
From source file:com.github.ippeiukai.externaltoken.lucene.analysis.TestPatternTokenizer.java
License:Apache License
/**
 * Blasts random strings through pattern-based analyzers: once with group -1
 * (split-on-match mode) and once with group 0 (emit the whole match).
 */
public void testRandomStrings() throws Exception {
    checkRandomData(random, newPatternAnalyzer(-1), 10000 * RANDOM_MULTIPLIER);
    checkRandomData(random, newPatternAnalyzer(0), 10000 * RANDOM_MULTIPLIER);
}

/**
 * Builds an analyzer whose token stream is a {@link PatternTokenizer} over the
 * pattern {@code "a"} with the given capture-group index. Extracted to remove
 * the duplicated anonymous-class bodies that differed only in this constant.
 */
private static Analyzer newPatternAnalyzer(final int group) {
    return new Analyzer() {
        @Override
        public TokenStream tokenStream(String fieldName, Reader reader) {
            try {
                return new PatternTokenizer(reader, Pattern.compile("a"), group);
            } catch (IOException e) {
                // PatternTokenizer declares IOException on construction; surface
                // it unchecked so the analyzer contract is unaffected.
                throw new RuntimeException(e);
            }
        }
    };
}
From source file:com.grantingersoll.opengrok.analysis.c.TestCSymbolTokenizer.java
License:Open Source License
/** Initializes {@code analyzer} with a {@code CSymbolTokenizer}-backed Analyzer for the tests. */
@Override
public void setUp() throws Exception {
    super.setUp();
    analyzer = new Analyzer() {
        @Override
        protected TokenStreamComponents createComponents(String fieldName) {
            final Tokenizer source = new CSymbolTokenizer(newAttributeFactory());
            return new TokenStreamComponents(source);
        }
    };
}
From source file:com.grantingersoll.opengrok.analysis.clojure.TestClojureSymbolTokenizer.java
License:Open Source License
@Override public void setUp() throws Exception { super.setUp(); analyzer = new Analyzer() { @Override//from w w w . java 2s. c om protected TokenStreamComponents createComponents(String fieldName) { Tokenizer tokenizer = new ClojureSymbolTokenizer(newAttributeFactory()); return new TokenStreamComponents(tokenizer); } }; }
From source file:com.grantingersoll.opengrok.analysis.csharp.TestCSharpSymbolTokenizer.java
License:Open Source License
/** Initializes {@code analyzer} with a {@code CSharpSymbolTokenizer}-backed Analyzer for the tests. */
@Override
public void setUp() throws Exception {
    super.setUp();
    analyzer = new Analyzer() {
        @Override
        protected TokenStreamComponents createComponents(String fieldName) {
            final Tokenizer source = new CSharpSymbolTokenizer(newAttributeFactory());
            return new TokenStreamComponents(source);
        }
    };
}
From source file:com.grantingersoll.opengrok.analysis.cxx.TestCxxSymbolTokenizer.java
License:Open Source License
@Override public void setUp() throws Exception { super.setUp(); analyzer = new Analyzer() { @Override// w w w . j ava 2 s .c o m protected TokenStreamComponents createComponents(String fieldName) { Tokenizer tokenizer = new CxxSymbolTokenizer(newAttributeFactory()); return new TokenStreamComponents(tokenizer); } }; }
From source file:com.grantingersoll.opengrok.analysis.erlang.TestErlangSymbolTokenizer.java
License:Open Source License
/** Initializes {@code analyzer} with an {@code ErlangSymbolTokenizer}-backed Analyzer for the tests. */
@Override
public void setUp() throws Exception {
    super.setUp();
    analyzer = new Analyzer() {
        @Override
        protected TokenStreamComponents createComponents(String fieldName) {
            final Tokenizer source = new ErlangSymbolTokenizer(newAttributeFactory());
            return new TokenStreamComponents(source);
        }
    };
}
From source file:com.grantingersoll.opengrok.analysis.fortran.TestFortranSymbolTokenizer.java
License:Open Source License
/** Initializes {@code analyzer} with a {@code FortranSymbolTokenizer}-backed Analyzer for the tests. */
@Override
public void setUp() throws Exception {
    super.setUp();
    analyzer = new Analyzer() {
        @Override
        protected TokenStreamComponents createComponents(String fieldName) {
            final Tokenizer source = new FortranSymbolTokenizer(newAttributeFactory());
            return new TokenStreamComponents(source);
        }
    };
}