List of usage examples for org.apache.lucene.analysis.TokenStream#reset()
public void reset() throws IOException
From source file:org.elasticsearch.index.analysis.PatternTokenizerTests.java
License:Apache License
/** * TODO: rewrite tests not to use string comparison. *//*from ww w. jav a2 s .c o m*/ private static String tsToString(TokenStream in) throws IOException { StringBuilder out = new StringBuilder(); CharTermAttribute termAtt = in.addAttribute(CharTermAttribute.class); // extra safety to enforce, that the state is not preserved and also // assign bogus values in.clearAttributes(); termAtt.setEmpty().append("bogusTerm"); in.reset(); while (in.incrementToken()) { if (out.length() > 0) out.append(' '); out.append(termAtt.toString()); in.clearAttributes(); termAtt.setEmpty().append("bogusTerm"); } in.close(); return out.toString(); }
From source file:org.elasticsearch.index.analysis.PreBuiltAnalyzerTests.java
License:Apache License
@Test public void testThatDefaultAndStandardAnalyzerChangedIn10Beta1() throws IOException { Analyzer currentStandardAnalyzer = PreBuiltAnalyzers.STANDARD.getAnalyzer(Version.V_1_0_0_Beta1); Analyzer currentDefaultAnalyzer = PreBuiltAnalyzers.DEFAULT.getAnalyzer(Version.V_1_0_0_Beta1); // special case, these two are the same instance assertThat(currentDefaultAnalyzer, is(currentStandardAnalyzer)); PreBuiltAnalyzers.DEFAULT.getAnalyzer(Version.V_1_0_0_Beta1); final int n = atLeast(10); Version version = Version.CURRENT;/* w ww. ja va2 s. c o m*/ for (int i = 0; i < n; i++) { if (version.equals(Version.V_1_0_0_Beta1)) { assertThat(currentDefaultAnalyzer, is(PreBuiltAnalyzers.DEFAULT.getAnalyzer(version))); } else { assertThat(currentDefaultAnalyzer, not(is(PreBuiltAnalyzers.DEFAULT.getAnalyzer(version)))); } Analyzer analyzer = PreBuiltAnalyzers.DEFAULT.getAnalyzer(version); TokenStream ts = analyzer.tokenStream("foo", "This is it Dude"); ts.reset(); CharTermAttribute charTermAttribute = ts.addAttribute(CharTermAttribute.class); List<String> list = new ArrayList<String>(); while (ts.incrementToken()) { list.add(charTermAttribute.toString()); } if (version.onOrAfter(Version.V_1_0_0_Beta1)) { assertThat(list.size(), is(4)); assertThat(list, contains("this", "is", "it", "dude")); } else { assertThat(list.size(), is(1)); assertThat(list, contains("dude")); } ts.close(); version = randomVersion(); } }
From source file:org.elasticsearch.index.analysis.PreBuiltAnalyzerTests.java
License:Apache License
/**
 * Checks how the pre-built PATTERN and STANDARD_HTML_STRIP analyzers differ across versions
 * relative to {@code Version.V_1_0_0_RC1}.
 *
 * <p>Starting with {@code Version.CURRENT} and then using random versions, each iteration
 * picks one of the two analyzers at random and tokenizes "This is it Dude". It expects four
 * lower-cased terms for versions on or after RC1, and only "dude" for earlier versions.
 *
 * @throws IOException if a token stream fails while being consumed or closed
 */
@Test
public void testAnalyzerChangedIn10RC1() throws IOException {
    Analyzer pattern = PreBuiltAnalyzers.PATTERN.getAnalyzer(Version.V_1_0_0_RC1);
    Analyzer standardHtml = PreBuiltAnalyzers.STANDARD_HTML_STRIP.getAnalyzer(Version.V_1_0_0_RC1);
    final int n = atLeast(10);
    Version version = Version.CURRENT;
    for (int i = 0; i < n; i++) {
        if (version.equals(Version.V_1_0_0_RC1)) {
            assertThat(pattern, is(PreBuiltAnalyzers.PATTERN.getAnalyzer(version)));
            assertThat(standardHtml, is(PreBuiltAnalyzers.STANDARD_HTML_STRIP.getAnalyzer(version)));
        } else {
            // NOTE(review): both checks compare against DEFAULT rather than PATTERN /
            // STANDARD_HTML_STRIP, which looks like a copy-paste slip - confirm the intent
            // before tightening these assertions.
            assertThat(pattern, not(is(PreBuiltAnalyzers.DEFAULT.getAnalyzer(version))));
            assertThat(standardHtml, not(is(PreBuiltAnalyzers.DEFAULT.getAnalyzer(version))));
        }

        Analyzer analyzer = randomBoolean()
                ? PreBuiltAnalyzers.PATTERN.getAnalyzer(version)
                : PreBuiltAnalyzers.STANDARD_HTML_STRIP.getAnalyzer(version);
        TokenStream ts = analyzer.tokenStream("foo", "This is it Dude");
        List<String> list = new ArrayList<String>();
        try {
            ts.reset();
            CharTermAttribute charTermAttribute = ts.addAttribute(CharTermAttribute.class);
            while (ts.incrementToken()) {
                list.add(charTermAttribute.toString());
            }
            // The stream is exhausted here; finish the consumer workflow before closing.
            ts.end();
        } finally {
            // Close even if consuming the stream fails, so the stream is never leaked.
            ts.close();
        }

        if (version.onOrAfter(Version.V_1_0_0_RC1)) {
            assertThat(list.toString(), list.size(), is(4));
            assertThat(list, contains("this", "is", "it", "dude"));
        } else {
            assertThat(list.size(), is(1));
            assertThat(list, contains("dude"));
        }

        version = randomVersion();
    }
}
From source file:org.elasticsearch.index.analysis.RSLPTokenFilterTests.java
License:Apache License
@Test public void testRSLPRules() throws Exception { Index index = new Index("test"); Settings settings = Settings.settingsBuilder().put(IndexMetaData.SETTING_VERSION_CREATED, Version.CURRENT) .put("path.home", createTempDir()).put("index.analysis.filter.myStemmer.type", "br_rslp").build(); AnalysisService analysisService = createAnalysisService(index, settings); TokenFilterFactory filterFactory = analysisService.tokenFilter("myStemmer"); Tokenizer tokenizer = new KeywordTokenizer(); Map<String, String> words = buildWordList(); Set<String> inputWords = words.keySet(); for (String word : inputWords) { tokenizer.setReader(new StringReader(word)); TokenStream ts = filterFactory.create(tokenizer); CharTermAttribute term1 = ts.addAttribute(CharTermAttribute.class); ts.reset(); assertThat(ts.incrementToken(), equalTo(true)); assertThat(term1.toString(), equalTo(words.get(word))); ts.close();//from w ww .j ava2 s. c o m } }
From source file:org.elasticsearch.index.analysis.RSLPTokenFilterTests.java
License:Apache License
@Test public void testRSLPPhrases() throws Exception { Index index = new Index("test"); Settings settings = Settings.settingsBuilder().put(IndexMetaData.SETTING_VERSION_CREATED, Version.CURRENT) .put("path.home", createTempDir()).put("index.analysis.analyzer.myAnalyzer.type", "custom") .put("index.analysis.analyzer.myAnalyzer.tokenizer", "standard") .put("index.analysis.analyzer.myAnalyzer.filter", "br_rslp").build(); AnalysisService analysisService = createAnalysisService(index, settings); Analyzer analyzer = analysisService.analyzer("myAnalyzer"); Map<String, List<String>> phrases = buildPhraseList(); for (String phrase : phrases.keySet()) { List<String> outputWords = phrases.get(phrase); TokenStream ts = analyzer.tokenStream("test", phrase); CharTermAttribute term1 = ts.addAttribute(CharTermAttribute.class); ts.reset(); for (String expected : outputWords) { assertThat(ts.incrementToken(), equalTo(true)); assertThat(term1.toString(), equalTo(expected)); }//from www . j a va 2 s. com ts.close(); } }
From source file:org.elasticsearch.index.analysis.SimpleIcuCollationTokenFilterTests.java
License:Apache License
private void assertCollation(TokenStream stream1, TokenStream stream2, int comparison) throws IOException { CharTermAttribute term1 = stream1.addAttribute(CharTermAttribute.class); CharTermAttribute term2 = stream2.addAttribute(CharTermAttribute.class); stream1.reset(); stream2.reset();//from w ww .ja v a 2 s . com assertThat(stream1.incrementToken(), equalTo(true)); assertThat(stream2.incrementToken(), equalTo(true)); assertThat(Integer.signum(term1.toString().compareTo(term2.toString())), equalTo(Integer.signum(comparison))); assertThat(stream1.incrementToken(), equalTo(false)); assertThat(stream2.incrementToken(), equalTo(false)); stream1.end(); stream2.end(); stream1.close(); stream2.close(); }
From source file:org.elasticsearch.index.analysis.SimplePolishTokenFilterTests.java
License:Apache License
/**
 * Runs {@code source} as a single keyword token through the {@code polish_stem} filter and
 * asserts that the first emitted term equals {@code expected}.
 *
 * @param source the text fed to the keyword tokenizer
 * @param expected the term the filter is expected to emit
 * @throws IOException if the analysis service cannot be created or the stream fails
 */
private void testToken(String source, String expected) throws IOException {
    Index index = new Index("test");
    Settings settings = Settings.settingsBuilder()
            .put(IndexMetaData.SETTING_VERSION_CREATED, Version.CURRENT)
            .put("path.home", createTempDir())
            .put("index.analysis.filter.myStemmer.type", "polish_stem")
            .build();
    AnalysisService analysisService = createAnalysisService(index, settings);

    TokenFilterFactory filterFactory = analysisService.tokenFilter("myStemmer");

    Tokenizer tokenizer = new KeywordTokenizer();
    tokenizer.setReader(new StringReader(source));

    TokenStream ts = filterFactory.create(tokenizer);
    try {
        CharTermAttribute term1 = ts.addAttribute(CharTermAttribute.class);
        ts.reset();
        assertThat(ts.incrementToken(), equalTo(true));
        assertThat(term1.toString(), equalTo(expected));
    } finally {
        // The stream was previously never closed; release it even when an assertion fails.
        ts.close();
    }
}
From source file:org.elasticsearch.index.analysis.SimplePolishTokenFilterTests.java
License:Apache License
/**
 * Analyzes {@code source} with the {@code polish} analyzer and asserts that the stream starts
 * with {@code expected_terms}, in order. Tokens after the last expected term are not checked.
 *
 * @param source the text to analyze
 * @param expected_terms the terms the analyzer is expected to emit first, in order
 * @throws IOException if the analysis service cannot be created or the stream fails
 */
private void testAnalyzer(String source, String... expected_terms) throws IOException {
    Index index = new Index("test");
    Settings settings = Settings.settingsBuilder()
            .put(IndexMetaData.SETTING_VERSION_CREATED, Version.CURRENT)
            .put("path.home", createTempDir())
            .build();
    AnalysisService analysisService = createAnalysisService(index, settings);

    Analyzer analyzer = analysisService.analyzer("polish").analyzer();

    TokenStream ts = analyzer.tokenStream("test", source);
    try {
        CharTermAttribute term1 = ts.addAttribute(CharTermAttribute.class);
        ts.reset();

        for (String expected : expected_terms) {
            assertThat(ts.incrementToken(), equalTo(true));
            assertThat(term1.toString(), equalTo(expected));
        }
    } finally {
        // The stream was previously never closed; release it even when an assertion fails.
        ts.close();
    }
}
From source file:org.elasticsearch.index.analysis.SimpleUkrainianAnalyzerTests.java
License:Apache License
/**
 * Analyzes {@code source} with the {@code ukrainian} analyzer and asserts that the stream
 * emits exactly {@code expected_terms}, in order, and nothing more.
 *
 * @param source the text to analyze
 * @param expected_terms the complete, ordered list of terms the analyzer must emit
 * @throws IOException if the test analysis cannot be created or the stream fails
 */
private static void testAnalyzer(String source, String... expected_terms) throws IOException {
    TestAnalysis analysis = createTestAnalysis(new Index("test", "_na_"), Settings.EMPTY,
            new AnalysisUkrainianPlugin());
    Analyzer analyzer = analysis.indexAnalyzers.get("ukrainian").analyzer();

    TokenStream ts = analyzer.tokenStream("test", source);
    try {
        CharTermAttribute term1 = ts.addAttribute(CharTermAttribute.class);
        ts.reset();

        for (String expected : expected_terms) {
            assertThat(ts.incrementToken(), equalTo(true));
            assertThat(term1.toString(), equalTo(expected));
        }

        assertThat(ts.incrementToken(), equalTo(false));
        // The stream is exhausted here; finish the consumer workflow before closing.
        ts.end();
    } finally {
        // The stream was previously never closed; release it even when an assertion fails.
        ts.close();
    }
}
From source file:org.elasticsearch.index.analysis.SimpleVietnameseAnalyzerTests.java
License:Apache License
private static void testAnalyzer(String source, String... expected_terms) throws IOException { TestAnalysis analysis = createTestAnalysis(new Index("test", "_na_"), Settings.EMPTY, new AnalysisVietnamesePlugin()); Analyzer analyzer = analysis.indexAnalyzers.get("vietnamese").analyzer(); TokenStream ts = analyzer.tokenStream("test", source); CharTermAttribute term1 = ts.addAttribute(CharTermAttribute.class); ts.reset(); for (String expected : expected_terms) { assertThat(ts.incrementToken(), equalTo(true)); assertThat(term1.toString(), equalTo(expected)); }//from w w w . j a v a 2s . co m assertThat(ts.incrementToken(), equalTo(false)); }