List of usage examples for the org.apache.lucene.analysis.CharFilter method correctOffset
public final int correctOffset(int currentOff)
From source file:org.elasticsearch.index.analysis.SimpleIcuNormalizerCharFilterTests.java
License:Apache License
@Test public void testDefaultSetting() throws Exception { Settings settings = Settings.settingsBuilder().put("path.home", createTempDir()) .put("index.analysis.char_filter.myNormalizerChar.type", "icu_normalizer").build(); AnalysisService analysisService = createAnalysisService(settings); CharFilterFactory charFilterFactory = analysisService.charFilter("myNormalizerChar"); String input = "5?????g?/??chk?"; Normalizer2 normalizer = Normalizer2.getInstance(null, "nfkc_cf", Normalizer2.Mode.COMPOSE); String expectedOutput = normalizer.normalize(input); CharFilter inputReader = (CharFilter) charFilterFactory.create(new StringReader(input)); char[] tempBuff = new char[10]; StringBuilder output = new StringBuilder(); while (true) { int length = inputReader.read(tempBuff); if (length == -1) break; output.append(tempBuff, 0, length); assertEquals(output.toString(),//from w w w . j ava 2s.c om normalizer.normalize(input.substring(0, inputReader.correctOffset(output.length())))); } assertEquals(expectedOutput, output.toString()); }
From source file:org.elasticsearch.index.analysis.SimpleIcuNormalizerCharFilterTests.java
License:Apache License
@Test public void testNameAndModeSetting() throws Exception { Settings settings = Settings.settingsBuilder().put("path.home", createTempDir()) .put("index.analysis.char_filter.myNormalizerChar.type", "icu_normalizer") .put("index.analysis.char_filter.myNormalizerChar.name", "nfkc") .put("index.analysis.char_filter.myNormalizerChar.mode", "decompose").build(); AnalysisService analysisService = createAnalysisService(settings); CharFilterFactory charFilterFactory = analysisService.charFilter("myNormalizerChar"); String input = "5?????g?/??chk?"; Normalizer2 normalizer = Normalizer2.getInstance(null, "nfkc", Normalizer2.Mode.DECOMPOSE); String expectedOutput = normalizer.normalize(input); CharFilter inputReader = (CharFilter) charFilterFactory.create(new StringReader(input)); char[] tempBuff = new char[10]; StringBuilder output = new StringBuilder(); while (true) { int length = inputReader.read(tempBuff); if (length == -1) break; output.append(tempBuff, 0, length); assertEquals(output.toString(),//from ww w . j ava 2 s . c om normalizer.normalize(input.substring(0, inputReader.correctOffset(output.length())))); } assertEquals(expectedOutput, output.toString()); }