Example usage for org.apache.lucene.analysis.reverse ReverseStringFilter ReverseStringFilter

List of usage examples for org.apache.lucene.analysis.reverse ReverseStringFilter ReverseStringFilter

Introduction

In this page you can find the example usage for org.apache.lucene.analysis.reverse ReverseStringFilter ReverseStringFilter.

Prototype

public ReverseStringFilter(TokenStream in, char marker) 

Source Link

Document

Create a new ReverseStringFilter that reverses and marks all tokens in the supplied TokenStream .

Usage

From source file:org.apache.jackrabbit.oak.plugins.index.solr.configuration.DefaultAnalyzersConfigurationTest.java

License:Apache License

@Before
public void setUp() throws Exception {
    this.exactPathAnalyzer = new Analyzer() {
        @Override//from w ww. jav  a  2  s. c  o m
        protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
            Tokenizer source = new KeywordTokenizer(reader);
            return new TokenStreamComponents(source);
        }
    };
    this.parentPathIndexingAnalyzer = new Analyzer() {
        @Override
        protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
            Tokenizer source = new KeywordTokenizer(reader);
            return new TokenStreamComponents(source);
        }
    };
    this.parentPathSearchingAnalyzer = new Analyzer() {
        @Override
        protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
            Tokenizer source = new KeywordTokenizer(reader);
            TokenStream filter = new ReverseStringFilter(Version.LUCENE_47, source);
            filter = new PatternReplaceFilter(filter, Pattern.compile("[^\\/]+\\/"), "", false);
            filter = new ReverseStringFilter(Version.LUCENE_47, filter);
            return new TokenStreamComponents(source, filter);
        }
    };

    this.directChildrenPathIndexingAnalyzer = new Analyzer() {
        @Override
        protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
            Tokenizer source = new KeywordTokenizer(reader);
            TokenStream filter = new ReverseStringFilter(Version.LUCENE_47, source);
            filter = new LengthFilter(Version.LUCENE_47, filter, 2, Integer.MAX_VALUE);
            filter = new PatternReplaceFilter(filter, Pattern.compile("([^\\/]+)(\\/)"), "$2", false);
            filter = new PatternReplaceFilter(filter, Pattern.compile("(\\/)(.+)"), "$2", false);
            filter = new ReverseStringFilter(Version.LUCENE_47, filter);
            return new TokenStreamComponents(source, filter);
        }
    };
    this.directChildrenPathSearchingAnalyzer = new Analyzer() {
        @Override
        protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
            Tokenizer source = new KeywordTokenizer(reader);
            return new TokenStreamComponents(source);
        }
    };

    this.allChildrenPathIndexingAnalyzer = new Analyzer() {
        @Override
        protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
            Tokenizer source = new PathHierarchyTokenizer(reader);
            TokenStream filter = new PatternCaptureGroupTokenFilter(source, false,
                    Pattern.compile("((\\/).*)"));
            filter = new RemoveDuplicatesTokenFilter(filter);
            return new TokenStreamComponents(source, filter);
        }
    };
    this.allChildrenPathSearchingAnalyzer = new Analyzer() {
        @Override
        protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
            Tokenizer source = new KeywordTokenizer(reader);
            return new TokenStreamComponents(source);
        }
    };
}

From source file:org.apache.solr.analysis.ReverseStringFilterFactory.java

License:Apache License

public ReverseStringFilter create(TokenStream in) {
    assureMatchVersion();
    return new ReverseStringFilter(luceneMatchVersion, in);
}

From source file:org.elasticsearch.index.analysis.EdgeNGramTokenFilterFactory.java

License:Apache License

@Override
public TokenStream create(TokenStream tokenStream) {
    if (version.onOrAfter(Version.LUCENE_43) && esVersion.onOrAfter(org.elasticsearch.Version.V_0_90_2)) {
        /*/*from  www. ja v  a2s.  c o  m*/
         * We added this in 0.90.2 but 0.90.1 used LUCENE_43 already so we can not rely on the lucene version.
         * Yet if somebody uses 0.90.2 or higher with a prev. lucene version we should also use the deprecated version.
         */
        final Version version = this.version == Version.LUCENE_43 ? Version.LUCENE_44 : this.version; // always use 4.4 or higher
        TokenStream result = tokenStream;
        // side=BACK is not supported anymore but applying ReverseStringFilter up-front and after the token filter has the same effect
        if (side == Side.BACK) {
            result = new ReverseStringFilter(version, result);
        }
        result = new EdgeNGramTokenFilter(version, result, minGram, maxGram);
        if (side == Side.BACK) {
            result = new ReverseStringFilter(version, result);
        }
        return result;
    }
    return new EdgeNGramTokenFilter(version, tokenStream, side, minGram, maxGram);
}

From source file:org.elasticsearch.index.analysis.ReverseTokenFilterFactory.java

License:Apache License

@Override
public TokenStream create(TokenStream tokenStream) {
    return new ReverseStringFilter(version, tokenStream);
}

From source file:org.elasticsearch.search.suggest.phrase.NoisyChannelSpellCheckerTests.java

License:Apache License

@Test
public void testMarvelHerosMultiGenerator() throws IOException {
    RAMDirectory dir = new RAMDirectory();
    Map<String, Analyzer> mapping = new HashMap<String, Analyzer>();
    mapping.put("body_ngram", new Analyzer() {

        @Override//from  w ww.  j ava 2 s  .c om
        protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
            Tokenizer t = new StandardTokenizer(Version.LUCENE_41, reader);
            ShingleFilter tf = new ShingleFilter(t, 2, 3);
            tf.setOutputUnigrams(false);
            return new TokenStreamComponents(t, new LowerCaseFilter(Version.LUCENE_41, tf));
        }

    });

    mapping.put("body", new Analyzer() {

        @Override
        protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
            Tokenizer t = new StandardTokenizer(Version.LUCENE_41, reader);
            return new TokenStreamComponents(t, new LowerCaseFilter(Version.LUCENE_41, t));
        }

    });
    mapping.put("body_reverse", new Analyzer() {

        @Override
        protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
            Tokenizer t = new StandardTokenizer(Version.LUCENE_41, reader);
            return new TokenStreamComponents(t,
                    new ReverseStringFilter(Version.LUCENE_41, new LowerCaseFilter(Version.LUCENE_41, t)));
        }

    });
    PerFieldAnalyzerWrapper wrapper = new PerFieldAnalyzerWrapper(new WhitespaceAnalyzer(Version.LUCENE_41),
            mapping);

    IndexWriterConfig conf = new IndexWriterConfig(Version.LUCENE_41, wrapper);
    IndexWriter writer = new IndexWriter(dir, conf);
    BufferedReader reader = new BufferedReader(new InputStreamReader(
            NoisyChannelSpellCheckerTests.class.getResourceAsStream("/config/names.txt"), Charsets.UTF_8));
    String line = null;
    while ((line = reader.readLine()) != null) {
        Document doc = new Document();
        doc.add(new Field("body", line, TextField.TYPE_NOT_STORED));
        doc.add(new Field("body_reverse", line, TextField.TYPE_NOT_STORED));
        doc.add(new Field("body_ngram", line, TextField.TYPE_NOT_STORED));
        writer.addDocument(doc);
    }

    DirectoryReader ir = DirectoryReader.open(writer, false);
    LaplaceScorer wordScorer = new LaplaceScorer(ir, MultiFields.getTerms(ir, "body_ngram"), "body_ngram",
            0.95d, new BytesRef(" "), 0.5f);
    NoisyChannelSpellChecker suggester = new NoisyChannelSpellChecker();
    DirectSpellChecker spellchecker = new DirectSpellChecker();
    spellchecker.setMinQueryLength(1);
    DirectCandidateGenerator forward = new DirectCandidateGenerator(spellchecker, "body",
            SuggestMode.SUGGEST_ALWAYS, ir, 0.95, 10);
    DirectCandidateGenerator reverse = new DirectCandidateGenerator(spellchecker, "body_reverse",
            SuggestMode.SUGGEST_ALWAYS, ir, 0.95, 10, wrapper, wrapper,
            MultiFields.getTerms(ir, "body_reverse"));
    CandidateGenerator generator = new MultiCandidateGeneratorWrapper(10, forward, reverse);

    Correction[] corrections = suggester.getCorrections(wrapper, new BytesRef("american cae"), generator, 1, 1,
            ir, "body", wordScorer, 1, 2).corrections;
    assertThat(corrections.length, equalTo(1));
    assertThat(corrections[0].join(new BytesRef(" ")).utf8ToString(), equalTo("american ace"));

    generator = new MultiCandidateGeneratorWrapper(5, forward, reverse);
    corrections = suggester.getCorrections(wrapper, new BytesRef("american ame"), generator, 1, 1, ir, "body",
            wordScorer, 1, 2).corrections;
    assertThat(corrections.length, equalTo(1));
    assertThat(corrections[0].join(new BytesRef(" ")).utf8ToString(), equalTo("american ace"));

    corrections = suggester.getCorrections(wrapper, new BytesRef("american cae"), forward, 1, 1, ir, "body",
            wordScorer, 1, 2).corrections;
    assertThat(corrections.length, equalTo(0)); // only use forward with constant prefix

    corrections = suggester.getCorrections(wrapper, new BytesRef("america cae"), generator, 2, 1, ir, "body",
            wordScorer, 1, 2).corrections;
    assertThat(corrections.length, equalTo(1));
    assertThat(corrections[0].join(new BytesRef(" ")).utf8ToString(), equalTo("american ace"));

    corrections = suggester.getCorrections(wrapper, new BytesRef("Zorr the Got-Jewel"), generator, 0.5f, 4, ir,
            "body", wordScorer, 0, 2).corrections;
    assertThat(corrections.length, equalTo(4));
    assertThat(corrections[0].join(new BytesRef(" ")).utf8ToString(), equalTo("xorr the god jewel"));
    assertThat(corrections[1].join(new BytesRef(" ")).utf8ToString(), equalTo("zorr the god jewel"));
    assertThat(corrections[2].join(new BytesRef(" ")).utf8ToString(), equalTo("gorr the god jewel"));
    assertThat(corrections[3].join(new BytesRef(" ")).utf8ToString(), equalTo("varr the god jewel"));

    corrections = suggester.getCorrections(wrapper, new BytesRef("Zorr the Got-Jewel"), generator, 0.5f, 1, ir,
            "body", wordScorer, 1.5f, 2).corrections;
    assertThat(corrections.length, equalTo(1));
    assertThat(corrections[0].join(new BytesRef(" ")).utf8ToString(), equalTo("xorr the god jewel"));

    corrections = suggester.getCorrections(wrapper, new BytesRef("Xor the Got-Jewel"), generator, 0.5f, 1, ir,
            "body", wordScorer, 1.5f, 2).corrections;
    assertThat(corrections.length, equalTo(1));
    assertThat(corrections[0].join(new BytesRef(" ")).utf8ToString(), equalTo("xorr the god jewel"));

}

From source file:org.elasticsearch.test.unit.search.suggest.phrase.NoisyChannelSpellCheckerTests.java

License:Apache License

@Test
public void testMarvelHerosMultiGenerator() throws IOException {
    RAMDirectory dir = new RAMDirectory();
    Map<String, Analyzer> mapping = new HashMap<String, Analyzer>();
    mapping.put("body_ngram", new Analyzer() {

        @Override/*from w  w  w.jav a  2s  .  co  m*/
        protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
            Tokenizer t = new StandardTokenizer(Version.LUCENE_41, reader);
            ShingleFilter tf = new ShingleFilter(t, 2, 3);
            tf.setOutputUnigrams(false);
            return new TokenStreamComponents(t, new LowerCaseFilter(Version.LUCENE_41, tf));
        }

    });

    mapping.put("body", new Analyzer() {

        @Override
        protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
            Tokenizer t = new StandardTokenizer(Version.LUCENE_41, reader);
            return new TokenStreamComponents(t, new LowerCaseFilter(Version.LUCENE_41, t));
        }

    });
    mapping.put("body_reverse", new Analyzer() {

        @Override
        protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
            Tokenizer t = new StandardTokenizer(Version.LUCENE_41, reader);
            return new TokenStreamComponents(t,
                    new ReverseStringFilter(Version.LUCENE_41, new LowerCaseFilter(Version.LUCENE_41, t)));
        }

    });
    PerFieldAnalyzerWrapper wrapper = new PerFieldAnalyzerWrapper(new WhitespaceAnalyzer(Version.LUCENE_41),
            mapping);

    IndexWriterConfig conf = new IndexWriterConfig(Version.LUCENE_41, wrapper);
    IndexWriter writer = new IndexWriter(dir, conf);
    BufferedReader reader = new BufferedReader(new InputStreamReader(
            NoisyChannelSpellCheckerTests.class.getResourceAsStream("/config/names.txt")));
    String line = null;
    while ((line = reader.readLine()) != null) {
        Document doc = new Document();
        doc.add(new Field("body", line, TextField.TYPE_NOT_STORED));
        doc.add(new Field("body_reverse", line, TextField.TYPE_NOT_STORED));
        doc.add(new Field("body_ngram", line, TextField.TYPE_NOT_STORED));
        writer.addDocument(doc);
    }

    DirectoryReader ir = DirectoryReader.open(writer, false);
    LaplaceScorer wordScorer = new LaplaceScorer(ir, "body_ngram", 0.95d, new BytesRef(" "), 0.5f);
    NoisyChannelSpellChecker suggester = new NoisyChannelSpellChecker();
    DirectSpellChecker spellchecker = new DirectSpellChecker();
    spellchecker.setMinQueryLength(1);
    DirectCandidateGenerator forward = new DirectCandidateGenerator(spellchecker, "body",
            SuggestMode.SUGGEST_ALWAYS, ir, 0.95, 10);
    DirectCandidateGenerator reverse = new DirectCandidateGenerator(spellchecker, "body_reverse",
            SuggestMode.SUGGEST_ALWAYS, ir, 0.95, 10, wrapper, wrapper);
    CandidateGenerator generator = new MultiCandidateGeneratorWrapper(10, forward, reverse);

    Correction[] corrections = suggester.getCorrections(wrapper, new BytesRef("american cae"), generator, 1, 1,
            ir, "body", wordScorer, 1, 2);
    assertThat(corrections.length, equalTo(1));
    assertThat(corrections[0].join(new BytesRef(" ")).utf8ToString(), equalTo("american ace"));

    generator = new MultiCandidateGeneratorWrapper(5, forward, reverse);
    corrections = suggester.getCorrections(wrapper, new BytesRef("american ame"), generator, 1, 1, ir, "body",
            wordScorer, 1, 2);
    assertThat(corrections.length, equalTo(1));
    assertThat(corrections[0].join(new BytesRef(" ")).utf8ToString(), equalTo("american ace"));

    corrections = suggester.getCorrections(wrapper, new BytesRef("american cae"), forward, 1, 1, ir, "body",
            wordScorer, 1, 2);
    assertThat(corrections.length, equalTo(0)); // only use forward with constant prefix

    corrections = suggester.getCorrections(wrapper, new BytesRef("america cae"), generator, 2, 1, ir, "body",
            wordScorer, 1, 2);
    assertThat(corrections.length, equalTo(1));
    assertThat(corrections[0].join(new BytesRef(" ")).utf8ToString(), equalTo("american ace"));

    corrections = suggester.getCorrections(wrapper, new BytesRef("Zorr the Got-Jewel"), generator, 0.5f, 4, ir,
            "body", wordScorer, 0, 2);
    assertThat(corrections.length, equalTo(4));
    assertThat(corrections[0].join(new BytesRef(" ")).utf8ToString(), equalTo("xorr the god jewel"));
    assertThat(corrections[1].join(new BytesRef(" ")).utf8ToString(), equalTo("zorr the god jewel"));
    assertThat(corrections[2].join(new BytesRef(" ")).utf8ToString(), equalTo("gorr the god jewel"));
    assertThat(corrections[3].join(new BytesRef(" ")).utf8ToString(), equalTo("tarr the god jewel"));

    corrections = suggester.getCorrections(wrapper, new BytesRef("Zorr the Got-Jewel"), generator, 0.5f, 1, ir,
            "body", wordScorer, 1.5f, 2);
    assertThat(corrections.length, equalTo(1));
    assertThat(corrections[0].join(new BytesRef(" ")).utf8ToString(), equalTo("xorr the god jewel"));

    corrections = suggester.getCorrections(wrapper, new BytesRef("Xor the Got-Jewel"), generator, 0.5f, 1, ir,
            "body", wordScorer, 1.5f, 2);
    assertThat(corrections.length, equalTo(1));
    assertThat(corrections[0].join(new BytesRef(" ")).utf8ToString(), equalTo("xorr the god jewel"));

}