Example usage for org.apache.lucene.document TextField setTokenStream

Introduction

On this page you can find example usage for org.apache.lucene.document TextField setTokenStream.

Prototype

public void setTokenStream(TokenStream tokenStream) 

Document

Expert: sets the token stream to be used for indexing and causes isIndexed() and isTokenized() to return true.
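
A minimal sketch of the call itself, separate from the test case below (the field name "body", the sample text, and the choice of StandardAnalyzer are illustrative assumptions, not taken from the example): the stored value remains the raw string, while indexing consumes the token stream set on the field.

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.TextField;

public class SetTokenStreamSketch {
    public static void main(String[] args) {
        Analyzer analyzer = new StandardAnalyzer(); // any Analyzer could be used here
        String text = "abc def";

        // Store the raw text on the field, but index a pre-built token stream
        // instead of letting the IndexWriter's analyzer process the stored value.
        TextField field = new TextField("body", text, Field.Store.YES);
        field.setTokenStream(analyzer.tokenStream("body", text));

        Document doc = new Document();
        doc.add(field);
        // doc can now be passed to IndexWriter.addDocument(doc)
    }
}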

Usage

From source file: org.tallison.lucene.queryparser.spans.TestAdvancedAnalyzers.java

License: Apache License

@BeforeClass
public static void beforeClass() throws Exception {
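    // Keyword-tokenized MockAnalyzer with lowercasing, used as the multi-term analyzer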
    lcMultiTermAnalyzer = new MockAnalyzer(random(), MockTokenizer.KEYWORD, true);

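    // Options for the worddelimiter filter: generate word/number parts, catenate everything,
    // split on case changes, and keep the original token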
    Map<String, String> attrs = new HashMap<>();
    attrs.put("generateWordParts", "1");
    attrs.put("generateNumberParts", "1");
    attrs.put("catenateWords", "1");
    attrs.put("catenateNumbers", "1");
    attrs.put("catenateAll", "1");
    attrs.put("splitOnCaseChange", "1");
    attrs.put("preserveOriginal", "1");
    complexAnalyzer = CustomAnalyzer.builder(new ClasspathResourceLoader(TestAdvancedAnalyzers.class))
            .withTokenizer("whitespace").addTokenFilter("worddelimiter", attrs).addTokenFilter("kstem")
            .addTokenFilter("removeduplicates").build();

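    // Analyzer that applies MockNonWhitespaceFilter and then MockSynFilter (synonym injection)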
    synAnalyzer = new Analyzer() {
        @Override
        public TokenStreamComponents createComponents(String fieldName) {

            Tokenizer tokenizer = new MockTokenizer(MockTokenizer.SIMPLE, true);
            TokenFilter filter = new MockNonWhitespaceFilter(tokenizer);

            filter = new MockSynFilter(filter);
            return new TokenStreamComponents(tokenizer, filter);
        }

        @Override
        protected TokenStream normalize(String fieldName, TokenStream in) {
            return new MockNonWhitespaceFilter(new MockSynFilter(in));
        }

    };

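    // Base analyzer passed to the IndexWriter config below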
    baseAnalyzer = new Analyzer() {
        @Override
        public TokenStreamComponents createComponents(String fieldName) {
            Tokenizer tokenizer = new MockTokenizer(MockTokenizer.SIMPLE, true);
            TokenFilter filter = new MockNonWhitespaceFilter(tokenizer);
            return new TokenStreamComponents(tokenizer, filter);
        }

        @Override
        protected TokenStream normalize(String fieldName, TokenStream in) {
            return new MockNonWhitespaceFilter(new LowerCaseFilter(in));
        }

    };

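    // Analyzer that wraps the simple tokenizer with MockUCVowelFilter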
    ucVowelAnalyzer = new Analyzer() {
        @Override
        public TokenStreamComponents createComponents(String fieldName) {
            Tokenizer tokenizer = new MockTokenizer(MockTokenizer.SIMPLE, true);
            TokenFilter filter = new MockUCVowelFilter(tokenizer);
            return new TokenStreamComponents(tokenizer, filter);
        }

        @Override
        protected TokenStream normalize(String fieldName, TokenStream in) {
            return new MockUCVowelFilter(new LowerCaseFilter(in));
        }
    };

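    // Keyword-tokenized variant of the MockUCVowelFilter analyzer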
    ucVowelMTAnalyzer = new Analyzer() {
        @Override
        public TokenStream normalize(String fieldName, TokenStream in) {
            return new MockUCVowelFilter(new LowerCaseFilter(in));
        }

        @Override
        public TokenStreamComponents createComponents(String fieldName) {
            Tokenizer tokenizer = new MockTokenizer(MockTokenizer.KEYWORD, true);
            TokenFilter filter = new MockUCVowelFilter(tokenizer);
            return new TokenStreamComponents(tokenizer, filter);
        }
    };

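    // Local analyzer used only to supply pre-analyzed token streams for FIELD4 below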
    Analyzer tmpUCVowelAnalyzer = new Analyzer() {
        @Override
        public TokenStreamComponents createComponents(String fieldName) {
            Tokenizer tokenizer = new MockTokenizer(MockTokenizer.SIMPLE, true);
            TokenFilter filter = new MockUCVowelFilter(tokenizer);
            return new TokenStreamComponents(tokenizer, filter);
        }

        @Override
        protected TokenStream normalize(String fieldName, TokenStream in) {
            return new MockUCVowelFilter(new LowerCaseFilter(in));
        }
    };
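    // Create the test index with baseAnalyzer as the writer's default analyzer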
    directory = newDirectory();
    RandomIndexWriter writer = new RandomIndexWriter(random(), directory, newIndexWriterConfig(baseAnalyzer)
            .setMaxBufferedDocs(TestUtil.nextInt(random(), 100, 1000)).setMergePolicy(newLogMergePolicy()));
    String[] docs = new String[] { "abc_def", "lmnop", "abc one", "abc two", "qrs one", "qrs two", "tuv one",
            "tuv two", "qrs tuv", "qrs_tuv" };
    for (int i = 0; i < docs.length; i++) {
        Document doc = new Document();
        doc.add(newTextField(FIELD1, docs[i], Field.Store.YES));
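        // FIELD2: store the raw text, but index the token stream from ucVowelAnalyzer set via setTokenStream()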
        TextField tf = new TextField(FIELD2, docs[i], Field.Store.YES);
        tf.setTokenStream(ucVowelAnalyzer.tokenStream(FIELD2, docs[i]));
        doc.add(tf);
        doc.add(newTextField(FIELD3, docs[i], Field.Store.YES));

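        // FIELD4: same pattern, using the throwaway tmpUCVowelAnalyzer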
        TextField tf4 = new TextField(FIELD4, docs[i], Field.Store.YES);
        tf4.setTokenStream(tmpUCVowelAnalyzer.tokenStream(FIELD4, docs[i]));
        doc.add(tf4);
        writer.addDocument(doc);
    }
    reader = writer.getReader();
    searcher = newSearcher(reader);
    writer.close();
}