Example usage for org.apache.lucene.analysis Analyzer Analyzer

List of usage examples for org.apache.lucene.analysis Analyzer Analyzer

Introduction

In this page you can find the example usage for org.apache.lucene.analysis Analyzer Analyzer.

Prototype

public Analyzer() 

Source Link

Document

Create a new Analyzer, reusing the same set of components per thread across calls to tokenStream(String, Reader).

Usage

From source file:brightsolid.solr.plugins.TestTargetPositionQuerySynonyms.java

License:Apache License

@Override
public void setUp() throws Exception {
    super.setUp();

    // Three synonym groups, one per line, in Solr synonym-file syntax.
    String synonymRules = "one, uno, un\n" + "two, dos, too\n" + "three, free, tres";

    SolrSynonymParser parser = new SolrSynonymParser(true, true, new MockAnalyzer(random()));
    parser.parse(new StringReader(synonymRules));
    final SynonymMap synonyms = parser.build();

    // Whitespace tokenizer feeding a SynonymFilter built from the map above.
    Analyzer analyzer = new Analyzer() {
        @Override
        protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
            Tokenizer source = new MockTokenizer(reader, MockTokenizer.WHITESPACE, true);
            return new TokenStreamComponents(source, new SynonymFilter(source, synonyms, false));
        }
    };

    directory = newDirectory();
    RandomIndexWriter writer = new RandomIndexWriter(random(), directory, analyzer);

    FieldType storedText = new FieldType(org.apache.lucene.document.TextField.TYPE_STORED);
    storedText.setOmitNorms(true);
    Field field = newField("field", "", storedText);
    field.fieldType().setOmitNorms(true);

    Document doc = new Document();
    doc.add(field);

    // Index the same three tokens once in each rotation.
    for (String value : new String[] { "one two three", "two three one", "three one two" }) {
        field.setStringValue(value);
        writer.addDocument(doc);
    }

    reader = writer.getReader();
    writer.close();
    searcher = newSearcher(reader);
}

From source file:com.basistech.IndexFiles.java

License:Open Source License

/**
 * Builds an Analyzer around the Basis Tech RLP tokenizer configured for Korean.
 * When {@code filters} is true, input is HTML-stripped before tokenization and
 * the token stream is ICU-folded, augmented with reversed-wildcard variants,
 * and de-duplicated; when false, the bare tokenizer output is returned.
 */
static Analyzer setupAnalyzer(final boolean filters) {
    // RLP tokenizer options: Korean, with lemma / compound-component /
    // part-of-speech post-processing all enabled.
    Map<String, String> rlpArgs = Maps.newHashMap();
    rlpArgs.put("rlpContext", "rlp-context.xml");
    rlpArgs.put("lang", "kor");
    rlpArgs.put("postLemma", "true");
    rlpArgs.put("postCompoundComponents", "true");
    rlpArgs.put("postPartOfSpeech", "true");
    final RLPTokenizerFactory tokenizerFactory = new RLPTokenizerFactory(rlpArgs);

    // The remaining factories take no options.
    Map<String, String> noArgs = Maps.newHashMap();
    final HTMLStripCharFilterFactory htmlStrip = new HTMLStripCharFilterFactory(noArgs);
    final ICUFoldingFilterFactory icuFolding = new ICUFoldingFilterFactory(noArgs);
    final ReversedWildcardFilterFactory reversedWildcard = new ReversedWildcardFilterFactory(noArgs);
    final RemoveDuplicatesTokenFilterFactory removeDuplicates = new RemoveDuplicatesTokenFilterFactory(noArgs);

    return new Analyzer() {
        @Override
        protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
            final Tokenizer source = tokenizerFactory.create(reader);
            if (!filters) {
                return new TokenStreamComponents(source);
            }
            // Fold, add reversed-wildcard variants, then drop duplicates.
            TokenStream chain = icuFolding.create(source);
            chain = reversedWildcard.create(chain);
            chain = removeDuplicates.create(chain);
            return new TokenStreamComponents(source, chain);
        }

        @Override
        protected Reader initReader(String fieldName, Reader reader) {
            // Strip HTML markup ahead of tokenization only when filtering is on.
            return filters ? htmlStrip.create(reader) : reader;
        }
    };
}

From source file:com.bigdata.search.ConfigurableAnalyzerFactory.java

License:Open Source License

@Override
public Analyzer getAnalyzer(final String languageCode, boolean filterStopwords) {

    // Fetch the real analyzer from the delegate; when debug logging is enabled,
    // wrap it so that both the raw input text and every emitted token are logged.
    final Analyzer unlogged = delegate.getAnalyzer(languageCode, filterStopwords);
    if (log.isDebugEnabled()) {
        return new Analyzer() {
            @Override
            public TokenStream tokenStream(final String fieldName, final Reader reader) {
                // Per-call id so the input line and its token lines can be
                // correlated in the log output.
                // NOTE(review): loggerIdCounter++ is not atomic — concurrent
                // callers may share an id; likely acceptable for debug-only
                // output, but worth confirming.
                final int id = loggerIdCounter++;
                // Assumes callers always pass a StringReader — the cast below
                // throws ClassCastException otherwise; TODO confirm against callers.
                final String term = TermCompletionAnalyzer.getStringReaderContents((StringReader) reader);
                log.debug(id + " " + languageCode + " **" + term + "**");
                // Pass-through filter: tokens are forwarded unchanged, each one
                // logged with the call id.
                return new TokenFilter(unlogged.tokenStream(fieldName, reader)) {

                    TermAttribute attr = addAttribute(TermAttribute.class);

                    @Override
                    public boolean incrementToken() throws IOException {
                        if (input.incrementToken()) {
                            log.debug(id + " |" + attr.term() + "|");
                            return true;
                        }
                        return false;
                    }
                };
            }
        };
    } else {
        // Debug logging off: hand back the delegate's analyzer untouched.
        return unlogged;
    }

}

From source file:com.github.ippeiukai.externaltoken.lucene.analysis.TestPatternTokenizer.java

License:Apache License

/** Blast some random strings through the analyzer. */
public void testRandomStrings() throws Exception {
    // Exercise PatternTokenizer in both modes against random input:
    // group -1 splits on matches of the pattern, group 0 emits the matches.
    checkRandomData(random, newPatternAnalyzer(-1), 10000 * RANDOM_MULTIPLIER);
    checkRandomData(random, newPatternAnalyzer(0), 10000 * RANDOM_MULTIPLIER);
}

/**
 * Builds an analyzer whose token stream is a PatternTokenizer over the
 * pattern "a" with the given capture-group mode.
 *
 * @param group -1 to split on pattern matches, 0 to emit whole matches
 * @return an analyzer producing the configured tokenizer per call
 */
private Analyzer newPatternAnalyzer(final int group) {
    return new Analyzer() {
        @Override
        public TokenStream tokenStream(String fieldName, Reader reader) {
            try {
                return new PatternTokenizer(reader, Pattern.compile("a"), group);
            } catch (IOException e) {
                // Surface the tokenizer's checked IOException unchecked, as
                // tokenStream's signature does not declare it.
                throw new RuntimeException(e);
            }
        }
    };
}

From source file:com.grantingersoll.opengrok.analysis.c.TestCSymbolTokenizer.java

License:Open Source License

@Override
public void setUp() throws Exception {
    super.setUp();
    // Analyzer whose stream is a bare CSymbolTokenizer — no filters applied.
    analyzer = new Analyzer() {
        @Override
        protected TokenStreamComponents createComponents(String fieldName) {
            return new TokenStreamComponents(new CSymbolTokenizer(newAttributeFactory()));
        }
    };
}

From source file:com.grantingersoll.opengrok.analysis.clojure.TestClojureSymbolTokenizer.java

License:Open Source License

@Override
public void setUp() throws Exception {
    super.setUp();
    // Analyzer whose stream is a bare ClojureSymbolTokenizer — no filters applied.
    analyzer = new Analyzer() {
        @Override
        protected TokenStreamComponents createComponents(String fieldName) {
            return new TokenStreamComponents(new ClojureSymbolTokenizer(newAttributeFactory()));
        }
    };
}

From source file:com.grantingersoll.opengrok.analysis.csharp.TestCSharpSymbolTokenizer.java

License:Open Source License

@Override
public void setUp() throws Exception {
    super.setUp();
    // Analyzer whose stream is a bare CSharpSymbolTokenizer — no filters applied.
    analyzer = new Analyzer() {
        @Override
        protected TokenStreamComponents createComponents(String fieldName) {
            return new TokenStreamComponents(new CSharpSymbolTokenizer(newAttributeFactory()));
        }
    };
}

From source file:com.grantingersoll.opengrok.analysis.cxx.TestCxxSymbolTokenizer.java

License:Open Source License

@Override
public void setUp() throws Exception {
    super.setUp();
    // Analyzer whose stream is a bare CxxSymbolTokenizer — no filters applied.
    analyzer = new Analyzer() {
        @Override
        protected TokenStreamComponents createComponents(String fieldName) {
            return new TokenStreamComponents(new CxxSymbolTokenizer(newAttributeFactory()));
        }
    };
}

From source file:com.grantingersoll.opengrok.analysis.erlang.TestErlangSymbolTokenizer.java

License:Open Source License

@Override
public void setUp() throws Exception {
    super.setUp();
    // Analyzer whose stream is a bare ErlangSymbolTokenizer — no filters applied.
    analyzer = new Analyzer() {
        @Override
        protected TokenStreamComponents createComponents(String fieldName) {
            return new TokenStreamComponents(new ErlangSymbolTokenizer(newAttributeFactory()));
        }
    };
}

From source file:com.grantingersoll.opengrok.analysis.fortran.TestFortranSymbolTokenizer.java

License:Open Source License

@Override
public void setUp() throws Exception {
    super.setUp();
    // Analyzer whose stream is a bare FortranSymbolTokenizer — no filters applied.
    analyzer = new Analyzer() {
        @Override
        protected TokenStreamComponents createComponents(String fieldName) {
            return new TokenStreamComponents(new FortranSymbolTokenizer(newAttributeFactory()));
        }
    };
}