Example usage for org.apache.lucene.analysis MockTokenizer SIMPLE

Introduction

On this page you can find usage examples for org.apache.lucene.analysis MockTokenizer SIMPLE.

Prototype

public static final CharacterRunAutomaton SIMPLE

Document

Acts like LetterTokenizer: text is split at non-letter characters, so each token is a maximal run of letters.
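
The snippet below is a minimal sketch of that behavior, not taken from any of the source files on this page: the class name SimpleTokenizerDemo and the sample input are illustrative, and the lucene-test-framework artifact is assumed to be on the classpath. With lowercasing enabled (the constructor's second argument), the input "Hello, World42!" should yield the tokens hello and world.

import java.io.StringReader;

import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;

public class SimpleTokenizerDemo {
    public static void main(String[] args) throws Exception {
        // SIMPLE splits at non-letter characters; true enables lowercasing.
        MockTokenizer tokenizer = new MockTokenizer(MockTokenizer.SIMPLE, true);
        tokenizer.setReader(new StringReader("Hello, World42!"));
        CharTermAttribute term = tokenizer.addAttribute(CharTermAttribute.class);
        tokenizer.reset();
        while (tokenizer.incrementToken()) {
            System.out.println(term.toString()); // "hello", then "world"
        }
        tokenizer.end();
        tokenizer.close();
    }
}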

Usage

From source file: org.apache.solr.analysis.MockTokenizerFactory.java

License: Apache License

/** Creates a new MockTokenizerFactory */
public MockTokenizerFactory(Map<String, String> args) {
    super(args);
    String patternArg = get(args, "pattern", Arrays.asList("keyword", "simple", "whitespace"));
    if ("keyword".equalsIgnoreCase(patternArg)) {
        pattern = MockTokenizer.KEYWORD;
    } else if ("simple".equalsIgnoreCase(patternArg)) {
        pattern = MockTokenizer.SIMPLE;
    } else {
        pattern = MockTokenizer.WHITESPACE;
    }

    enableChecks = getBoolean(args, "enableChecks", true);
    if (!args.isEmpty()) {
        throw new IllegalArgumentException("Unknown parameters: " + args);
    }
}
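
As a quick illustration of how the pattern argument selects an automaton, here is a hypothetical driver, not part of the source file above (the class and method names are invented for this sketch; the no-arg create() is inherited from TokenizerFactory in the Lucene 5+ analysis API):

import java.util.HashMap;
import java.util.Map;

import org.apache.lucene.analysis.Tokenizer;
import org.apache.solr.analysis.MockTokenizerFactory;

public class MockTokenizerFactoryDemo {
    public static Tokenizer simpleTokenizer() {
        // "pattern" -> "simple" selects MockTokenizer.SIMPLE in the factory above.
        Map<String, String> args = new HashMap<>();
        args.put("pattern", "simple");
        MockTokenizerFactory factory = new MockTokenizerFactory(args);
        // create() builds a tokenizer with the default attribute factory.
        return factory.create();
    }
}

Note that the get and getBoolean helpers consume the arguments they recognize, which is why the constructor can reject leftovers with the final isEmpty() check.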

From source file: org.easynet.resource.queryparser.QueryParserTestBase.java

License: Apache License

public Query getQueryDOA(String query, Analyzer a) throws Exception {
    if (a == null)
        a = new MockAnalyzer(random(), MockTokenizer.SIMPLE, true);
    QueryParser qp = getParserConfig(a);
    setDefaultOperatorAND(qp);
    return getQuery(query, qp);
}

From source file: org.easynet.resource.queryparser.QueryParserTestBase.java

License: Apache License

public void testRange() throws Exception {
    assertQueryEquals("[ a TO z]", null, "[a TO z]");
    assertQueryEquals("[ a TO z}", null, "[a TO z}");
    assertQueryEquals("{ a TO z]", null, "{a TO z]");

    assertEquals(MultiTermQuery.CONSTANT_SCORE_AUTO_REWRITE_DEFAULT,
            ((TermRangeQuery) getQuery("[ a TO z]")).getRewriteMethod());

    QueryParser qp = getParserConfig(new MockAnalyzer(random(), MockTokenizer.SIMPLE, true));

    qp.setMultiTermRewriteMethod(MultiTermQuery.SCORING_BOOLEAN_QUERY_REWRITE);
    assertEquals(MultiTermQuery.SCORING_BOOLEAN_QUERY_REWRITE,
            ((TermRangeQuery) getQuery("[ a TO z]", qp)).getRewriteMethod());

    // test open ranges
    assertQueryEquals("[ a TO * ]", null, "[a TO *]");
    assertQueryEquals("[ * TO z ]", null, "[* TO z]");
    assertQueryEquals("[ * TO * ]", null, "[* TO *]");

    // mixing exclude and include bounds
    assertQueryEquals("{ a TO z ]", null, "{a TO z]");
    assertQueryEquals("[ a TO z }", null, "[a TO z}");
    assertQueryEquals("{ a TO * ]", null, "{a TO *]");
    assertQueryEquals("[ * TO z }", null, "[* TO z}");

    assertQueryEquals("[ a TO z ]", null, "[a TO z]");
    assertQueryEquals("{ a TO z}", null, "{a TO z}");
    assertQueryEquals("{ a TO z }", null, "{a TO z}");
    assertQueryEquals("{ a TO z }^2.0", null, "{a TO z}^2.0");
    assertQueryEquals("[ a TO z] OR bar", null, "[a TO z] bar");
    assertQueryEquals("[ a TO z] AND bar", null, "+[a TO z] +bar");
    assertQueryEquals("( bar blar { a TO z}) ", null, "bar blar {a TO z}");
    assertQueryEquals("gack ( bar blar { a TO z}) ", null, "gack (bar blar {a TO z})");

    assertQueryEquals("[* TO Z]", null, "[* TO z]");
    assertQueryEquals("[A TO *]", null, "[a TO *]");
    assertQueryEquals("[* TO *]", null, "[* TO *]");
}

From source file: org.easynet.resource.queryparser.QueryParserTestBase.java

License: Apache License

public void testDateRange() throws Exception {
    String startDate = getLocalizedDate(2002, 1, 1);
    String endDate = getLocalizedDate(2002, 1, 4);
    // we use the default Locale/TZ since LuceneTestCase randomizes it
    Calendar endDateExpected = new GregorianCalendar(TimeZone.getDefault(), Locale.getDefault());
    endDateExpected.clear();
    endDateExpected.set(2002, 1, 4, 23, 59, 59);
    endDateExpected.set(Calendar.MILLISECOND, 999);
    final String defaultField = "default";
    final String monthField = "month";
    final String hourField = "hour";
    Analyzer a = new MockAnalyzer(random(), MockTokenizer.SIMPLE, true);
    QueryParser qp = getParserConfig(a);

    // set a field specific date resolution
    setDateResolution(qp, monthField, DateTools.Resolution.MONTH);

    // set default date resolution to MILLISECOND
    qp.setDateResolution(DateTools.Resolution.MILLISECOND);

    // set second field specific date resolution
    setDateResolution(qp, hourField, DateTools.Resolution.HOUR);

    // no field-specific date resolution has been set for this field,
    // so verify that the default resolution is used
    assertDateRangeQueryEquals(qp, defaultField, startDate, endDate, endDateExpected.getTime(),
            DateTools.Resolution.MILLISECOND);

    // verify that the field-specific date resolutions are used for these
    // two fields
    assertDateRangeQueryEquals(qp, monthField, startDate, endDate, endDateExpected.getTime(),
            DateTools.Resolution.MONTH);

    assertDateRangeQueryEquals(qp, hourField, startDate, endDate, endDateExpected.getTime(),
            DateTools.Resolution.HOUR);
}

From source file: org.easynet.resource.queryparser.QueryParserTestBase.java

License: Apache License

public void testBoost() throws Exception {
    CharacterRunAutomaton stopWords = new CharacterRunAutomaton(Automata.makeString("on"));
    Analyzer oneStopAnalyzer = new MockAnalyzer(random(), MockTokenizer.SIMPLE, true, stopWords);
    QueryParser qp = getParserConfig(oneStopAnalyzer);
    Query q = getQuery("on^1.0", qp);
    assertNotNull(q);
    q = getQuery("\"hello\"^2.0", qp);
    assertNotNull(q);
    assertEquals(q.getBoost(), (float) 2.0, (float) 0.5);
    q = getQuery("hello^2.0", qp);
    assertNotNull(q);
    assertEquals(q.getBoost(), (float) 2.0, (float) 0.5);
    q = getQuery("\"on\"^1.0", qp);
    assertNotNull(q);

    Analyzer a2 = new MockAnalyzer(random(), MockTokenizer.SIMPLE, true, MockTokenFilter.ENGLISH_STOPSET);
    QueryParser qp2 = getParserConfig(a2);
    q = getQuery("the^3", qp2);
    // "the" is a stop word so the result is an empty query:
    assertNotNull(q);
    assertEquals("", q.toString());
    assertEquals(1.0f, q.getBoost(), 0.01f);
}

From source file: org.easynet.resource.queryparser.QueryParserTestBase.java

License: Apache License

public void testStopwords() throws Exception {
    CharacterRunAutomaton stopSet = new CharacterRunAutomaton(new RegExp("the|foo").toAutomaton());
    QueryParser qp = getParserConfig(new MockAnalyzer(random(), MockTokenizer.SIMPLE, true, stopSet));
    Query result = getQuery("field:the OR field:foo", qp);
    assertNotNull("result is null and it shouldn't be", result);
    assertTrue("result is not a BooleanQuery", result instanceof BooleanQuery);
    assertTrue(((BooleanQuery) result).clauses().size() + " does not equal: " + 0,
            ((BooleanQuery) result).clauses().size() == 0);
    result = getQuery("field:woo OR field:the", qp);
    assertNotNull("result is null and it shouldn't be", result);
    assertTrue("result is not a TermQuery", result instanceof TermQuery);
    result = getQuery("(fieldX:xxxxx OR fieldy:xxxxxxxx)^2 AND (fieldx:the OR fieldy:foo)", qp);
    assertNotNull("result is null and it shouldn't be", result);
    assertTrue("result is not a BooleanQuery", result instanceof BooleanQuery);
    if (VERBOSE)
        System.out.println("Result: " + result);
    assertTrue(((BooleanQuery) result).clauses().size() + " does not equal: " + 2,
            ((BooleanQuery) result).clauses().size() == 2);
}

From source file: org.easynet.resource.queryparser.QueryParserTestBase.java

License: Apache License

public void testPositionIncrement() throws Exception {
    QueryParser qp = getParserConfig(
            new MockAnalyzer(random(), MockTokenizer.SIMPLE, true, MockTokenFilter.ENGLISH_STOPSET));
    qp.setEnablePositionIncrements(true);
    String qtxt = "\"the words in poisitions pos02578 are stopped in this phrasequery\"";
    // 0 2 5 7 8
    int expectedPositions[] = { 1, 3, 4, 6, 9 };
    PhraseQuery pq = (PhraseQuery) getQuery(qtxt, qp);
    // System.out.println("Query text: "+qtxt);
    // System.out.println("Result: "+pq);
    Term t[] = pq.getTerms();
    int pos[] = pq.getPositions();
    for (int i = 0; i < t.length; i++) {
        // System.out.println(i+". "+t[i]+"  pos: "+pos[i]);
        assertEquals("term " + i + " = " + t[i] + " has wrong term-position!", expectedPositions[i], pos[i]);
    }
}

From source file: org.easynet.resource.queryparser.QueryParserTestBase.java

License: Apache License

public void testPhraseQueryToString() throws Exception {
    Analyzer analyzer = new MockAnalyzer(random(), MockTokenizer.SIMPLE, true, MockTokenFilter.ENGLISH_STOPSET);
    QueryParser qp = getParserConfig(analyzer);
    qp.setEnablePositionIncrements(true);
    PhraseQuery q = (PhraseQuery) getQuery("\"this hi this is a test is\"", qp);
    assertEquals("field:\"? hi ? ? ? test\"", q.toString());
}

From source file: org.tallison.lucene.queryparser.spans.TestAdvancedAnalyzers.java

License: Apache License

@BeforeClass
public static void beforeClass() throws Exception {
    lcMultiTermAnalyzer = new MockAnalyzer(random(), MockTokenizer.KEYWORD, true);

    Map<String, String> attrs = new HashMap<>();
    attrs.put("generateWordParts", "1");
    attrs.put("generateNumberParts", "1");
    attrs.put("catenateWords", "1");
    attrs.put("catenateNumbers", "1");
    attrs.put("catenateAll", "1");
    attrs.put("splitOnCaseChange", "1");
    attrs.put("preserveOriginal", "1");
    complexAnalyzer = CustomAnalyzer.builder(new ClasspathResourceLoader(TestAdvancedAnalyzers.class))
            .withTokenizer("whitespace").addTokenFilter("worddelimiter", attrs).addTokenFilter("kstem")
            .addTokenFilter("removeduplicates").build();

    synAnalyzer = new Analyzer() {
        @Override
        public TokenStreamComponents createComponents(String fieldName) {

            Tokenizer tokenizer = new MockTokenizer(MockTokenizer.SIMPLE, true);
            TokenFilter filter = new MockNonWhitespaceFilter(tokenizer);

            filter = new MockSynFilter(filter);
            return new TokenStreamComponents(tokenizer, filter);
        }

        @Override
        protected TokenStream normalize(String fieldName, TokenStream in) {
            return new MockNonWhitespaceFilter(new MockSynFilter(in));
        }

    };

    baseAnalyzer = new Analyzer() {
        @Override
        public TokenStreamComponents createComponents(String fieldName) {
            Tokenizer tokenizer = new MockTokenizer(MockTokenizer.SIMPLE, true);
            TokenFilter filter = new MockNonWhitespaceFilter(tokenizer);
            return new TokenStreamComponents(tokenizer, filter);
        }

        @Override
        protected TokenStream normalize(String fieldName, TokenStream in) {
            return new MockNonWhitespaceFilter(new LowerCaseFilter(in));
        }

    };

    ucVowelAnalyzer = new Analyzer() {
        @Override
        public TokenStreamComponents createComponents(String fieldName) {
            Tokenizer tokenizer = new MockTokenizer(MockTokenizer.SIMPLE, true);
            TokenFilter filter = new MockUCVowelFilter(tokenizer);
            return new TokenStreamComponents(tokenizer, filter);
        }

        @Override
        protected TokenStream normalize(String fieldName, TokenStream in) {
            return new MockUCVowelFilter(new LowerCaseFilter(in));
        }
    };

    ucVowelMTAnalyzer = new Analyzer() {
        @Override
        public TokenStream normalize(String fieldName, TokenStream in) {
            return new MockUCVowelFilter(new LowerCaseFilter(in));
        }

        @Override
        public TokenStreamComponents createComponents(String fieldName) {
            Tokenizer tokenizer = new MockTokenizer(MockTokenizer.KEYWORD, true);
            TokenFilter filter = new MockUCVowelFilter(tokenizer);
            return new TokenStreamComponents(tokenizer, filter);
        }
    };

    Analyzer tmpUCVowelAnalyzer = new Analyzer() {
        @Override
        public TokenStreamComponents createComponents(String fieldName) {
            Tokenizer tokenizer = new MockTokenizer(MockTokenizer.SIMPLE, true);
            TokenFilter filter = new MockUCVowelFilter(tokenizer);
            return new TokenStreamComponents(tokenizer, filter);
        }

        @Override
        protected TokenStream normalize(String fieldName, TokenStream in) {
            return new MockUCVowelFilter(new LowerCaseFilter(in));
        }
    };
    directory = newDirectory();
    RandomIndexWriter writer = new RandomIndexWriter(random(), directory, newIndexWriterConfig(baseAnalyzer)
            .setMaxBufferedDocs(TestUtil.nextInt(random(), 100, 1000)).setMergePolicy(newLogMergePolicy()));
    String[] docs = new String[] { "abc_def", "lmnop", "abc one", "abc two", "qrs one", "qrs two", "tuv one",
            "tuv two", "qrs tuv", "qrs_tuv" };
    for (int i = 0; i < docs.length; i++) {
        Document doc = new Document();
        doc.add(newTextField(FIELD1, docs[i], Field.Store.YES));
        TextField tf = new TextField(FIELD2, docs[i], Field.Store.YES);
        tf.setTokenStream(ucVowelAnalyzer.tokenStream(FIELD2, docs[i]));
        doc.add(tf);
        doc.add(newTextField(FIELD3, docs[i], Field.Store.YES));

        TextField tf4 = new TextField(FIELD4, docs[i], Field.Store.YES);
        tf4.setTokenStream(tmpUCVowelAnalyzer.tokenStream(FIELD4, docs[i]));
        doc.add(tf4);
        writer.addDocument(doc);
    }
    reader = writer.getReader();
    searcher = newSearcher(reader);
    writer.close();
}

From source file: org.tallison.lucene.queryparser.spans.TestOverallSpanQueryParser.java

License: Apache License

public void testSingleAnalyzerMultitermNorm() throws Exception {
    Analyzer analyzer = new MockAnalyzer(random(), MockTokenizer.SIMPLE, true);
    Directory dir = newDirectory();
    RandomIndexWriter w = new RandomIndexWriter(random(), dir, newIndexWriterConfig(analyzer)
            .setMaxBufferedDocs(TestUtil.nextInt(random(), 100, 1000)).setMergePolicy(newLogMergePolicy()));
    String[] docs = new String[] { "foobarbaz", };
    for (int i = 0; i < docs.length; i++) {
        Document doc = new Document();
        doc.add(newTextField(FIELD1, docs[i], Field.Store.YES));
        w.addDocument(doc);
    }
    IndexReader r = w.getReader();
    IndexSearcher s = newSearcher(r);
    w.close();

    compareHits(PARSER, "foo*baz", s, 0);
    SpanQueryParser parser = new SpanQueryParser(FIELD1, analyzer, analyzer);
    compareHits(parser, "foo*baz", s, 0);
    r.close();
    dir.close();
}