List of usage examples for org.apache.lucene.analysis MockTokenizer.SIMPLE (a CharacterRunAutomaton constant).
To view the source code for each org.apache.lucene.analysis MockTokenizer.SIMPLE usage example,
click its Source Link.
From source file:org.apache.solr.analysis.MockTokenizerFactory.java
License:Apache License
/** Creates a new MockTokenizerFactory */ public MockTokenizerFactory(Map<String, String> args) { super(args);//from w w w . j a v a2 s . co m String patternArg = get(args, "pattern", Arrays.asList("keyword", "simple", "whitespace")); if ("keyword".equalsIgnoreCase(patternArg)) { pattern = MockTokenizer.KEYWORD; } else if ("simple".equalsIgnoreCase(patternArg)) { pattern = MockTokenizer.SIMPLE; } else { pattern = MockTokenizer.WHITESPACE; } enableChecks = getBoolean(args, "enableChecks", true); if (!args.isEmpty()) { throw new IllegalArgumentException("Unknown parameters: " + args); } }
From source file:org.easynet.resource.queryparser.QueryParserTestBase.java
License:Apache License
/**
 * Parses {@code query} with the default operator set to AND.
 * When {@code a} is null, a lowercasing SIMPLE mock analyzer is used.
 */
public Query getQueryDOA(String query, Analyzer a) throws Exception {
    Analyzer analyzer = (a != null) ? a : new MockAnalyzer(random(), MockTokenizer.SIMPLE, true);
    QueryParser parser = getParserConfig(analyzer);
    setDefaultOperatorAND(parser);
    return getQuery(query, parser);
}
From source file:org.easynet.resource.queryparser.QueryParserTestBase.java
License:Apache License
public void testRange() throws Exception { assertQueryEquals("[ a TO z]", null, "[a TO z]"); assertQueryEquals("[ a TO z}", null, "[a TO z}"); assertQueryEquals("{ a TO z]", null, "{a TO z]"); assertEquals(MultiTermQuery.CONSTANT_SCORE_AUTO_REWRITE_DEFAULT, ((TermRangeQuery) getQuery("[ a TO z]")).getRewriteMethod()); QueryParser qp = getParserConfig(new MockAnalyzer(random(), MockTokenizer.SIMPLE, true)); qp.setMultiTermRewriteMethod(MultiTermQuery.SCORING_BOOLEAN_QUERY_REWRITE); assertEquals(MultiTermQuery.SCORING_BOOLEAN_QUERY_REWRITE, ((TermRangeQuery) getQuery("[ a TO z]", qp)).getRewriteMethod()); // test open ranges assertQueryEquals("[ a TO * ]", null, "[a TO *]"); assertQueryEquals("[ * TO z ]", null, "[* TO z]"); assertQueryEquals("[ * TO * ]", null, "[* TO *]"); // mixing exclude and include bounds assertQueryEquals("{ a TO z ]", null, "{a TO z]"); assertQueryEquals("[ a TO z }", null, "[a TO z}"); assertQueryEquals("{ a TO * ]", null, "{a TO *]"); assertQueryEquals("[ * TO z }", null, "[* TO z}"); assertQueryEquals("[ a TO z ]", null, "[a TO z]"); assertQueryEquals("{ a TO z}", null, "{a TO z}"); assertQueryEquals("{ a TO z }", null, "{a TO z}"); assertQueryEquals("{ a TO z }^2.0", null, "{a TO z}^2.0"); assertQueryEquals("[ a TO z] OR bar", null, "[a TO z] bar"); assertQueryEquals("[ a TO z] AND bar", null, "+[a TO z] +bar"); assertQueryEquals("( bar blar { a TO z}) ", null, "bar blar {a TO z}"); assertQueryEquals("gack ( bar blar { a TO z}) ", null, "gack (bar blar {a TO z})"); assertQueryEquals("[* TO Z]", null, "[* TO z]"); assertQueryEquals("[A TO *]", null, "[a TO *]"); assertQueryEquals("[* TO *]", null, "[* TO *]"); }
From source file:org.easynet.resource.queryparser.QueryParserTestBase.java
License:Apache License
public void testDateRange() throws Exception { String startDate = getLocalizedDate(2002, 1, 1); String endDate = getLocalizedDate(2002, 1, 4); // we use the default Locale/TZ since LuceneTestCase randomizes it Calendar endDateExpected = new GregorianCalendar(TimeZone.getDefault(), Locale.getDefault()); endDateExpected.clear();//from www . jav a2s .co m endDateExpected.set(2002, 1, 4, 23, 59, 59); endDateExpected.set(Calendar.MILLISECOND, 999); final String defaultField = "default"; final String monthField = "month"; final String hourField = "hour"; Analyzer a = new MockAnalyzer(random(), MockTokenizer.SIMPLE, true); QueryParser qp = getParserConfig(a); // set a field specific date resolution setDateResolution(qp, monthField, DateTools.Resolution.MONTH); // set default date resolution to MILLISECOND qp.setDateResolution(DateTools.Resolution.MILLISECOND); // set second field specific date resolution setDateResolution(qp, hourField, DateTools.Resolution.HOUR); // for this field no field specific date resolution has been set, // so verify if the default resolution is used assertDateRangeQueryEquals(qp, defaultField, startDate, endDate, endDateExpected.getTime(), DateTools.Resolution.MILLISECOND); // verify if field specific date resolutions are used for these two // fields assertDateRangeQueryEquals(qp, monthField, startDate, endDate, endDateExpected.getTime(), DateTools.Resolution.MONTH); assertDateRangeQueryEquals(qp, hourField, startDate, endDate, endDateExpected.getTime(), DateTools.Resolution.HOUR); }
From source file:org.easynet.resource.queryparser.QueryParserTestBase.java
License:Apache License
public void testBoost() throws Exception { CharacterRunAutomaton stopWords = new CharacterRunAutomaton(Automata.makeString("on")); Analyzer oneStopAnalyzer = new MockAnalyzer(random(), MockTokenizer.SIMPLE, true, stopWords); QueryParser qp = getParserConfig(oneStopAnalyzer); Query q = getQuery("on^1.0", qp); assertNotNull(q);//from w w w. ja v a 2 s . c om q = getQuery("\"hello\"^2.0", qp); assertNotNull(q); assertEquals(q.getBoost(), (float) 2.0, (float) 0.5); q = getQuery("hello^2.0", qp); assertNotNull(q); assertEquals(q.getBoost(), (float) 2.0, (float) 0.5); q = getQuery("\"on\"^1.0", qp); assertNotNull(q); Analyzer a2 = new MockAnalyzer(random(), MockTokenizer.SIMPLE, true, MockTokenFilter.ENGLISH_STOPSET); QueryParser qp2 = getParserConfig(a2); q = getQuery("the^3", qp2); // "the" is a stop word so the result is an empty query: assertNotNull(q); assertEquals("", q.toString()); assertEquals(1.0f, q.getBoost(), 0.01f); }
From source file:org.easynet.resource.queryparser.QueryParserTestBase.java
License:Apache License
/** Stop-word handling: fully-stopped clauses are dropped; surviving clauses keep their structure. */
public void testStopwords() throws Exception {
    CharacterRunAutomaton stopSet = new CharacterRunAutomaton(new RegExp("the|foo").toAutomaton());
    QueryParser qp = getParserConfig(new MockAnalyzer(random(), MockTokenizer.SIMPLE, true, stopSet));

    // both terms are stop words -> empty BooleanQuery
    Query result = getQuery("field:the OR field:foo", qp);
    assertNotNull("result is null and it shouldn't be", result);
    assertTrue("result is not a BooleanQuery", result instanceof BooleanQuery);
    assertTrue(((BooleanQuery) result).clauses().size() + " does not equal: " + 0,
            ((BooleanQuery) result).clauses().size() == 0);

    // one surviving term -> collapses to a TermQuery
    result = getQuery("field:woo OR field:the", qp);
    assertNotNull("result is null and it shouldn't be", result);
    assertTrue("result is not a TermQuery", result instanceof TermQuery);

    // mixed clauses: only the non-stopped ones remain
    result = getQuery("(fieldX:xxxxx OR fieldy:xxxxxxxx)^2 AND (fieldx:the OR fieldy:foo)", qp);
    assertNotNull("result is null and it shouldn't be", result);
    assertTrue("result is not a BooleanQuery", result instanceof BooleanQuery);
    if (VERBOSE)
        System.out.println("Result: " + result);
    assertTrue(((BooleanQuery) result).clauses().size() + " does not equal: " + 2,
            ((BooleanQuery) result).clauses().size() == 2);
}
From source file:org.easynet.resource.queryparser.QueryParserTestBase.java
License:Apache License
public void testPositionIncrement() throws Exception { QueryParser qp = getParserConfig(/*from ww w . j a v a 2 s . co m*/ new MockAnalyzer(random(), MockTokenizer.SIMPLE, true, MockTokenFilter.ENGLISH_STOPSET)); qp.setEnablePositionIncrements(true); String qtxt = "\"the words in poisitions pos02578 are stopped in this phrasequery\""; // 0 2 5 7 8 int expectedPositions[] = { 1, 3, 4, 6, 9 }; PhraseQuery pq = (PhraseQuery) getQuery(qtxt, qp); // System.out.println("Query text: "+qtxt); // System.out.println("Result: "+pq); Term t[] = pq.getTerms(); int pos[] = pq.getPositions(); for (int i = 0; i < t.length; i++) { // System.out.println(i+". "+t[i]+" pos: "+pos[i]); assertEquals("term " + i + " = " + t[i] + " has wrong term-position!", expectedPositions[i], pos[i]); } }
From source file:org.easynet.resource.queryparser.QueryParserTestBase.java
License:Apache License
/** PhraseQuery.toString() renders stop-word position gaps as '?'. */
public void testPhraseQueryToString() throws Exception {
    Analyzer analyzer = new MockAnalyzer(random(), MockTokenizer.SIMPLE, true,
            MockTokenFilter.ENGLISH_STOPSET);
    QueryParser parser = getParserConfig(analyzer);
    parser.setEnablePositionIncrements(true);

    PhraseQuery phrase = (PhraseQuery) getQuery("\"this hi this is a test is\"", parser);
    assertEquals("field:\"? hi ? ? ? test\"", phrase.toString());
}
From source file:org.tallison.lucene.queryparser.spans.TestAdvancedAnalyzers.java
License:Apache License
/**
 * Builds the shared analyzers, indexes a small fixed corpus, and opens the
 * shared reader/searcher used by the tests in this class.
 */
@BeforeClass
public static void beforeClass() throws Exception {
    lcMultiTermAnalyzer = new MockAnalyzer(random(), MockTokenizer.KEYWORD, true);

    // word-delimiter + kstem + dedupe pipeline over a whitespace tokenizer
    Map<String, String> attrs = new HashMap<>();
    attrs.put("generateWordParts", "1");
    attrs.put("generateNumberParts", "1");
    attrs.put("catenateWords", "1");
    attrs.put("catenateNumbers", "1");
    attrs.put("catenateAll", "1");
    attrs.put("splitOnCaseChange", "1");
    attrs.put("preserveOriginal", "1");
    complexAnalyzer = CustomAnalyzer.builder(new ClasspathResourceLoader(TestAdvancedAnalyzers.class))
            .withTokenizer("whitespace")
            .addTokenFilter("worddelimiter", attrs)
            .addTokenFilter("kstem")
            .addTokenFilter("removeduplicates")
            .build();

    // SIMPLE tokenizer -> non-whitespace filter -> synonym filter
    synAnalyzer = new Analyzer() {
        @Override
        public TokenStreamComponents createComponents(String fieldName) {
            Tokenizer tokenizer = new MockTokenizer(MockTokenizer.SIMPLE, true);
            TokenFilter filter = new MockNonWhitespaceFilter(tokenizer);
            filter = new MockSynFilter(filter);
            return new TokenStreamComponents(tokenizer, filter);
        }

        @Override
        protected TokenStream normalize(String fieldName, TokenStream in) {
            return new MockNonWhitespaceFilter(new MockSynFilter(in));
        }
    };

    // SIMPLE tokenizer -> non-whitespace filter (no synonyms)
    baseAnalyzer = new Analyzer() {
        @Override
        public TokenStreamComponents createComponents(String fieldName) {
            Tokenizer tokenizer = new MockTokenizer(MockTokenizer.SIMPLE, true);
            TokenFilter filter = new MockNonWhitespaceFilter(tokenizer);
            return new TokenStreamComponents(tokenizer, filter);
        }

        @Override
        protected TokenStream normalize(String fieldName, TokenStream in) {
            return new MockNonWhitespaceFilter(new LowerCaseFilter(in));
        }
    };

    // SIMPLE tokenizer -> upper-case-vowel filter
    ucVowelAnalyzer = new Analyzer() {
        @Override
        public TokenStreamComponents createComponents(String fieldName) {
            Tokenizer tokenizer = new MockTokenizer(MockTokenizer.SIMPLE, true);
            TokenFilter filter = new MockUCVowelFilter(tokenizer);
            return new TokenStreamComponents(tokenizer, filter);
        }

        @Override
        protected TokenStream normalize(String fieldName, TokenStream in) {
            return new MockUCVowelFilter(new LowerCaseFilter(in));
        }
    };

    // KEYWORD tokenizer variant (multiterm) of the upper-case-vowel analyzer
    ucVowelMTAnalyzer = new Analyzer() {
        @Override
        public TokenStream normalize(String fieldName, TokenStream in) {
            return new MockUCVowelFilter(new LowerCaseFilter(in));
        }

        @Override
        public TokenStreamComponents createComponents(String fieldName) {
            Tokenizer tokenizer = new MockTokenizer(MockTokenizer.KEYWORD, true);
            TokenFilter filter = new MockUCVowelFilter(tokenizer);
            return new TokenStreamComponents(tokenizer, filter);
        }
    };

    // local copy used only while indexing FIELD4 below
    Analyzer tmpUCVowelAnalyzer = new Analyzer() {
        @Override
        public TokenStreamComponents createComponents(String fieldName) {
            Tokenizer tokenizer = new MockTokenizer(MockTokenizer.SIMPLE, true);
            TokenFilter filter = new MockUCVowelFilter(tokenizer);
            return new TokenStreamComponents(tokenizer, filter);
        }

        @Override
        protected TokenStream normalize(String fieldName, TokenStream in) {
            return new MockUCVowelFilter(new LowerCaseFilter(in));
        }
    };

    directory = newDirectory();
    RandomIndexWriter writer = new RandomIndexWriter(random(), directory,
            newIndexWriterConfig(baseAnalyzer)
                    .setMaxBufferedDocs(TestUtil.nextInt(random(), 100, 1000))
                    .setMergePolicy(newLogMergePolicy()));

    String[] docs = new String[] { "abc_def", "lmnop", "abc one", "abc two", "qrs one",
            "qrs two", "tuv one", "tuv two", "qrs tuv", "qrs_tuv" };
    for (int i = 0; i < docs.length; i++) {
        Document doc = new Document();
        doc.add(newTextField(FIELD1, docs[i], Field.Store.YES));
        // FIELD2/FIELD4 are analyzed with the vowel-uppercasing analyzers via explicit token streams
        TextField tf = new TextField(FIELD2, docs[i], Field.Store.YES);
        tf.setTokenStream(ucVowelAnalyzer.tokenStream(FIELD2, docs[i]));
        doc.add(tf);
        doc.add(newTextField(FIELD3, docs[i], Field.Store.YES));
        TextField tf4 = new TextField(FIELD4, docs[i], Field.Store.YES);
        tf4.setTokenStream(tmpUCVowelAnalyzer.tokenStream(FIELD4, docs[i]));
        doc.add(tf4);
        writer.addDocument(doc);
    }

    reader = writer.getReader();
    searcher = newSearcher(reader);
    writer.close();
}
From source file:org.tallison.lucene.queryparser.spans.TestOverallSpanQueryParser.java
License:Apache License
public void testSingleAnalyzerMultitermNorm() throws Exception { Analyzer analyzer = new MockAnalyzer(random(), MockTokenizer.SIMPLE, true); Directory dir = newDirectory();//w w w .j a v a 2s. co m RandomIndexWriter w = new RandomIndexWriter(random(), dir, newIndexWriterConfig(analyzer) .setMaxBufferedDocs(TestUtil.nextInt(random(), 100, 1000)).setMergePolicy(newLogMergePolicy())); String[] docs = new String[] { "foobarbaz", }; for (int i = 0; i < docs.length; i++) { Document doc = new Document(); doc.add(newTextField(FIELD1, docs[i], Field.Store.YES)); w.addDocument(doc); } IndexReader r = w.getReader(); IndexSearcher s = newSearcher(r); w.close(); compareHits(PARSER, "foo*baz", s, 0); SpanQueryParser parser = new SpanQueryParser(FIELD1, analyzer, analyzer); compareHits(parser, "foo*baz", s, 0); r.close(); dir.close(); }