List of usage examples for org.apache.lucene.analysis.standard StandardTokenizer TOKEN_TYPES
String[] TOKEN_TYPES
To view the source code for org.apache.lucene.analysis.standard StandardTokenizer TOKEN_TYPES, click the Source Link.
From source file:com.piza.search.AutoPhraseTokenizer.java
License:Apache License
@Override public final boolean incrementToken() throws IOException { clearAttributes();/* w w w . ja va 2 s .c om*/ skippedPositions = 0; while (true) { int tokenType = scanner.getNextToken(); if (tokenType == StandardTokenizerImpl.YYEOF) { return false; } if (scanner.yylength() <= maxTokenLength) { posIncrAtt.setPositionIncrement(skippedPositions + 1); scanner.getText(termAtt); final int start = scanner.yychar(); offsetAtt.setOffset(correctOffset(start), correctOffset(start + termAtt.length())); typeAtt.setType(StandardTokenizer.TOKEN_TYPES[tokenType]); this.phraseProcess.addTerm(termAtt.toString(), offsetAtt.startOffset(), offsetAtt.endOffset()); return true; } else // When we skip a too-long term, we still increment the // position increment skippedPositions++; } }
From source file:org.apache.jackrabbit.core.query.lucene.JackrabbitQueryParser.java
License:Apache License
/** * {@inheritDoc}/*from www . j a va 2s.c o m*/ */ protected Query getPrefixQuery(String field, String termStr) throws ParseException { // only create a prefix query when the term is a single word / token Analyzer a = getAnalyzer(); TokenStream ts = a.tokenStream(field, new StringReader(termStr)); int count = 0; boolean isCJ = false; try { TypeAttribute t = ts.addAttribute(TypeAttribute.class); ts.reset(); while (ts.incrementToken()) { count++; isCJ = StandardTokenizer.TOKEN_TYPES[StandardTokenizer.CJ].equals(t.type()); } ts.end(); } catch (IOException e) { throw new ParseException(e.getMessage()); } finally { try { ts.close(); } catch (IOException e) { // ignore } } if (count > 1 && isCJ) { return getFieldQuery(field, termStr); } else { return getWildcardQuery(field, termStr + "*"); } }
From source file:org.sindice.siren.analysis.TestTupleTokenizer.java
License:Apache License
/**
 * Checks tokenization of quoted literals that carry an {@code @lang} tag:
 * the literal's terms are expected first, followed by the language code as
 * a separate token with the language token type.
 */
@Test
public void testLanguage() throws Exception {
    // Simple literal: one term plus its language tag.
    final String[] simpleTerms = { "test", "en" };
    final String[] simpleTypes = { "<ALPHANUM>", "<LANGUAGE>" };
    this.assertTokenizesTo(_t, "\"test\"@en", simpleTerms, simpleTypes);

    // Literal whose text itself contains an '@': expected to yield two
    // alphanumeric terms, with only the trailing tag typed as a language.
    final String alphanumType = StandardTokenizer.TOKEN_TYPES[StandardTokenizer.ALPHANUM];
    final String languageType = TupleTokenizer.TOKEN_TYPES[TupleTokenizer.LANGUAGE];
    final String[] emailTerms = { "toto", "titi.fr", "fr" };
    final String[] emailTypes = { alphanumType, alphanumType, languageType };
    this.assertTokenizesTo(_t, "\"toto@titi.fr \"@fr", emailTerms, emailTypes);
}