List of usage examples for the org.apache.lucene.analysis.TokenStream#reset() method
public void reset() throws IOException
From source file:com.sindicetech.siren.analysis.TestConciseJsonAnalyzer.java
License:Open Source License
/**
 * Verifies that a field holding a JSON numeric value is tokenized by the
 * registered numeric analyzer and that the emitted term is prefixed with
 * the field path ("a:").
 */
@Test
public void testNumeric() throws Exception {
    _a.registerDatatype(XSDDatatype.XSD_LONG.toCharArray(), new LongNumericAnalyzer(64));
    // try-with-resources: the original leaked the stream if an assertion
    // failed before close(), which breaks analyzer component reuse.
    try (final TokenStream t = _a.tokenStream("", new StringReader("{ \"a\" : 12 }"))) {
        final CharTermAttribute termAtt = t.getAttribute(CharTermAttribute.class);
        t.reset();
        assertTrue(t.incrementToken());
        assertTrue(termAtt.toString().startsWith("a:"));
        t.end();
    }
}
From source file:com.sindicetech.siren.qparser.keyword.processors.DatatypeAnalyzerProcessor.java
License:Open Source License
private CachingTokenFilter getBuffer(Analyzer analyzer, FieldQueryNode fieldNode) { final TokenStream source; final String text = fieldNode.getTextAsString(); final String field = fieldNode.getFieldAsString(); try {//from w w w . j a va 2 s . co m source = analyzer.tokenStream(field, new StringReader(text)); source.reset(); } catch (final IOException e1) { throw new RuntimeException(e1); } return new CachingTokenFilter(source); }
From source file:com.sindicetech.siren.qparser.keyword.processors.PhraseQueryNodeProcessor.java
License:Open Source License
/**
 * Analyzes the text of eligible textable nodes. A node producing no tokens is
 * replaced by a {@link NoTokenFoundQueryNode}; a node producing several tokens
 * becomes a {@link TokenizedPhraseQueryNode} (one child per token, with
 * sequential position increments and the node's datatype tag propagated);
 * a single-token node is returned unchanged.
 */
@Override protected QueryNode postProcessNode(final QueryNode node) throws QueryNodeException {
    // Skip node kinds whose text must not be re-analyzed (wildcards, fuzzy,
    // regexp, protected terms) and range bounds, which are handled elsewhere.
    if (node instanceof TextableQueryNode && !(node instanceof WildcardQueryNode)
        && !(node instanceof FuzzyQueryNode) && !(node instanceof RegexpQueryNode)
        && !(node instanceof ProtectedQueryNode) && !(node.getParent() instanceof RangeQueryNode)) {
        final FieldQueryNode fieldNode = ((FieldQueryNode) node);
        final String text = fieldNode.getTextAsString();
        final String field = fieldNode.getFieldAsString();
        final TokenStream source;
        try {
            source = this.analyzer.tokenStream(field, new StringReader(text));
            source.reset();
        } catch (final IOException e1) {
            throw new RuntimeException(e1);
        }
        // Cache the tokens: the stream is consumed twice (count, then replay).
        final CachingTokenFilter buffer = new CachingTokenFilter(source);
        int numTokens = 0;
        try {
            // First pass: count the tokens while the cache fills up.
            while (buffer.incrementToken()) {
                numTokens++;
            }
        } catch (final IOException e) {
            // ignore — treated as "no more tokens"; numTokens keeps its count
        }
        try {
            // rewind the buffer stream for the replay pass below
            buffer.reset();
            // close original stream - all tokens buffered
            source.close();
        } catch (final IOException e) {
            // ignore — best-effort cleanup; the cached tokens remain usable
        }
        if (!buffer.hasAttribute(CharTermAttribute.class)) {
            // Analyzer emitted no term attribute at all — nothing to match.
            return new NoTokenFoundQueryNode();
        }
        final CharTermAttribute termAtt = buffer.getAttribute(CharTermAttribute.class);
        if (numTokens == 0) {
            return new NoTokenFoundQueryNode();
        }
        // phrase query: the text analyzed into several tokens
        else if (numTokens != 1) {
            String datatype = (String) DatatypeProcessor.getDatatype(this.getQueryConfigHandler(), node);
            final TokenizedPhraseQueryNode pq = new TokenizedPhraseQueryNode();
            // assign datatype
            pq.setTag(DatatypeQueryNode.DATATYPE_TAGID, datatype);
            // Second pass: replay the cached tokens into phrase children.
            for (int i = 0; i < numTokens; i++) {
                String term = null;
                try {
                    final boolean hasNext = buffer.incrementToken();
                    assert hasNext == true;
                    term = termAtt.toString();
                } catch (final IOException e) {
                    // safe to ignore, because we know the number of tokens
                }
                final FieldQueryNode newFieldNode = new FieldQueryNode(field, term, -1, -1);
                // set position increment
                newFieldNode.setPositionIncrement(i);
                // assign datatype
                newFieldNode.setTag(DatatypeQueryNode.DATATYPE_TAGID, datatype);
                pq.add(newFieldNode);
            }
            return pq;
        }
        // exactly one token: fall through and keep the node as-is
    }
    return node;
}
From source file:com.sindicetech.siren.solr.analysis.BaseSirenStreamTestCase.java
License:Open Source License
/**
 * Asserts that the stream emits exactly the expected term images, in order,
 * and then reaches end-of-stream. The stream is ended and closed afterwards.
 */
public void assertTokenStreamContents(final TokenStream stream, final String[] expectedImages) throws Exception {
    assertTrue("has TermAttribute", stream.hasAttribute(CharTermAttribute.class));
    final CharTermAttribute termAtt = stream.getAttribute(CharTermAttribute.class);
    stream.reset();
    int i = 0;
    for (final String expected : expectedImages) {
        // clear stale attribute state before pulling the next token
        stream.clearAttributes();
        assertTrue("token " + i + " does not exists", stream.incrementToken());
        assertEquals(expected, termAtt.toString());
        i++;
    }
    assertFalse("end of stream", stream.incrementToken());
    stream.end();
    stream.close();
}
From source file:com.stratio.cassandra.index.query.Condition.java
License:Apache License
/**
 * Analyzes {@code value} with the column mapper's analyzer and returns the
 * single resulting term as a UTF-8 string, or {@code null} if the analyzer
 * produced no token.
 *
 * @throws IllegalArgumentException if the analyzer produced more than one token
 * @throws RuntimeException wrapping any {@link IOException} from the analysis chain
 */
protected String analyze(String field, String value, ColumnMapper<?> columnMapper) {
    TokenStream source = null;
    try {
        Analyzer analyzer = columnMapper.analyzer();
        source = analyzer.tokenStream(field, value);
        source.reset();
        // The attribute's BytesRef instance is reused across tokens: fetch it
        // once here, then refill it after each incrementToken() call.
        TermToBytesRefAttribute termAtt = source.getAttribute(TermToBytesRefAttribute.class);
        BytesRef bytes = termAtt.getBytesRef();
        if (!source.incrementToken()) {
            // analyzer emitted no token for this value
            return null;
        }
        termAtt.fillBytesRef();
        if (source.incrementToken()) {
            throw new IllegalArgumentException("analyzer returned too many terms for multiTerm term: " + value);
        }
        source.end();
        // deep-copy before returning: the shared BytesRef may be recycled once
        // the stream is closed in the finally block
        return BytesRef.deepCopyOf(bytes).utf8ToString();
    } catch (IOException e) {
        throw new RuntimeException("Error analyzing multiTerm term: " + value, e);
    } finally {
        IOUtils.closeWhileHandlingException(source);
    }
}
From source file:com.stratio.cassandra.lucene.schema.analysis.SnowballAnalyzerBuilderTest.java
License:Apache License
private List<String> analyze(String value, Analyzer analyzer) { List<String> result = new ArrayList<>(); TokenStream stream = null; try {//w w w . ja v a2 s. co m stream = analyzer.tokenStream(null, value); stream.reset(); while (stream.incrementToken()) { String analyzedValue = stream.getAttribute(CharTermAttribute.class).toString(); result.add(analyzedValue); } } catch (Exception e) { throw new RuntimeException(e); } finally { IOUtils.closeWhileHandlingException(stream); } return result; }
From source file:com.sxc.lucene.analysis.AnalyzerUtils.java
License:Apache License
public static void displayTokens(TokenStream stream) throws IOException { stream.reset(); CharTermAttribute term = stream.addAttribute(CharTermAttribute.class); while (stream.incrementToken()) { System.out.print("[" + term + "] "); // B }//from w ww . j a v a 2s .co m stream.close(); }
From source file:com.sxc.lucene.analysis.AnalyzerUtils.java
License:Apache License
public static void displayTokensWithPositions(Analyzer analyzer, String text) throws IOException { TokenStream stream = analyzer.tokenStream("contents", new StringReader(text)); stream.reset(); CharTermAttribute term = stream.addAttribute(CharTermAttribute.class); PositionIncrementAttribute posIncr = stream.addAttribute(PositionIncrementAttribute.class); int position = 0; while (stream.incrementToken()) { int increment = posIncr.getPositionIncrement(); if (increment > 0) { position = position + increment; System.out.println(); System.out.print(position + ": "); }//from w w w .j a va 2s . co m System.out.print("[" + term + "] "); } stream.close(); System.out.println(); }
From source file:com.sxc.lucene.analysis.AnalyzerUtils.java
License:Apache License
public static void displayTokensWithFullDetails(Analyzer analyzer, String text) throws IOException { TokenStream stream = analyzer.tokenStream("contents", // #A new StringReader(text)); stream.reset(); CharTermAttribute term = stream.addAttribute(CharTermAttribute.class); // #B PositionIncrementAttribute posIncr = // #B stream.addAttribute(PositionIncrementAttribute.class); // #B OffsetAttribute offset = stream.addAttribute(OffsetAttribute.class); // #B TypeAttribute type = stream.addAttribute(TypeAttribute.class); // #B int position = 0; while (stream.incrementToken()) { // #C int increment = posIncr.getPositionIncrement(); // #D if (increment > 0) { // #D position = position + increment; // #D System.out.println(); // #D System.out.print(position + ": "); // #D }/* ww w . j a v a 2s. co m*/ System.out.print("[" + // #E term + ":" + // #E offset.startOffset() + "->" + // #E offset.endOffset() + ":" + // #E type.type() + "] "); // #E } stream.close(); System.out.println(); }
From source file:com.sxc.lucene.analysis.AnalyzerUtils.java
License:Apache License
/**
 * Asserts that analyzing {@code input} yields exactly the terms in
 * {@code output}, in order, with no extra trailing token.
 * The stream is closed even when an assertion fails.
 */
public static void assertAnalyzesTo(Analyzer analyzer, String input, String[] output) throws Exception {
    TokenStream stream = analyzer.tokenStream("field", new StringReader(input));
    try {
        stream.reset();
        CharTermAttribute termAttr = stream.addAttribute(CharTermAttribute.class);
        for (String expected : output) {
            Assert.assertTrue(stream.incrementToken());
            Assert.assertEquals(expected, termAttr.toString());
        }
        Assert.assertFalse(stream.incrementToken());
        // TokenStream contract: end() after the last token (original skipped it)
        stream.end();
    } finally {
        // original leaked the stream whenever an assertion threw
        stream.close();
    }
}