List of usage examples for org.apache.lucene.analysis TokenStream close
@Override public void close() throws IOException
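All of the examples below follow the same consume lifecycle before calling close(): reset(), incrementToken() in a loop, then end() and close(). Since TokenStream implements Closeable, the close() call can also be handled with try-with-resources. The following is a minimal sketch of that pattern, not taken from any of the source files listed here; the analyzer, field name, and sample text are placeholders, and the String-based tokenStream overload assumes a recent (4.x+) Lucene version.

import java.io.IOException;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;

public class CloseExample {
    public static void main(String[] args) throws IOException {
        Analyzer analyzer = new StandardAnalyzer();
        // try-with-resources guarantees close() even if consumption fails
        try (TokenStream stream = analyzer.tokenStream("contents", "some sample text")) {
            CharTermAttribute term = stream.addAttribute(CharTermAttribute.class);
            stream.reset();                  // mandatory before the first incrementToken()
            while (stream.incrementToken()) {
                System.out.println(term.toString());
            }
            stream.end();                    // record final state before closing
        }                                    // close() releases the underlying resources
        analyzer.close();
    }
}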
From source file: com.sindicetech.siren.analysis.TestConciseJsonAnalyzer.java
License: Open Source License
@Test
public void testNumeric() throws Exception {
    _a.registerDatatype(XSDDatatype.XSD_LONG.toCharArray(), new LongNumericAnalyzer(64));
    final TokenStream t = _a.tokenStream("", new StringReader("{ \"a\" : 12 }"));
    final CharTermAttribute termAtt = t.getAttribute(CharTermAttribute.class);
    t.reset();
    assertTrue(t.incrementToken());
    assertTrue(termAtt.toString().startsWith("a:"));
    t.end();
    t.close();
}
From source file: com.sindicetech.siren.qparser.keyword.processors.PhraseQueryNodeProcessor.java
License: Open Source License
@Override
protected QueryNode postProcessNode(final QueryNode node) throws QueryNodeException {
    if (node instanceof TextableQueryNode
        && !(node instanceof WildcardQueryNode)
        && !(node instanceof FuzzyQueryNode)
        && !(node instanceof RegexpQueryNode)
        && !(node instanceof ProtectedQueryNode)
        && !(node.getParent() instanceof RangeQueryNode)) {
        final FieldQueryNode fieldNode = ((FieldQueryNode) node);
        final String text = fieldNode.getTextAsString();
        final String field = fieldNode.getFieldAsString();
        final TokenStream source;
        try {
            source = this.analyzer.tokenStream(field, new StringReader(text));
            source.reset();
        } catch (final IOException e1) {
            throw new RuntimeException(e1);
        }
        final CachingTokenFilter buffer = new CachingTokenFilter(source);
        int numTokens = 0;
        try {
            while (buffer.incrementToken()) {
                numTokens++;
            }
        } catch (final IOException e) {
            // ignore
        }
        try {
            // rewind the buffer stream
            buffer.reset();
            // close original stream - all tokens buffered
            source.close();
        } catch (final IOException e) {
            // ignore
        }
        if (!buffer.hasAttribute(CharTermAttribute.class)) {
            return new NoTokenFoundQueryNode();
        }
        final CharTermAttribute termAtt = buffer.getAttribute(CharTermAttribute.class);
        if (numTokens == 0) {
            return new NoTokenFoundQueryNode();
        }
        // phrase query
        else if (numTokens != 1) {
            String datatype = (String) DatatypeProcessor.getDatatype(this.getQueryConfigHandler(), node);
            final TokenizedPhraseQueryNode pq = new TokenizedPhraseQueryNode();
            // assign datatype
            pq.setTag(DatatypeQueryNode.DATATYPE_TAGID, datatype);
            for (int i = 0; i < numTokens; i++) {
                String term = null;
                try {
                    final boolean hasNext = buffer.incrementToken();
                    assert hasNext == true;
                    term = termAtt.toString();
                } catch (final IOException e) {
                    // safe to ignore, because we know the number of tokens
                }
                final FieldQueryNode newFieldNode = new FieldQueryNode(field, term, -1, -1);
                // set position increment
                newFieldNode.setPositionIncrement(i);
                // assign datatype
                newFieldNode.setTag(DatatypeQueryNode.DATATYPE_TAGID, datatype);
                pq.add(newFieldNode);
            }
            return pq;
        }
    }
    return node;
}
From source file: com.sindicetech.siren.solr.analysis.BaseSirenStreamTestCase.java
License: Open Source License
public void assertTokenStreamContents(final TokenStream stream, final String[] expectedImages) throws Exception {
    assertTrue("has TermAttribute", stream.hasAttribute(CharTermAttribute.class));
    final CharTermAttribute termAtt = stream.getAttribute(CharTermAttribute.class);
    stream.reset();
    for (int i = 0; i < expectedImages.length; i++) {
        stream.clearAttributes();
        assertTrue("token " + i + " does not exists", stream.incrementToken());
        assertEquals(expectedImages[i], termAtt.toString());
    }
    assertFalse("end of stream", stream.incrementToken());
    stream.end();
    stream.close();
}
From source file: com.sxc.lucene.analysis.AnalyzerUtils.java
License: Apache License
public static void displayTokens(TokenStream stream) throws IOException {
    stream.reset();
    CharTermAttribute term = stream.addAttribute(CharTermAttribute.class);
    while (stream.incrementToken()) {
        System.out.print("[" + term + "] "); // B
    }
    stream.close();
}
From source file: com.sxc.lucene.analysis.AnalyzerUtils.java
License: Apache License
public static void displayTokensWithPositions(Analyzer analyzer, String text) throws IOException {
    TokenStream stream = analyzer.tokenStream("contents", new StringReader(text));
    stream.reset();
    CharTermAttribute term = stream.addAttribute(CharTermAttribute.class);
    PositionIncrementAttribute posIncr = stream.addAttribute(PositionIncrementAttribute.class);
    int position = 0;
    while (stream.incrementToken()) {
        int increment = posIncr.getPositionIncrement();
        if (increment > 0) {
            position = position + increment;
            System.out.println();
            System.out.print(position + ": ");
        }
        System.out.print("[" + term + "] ");
    }
    stream.close();
    System.out.println();
}
From source file: com.sxc.lucene.analysis.AnalyzerUtils.java
License: Apache License
public static void displayTokensWithFullDetails(Analyzer analyzer, String text) throws IOException {
    TokenStream stream = analyzer.tokenStream("contents",                    // #A
        new StringReader(text));
    stream.reset();
    CharTermAttribute term = stream.addAttribute(CharTermAttribute.class);   // #B
    PositionIncrementAttribute posIncr =                                     // #B
        stream.addAttribute(PositionIncrementAttribute.class);               // #B
    OffsetAttribute offset = stream.addAttribute(OffsetAttribute.class);     // #B
    TypeAttribute type = stream.addAttribute(TypeAttribute.class);           // #B
    int position = 0;
    while (stream.incrementToken()) {                                        // #C
        int increment = posIncr.getPositionIncrement();                      // #D
        if (increment > 0) {                                                 // #D
            position = position + increment;                                 // #D
            System.out.println();                                            // #D
            System.out.print(position + ": ");                               // #D
        }
        System.out.print("[" +                                               // #E
            term + ":" +                                                     // #E
            offset.startOffset() + "->" +                                    // #E
            offset.endOffset() + ":" +                                       // #E
            type.type() + "] ");                                             // #E
    }
    stream.close();
    System.out.println();
}
From source file: com.sxc.lucene.analysis.AnalyzerUtils.java
License: Apache License
public static void assertAnalyzesTo(Analyzer analyzer, String input, String[] output) throws Exception {
    TokenStream stream = analyzer.tokenStream("field", new StringReader(input));
    stream.reset();
    CharTermAttribute termAttr = stream.addAttribute(CharTermAttribute.class);
    for (String expected : output) {
        Assert.assertTrue(stream.incrementToken());
        Assert.assertEquals(expected, termAttr.toString());
    }
    Assert.assertFalse(stream.incrementToken());
    stream.close();
}
From source file: com.sxc.lucene.analysis.AnalyzerUtils.java
License: Apache License
public static void displayPositionIncrements(Analyzer analyzer, String text) throws IOException {
    TokenStream stream = analyzer.tokenStream("contents", new StringReader(text));
    stream.reset();
    PositionIncrementAttribute posIncr = stream.addAttribute(PositionIncrementAttribute.class);
    while (stream.incrementToken()) {
        System.out.println("posIncr=" + posIncr.getPositionIncrement());
    }
    stream.close();
}
From source file: com.sxc.lucene.analysis.synonym.SynonymAnalyzerTest.java
License: Apache License
public void testJumps() throws Exception {
    TokenStream stream = synonymAnalyzer.tokenStream("contents",  // #A
        new StringReader("jumps"));                               // #A
    stream.reset();
    CharTermAttribute term = stream.addAttribute(CharTermAttribute.class);
    PositionIncrementAttribute posIncr = stream.addAttribute(PositionIncrementAttribute.class);
    int i = 0;
    String[] expected = new String[] { "jumps",  // #B
        "hops",                                  // #B
        "leaps" };                               // #B
    while (stream.incrementToken()) {
        assertEquals(expected[i], term.toString());
        int expectedPos;                         // #C
        if (i == 0) {                            // #C
            expectedPos = 1;                     // #C
        } else {                                 // #C
            expectedPos = 0;                     // #C
        }                                        // #C
        assertEquals(expectedPos,                // #C
            posIncr.getPositionIncrement());     // #C
        i++;
    }
    stream.close();
    assertEquals(3, i);
}
From source file: com.talis.lucene.analysis.Utils.java
License: Apache License
public static void assertAnalyzesTo(Analyzer a, String input, String... expected) throws IOException {
    TokenStream ts = a.tokenStream("dummy", new StringReader(input));
    for (int i = 0; i < expected.length; i++) {
        Token t = ts.next();
        assertNotNull(t);
        assertEquals(expected[i], t.termText());
    }
    assertNull(ts.next());
    ts.close();
}
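Note that this last example uses the older Token-based API (ts.next() and Token.termText()), which later Lucene releases replaced with the attribute-based API seen in the other examples. As a hedged sketch, not part of the original Utils.java, the same assertion against a current Lucene version could look roughly like this, with the helper name chosen here for illustration:

public static void assertAnalyzesToAttributeApi(Analyzer a, String input, String... expected) throws IOException {
    // try-with-resources ensures close() runs even when an assertion fails
    try (TokenStream ts = a.tokenStream("dummy", new StringReader(input))) {
        CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class);
        ts.reset();
        for (String exp : expected) {
            assertTrue(ts.incrementToken());   // a token must exist for each expected term
            assertEquals(exp, termAtt.toString());
        }
        assertFalse(ts.incrementToken());      // no trailing tokens
        ts.end();
    }
}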