List of usage examples for org.apache.lucene.analysis TokenStream close
@Override public void close() throws IOException
From source file:org.sindice.siren.qparser.analysis.QNamesFilterTest.java
License:Apache License
/**
 * Feeds a query made of three absolute HTTP URIs through
 * {@link NTripleQueryAnalyzer} and {@link QNamesFilter}, and checks that the
 * scanner yields one symbol per URI whose value is the URI without its angle
 * brackets, followed by {@code null}.
 */
@Test
public void testQNameHTTP() throws Exception {
  final String query = "<http://ns/#s> <http://ns/p> <http://ns/o>";
  final NTripleQueryAnalyzer analyzer = new NTripleQueryAnalyzer();
  final TokenStream stream = analyzer.tokenStream(null, new StringReader(query));
  // Close the stream even when an assertion (or the filter constructor) fails,
  // so that a failing test does not leak the underlying stream.
  try {
    final TokenFilter filter = new QNamesFilter(stream, "./src/test/resources/conf/qnames");
    final CupScannerWrapper wrapper = new CupScannerWrapper(filter);

    final String[] expectedValues = { "http://ns/#s", "http://ns/p", "http://ns/o" };
    for (final String expected : expectedValues) {
      final Symbol symbol = wrapper.next_token();
      assertTrue(symbol != null);
      assertTrue(symbol.value.toString().equals(expected));
    }

    // The scanner signals end of input with a null symbol.
    assertTrue(wrapper.next_token() == null);
  } finally {
    stream.close();
  }
}
From source file:org.sindice.siren.qparser.analysis.TabularQueryStandardAnalyzerTest.java
License:Open Source License
/**
 * Checks that a tabular query of three cell-indexed URIs yields three
 * {@code URIPATTERN} symbols whose values have the form
 * {@code "[cell] <anyURI datatype>:<uri>"}, followed by {@code null}.
 */
@Test
public void testTabularQueryStandardAnalyzer1() throws Exception {
  final String query = "[0]<http://ns/#s> [66]<http://ns/p> [4]<http://ns/o>";
  final TabularQueryAnalyzer analyzer = new TabularQueryAnalyzer();
  final TokenStream stream = analyzer.tokenStream(null, new StringReader(query));
  // Close the stream even when an assertion fails.
  try {
    final CupScannerWrapper wrapper = new CupScannerWrapper(stream);

    // { cell index as written in the query, expected URI }
    final String[][] expectedCells = {
      { "[0]", "http://ns/#s" },
      { "[66]", "http://ns/p" },
      { "[4]", "http://ns/o" }
    };
    for (final String[] cell : expectedCells) {
      final Symbol symbol = wrapper.next_token();
      assertTrue(symbol != null);
      assertTrue("recieved symbol: " + symbol.sym, symbol.sym == TabularQueryTokenizer.URIPATTERN);
      assertEquals(cell[0] + " " + XSDDatatype.XSD_ANY_URI + ":" + cell[1], symbol.value.toString());
    }

    // The scanner signals end of input with a null symbol.
    assertTrue(wrapper.next_token() == null);
  } finally {
    stream.close();
  }
}
From source file:org.sindice.siren.qparser.analysis.TabularQueryStandardAnalyzerTest.java
License:Open Source License
/**
 * Checks that four cell-indexed URIs given in non-sorted cell order are
 * returned as four {@code URIPATTERN} symbols in query order, each keeping its
 * own cell index, followed by {@code null}.
 */
@Test
public void testTabularQueryStandardAnalyzer2() throws Exception {
  final String query = "[1]<http://ns/o1> [20]<http://ns/o2> [3]<http://ns/o3> [2]<http://ns/o4>";
  final TabularQueryAnalyzer analyzer = new TabularQueryAnalyzer();
  final TokenStream stream = analyzer.tokenStream("tets", new StringReader(query));
  // Close the stream even when an assertion fails.
  try {
    final CupScannerWrapper wrapper = new CupScannerWrapper(stream);

    // { cell index as written in the query, expected URI }
    final String[][] expectedCells = {
      { "[1]", "http://ns/o1" },
      { "[20]", "http://ns/o2" },
      { "[3]", "http://ns/o3" },
      { "[2]", "http://ns/o4" }
    };
    for (final String[] cell : expectedCells) {
      final Symbol symbol = wrapper.next_token();
      assertTrue(symbol != null);
      assertTrue(symbol.sym == TabularQueryTokenizer.URIPATTERN);
      assertEquals(cell[0] + " " + XSDDatatype.XSD_ANY_URI + ":" + cell[1], symbol.value.toString());
    }

    // The scanner signals end of input with a null symbol.
    assertTrue(wrapper.next_token() == null);
  } finally {
    stream.close();
  }
}
From source file:org.sindice.siren.qparser.analysis.TabularQueryStandardAnalyzerTest.java
License:Open Source License
/**
 * Checks that a URI cell followed by a double-quoted cell yields a
 * {@code URIPATTERN} symbol and then a {@code LITERAL} symbol typed with
 * {@code XSD_STRING}, followed by {@code null}.
 */
@Test
public void testTabularQueryStandardAnalyzer3() throws Exception {
  final String query = "[1]<http://ns/p> [3]\"test\"";
  final TabularQueryAnalyzer analyzer = new TabularQueryAnalyzer();
  final TokenStream stream = analyzer.tokenStream("tets", new StringReader(query));
  // Close the stream even when an assertion fails.
  try {
    final CupScannerWrapper wrapper = new CupScannerWrapper(stream);

    Symbol symbol = wrapper.next_token();
    assertTrue(symbol != null);
    assertTrue(symbol.sym == TabularQueryTokenizer.URIPATTERN);
    assertEquals("[1] " + XSDDatatype.XSD_ANY_URI + ":http://ns/p", symbol.value.toString());

    symbol = wrapper.next_token();
    assertTrue(symbol != null);
    assertTrue(symbol.sym == TabularQueryTokenizer.LITERAL);
    assertEquals("[3] " + XSDDatatype.XSD_STRING + ":test", symbol.value.toString());

    // The scanner signals end of input with a null symbol.
    symbol = wrapper.next_token();
    assertTrue(symbol == null);
  } finally {
    stream.close();
  }
}
From source file:org.sindice.siren.qparser.analysis.TabularQueryStandardAnalyzerTest.java
License:Open Source License
/**
 * Checks that a single-quoted cell followed by a URI cell yields an
 * {@code LPATTERN} symbol typed with {@code XSD_STRING} and then a
 * {@code URIPATTERN} symbol, followed by {@code null}.
 */
@Test
public void testTabularQueryStandardAnalyzer4() throws Exception {
  final String query = "[3]'test' [1]<http://ns/p>";
  final TabularQueryAnalyzer analyzer = new TabularQueryAnalyzer();
  final TokenStream stream = analyzer.tokenStream("tets", new StringReader(query));
  // Close the stream even when an assertion fails.
  try {
    final CupScannerWrapper wrapper = new CupScannerWrapper(stream);

    Symbol symbol = wrapper.next_token();
    assertTrue(symbol != null);
    assertTrue(symbol.sym == TabularQueryTokenizer.LPATTERN);
    assertEquals("[3] " + XSDDatatype.XSD_STRING + ":test", symbol.value.toString());

    symbol = wrapper.next_token();
    assertTrue(symbol != null);
    assertTrue(symbol.sym == TabularQueryTokenizer.URIPATTERN);
    assertEquals("[1] " + XSDDatatype.XSD_ANY_URI + ":http://ns/p", symbol.value.toString());

    // The scanner signals end of input with a null symbol.
    symbol = wrapper.next_token();
    assertTrue(symbol == null);
  } finally {
    stream.close();
  }
}
From source file:org.sindice.siren.qparser.analysis.TabularQueryStandardAnalyzerTest.java
License:Open Source License
@Test public void testTabularQueryStandardAnalyzer5() throws Exception { final String query = "[1]<http://ns/s> [2]<http://ns/p> *"; final TabularQueryAnalyzer analyzer = new TabularQueryAnalyzer(); final TokenStream stream = analyzer.tokenStream("tets", new StringReader(query)); final CupScannerWrapper wrapper = new CupScannerWrapper(stream); Symbol symbol = wrapper.next_token(); assertTrue(symbol != null);// ww w. j ava 2 s .co m assertTrue(symbol.sym == TabularQueryTokenizer.URIPATTERN); assertEquals("[1] " + XSDDatatype.XSD_ANY_URI + ":http://ns/s", symbol.value.toString()); symbol = wrapper.next_token(); assertTrue(symbol != null); assertTrue(symbol.sym == TabularQueryTokenizer.URIPATTERN); assertEquals("[2] " + XSDDatatype.XSD_ANY_URI + ":http://ns/p", symbol.value.toString()); symbol = wrapper.next_token(); assertTrue(symbol != null); assertTrue("recieved symbol: " + symbol.sym, symbol.sym == TabularQueryTokenizer.ERROR); stream.close(); }
From source file:org.sindice.siren.qparser.analysis.TabularQueryStandardAnalyzerTest.java
License:Open Source License
@Test public void testTabularQueryStandardAnalyzer6() throws Exception { final String query = "[1]'test' [20]<http://ns/o2> [3]\"tea 4 two\" [2]'you'^^<mydatatype>"; final TabularQueryAnalyzer analyzer = new TabularQueryAnalyzer(); final TokenStream stream = analyzer.tokenStream("tets", new StringReader(query)); final CupScannerWrapper wrapper = new CupScannerWrapper(stream); Symbol symbol = wrapper.next_token(); assertTrue(symbol != null);//from w w w. ja va 2 s. com assertTrue(symbol.sym == TabularQueryTokenizer.LPATTERN); assertEquals("[1] " + XSDDatatype.XSD_STRING + ":test", symbol.value.toString()); symbol = wrapper.next_token(); assertTrue(symbol != null); assertTrue(symbol.sym == TabularQueryTokenizer.URIPATTERN); assertEquals("[20] " + XSDDatatype.XSD_ANY_URI + ":http://ns/o2", symbol.value.toString()); symbol = wrapper.next_token(); assertTrue(symbol != null); assertTrue(symbol.sym == TabularQueryTokenizer.LITERAL); assertEquals("[3] " + XSDDatatype.XSD_STRING + ":tea 4 two", symbol.value.toString()); symbol = wrapper.next_token(); assertTrue(symbol != null); assertTrue(symbol.sym == TabularQueryTokenizer.LPATTERN); assertEquals("[2] mydatatype:you", symbol.value.toString()); symbol = wrapper.next_token(); assertTrue(symbol == null); stream.close(); }
From source file:org.sindice.siren.qparser.entity.EntityQueryParser.java
License:Apache License
/** * Parse a NTriple query and return a Lucene {@link Query}. * * @param qstr The query string//from www . j a va2 s . co m * @param defaultField The default field to query * @param analyzer The query analyser * @return A Lucene's {@link Query} * @throws ParseException If something is wrong with the query string */ public static final Query parse(final String qstr, final String defaultField, final Analyzer analyzer) throws ParseException { final TokenStream stream = analyzer.tokenStream(defaultField, new StringReader(qstr)); final EntityQParserImpl lparser = new EntityQParserImpl(new CupScannerWrapper(stream)); Symbol sym = null; try { sym = lparser.parse(); stream.close(); // safe since stream is backed by StringReader } catch (final Exception e) { e.printStackTrace(); if (e != null) throw new ParseException(e.toString()); } // final NTripleQueryBuilder translator = new NTripleQueryBuilder(Version.LUCENE_31, defaultField); // final NTripleQuery q = (NTripleQuery) sym.value; // q.traverseBottomUp(translator); // // return q.getQuery(); return null; }
From source file:org.sindice.siren.qparser.keyword.processors.DatatypeAnalyzerProcessor.java
License:Apache License
@Override protected QueryNode postProcessNode(final QueryNode node) throws QueryNodeException { if (node instanceof TextableQueryNode && !(node instanceof WildcardQueryNode) && !(node instanceof FuzzyQueryNode) && !(node instanceof RegexpQueryNode) && !(node.getParent() instanceof RangeQueryNode)) { this.positionIncrementsEnabled = false; final Boolean positionIncrementsEnabled = this.getQueryConfigHandler() .get(ConfigurationKeys.ENABLE_POSITION_INCREMENTS); if (positionIncrementsEnabled != null) { this.positionIncrementsEnabled = positionIncrementsEnabled; }/*from w w w . j a v a 2 s . c o m*/ final FieldQueryNode fieldNode = ((FieldQueryNode) node); final String text = fieldNode.getTextAsString(); final String field = fieldNode.getFieldAsString(); final String datatype = (String) fieldNode.getTag(DatatypeQueryNode.DATATYPE_TAGID); if (datatype == null) { return node; } final Analyzer analyzer = this.getQueryConfigHandler().get(KeywordConfigurationKeys.DATATYPES_ANALYZERS) .get(datatype); if (analyzer == null) { throw new QueryNodeException(new MessageImpl(QueryParserMessages.INVALID_SYNTAX, "No analyzer associated with " + datatype)); } PositionIncrementAttribute posIncrAtt = null; int numTokens = 0; int positionCount = 0; boolean severalTokensAtSamePosition = false; final TokenStream source; try { source = analyzer.tokenStream(field, new StringReader(text)); source.reset(); } catch (final IOException e1) { throw new RuntimeException(e1); } final CachingTokenFilter buffer = new CachingTokenFilter(source); if (buffer.hasAttribute(PositionIncrementAttribute.class)) { posIncrAtt = buffer.getAttribute(PositionIncrementAttribute.class); } try { while (buffer.incrementToken()) { numTokens++; final int positionIncrement = (posIncrAtt != null) ? 
posIncrAtt.getPositionIncrement() : 1; if (positionIncrement != 0) { positionCount += positionIncrement; } else { severalTokensAtSamePosition = true; } } } catch (final IOException e) { // ignore } try { // rewind the buffer stream buffer.reset(); // close original stream - all tokens buffered source.close(); } catch (final IOException e) { // ignore } if (!buffer.hasAttribute(CharTermAttribute.class)) { return new NoTokenFoundQueryNode(); } final CharTermAttribute termAtt = buffer.getAttribute(CharTermAttribute.class); if (numTokens == 0) { if (nbTwigs != 0) { // Twig special case return new WildcardNodeQueryNode(); } return new NoTokenFoundQueryNode(); } else if (numTokens == 1) { String term = null; try { boolean hasNext; hasNext = buffer.incrementToken(); assert hasNext == true; term = termAtt.toString(); } catch (final IOException e) { // safe to ignore, because we know the number of tokens } fieldNode.setText(term); return fieldNode; } else { // no phrase query: final LinkedList<QueryNode> children = new LinkedList<QueryNode>(); int position = -1; for (int i = 0; i < numTokens; i++) { String term = null; final int positionIncrement = 1; try { final boolean hasNext = buffer.incrementToken(); assert hasNext == true; term = termAtt.toString(); } catch (final IOException e) { // safe to ignore, because we know the number of tokens } final FieldQueryNode newFieldNode = new FieldQueryNode(field, term, -1, -1); if (this.positionIncrementsEnabled) { position += positionIncrement; newFieldNode.setPositionIncrement(position); } else { newFieldNode.setPositionIncrement(i); } children.add(new FieldQueryNode(field, term, -1, -1)); } if (node.getParent() instanceof TokenizedPhraseQueryNode) { throw new QueryNodeException(new MessageImpl("Cannot build a MultiPhraseQuery")); } // If multiple terms at one single position, this must be a query // expansion. Perform a OR between the terms. 
if (severalTokensAtSamePosition && positionCount == 1) { return new GroupQueryNode(new OrQueryNode(children)); } // if several tokens at same position && position count > 1, then // results can be unexpected else { final TokenizedPhraseQueryNode pq = new TokenizedPhraseQueryNode(); for (int i = 0; i < children.size(); i++) { pq.add(children.get(i)); } return pq; } } } else if (node instanceof TwigQueryNode) { nbTwigs--; assert nbTwigs >= 0; } return node; }
From source file:org.sindice.siren.qparser.keyword.processors.PhraseQueryNodeProcessor.java
License:Apache License
@Override protected QueryNode postProcessNode(final QueryNode node) throws QueryNodeException { if (node instanceof TextableQueryNode && !(node instanceof WildcardQueryNode) && !(node instanceof FuzzyQueryNode) && !(node instanceof RegexpQueryNode) && !(node.getParent() instanceof RangeQueryNode)) { final FieldQueryNode fieldNode = ((FieldQueryNode) node); final String text = fieldNode.getTextAsString(); final String field = fieldNode.getFieldAsString(); final TokenStream source; try {//from w ww .j ava 2s . c om source = this.analyzer.tokenStream(field, new StringReader(text)); source.reset(); } catch (final IOException e1) { throw new RuntimeException(e1); } final CachingTokenFilter buffer = new CachingTokenFilter(source); int numTokens = 0; try { while (buffer.incrementToken()) { numTokens++; } } catch (final IOException e) { // ignore } try { // rewind the buffer stream buffer.reset(); // close original stream - all tokens buffered source.close(); } catch (final IOException e) { // ignore } if (!buffer.hasAttribute(CharTermAttribute.class)) { return new NoTokenFoundQueryNode(); } final CharTermAttribute termAtt = buffer.getAttribute(CharTermAttribute.class); if (numTokens == 0) { return new NoTokenFoundQueryNode(); } else if (numTokens != 1) { // phrase query final TokenizedPhraseQueryNode pq = new TokenizedPhraseQueryNode(); for (int i = 0; i < numTokens; i++) { String term = null; try { final boolean hasNext = buffer.incrementToken(); assert hasNext == true; term = termAtt.toString(); } catch (final IOException e) { // safe to ignore, because we know the number of tokens } final FieldQueryNode newFieldNode = new FieldQueryNode(field, term, -1, -1); newFieldNode.setPositionIncrement(i); pq.add(newFieldNode); } return pq; } } return node; }