List of usage examples for org.apache.lucene.analysis TokenStream close
@Override public void close() throws IOException
From source file:org.pageseeder.flint.lucene.search.Fields.java
License:Apache License
/** * Returns the terms for a field/* w w w . j a va2 s. co m*/ * * @param field The field * @param text The text to analyze * @param analyzer The analyzer * * @return the corresponding list of terms produced by the analyzer. * * @throws IOException */ public static List<String> toTerms(String field, String text, Analyzer analyzer) { List<String> terms = new ArrayList<String>(); try { TokenStream stream = analyzer.tokenStream(field, new StringReader(text)); CharTermAttribute attribute = stream.addAttribute(CharTermAttribute.class); stream.reset(); while (stream.incrementToken()) { String term = attribute.toString(); terms.add(term); } stream.end(); stream.close(); } catch (IOException ex) { // Should not occur since we use a StringReader ex.printStackTrace(); } return terms; }
From source file:org.riotfamily.search.SimpleSearchQueryParser.java
License:Apache License
protected List<Token> getTokens(String text) { ArrayList<Token> tokens = Generics.newArrayList(); try {//w ww .jav a2s . com TokenStream source = analyzer.tokenStream(null, new StringReader(text)); Token token; while ((token = source.next()) != null) { tokens.add(token); } source.close(); } catch (IOException e) { } return tokens; }
From source file:org.sc.probro.lucene.BiothesaurusSearcher.java
License:Apache License
public String[] tokenize(String input) { ArrayList<String> tokens = new ArrayList<String>(); try {/*from w w w .j a va 2s . c o m*/ TokenStream stream = analyzer.tokenStream(null, new StringReader(input)); TermAttribute termattr = (TermAttribute) stream.getAttribute(TermAttribute.class); //stream = new LowerCaseFilter(stream); stream.reset(); while (stream.incrementToken()) { if (stream.hasAttribute(TermAttribute.class)) { String term = termattr.term(); tokens.add(term); } } stream.end(); stream.close(); } catch (IllegalArgumentException e) { System.err.println(String.format("Phrase: \"%s\"", input)); e.printStackTrace(System.err); } catch (IOException e) { System.err.println(String.format("Phrase: \"%s\"", input)); e.printStackTrace(); } return tokens.toArray(new String[0]); }
From source file:org.sc.probro.lucene.BiothesaurusSearcher.java
License:Apache License
public Query createPhraseQuery(String field, String phrase) throws IOException { PhraseQuery query = new PhraseQuery(); /*/*from w ww . ja va 2 s.c o m*/ String[] array = phrase.split("\\s+"); for(int i = 0; i < array.length; i++) { query.add(new Term(field, array[i])); } */ try { TokenStream stream = analyzer.tokenStream(field, new StringReader(phrase)); //stream = new LowerCaseFilter(stream); stream.reset(); while (stream.incrementToken()) { if (stream.hasAttribute(TermAttribute.class)) { TermAttribute termattr = (TermAttribute) stream.getAttribute(TermAttribute.class); Term t = new Term(field, termattr.term()); query.add(t); } } stream.end(); stream.close(); } catch (IllegalArgumentException e) { e.printStackTrace(System.err); System.err.println(String.format("Phrase: \"%s\"", phrase)); } return query; }
From source file:org.sc.probro.lucene.ProteinSearcher.java
License:Apache License
public String[] tokenize(String input) { ArrayList<String> tokens = new ArrayList<String>(); try {//from w ww. j a v a 2 s . c o m TokenStream stream = analyzer.tokenStream(null, new StringReader(input)); stream = new LowerCaseFilter(stream); stream.reset(); while (stream.incrementToken()) { if (stream.hasAttribute(TermAttribute.class)) { TermAttribute termattr = (TermAttribute) stream.getAttribute(TermAttribute.class); String term = termattr.term(); tokens.add(term); } } stream.end(); stream.close(); } catch (IllegalArgumentException e) { System.err.println(String.format("Phrase: \"%s\"", input)); e.printStackTrace(System.err); } catch (IOException e) { System.err.println(String.format("Phrase: \"%s\"", input)); e.printStackTrace(); } return tokens.toArray(new String[0]); }
From source file:org.sc.probro.lucene.ProteinSearcher.java
License:Apache License
public Query createPhraseQuery(String field, String phrase) throws IOException { PhraseQuery query = new PhraseQuery(); /*/*from ww w . j av a2 s. c om*/ String[] array = phrase.split("\\s+"); for(int i = 0; i < array.length; i++) { query.add(new Term(field, array[i])); } */ try { TokenStream stream = analyzer.tokenStream(field, new StringReader(phrase)); stream = new LowerCaseFilter(stream); stream.reset(); while (stream.incrementToken()) { if (stream.hasAttribute(TermAttribute.class)) { TermAttribute termattr = (TermAttribute) stream.getAttribute(TermAttribute.class); Term t = new Term(field, termattr.term()); query.add(t); } } stream.end(); stream.close(); } catch (IllegalArgumentException e) { e.printStackTrace(System.err); System.err.println(String.format("Phrase: \"%s\"", phrase)); } return query; }
From source file:org.sd.text.lucene.LuceneUtils.java
License:Open Source License
/**
 * Split the string into tokens using the given analyzer.
 *
 * @param analyzer  the analyzer to tokenize with; when {@code null} the
 *                  whole string is returned as a single "token"
 * @param fieldName the field name passed to the analyzer
 * @param string    the text to tokenize; {@code null} yields {@code null}
 * @return the token texts, or {@code null} for {@code null} input
 * @throws IllegalStateException wrapping any analysis I/O error
 */
public static final List<String> getTokenTexts(Analyzer analyzer, String fieldName, String string) {
    if (string == null)
        return null;

    final List<String> result = new ArrayList<String>();

    if (analyzer != null) {
        final TokenStream tokenStream = analyzer.tokenStream(fieldName, new StringReader(string));
        try {
            // NOTE(review): no reset() before incrementToken() — presumably
            // fine for the Lucene version this targets; verify on upgrade.
            while (tokenStream.incrementToken()) {
                if (tokenStream.hasAttribute(TermAttribute.class)) {
                    final TermAttribute termAttribute = (TermAttribute) tokenStream
                            .getAttribute(TermAttribute.class);
                    result.add(termAttribute.term());
                }
            }
        } catch (IOException e) {
            throw new IllegalStateException(e);
        } finally {
            // Close in finally so the stream is released even when the
            // IOException above is rethrown.
            try {
                tokenStream.close();
            } catch (IOException e) {
                throw new IllegalStateException(e);
            }
        }
    } else {
        result.add(string);
    }

    return result;
}
From source file:org.sd.text.lucene.LuceneUtils.java
License:Open Source License
/**
 * Splits the string into phrases of consecutive tokens. Tokens whose start
 * offset equals the previous token's end offset are kept in the same
 * phrase; any gap between offsets starts a new phrase.
 *
 * @param analyzer  the analyzer to tokenize with; when {@code null} the
 *                  whole string becomes a single one-element phrase
 * @param fieldName the field name passed to the analyzer
 * @param string    the text to split; {@code null} yields {@code null}
 * @return the phrases, each a list of token texts (never empty: at least
 *         one — possibly empty — phrase list is returned)
 * @throws IllegalStateException wrapping any analysis I/O error
 */
public static final List<List<String>> getPhraseTexts(Analyzer analyzer, String fieldName, String string) {
    if (string == null)
        return null;

    final List<List<String>> result = new LinkedList<List<String>>();
    List<String> curPhrase = new ArrayList<String>();
    result.add(curPhrase);

    if (analyzer != null) {
        final TokenStream tokenStream = analyzer.tokenStream(fieldName, new StringReader(string));
        int lastEndOffset = 0;
        try {
            while (tokenStream.incrementToken()) {
                boolean incPhrase = true;
                if (tokenStream.hasAttribute(OffsetAttribute.class)) {
                    final OffsetAttribute offsetAttribute = (OffsetAttribute) tokenStream
                            .getAttribute(OffsetAttribute.class);
                    // Adjacent tokens (no gap in offsets) stay in one phrase.
                    if (offsetAttribute.startOffset() == lastEndOffset) {
                        incPhrase = false;
                    }
                    lastEndOffset = offsetAttribute.endOffset();
                }
                if (tokenStream.hasAttribute(TermAttribute.class)) {
                    final TermAttribute termAttribute = (TermAttribute) tokenStream
                            .getAttribute(TermAttribute.class);
                    if (incPhrase && curPhrase.size() > 0) {
                        curPhrase = new ArrayList<String>();
                        result.add(curPhrase);
                    }
                    curPhrase.add(termAttribute.term());
                }
            }
        } catch (IOException e) {
            throw new IllegalStateException(e);
        } finally {
            // Close in finally so the stream is released even when the
            // IOException above is rethrown.
            try {
                tokenStream.close();
            } catch (IOException e) {
                throw new IllegalStateException(e);
            }
        }
    } else {
        curPhrase.add(string);
    }

    return result;
}
From source file:org.sindice.siren.analysis.TestTupleAnalyzer.java
License:Apache License
public void assertAnalyzesTo(final Analyzer a, final String input, final String[] expectedImages, final String[] expectedTypes, final int[] expectedPosIncrs, final int[] expectedTupleID, final int[] expectedCellID) throws Exception { final TokenStream t = a.reusableTokenStream("", new StringReader(input)); assertTrue("has TermAttribute", t.hasAttribute(TermAttribute.class)); final TermAttribute termAtt = t.getAttribute(TermAttribute.class); TypeAttribute typeAtt = null;//from w ww.jav a 2 s. co m if (expectedTypes != null) { assertTrue("has TypeAttribute", t.hasAttribute(TypeAttribute.class)); typeAtt = t.getAttribute(TypeAttribute.class); } PositionIncrementAttribute posIncrAtt = null; if (expectedPosIncrs != null) { assertTrue("has PositionIncrementAttribute", t.hasAttribute(PositionIncrementAttribute.class)); posIncrAtt = t.getAttribute(PositionIncrementAttribute.class); } TupleAttribute tupleAtt = null; if (expectedTupleID != null) { assertTrue("has TupleAttribute", t.hasAttribute(TupleAttribute.class)); tupleAtt = t.getAttribute(TupleAttribute.class); } CellAttribute cellAtt = null; if (expectedCellID != null) { assertTrue("has CellAttribute", t.hasAttribute(CellAttribute.class)); cellAtt = t.getAttribute(CellAttribute.class); } for (int i = 0; i < expectedImages.length; i++) { assertTrue("token " + i + " exists", t.incrementToken()); assertEquals(expectedImages[i], termAtt.term()); if (expectedTypes != null) { assertEquals(expectedTypes[i], typeAtt.type()); } if (expectedPosIncrs != null) { assertEquals(expectedPosIncrs[i], posIncrAtt.getPositionIncrement()); } if (expectedTupleID != null) { assertEquals(expectedTupleID[i], tupleAtt.tuple()); } if (expectedCellID != null) { assertEquals(expectedCellID[i], cellAtt.cell()); } } assertFalse("end of stream", t.incrementToken()); t.end(); t.close(); }
From source file:org.sindice.siren.qparser.analysis.QNamesFilterTest.java
License:Apache License
@Test public void testInvalidQName() throws Exception { final String query = "<http:> <foaf:2> <foaf:-qw>"; final NTripleQueryAnalyzer analyzer = new NTripleQueryAnalyzer(); final TokenStream stream = analyzer.tokenStream(null, new StringReader(query)); final TokenFilter filter = new QNamesFilter(stream, "./src/test/resources/conf/qnames"); final CupScannerWrapper wrapper = new CupScannerWrapper(filter); Symbol symbol = wrapper.next_token(); assertTrue(symbol != null);// ww w. jav a 2 s .c o m assertTrue(symbol.value.toString().equals("http:")); symbol = wrapper.next_token(); assertTrue(symbol != null); assertTrue(symbol.value.toString().equals("foaf:2")); symbol = wrapper.next_token(); assertTrue(symbol != null); assertTrue(symbol.value.toString().equals("foaf:-qw")); symbol = wrapper.next_token(); assertTrue(symbol == null); stream.close(); }