List of usage examples for org.apache.lucene.analysis TokenStream reset
public void reset() throws IOException
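All of the examples below follow the same contract: call reset() before the first incrementToken(), call end() after the last token has been consumed, then close() the stream. A minimal, self-contained sketch of that pattern, assuming a Lucene 4.x+ attribute API and a StandardAnalyzer (neither is mandated by the examples below):

import java.io.IOException;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;

public class TokenStreamResetExample {
    public static void main(String[] args) throws IOException {
        Analyzer analyzer = new StandardAnalyzer();
        // try-with-resources guarantees close() even if consumption fails
        try (TokenStream stream = analyzer.tokenStream("body", "Hello token stream world")) {
            CharTermAttribute termAttr = stream.addAttribute(CharTermAttribute.class);
            stream.reset();                  // required before the first incrementToken()
            while (stream.incrementToken()) {
                System.out.println(termAttr.toString());
            }
            stream.end();                    // records the final offset state
        }
        analyzer.close();
    }
}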
From source file:org.exist.indexing.lucene.XMLToQuery.java
License:Open Source License
private SpanQuery nearQuery(String field, Element node, Analyzer analyzer) throws XPathException {
    int slop = getSlop(node);
    if (slop < 0)
        slop = 0;
    boolean inOrder = true;
    if (node.hasAttribute("ordered"))
        inOrder = node.getAttribute("ordered").equals("yes");
    if (!hasElementContent(node)) {
        String qstr = getText(node);
        List<SpanTermQuery> list = new ArrayList<>(8);
        try {
            TokenStream stream = analyzer.tokenStream(field, new StringReader(qstr));
            CharTermAttribute termAttr = stream.addAttribute(CharTermAttribute.class);
            stream.reset();
            while (stream.incrementToken()) {
                list.add(new SpanTermQuery(new Term(field, termAttr.toString())));
            }
            stream.end();
            stream.close();
        } catch (IOException e) {
            throw new XPathException("Error while parsing phrase query: " + qstr);
        }
        return new SpanNearQuery(list.toArray(new SpanTermQuery[list.size()]), slop, inOrder);
    }
    SpanQuery[] children = parseSpanChildren(field, node, analyzer);
    return new SpanNearQuery(children, slop, inOrder);
}
From source file:org.exist.indexing.lucene.XMLToQuery.java
License:Open Source License
private String getTerm(String field, String text, Analyzer analyzer) throws XPathException {
    String term = null;
    try {
        TokenStream stream = analyzer.tokenStream(field, new StringReader(text));
        CharTermAttribute termAttr = stream.addAttribute(CharTermAttribute.class);
        stream.reset();
        if (stream.incrementToken()) {
            term = termAttr.toString();
        }
        stream.end();
        stream.close();
        return term;
    } catch (IOException e) {
        throw new XPathException("Lucene index error while creating query: " + e.getMessage(), e);
    }
}
From source file:org.exist.indexing.range.RangeIndexWorker.java
License:Open Source License
protected BytesRef analyzeContent(String field, QName qname, String data, DocumentSet docs) throws XPathException {
    final Analyzer analyzer = getAnalyzer(qname, field, docs);
    if (!isCaseSensitive(qname, field, docs)) {
        data = data.toLowerCase();
    }
    if (analyzer == null) {
        return new BytesRef(data);
    }
    try {
        TokenStream stream = analyzer.tokenStream(field, new StringReader(data));
        TermToBytesRefAttribute termAttr = stream.addAttribute(TermToBytesRefAttribute.class);
        BytesRef token = null;
        try {
            stream.reset();
            if (stream.incrementToken()) {
                termAttr.fillBytesRef();
                token = termAttr.getBytesRef();
            }
            stream.end();
        } finally {
            stream.close();
        }
        return token;
    } catch (IOException e) {
        throw new XPathException("Error analyzing the query string: " + e.getMessage(), e);
    }
}
From source file:org.fastcatsearch.ir.index.SearchIndexWriter.java
License:Apache License
private void indexValue(int docNo, int i, Object value, boolean isIgnoreCase, int positionIncrementGap)
        throws IOException, IRException {
    if (value == null) {
        return;
    }
    char[] fieldValue = value.toString().toCharArray();
    TokenStream tokenStream = indexAnalyzerList[i].tokenStream(indexId, new CharArrayReader(fieldValue),
            indexingAnalyzerOption);
    tokenStream.reset();
    CharsRefTermAttribute termAttribute = null;
    PositionIncrementAttribute positionAttribute = null;
    StopwordAttribute stopwordAttribute = null;
    AdditionalTermAttribute additionalTermAttribute = null;
    CharTermAttribute charTermAttribute = null;
    // Fetch only the attributes this stream actually provides.
    if (tokenStream.hasAttribute(CharsRefTermAttribute.class)) {
        termAttribute = tokenStream.getAttribute(CharsRefTermAttribute.class);
    }
    if (tokenStream.hasAttribute(PositionIncrementAttribute.class)) {
        positionAttribute = tokenStream.getAttribute(PositionIncrementAttribute.class);
    }
    if (tokenStream.hasAttribute(AdditionalTermAttribute.class)) {
        additionalTermAttribute = tokenStream.getAttribute(AdditionalTermAttribute.class);
    }
    // The stopword attribute marks tokens to be skipped during indexing.
    if (tokenStream.hasAttribute(StopwordAttribute.class)) {
        stopwordAttribute = tokenStream.getAttribute(StopwordAttribute.class);
    }
    if (tokenStream.hasAttribute(CharTermAttribute.class)) {
        charTermAttribute = tokenStream.getAttribute(CharTermAttribute.class);
    }
    int lastPosition = 0;
    while (tokenStream.incrementToken()) {
        CharVector key = null;
        if (termAttribute != null) {
            CharsRef charRef = termAttribute.charsRef();
            char[] buffer = new char[charRef.length()];
            System.arraycopy(charRef.chars, charRef.offset, buffer, 0, charRef.length);
            key = new CharVector(buffer, 0, buffer.length);
        } else {
            key = new CharVector(charTermAttribute.buffer(), 0, charTermAttribute.length());
        }
        int position = -1;
        if (positionAttribute != null) {
            position = positionAttribute.getPositionIncrement() + positionIncrementGap;
            lastPosition = position;
        }
        // logger.debug("FIELD#{}: {} >> {} ({})", indexId, key, docNo, position);
        if (stopwordAttribute != null && stopwordAttribute.isStopword()) {
            // ignore stopwords
        } else {
            memoryPosting.add(key, docNo, position);
        }
        // if (synonymAttribute != null) {
        //     CharVector[] synonym = synonymAttribute.getSynonym();
        //     if (synonym != null) {
        //         for (CharVector token : synonym) {
        //             memoryPosting.add(token, docNo, position);
        //         }
        //     }
        // }
        if (additionalTermAttribute != null && additionalTermAttribute.size() > 0) {
            Iterator<String> iter = additionalTermAttribute.iterateAdditionalTerms();
            while (iter.hasNext()) {
                CharVector token = new CharVector(iter.next().toCharArray());
                memoryPosting.add(token, docNo, lastPosition);
            }
        }
    }
}
From source file:org.fastcatsearch.plugin.analysis.RunAnalyzer.java
public static void main(String[] args) throws IOException {
    if (args.length != 3) {
        printUsage();
        System.exit(0);
    }
    File pluginDir = new File(args[0]);
    String pluginClassName = args[1];
    String analyzerId = args[2];
    RunAnalyzer runAnalyzer = new RunAnalyzer(pluginDir, pluginClassName);
    AnalyzerPool analyzerPool = runAnalyzer.getAnalyzerPool(analyzerId);
    Analyzer analyzer = null;
    try {
        analyzer = analyzerPool.getFromPool();
        // Read input from the console one line at a time.
        Scanner sc = new Scanner(System.in);
        System.out.println("==================================");
        System.out.println(" Fastcat analyzer");
        System.out.println(" Enter 'quit' for exit program. ");
        System.out.println("==================================");
        System.out.print("Input String: ");
        while (sc.hasNextLine()) {
            String str = sc.nextLine();
            if (str.equalsIgnoreCase("quit")) {
                break;
            }
            try {
                char[] value = str.toCharArray();
                TokenStream tokenStream = analyzer.tokenStream("", new CharArrayReader(value),
                        new AnalyzerOption());
                tokenStream.reset();
                CharsRefTermAttribute termAttribute = null;
                if (tokenStream.hasAttribute(CharsRefTermAttribute.class)) {
                    termAttribute = tokenStream.getAttribute(CharsRefTermAttribute.class);
                }
                SynonymAttribute synonymAttribute = null;
                if (tokenStream.hasAttribute(SynonymAttribute.class)) {
                    synonymAttribute = tokenStream.getAttribute(SynonymAttribute.class);
                }
                AdditionalTermAttribute additionalTermAttribute = null;
                if (tokenStream.hasAttribute(AdditionalTermAttribute.class)) {
                    additionalTermAttribute = tokenStream.getAttribute(AdditionalTermAttribute.class);
                }
                StopwordAttribute stopwordAttribute = null;
                if (tokenStream.hasAttribute(StopwordAttribute.class)) {
                    stopwordAttribute = tokenStream.getAttribute(StopwordAttribute.class);
                }
                CharTermAttribute charTermAttribute = tokenStream.getAttribute(CharTermAttribute.class);
                while (tokenStream.incrementToken()) {
                    String word = "";
                    // Prefer CharsRefTermAttribute when available; otherwise fall back
                    // to the plain CharTermAttribute.
                    if (termAttribute != null) {
                        word = termAttribute.toString();
                    } else {
                        word = charTermAttribute.toString();
                    }
                    // Skip tokens flagged as stopwords.
                    if (stopwordAttribute != null && stopwordAttribute.isStopword()) {
                        continue;
                    }
                    // Print the extracted term.
                    System.out.print(">> ");
                    System.out.println(word);
                    // Print synonyms, which may be nested one level deep in lists.
                    if (synonymAttribute != null) {
                        List synonyms = synonymAttribute.getSynonyms();
                        if (synonyms != null) {
                            for (Object synonymObj : synonyms) {
                                if (synonymObj instanceof CharVector) {
                                    CharVector synonym = (CharVector) synonymObj;
                                    System.out.print("S> ");
                                    System.out.println(synonym);
                                } else if (synonymObj instanceof List) {
                                    List synonymList = (List) synonymObj;
                                    for (Object synonym : synonymList) {
                                        System.out.print("S> ");
                                        System.out.println(synonym);
                                    }
                                }
                            }
                        }
                    }
                    // Print additional terms derived from the current token.
                    if (additionalTermAttribute != null && additionalTermAttribute.size() > 0) {
                        Iterator<String> termIter = additionalTermAttribute.iterateAdditionalTerms();
                        while (termIter.hasNext()) {
                            String token = termIter.next();
                            System.out.print("A> ");
                            System.out.println(token);
                        }
                    }
                }
            } catch (IOException e) {
                e.printStackTrace();
            }
            System.out.print("Input String: ");
        }
    } finally {
        if (analyzer != null) {
            analyzerPool.releaseToPool(analyzer);
        }
    }
    System.out.print("Bye!");
}
From source file:org.gbif.namefinder.analysis.sciname.SciNameAnalyzerTest.java
License:Apache License
private TokenStream getTokens(Reader input) throws IOException {
    SciNameAnalyzer ana = new SciNameAnalyzer();
    TokenStream tokens = ana.tokenStream(null, input);
    tokens.reset();
    // Returns an already-reset stream; the caller is responsible for
    // consuming it and calling end()/close().
    return tokens;
}
From source file:org.genemania.completion.lucene.GeneCompletionProvider.java
License:Open Source License
public Long getNodeId(String symbol) {
    try {
        TokenStream tokens = analyze(symbol);
        PhraseQuery query = new PhraseQuery();
        tokens.reset();
        while (tokens.incrementToken()) {
            TermAttribute term = tokens.getAttribute(TermAttribute.class);
            query.add(new Term(GeneIndexBuilder.GENE_FIELD, term.term()));
        }
        tokens.end();
        tokens.close();
        final Set<Long> nodes = new HashSet<Long>();
        searcher.search(query, new AbstractCollector() {
            @Override
            public void handleHit(int id) {
                try {
                    Document document = searcher.doc(id);
                    nodes.add(Long.parseLong(document.get(GeneIndexBuilder.NODE_ID_FIELD)));
                } catch (IOException e) {
                    log(e);
                }
            }
        });
        if (nodes.size() > 0) {
            return nodes.iterator().next();
        }
    } catch (IOException e) {
        log(e);
    }
    return null;
}
From source file:org.genemania.data.classification.lucene.LuceneGeneClassifier.java
License:Open Source License
public void classify(final String symbol, final IGeneClassificationHandler handler) throws ApplicationException {
    try {
        TokenStream tokens = analyze(symbol);
        PhraseQuery query = new PhraseQuery();
        tokens.reset();
        while (tokens.incrementToken()) {
            TermAttribute term = tokens.getAttribute(TermAttribute.class);
            query.add(new Term(LuceneMediator.GENE_SYMBOL, term.term()));
        }
        tokens.end();
        tokens.close();
        searcher.search(query, new AbstractCollector() {
            @Override
            public void handleHit(int doc) {
                try {
                    Document document = searcher.doc(doc);
                    long organismId = Long.parseLong(document.get(LuceneMediator.GENE_ORGANISM_ID));
                    handler.handleClassification(symbol, organismId);
                } catch (IOException e) {
                    log(e);
                }
            }
        });
    } catch (IOException e) {
        throw new ApplicationException(e);
    }
}
From source file:org.genemania.mediator.lucene.LuceneMediator.java
License:Open Source License
protected PhraseQuery createPhraseQuery(String field, String phrase) throws IOException {
    TokenStream stream = analyze(phrase);
    stream.reset();
    PhraseQuery query = new PhraseQuery();
    while (stream.incrementToken()) {
        TermAttribute term = stream.getAttribute(TermAttribute.class);
        query.add(new Term(field, term.term()));
    }
    stream.end();
    stream.close();
    return query;
}
From source file:org.gridkit.coherence.search.lucene.CapturedTokenStream.java
License:Apache License
public void append(TokenStream ts, int positionGap, int offsetShift) throws IOException {
    PositionIncrementAttribute pi = ts.getAttribute(PositionIncrementAttribute.class);
    OffsetAttribute off = null;
    if (offsetShift != 0) {
        off = ts.getAttribute(OffsetAttribute.class);
    }
    ts.reset();
    while (ts.incrementToken()) {
        if (positionGap != 0) {
            // Apply the position gap to the first token only.
            pi.setPositionIncrement(positionGap);
            positionGap = 0;
        }
        if (off != null) {
            off.setOffset(offsetShift + off.startOffset(), offsetShift + off.endOffset());
        }
        tokens.add(ts.captureState());
        lastPos += pi.getPositionIncrement();
    }
}