List of usage examples for org.apache.lucene.analysis TokenStream getAttribute
public final <T extends Attribute> T getAttribute(Class<T> attClass)
The caller must pass in a Class<? extends Attribute> value. The method returns the stream's instance of the requested attribute type; if the stream does not contain that attribute, an IllegalArgumentException is thrown, so callers typically guard optional attributes with hasAttribute(Class) first (or use addAttribute, which creates the attribute on demand).
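Before the full examples below, a minimal self-contained sketch of the common pattern. The field name "body", the sample text, and the choice of StandardAnalyzer are illustrative assumptions (not taken from the sources below), and a recent Lucene release is assumed:

import java.io.IOException;
import java.io.StringReader;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;

public class GetAttributeSketch {
    public static void main(String[] args) throws IOException {
        Analyzer analyzer = new StandardAnalyzer();
        try (TokenStream stream = analyzer.tokenStream("body", new StringReader("red black tree"))) {
            // StandardTokenizer registers a CharTermAttribute, so getAttribute is safe here;
            // requesting an attribute the stream lacks would throw IllegalArgumentException.
            CharTermAttribute term = stream.getAttribute(CharTermAttribute.class);
            // Optional attributes are guarded with hasAttribute, as in the examples below.
            OffsetAttribute offset = stream.hasAttribute(OffsetAttribute.class)
                    ? stream.getAttribute(OffsetAttribute.class)
                    : null;
            stream.reset();
            while (stream.incrementToken()) {
                String info = offset == null ? "" : " [" + offset.startOffset() + "-" + offset.endOffset() + "]";
                System.out.println(term.toString() + info);
            }
            stream.end();
        }
    }
}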
From source file:org.fastcatsearch.plugin.analysis.RunAnalyzer.java
public static void main(String[] args) throws IOException {
    if (args.length != 3) {
        printUsage();
        System.exit(0);
    }
    File pluginDir = new File(args[0]);
    String pluginClassName = args[1];
    String analyzerId = args[2];
    RunAnalyzer runAnalyzer = new RunAnalyzer(pluginDir, pluginClassName);
    AnalyzerPool analyzerPool = runAnalyzer.getAnalyzerPool(analyzerId);
    Analyzer analyzer = null;
    try {
        // Borrow an analyzer instance from the pool.
        analyzer = analyzerPool.getFromPool();
        Scanner sc = new Scanner(System.in);
        System.out.println("==================================");
        System.out.println(" Fastcat analyzer");
        System.out.println(" Enter 'quit' for exit program. ");
        System.out.println("==================================");
        System.out.print("Input String: ");
        while (sc.hasNextLine()) {
            String str = sc.nextLine();
            if (str.equalsIgnoreCase("quit")) {
                break;
            }
            try {
                char[] value = str.toCharArray();
                TokenStream tokenStream = analyzer.tokenStream("", new CharArrayReader(value),
                        new AnalyzerOption());
                tokenStream.reset();
                // Optional attributes are checked with hasAttribute() before getAttribute().
                CharsRefTermAttribute termAttribute = null;
                if (tokenStream.hasAttribute(CharsRefTermAttribute.class)) {
                    termAttribute = tokenStream.getAttribute(CharsRefTermAttribute.class);
                }
                SynonymAttribute synonymAttribute = null;
                if (tokenStream.hasAttribute(SynonymAttribute.class)) {
                    synonymAttribute = tokenStream.getAttribute(SynonymAttribute.class);
                }
                AdditionalTermAttribute additionalTermAttribute = null;
                if (tokenStream.hasAttribute(AdditionalTermAttribute.class)) {
                    additionalTermAttribute = tokenStream.getAttribute(AdditionalTermAttribute.class);
                }
                StopwordAttribute stopwordAttribute = null;
                if (tokenStream.hasAttribute(StopwordAttribute.class)) {
                    stopwordAttribute = tokenStream.getAttribute(StopwordAttribute.class);
                }
                CharTermAttribute charTermAttribute = tokenStream.getAttribute(CharTermAttribute.class);
                while (tokenStream.incrementToken()) {
                    // Prefer CharsRefTermAttribute when present; otherwise fall back to CharTermAttribute.
                    String word;
                    if (termAttribute != null) {
                        word = termAttribute.toString();
                    } else {
                        word = charTermAttribute.toString();
                    }
                    // Skip stopwords.
                    if (stopwordAttribute != null && stopwordAttribute.isStopword()) {
                        continue;
                    }
                    System.out.print(">> ");
                    System.out.println(word);
                    // Print synonyms, which may be single terms or nested lists.
                    if (synonymAttribute != null) {
                        List synonyms = synonymAttribute.getSynonyms();
                        if (synonyms != null) {
                            for (Object synonymObj : synonyms) {
                                if (synonymObj instanceof CharVector) {
                                    System.out.print("S> ");
                                    System.out.println((CharVector) synonymObj);
                                } else if (synonymObj instanceof List) {
                                    for (Object synonym : (List) synonymObj) {
                                        System.out.print("S> ");
                                        System.out.println(synonym);
                                    }
                                }
                            }
                        }
                    }
                    // Print additional terms derived from the current token.
                    if (additionalTermAttribute != null && additionalTermAttribute.size() > 0) {
                        Iterator<String> termIter = additionalTermAttribute.iterateAdditionalTerms();
                        while (termIter.hasNext()) {
                            String token = termIter.next();
                            System.out.print("A> ");
                            System.out.println(token);
                        }
                    }
                }
            } catch (IOException e) {
                e.printStackTrace();
            }
            System.out.print("Input String: ");
        }
    } finally {
        if (analyzer != null) {
            analyzerPool.releaseToPool(analyzer);
        }
    }
    System.out.print("Bye!");
}
From source file:org.gbif.namefinder.analysis.sciname.SciNameIterator.java
License:Apache License
public SciNameIterator(TokenStream tokens) {
    super();
    this.tokens = tokens;
    termAtt = tokens.getAttribute(CharTermAttribute.class);
    sciNameAtt = tokens.getAttribute(SciNameAttribute.class);
    offsetAtt = tokens.getAttribute(OffsetAttribute.class);
    nextName();
}
From source file:org.genemania.completion.lucene.GeneCompletionProvider.java
License:Open Source License
public Long getNodeId(String symbol) {
    try {
        TokenStream tokens = analyze(symbol);
        PhraseQuery query = new PhraseQuery();
        tokens.reset();
        while (tokens.incrementToken()) {
            TermAttribute term = tokens.getAttribute(TermAttribute.class);
            query.add(new Term(GeneIndexBuilder.GENE_FIELD, term.term()));
        }
        tokens.end();
        tokens.close();
        final Set<Long> nodes = new HashSet<Long>();
        searcher.search(query, new AbstractCollector() {
            @Override
            public void handleHit(int id) {
                try {
                    Document document = searcher.doc(id);
                    nodes.add(Long.parseLong(document.get(GeneIndexBuilder.NODE_ID_FIELD)));
                } catch (IOException e) {
                    log(e);
                }
            }
        });
        if (nodes.size() > 0) {
            return nodes.iterator().next();
        }
    } catch (IOException e) {
        log(e);
    }
    return null;
}
From source file:org.genemania.data.classification.lucene.LuceneGeneClassifier.java
License:Open Source License
public void classify(final String symbol, final IGeneClassificationHandler handler) throws ApplicationException {
    try {
        TokenStream tokens = analyze(symbol);
        PhraseQuery query = new PhraseQuery();
        tokens.reset();
        while (tokens.incrementToken()) {
            TermAttribute term = tokens.getAttribute(TermAttribute.class);
            query.add(new Term(LuceneMediator.GENE_SYMBOL, term.term()));
        }
        tokens.end();
        tokens.close();
        searcher.search(query, new AbstractCollector() {
            @Override
            public void handleHit(int doc) {
                try {
                    Document document = searcher.doc(doc);
                    long organismId = Long.parseLong(document.get(LuceneMediator.GENE_ORGANISM_ID));
                    handler.handleClassification(symbol, organismId);
                } catch (IOException e) {
                    log(e);
                }
            }
        });
    } catch (IOException e) {
        throw new ApplicationException(e);
    }
}
From source file:org.genemania.data.normalizer.GeneCompletionProvider2.java
License:Open Source License
public void computeProposals(final CompletionConsumer consumer, String queryString) {
    try {
        if (queryString.length() == 0) {
            return;
        }
        TokenStream stream = analyzer.tokenStream(LuceneGeneMediator.GENE_SYMBOL,
                new StringReader(queryString));
        TermAttribute term = stream.getAttribute(TermAttribute.class);
        if (!stream.incrementToken()) {
            return;
        }
        BooleanQuery query = new BooleanQuery();
        query.add(new TermQuery(new Term(LuceneMediator.GENE_ORGANISM_ID, String.valueOf(organism.getId()))),
                Occur.MUST);
        query.add(new PrefixQuery(new Term(LuceneMediator.GENE_SYMBOL, term.term())), Occur.MUST);
        searcher.search(query, new AbstractCollector() {
            @Override
            public void handleHit(int id) {
                try {
                    Document document = searcher.doc(id);
                    consumer.consume(document.get(LuceneMediator.GENE_SYMBOL));
                } catch (IOException e) {
                    log(e);
                }
            }
        });
    } catch (IOException e) {
        log(e);
    } catch (TooManyClauses e) {
        consumer.tooManyCompletions();
    } finally {
        consumer.finish();
    }
}
From source file:org.genemania.mediator.lucene.LuceneMediator.java
License:Open Source License
protected PhraseQuery createPhraseQuery(String field, String phrase) throws IOException {
    TokenStream stream = analyze(phrase);
    stream.reset();
    PhraseQuery query = new PhraseQuery();
    while (stream.incrementToken()) {
        TermAttribute term = stream.getAttribute(TermAttribute.class);
        query.add(new Term(field, term.term()));
    }
    stream.end();
    stream.close();
    return query;
}
From source file:org.gridkit.coherence.search.lucene.CapturedTokenStream.java
License:Apache License
public void append(TokenStream ts, int positionGap, int offsetShift) throws IOException {
    PositionIncrementAttribute pi = ts.getAttribute(PositionIncrementAttribute.class);
    OffsetAttribute off = null;
    if (offsetShift != 0) {
        off = ts.getAttribute(OffsetAttribute.class);
    }
    ts.reset();
    while (ts.incrementToken()) {
        if (positionGap != 0) {
            pi.setPositionIncrement(positionGap);
            positionGap = 0;
        }
        if (off != null) {
            off.setOffset(offsetShift + off.startOffset(), offsetShift + off.endOffset());
        }
        tokens.add(ts.captureState());
        lastPos += pi.getPositionIncrement();
    }
}
From source file:org.gridkit.coherence.search.lucene.TokenStreamCheck.java
License:Apache License
@Test
public void analyze() throws IOException {
    WhitespaceAnalyzer wa = new WhitespaceAnalyzer(Version.LUCENE_42);
    wa.getOffsetGap("xxx");
    TokenStream ts = wa.tokenStream("test", new StringReader("red black tree"));
    ts.reset();
    ts.incrementToken();
    ts.getAttribute(CharTermAttribute.class).buffer();
    CapturedTokenStream cts = new CapturedTokenStream(ts);
    cts.reset();
    cts.incrementToken();
    cts.getAttribute(CharTermAttribute.class).buffer();
}
From source file:org.hbasene.index.HBaseIndexWriter.java
License:Apache License
public void addDocument(Document doc, Analyzer analyzer) throws CorruptIndexException, IOException {
    String docId = doc.get(this.primaryKeyField);
    if (docId == null) {
        throw new IllegalArgumentException(
                "Primary Key " + this.primaryKeyField + " not present in the document to be added ");
        // TODO: Special type of exception needed ?
    }
    int position = 0;
    Map<String, List<Integer>> termPositions = new HashMap<String, List<Integer>>();
    Map<String, byte[]> fieldsToStore = new HashMap<String, byte[]>();
    for (Fieldable field : doc.getFields()) {
        // Indexed field
        if (field.isIndexed() && field.isTokenized()) {
            TokenStream tokens = field.tokenStreamValue();
            if (tokens == null) {
                tokens = analyzer.tokenStream(field.name(), new StringReader(field.stringValue()));
            }
            tokens.addAttribute(TermAttribute.class);
            tokens.addAttribute(PositionIncrementAttribute.class);
            // collect term frequencies per doc
            if (position > 0) {
                position += analyzer.getPositionIncrementGap(field.name());
            }
            // Build the termPositions vector for all terms
            while (tokens.incrementToken()) {
                String term = createColumnName(field.name(), tokens.getAttribute(TermAttribute.class).term());
                List<Integer> pvec = termPositions.get(term);
                if (pvec == null) {
                    pvec = Lists.newArrayList();
                    termPositions.put(term, pvec);
                }
                position += (tokens.getAttribute(PositionIncrementAttribute.class).getPositionIncrement() - 1);
                pvec.add(++position);
            }
            tokens.close();
        }
        // Untokenized fields go in without a termPosition
        if (field.isIndexed() && !field.isTokenized()) {
            String term = this.createColumnName(field.name(), field.stringValue());
            termPositions.put(term, EMPTY_TERM_POSITIONS);
        }
        // Stores each field as a column under this doc key
        if (field.isStored()) {
            byte[] value = field.isBinary() ? field.getBinaryValue() : Bytes.toBytes(field.stringValue());
            // first byte flags if binary or not
            final byte[] prefix = Bytes.toBytes((field.isBinary() ? 'B' : 'T'));
            fieldsToStore.put(field.name(), Bytes.add(prefix, value));
        }
    }
    indexStore.indexDocument(docId, new DocumentIndexContext(termPositions, fieldsToStore));
    termPositions.clear();
    fieldsToStore.clear();
}
From source file:org.hibernate.search.backend.spi.SingularTermDeletionQuery.java
License:LGPL
@Override
public Query toLuceneQuery(DocumentBuilderIndexedEntity documentBuilder) {
    AnalyzerReference analyzerReferenceForEntity = documentBuilder.getAnalyzerReference();
    String stringValue = documentBuilder.objectToString(fieldName, this.getValue(),
            new ContextualExceptionBridgeHelper());
    if (this.getType() == Type.STRING) {
        try {
            if (analyzerReferenceForEntity.is(RemoteAnalyzerReference.class)) {
                // no need to take into account the analyzer here as it will be dealt with remotely
                return new TermQuery(new Term(this.getFieldName(), stringValue));
            }
            ScopedLuceneAnalyzer analyzerForEntity = (ScopedLuceneAnalyzer) analyzerReferenceForEntity
                    .unwrap(LuceneAnalyzerReference.class).getAnalyzer();
            TokenStream tokenStream = analyzerForEntity.tokenStream(this.getFieldName(), stringValue);
            tokenStream.reset();
            try {
                BooleanQuery.Builder booleanQueryBuilder = new BooleanQuery.Builder();
                while (tokenStream.incrementToken()) {
                    String term = tokenStream.getAttribute(CharTermAttribute.class).toString();
                    booleanQueryBuilder.add(new TermQuery(new Term(this.getFieldName(), term)), Occur.FILTER);
                }
                return booleanQueryBuilder.build();
            } finally {
                tokenStream.close();
            }
        } catch (IOException e) {
            throw new AssertionFailure(
                    "No IOException can occur while using a TokenStream that is generated via String");
        }
    } else {
        FieldBridge fieldBridge = documentBuilder.getBridge(fieldName);
        if (NumericFieldUtils.isNumericFieldBridge(fieldBridge)) {
            return NumericFieldUtils.createExactMatchQuery(fieldName, this.getValue());
        } else {
            return new TermQuery(new Term(this.getFieldName(), stringValue));
        }
    }
}