Example usage for org.apache.lucene.analysis TokenStream getAttribute

List of usage examples for org.apache.lucene.analysis TokenStream getAttribute

Introduction

In this page you can find the example usage for org.apache.lucene.analysis TokenStream getAttribute.

Prototype

public final <T extends Attribute> T getAttribute(Class<T> attClass) 

Source Link

Document

Returns the instance of the passed in Attribute contained in this AttributeSource

The caller must pass in a `Class<? extends Attribute>` identifying the attribute interface to retrieve.

Usage

From source file:org.fastcatsearch.plugin.analysis.RunAnalyzer.java

public static void main(String[] args) throws IOException {
    // Entry point: loads an analyzer plugin, then interactively tokenizes each
    // line read from stdin, printing terms (">>"), synonyms ("S>") and
    // additional expanded terms ("A>") until the user types "quit".
    if (args.length != 3) {
        printUsage();
        System.exit(0);
    }

    File pluginDir = new File(args[0]);
    String pluginClassName = args[1];
    String analyzerId = args[2];
    RunAnalyzer runAnalyzer = new RunAnalyzer(pluginDir, pluginClassName);
    AnalyzerPool analyzerPool = runAnalyzer.getAnalyzerPool(analyzerId);
    Analyzer analyzer = null;

    try {
        analyzer = analyzerPool.getFromPool();
        // Read input lines until EOF or "quit".
        Scanner sc = new Scanner(System.in);
        System.out.println("==================================");
        System.out.println(" Fastcat analyzer");
        System.out.println(" Enter 'quit' for exit program. ");
        System.out.println("==================================");
        System.out.print("Input String: ");
        while (sc.hasNextLine()) {
            String str = sc.nextLine();
            if (str.equalsIgnoreCase("quit")) {
                break;
            }
            try {
                char[] value = str.toCharArray();
                TokenStream tokenStream = analyzer.tokenStream("", new CharArrayReader(value),
                        new AnalyzerOption());
                tokenStream.reset();

                // Optional attributes: present only when the analyzer chain adds them,
                // hence the hasAttribute guards before each getAttribute call.
                CharsRefTermAttribute termAttribute = null;
                if (tokenStream.hasAttribute(CharsRefTermAttribute.class)) {
                    termAttribute = tokenStream.getAttribute(CharsRefTermAttribute.class);
                }
                SynonymAttribute synonymAttribute = null;
                if (tokenStream.hasAttribute(SynonymAttribute.class)) {
                    synonymAttribute = tokenStream.getAttribute(SynonymAttribute.class);
                }
                AdditionalTermAttribute additionalTermAttribute = null;
                if (tokenStream.hasAttribute(AdditionalTermAttribute.class)) {
                    additionalTermAttribute = tokenStream.getAttribute(AdditionalTermAttribute.class);
                }

                StopwordAttribute stopwordAttribute = null;
                if (tokenStream.hasAttribute(StopwordAttribute.class)) {
                    stopwordAttribute = tokenStream.getAttribute(StopwordAttribute.class);
                }

                CharTermAttribute charTermAttribute = tokenStream.getAttribute(CharTermAttribute.class);

                while (tokenStream.incrementToken()) {
                    String word = "";
                    // Prefer CharsRefTermAttribute when the analyzer provides it.
                    if (termAttribute != null) {
                        word = termAttribute.toString();
                    } else {
                        // Fall back to the standard CharTermAttribute.
                        word = charTermAttribute.toString();
                    }

                    // Skip stopwords.
                    // BUGFIX: stopwordAttribute can be null (see the guarded
                    // assignment above) — the original dereferenced it
                    // unconditionally and could throw NullPointerException.
                    if (stopwordAttribute != null && stopwordAttribute.isStopword()) {
                        continue;
                    }

                    // Print the main term.
                    System.out.print(">> ");
                    System.out.println(word);

                    // Print synonyms, if any. Each entry may be a single
                    // CharVector or a nested List of synonyms.
                    if (synonymAttribute != null) {
                        List synonyms = synonymAttribute.getSynonyms();
                        if (synonyms != null) {
                            for (Object synonymObj : synonyms) {
                                if (synonymObj instanceof CharVector) {
                                    CharVector synonym = (CharVector) synonymObj;
                                    System.out.print("S> ");
                                    System.out.println(synonym);
                                } else if (synonymObj instanceof List) {
                                    List synonymList = (List) synonymObj;
                                    for (Object synonym : synonymList) {
                                        System.out.print("S> ");
                                        System.out.println(synonym);
                                    }
                                }
                            }
                        }
                    }

                    // Print additional (expanded) terms, if any.
                    if (additionalTermAttribute != null && additionalTermAttribute.size() > 0) {
                        Iterator<String> termIter = additionalTermAttribute.iterateAdditionalTerms();
                        while (termIter.hasNext()) {
                            String token = termIter.next();
                            System.out.print("A> ");
                            // BUGFIX: the original printed 'word' here, so every
                            // additional term was shown as the base term.
                            System.out.println(token);
                        }
                    }
                }
            } catch (IOException e) {
                e.printStackTrace();
            }
            System.out.print("Input String: ");
        }
    } finally {
        if (analyzer != null) {
            analyzerPool.releaseToPool(analyzer);
        }
    }
    System.out.print("Bye!");
}

From source file:org.gbif.namefinder.analysis.sciname.SciNameIterator.java

License:Apache License

/**
 * Creates an iterator over scientific names found in the given token stream.
 * Caches the term, sci-name and offset attribute instances once up front
 * (getAttribute returns the stream's shared per-attribute instance), then
 * advances to the first name via nextName().
 *
 * NOTE(review): getAttribute throws if an attribute is absent — assumes the
 * stream's analyzer chain always adds all three attributes; verify at call sites.
 */
public SciNameIterator(TokenStream tokens) {
    super();
    this.tokens = tokens;
    termAtt = tokens.getAttribute(CharTermAttribute.class);
    sciNameAtt = tokens.getAttribute(SciNameAttribute.class);
    offsetAtt = tokens.getAttribute(OffsetAttribute.class);
    nextName();
}

From source file:org.genemania.completion.lucene.GeneCompletionProvider.java

License:Open Source License

/**
 * Resolves a gene symbol to its node id by building a phrase query from the
 * analyzed symbol and taking the first matching document's node id.
 *
 * @param symbol gene symbol to look up
 * @return the node id of the first hit, or {@code null} when nothing matches
 *         or an I/O error occurs (logged, not rethrown)
 */
public Long getNodeId(String symbol) {
    try {
        TokenStream tokens = analyze(symbol);
        PhraseQuery query = new PhraseQuery();
        tokens.reset();
        try {
            // getAttribute returns the same shared instance for the stream's
            // lifetime, so fetch it once instead of once per token.
            TermAttribute term = tokens.getAttribute(TermAttribute.class);
            while (tokens.incrementToken()) {
                query.add(new Term(GeneIndexBuilder.GENE_FIELD, term.term()));
            }
            tokens.end();
        } finally {
            // BUGFIX: close the stream even when incrementToken/end throws;
            // the original leaked the stream on exception.
            tokens.close();
        }

        final Set<Long> nodes = new HashSet<Long>();
        searcher.search(query, new AbstractCollector() {
            @Override
            public void handleHit(int id) {
                try {
                    Document document = searcher.doc(id);
                    nodes.add(Long.parseLong(document.get(GeneIndexBuilder.NODE_ID_FIELD)));
                } catch (IOException e) {
                    log(e);
                }
            }
        });
        if (nodes.size() > 0) {
            return nodes.iterator().next();
        }
    } catch (IOException e) {
        log(e);
    }
    return null;
}

From source file:org.genemania.data.classification.lucene.LuceneGeneClassifier.java

License:Open Source License

/**
 * Classifies a gene symbol: builds a phrase query from the analyzed symbol,
 * searches the index, and invokes the handler with the organism id of every
 * matching document.
 *
 * @param symbol  gene symbol to classify
 * @param handler callback receiving (symbol, organismId) for each hit
 * @throws ApplicationException wrapping any I/O failure from search/analysis
 */
public void classify(final String symbol, final IGeneClassificationHandler handler)
        throws ApplicationException {
    try {
        TokenStream tokens = analyze(symbol);
        PhraseQuery query = new PhraseQuery();
        tokens.reset();
        try {
            // Same shared attribute instance for the whole stream — fetch once,
            // not once per token as the original did.
            TermAttribute term = tokens.getAttribute(TermAttribute.class);
            while (tokens.incrementToken()) {
                query.add(new Term(LuceneMediator.GENE_SYMBOL, term.term()));
            }
            tokens.end();
        } finally {
            // BUGFIX: close even when incrementToken/end throws (was leaked).
            tokens.close();
        }

        searcher.search(query, new AbstractCollector() {
            @Override
            public void handleHit(int doc) {
                try {
                    Document document = searcher.doc(doc);
                    long organismId = Long.parseLong(document.get(LuceneMediator.GENE_ORGANISM_ID));
                    handler.handleClassification(symbol, organismId);
                } catch (IOException e) {
                    log(e);
                }
            }
        });
    } catch (IOException e) {
        throw new ApplicationException(e);
    }
}

From source file:org.genemania.data.normalizer.GeneCompletionProvider2.java

License:Open Source License

/**
 * Feeds gene-symbol completion proposals for the given prefix to the consumer.
 * Uses the first analyzed token of the query string as a prefix query,
 * restricted to the provider's organism. Always calls {@code consumer.finish()}.
 *
 * @param consumer    receives each matching gene symbol
 * @param queryString user-typed prefix; empty input produces no proposals
 */
public void computeProposals(final CompletionConsumer consumer, String queryString) {
    try {
        if (queryString.length() == 0) {
            return;
        }

        TokenStream stream = analyzer.tokenStream(LuceneGeneMediator.GENE_SYMBOL,
                new StringReader(queryString));
        // BUGFIX: reset() before consuming, as the TokenStream contract (and
        // every sibling call site in this codebase) requires.
        stream.reset();
        try {
            TermAttribute term = stream.getAttribute(TermAttribute.class);
            if (!stream.incrementToken()) {
                return;
            }

            BooleanQuery query = new BooleanQuery();
            query.add(new TermQuery(new Term(LuceneMediator.GENE_ORGANISM_ID, String.valueOf(organism.getId()))),
                    Occur.MUST);
            query.add(new PrefixQuery(new Term(LuceneMediator.GENE_SYMBOL, term.term())), Occur.MUST);
            searcher.search(query, new AbstractCollector() {
                @Override
                public void handleHit(int id) {
                    try {
                        Document document = searcher.doc(id);
                        consumer.consume(document.get(LuceneMediator.GENE_SYMBOL));
                    } catch (IOException e) {
                        log(e);
                    }
                }
            });
        } finally {
            // BUGFIX: the original never closed the stream at all.
            stream.close();
        }
    } catch (IOException e) {
        log(e);
    } catch (TooManyClauses e) {
        consumer.tooManyCompletions();
    } finally {
        consumer.finish();
    }
}

From source file:org.genemania.mediator.lucene.LuceneMediator.java

License:Open Source License

/**
 * Analyzes {@code phrase} and builds a PhraseQuery containing one term per
 * token, all against the given field.
 *
 * @param field  index field the phrase terms target
 * @param phrase raw text to analyze
 * @return the assembled phrase query (empty when the phrase yields no tokens)
 * @throws IOException if the token stream fails
 */
protected PhraseQuery createPhraseQuery(String field, String phrase) throws IOException {
    TokenStream stream = analyze(phrase);
    stream.reset();
    PhraseQuery query = new PhraseQuery();
    try {
        // One shared attribute instance per stream — fetch it once up front
        // rather than on every iteration as the original did.
        TermAttribute term = stream.getAttribute(TermAttribute.class);
        while (stream.incrementToken()) {
            query.add(new Term(field, term.term()));
        }
        stream.end();
    } finally {
        // BUGFIX: guarantee close even when incrementToken/end throws.
        stream.close();
    }
    return query;
}

From source file:org.gridkit.coherence.search.lucene.CapturedTokenStream.java

License:Apache License

/**
 * Appends all tokens from {@code ts} to this captured stream.
 *
 * @param positionGap  position increment forced onto the FIRST appended token
 *                     only (0 leaves increments untouched)
 * @param offsetShift  amount added to every token's start/end offset
 *                     (0 skips offset rewriting entirely)
 * @throws IOException if the underlying stream fails
 */
public void append(TokenStream ts, int positionGap, int offsetShift) throws IOException {
    PositionIncrementAttribute pi = null;
    // NOTE(review): getAttribute throws if the attribute is absent — assumes
    // every appended stream carries a PositionIncrementAttribute; confirm.
    pi = ts.getAttribute(PositionIncrementAttribute.class);
    OffsetAttribute off = null;
    if (offsetShift != 0) {
        off = ts.getAttribute(OffsetAttribute.class);
    }
    ts.reset();
    while (ts.incrementToken()) {
        // Apply the gap once, to the first token, then fall back to the
        // stream's own increments.
        if (positionGap != 0) {
            pi.setPositionIncrement(positionGap);
            positionGap = 0;
        }
        if (off != null) {
            off.setOffset(offsetShift + off.startOffset(), offsetShift + off.endOffset());
        }
        // Snapshot the (possibly mutated) attribute state for later replay.
        tokens.add(ts.captureState());
        lastPos += pi.getPositionIncrement();
    }
}

From source file:org.gridkit.coherence.search.lucene.TokenStreamCheck.java

License:Apache License

@Test
public void analyze() throws IOException {
    // Tokenize a short phrase with a whitespace analyzer, pull the first
    // term, then verify a captured copy of the stream exposes the same
    // term attribute after reset.
    WhitespaceAnalyzer analyzer = new WhitespaceAnalyzer(Version.LUCENE_42);
    analyzer.getOffsetGap("xxx");

    TokenStream stream = analyzer.tokenStream("test", new StringReader("red black tree"));
    stream.reset();
    stream.incrementToken();
    stream.getAttribute(CharTermAttribute.class).buffer();

    CapturedTokenStream captured = new CapturedTokenStream(stream);
    captured.reset();
    captured.incrementToken();
    captured.getAttribute(CharTermAttribute.class).buffer();
}

From source file:org.hbasene.index.HBaseIndexWriter.java

License:Apache License

/**
 * Indexes a Lucene Document into the HBase-backed store: builds a
 * term -&gt; positions map for indexed fields and a field -&gt; bytes map for
 * stored fields, then hands both to the index store under the document's
 * primary key.
 *
 * @param doc      document to index; must contain the primary key field
 * @param analyzer analyzer used for fields without a pre-built token stream
 * @throws IllegalArgumentException if the primary key field is missing
 * @throws IOException              on token stream or store failures
 */
public void addDocument(Document doc, Analyzer analyzer) throws CorruptIndexException, IOException {
    String docId = doc.get(this.primaryKeyField);
    if (docId == null) {
        throw new IllegalArgumentException(
                "Primary Key " + this.primaryKeyField + " not present in the document to be added ");
        // TODO: Special type of exception needed ?

    }
    int position = 0;
    Map<String, List<Integer>> termPositions = new HashMap<String, List<Integer>>();
    Map<String, byte[]> fieldsToStore = new HashMap<String, byte[]>();

    for (Fieldable field : doc.getFields()) {

        // Indexed, tokenized field: record every term occurrence and position.
        if (field.isIndexed() && field.isTokenized()) {
            TokenStream tokens = field.tokenStreamValue();

            if (tokens == null) {
                tokens = analyzer.tokenStream(field.name(), new StringReader(field.stringValue()));
            }
            try {
                // addAttribute returns the stream's shared attribute instance,
                // so capture both once instead of calling getAttribute on
                // every token as the original did.
                TermAttribute termAttr = tokens.addAttribute(TermAttribute.class);
                PositionIncrementAttribute posIncrAttr = tokens.addAttribute(PositionIncrementAttribute.class);

                // Keep a gap between values of multi-valued fields.
                if (position > 0) {
                    position += analyzer.getPositionIncrementGap(field.name());
                }

                // Build the termPositions vector for all terms.
                // NOTE(review): no reset() is called before incrementToken();
                // this matches the original but verify against the Lucene
                // version in use.
                while (tokens.incrementToken()) {
                    String term = createColumnName(field.name(), termAttr.term());

                    List<Integer> pvec = termPositions.get(term);

                    if (pvec == null) {
                        pvec = Lists.newArrayList();
                        termPositions.put(term, pvec);
                    }

                    // Fold extra increments (e.g. stopword holes) into the
                    // running position, then record the 1-based position.
                    position += (posIncrAttr.getPositionIncrement() - 1);
                    pvec.add(++position);

                }
            } finally {
                // BUGFIX: close even when incrementToken throws (was leaked).
                tokens.close();
            }

        }

        // Untokenized fields go in without a termPosition.
        if (field.isIndexed() && !field.isTokenized()) {
            String term = this.createColumnName(field.name(), field.stringValue());
            String key = term;
            termPositions.put(key, EMPTY_TERM_POSITIONS);

        }

        // Stores each field as a column under this doc key.
        if (field.isStored()) {

            byte[] value = field.isBinary() ? field.getBinaryValue() : Bytes.toBytes(field.stringValue());

            // First byte flags whether the value is binary ('B') or text ('T').
            final byte[] prefix = Bytes.toBytes((field.isBinary() ? 'B' : 'T'));

            fieldsToStore.put(field.name(), Bytes.add(prefix, value));
        }
    }
    indexStore.indexDocument(docId, new DocumentIndexContext(termPositions, fieldsToStore));
    termPositions.clear();
    fieldsToStore.clear();
}

From source file:org.hibernate.search.backend.spi.SingularTermDeletionQuery.java

License:LGPL

@Override
public Query toLuceneQuery(DocumentBuilderIndexedEntity documentBuilder) {
    AnalyzerReference analyzerReferenceForEntity = documentBuilder.getAnalyzerReference();
    String stringValue = documentBuilder.objectToString(fieldName, this.getValue(),
            new ContextualExceptionBridgeHelper());

    if (this.getType() == Type.STRING) {
        try {/*from w  w  w.j a v a  2s  .  com*/
            if (analyzerReferenceForEntity.is(RemoteAnalyzerReference.class)) {
                // no need to take into account the analyzer here as it will be dealt with remotely
                return new TermQuery(new Term(this.getFieldName(), stringValue));
            }

            ScopedLuceneAnalyzer analyzerForEntity = (ScopedLuceneAnalyzer) analyzerReferenceForEntity
                    .unwrap(LuceneAnalyzerReference.class).getAnalyzer();
            TokenStream tokenStream = analyzerForEntity.tokenStream(this.getFieldName(), stringValue);
            tokenStream.reset();
            try {
                BooleanQuery.Builder booleanQueryBuilder = new BooleanQuery.Builder();
                while (tokenStream.incrementToken()) {
                    String term = tokenStream.getAttribute(CharTermAttribute.class).toString();
                    booleanQueryBuilder.add(new TermQuery(new Term(this.getFieldName(), term)), Occur.FILTER);
                }
                return booleanQueryBuilder.build();
            } finally {
                tokenStream.close();
            }
        } catch (IOException e) {
            throw new AssertionFailure(
                    "No IOException can occur while using a TokenStream that is generated via String");
        }
    } else {
        FieldBridge fieldBridge = documentBuilder.getBridge(fieldName);
        if (NumericFieldUtils.isNumericFieldBridge(fieldBridge)) {
            return NumericFieldUtils.createExactMatchQuery(fieldName, this.getValue());
        } else {
            return new TermQuery(new Term(this.getFieldName(), stringValue));
        }
    }
}