Example usage for org.apache.lucene.analysis TokenStream addAttribute

List of usage examples for org.apache.lucene.analysis TokenStream addAttribute

Introduction

On this page you can find example usages of the addAttribute method of org.apache.lucene.analysis.TokenStream.

Prototype

public final <T extends Attribute> T addAttribute(Class<T> attClass) 

Source Link

Document

The caller must pass in a Class<? extends Attribute> value.

Usage

From source file:practica3b.Practica3b.java

/**
 * Searches for documents whose "Authors" field matches the analysed terms of
 * {@code authors}, optionally restricted by the "Year" field, and prints every
 * returned hit to standard output.
 *
 * @param is        searcher the query is executed against
 * @param tipo      "should" or "must"; selects the occurrence used for every author
 *                  term clause (any other value adds no author clause)
 * @param tipo_year "range" to match years between {@code num1} and {@code num2};
 *                  any other value matches exactly the two given years
 *                  (only read when both years are non-null)
 * @param authors   free text that is tokenized with a {@link StandardAnalyzer}
 * @param num1      first year, or {@code null} for no year restriction
 * @param num2      second year, or {@code null} to match {@code num1} exactly
 * @param fc        not read: a new collector is created for the search (see note below)
 * @return the top 10 hits for the combined query
 * @throws IOException if the search or loading a stored document fails
 */
public static TopDocs busquedaAuthor(IndexSearcher is, String tipo, String tipo_year, String authors,
        Integer num1, Integer num2, FacetsCollector fc) throws IOException {
    List<String> palabras = new ArrayList<String>();
    // try-with-resources closes the stream and then the analyzer, also when tokenizing fails.
    try (Analyzer analizador = new StandardAnalyzer();
            TokenStream stream = analizador.tokenStream(null, new StringReader(authors))) {
        CharTermAttribute catt = stream.addAttribute(CharTermAttribute.class);
        stream.reset();
        while (stream.incrementToken()) {
            palabras.add(catt.toString());
        }
        // end() has to run before close(); the original called them in the opposite order.
        stream.end();
    } catch (IOException e) {
        throw new RuntimeException(e);
    }

    BooleanQuery.Builder bqbuilder = new BooleanQuery.Builder();
    for (String palabra : palabras) {
        // One clause per token (the original always reused the first token).
        Query query = new TermQuery(new Term("Authors", palabra));
        if (tipo.equals("should")) {
            bqbuilder.add(query, BooleanClause.Occur.SHOULD);
        } else if (tipo.equals("must")) {
            bqbuilder.add(query, BooleanClause.Occur.MUST);
        }
    }

    if (num1 != null) {
        Query yearQuery;
        if (num2 == null) {
            yearQuery = IntPoint.newExactQuery("Year", num1);
        } else if (tipo_year.equals("range")) {
            yearQuery = IntPoint.newRangeQuery("Year", num1, num2);
        } else {
            yearQuery = IntPoint.newSetQuery("Year", num1, num2);
        }
        bqbuilder.add(yearQuery, BooleanClause.Occur.MUST);
    }

    BooleanQuery bq = bqbuilder.build();
    // NOTE(review): the collector passed in by the caller is replaced here, so the caller
    // never sees the facet hits of this search - confirm whether that is intended.
    fc = new FacetsCollector();
    TopDocs td = FacetsCollector.search(is, bq, 10, fc);
    for (ScoreDoc scoreDoc : td.scoreDocs) {
        Document doc = is.doc(scoreDoc.doc);
        System.out.println(scoreDoc.score + " - " + doc.get("Authors") + " - " + doc.get("Title") + " - Year: "
                + doc.get("Year"));
    }
    return td;
}

From source file:practica3b.Practica3b.java

/**
 * Tokenizes {@code busqueda} and builds one term clause per token.
 *
 * @param busqueda      free text to tokenize
 * @param tipo_busqueda 1 searches "Title" (English analyzer), 2 searches "Authors"
 *                      (standard analyzer), any other value searches "Abstract"
 *                      (English analyzer)
 * @param tipo          "should" or "must"; selects the occurrence of every clause
 *                      (any other value yields no clauses)
 * @return the clauses in token order; empty when the text produces no tokens
 * @throws RuntimeException wrapping the {@link IOException} if tokenizing fails
 */
public static ArrayList<BooleanClause> createClause(String busqueda, int tipo_busqueda, String tipo) {
    final String campo;
    if (tipo_busqueda == 1) {
        campo = "Title";
    } else if (tipo_busqueda == 2) {
        campo = "Authors";
    } else {
        campo = "Abstract";
    }

    List<String> palabras = new ArrayList<String>();
    // try-with-resources closes the stream and then the analyzer, also when tokenizing fails.
    try (Analyzer analizador = (tipo_busqueda == 2) ? new StandardAnalyzer() : new EnglishAnalyzer();
            TokenStream stream = analizador.tokenStream(null, new StringReader(busqueda))) {
        CharTermAttribute catt = stream.addAttribute(CharTermAttribute.class);
        stream.reset();
        while (stream.incrementToken()) {
            palabras.add(catt.toString());
        }
        // end() has to run before close(); the original called them in the opposite order.
        stream.end();
    } catch (IOException e) {
        throw new RuntimeException(e);
    }

    ArrayList<BooleanClause> bc = new ArrayList<BooleanClause>();
    for (String palabra : palabras) {
        Query q = new TermQuery(new Term(campo, palabra));
        if (tipo.equals("should")) {
            bc.add(new BooleanClause(q, BooleanClause.Occur.SHOULD));
        } else if (tipo.equals("must")) {
            bc.add(new BooleanClause(q, BooleanClause.Occur.MUST));
        }
    }
    return bc;
}

From source file:retriever.TermFreq.java

/**
 * Runs {@code query} through this object's analyzer and joins the resulting terms.
 *
 * @param query text to analyse
 * @return every term lower-cased and followed by a single space (so a non-empty
 *         result ends with a space); the empty string when no token is produced
 * @throws Exception if tokenizing fails
 */
String analyze(String query) throws Exception {
    StringBuilder buff = new StringBuilder();
    TokenStream stream = analyzer.tokenStream("dummy", new StringReader(query));
    try {
        CharTermAttribute termAtt = stream.addAttribute(CharTermAttribute.class);
        stream.reset();
        while (stream.incrementToken()) {
            buff.append(termAtt.toString().toLowerCase()).append(" ");
        }
        stream.end();
    } finally {
        // Always release the stream; the original skipped close() when tokenizing threw.
        stream.close();
    }
    return buff.toString();
}

From source file:ri.trabri.Lucene.java

/**
 * Tokenizes {@code text} with this object's analyzer.
 *
 * @param text text to tokenize
 * @return the token terms in the order the analyzer emits them
 * @throws IOException if tokenizing fails
 */
protected ArrayList<String> geraTokens(String text) throws IOException {
    ArrayList<String> words = new ArrayList<>();
    // try-with-resources closes the stream even when tokenizing throws.
    try (TokenStream stream = this.analyzer.tokenStream(null, new StringReader(text))) {
        CharTermAttribute cattr = stream.addAttribute(CharTermAttribute.class);
        stream.reset();
        while (stream.incrementToken()) {
            words.add(cattr.toString());
        }
        stream.end();
    }
    return words;
}

From source file:se.inera.intyg.webcert.web.service.diagnos.repo.DiagnosRepositoryImpl.java

License:Open Source License

/**
 * Searches diagnoses by description. Each token of {@code searchString} becomes a
 * mandatory wildcard clause on the {@code DESC} field, with the wildcard placed
 * both before and after the token.
 *
 * @param searchString text to search for; {@code null} or empty returns an empty list
 * @param nbrOfResults passed on unchanged to {@code searchDiagnosisByQuery}
 * @return the result of {@code searchDiagnosisByQuery} for the built query
 * @throws RuntimeException wrapping the {@link IOException} if tokenizing fails
 */
@Override
public List<Diagnos> searchDiagnosisByDescription(String searchString, int nbrOfResults) {
    if (Strings.isNullOrEmpty(searchString)) {
        return Collections.emptyList();
    }
    BooleanQuery query = new BooleanQuery();
    // The token stream is now a managed resource too; the original never ended or closed it.
    try (StandardAnalyzer analyzer = new StandardAnalyzer();
            TokenStream tokenStream = analyzer.tokenStream(DESC, searchString)) {
        CharTermAttribute charTermAttribute = tokenStream.addAttribute(CharTermAttribute.class);
        tokenStream.reset();
        while (tokenStream.incrementToken()) {
            String term = WildcardQuery.WILDCARD_STRING + charTermAttribute.toString()
                    + WildcardQuery.WILDCARD_STRING;
            query.add(new WildcardQuery(new Term(DESC, term)), BooleanClause.Occur.MUST);
        }
        tokenStream.end();
    } catch (IOException e) {
        throw new RuntimeException("IOException occurred in lucene index search", e);
    }
    return searchDiagnosisByQuery(query, nbrOfResults);
}

From source file:servlets.TermStatsComparator.java

/**
 * Analyses {@code query} with the retriever's analyzer and returns only the first term.
 *
 * @param query text to analyse
 * @return the first term produced by the analyzer, the empty string when there is none,
 *         or {@code query} unchanged when analysis throws
 */
String analyze(String query) {
    StringBuilder buff = new StringBuilder();
    try {
        Analyzer analyzer = retriever.getAnalyzer();
        TokenStream stream = analyzer.tokenStream("dummy", new StringReader(query));
        try {
            CharTermAttribute termAtt = stream.addAttribute(CharTermAttribute.class);
            stream.reset();
            // Only the first token is wanted, so a single step replaces the loop-and-break.
            if (stream.incrementToken()) {
                buff.append(termAtt.toString());
            }
            stream.end();
        } finally {
            // Always release the stream; the original skipped close() when tokenizing threw.
            stream.close();
        }
    } catch (Exception ex) {
        // Best effort: report the failure and fall back to the raw query text.
        ex.printStackTrace();
        return query;
    }
    return buff.toString();
}

From source file:sh.isaac.provider.query.lucene.LuceneIndexer.java

License:Apache License

/**
 * Builds the prefix query./*w ww  .j  a v  a2 s  .com*/
 *
 * @param searchString the search string
 * @param field the field
 * @param analyzer the analyzer
 * @return the query
 * @throws IOException Signals that an I/O exception has occurred.
 */
protected Query buildPrefixQuery(String searchString, String field, Analyzer analyzer) throws IOException {
    final TokenStream tokenStream;
    final List<String> terms;
    try (StringReader textReader = new StringReader(searchString)) {
        tokenStream = analyzer.tokenStream(field, textReader);
        tokenStream.reset();
        terms = new ArrayList<>();
        final CharTermAttribute charTermAttribute = tokenStream.addAttribute(CharTermAttribute.class);
        while (tokenStream.incrementToken()) {
            terms.add(charTermAttribute.toString());
        }
    }
    tokenStream.close();
    analyzer.close();

    final BooleanQuery.Builder bq = new BooleanQuery.Builder();

    if ((terms.size() > 0) && !searchString.endsWith(" ")) {
        final String last = terms.remove(terms.size() - 1);

        bq.add(new PrefixQuery((new Term(field, last))), Occur.MUST);
    }

    terms.stream().forEach((s) -> {
        bq.add(new TermQuery(new Term(field, s)), Occur.MUST);
    });
    return bq.build();
}

From source file:stackoverflow.lucene.modified.MoreLikeThis.java

License:Apache License

/**
 * Adds term frequencies found by tokenizing text from reader into the Map words
 *
 * <p>Tokenizing stops once more than {@code maxNumTokensParsed} tokens have been read.
 * Words rejected by {@code isNoiseWord} are skipped but still count towards that limit.
 *
 * @param r a source of text to be tokenized
 * @param termFreqMap a Map of terms and their frequencies
 * @param fieldName Used by analyzer for any special per-field analysis
 * @throws IOException if tokenizing fails
 * @throws UnsupportedOperationException if no analyzer has been set
 */
private void addTermFrequencies(Reader r, Map<String, Int> termFreqMap, String fieldName) throws IOException {
    if (analyzer == null) {
        throw new UnsupportedOperationException(
                "To use MoreLikeThis without term vectors, you must provide an Analyzer");
    }
    TokenStream ts = analyzer.tokenStream(fieldName, r);
    try {
        int tokenCount = 0;
        // for every token
        CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class);
        ts.reset();
        while (ts.incrementToken()) {
            String word = termAtt.toString();
            tokenCount++;
            if (tokenCount > maxNumTokensParsed) {
                break;
            }
            if (isNoiseWord(word)) {
                continue;
            }

            // increment frequency
            Int cnt = termFreqMap.get(word);
            if (cnt == null) {
                termFreqMap.put(word, new Int());
            } else {
                cnt.x++;
            }
        }
        ts.end();
    } finally {
        // Always release the stream; the original skipped close() when tokenizing threw.
        ts.close();
    }
}

From source file:stroom.search.server.TestStandardAnalyser.java

License:Apache License

/**
 * Prints the analyser's class name followed by every token it produces for
 * {@code input}, one token per line, to standard output.
 *
 * @param input    text to analyse
 * @param analyzer analyser under test
 * @throws Exception if tokenizing fails
 */
private void testAnalyser(final String input, final Analyzer analyzer) throws Exception {
    System.out.println("Testing analyser: " + analyzer.getClass().getName());

    final ReusableStringReader reader = new ReusableStringReader();
    reader.init(input);

    // The original never ended or closed the stream; try-with-resources closes it.
    try (TokenStream stream = analyzer.tokenStream("Test", reader)) {
        // Fetch the attribute before consuming any token.
        final CharTermAttribute termAtt = stream.addAttribute(CharTermAttribute.class);

        // reset the TokenStream to the first token
        stream.reset();

        while (stream.incrementToken()) {
            // Only the first length() chars of the term buffer are valid.
            System.out.println(new String(termAtt.buffer(), 0, termAtt.length()));
        }
        stream.end();
    }
}

From source file:test.analysis.AnalyzerUtils.java

License:Apache License

/**
 * Prints every term of {@code stream} to standard output as {@code [term] } and
 * then closes the stream.
 *
 * @param stream token stream to consume; closed before this method returns
 * @throws IOException if reading from the stream fails
 */
public static void displayTokens(TokenStream stream) throws IOException {
    try {
        CharTermAttribute term = stream.addAttribute(CharTermAttribute.class);
        stream.reset();
        while (stream.incrementToken()) {
            System.out.print("[" + term + "] ");
        }
        // end() was missing in the original; it belongs between the last token and close().
        stream.end();
    } finally {
        stream.close();
    }
}