List of usage examples for org.apache.lucene.analysis TokenStream addAttribute
public final <T extends Attribute> T addAttribute(Class<T> attClass)
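Before the collected examples, here is a minimal, self-contained sketch of the usual pattern around addAttribute: register the attribute, reset the stream, pull tokens with incrementToken, then end and close. The field name "body", the StandardAnalyzer, and the class name AddAttributeSketch are illustrative assumptions, not taken from any of the source files below.

import java.io.IOException;
import java.io.StringReader;
import java.util.ArrayList;
import java.util.List;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;

public class AddAttributeSketch {
    public static List<String> tokenize(String text) throws IOException {
        List<String> terms = new ArrayList<>();
        try (Analyzer analyzer = new StandardAnalyzer();
                TokenStream stream = analyzer.tokenStream("body", new StringReader(text))) {
            // Register (or retrieve) the term attribute before consuming the stream.
            CharTermAttribute termAtt = stream.addAttribute(CharTermAttribute.class);
            stream.reset();                     // required before the first incrementToken()
            while (stream.incrementToken()) {
                terms.add(termAtt.toString()); // the attribute instance is updated in place per token
            }
            stream.end();                       // finalize end-of-stream state before closing
        }                                       // try-with-resources closes the stream, then the analyzer
        return terms;
    }
}

The examples that follow show the same pattern embedded in query building, term-frequency counting, and debugging utilities.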
From source file:practica3b.Practica3b.java
public static TopDocs busquedaAuthor(IndexSearcher is, String tipo, String tipo_year, String authors,
        Integer num1, Integer num2, FacetsCollector fc) throws IOException {
    Analyzer analizador = new StandardAnalyzer();
    List<String> palabras = new ArrayList<String>();
    try {
        // Tokenize the author string and collect each term.
        TokenStream stream = analizador.tokenStream(null, new StringReader(authors));
        CharTermAttribute catt = stream.addAttribute(CharTermAttribute.class);
        stream.reset();
        while (stream.incrementToken()) {
            palabras.add(catt.toString());
        }
        stream.end();
        stream.close();
    } catch (IOException e) {
        throw new RuntimeException(e);
    }
    // Build one term clause per extracted author term.
    ArrayList<BooleanClause> bc = new ArrayList<BooleanClause>();
    for (int i = 0; i < palabras.size(); i++) {
        Query query = new TermQuery(new Term("Authors", palabras.get(i)));
        if (tipo.equals("should"))
            bc.add(new BooleanClause(query, BooleanClause.Occur.SHOULD));
        else if (tipo.equals("must"))
            bc.add(new BooleanClause(query, BooleanClause.Occur.MUST));
    }
    BooleanQuery.Builder bqbuilder = new BooleanQuery.Builder();
    for (int i = 0; i < bc.size(); i++) {
        bqbuilder.add(bc.get(i));
    }
    if (num1 != null) {
        Query q;
        if (num2 == null) {
            q = IntPoint.newExactQuery("Year", num1);
            bqbuilder.add(q, BooleanClause.Occur.MUST);
        } else {
            if (tipo_year.equals("range")) {
                q = IntPoint.newRangeQuery("Year", num1, num2);
                bqbuilder.add(q, BooleanClause.Occur.MUST);
            } else {
                q = IntPoint.newSetQuery("Year", num1, num2);
                bqbuilder.add(q, BooleanClause.Occur.MUST);
            }
        }
    }
    BooleanQuery bq = bqbuilder.build();
    fc = new FacetsCollector();
    TopDocs td = FacetsCollector.search(is, bq, 10, fc);
    for (ScoreDoc scoreDoc : td.scoreDocs) {
        Document doc = is.doc(scoreDoc.doc);
        System.out.println(scoreDoc.score + " - " + doc.get("Authors") + " - " + doc.get("Title")
                + " - Year: " + doc.get("Year"));
    }
    return td;
}
From source file:practica3b.Practica3b.java
public static ArrayList<BooleanClause> createClause(String busqueda, int tipo_busqueda, String tipo) {
    Analyzer analizador;
    List<String> palabras = new ArrayList<String>();
    if (tipo_busqueda == 1) {
        analizador = new EnglishAnalyzer();
    } else if (tipo_busqueda == 2) {
        analizador = new StandardAnalyzer();
    } else {
        analizador = new EnglishAnalyzer();
    }
    try {
        // Tokenize the search string and collect each term.
        TokenStream stream = analizador.tokenStream(null, new StringReader(busqueda));
        CharTermAttribute catt = stream.addAttribute(CharTermAttribute.class);
        stream.reset();
        while (stream.incrementToken()) {
            palabras.add(catt.toString());
        }
        stream.end();
        stream.close();
    } catch (IOException e) {
        throw new RuntimeException(e);
    }
    ArrayList<BooleanClause> bc = new ArrayList<BooleanClause>();
    for (int i = 0; i < palabras.size(); i++) {
        Query q;
        if (tipo_busqueda == 1)
            q = new TermQuery(new Term("Title", palabras.get(i)));
        else if (tipo_busqueda == 2)
            q = new TermQuery(new Term("Authors", palabras.get(i)));
        else
            q = new TermQuery(new Term("Abstract", palabras.get(i)));
        if (tipo.equals("should"))
            bc.add(new BooleanClause(q, BooleanClause.Occur.SHOULD));
        else if (tipo.equals("must"))
            bc.add(new BooleanClause(q, BooleanClause.Occur.MUST));
    }
    return bc;
}
From source file:retriever.TermFreq.java
String analyze(String query) throws Exception {
    StringBuffer buff = new StringBuffer();
    TokenStream stream = analyzer.tokenStream("dummy", new StringReader(query));
    CharTermAttribute termAtt = stream.addAttribute(CharTermAttribute.class);
    stream.reset();
    while (stream.incrementToken()) {
        String term = termAtt.toString();
        term = term.toLowerCase();
        buff.append(term).append(" ");
    }
    stream.end();
    stream.close();
    return buff.toString();
}
From source file:ri.trabri.Lucene.java
protected ArrayList<String> geraTokens(String text) throws IOException {
    TokenStream stream = this.analyzer.tokenStream(null, new StringReader(text));
    ArrayList<String> words = new ArrayList<>();
    CharTermAttribute cattr = stream.addAttribute(CharTermAttribute.class);
    stream.reset();
    while (stream.incrementToken()) {
        words.add(cattr.toString());
    }
    stream.end();
    stream.close();
    return words;
}
From source file:se.inera.intyg.webcert.web.service.diagnos.repo.DiagnosRepositoryImpl.java
License:Open Source License
@Override
public List<Diagnos> searchDiagnosisByDescription(String searchString, int nbrOfResults) {
    if (Strings.isNullOrEmpty(searchString)) {
        return Collections.emptyList();
    }
    BooleanQuery query = new BooleanQuery();
    try (StandardAnalyzer analyzer = new StandardAnalyzer()) {
        TokenStream tokenStream = analyzer.tokenStream(DESC, searchString);
        CharTermAttribute charTermAttribute = tokenStream.addAttribute(CharTermAttribute.class);
        tokenStream.reset();
        while (tokenStream.incrementToken()) {
            String term = WildcardQuery.WILDCARD_STRING + charTermAttribute.toString()
                    + WildcardQuery.WILDCARD_STRING;
            query.add(new WildcardQuery(new Term(DESC, term)), BooleanClause.Occur.MUST);
        }
    } catch (IOException e) {
        throw new RuntimeException("IOException occurred in lucene index search", e);
    }
    return searchDiagnosisByQuery(query, nbrOfResults);
}
From source file:servlets.TermStatsComparator.java
String analyze(String query) {
    StringBuffer buff = new StringBuffer();
    try {
        Analyzer analyzer = retriever.getAnalyzer();
        TokenStream stream = analyzer.tokenStream("dummy", new StringReader(query));
        CharTermAttribute termAtt = stream.addAttribute(CharTermAttribute.class);
        stream.reset();
        while (stream.incrementToken()) {
            String term = termAtt.toString();
            buff.append(term);
            break; // only the first analyzed token is kept
        }
        stream.end();
        stream.close();
    } catch (Exception ex) {
        ex.printStackTrace();
        return query;
    }
    return buff.toString();
}
From source file:sh.isaac.provider.query.lucene.LuceneIndexer.java
License:Apache License
/**
 * Builds the prefix query.
 *
 * @param searchString the search string
 * @param field the field
 * @param analyzer the analyzer
 * @return the query
 * @throws IOException Signals that an I/O exception has occurred.
 */
protected Query buildPrefixQuery(String searchString, String field, Analyzer analyzer) throws IOException {
    final TokenStream tokenStream;
    final List<String> terms;
    try (StringReader textReader = new StringReader(searchString)) {
        tokenStream = analyzer.tokenStream(field, textReader);
        tokenStream.reset();
        terms = new ArrayList<>();
        final CharTermAttribute charTermAttribute = tokenStream.addAttribute(CharTermAttribute.class);
        while (tokenStream.incrementToken()) {
            terms.add(charTermAttribute.toString());
        }
    }
    tokenStream.close();
    analyzer.close();

    final BooleanQuery.Builder bq = new BooleanQuery.Builder();
    if ((terms.size() > 0) && !searchString.endsWith(" ")) {
        final String last = terms.remove(terms.size() - 1);
        bq.add(new PrefixQuery((new Term(field, last))), Occur.MUST);
    }
    terms.stream().forEach((s) -> {
        bq.add(new TermQuery(new Term(field, s)), Occur.MUST);
    });
    return bq.build();
}
From source file:stackoverflow.lucene.modified.MoreLikeThis.java
License:Apache License
/**
 * Adds term frequencies found by tokenizing text from reader into the Map words
 *
 * @param r a source of text to be tokenized
 * @param termFreqMap a Map of terms and their frequencies
 * @param fieldName Used by analyzer for any special per-field analysis
 */
private void addTermFrequencies(Reader r, Map<String, Int> termFreqMap, String fieldName) throws IOException {
    if (analyzer == null) {
        throw new UnsupportedOperationException(
                "To use MoreLikeThis without term vectors, you must provide an Analyzer");
    }
    TokenStream ts = analyzer.tokenStream(fieldName, r);
    int tokenCount = 0;
    // for every token
    CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class);
    ts.reset();
    while (ts.incrementToken()) {
        String word = termAtt.toString();
        tokenCount++;
        if (tokenCount > maxNumTokensParsed) {
            break;
        }
        if (isNoiseWord(word)) {
            continue;
        }
        // increment frequency
        Int cnt = termFreqMap.get(word);
        if (cnt == null) {
            termFreqMap.put(word, new Int());
        } else {
            cnt.x++;
        }
    }
    ts.end();
    ts.close();
}
From source file:stroom.search.server.TestStandardAnalyser.java
License:Apache License
private void testAnalyser(final String input, final Analyzer analyzer) throws Exception {
    System.out.println("Testing analyser: " + analyzer.getClass().getName());

    final ReusableStringReader reader = new ReusableStringReader();
    reader.init(input);

    final TokenStream stream = analyzer.tokenStream("Test", reader);

    // reset the TokenStream to the first token
    stream.reset();

    boolean hasMoreTokens = stream.incrementToken();
    final CharTermAttribute termAtt = stream.addAttribute(CharTermAttribute.class);

    for (;;) {
        if (!hasMoreTokens) {
            break;
        }

        // Get the text of this term.
        final char[] tokenText = termAtt.buffer();
        final int tokenTextLen = termAtt.length();
        System.out.println(new String(tokenText, 0, tokenTextLen));

        hasMoreTokens = stream.incrementToken();
    }
}
From source file:test.analysis.AnalyzerUtils.java
License:Apache License
public static void displayTokens(TokenStream stream) throws IOException {
    stream.reset();
    CharTermAttribute term = stream.addAttribute(CharTermAttribute.class);
    while (stream.incrementToken()) {
        System.out.print("[" + term + "] ");
    }
    stream.close();
}
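The same addAttribute call works for any Attribute subinterface, so a stream can expose several attributes at once. Below is a minimal sketch, not taken from any of the source files above, that extends the displayTokens idea to also read character offsets and position increments; the class name AnalyzerUtilsExtra is purely illustrative.

import java.io.IOException;

import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;

public class AnalyzerUtilsExtra {
    // Prints each term together with its character offsets and position increment.
    public static void displayTokensWithDetails(TokenStream stream) throws IOException {
        CharTermAttribute term = stream.addAttribute(CharTermAttribute.class);
        OffsetAttribute offset = stream.addAttribute(OffsetAttribute.class);
        PositionIncrementAttribute posIncr = stream.addAttribute(PositionIncrementAttribute.class);
        stream.reset();
        while (stream.incrementToken()) {
            System.out.printf("[%s] %d-%d (+%d)%n",
                    term.toString(), offset.startOffset(), offset.endOffset(),
                    posIncr.getPositionIncrement());
        }
        stream.end();
        stream.close();
    }
}

Because all three attribute instances are registered before reset(), each incrementToken() updates them in place, just as in the single-attribute examples above.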