List of usage examples for org.apache.lucene.analysis TokenStream close
@Override public void close() throws IOException
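TokenStream follows a strict consumer lifecycle: reset(), then incrementToken() until it returns false, then end(), and finally close(). Every example below implements some variant of that contract. As a minimal sketch of the pattern, assuming Lucene 4.x or later (where TokenStream implements Closeable and Analyzer offers a tokenStream(String, String) overload), try-with-resources can take care of the close() call:

import java.io.IOException;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;

public class TokenStreamCloseSketch {
    public static void main(String[] args) throws IOException {
        Analyzer analyzer = new StandardAnalyzer();
        // try-with-resources calls ts.close() even if consuming the stream throws
        try (TokenStream ts = analyzer.tokenStream("body", "Hello token stream")) {
            CharTermAttribute term = ts.addAttribute(CharTermAttribute.class);
            ts.reset();                  // mandatory before the first incrementToken()
            while (ts.incrementToken()) {
                System.out.println(term.toString());
            }
            ts.end();                    // sets final state, e.g. the final offset
        }                                // close() releases the stream's resources
        analyzer.close();
    }
}

After close(), ask the analyzer for a fresh TokenStream rather than reusing the closed one.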
From source file:org.wltea.analyzer.ik_analyzer5.IKAnalzyerTest.java
License:Apache License
@Test
public void testIK() {
    String text = "???";
    // Build the IK analyzer in smart segmentation mode
    Analyzer analyzer = new IKAnalyzer(true);
    // Obtain the Lucene TokenStream
    TokenStream ts = null;
    try {
        ts = analyzer.tokenStream("myfield", new StringReader(text));
        // Get the token offset attribute
        OffsetAttribute offset = ts.addAttribute(OffsetAttribute.class);
        // Get the token text attribute
        CharTermAttribute term = ts.addAttribute(CharTermAttribute.class);
        // Get the token type attribute
        TypeAttribute type = ts.addAttribute(TypeAttribute.class);
        // Reset the TokenStream (resets the StringReader)
        ts.reset();
        // Iterate over the tokenization results
        while (ts.incrementToken()) {
            System.out.println(offset.startOffset() + " - " + offset.endOffset() + " : " + term.toString()
                    + " | " + type.type());
        }
        // Perform end-of-stream operations, e.g. set the final offset
        ts.end();
    } catch (IOException e) {
        e.printStackTrace();
    } finally {
        // Release all resources held by the TokenStream
        if (ts != null) {
            try {
                ts.close();
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
        analyzer.close();
    }
}
From source file:org.wltea.analyzer.sample.IKAnalzyerDemo.java
License:Apache License
public static void main(String[] args) {
    // Build the IK analyzer in smart segmentation mode
    Analyzer analyzer = new IKAnalyzerP(true);
    // Obtain the Lucene TokenStream
    TokenStream ts = null;
    try {
        // ts = analyzer.tokenStream("myfield", new StringReader("WORLD ,.. html DATA</html>HELLO"));
        ts = analyzer.tokenStream("myfield",
                new StringReader("?????IKAnalyer can analysis english text too"));
        // ts = analyzer.tokenStream("myfield", new StringReader("???pinyin hanyu Contribute index to jpinyin development by creating an account on GitHub"));
        // Get the token offset attribute
        OffsetAttribute offset = ts.addAttribute(OffsetAttribute.class);
        // Get the token text attribute
        CharTermAttribute term = ts.addAttribute(CharTermAttribute.class);
        // Get the token type attribute
        TypeAttribute type = ts.addAttribute(TypeAttribute.class);
        // Reset the TokenStream (resets the StringReader)
        ts.reset();
        // Iterate over the tokenization results
        while (ts.incrementToken()) {
            System.out.println(offset.startOffset() + " - " + offset.endOffset() + " : " + term.toString()
                    + " | " + type.type());
        }
        // Perform end-of-stream operations, e.g. set the final offset
        ts.end();
    } catch (IOException e) {
        e.printStackTrace();
    } finally {
        // Release all resources held by the TokenStream
        if (ts != null) {
            try {
                ts.close();
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
    }
}
From source file:org.wltea.analyzer.sample.ThulacAnalzyerDemo.java
License:Apache License
public static void main(String[] args) {
    // Build the Thulac analyzer in smart segmentation mode
    Analyzer analyzer = new ThulacAnalyzer(true);
    // Obtain the Lucene TokenStream
    TokenStream ts = null;
    try {
        long start = System.currentTimeMillis();
        ts = analyzer.tokenStream("myfield",
                new StringReader("?????IKAnalyer can analysis english text too"));
        // Get the token offset attribute
        OffsetAttribute offset = ts.addAttribute(OffsetAttribute.class);
        // Get the token text attribute
        CharTermAttribute term = ts.addAttribute(CharTermAttribute.class);
        // Get the token type attribute
        TypeAttribute type = ts.addAttribute(TypeAttribute.class);
        // Reset the TokenStream (resets the StringReader)
        ts.reset();
        // Iterate over the tokenization results
        while (ts.incrementToken()) {
            System.out.println(offset.startOffset() + " - " + offset.endOffset() + " : " + term.toString()
                    + " | " + type.type());
        }
        // Perform end-of-stream operations, e.g. set the final offset
        ts.end();
        System.out.println("wast:" + (System.currentTimeMillis() - start));
    } catch (IOException e) {
        e.printStackTrace();
    } finally {
        // Release all resources held by the TokenStream
        if (ts != null) {
            try {
                ts.close();
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
    }
}
From source file:org.wltea.analyzer.test.IKAnalzyerDemo.java
License:Apache License
public static void main(String[] args) {
    // Build the IK analyzer in smart segmentation mode
    Analyzer analyzer = new IKAnalyzer4PinYin(true);
    // Obtain the Lucene TokenStream
    TokenStream ts = null;
    try {
        ts = analyzer.tokenStream("myfield",
                new StringReader("?????IKAnalyer can analysis english text too"));
        // Get the token offset attribute
        OffsetAttribute offset = ts.addAttribute(OffsetAttribute.class);
        // Get the token text attribute
        CharTermAttribute term = ts.addAttribute(CharTermAttribute.class);
        // Get the token type attribute
        TypeAttribute type = ts.addAttribute(TypeAttribute.class);
        // Reset the TokenStream (resets the StringReader)
        ts.reset();
        // Iterate over the tokenization results
        while (ts.incrementToken()) {
            System.out.println(offset.startOffset() + " - " + offset.endOffset() + " : " + term.toString()
                    + " | " + type.type());
        }
        // Perform end-of-stream operations, e.g. set the final offset
        ts.end();
    } catch (IOException e) {
        e.printStackTrace();
    } finally {
        // Release all resources held by the TokenStream
        if (ts != null) {
            try {
                ts.close();
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
    }
}
From source file:org.zenoss.zep.index.impl.lucene.LuceneQueryBuilder.java
License:Open Source License
/**
 * Tokenizes the given query using the same behavior as when the field is analyzed.
 *
 * @param fieldName The field name in the index.
 * @param analyzer  The analyzer to use to tokenize the query.
 * @param query     The query to tokenize.
 * @return The tokens from the query.
 * @throws ZepException If an exception occurs.
 */
private static List<String> getTokens(String fieldName, Analyzer analyzer, String query) throws ZepException {
    final List<String> tokens = new ArrayList<String>();
    try {
        TokenStream ts = analyzer.tokenStream(fieldName, new StringReader(query));
        CharTermAttribute term = ts.addAttribute(CharTermAttribute.class);
        try {
            ts.reset();
            while (ts.incrementToken()) {
                tokens.add(term.toString());
            }
            ts.end();
        } catch (IOException e) {
            throw new ZepException(e.getLocalizedMessage(), e);
        } finally {
            ts.close();
        }
    } catch (IOException e) {
        throw new ZepException(e.getLocalizedMessage(), e);
    }
    return tokens;
}
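The nested try/finally above exists only to guarantee that close() runs even when tokenization fails. On Lucene versions where TokenStream implements Closeable, a sketch of the same method using try-with-resources (keeping the original ZepException wrapping) could look like this:

private static List<String> getTokens(String fieldName, Analyzer analyzer, String query) throws ZepException {
    final List<String> tokens = new ArrayList<>();
    try (TokenStream ts = analyzer.tokenStream(fieldName, new StringReader(query))) {
        CharTermAttribute term = ts.addAttribute(CharTermAttribute.class);
        ts.reset();
        while (ts.incrementToken()) {
            tokens.add(term.toString());
        }
        ts.end();  // close() is handled by try-with-resources
    } catch (IOException e) {
        throw new ZepException(e.getLocalizedMessage(), e);
    }
    return tokens;
}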
From source file:org.zilverline.lucene.BoostingParser.java
License:Open Source License
/**
 * Callback that returns a Query with boosted fields using BoostFactors.
 *
 * @param field     the field to query
 * @param analyzer  the analyzer to use
 * @param queryText the query
 * @return Query object
 * @throws ParseException if the Query can't be made
 */
protected Query getFieldQuery(String field, Analyzer analyzer, String queryText) throws ParseException {
    // Use the analyzer to get all the tokens, and then build a TermQuery,
    // PhraseQuery, or nothing based on the term count.
    // For fields that contain 'contents', add boost factors for other terms
    // specified in BoostFactor.
    if (factors != null && factors.getFactors() != null && !factors.getFactors().isEmpty()
            && defaultField.equals(field)) {
        TokenStream source = analyzer.tokenStream(field, new StringReader(queryText));
        Vector v = new Vector();
        org.apache.lucene.analysis.Token t;
        while (true) {
            try {
                t = source.next();
            } catch (IOException e) {
                t = null;
            }
            if (t == null) {
                break;
            }
            v.addElement(t.termText());
            log.debug(field + " , " + t.termText());
        }
        try {
            source.close();
        } catch (IOException e) {
            log.error("Unexpected Exception");
        }
        if (v.size() == 0) {
            return null;
        } else {
            // Create a new composed query
            BooleanQuery bq = new BooleanQuery();
            // For every boost factor create a new PhraseQuery
            Iterator iter = factors.getFactors().entrySet().iterator();
            while (iter.hasNext()) {
                Map.Entry element = (Map.Entry) iter.next();
                String thisField = ((String) element.getKey()).toLowerCase();
                Float boost = (Float) element.getValue();
                PhraseQuery q = new PhraseQuery();
                // Add all the terms of the query
                for (int i = 0; i < v.size(); i++) {
                    q.add(new Term(thisField, (String) v.elementAt(i)));
                }
                // Boost the query
                q.setBoost(boost.floatValue());
                // Add it to the composed query
                bq.add(q, false, false);
            }
            log.debug("Query: " + bq);
            return bq;
        }
    } else {
        log.debug("Treat like normal query: " + queryText);
        return super.getFieldQuery(field, analyzer, queryText);
    }
}
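This example targets the legacy pre-2.9 Lucene API: TokenStream.next() and Token.termText() have long since been replaced by the attribute-based API, and the code calls close() without a preceding end(). For comparison, here is a sketch of the token-collection loop in the modern API; collectTerms is a name introduced here for illustration, not part of the original class:

// Hypothetical helper: collects analyzed terms the way the loop above does,
// but with the attribute-based API that replaced TokenStream.next().
private static List<String> collectTerms(Analyzer analyzer, String field, String queryText)
        throws IOException {
    List<String> terms = new ArrayList<>();
    try (TokenStream source = analyzer.tokenStream(field, new StringReader(queryText))) {
        CharTermAttribute term = source.addAttribute(CharTermAttribute.class);
        source.reset();
        while (source.incrementToken()) {
            terms.add(term.toString());
        }
        source.end();
    }
    return terms;
}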
From source file:perf.TestAnalyzerPerf.java
License:Apache License
private static void testAnalyzer(String desc, File wikiLinesFile, Analyzer a, int warmupCount, int runCount)
        throws Exception {
    System.out.println("\nTEST: " + desc);
    // 64 KB buffer
    InputStream is = new FileInputStream(wikiLinesFile);
    BufferedReader reader = new BufferedReader(new InputStreamReader(is, "UTF-8"), 1 << 16);
    long startTime = System.currentTimeMillis();
    long sumTime = 0;
    long hash = 0;
    long tokenCount = 0;
    int totCount = warmupCount + runCount;
    for (int i = 0; i < totCount; i++) {
        boolean isWarmup = i < warmupCount;
        if (i % 10000 == 0) {
            System.out.println(String.format(Locale.ROOT, "%.1f sec: %d...",
                    (System.currentTimeMillis() - startTime) / 1000.0, i));
        }
        String s = reader.readLine();
        long t0 = System.nanoTime();
        TokenStream ts = a.tokenStream("field", new StringReader(s));
        ts.reset();
        CharTermAttribute termAtt = ts.getAttribute(CharTermAttribute.class);
        PositionIncrementAttribute posIncAtt;
        if (ts.hasAttribute(PositionIncrementAttribute.class)) {
            posIncAtt = ts.getAttribute(PositionIncrementAttribute.class);
        } else {
            posIncAtt = null;
        }
        OffsetAttribute offsetAtt;
        if (ts.hasAttribute(OffsetAttribute.class)) {
            offsetAtt = ts.getAttribute(OffsetAttribute.class);
        } else {
            offsetAtt = null;
        }
        while (ts.incrementToken()) {
            hash += 31 * ArrayUtil.hashCode(termAtt.buffer(), 0, termAtt.length());
            if (posIncAtt != null) {
                hash += 31 * posIncAtt.getPositionIncrement();
            }
            if (offsetAtt != null) {
                hash += 31 * offsetAtt.startOffset();
                hash += 31 * offsetAtt.endOffset();
            }
            if (isWarmup == false) {
                tokenCount++;
            }
        }
        ts.end();
        ts.close();
        if (isWarmup == false) {
            sumTime += System.nanoTime() - t0;
        }
    }
    reader.close();
    System.out.println(String.format(Locale.ROOT, "%s time=%.2f msec hash=%d tokens=%d", desc,
            (sumTime / 1000000.0), hash, tokenCount));
}
From source file:practica2_1.Practica2_1.java
public static List<String> tokenizeString(Analyzer analyzer, String string) {
    List<String> result = new ArrayList<String>();
    try {
        TokenStream stream = analyzer.tokenStream(null, new StringReader(string));
        //OffsetAttribute offsetAtt = stream.addAttribute(OffsetAttribute.class);
        CharTermAttribute cAtt = stream.addAttribute(CharTermAttribute.class);
        stream.reset();
        while (stream.incrementToken()) {
            //result.add(stream.getAttribute(CharTermAttribute.class).toString());
            result.add(cAtt.toString());
        }
        // end() must be called before close(), not after it
        stream.end();
        stream.close();
    } catch (IOException e) {
        // not thrown b/c we're using a string reader...
        throw new RuntimeException(e);
    }
    return result;
}
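A quick usage sketch of the helper above (the analyzer choice and input string are illustrative):

// With StandardAnalyzer this prints [quick, brown, foxes]:
// terms are split on whitespace/punctuation and lowercased.
List<String> tokens = tokenizeString(new StandardAnalyzer(), "Quick Brown Foxes");
System.out.println(tokens);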
From source file:practica3b.Practica3b.java
public static TopDocs busquedaAuthor(IndexSearcher is, String tipo, String tipo_year, String authors,
        Integer num1, Integer num2, FacetsCollector fc) throws IOException {
    Analyzer analizador = new StandardAnalyzer();
    List<String> palabras = new ArrayList<String>();
    try {
        TokenStream stream = analizador.tokenStream(null, new StringReader(authors));
        CharTermAttribute catt = stream.addAttribute(CharTermAttribute.class);
        stream.reset();
        while (stream.incrementToken()) {
            palabras.add(catt.toString());
        }
        // end() must be called before close(), not after it
        stream.end();
        stream.close();
    } catch (IOException e) {
        throw new RuntimeException(e);
    }
    ArrayList<BooleanClause> bc = new ArrayList<BooleanClause>();
    for (int i = 0; i < palabras.size(); i++) {
        // use palabras.get(i), not palabras.get(0), so every token is queried
        Query query = new TermQuery(new Term("Authors", palabras.get(i)));
        if (tipo.equals("should"))
            bc.add(new BooleanClause(query, BooleanClause.Occur.SHOULD));
        else if (tipo.equals("must"))
            bc.add(new BooleanClause(query, BooleanClause.Occur.MUST));
    }
    BooleanQuery.Builder bqbuilder = new BooleanQuery.Builder();
    for (int i = 0; i < bc.size(); i++) {
        bqbuilder.add(bc.get(i));
    }
    if (num1 != null) {
        Query q;
        if (num2 == null) {
            q = IntPoint.newExactQuery("Year", num1);
            bqbuilder.add(q, BooleanClause.Occur.MUST);
        } else {
            if (tipo_year.equals("range")) {
                q = IntPoint.newRangeQuery("Year", num1, num2);
                bqbuilder.add(q, BooleanClause.Occur.MUST);
            } else {
                q = IntPoint.newSetQuery("Year", num1, num2);
                bqbuilder.add(q, BooleanClause.Occur.MUST);
            }
        }
    }
    BooleanQuery bq = bqbuilder.build();
    // note: reassigning fc here only affects the local variable, not the caller's reference
    fc = new FacetsCollector();
    TopDocs td = FacetsCollector.search(is, bq, 10, fc);
    for (ScoreDoc scoreDoc : td.scoreDocs) {
        Document doc = is.doc(scoreDoc.doc);
        System.out.println(scoreDoc.score + " - " + doc.get("Authors") + " - " + doc.get("Title")
                + " - Year: " + doc.get("Year"));
    }
    return td;
}
From source file:practica3b.Practica3b.java
public static ArrayList<BooleanClause> createClause(String busqueda, int tipo_busqueda, String tipo) {
    Analyzer analizador;
    List<String> palabras = new ArrayList<String>();
    if (tipo_busqueda == 1) {
        analizador = new EnglishAnalyzer();
    } else if (tipo_busqueda == 2) {
        analizador = new StandardAnalyzer();
    } else {
        analizador = new EnglishAnalyzer();
    }
    try {
        TokenStream stream = analizador.tokenStream(null, new StringReader(busqueda));
        CharTermAttribute catt = stream.addAttribute(CharTermAttribute.class);
        stream.reset();
        while (stream.incrementToken()) {
            palabras.add(catt.toString());
        }
        // end() must be called before close(), not after it
        stream.end();
        stream.close();
    } catch (IOException e) {
        throw new RuntimeException(e);
    }
    ArrayList<BooleanClause> bc = new ArrayList<BooleanClause>();
    for (int i = 0; i < palabras.size(); i++) {
        Query q;
        if (tipo_busqueda == 1)
            q = new TermQuery(new Term("Title", palabras.get(i)));
        else if (tipo_busqueda == 2)
            q = new TermQuery(new Term("Authors", palabras.get(i)));
        else
            q = new TermQuery(new Term("Abstract", palabras.get(i)));
        if (tipo.equals("should"))
            bc.add(new BooleanClause(q, BooleanClause.Occur.SHOULD));
        else if (tipo.equals("must"))
            bc.add(new BooleanClause(q, BooleanClause.Occur.MUST));
    }
    return bc;
}
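A possible way to consume createClause, mirroring how busquedaAuthor assembles its BooleanQuery above (the query string and mode are illustrative):

// Build a BooleanQuery over the analyzed Title terms (tipo_busqueda == 1)
ArrayList<BooleanClause> clauses = createClause("information retrieval models", 1, "should");
BooleanQuery.Builder builder = new BooleanQuery.Builder();
for (BooleanClause clause : clauses) {
    builder.add(clause);
}
BooleanQuery query = builder.build();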