Example usage for org.apache.lucene.analysis.en PorterStemFilter reset

List of usage examples for org.apache.lucene.analysis.en PorterStemFilter reset

Introduction

In this page you can find the example usage for org.apache.lucene.analysis.en PorterStemFilter reset.

Prototype

@Override
public void reset() throws IOException 

Source Link

Document

NOTE: The default implementation chains the call to the input TokenStream, so be sure to call super.reset() when overriding this method.

Usage

From source file:com.nec.scg.senseRanking.CountTextSimilarity.java

/**
 * Computes a TF-IDF weight for every Porter-stemmed term of {@code str}.
 * <p>
 * TF is the term's frequency divided by the total token count; IDF is
 * {@code log(AllArticle / (hits + 1)) + 1}, where {@code hits} comes from
 * {@link #searchIndexforIDF(String)} (the +1 smooths zero-hit terms).
 *
 * @param str the raw text to analyze
 * @param a   the analyzer that produces the base token stream
 * @return a sorted map from stemmed term to its TF-IDF weight; empty if
 *         tokenization fails or yields no tokens
 */
public Map<String, Float> CountTF_IDF(String str, Analyzer a) {
    Map<String, Float> termVector = new TreeMap<String, Float>();

    // try-with-resources guarantees the stream chain is closed even when
    // incrementToken() throws (the original leaked both streams on error).
    try (TokenStream stream = a.tokenStream("content", new StringReader(str));
            PorterStemFilter filter = new PorterStemFilter(stream)) {
        CharTermAttribute cta = filter.addAttribute(CharTermAttribute.class);
        filter.reset();
        int wordCount = 0;
        while (filter.incrementToken()) {
            // merge(): 1 on first sight, +1 on every repeat.
            termVector.merge(cta.toString(), 1f, Float::sum);
            wordCount++;
        }
        filter.end(); // TokenStream contract: end() after the last incrementToken()

        // Rewrite raw counts into TF-IDF weights in place. Iterating the
        // entry set and using setValue avoids the get-then-put round trip.
        for (Map.Entry<String, Float> entry : termVector.entrySet()) {
            int hits = searchIndexforIDF(entry.getKey()) + 1; // +1: avoid log of 0 hits
            float idf = (float) (Math.log(AllArticle * 1.0 / hits) + 1.0);
            float tf = entry.getValue() / wordCount; // wordCount > 0 whenever the map is non-empty
            entry.setValue(tf * idf);
        }
    } catch (IOException e) {
        // NOTE(review): swallowing keeps the original best-effort contract;
        // consider propagating or logging via a real logger instead.
        e.printStackTrace();
    }
    return termVector;
}

From source file:nl.uva.p2psearch.Main.java

/**
 * Tokenizes the description of a metadata entry into Porter-stemmed unigrams
 * plus word n-grams (2 .. {@code maxNGrams}), producing one inverted-index
 * entry per emitted token occurrence.
 *
 * @param e the metadata entry whose description text is indexed
 * @return the inverted-index entries for all unigrams and n-grams
 * @throws IOException if tokenization fails
 */
private static List<InvertedIndexEntry> getInvertedIndexEntries(final MetadataEntry e) throws IOException {
    String text = e.getDescription();
    // Running occurrence count per hashed term, shared by both passes.
    Map<Number160, Integer> dictionary = new HashMap<>();
    List<InvertedIndexEntry> list = new ArrayList<>();

    Analyzer analyzer = new ArmenianAnalyzer(Version.LUCENE_42, Utils.getCharArrayStopwords());
    try (TokenStream tokenStream = analyzer.tokenStream("field", new StringReader(text))) {
        PorterStemFilter psf = new PorterStemFilter(tokenStream);
        CharTermAttribute term = psf.addAttribute(CharTermAttribute.class);
        psf.reset();
        // Collect the stemmed unigrams; sb accumulates them so the shingle
        // pass below runs over the already-stemmed text.
        StringBuilder sb = new StringBuilder();
        while (psf.incrementToken()) {
            Integer tf;
            Number160 termKey = Number160.createHash(term.toString());
            if (dictionary.containsKey(termKey)) {
                tf = dictionary.get(termKey);
                tf++;
            } else {
                tf = 1;
            }
            dictionary.put(termKey, tf);
            sb.append(term.toString()).append(" ");
            List<Number160> ll = new ArrayList<>();
            ll.add(Number160.createHash(e.getID()));
            // NOTE(review): each occurrence stores the running count at that
            // point, so earlier entries for the same term carry a smaller tf
            // than later ones — confirm this is intended and not a bug.
            list.add(new InvertedIndexEntry(termKey.toString(), term.toString(), tf, ll));
        }
        psf.end(); // TokenStream contract: end() after the last incrementToken()

        // Second pass: 2..maxNGrams shingles over the stemmed text.
        StandardTokenizer source = new StandardTokenizer(Version.LUCENE_42, new StringReader(sb.toString()));
        TokenStream tokenStreamSF = new StandardFilter(Version.LUCENE_42, source);
        try (ShingleFilter sf = new ShingleFilter(tokenStreamSF, 2, maxNGrams)) {
            sf.setOutputUnigrams(false); // unigrams were already emitted above
            CharTermAttribute charTermAttribute = sf.addAttribute(CharTermAttribute.class);
            sf.reset();
            while (sf.incrementToken()) {
                String word = charTermAttribute.toString();

                // Shingles are space-joined; index them as underscore-joined keys.
                String ng = word.replaceAll(" ", "_");
                Integer tf;
                Number160 termKey = Number160.createHash(ng);
                if (dictionary.containsKey(termKey)) {
                    tf = dictionary.get(termKey);
                    tf++;
                } else {
                    tf = 1;
                }
                dictionary.put(termKey, tf);
                List<Number160> ll = new ArrayList<>();
                ll.add(Number160.createHash(e.getID()));
                list.add(new InvertedIndexEntry(termKey.toString(), ng, tf, ll));
            }
            sf.end(); // end() before the implicit close() from try-with-resources
        }
    }
    return list;
}