Example usage for org.apache.lucene.analysis StopFilter makeStopSet

List of usage examples for org.apache.lucene.analysis StopFilter makeStopSet

Introduction

On this page you can find example usages of org.apache.lucene.analysis StopFilter makeStopSet.

Prototype

public static CharArraySet makeStopSet(List<?> stopWords) 

Source Link

Document

Builds a Set from a list of stop words, appropriate for passing into the StopFilter constructor.

Usage

From source file:ai.castor.idf.IDFScorer.java

License:Apache License

/**
 * Sums the IDF of each distinct answer term that also appears in the parsed query.
 *
 * <p>The query is escaped and parsed with the selected analyzer; the whitespace-separated
 * tokens of the parsed query's string form make up the set of question terms. Every
 * whitespace-separated token of {@code answer} is then parsed with the same parser. When the
 * resulting term's string form is a question term that has not been counted yet, its IDF
 * (computed by {@code ClassicSimilarity.idf} from the reader's document frequency and document
 * count) is added to the total.
 *
 * @param query   raw question text; it is escaped before being parsed
 * @param answer  raw answer text, split on runs of whitespace
 * @param analyze {@code true} to use an {@code EnglishAnalyzer} built from {@code stopWords},
 *                {@code false} to use a {@code WhitespaceAnalyzer}
 * @return the accumulated IDF, or {@code 0.0} when no answer term matches a question term
 * @throws ParseException if the escaped query cannot be parsed
 */
public double calcIDF(String query, String answer, boolean analyze) throws ParseException {
    Analyzer analyzer = analyze
            ? new EnglishAnalyzer(StopFilter.makeStopSet(stopWords))
            : new WhitespaceAnalyzer();

    QueryParser qp = new QueryParser(FIELD_BODY, analyzer);
    ClassicSimilarity similarity = new ClassicSimilarity();

    // escape() is static; call it on the class rather than through the parser instance.
    Query question = qp.parse(QueryParser.escape(query));
    HashSet<String> questionTerms = new HashSet<>(Arrays.asList(question.toString().trim().split("\\s+")));

    double idf = 0.0;
    HashSet<String> seenTerms = new HashSet<>();

    for (String token : answer.split("\\s+")) {
        try {
            Term t = ((TermQuery) qp.parse(token)).getTerm();
            String key = t.toString();

            if (questionTerms.contains(key) && !seenTerms.contains(key)) {
                idf += similarity.idf(reader.docFreq(t), reader.numDocs());
                // Mark as seen only after the IDF lookup succeeded, so a failed lookup
                // does not suppress a later occurrence of the same term.
                seenTerms.add(key);
            }
        } catch (Exception ignored) {
            // Best effort: a token that does not parse to a single TermQuery, or whose
            // statistics cannot be read, simply contributes nothing to the score.
        }
    }
    return idf;
}

From source file:analysis.FtpFilePathAnalyzer.java

License:Apache License

/**
 * Creates an analyzer that uses the supplied stop words.
 *
 * @param stopWords words to convert into the stop set via {@code StopFilter.makeStopSet}
 */
public FtpFilePathAnalyzer(String[] stopWords) {
    this.stopSet = StopFilter.makeStopSet(stopWords);
}

From source file:aos.lucene.analysis.stopanalyzer.StopAnalyzer1.java

License:Apache License

/**
 * Creates the analyzer, converting the given array into a stop set with
 * {@code StopFilter.makeStopSet} and storing it in the {@code stopWords} field.
 *
 * @param stopWords words to build the stop set from
 */
public StopAnalyzer1(String[] stopWords) {
    this.stopWords = StopFilter.makeStopSet(stopWords);
}

From source file:aos.lucene.analysis.stopanalyzer.StopAnalyzer2.java

License:Apache License

/**
 * Creates the analyzer, converting the given array into a stop set with
 * {@code StopFilter.makeStopSet} and storing it in the {@code stopWords} field.
 *
 * @param stopWords words to build the stop set from
 */
public StopAnalyzer2(String[] stopWords) {
    this.stopWords = StopFilter.makeStopSet(stopWords);
}

From source file:aos.lucene.analysis.stopanalyzer.StopAnalyzerFlawed.java

License:Apache License

/**
 * Creates the analyzer, converting the given array into a stop set with
 * {@code StopFilter.makeStopSet} and storing it in the {@code stopWords} field.
 *
 * @param stopWords words to build the stop set from
 */
public StopAnalyzerFlawed(String[] stopWords) {
    this.stopWords = StopFilter.makeStopSet(stopWords);
}

From source file:com.appeligo.lucene.PorterStemAnalyzer.java

License:Apache License

/**
 * Creates an analyzer whose stop set is built from {@code ENGLISH_STOP_WORDS}.
 */
public PorterStemAnalyzer() {
    this.stopWords = StopFilter.makeStopSet(ENGLISH_STOP_WORDS);
}

From source file:com.appeligo.lucene.PorterStemAnalyzer.java

License:Apache License

/**
 * Builds an analyzer which removes the words in the provided array.
 *
 * @param stopWords words converted into the stop set via {@code StopFilter.makeStopSet}
 */
public PorterStemAnalyzer(String[] stopWords) {
    this.stopWords = StopFilter.makeStopSet(stopWords);
}

From source file:com.bluexml.side.framework.alfresco.commons.lucene.MyAnalyzer.java

License:Open Source License

/**
 * Creates an analyzer with an empty stop-word set.
 *
 * <p>The original description says this analyzer removes accents; that behavior is not part of
 * this constructor.
 */
public MyAnalyzer() {
    this.stopWords = StopFilter.makeStopSet(new String[0]);
}

From source file:com.bluexml.side.framework.alfresco.commons.lucene.MyEnglishAnalyzer.java

License:Open Source License

/**
 * Creates an analyzer whose stop set is built from {@code StopAnalyzer.ENGLISH_STOP_WORDS}.
 */
public MyEnglishAnalyzer() {
    this.stopWords = StopFilter.makeStopSet(StopAnalyzer.ENGLISH_STOP_WORDS);
}

From source file:com.bluexml.side.framework.alfresco.commons.lucene.MyFrenchAnalyzer.java

License:Open Source License

/**
 * Creates an analyzer whose stop set is built from {@code FrenchAnalyzer.FRENCH_STOP_WORDS}.
 */
public MyFrenchAnalyzer() {
    this.stopWords = StopFilter.makeStopSet(FrenchAnalyzer.FRENCH_STOP_WORDS);
}