List of usage examples for org.apache.lucene.analysis.StopFilter.makeStopSet
public static CharArraySet makeStopSet(List<?> stopWords)
From source file:ai.castor.idf.IDFScorer.java
License:Apache License
public double calcIDF(String query, String answer, boolean analyze) throws ParseException { Analyzer analyzer;//from w w w . ja v a 2 s . c o m if (analyze) { analyzer = new EnglishAnalyzer(StopFilter.makeStopSet(stopWords)); } else { analyzer = new WhitespaceAnalyzer(); } QueryParser qp = new QueryParser(FIELD_BODY, analyzer); ClassicSimilarity similarity = new ClassicSimilarity(); String escapedQuery = qp.escape(query); Query question = qp.parse(escapedQuery); HashSet<String> questionTerms = new HashSet<>(Arrays.asList(question.toString().trim().split("\\s+"))); double idf = 0.0; HashSet<String> seenTerms = new HashSet<>(); String[] terms = answer.split("\\s+"); for (String term : terms) { try { TermQuery q = (TermQuery) qp.parse(term); Term t = q.getTerm(); if (questionTerms.contains(t.toString()) && !seenTerms.contains(t.toString())) { idf += similarity.idf(reader.docFreq(t), reader.numDocs()); seenTerms.add(t.toString()); } else { idf += 0.0; } } catch (Exception e) { continue; } } return idf; }
From source file:analysis.FtpFilePathAnalyzer.java
License:Apache License
/**
 * Creates the analyzer and initializes its stop set from the supplied words
 * via {@code StopFilter.makeStopSet}.
 *
 * @param stopWords words to place in the stop set
 */
public FtpFilePathAnalyzer(String[] stopWords) {
    this.stopSet = StopFilter.makeStopSet(stopWords);
}
From source file:aos.lucene.analysis.stopanalyzer.StopAnalyzer1.java
License:Apache License
/**
 * Creates the analyzer, storing a stop set built from the given words
 * with {@code StopFilter.makeStopSet}.
 *
 * @param stopWords words to place in the stop set
 */
public StopAnalyzer1(String[] stopWords) {
    this.stopWords =
            StopFilter.makeStopSet(stopWords);
}
From source file:aos.lucene.analysis.stopanalyzer.StopAnalyzer2.java
License:Apache License
/**
 * Creates the analyzer, storing a stop set built from the given words
 * with {@code StopFilter.makeStopSet}.
 *
 * @param stopWords words to place in the stop set
 */
public StopAnalyzer2(String[] stopWords) {
    this.stopWords =
            StopFilter.makeStopSet(stopWords);
}
From source file:aos.lucene.analysis.stopanalyzer.StopAnalyzerFlawed.java
License:Apache License
/**
 * Creates the analyzer, storing a stop set built from the given words
 * with {@code StopFilter.makeStopSet}.
 *
 * @param stopWords words to place in the stop set
 */
public StopAnalyzerFlawed(String[] stopWords) {
    this.stopWords =
            StopFilter.makeStopSet(stopWords);
}
From source file:com.appeligo.lucene.PorterStemAnalyzer.java
License:Apache License
/**
 * Creates the analyzer with a stop set built from {@code ENGLISH_STOP_WORDS}
 * via {@code StopFilter.makeStopSet}.
 */
public PorterStemAnalyzer() {
    this.stopWords = StopFilter.makeStopSet(ENGLISH_STOP_WORDS);
}
From source file:com.appeligo.lucene.PorterStemAnalyzer.java
License:Apache License
/**
 * Creates the analyzer with a stop set built from the caller-provided array
 * via {@code StopFilter.makeStopSet}.
 *
 * @param stopWords words to place in the stop set
 */
public PorterStemAnalyzer(String[] stopWords) {
    this.stopWords =
            StopFilter.makeStopSet(stopWords);
}
From source file:com.bluexml.side.framework.alfresco.commons.lucene.MyAnalyzer.java
License:Open Source License
/**
 * Creates the analyzer with an empty stop set, so no word is registered as a
 * stop word.
 *
 * NOTE(review): the original comment says this analyzer also removes accents;
 * that logic is not part of this constructor - confirm where it is applied.
 */
public MyAnalyzer() {
    this.stopWords = StopFilter.makeStopSet(new String[0]);
}
From source file:com.bluexml.side.framework.alfresco.commons.lucene.MyEnglishAnalyzer.java
License:Open Source License
/**
 * Creates the analyzer with a stop set built from
 * {@code StopAnalyzer.ENGLISH_STOP_WORDS} via {@code StopFilter.makeStopSet}.
 */
public MyEnglishAnalyzer() {
    this.stopWords = StopFilter.makeStopSet(StopAnalyzer.ENGLISH_STOP_WORDS);
}
From source file:com.bluexml.side.framework.alfresco.commons.lucene.MyFrenchAnalyzer.java
License:Open Source License
/**
 * Creates the analyzer with a stop set built from
 * {@code FrenchAnalyzer.FRENCH_STOP_WORDS} via {@code StopFilter.makeStopSet}.
 */
public MyFrenchAnalyzer() {
    this.stopWords = StopFilter.makeStopSet(FrenchAnalyzer.FRENCH_STOP_WORDS);
}