Usage example for org.apache.lucene.analysis.standard.StandardAnalyzer#close
@Override public void close()
From source file:ca.ualberta.entitylinking.common.indexing.TFIDF3x.java
License:Open Source License
/** * This function assumes that the TFIDF vector of the document containing text is already * given. We simply build a tfidf-vector of the text out of the docVector. * The purpose of doing this is to save the time computing the tf-idf value for words in * the same document.//from w ww .ja v a2s . c o m * * @param text * @param docVector * @return */ public Map<String, Float> TextTFIDFVector(String text, Map<String, Float> docVector) { Map<String, Float> map = new HashMap<String, Float>(); //preprocess the text using StandardAnalyzer (StandardAnalyzer2 + StopAnalyzer). StandardAnalyzer analyzer = new StandardAnalyzer(Version.LUCENE_34); TokenStream tokenStream = analyzer.tokenStream("string", new StringReader(text)); CharTermAttribute charTermAttribute = tokenStream.addAttribute(CharTermAttribute.class); try { tokenStream.reset(); while (tokenStream.incrementToken()) { String term = charTermAttribute.toString(); if (docVector.containsKey(term)) map.put(term, docVector.get(term)); } } catch (Exception e) { e.printStackTrace(); } analyzer.close(); return map; }