AnalyzeText.java :  » Content-Management-System » contineo » org » contineo » core » text » Java Open Source

Java Open Source » Content Management System » contineo 
contineo » org » contineo » core » text » AnalyzeText.java
package org.contineo.core.text;

import java.util.Collection;
import java.util.Iterator;

import org.contineo.core.document.Term;
import org.contineo.core.document.dao.TermDAO;
import org.contineo.core.text.analyze.Analyzer;
import org.contineo.core.text.analyze.AnalyzerFactory;
import org.contineo.core.text.analyze.Entry;
import org.contineo.util.Context;

/**
 * Class for analysing texts like extracting keywords from a given text. 
 * Created on 24.03.2004
 * 
 * @author Michael Scholz
 */
public class AnalyzeText {

  public AnalyzeText() {
  }

  /**
   * This method selects 20 keywords of a given text in a specified language
   * and stores these keywords in a database.
   * 
   * @param menuId MenuId of the document the text is from.
   * @param text Text of a document.
   * @param language Identified language of the text.
   * @throws Exception
   */
  public void storeTerms(int menuId, String text, String language) throws Exception {
    TermDAO termDao = (TermDAO) Context.getInstance().getBean(TermDAO.class);
    Analyzer analyzer = AnalyzerFactory.getAnalyzer(language);
    analyzer.analyze(text);

    long words = analyzer.getWordCount();
    Collection terms = analyzer.getTopWords(20);
    Iterator iter = terms.iterator();

    while (iter.hasNext()) {
      Entry entry = (Entry) iter.next();
      Term term = new Term();
      term.setMenuId(menuId);
      term.setStem(entry.getWord());
      term.setValue(entry.getNumber() * 1000 / words);
      term.setWordCount(entry.getNumber());
      term.setOriginWord(entry.getOriginWord());
      termDao.store(term);
    }
  }

  /**
   * This method extracts a specified number of keywords and appends them to a
   * String
   * 
   * @param count Number of keywords.
   * @param text Given text of a document.
   * @param language Identified language of the text.
   * @return String of keywords like "Information, Retrieval, DMS, CMS"
   * @throws Exception
   */
  public String getTerms(int count, String text, String language) throws Exception {
    StringBuffer result = new StringBuffer();
    Analyzer analyzer = AnalyzerFactory.getAnalyzer(language);
    analyzer.analyze(text);

    Collection terms = analyzer.getTopWords(count);
    Iterator iter = terms.iterator();
    int temp = 0;

    while (iter.hasNext() && (temp < count)) {
      Entry entry = (Entry) iter.next();

      if (temp > 0) {
        result.append(", ");
      }

      result.append(entry.getOriginWord());
      temp++;
    }

    return result.toString();
  }
}
java2s.com  | Contact Us | Privacy Policy
Copyright 2009 - 12 Demo Source and Support. All rights reserved.
All other trademarks are property of their respective owners.