List of usage examples for org.apache.lucene.analysis TokenStream getAttribute
public final <T extends Attribute> T getAttribute(Class<T> attClass)
The caller must pass in a Class&lt;? extends Attribute&gt; object for the attribute it wants to retrieve; the method throws IllegalArgumentException if this instance does not contain that attribute.
From source file:filters.indexing.NegationScopeFilter.java
License:Open Source License
/** * Constructor for class NegationScopeFilter * @param input// www.jav a 2s.co m */ public NegationScopeFilter(TokenStream input) { super(input); // Getting attributes from input token stream input_term = input.getAttribute(TermAttribute.class); input_type = input.getAttribute(TypeAttribute.class); input_flags = input.getAttribute(FlagsAttribute.class); input_payload = input.getAttribute(PayloadAttribute.class); // Setting attributes for this token stream output_term = this.getAttribute(TermAttribute.class); output_type = this.getAttribute(TypeAttribute.class); output_flags = this.addAttribute(FlagsAttribute.class); output_payload = this.getAttribute(PayloadAttribute.class); }
From source file:filters.indexing.StatsFilter.java
License:Open Source License
/** * Constructor for class NegationScopeFilter * //from ww w . j av a 2 s .com * @param input */ public StatsFilter(TokenStream input, ReviewDocumentIndexer indexer) { super(input); // Getting attributes from input token stream input_term = input.getAttribute(TermAttribute.class); input_type = input.getAttribute(TypeAttribute.class); input_flags = input.getAttribute(FlagsAttribute.class); input_payload = input.getAttribute(PayloadAttribute.class); // Setting attributes for this token stream output_term = this.getAttribute(TermAttribute.class); output_type = this.getAttribute(TypeAttribute.class); output_flags = this.addAttribute(FlagsAttribute.class); output_payload = this.getAttribute(PayloadAttribute.class); synsets = indexer.theSynsets; reviewLengths = indexer.theStats; reviewLengthCounter = new Counter(); }
From source file:filters.indexing.TopicModelInputFilter.java
License:Open Source License
/** * Constructor for class IndexableFilter * //from w w w. ja va2s.c om * @param input */ public TopicModelInputFilter(TokenStream input, TokenListsCollector tokenLists, ReviewId reviewId) { super(input); // Getting attributes from input token stream input_term = input.getAttribute(TermAttribute.class); input_type = input.getAttribute(TypeAttribute.class); input_flags = input.getAttribute(FlagsAttribute.class); input_payload = input.hasAttribute(PayloadAttribute.class) ? input.getAttribute(PayloadAttribute.class) : null; // Setting attributes for this token stream output_term = this.getAttribute(TermAttribute.class); output_type = this.getAttribute(TypeAttribute.class); output_flags = this.addAttribute(FlagsAttribute.class); output_payload = input.hasAttribute(PayloadAttribute.class) ? this.getAttribute(PayloadAttribute.class) : null; this.reviewId = reviewId; currentDocNumber = new Counter(); tokenListsCollector = tokenLists; }
From source file:filters.LemmatizationFilter.java
License:Open Source License
/** * Constructor for class LemmatizationFilter * /* www . j a v a 2 s . c o m*/ * @param input */ public LemmatizationFilter(TokenStream input, boolean lemmatized_output) { super(input); // Getting attributes from input stream input_term = input.getAttribute(TermAttribute.class); input_type = input.getAttribute(TypeAttribute.class); input_payload = input.getAttribute(PayloadAttribute.class); // Setting attributes for this stream output_term = this.addAttribute(TermAttribute.class); output_type = this.addAttribute(TypeAttribute.class); output_flags = this.addAttribute(FlagsAttribute.class); output_payload = this.addAttribute(PayloadAttribute.class); this.lemmatized_output = lemmatized_output; lemmatizer = new Lemmatizer(); }
From source file:filters.LemmatizationFilter.java
License:Open Source License
/** * Constructor for class LemmatizationFilter *///ww w .j av a 2 s .c o m public LemmatizationFilter(TokenStream input, boolean lemmatized_output, Dictionary wordnet, IndexMap compoundTermsIndex) { super(input); // Getting attributes from input stream input_term = input.getAttribute(TermAttribute.class); input_type = input.getAttribute(TypeAttribute.class); input_payload = input.getAttribute(PayloadAttribute.class); // Setting attributes for this stream output_term = this.getAttribute(TermAttribute.class); output_type = this.getAttribute(TypeAttribute.class); output_flags = this.addAttribute(FlagsAttribute.class); output_payload = this.addAttribute(PayloadAttribute.class); this.lemmatized_output = lemmatized_output; lemmatizer = new Lemmatizer(wordnet, compoundTermsIndex); }
From source file:filters.NamedEntityFilter.java
License:Open Source License
/** * Constructor for class NamedEntityFilter * /*from w w w . j a v a 2 s . c om*/ * @param input * The input {@link TokenStream} */ public NamedEntityFilter(TokenStream input) { super(input); // Getting attributes from input stream input_term = input.getAttribute(TermAttribute.class); input_type = input.getAttribute(TypeAttribute.class); input_payload = input.getAttribute(PayloadAttribute.class); // Setting attributes for this stream output_term = this.addAttribute(TermAttribute.class); output_type = this.addAttribute(TypeAttribute.class); output_payload = this.addAttribute(PayloadAttribute.class); }
From source file:filters.PosTaggingFilter.java
License:Open Source License
/** * Constructor for class PosTaggingFilter * //from www. j a v a2s . c om * @param input */ public PosTaggingFilter(TokenStream input) { super(input); // Getting attributes from input token stream input_term = input.getAttribute(TermAttribute.class); // Setting attributes for this token stream output_term = this.addAttribute(TermAttribute.class); output_type = this.addAttribute(TypeAttribute.class); output_payload = this.addAttribute(PayloadAttribute.class); tagger = initializeTagger(); }
From source file:filters.PosTaggingFilter.java
License:Open Source License
/** * Constructor for class PosTaggingFilter * //w w w .j ava 2 s . c om * @param input * @param tagger * The POS-tagger object that should be used to perform the tagging */ public PosTaggingFilter(TokenStream input, MaxentTagger tagger) { super(input); // Getting attributes from input token stream input_term = input.getAttribute(TermAttribute.class); // Setting attributes for this token stream output_term = this.getAttribute(TermAttribute.class); output_type = this.addAttribute(TypeAttribute.class); output_payload = this.addAttribute(PayloadAttribute.class); this.tagger = tagger; }
From source file:filters.StopLemmaFilter.java
License:Open Source License
/** * Constructor for class StopLemmaFilter * /*from w ww . j a va 2s. com*/ * @param input */ public StopLemmaFilter(TokenStream input, boolean removeStopLemmas) { super(input); // Getting attributes from input token stream input_term = input.getAttribute(TermAttribute.class); input_type = input.getAttribute(TypeAttribute.class); input_flags = input.getAttribute(FlagsAttribute.class); input_payload = input.getAttribute(PayloadAttribute.class); // Setting attributes for this token stream output_term = this.getAttribute(TermAttribute.class); output_type = this.getAttribute(TypeAttribute.class); output_flags = this.addAttribute(FlagsAttribute.class); output_payload = this.getAttribute(PayloadAttribute.class); this.removeStopLemmas = removeStopLemmas; // Initializing stop-lemma lists stopLemmas = new HashMap<POS, ArrayList<String>>(); POS[] pos_array = { POS.VERB, POS.NOUN, POS.ADJECTIVE, POS.ADVERB }; for (POS pos : pos_array) { stopLemmas.put(pos, new ArrayList<String>()); } // Populating stop-lemma lists with data from stop-lemma file try { BufferedReader stop_file = new BufferedReader( new InputStreamReader(new FileInputStream(Paths.stopLemmasFile))); // Read stop-lemmas from stop-lemma file, one lemma per line String stop_line; while ((stop_line = stop_file.readLine()) != null) { // Each line in the stop-lemma file contains a lemma and its corresponding POS // category String[] stoplemma = stop_line.split(" ", 2); POS stoplemma_pos = PosTag.toPOS(stoplemma[0].toUpperCase()); String stoplemma_string = stoplemma[1].toLowerCase(); // Adding stop-lemma to the list that corresponds to its POS category ArrayList<String> stop_list; if ((stop_list = stopLemmas.get(stoplemma_pos)) != null) { stop_list.add(stoplemma_string); } else { AppLogger.error.log(Level.WARNING, "Unrecognized POS in stop-lemma file: " + stoplemma_pos + " on line \"" + stop_line + "\""); } } stop_file.close(); } catch (FileNotFoundException e) { AppLogger.error.log(Level.SEVERE, "Stop-lemma file not 
found at location " + Paths.stopLemmasFile); } catch (IOException e) { AppLogger.error.log(Level.SEVERE, "Error reading from stop-lemma file"); } }
From source file:fr.lipn.yasemir.weighting.ckpd.TermFactory.java
License:Open Source License
public static Vector<NGramTerm> makeTermSequence(String text) { List<String> result = new ArrayList<String>(); try {/*from w w w. j a v a2 s. c o m*/ TokenStream stream = analyzer.tokenStream("text", new StringReader(text)); while (stream.incrementToken()) { //result.add(stream.getAttribute(TermAttribute.class).term()); //old way result.add(stream.getAttribute(CharTermAttribute.class).toString()); } } catch (IOException e) { // not thrown b/c we're using a string reader... } Vector<NGramTerm> ngtv = new Vector<NGramTerm>(); for (String s : result) { NGramTerm t = new NGramTerm(s); ngtv.add(t); } return ngtv; }