Example usage for org.apache.lucene.analysis TokenStream getAttribute

List of usage examples for org.apache.lucene.analysis TokenStream getAttribute

Introduction

On this page you can find example usages of org.apache.lucene.analysis.TokenStream.getAttribute.

Prototype

public final <T extends Attribute> T getAttribute(Class<T> attClass) 

Source Link

Document

Returns the instance of the passed in Attribute contained in this AttributeSource

The caller must pass in a Class<? extends Attribute> value.

Usage

From source file:filters.indexing.NegationScopeFilter.java

License:Open Source License

/**
 * Creates a NegationScopeFilter wrapping the given token stream.
 *
 * @param input
 *            the token stream handed to the superclass constructor and queried
 *            for its term, type, flags and payload attributes
 */
public NegationScopeFilter(TokenStream input) {
    super(input);

    // Attribute instances obtained from the wrapped stream
    input_term = input.getAttribute(TermAttribute.class);
    input_type = input.getAttribute(TypeAttribute.class);
    input_flags = input.getAttribute(FlagsAttribute.class);
    input_payload = input.getAttribute(PayloadAttribute.class);

    // Attribute instances obtained from this filter itself; only the flags
    // attribute is requested via addAttribute, the rest via getAttribute
    output_term = getAttribute(TermAttribute.class);
    output_type = getAttribute(TypeAttribute.class);
    output_flags = addAttribute(FlagsAttribute.class);
    output_payload = getAttribute(PayloadAttribute.class);
}

From source file:filters.indexing.StatsFilter.java

License:Open Source License

/**
 * Creates a StatsFilter wrapping the given token stream.
 *
 * @param input
 *            the token stream handed to the superclass constructor and queried
 *            for its term, type, flags and payload attributes
 * @param indexer
 *            the indexer whose {@code theSynsets} and {@code theStats} fields
 *            are copied into this filter
 */
public StatsFilter(TokenStream input, ReviewDocumentIndexer indexer) {
    super(input);

    // Attribute instances obtained from the wrapped stream
    input_term = input.getAttribute(TermAttribute.class);
    input_type = input.getAttribute(TypeAttribute.class);
    input_flags = input.getAttribute(FlagsAttribute.class);
    input_payload = input.getAttribute(PayloadAttribute.class);

    // Attribute instances obtained from this filter itself; only the flags
    // attribute is requested via addAttribute, the rest via getAttribute
    output_term = getAttribute(TermAttribute.class);
    output_type = getAttribute(TypeAttribute.class);
    output_flags = addAttribute(FlagsAttribute.class);
    output_payload = getAttribute(PayloadAttribute.class);

    // References taken from the indexer, plus a fresh counter
    synsets = indexer.theSynsets;
    reviewLengths = indexer.theStats;
    reviewLengthCounter = new Counter();
}

From source file:filters.indexing.TopicModelInputFilter.java

License:Open Source License

/**
 * Creates a TopicModelInputFilter wrapping the given token stream.
 * <p>
 * The payload attribute is optional: when the input stream does not report a
 * {@code PayloadAttribute}, both payload fields are set to {@code null}.
 *
 * @param input
 *            the token stream handed to the superclass constructor and queried
 *            for its attributes
 * @param tokenLists
 *            the collector stored in this filter's {@code tokenListsCollector}
 *            field
 * @param reviewId
 *            the review identifier stored in this filter
 */
public TopicModelInputFilter(TokenStream input, TokenListsCollector tokenLists, ReviewId reviewId) {
    super(input);

    // Attribute instances obtained from the wrapped stream
    input_term = input.getAttribute(TermAttribute.class);
    input_type = input.getAttribute(TypeAttribute.class);
    input_flags = input.getAttribute(FlagsAttribute.class);
    if (input.hasAttribute(PayloadAttribute.class)) {
        input_payload = input.getAttribute(PayloadAttribute.class);
    } else {
        input_payload = null;
    }

    // Attribute instances obtained from this filter itself
    output_term = getAttribute(TermAttribute.class);
    output_type = getAttribute(TypeAttribute.class);
    output_flags = addAttribute(FlagsAttribute.class);
    if (input.hasAttribute(PayloadAttribute.class)) {
        output_payload = getAttribute(PayloadAttribute.class);
    } else {
        output_payload = null;
    }

    // Remaining state: collector, review id and a fresh document counter
    tokenListsCollector = tokenLists;
    this.reviewId = reviewId;
    currentDocNumber = new Counter();
}

From source file:filters.LemmatizationFilter.java

License:Open Source License

/**
 * Constructor for class LemmatizationFilter that creates its own
 * {@code Lemmatizer} with the no-argument constructor.
 *
 * @param input
 *            The input {@link TokenStream}; its term, type and payload
 *            attributes are read via {@code getAttribute}
 * @param lemmatized_output
 *            Value stored in this filter's {@code lemmatized_output} field
 */
public LemmatizationFilter(TokenStream input, boolean lemmatized_output) {
    super(input);

    // Getting attributes from input stream
    input_term = input.getAttribute(TermAttribute.class);
    input_type = input.getAttribute(TypeAttribute.class);
    input_payload = input.getAttribute(PayloadAttribute.class);

    // Setting attributes for this stream
    output_term = this.addAttribute(TermAttribute.class);
    output_type = this.addAttribute(TypeAttribute.class);
    output_flags = this.addAttribute(FlagsAttribute.class);
    output_payload = this.addAttribute(PayloadAttribute.class);

    this.lemmatized_output = lemmatized_output;

    lemmatizer = new Lemmatizer();
}

From source file:filters.LemmatizationFilter.java

License:Open Source License

/**
 * Creates a LemmatizationFilter whose {@code Lemmatizer} is built from the
 * supplied dictionary and compound-terms index.
 *
 * @param input
 *            the token stream handed to the superclass constructor and queried
 *            for its term, type and payload attributes
 * @param lemmatized_output
 *            value stored in this filter's {@code lemmatized_output} field
 * @param wordnet
 *            dictionary passed on to the {@code Lemmatizer} constructor
 * @param compoundTermsIndex
 *            index passed on to the {@code Lemmatizer} constructor
 */
public LemmatizationFilter(TokenStream input, boolean lemmatized_output, Dictionary wordnet,
        IndexMap compoundTermsIndex) {
    super(input);

    // Attribute instances obtained from the wrapped stream
    input_term = input.getAttribute(TermAttribute.class);
    input_type = input.getAttribute(TypeAttribute.class);
    input_payload = input.getAttribute(PayloadAttribute.class);

    // Attribute instances obtained from this filter itself: term and type via
    // getAttribute, flags and payload via addAttribute
    output_term = getAttribute(TermAttribute.class);
    output_type = getAttribute(TypeAttribute.class);
    output_flags = addAttribute(FlagsAttribute.class);
    output_payload = addAttribute(PayloadAttribute.class);

    this.lemmatized_output = lemmatized_output;
    lemmatizer = new Lemmatizer(wordnet, compoundTermsIndex);
}

From source file:filters.NamedEntityFilter.java

License:Open Source License

/**
 * Constructor for class NamedEntityFilter.
 *
 * @param input
 *            The input {@link TokenStream}; its term, type and payload
 *            attributes are read via {@code getAttribute}
 */
public NamedEntityFilter(TokenStream input) {
    super(input);

    // Getting attributes from input stream
    input_term = input.getAttribute(TermAttribute.class);
    input_type = input.getAttribute(TypeAttribute.class);
    input_payload = input.getAttribute(PayloadAttribute.class);

    // Setting attributes for this stream
    output_term = this.addAttribute(TermAttribute.class);
    output_type = this.addAttribute(TypeAttribute.class);
    output_payload = this.addAttribute(PayloadAttribute.class);

}

From source file:filters.PosTaggingFilter.java

License:Open Source License

/**
 * Creates a PosTaggingFilter that obtains its tagger from
 * {@code initializeTagger()}.
 *
 * @param input
 *            the token stream handed to the superclass constructor and queried
 *            for its term attribute
 */
public PosTaggingFilter(TokenStream input) {
    super(input);

    // Term attribute instance obtained from the wrapped stream
    input_term = input.getAttribute(TermAttribute.class);

    // Attribute instances requested on this filter via addAttribute
    output_term = addAttribute(TermAttribute.class);
    output_type = addAttribute(TypeAttribute.class);
    output_payload = addAttribute(PayloadAttribute.class);

    tagger = initializeTagger();
}

From source file:filters.PosTaggingFilter.java

License:Open Source License

/**
 * Creates a PosTaggingFilter that uses a caller-supplied tagger.
 *
 * @param input
 *            the token stream handed to the superclass constructor and queried
 *            for its term attribute
 * @param tagger
 *            the POS-tagger object that should be used to perform the tagging
 */
public PosTaggingFilter(TokenStream input, MaxentTagger tagger) {
    super(input);

    // Term attribute instance obtained from the wrapped stream
    input_term = input.getAttribute(TermAttribute.class);

    // Attribute instances obtained from this filter itself: term via
    // getAttribute, type and payload via addAttribute
    output_term = getAttribute(TermAttribute.class);
    output_type = addAttribute(TypeAttribute.class);
    output_payload = addAttribute(PayloadAttribute.class);

    this.tagger = tagger;
}

From source file:filters.StopLemmaFilter.java

License:Open Source License

/**
 * Constructor for class StopLemmaFilter.
 * <p>
 * Looks up the term, type, flags and payload attributes and then loads the
 * stop-lemma lists from the file at {@code Paths.stopLemmasFile}. A missing
 * file or a read error is logged instead of thrown, so the filter is still
 * constructed, possibly with empty or partially filled stop-lemma lists.
 *
 * @param input
 *            The input {@link TokenStream}
 * @param removeStopLemmas
 *            Value stored in this filter's {@code removeStopLemmas} field
 */
public StopLemmaFilter(TokenStream input, boolean removeStopLemmas) {
    super(input);

    // Getting attributes from input token stream
    input_term = input.getAttribute(TermAttribute.class);
    input_type = input.getAttribute(TypeAttribute.class);
    input_flags = input.getAttribute(FlagsAttribute.class);
    input_payload = input.getAttribute(PayloadAttribute.class);

    // Setting attributes for this token stream
    output_term = this.getAttribute(TermAttribute.class);
    output_type = this.getAttribute(TypeAttribute.class);
    output_flags = this.addAttribute(FlagsAttribute.class);
    output_payload = this.getAttribute(PayloadAttribute.class);

    this.removeStopLemmas = removeStopLemmas;

    // Initializing stop-lemma lists, one (initially empty) list per POS category
    stopLemmas = new HashMap<POS, ArrayList<String>>();
    POS[] pos_array = { POS.VERB, POS.NOUN, POS.ADJECTIVE, POS.ADVERB };
    for (POS pos : pos_array) {
        stopLemmas.put(pos, new ArrayList<String>());
    }

    // Populating stop-lemma lists with data from stop-lemma file
    try {
        BufferedReader stop_file = new BufferedReader(
                new InputStreamReader(new FileInputStream(Paths.stopLemmasFile)));
        try {
            // Read stop-lemmas from stop-lemma file, one lemma per line
            String stop_line;
            while ((stop_line = stop_file.readLine()) != null) {
                // Blank lines carry no entry; skip them silently
                if (stop_line.trim().length() == 0) {
                    continue;
                }

                // Each line in the stop-lemma file contains a POS category followed by a
                // space and the lemma
                String[] stoplemma = stop_line.split(" ", 2);
                if (stoplemma.length < 2) {
                    // No separator on this line: report it and keep loading the rest
                    // instead of failing on the missing second element
                    AppLogger.error.log(Level.WARNING,
                            "Malformed line in stop-lemma file: \"" + stop_line + "\"");
                    continue;
                }
                POS stoplemma_pos = PosTag.toPOS(stoplemma[0].toUpperCase());
                String stoplemma_string = stoplemma[1].toLowerCase();

                // Adding stop-lemma to the list that corresponds to its POS category
                ArrayList<String> stop_list = stopLemmas.get(stoplemma_pos);
                if (stop_list != null) {
                    stop_list.add(stoplemma_string);
                } else {
                    AppLogger.error.log(Level.WARNING, "Unrecognized POS in stop-lemma file: " + stoplemma_pos
                            + " on line \"" + stop_line + "\"");
                }
            }
        } finally {
            // Release the file handle even when reading or parsing fails
            stop_file.close();
        }

    } catch (FileNotFoundException e) {
        AppLogger.error.log(Level.SEVERE, "Stop-lemma file not found at location " + Paths.stopLemmasFile);
    } catch (IOException e) {
        AppLogger.error.log(Level.SEVERE, "Error reading from stop-lemma file");
    }

}

From source file:fr.lipn.yasemir.weighting.ckpd.TermFactory.java

License:Open Source License

/**
 * Tokenizes the given text with the class-level {@code analyzer} and wraps each
 * produced term in an {@link NGramTerm}.
 * <p>
 * The stream is consumed following the TokenStream consumer workflow
 * (attribute lookup, {@code reset()}, {@code incrementToken()} loop,
 * {@code end()}, {@code close()}). If an {@link IOException} occurs, the terms
 * collected up to that point are returned.
 *
 * @param text the text to tokenize
 * @return the terms of {@code text} in token order; empty if no token is produced
 */
public static Vector<NGramTerm> makeTermSequence(String text) {
    Vector<NGramTerm> ngtv = new Vector<NGramTerm>();
    try {
        TokenStream stream = analyzer.tokenStream("text", new StringReader(text));
        try {
            // Look the attribute up once; the same instance is updated on every
            // incrementToken() call
            CharTermAttribute termAttribute = stream.addAttribute(CharTermAttribute.class);

            stream.reset();
            while (stream.incrementToken()) {
                ngtv.add(new NGramTerm(termAttribute.toString()));
            }
            stream.end();
        } finally {
            // Always release the stream, even if tokenization fails midway
            stream.close();
        }
    } catch (IOException ignored) {
        // not expected b/c we're using a string reader; keep whatever was collected
    }
    return ngtv;
}