List of usage examples for org.apache.lucene.analysis TokenStream getAttribute
public final <T extends Attribute> T getAttribute(Class<T> attClass)
The caller must pass in a Class&lt;? extends Attribute&gt; object for the attribute it wants to retrieve; the method throws IllegalArgumentException if this instance does not contain that attribute.
From source file:filters.indexing.NegationScopeFilter.java
License:Open Source License
/** * Constructor for class NegationScopeFilter * @param input// www.jav a 2s.co m */ public NegationScopeFilter(TokenStream input) { super(input); // Getting attributes from input token stream input_term = input.getAttribute(TermAttribute.class); input_type = input.getAttribute(TypeAttribute.class); input_flags = input.getAttribute(FlagsAttribute.class); input_payload = input.getAttribute(PayloadAttribute.class); // Setting attributes for this token stream output_term = this.getAttribute(TermAttribute.class); output_type = this.getAttribute(TypeAttribute.class); output_flags = this.addAttribute(FlagsAttribute.class); output_payload = this.getAttribute(PayloadAttribute.class); }
From source file:filters.indexing.StatsFilter.java
License:Open Source License
/** * Constructor for class NegationScopeFilter * //from ww w . j av a 2 s .com * @param input */ public StatsFilter(TokenStream input, ReviewDocumentIndexer indexer) { super(input); // Getting attributes from input token stream input_term = input.getAttribute(TermAttribute.class); input_type = input.getAttribute(TypeAttribute.class); input_flags = input.getAttribute(FlagsAttribute.class); input_payload = input.getAttribute(PayloadAttribute.class); // Setting attributes for this token stream output_term = this.getAttribute(TermAttribute.class); output_type = this.getAttribute(TypeAttribute.class); output_flags = this.addAttribute(FlagsAttribute.class); output_payload = this.getAttribute(PayloadAttribute.class); synsets = indexer.theSynsets; reviewLengths = indexer.theStats; reviewLengthCounter = new Counter(); }
From source file:filters.indexing.TopicModelInputFilter.java
License:Open Source License
/** * Constructor for class IndexableFilter * //from w w w. ja va2s.c om * @param input */ public TopicModelInputFilter(TokenStream input, TokenListsCollector tokenLists, ReviewId reviewId) { super(input); // Getting attributes from input token stream input_term = input.getAttribute(TermAttribute.class); input_type = input.getAttribute(TypeAttribute.class); input_flags = input.getAttribute(FlagsAttribute.class); input_payload = input.hasAttribute(PayloadAttribute.class) ? input.getAttribute(PayloadAttribute.class) : null; // Setting attributes for this token stream output_term = this.getAttribute(TermAttribute.class); output_type = this.getAttribute(TypeAttribute.class); output_flags = this.addAttribute(FlagsAttribute.class); output_payload = input.hasAttribute(PayloadAttribute.class) ? this.getAttribute(PayloadAttribute.class) : null; this.reviewId = reviewId; currentDocNumber = new Counter(); tokenListsCollector = tokenLists; }
From source file:filters.LemmatizationFilter.java
License:Open Source License
/** * Constructor for class LemmatizationFilter * /* www . j a v a 2 s . c o m*/ * @param input */ public LemmatizationFilter(TokenStream input, boolean lemmatized_output) { super(input); // Getting attributes from input stream input_term = input.getAttribute(TermAttribute.class); input_type = input.getAttribute(TypeAttribute.class); input_payload = input.getAttribute(PayloadAttribute.class); // Setting attributes for this stream output_term = this.addAttribute(TermAttribute.class); output_type = this.addAttribute(TypeAttribute.class); output_flags = this.addAttribute(FlagsAttribute.class); output_payload = this.addAttribute(PayloadAttribute.class); this.lemmatized_output = lemmatized_output; lemmatizer = new Lemmatizer(); }
From source file:filters.LemmatizationFilter.java
License:Open Source License
/** * Constructor for class LemmatizationFilter *///ww w .j av a 2 s .c o m public LemmatizationFilter(TokenStream input, boolean lemmatized_output, Dictionary wordnet, IndexMap compoundTermsIndex) { super(input); // Getting attributes from input stream input_term = input.getAttribute(TermAttribute.class); input_type = input.getAttribute(TypeAttribute.class); input_payload = input.getAttribute(PayloadAttribute.class); // Setting attributes for this stream output_term = this.getAttribute(TermAttribute.class); output_type = this.getAttribute(TypeAttribute.class); output_flags = this.addAttribute(FlagsAttribute.class); output_payload = this.addAttribute(PayloadAttribute.class); this.lemmatized_output = lemmatized_output; lemmatizer = new Lemmatizer(wordnet, compoundTermsIndex); }
From source file:filters.NamedEntityFilter.java
License:Open Source License
/** * Constructor for class NamedEntityFilter * /*from w w w . j a v a 2 s . c om*/ * @param input * The input {@link TokenStream} */ public NamedEntityFilter(TokenStream input) { super(input); // Getting attributes from input stream input_term = input.getAttribute(TermAttribute.class); input_type = input.getAttribute(TypeAttribute.class); input_payload = input.getAttribute(PayloadAttribute.class); // Setting attributes for this stream output_term = this.addAttribute(TermAttribute.class); output_type = this.addAttribute(TypeAttribute.class); output_payload = this.addAttribute(PayloadAttribute.class); }
From source file:filters.PosTaggingFilter.java
License:Open Source License
/** * Constructor for class PosTaggingFilter * //from www. j a v a2s . c om * @param input */ public PosTaggingFilter(TokenStream input) { super(input); // Getting attributes from input token stream input_term = input.getAttribute(TermAttribute.class); // Setting attributes for this token stream output_term = this.addAttribute(TermAttribute.class); output_type = this.addAttribute(TypeAttribute.class); output_payload = this.addAttribute(PayloadAttribute.class); tagger = initializeTagger(); }
From source file:filters.PosTaggingFilter.java
License:Open Source License
/** * Constructor for class PosTaggingFilter * //w w w .j ava 2 s . c om * @param input * @param tagger * The POS-tagger object that should be used to perform the tagging */ public PosTaggingFilter(TokenStream input, MaxentTagger tagger) { super(input); // Getting attributes from input token stream input_term = input.getAttribute(TermAttribute.class); // Setting attributes for this token stream output_term = this.getAttribute(TermAttribute.class); output_type = this.addAttribute(TypeAttribute.class); output_payload = this.addAttribute(PayloadAttribute.class); this.tagger = tagger; }
From source file:filters.StopLemmaFilter.java
License:Open Source License
/** * Constructor for class StopLemmaFilter * /*from w ww . j a va 2s. com*/ * @param input */ public StopLemmaFilter(TokenStream input, boolean removeStopLemmas) { super(input); // Getting attributes from input token stream input_term = input.getAttribute(TermAttribute.class); input_type = input.getAttribute(TypeAttribute.class); input_flags = input.getAttribute(FlagsAttribute.class); input_payload = input.getAttribute(PayloadAttribute.class); // Setting attributes for this token stream output_term = this.getAttribute(TermAttribute.class); output_type = this.getAttribute(TypeAttribute.class); output_flags = this.addAttribute(FlagsAttribute.class); output_payload = this.getAttribute(PayloadAttribute.class); this.removeStopLemmas = removeStopLemmas; // Initializing stop-lemma lists stopLemmas = new HashMap<POS, ArrayList<String>>(); POS[] pos_array = { POS.VERB, POS.NOUN, POS.ADJECTIVE, POS.ADVERB }; for (POS pos : pos_array) { stopLemmas.put(pos, new ArrayList<String>()); } // Populating stop-lemma lists with data from stop-lemma file try { BufferedReader stop_file = new BufferedReader( new InputStreamReader(new FileInputStream(Paths.stopLemmasFile))); // Read stop-lemmas from stop-lemma file, one lemma per line String stop_line; while ((stop_line = stop_file.readLine()) != null) { // Each line in the stop-lemma file contains a lemma and its corresponding POS // category String[] stoplemma = stop_line.split(" ", 2); POS stoplemma_pos = PosTag.toPOS(stoplemma[0].toUpperCase()); String stoplemma_string = stoplemma[1].toLowerCase(); // Adding stop-lemma to the list that corresponds to its POS category ArrayList<String> stop_list; if ((stop_list = stopLemmas.get(stoplemma_pos)) != null) { stop_list.add(stoplemma_string); } else { AppLogger.error.log(Level.WARNING, "Unrecognized POS in stop-lemma file: " + stoplemma_pos + " on line \"" + stop_line + "\""); } } stop_file.close(); } catch (FileNotFoundException e) { AppLogger.error.log(Level.SEVERE, "Stop-lemma file not 
found at location " + Paths.stopLemmasFile); } catch (IOException e) { AppLogger.error.log(Level.SEVERE, "Error reading from stop-lemma file"); } }
From source file:fr.lipn.yasemir.weighting.ckpd.TermFactory.java
License:Open Source License
public static Vector<NGramTerm> makeTermSequence(String text) { List<String> result = new ArrayList<String>(); try {/*from w w w. j a v a2 s. c o m*/ TokenStream stream = analyzer.tokenStream("text", new StringReader(text)); while (stream.incrementToken()) { //result.add(stream.getAttribute(TermAttribute.class).term()); //old way result.add(stream.getAttribute(CharTermAttribute.class).toString()); } } catch (IOException e) { // not thrown b/c we're using a string reader... } Vector<NGramTerm> ngtv = new Vector<NGramTerm>(); for (String s : result) { NGramTerm t = new NGramTerm(s); ngtv.add(t); } return ngtv; }