GermanUnknownWordModel.java :  » Natural-Language-Processing » Stanford-Parser » edu » stanford » nlp » parser » lexparser » Java Open Source

Java Open Source » Natural Language Processing » Stanford Parser 
Stanford Parser » edu » stanford » nlp » parser » lexparser » GermanUnknownWordModel.java
package edu.stanford.nlp.parser.lexparser;


/**
 * An unknown word model for German; relies on BaseUnknownWordModel plus number matching.
 * An assumption of this model is that numbers (arabic digit sequences)
 * are tagged CARD. This is correct for all of NEGRA/Tiger/TuebaDZ.
 *
 * @author Roger Levy
 * @author Greg Donaker (corrections and modeling improvements)
 * @author Christopher Manning (generalized and improved what Greg did)
 */
public class GermanUnknownWordModel extends BaseUnknownWordModel {

  private static final long serialVersionUID = 221L;

  /** The only tag a number-shaped word is allowed to receive. */
  private static final String NUMBER_TAG = "CARD";

  /**
   * Matches a whole word made of one or more ASCII digits, optionally followed by a
   * period and zero or more further digits (e.g. {@code "42"}, {@code "42."},
   * {@code "3.14"}).
   *
   * <p>The trailing group is optional so that plain digit sequences are recognized as
   * numbers; without the {@code ?} only words containing a period would match.
   * The pattern is compiled once so the expression is not re-parsed on every call to
   * {@link #score(IntTaggedWord)}.
   */
  private static final java.util.regex.Pattern NUMBER_PATTERN =
      java.util.regex.Pattern.compile("[0-9]+(?:\\.[0-9]*)?");

  /**
   * Creates the model by delegating directly to the superclass constructor.
   *
   * @param op  lexicon options, passed through to the superclass
   * @param lex the lexicon, passed through to the superclass
   */
  public GermanUnknownWordModel(Options.LexOptions op, Lexicon lex) {
    super(op, lex);
  }


  /** Calculate the log-prob score of a particular TaggedWord in the
   *  unknown word model.
   *
   *  <p>If the word looks like a number (see {@code NUMBER_PATTERN}) the score is
   *  deterministic: {@code 0.0f} (log of probability 1) when the tag is CARD and
   *  {@link Float#NEGATIVE_INFINITY} (log of probability 0) for any other tag.
   *  All other words are scored by the superclass.
   *
   *  @param itw the tag->word production in IntTaggedWord form
   *  @return The log-prob score of a particular TaggedWord.
   */
  @Override
  public float score(IntTaggedWord itw) {
    String word = itw.wordString();

    // Anything that is not number-shaped is handled by the base model.
    if (!NUMBER_PATTERN.matcher(word).matches()) {
      return super.score(itw);
    }

    // Numbers are tagged CARD with certainty; every other tag is ruled out.
    // Constant-first equals keeps this safe should tagString() ever return null.
    return NUMBER_TAG.equals(itw.tagString()) ? 0.0f : Float.NEGATIVE_INFINITY;
  }

}

java2s.com  | Contact Us | Privacy Policy
Copyright 2009 - 12 Demo Source and Support. All rights reserved.
All other trademarks are property of their respective owners.