// PascalTemplate.java :  » Natural-Language-Processing » Stanford-Named-Entity-Recognizer » edu » stanford » nlp » ie » pascal » Java Open Source
//
// Java Open Source » Natural Language Processing » Stanford Named Entity Recognizer 
// Stanford Named Entity Recognizer » edu » stanford » nlp » ie » pascal » PascalTemplate.java
package edu.stanford.nlp.ie.pascal;

import edu.stanford.nlp.stats.ClassicCounter;
import edu.stanford.nlp.util.Index;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Locale;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Maps non-background Pascal fields to strings.
 *
 * @author Chris Cox
 */


public class PascalTemplate {

  /**
   * Names of all template slots, in slot order. The last entry is the
   * background symbol, which is not a real field. The array is public for
   * backward compatibility; callers must treat it as read-only.
   */
  public static final String[] fields = {
    //dates
    "workshoppapersubmissiondate",
    "workshopnotificationofacceptancedate",
    "workshopcamerareadycopydate",
    "workshopdate",
    //location
    "workshoplocation",
    //workshop info
    "workshopacronym",
    "workshophomepage",
    "workshopname",
    //conference info
    "conferenceacronym",
    "conferencehomepage",
    "conferencename",
    //background symbol
    "0"
  };

  public static final String BACKGROUND_SYMBOL = "0";

  /** Slot index of the background symbol (always the last entry of {@link #fields}). */
  private static final int BACKGROUND_INDEX = fields.length - 1;

  /** Maps a field name to its position in {@link #fields}. */
  private static final Index fieldIndices;

  static {
    fieldIndices = new Index();
    for (String field : fields) {
      fieldIndices.add(field);
    }
  }

  /** One value per entry of {@link #fields}; {@code null} means "not set". */
  private final String[] values;


  /** Creates an empty template in which every slot is unset ({@code null}). */
  public PascalTemplate() {
    // A freshly allocated String[] is already filled with nulls.
    values = new String[fields.length];
  }

  /**
   * Copy constructor: creates a template with the same slot values as {@code pt}.
   *
   * @param pt the template to copy
   */
  public PascalTemplate(PascalTemplate pt) {
    // Strings are immutable, so copying the references is sufficient.
    this.values = pt.values.clone();
  }

  /*
   * Acronym stemming and matching fields
   */
  private static final Pattern ACRONYM_PATTERN =
      Pattern.compile("([ \r-/a-zA-Z]+?)(?:[ -'*\t\r\n\f0-9]*)", Pattern.DOTALL);

  /**
   * Tests whether two acronyms share the same stem according to a
   * precomputed stem index.
   *
   * @param s1 first acronym (a key of {@code stemmedAcronymIndex})
   * @param s2 second acronym (a key of {@code stemmedAcronymIndex})
   * @param stemmedAcronymIndex map from acronym to its stem
   * @return {@code true} if both acronyms have a stem in the index and the
   *         stems are equal; {@code false} otherwise, including when either
   *         acronym has no entry
   */
  public static boolean acronymMatch(String s1, String s2, HashMap stemmedAcronymIndex) {
    System.err.println("Testing match:" + s1 + " : " + s2);
    String stem1 = (String) stemmedAcronymIndex.get(s1);
    String stem2 = (String) stemmedAcronymIndex.get(s2);
    // Log the stems that were looked up, not the raw inputs again.
    System.err.println("Got stems:" + stem1 + " : " + stem2);
    // Null-safe: an acronym without an index entry matches nothing.
    return stem1 != null && stem1.equals(stem2);
  }

  /**
   * Reduces an acronym to its lower-cased stem (for example
   * {@code "ICML-2004"} becomes {@code "icml"}) and records the result in
   * {@code ct}: the stem is cached in {@code ct.stemmedAcronymIndex} and the
   * original string is added to the set stored under the stem in
   * {@code ct.inverseAcronymMap}.
   *
   * @param s the acronym to stem
   * @param ct holder of the stem cache and the stem-to-acronyms map
   * @return the stem, or the literal string {@code "null"} (not a
   *         {@code null} reference) if {@code s} does not look like an
   *         acronym or is "www" in any letter case
   */
  @SuppressWarnings({"unchecked", "rawtypes"}) // the maps on CliqueTemplates are used as raw collections
  public static String stemAcronym(String s, CliqueTemplates ct) {
    if (ct.stemmedAcronymIndex.containsKey(s)) {
      return (String) ct.stemmedAcronymIndex.get(s);
    }
    Matcher matcher = ACRONYM_PATTERN.matcher(s);
    if (!matcher.matches() || s.equalsIgnoreCase("www")) {
      System.err.println("Not a valid acronym: " + s);
      return "null";
    }

    // Fixed locale so that stems do not depend on the JVM's default locale.
    String stemmed = matcher.group(1).toLowerCase(Locale.ENGLISH);
    if (stemmed.endsWith("-")) {
      stemmed = stemmed.substring(0, stemmed.length() - 1);
    }

    ct.stemmedAcronymIndex.put(s, stemmed);
    System.err.println("Stemmed: " + s + " to: " + stemmed);

    HashSet variants = (HashSet) ct.inverseAcronymMap.get(stemmed);
    if (variants == null) {
      variants = new HashSet();
      ct.inverseAcronymMap.put(stemmed, variants);
    }
    variants.add(s);
    return stemmed;
  }

  /**
   * Merges partial (clique) templates into a full one.
   *
   * @param dt date template
   * @param location location
   * @param wi workshop/conference info template
   * @return the {@link PascalTemplate} resulting from this merge.
   */
  public static PascalTemplate mergeCliqueTemplates(DateTemplate dt, String location, InfoTemplate wi) {
    PascalTemplate pt = new PascalTemplate();
    pt.setValue("workshopnotificationofacceptancedate", dt.noadate);
    pt.setValue("workshopcamerareadycopydate", dt.crcdate);
    pt.setValue("workshopdate", dt.workdate);
    pt.setValue("workshoppapersubmissiondate", dt.subdate);
    pt.setValue("workshoplocation", location);
    pt.setValue("workshopacronym", wi.wacronym);
    pt.setValue("workshophomepage", wi.whomepage);
    pt.setValue("workshopname", wi.wname);
    pt.setValue("conferenceacronym", wi.cacronym);
    pt.setValue("conferencehomepage", wi.chomepage);
    pt.setValue("conferencename", wi.cname);
    return pt;
  }

  /**
   * Sets a template value by field name.
   *
   * @param fieldName one of the names in {@link #fields} (i.e. workshopname, workshopdate)
   * @param value the new value; may be {@code null} to clear the slot
   * @throws IllegalArgumentException if {@code fieldName} is not a known field
   */
  public void setValue(String fieldName, String value) {
    int index = getFieldIndex(fieldName);
    if (index < 0) {
      // Fail with a clear message even when assertions are disabled.
      throw new IllegalArgumentException("Unknown Pascal field: " + fieldName);
    }
    // NOTE(review): unlike setValue(int, String), this overload also writes
    // the background slot when given "0"; kept as is for compatibility.
    values[index] = value;
  }

  /**
   * Sets a template value by slot index. A write to the background slot
   * (the last index) is silently ignored.
   *
   * @param index position in {@link #fields}
   * @param value the new value; may be {@code null} to clear the slot
   */
  public void setValue(int index, String value) {
    if (index != BACKGROUND_INDEX) {
      values[index] = value;
    }
  }

  /**
   * Looks up a template value by field name.
   *
   * @param fieldName field name (i.e. workshopname, workshopdate)
   * @return the stored value, or {@code null} if the slot is unset, the name
   *         is unknown, or the name is the background symbol
   */
  public String getValue(String fieldName) {
    int index = getFieldIndex(fieldName);
    if (index == -1 || index == BACKGROUND_INDEX) {
      return null;
    }
    return values[index];
  }

  /**
   * Two templates are equal when all their non-background slots hold equal
   * values; the background slot is ignored.
   */
  @Override
  public boolean equals(Object obj) {
    if (this == obj) {
      return true;
    }
    // instanceof is false for null, so no separate null check is needed.
    if (!(obj instanceof PascalTemplate)) {
      return false;
    }
    String[] otherValues = ((PascalTemplate) obj).values;
    for (int i = 0; i < BACKGROUND_INDEX; i++) {
      String mine = values[i];
      String theirs = otherValues[i];
      if (mine == null ? theirs != null : !mine.equals(theirs)) {
        return false;
      }
    }
    return true;
  }

  /** Hash over the non-background slots, consistent with {@link #equals(Object)}. */
  @Override
  public int hashCode() {
    int tally = 37;
    for (int i = 0; i < BACKGROUND_INDEX; i++) {
      int slotHash = (values[i] == null) ? 11 : values[i].hashCode();
      tally = 17 * tally + slotHash;
    }
    return tally;
  }

  /**
   *
   * @param tag field name (i.e. workshopdate, workshoplocation)
   * @return the reference of that field in the underlying {@link Index}
   */
  public static int getFieldIndex(String tag) {
    return fieldIndices.indexOf(tag);
  }

  /**
   * Should be passed a <code>Counter[]</code>, each entry of which
   * keeps scores for possibilities in that template slot.  The counter
   * for each template value is incremented by the corresponding score of
   * this PascalTemplate. Slots that are unset or hold the string
   * {@code "NULL"} are skipped.
   *
   * @param fieldValueCounter an array of counters, each of which holds label
   *        possibilities for one field; indexed like {@link #fields}
   * @param score increment counts by this much.
   */
  public void writeToFieldValueCounter(ClassicCounter[] fieldValueCounter, double score) {
    for (int i = 0; i < fields.length; i++) {
      String value = values[i];
      // NOTE(review): this comparison is case-sensitive while toString()
      // filters "NULL" case-insensitively; confirm which one is intended.
      if (value != null && !value.equals("NULL")) {
        fieldValueCounter[i].incrementCount(value, score);
      }
    }
  }

  /**
   * Divides this template into partial templates, and updates the counts of these
   * partial templates in the {@link CliqueTemplates} object.
   *
   * @param ct the partial templates counter object
   * @param score increment counts by this much
   */
  public void unpackToCliqueTemplates(CliqueTemplates ct, double score) {
    // Slots 0-3: paper submission, notification of acceptance,
    // camera-ready copy and workshop dates, in that order.
    ct.dateCliqueCounter.incrementCount(
        new DateTemplate(values[0], values[1], values[2], values[3]), score);

    // Slot 4: workshop location; only counted when it is set.
    if (values[4] != null) {
      ct.locationCliqueCounter.incrementCount(values[4], score);
    }

    // Arguments: workshop homepage (6), acronym (5), name (7), then
    // conference homepage (9), acronym (8), name (10).
    ct.workshopInfoCliqueCounter.incrementCount(
        new InfoTemplate(values[6], values[5], values[7], values[9], values[8], values[10], ct),
        score);
  }

  /** Prints this template to standard error. */
  public void print() {
    System.err.println("PascalTemplate: ");
    System.err.println(this.toString());
  }

  /**
   * Renders every slot that is set and is not the string "NULL" (in any
   * letter case) as a {@code name : value} line below a separator line.
   */
  @Override
  public String toString() {
    StringBuilder sb = new StringBuilder("\n====================\n");
    for (int i = 0; i < values.length; i++) {
      String value = values[i];
      if (value != null && !value.equalsIgnoreCase("NULL")) {
        sb.append(fields[i]).append(" : ").append(value).append("\n");
      }
    }
    return sb.toString();
  }
}
// java2s.com  | Contact Us | Privacy Policy
// Copyright 2009 - 12 Demo Source and Support. All rights reserved.
// All other trademarks are property of their respective owners.