Example usage for edu.stanford.nlp.ling CoreLabel ner

List of usage examples for edu.stanford.nlp.ling CoreLabel ner

Introduction

On this page you can find example usages of edu.stanford.nlp.ling CoreLabel ner.

Prototype

@Override
public String ner() 

Source Link

Usage

From source file:ca.ualberta.exemplar.core.CleanPrefixAnnotator.java

License:Open Source License

/**
 * Strips a leading prefix (a run of tokens terminated by "--" or ":") from each
 * sentence of the document.
 *
 * <p>Only the first 10 tokens of a sentence are inspected. When a separator token is
 * reached and more than half of the tokens before it are tagged DATE, LOCATION, NUMBER
 * or ORDINAL, the sentence's token list is replaced by the tokens after the separator,
 * and the document text preceding the first kept token is stored on that token as its
 * {@code BeforeAnnotation}.
 *
 * @param document the annotated document; its sentences are modified in place
 */
@Override
public void annotate(Annotation document) {
    if (document.has(SentencesAnnotation.class)) {
        for (CoreMap sentence : document.get(SentencesAnnotation.class)) {

            List<CoreLabel> tokens = sentence.get(TokensAnnotation.class);
            if (tokens == null) {
                // No token list on this sentence, so there is nothing to inspect.
                continue;
            }
            int numTokens = 0, numPrefixParts = 0;

            // Assumption: prefix is at max 10 tokens
            for (int i = 0; i < Math.min(tokens.size(), 10); i++) {

                CoreLabel token = tokens.get(i);
                String tokenText = token.get(TextAnnotation.class);

                if (tokenText != null && numTokens > 0 && (tokenText.equals("--") || tokenText.equals(":"))) {
                    // Assumption: if more than half the tokens are a date/location/number it's a prefix
                    double fraction = (double) numPrefixParts / (double) numTokens;
                    // The separator can be the last token of the sentence; in that case there is
                    // no following token to keep, so the sentence is left untouched.
                    if (fraction > 0.5 && i + 1 < tokens.size()) {
                        CoreLabel nextToken = tokens.get(i + 1);
                        String before = document.get(TextAnnotation.class).substring(0,
                                nextToken.beginPosition());
                        nextToken.set(BeforeAnnotation.class, before);
                        sentence.set(TokensAnnotation.class, tokens.subList(i + 1, tokens.size()));
                    }
                    // Only the first separator is considered, whether or not a prefix was removed.
                    break;
                }

                numTokens++;
                String neTag = token.ner();
                if (neTag != null && (neTag.equals("DATE") || neTag.equals("LOCATION") || neTag.equals("NUMBER")
                        || neTag.equals("ORDINAL"))) {
                    numPrefixParts++;
                }
            }
        }
    }
}

From source file:edu.ucla.cs.scai.qa.questionclassifier.SyntacticTreeNode.java

/**
 * Builds a node for the parse tree {@code t} and, recursively, for all of its children.
 *
 * <p>Each leaf consumes (removes) the first element of {@code tokens} and copies its
 * begin/end positions, lemma and NER tag. Each inner node spans from the smallest
 * {@code begin} to the largest {@code end} of its children, and NP / WHNP nodes are
 * flagged as simple, compound or QP-based depending on the labels of their direct children.
 *
 * @param t      the parse (sub)tree this node represents
 * @param tokens the not-yet-consumed tokens, in leaf order; leaves remove entries from the front
 * @param parent the parent node (stored as-is)
 * @throws Exception if no token is available for a leaf, or if the leaf's value differs
 *                   from the value of the token it was matched with
 */
public SyntacticTreeNode(Tree t, ArrayList<CoreLabel> tokens, SyntacticTreeNode parent) throws Exception {
    this.parent = parent;
    value = t.value();
    if (t.isLeaf()) {
        // Validate before dereferencing: an exhausted list or a null entry both mean that
        // no token can be mapped to this leaf.
        CoreLabel c = tokens.isEmpty() ? null : tokens.remove(0);
        if (c == null) {
            throw new Exception("Mapping between TreeNode and CoreLabel not found");
        }
        begin = c.beginPosition();
        end = c.endPosition();
        lemma = c.lemma();
        ner = c.ner();
        if (!value.equals(c.value())) {
            throw new Exception("Different words have been matched!");
        }
    } else {
        boolean hasNPchildren = false;
        boolean hasWHNPchildren = false;
        boolean hasQPchildren = false;
        // Start with an empty span; it is widened by every child below.
        begin = Integer.MAX_VALUE;
        end = Integer.MIN_VALUE;
        for (Tree c : t.children()) {
            SyntacticTreeNode child = new SyntacticTreeNode(c, tokens, this);
            children.add(child);
            if (child.value.equals("NP")) {
                hasNPchildren = true;
            } else if (child.value.equals("QP")) {
                hasQPchildren = true;
            } else if (child.value.equals("WHNP")) {
                hasWHNPchildren = true;
            }
            begin = Math.min(begin, child.begin);
            end = Math.max(end, child.end);
        }
        if (value.equals("NP")) {
            if (hasNPchildren) {
                npCompound = true;
            } else if (hasQPchildren) {
                npQp = true;
            } else {
                npSimple = true;
            }
        } else if (value.equals("WHNP")) { //can a WHNP node have QP children?
            if (hasNPchildren || hasWHNPchildren) {
                whnpCompound = true;
            } else if (!hasQPchildren) {
                whnpSimple = true;
            }
        }
    }
}

From source file:ims.cs.corenlp.TokenAligner.java

License:Open Source License

/**
 * Creates a new token from {@code tok} and fills its predicted ("pred") fields from a
 * CoreNLP label.
 *
 * @param tok                         the token the result is constructed from
 * @param cl                          the CoreNLP label supplying word, lemma, POS tag, NER tag
 *                                    and begin/end positions
 * @param currentCoreNlpSentenceIndex value stored as the predicted sentence position
 * @return the newly created token carrying the predicted information
 */
public static Token combineTokensPred(Token tok, CoreLabel cl, int currentCoreNlpSentenceIndex) {
    final Token merged = new Token(tok);

    // Surface form and lemma.
    final String word = cl.word();
    final String lemma = cl.lemma();
    merged.predText = word;
    merged.predLemma = lemma;

    // Placeholder; the document aligner assigns the real position later.
    merged.predPosition = -1;

    // Part of speech and sentence index.
    final String posTag = cl.tag();
    merged.predPosTag = posTag;
    merged.predSentencePosition = currentCoreNlpSentenceIndex;

    // Named-entity tag, mapped through the project's translation helper.
    merged.predNer = Helper.translateNer(cl.ner());

    // Begin/end positions of the CoreNLP token.
    final int beginPos = cl.beginPosition();
    final int endPos = cl.endPosition();
    merged.predByteCount = new ByteCount(beginPos, endPos);

    return merged;
}

From source file:knu.univ.lingvo.coref.Mention.java

License:Open Source License

/**
 * Returns the maximal run of tokens around the head word whose NER tag equals
 * {@code nerString}.
 *
 * <p>The run starts as the single token at offset {@code headIndex - startIndex} of
 * {@code originalSpan} and is extended to the left and to the right while the
 * neighbouring token carries the same NER tag.
 *
 * @return a sub-list view of {@code originalSpan}, or {@code null} when
 *         {@code nerString} is {@code null} or "O"
 */
public List<CoreLabel> nerTokens() {
    final boolean hasEntityType = nerString != null && !"O".equals(nerString);
    if (!hasEntityType) {
        return null;
    }

    final int headOffset = headIndex - startIndex;

    // Extend to the left while the preceding token has the same tag.
    int from = headOffset;
    while (from > 0 && nerString.equals(originalSpan.get(from - 1).ner())) {
        from--;
    }

    // Extend to the right (exclusive bound) while the following token has the same tag.
    int to = headOffset + 1;
    while (to < originalSpan.size() && nerString.equals(originalSpan.get(to).ner())) {
        to++;
    }

    return originalSpan.subList(from, to);
}

From source file:knu.univ.lingvo.coref.Mention.java

License:Open Source License

/**
 * Builds a pattern string for the given tokens in which named-entity runs are collapsed
 * to a single {@code <TYPE>} placeholder and all other tokens appear as their lemma.
 *
 * <p>Rules, applied per token in order:
 * <ol>
 *   <li>the head word is always emitted as its lemma;</li>
 *   <li>"and" or a punctuation token sitting between two tokens with the same NER tag is
 *       dropped, so that a coordinated entity stays one run;</li>
 *   <li>the token directly before the head word is emitted as its lemma when its NER tag
 *       equals {@code nerString};</li>
 *   <li>any other token tagged with something other than "O" emits {@code <TAG>} once per run;</li>
 *   <li>everything else is emitted as its lemma.</li>
 * </ol>
 *
 * <p>Tokens without an NER tag ({@code ner() == null}) are treated like "O" tokens
 * instead of causing a {@code NullPointerException}.
 *
 * @param pTokens the tokens to turn into a pattern
 * @return the pattern elements joined by {@code StringUtils.join}
 */
public String getPattern(List<CoreLabel> pTokens) {

    ArrayList<String> phrase_string = new ArrayList<String>();
    String ne = "";
    final int size = pTokens.size();
    // Iterate by position: neighbours are looked up via i - 1 / i + 1 rather than by
    // repeated indexOf() scans, which were quadratic and matched by equals(), not position.
    for (int i = 0; i < size; i++) {
        CoreLabel token = pTokens.get(i);
        String tokenNer = token.ner();

        if (token.index() == headWord.index()) {
            phrase_string.add(token.lemma());
            ne = "";

        } else if ((token.lemma().equals("and") || StringUtils.isPunct(token.lemma()))
                && i + 1 < size && i > 0
                && pTokens.get(i + 1).ner() != null
                && pTokens.get(i + 1).ner().equals(pTokens.get(i - 1).ner())) {
            // Connector inside a coordinated entity: emit nothing and keep the current run open.

        } else if (token.index() == headWord.index() - 1 && tokenNer != null && tokenNer.equals(nerString)) {
            phrase_string.add(token.lemma());
            ne = "";

        } else if (tokenNer != null && !tokenNer.equals("O")) {
            // Emit the placeholder only when a new entity run starts.
            if (!tokenNer.equals(ne)) {
                ne = tokenNer;
                phrase_string.add("<" + ne + ">");
            }

        } else {
            phrase_string.add(token.lemma());
            ne = "";
        }
    }
    return StringUtils.join(phrase_string);
}

From source file:knu.univ.lingvo.coref.Mention.java

License:Open Source License

/**
 * Collects the distinct named-entity strings found in {@code words}.
 *
 * <p>Adjacent tokens that share the same NER tag (other than "O") are grouped into one
 * entity; each group is rendered with {@code StringUtils.joinWords(group, " ")} and
 * duplicates are removed through a set.
 *
 * @param words the tokens to scan
 * @return the distinct entity strings, in the iteration order of the underlying set
 */
private static List<String> getContextHelper(List<? extends CoreLabel> words) {
    List<List<CoreLabel>> entityGroups = new ArrayList<List<CoreLabel>>();
    List<CoreLabel> currentGroup = new ArrayList<CoreLabel>();
    String lastType = "";
    int lastIndex = -1;

    for (int idx = 0; idx < words.size(); idx++) {
        final CoreLabel current = words.get(idx);
        final String type = current.ner();
        if (type.equals("O")) {
            continue;
        }

        // A token continues the open group only if it has the same tag as, and directly
        // follows, the previously grouped token.
        final boolean continuesPrevious = type.equals(lastType) && lastIndex == idx - 1;
        if (!continuesPrevious) {
            currentGroup = new ArrayList<CoreLabel>();
            entityGroups.add(currentGroup);
        }
        currentGroup.add(current);
        lastType = type;
        lastIndex = idx;
    }

    // De-duplicate the rendered entity strings.
    Set<String> unique = Generics.newHashSet();
    for (List<CoreLabel> group : entityGroups) {
        unique.add(StringUtils.joinWords(group, " "));
    }
    return new ArrayList<String>(unique);
}

From source file:main.java.spelementex.Annotator.java

/**
 * Writes CRF feature lines, including each token's spatial-element type, for every sentence.
 *
 * <p>One line is produced per token, and each sentence is followed by an empty line.
 * Bytes are produced with the platform default charset.
 *
 * @param sentenceStartOffsetParserLabel per sentence, the tokens keyed by their start offset
 * @param startOffsetSpatialElement      spatial elements keyed by their start offset
 * @param output                         stream the feature lines are written to; it is not closed here
 * @throws IOException if writing to {@code output} fails
 */
public void writeCrfRolesData(Map<Integer, Map<Integer, CoreLabel>> sentenceStartOffsetParserLabel,
        Map<Integer, SpatialElement> startOffsetSpatialElement, FileOutputStream output) throws IOException {
    // Carried over from token to token (and across sentences) when no new element is found.
    SpatialElement se = null;
    int[] roleOffsets;
    // Accumulates one sentence's lines; a builder avoids quadratic string copying.
    StringBuilder features = new StringBuilder();

    for (Map<Integer, CoreLabel> startOffsetParserLabel : sentenceStartOffsetParserLabel.values()) {
        // NOTE(review): roleOffsets is never written in this method, so roleOffsets[1] is
        // always 0 and the ">=" test below holds for every non-negative offset. Confirm
        // whether the offset tracking used by the training writers was meant to be applied here.
        roleOffsets = new int[2];

        for (Map.Entry<Integer, CoreLabel> entry : startOffsetParserLabel.entrySet()) {
            int startOffset = entry.getKey();
            CoreLabel token = entry.getValue();
            // this is the text of the token
            String word = token.word();

            //generate crf feature
            CrfFeature crfFeat = new CrfFeature(word, token.lemma(), token.tag(), token.ner());
            crfFeat.setUniFV();

            // Prefer an element starting exactly at this token; otherwise look one up by word,
            // or keep the previous element.
            se = startOffsetSpatialElement.containsKey(startOffset) ? startOffsetSpatialElement.get(startOffset)
                    : startOffset >= roleOffsets[1]
                            ? SpatialElement.getSE(word, startOffset, startOffsetSpatialElement)
                            : se;
            boolean isMotionSignal = se == null ? false : se.isMotionSignal();
            boolean isMotionSignalOnly = se == null ? false : se.isMotionSignalOnly();

            if (se == null)
                crfFeat.setType();
            else
                crfFeat.setType(se.getNonMotionSignalType(), isMotionSignal, isMotionSignalOnly);
            features.append(crfFeat.toString()).append("\n");
        }
        // Blank line terminates the sentence.
        features.append("\n");
        output.write(features.toString().getBytes());
        features.setLength(0);
    }
}

From source file:main.java.spelementex.Annotator.java

/**
 * Writes a single test data file to use as input for CRF++ toolkit.
 * /*w  w w .java  2 s  . c om*/
 * @param sentenceStartOffsetParserLabel
 * @param output
 * @throws IOException 
 */
public void writeCrfData(Map<Integer, Map<Integer, CoreLabel>> sentenceStartOffsetParserLabel,
        FileOutputStream output) throws IOException {
    String featureStr = "";
    for (int sentence : sentenceStartOffsetParserLabel.keySet()) {
        Map<Integer, CoreLabel> startOffsetParserLabel = sentenceStartOffsetParserLabel.get(sentence);

        for (int startOffset : startOffsetParserLabel.keySet()) {
            CoreLabel token = startOffsetParserLabel.get(startOffset);
            //generate crf feature
            CrfFeature crfFeat = new CrfFeature(token.word(), token.lemma(), token.tag(), token.ner());
            crfFeat.setUniFV();
            featureStr += crfFeat.toString() + "\n";
        }

        output.write((featureStr + "\n").getBytes());
        featureStr = "";
    }
}

From source file:main.java.spelementex.Trainer.java

/**
 * Writes labelled training data to use as input for CRF++ toolkit.
 * Writes to two separate training data files: 1) only with labels for
 * motion-signal spatial entity tokens; and 2) with labels for all
 * spatial entity tokens except motion-signals.
 * /*from w  w w.j  ava 2s . c  o  m*/
 * @param seOutput
 * @param msOutput
 * @throws SAXException
 * @throws ParserConfigurationException
 * @throws IOException 
 */
public void writeTypeLabelledCrfData(FileOutputStream seOutput, FileOutputStream msOutput)
        throws SAXException, ParserConfigurationException, IOException {
    int[] seOffsets;
    int[] msOffsets;

    for (File file : trainFiles) {
        System.out.println("writing labelled CRF data for " + file.getPath());

        //parses the spatial entity annotations from input xml files
        Doc document = new Doc();
        Main.dp.parseXmlAnnotationsFile(file.toString(), document, true, false);
        //map which stores all spatial elements
        Map<Integer, SpatialElement> startOffsetSpatialElement = new HashMap<>(
                document.getStartOffsetSpatialElement());
        //map to store the sentence-wise nlp info of the text document
        //obtained from the Stanford CoreNLP parser
        Map<Integer, Map<Integer, CoreLabel>> sentenceStartOffsetParserLabel = StanfordParser.parse(document);

        SpatialElement se = null;

        String seFeatureStr = "";
        String msFeatureStr = "";

        for (int sentence : sentenceStartOffsetParserLabel.keySet()) {
            Map<Integer, CoreLabel> startOffsetParserLabel = sentenceStartOffsetParserLabel.get(sentence);
            seOffsets = new int[2];
            msOffsets = new int[2];

            for (int startOffset : startOffsetParserLabel.keySet()) {

                CoreLabel token = startOffsetParserLabel.get(startOffset);
                // this is the text of the token
                String word = token.word();

                //generate crf feature
                CrfFeature crfFeat = new CrfFeature(word, token.lemma(), token.tag(), token.ner());
                crfFeat.setUniFV();
                String featureStr = crfFeat.toString();

                seFeatureStr += featureStr;

                se = startOffsetSpatialElement.containsKey(startOffset)
                        ? startOffsetSpatialElement.get(startOffset)
                        : startOffset >= seOffsets[1]
                                ? SpatialElement.getSE(word, startOffset, startOffsetSpatialElement)
                                : se;
                boolean isMotionSignal = se == null ? false : se.isMotionSignal();
                boolean isMotionSignalOnly = se == null ? false : se.isMotionSignalOnly();

                if (se == null || isMotionSignalOnly)
                    seFeatureStr += " O\n";
                else if (seOffsets[1] != se.getEnd()) {
                    seOffsets[0] = startOffset;
                    int difference = startOffset - se.getStart();
                    seOffsets[1] = se.getEnd() + difference;
                }

                if (se != null && !isMotionSignalOnly) {
                    if (startOffset == seOffsets[0])
                        seFeatureStr += " B-" + se.getNonMotionSignalType() + "\n";
                    else if (startOffset < seOffsets[1])
                        seFeatureStr += " I-" + se.getNonMotionSignalType() + "\n";
                }

                msFeatureStr += featureStr;

                if (se == null || !isMotionSignal)
                    msFeatureStr += " O\n";
                else if (msOffsets[1] != se.getEnd()) {
                    msOffsets[0] = startOffset;
                    int difference = startOffset - se.getStart();
                    msOffsets[1] = se.getEnd() + difference;
                }

                if (se != null && isMotionSignal) {
                    if (startOffset == msOffsets[0])
                        msFeatureStr += " B-MOTION_SIGNAL\n";
                    else if (startOffset < msOffsets[1])
                        msFeatureStr += " I-MOTION_SIGNAL\n";
                }
            }
            seOutput.write((seFeatureStr + "\n").getBytes());
            seFeatureStr = "";
            msOutput.write((msFeatureStr + "\n").getBytes());
            msFeatureStr = "";
        }
    }
}

From source file:main.java.spelementex.Trainer.java

/**
 * Writes labeled training data to use as input for CRF++ toolkit.
 * For each role, writes to two separate training data files:
 * 1) with labels for the role and other roles it doesn't overlap with;
 * 2) with labels for the roles it does and doesn't overlap with, except itself.
 *
 * <p>The files are opened in append mode as {@code dataDir\<role><ext>.txt} and
 * {@code dataDir\<role><ext>Other.txt}, where {@code ext} is "Train" or "Test". The
 * second file is skipped when the role's overlap set contains the string "null". Both
 * streams are closed once the role has been processed for the current input file.
 *
 * @param dataDir directory prefix for the output files
 * @param train   selects the "Train" ({@code true}) or "Test" ({@code false}) file-name suffix
 * @throws SAXException declared for the XML annotation parsing
 * @throws ParserConfigurationException declared for the XML annotation parsing
 * @throws IOException if reading the input or writing an output file fails
 */
public void writeRoleLabelledCrfData(String dataDir, boolean train)
        throws SAXException, ParserConfigurationException, IOException {
    // [0] = start offset of the element currently being labelled, [1] = its end bound.
    int[] roleOffsets;
    int[] otherOffsets;
    String ext = train ? "Train" : "Test";

    Set<String> allRolesSet = new HashSet<>(SpatialRelation.ROLE_OVERLAP_ROLES_MAP.keySet());
    for (File file : trainFiles) {
        System.out.println("writing labelled CRF data for " + file.getPath());

        //parses the spatial entity annotations from input xml files
        Doc document = new Doc();
        Main.dp.parseXmlAnnotationsFile(file.toString(), document, true, true);
        //map which stores all spatial elements
        Map<Integer, SpatialElement> startOffsetSpatialElement = new HashMap<>(
                document.getStartOffsetSpatialElement());
        //map to store the sentence-wise nlp info of the text document
        //obtained from the Stanford CoreNLP parser
        Map<Integer, Map<Integer, CoreLabel>> sentenceStartOffsetParserLabel = StanfordParser.parse(document);

        for (String role : SpatialRelation.ROLE_OVERLAP_ROLES_MAP.keySet()) {

            Set<String> overlapRoles = new HashSet<>(SpatialRelation.ROLE_OVERLAP_ROLES_MAP.get(role));
            Set<String> nonOverlapRoles = new HashSet<>(allRolesSet);
            nonOverlapRoles.removeAll(overlapRoles);
            nonOverlapRoles.remove(role);

            // try-with-resources closes both streams even when writing fails; a null resource
            // (no "Other" file for this role) is simply skipped on close.
            // NOTE(review): the "\\" separator is Windows-specific - confirm the target platform.
            try (FileOutputStream output = new FileOutputStream(dataDir + "\\" + role + ext + ".txt", true);
                    FileOutputStream outputOther = overlapRoles.contains("null") ? null
                            : new FileOutputStream(dataDir + "\\" + role + ext + "Other.txt", true)) {

                // Carried over from token to token while still inside the current element's span.
                SpatialElement se = null;

                String roleFeatureStr = "";
                String otherFeatureStr = "";

                // NOTE(review): the three offset lists below are built but not read afterwards
                // in this method, and roleStartOffsets.addAll(...) may modify a list owned by
                // the document. Kept unchanged in case the calls are needed for their effects.
                List<Integer> nonOverlapRolesStartOffsets = document.getElementRolesStartOffsets(nonOverlapRoles);
                List<Integer> roleStartOffsets = !document.getElementRoleStartOffsets().containsKey(role)
                        ? new ArrayList<>()
                        : document.getElementRoleStartOffsets().get(role);
                roleStartOffsets.addAll(nonOverlapRolesStartOffsets);
                List<Integer> overlapRolesStartOffsets = outputOther != null
                        ? document.getElementRolesStartOffsets(overlapRoles)
                        : null;
                if (overlapRolesStartOffsets != null)
                    overlapRolesStartOffsets.addAll(nonOverlapRolesStartOffsets);

                for (int sentence : sentenceStartOffsetParserLabel.keySet()) {
                    Map<Integer, CoreLabel> startOffsetParserLabel = sentenceStartOffsetParserLabel.get(sentence);
                    roleOffsets = new int[2];
                    otherOffsets = new int[2];

                    for (int startOffset : startOffsetParserLabel.keySet()) {

                        CoreLabel token = startOffsetParserLabel.get(startOffset);
                        // this is the text of the token
                        String word = token.word();

                        //generate crf feature
                        CrfFeature crfFeat = new CrfFeature(word, token.lemma(), token.tag(), token.ner());
                        crfFeat.setUniFV();

                        // Prefer an element starting exactly at this token; past the end of the
                        // current element look one up by word; otherwise stay on the current element.
                        se = startOffsetSpatialElement.containsKey(startOffset)
                                ? startOffsetSpatialElement.get(startOffset)
                                : startOffset >= roleOffsets[1]
                                        ? SpatialElement.getSE(word, startOffset, startOffsetSpatialElement)
                                        : se;
                        boolean isMotionSignal = se == null ? false : se.isMotionSignal();
                        boolean isMotionSignalOnly = se == null ? false : se.isMotionSignalOnly();

                        if (se == null)
                            crfFeat.setType();
                        else
                            crfFeat.setType(se.getNonMotionSignalType(), isMotionSignal, isMotionSignalOnly);
                        String featureStr = crfFeat.toString();

                        roleFeatureStr += featureStr;

                        String roleLabel = se == null ? "O" : se.getRole(role, nonOverlapRoles);

                        if (se == null || roleLabel.equals("O"))
                            roleFeatureStr += " O\n";
                        else if (roleOffsets[1] != se.getEnd()) {
                            // New element: remember where it starts and shift its end by the same
                            // difference between the token offset and the element's own start.
                            roleOffsets[0] = startOffset;
                            int difference = startOffset - se.getStart();
                            roleOffsets[1] = se.getEnd() + difference;
                        }

                        if (!roleLabel.equals("O")) {
                            if (startOffset == roleOffsets[0])
                                roleFeatureStr += " B-" + roleLabel + "\n";
                            else if (startOffset < roleOffsets[1])
                                roleFeatureStr += " I-" + roleLabel + "\n";
                        }

                        // No "Other" file for this role: skip the second labelling pass.
                        if (outputOther == null)
                            continue;

                        otherFeatureStr += featureStr;
                        roleLabel = se == null ? "O" : se.getRole(overlapRoles, nonOverlapRoles);

                        if (se == null || roleLabel.equals("O"))
                            otherFeatureStr += " O\n";
                        else if (otherOffsets[1] != se.getEnd()) {
                            otherOffsets[0] = startOffset;
                            int difference = startOffset - se.getStart();
                            otherOffsets[1] = se.getEnd() + difference;
                        }

                        if (!roleLabel.equals("O")) {
                            if (startOffset == otherOffsets[0])
                                otherFeatureStr += " B-" + roleLabel + "\n";
                            else if (startOffset < otherOffsets[1])
                                otherFeatureStr += " I-" + roleLabel + "\n";
                        }
                    }
                    // Blank line terminates the sentence in each output.
                    output.write((roleFeatureStr + "\n").getBytes());
                    roleFeatureStr = "";
                    if (outputOther != null) {
                        outputOther.write((otherFeatureStr + "\n").getBytes());
                        otherFeatureStr = "";
                    }
                }
            }
        }
    }
}