Example usage for edu.stanford.nlp.ling WordLemmaTag WordLemmaTag

List of usage examples for edu.stanford.nlp.ling WordLemmaTag WordLemmaTag

Introduction

On this page you can find example usage of the WordLemmaTag constructor of the edu.stanford.nlp.ling.WordLemmaTag class.

Prototype

public WordLemmaTag(Label word) 

Source Link

Usage

From source file: ie.pars.bnc.preprocess.ProcessNLP.java

License: Open Source License

/**
 * Parses an XML document into paragraphs of token sequences.
 *
 * <p>Every {@code <s>} element below the document root is visited in document order.
 * Consecutive {@code <s>} elements that share the same parent node are grouped into one
 * paragraph; a change of parent closes the current paragraph and opens a new one. A
 * paragraph is added to the result even when none of its sentences produced tokens.
 *
 * <p>Within an {@code <s>} element, each direct child node whose trimmed text content is
 * non-empty becomes one {@link WordLemmaTag} built from that trimmed text. A child whose
 * node name equals {@code pause} (ignoring case) ends the current token sequence and
 * starts a new one; empty token sequences are never stored.
 *
 * @param is stream supplying the XML document; this method does not close it explicitly
 * @return the paragraphs, each a list of token sequences, each a list of tokens; empty
 *         when the document contains no {@code <s>} elements
 * @throws Exception if the parser cannot be created or the document cannot be read or parsed
 */
public static List<List<List<WordLemmaTag>>> parseBNCXMLTokenized(InputStream is) throws Exception {
    // NOTE(review): the factory is used with its default settings. If this stream can come
    // from an untrusted source, consider disabling DTDs / external entities (XXE) — confirm
    // with callers before changing, since that could reject documents that parse today.
    DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
    DocumentBuilder builder = factory.newDocumentBuilder();
    Document doc = builder.parse(is);
    Element root = doc.getDocumentElement();

    List<List<List<WordLemmaTag>>> file = new ArrayList<>();
    List<List<WordLemmaTag>> para = null;
    Node currentParent = null;

    NodeList sentences = root.getElementsByTagName("s");
    int sentenceCount = sentences.getLength();
    for (int j = 0; j < sentenceCount; j++) {
        Node sentence = sentences.item(j);
        Node parent = sentence.getParentNode();

        if (para == null) {
            // First sentence: open the first paragraph.
            para = new ArrayList<>();
        } else if (parent != currentParent) {
            // Parent changed: the previous paragraph is complete.
            file.add(para);
            para = new ArrayList<>();
        }
        currentParent = parent;

        List<WordLemmaTag> tokens = new ArrayList<>();
        NodeList children = sentence.getChildNodes();
        int childCount = children.getLength();
        for (int i = 0; i < childCount; i++) {
            Node child = children.item(i);

            // A pause splits the sentence into separate token sequences.
            if ("pause".equalsIgnoreCase(child.getNodeName())) {
                if (!tokens.isEmpty()) {
                    para.add(tokens);
                }
                tokens = new ArrayList<>();
            }

            // Whitespace-only nodes (e.g. indentation between elements) carry no token.
            String text = child.getTextContent().trim();
            if (!text.isEmpty()) {
                tokens.add(new WordLemmaTag(text));
            }
        }
        if (!tokens.isEmpty()) {
            para.add(tokens);
        }
    }

    // Flush the last open paragraph; para stays null only when there were no sentences.
    if (para != null) {
        file.add(para);
    }

    return file;
}