Example usage for edu.stanford.nlp.ling CoreLabel setTag

List of usage examples for edu.stanford.nlp.ling CoreLabel setTag

Introduction

On this page you can find example usage for edu.stanford.nlp.ling CoreLabel setTag.

Prototype

@Override
public void setTag(String tag) 

Source Link

Usage

From source file:conditionalCFG.ConditionalCFGParser.java

License:Open Source License

/**
 * Returns the terminal label for the position {@code labelIndex}.
 *
 * <p>If an original label is stored for that position it is returned as-is, except that
 * its value is filled in from its word when the value is missing. Otherwise a new label
 * is assembled from the indexed word string, the stored begin/end offsets and, when one
 * is available, the stored tag.
 *
 * @param labelIndex position to look up in the per-position arrays
 * @return the stored label for that position, or a newly built one
 */
private CoreLabel getCoreLabel(int labelIndex) {
    final CoreLabel stored = originalCoreLabels[labelIndex];
    if (stored != null) {
        // Only backfill the value when there is a word to copy it from.
        final boolean valueMissing = stored.value() == null;
        if (valueMissing && stored.word() != null) {
            stored.setValue(stored.word());
        }
        return stored;
    }

    // No stored label: build one from the per-position data.
    final String text = wordIndex.get(words[labelIndex]);
    final CoreLabel built = new CoreLabel();
    built.setValue(text);
    built.setWord(text);
    built.setBeginPosition(beginOffsets[labelIndex]);
    built.setEndPosition(endOffsets[labelIndex]);

    final boolean hasTag = originalTags[labelIndex] != null;
    if (hasTag) {
        built.setTag(originalTags[labelIndex].tag());
    }
    return built;
}

From source file:de.tudarmstadt.ukp.dkpro.core.stanfordnlp.StanfordParser.java

License:Open Source License

/**
 * Converts a token into a {@code CoreLabel} via {@code CoreNlpUtils.tokenToWord} and
 * sets the label's value to the token's covered text.
 *
 * <p>When {@code readPos} is false the tag on the resulting label is reset to
 * {@code null}.
 *
 * @param aToken the token to convert
 * @return the label built for {@code aToken}
 */
protected CoreLabel tokenToWord(Token aToken) {
    final CoreLabel label = CoreNlpUtils.tokenToWord(aToken);
    label.setValue(aToken.getCoveredText());

    if (readPos) {
        return label;
    }

    // Tags are not being read: clear whatever the conversion put there.
    label.setTag(null);
    return label;
}

From source file:de.tudarmstadt.ukp.dkpro.core.stanfordnlp.util.CoreNlpUtils.java

License:Open Source License

/**
 * Builds a {@code CoreLabel} from a token.
 *
 * <p>The token's covered text is used both as the original text and as the word, and the
 * token's begin/end become the label's begin/end positions. The lemma and the tag are
 * copied only when the token carries a lemma or a POS annotation respectively.
 *
 * @param aToken the token to convert
 * @return a new label populated from {@code aToken}
 */
public static CoreLabel tokenToWord(Token aToken) {
    final String coveredText = aToken.getCoveredText();

    final CoreLabel label = new CoreLabel();
    label.setOriginalText(coveredText);
    label.setWord(coveredText);
    label.setBeginPosition(aToken.getBegin());
    label.setEndPosition(aToken.getEnd());

    // Optional annotations: copy them only when present on the token.
    final boolean hasLemma = aToken.getLemma() != null;
    if (hasLemma) {
        label.setLemma(aToken.getLemma().getValue());
    }

    final boolean hasPos = aToken.getPos() != null;
    if (hasPos) {
        label.setTag(aToken.getPos().getPosValue());
    }

    return label;
}

From source file:edu.cmu.ml.rtw.users.ssrivastava.RegexExtractor.java

/**
 * Rebuilds one sentence of {@code document} as a {@code CoreMap}.
 *
 * <p>One {@code CoreLabel} is created per token string, carrying the word, the POS tag
 * (via {@code toString()}), a default NER label of {@code "O"}, the document name, the
 * sentence index and the token's character span. NER labels reported by the document for
 * this sentence then overwrite the default on every token inside each span. The sentence
 * receives the token list plus the begin offset of the first token and the end offset of
 * the last one.
 *
 * @param document the document to read the sentence from
 * @param sentIdx index of the sentence within {@code document}
 * @return the sentence with tokens and character offsets set
 */
public static CoreMap getStanfordSentence(DocumentNLP document, int sentIdx) {
    final List<String> tokenStrs = document.getSentenceTokenStrs(sentIdx);
    final List<PoSTag> tags = document.getSentencePoSTags(sentIdx);

    // Re-create the Stanford tokens, one per token string.
    final List<CoreLabel> labels = new ArrayList<>();
    int pos = 0;
    while (pos < tokenStrs.size()) {
        final CoreLabel label = new CoreLabel();
        label.setWord(tokenStrs.get(pos));
        label.setTag(tags.get(pos).toString());
        label.setNER("O");
        label.setDocID(document.getName());
        label.setSentIndex(sentIdx);
        label.setBeginPosition(document.getToken(sentIdx, pos).getCharSpanStart());
        label.setEndPosition(document.getToken(sentIdx, pos).getCharSpanEnd());
        labels.add(label);
        pos++;
    }

    // Overwrite the default NER label for every token covered by a reported span.
    for (Pair<TokenSpan, String> nerEntry : document.getNer(sentIdx)) {
        final TokenSpan span = nerEntry.getFirst();
        for (int t = span.getStartTokenIndex(); t < span.getEndTokenIndex(); t++) {
            labels.get(t).setNER(nerEntry.getSecond());
        }
    }

    // Wrap the tokens into a sentence spanning first-token begin to last-token end.
    final CoreMap result = new ArrayCoreMap();
    result.set(TokensAnnotation.class, labels);
    result.set(CharacterOffsetBeginAnnotation.class, labels.get(0).beginPosition());
    result.set(CharacterOffsetEndAnnotation.class, labels.get(tokenStrs.size() - 1).endPosition());
    return result;
}

From source file:lv.pipe.NerTagger.java

License:Open Source License

/**
 * Builds a {@code CoreLabel} from an annotation.
 *
 * <p>An annotation that has no {@code LabelText} entry, or whose text equals
 * {@code BOUNDARY}, becomes a boundary label: its word is {@code BOUNDARY} and both the
 * answer and the gold named-entity tag are set to {@code OTHER}. Any other annotation
 * contributes its text as the word. In both cases the index, lemma, full tag, simple tag,
 * morphology features, parent and dependency label are then copied from the annotation,
 * using {@code -1} or {@code "_"} as the fallback passed to {@code a.get}.
 *
 * @param a the annotation to convert
 * @return a new label populated from {@code a}
 */
public static CoreLabel makeCoreLabel(Annotation a) {
    final CoreLabel label = new CoreLabel();

    final boolean regularToken = a.has(LabelText.class) && !a.getText().equals(BOUNDARY);
    if (regularToken) {
        label.setWord(a.getText());
    } else {
        label.setWord(BOUNDARY);
        label.set(AnswerAnnotation.class, OTHER);
        label.set(NamedEntityTagGoldAnnotation.class, OTHER);
        label.setLemma("_");
    }

    // Attributes copied for every label, boundary or not.
    label.setIndex(a.get(LabelIndex.class, -1));
    label.setLemma(a.get(LabelLemma.class, "_"));
    label.set(LVFullTagAnnotation.class, a.get(LabelPosTag.class, "_"));
    label.setTag(a.get(LabelPosTagSimple.class, "_"));
    label.set(MorphologyFeatureStringAnnotation.class, a.get(LabelMorphoFeatures.class, "_"));

    // The parent annotation is stored as the decimal string of the parent value.
    final String parent = Integer.toString((Integer) a.get(LabelParent.class, -1));
    label.set(ParentAnnotation.class, parent);

    label.set(LabelAnnotation.class, a.get(LabelDependency.class, "_"));
    return label;
}

From source file:org.exist.xquery.corenlp.TrainClassifier.java

License:Open Source License

/**
 * Reads annotated tokens from the first sheet of an ODS spreadsheet.
 *
 * <p>Each row yields one token: column 0 is the word, column 1 is used both as the NER
 * label and as the answer annotation, and column 2 (when the sheet has more than two
 * columns, the cell is non-empty and {@code tagCol > -1}) is the tag. A row whose first
 * column is the empty string ends the current document and starts a new one.
 *
 * <p>The input is taken from {@code uploadedFileBase64String} when that field is
 * non-null, otherwise from the resource at {@code localFilePath}.
 *
 * @param localFilePath path of the spreadsheet resource used when no upload is present
 * @return the documents read, each a list of tokens
 * @throws XPathException if the spreadsheet cannot be read
 */
private Collection<List<CoreLabel>> readODSSpreadsheet(final String localFilePath) throws XPathException {
    final Collection<List<CoreLabel>> documents = new ArrayList<>();
    List<CoreLabel> document = new ArrayList<>();

    try (InputStream is = uploadedFileBase64String != null ? uploadedFileBase64String.getInputStream()
            : new Resource(localFilePath).getInputStream()) {
        SpreadSheet spreadSheet = ODPackage.createFromStream(is, "UserAnnotatedDocument").getSpreadSheet();

        Sheet sheet = spreadSheet.getSheet(0);

        for (int i = 0; i < sheet.getRowCount(); i++) {
            CoreLabel row = new CoreLabel();
            String value1 = sheet.getValueAt(0, i).toString();
            String value2 = sheet.getValueAt(1, i).toString();

            row.setWord(value1);
            row.setNER(value2);
            row.set(CoreAnnotations.AnswerAnnotation.class, value2);
            if (sheet.getColumnCount() > 2) {
                String value3 = sheet.getValueAt(2, i).toString();
                if (!"".equals(value3) && tagCol > -1) {
                    row.setTag(value3);
                }
            }

            if (!"".equals(value1)) {
                document.add(row);
            } else {
                // An empty first column separates documents.
                documents.add(document);
                document = new ArrayList<>();
            }
        }
    } catch (IOException ioe) {
        throw new XPathException(this, "Error while reading spreadsheet document: " + ioe.getMessage(), ioe);
    }

    // Keep the last document when the sheet does not end with a separator row;
    // without this its tokens would be silently discarded.
    if (!document.isEmpty()) {
        documents.add(document);
    }
    return documents;
}

From source file:org.exist.xquery.corenlp.TrainClassifier.java

License:Open Source License

/**
 * Reads annotated tokens from the first sheet of an Excel workbook.
 *
 * <p>The workbook is opened as {@code XSSFWorkbook} when {@code inputFormat} is
 * {@code InputDocType.XLSX} and as {@code HSSFWorkbook} otherwise. Within each row the
 * first cell returned by the cell iterator is the word, the second is used both as the
 * NER label and as the answer annotation, and the third is the tag; further cells are
 * ignored. A row whose word is the empty string ends the current document and starts a
 * new one.
 *
 * <p>The input is taken from {@code uploadedFileBase64String} when that field is
 * non-null, otherwise from the resource at {@code localFilePath} with an {@code "x"}
 * appended for the XLSX format. If the file is not found the error is logged and an empty
 * collection is returned.
 *
 * @param localFilePath path of the workbook resource used when no upload is present
 * @param inputFormat selects the workbook implementation and the path suffix
 * @return the documents read, each a list of tokens
 * @throws XPathException if reading the workbook fails with an I/O error
 */
private Collection<List<CoreLabel>> readXLSXSpreadsheet(final String localFilePath,
        final InputDocType inputFormat) throws XPathException {
    final Collection<List<CoreLabel>> documents = new ArrayList<>();
    List<CoreLabel> document = new ArrayList<>();

    // NOTE(review): the suffix logic is kept from the original, which suggests callers pass
    // an ".xls"-style path for both formats - confirm against the call sites.
    final String extraSuffix = (inputFormat != InputDocType.XLSX) ? "" : "x";

    Workbook workbook = null;
    // Use the upload only when it exists; the previous check was inverted and dereferenced
    // the field exactly when it was null. The path now comes from the parameter rather
    // than from the string literal "localFilePath".
    try (InputStream is = uploadedFileBase64String != null ? uploadedFileBase64String.getInputStream()
            : new Resource(localFilePath + extraSuffix).getInputStream()) {
        if (inputFormat == InputDocType.XLSX) {
            workbook = new XSSFWorkbook(is);
        } else {
            workbook = new HSSFWorkbook(is);
        }
    } catch (FileNotFoundException fe) {
        LOG.error(fe);
    } catch (IOException ioe) {
        LOG.error(ioe);
        throw new XPathException(this, "Error while reading spreadsheet document: " + ioe.getMessage(), ioe);
    }

    if (workbook == null) {
        // File not found (already logged): nothing to read, so avoid a NullPointerException below.
        return documents;
    }

    final org.apache.poi.ss.usermodel.Sheet sheet = workbook.getSheetAt(0);
    for (Row row : sheet) {
        final CoreLabel tok = new CoreLabel();
        // cellPos counts the cells the iterator yields, in order.
        int cellPos = 0;
        for (Cell cell : row) {
            switch (cellPos) {
            case 0:
                tok.setWord(cell.getStringCellValue());
                break;
            case 1:
                tok.setNER(cell.getStringCellValue());
                tok.set(CoreAnnotations.AnswerAnnotation.class, cell.getStringCellValue());
                break;
            case 2:
                tok.setTag(cell.getStringCellValue());
                break;
            default:
                break;
            }
            cellPos++;
        }
        if (!"".equals(tok.word())) {
            document.add(tok);
        } else {
            // An empty word separates documents.
            documents.add(document);
            document = new ArrayList<>();
        }
    }

    // Keep the last document when the sheet does not end with a separator row;
    // without this its tokens would be silently discarded.
    if (!document.isEmpty()) {
        documents.add(document);
    }
    return documents;
}

From source file:org.exist.xquery.corenlp.TrainClassifier.java

License:Open Source License

/**
 * Reads annotated tokens from a tab-separated file.
 *
 * <p>Each non-blank line yields one token: column 0 is the word, column 1 (when present)
 * is used both as the NER label and as the answer annotation, and column 2 (when present
 * and non-empty) is the tag. A line with no columns or an empty first column ends the
 * current document and starts a new one.
 *
 * <p>The input is taken from {@code uploadedFileBase64String}, decoded as UTF-8, when
 * that field is non-null, otherwise from the resource at {@code localFilePath}. If the
 * file is not found the error is logged and whatever was collected is returned.
 *
 * @param localFilePath path of the TSV resource used when no upload is present
 * @return the documents read, each a list of tokens
 * @throws XPathException if reading the file fails with an I/O error
 */
private Collection<List<CoreLabel>> readTSVSpreadsheet(final String localFilePath) throws XPathException {
    final String separator = "\t";
    final Collection<List<CoreLabel>> documents = new ArrayList<>();
    List<CoreLabel> document = new ArrayList<>();

    // Use the upload only when it exists; the previous check was inverted and dereferenced
    // the field exactly when it was null.
    try (BufferedReader tsv = uploadedFileBase64String != null
            ? new BufferedReader(new InputStreamReader(uploadedFileBase64String.getInputStream(), "UTF-8"))
            : new Resource(localFilePath).getBufferedReader()) {
        String line;
        while ((line = tsv.readLine()) != null) {
            // split() drops trailing empty fields, so a line of only tabs gives zero cells.
            final String[] cells = line.split(separator);
            if (cells.length == 0 || "".equals(cells[0])) {
                // A blank line (or empty first column) separates documents.
                documents.add(document);
                document = new ArrayList<>();
                continue;
            }

            final CoreLabel tok = new CoreLabel();
            tok.setWord(cells[0]);
            // A line may carry only the word; reading column 1 unconditionally threw
            // IndexOutOfBoundsException on such lines.
            if (cells.length > 1) {
                tok.setNER(cells[1]);
                tok.set(CoreAnnotations.AnswerAnnotation.class, cells[1]);
            }
            if (cells.length > 2 && !"".equals(cells[2])) {
                tok.setTag(cells[2]);
            }
            document.add(tok);
        }
    } catch (FileNotFoundException fe) {
        LOG.error(fe);
    } catch (IOException ioe) {
        LOG.error(ioe);
        throw new XPathException(this, "Error while reading spreadsheet document: " + ioe.getMessage(), ioe);
    }

    // Keep the last document when the file does not end with a blank line;
    // without this its tokens would be silently discarded.
    if (!document.isEmpty()) {
        documents.add(document);
    }
    return documents;
}