List of usage examples for edu.stanford.nlp.ling CoreLabel setTag
@Override public void setTag(String tag)
From source file:conditionalCFG.ConditionalCFGParser.java
License:Open Source License
/**
 * Returns the terminal label for the position {@code labelIndex}.
 *
 * <p>If an original label is stored for that position it is returned as-is, except that a
 * missing value is filled in from its word. Otherwise a new label is built from the indexed
 * word string, the stored begin/end offsets and, when present, the stored tag.
 *
 * @param labelIndex position to look up in the per-position arrays
 * @return the stored label for the position, or a newly created one
 */
private CoreLabel getCoreLabel(int labelIndex) {
    final CoreLabel existing = originalCoreLabels[labelIndex];
    if (existing != null) {
        // Only the word was populated: mirror it into the value slot.
        if (existing.value() == null && existing.word() != null) {
            existing.setValue(existing.word());
        }
        return existing;
    }

    // No stored label: build one from the parallel arrays.
    final String text = wordIndex.get(words[labelIndex]);
    final CoreLabel created = new CoreLabel();
    created.setValue(text);
    created.setWord(text);
    created.setBeginPosition(beginOffsets[labelIndex]);
    created.setEndPosition(endOffsets[labelIndex]);
    if (originalTags[labelIndex] != null) {
        created.setTag(originalTags[labelIndex].tag());
    }
    return created;
}
From source file:de.tudarmstadt.ukp.dkpro.core.stanfordnlp.StanfordParser.java
License:Open Source License
/**
 * Converts a token into a {@code CoreLabel} via {@code CoreNlpUtils.tokenToWord} and sets the
 * label's value to the token's covered text.
 *
 * <p>When {@code readPos} is false, the tag on the converted label is reset to {@code null}.
 *
 * @param aToken the token to convert
 * @return the converted label
 */
protected CoreLabel tokenToWord(Token aToken) {
    final CoreLabel label = CoreNlpUtils.tokenToWord(aToken);
    label.setValue(aToken.getCoveredText());
    if (readPos) {
        return label;
    }
    // Tags are not to be read: discard whatever the conversion copied over.
    label.setTag(null);
    return label;
}
From source file:de.tudarmstadt.ukp.dkpro.core.stanfordnlp.util.CoreNlpUtils.java
License:Open Source License
public static CoreLabel tokenToWord(Token aToken) { CoreLabel t = new CoreLabel(); t.setOriginalText(aToken.getCoveredText()); t.setWord(aToken.getCoveredText());//from ww w . jav a 2 s .c o m t.setBeginPosition(aToken.getBegin()); t.setEndPosition(aToken.getEnd()); if (aToken.getLemma() != null) { t.setLemma(aToken.getLemma().getValue()); } if (aToken.getPos() != null) { t.setTag(aToken.getPos().getPosValue()); } return t; }
From source file:edu.cmu.ml.rtw.users.ssrivastava.RegexExtractor.java
public static CoreMap getStanfordSentence(DocumentNLP document, int sentIdx) { List<String> words = document.getSentenceTokenStrs(sentIdx); List<PoSTag> posTags = document.getSentencePoSTags(sentIdx); List<CoreLabel> tokenList = new ArrayList<CoreLabel>(); for (int i = 0; i < words.size(); i++) { /*Re-create Stanford tokens*/ CoreLabel token = new CoreLabel(); token.setWord(words.get(i));// ww w . jav a2s .c o m token.setTag(posTags.get(i).toString()); token.setNER("O"); token.setDocID(document.getName()); token.setSentIndex(sentIdx); token.setBeginPosition(document.getToken(sentIdx, i).getCharSpanStart()); token.setEndPosition(document.getToken(sentIdx, i).getCharSpanEnd()); //System.out.println(token.word()+" "+token.beginPosition()+" "+token.endPosition()); tokenList.add(token); } //Add NER labels for sentence List<Pair<TokenSpan, String>> ners = document.getNer(sentIdx); for (Pair<TokenSpan, String> p : ners) { for (int k = p.getFirst().getStartTokenIndex(); k < p.getFirst().getEndTokenIndex(); k++) { tokenList.get(k).setNER(p.getSecond()); } } //Convert to Stanford Sentence CoreMap sentence = new ArrayCoreMap(); sentence.set(TokensAnnotation.class, tokenList); sentence.set(CharacterOffsetBeginAnnotation.class, tokenList.get(0).beginPosition()); sentence.set(CharacterOffsetEndAnnotation.class, tokenList.get(words.size() - 1).endPosition()); return sentence; }
From source file:lv.pipe.NerTagger.java
License:Open Source License
/**
 * Builds a {@code CoreLabel} from an annotation.
 *
 * <p>If the annotation has no {@code LabelText} or its text equals {@code BOUNDARY}, the label
 * gets {@code BOUNDARY} as its word and {@code OTHER} as both answer and gold NE tag; otherwise
 * the word is the annotation text. Index, lemma, full tag, simple tag, morphological features,
 * parent and dependency label are then copied from the annotation, each with a default
 * ({@code -1} for index and parent, {@code "_"} for the rest).
 *
 * @param a the annotation to convert
 * @return a new label populated from {@code a}
 */
public static CoreLabel makeCoreLabel(Annotation a) {
    CoreLabel label = new CoreLabel();

    if (a.has(LabelText.class) && !a.getText().equals(BOUNDARY)) {
        label.setWord(a.getText());
    } else {
        label.setWord(BOUNDARY);
        label.set(AnswerAnnotation.class, OTHER);
        label.set(NamedEntityTagGoldAnnotation.class, OTHER);
        label.setLemma("_");
    }

    label.setIndex(a.get(LabelIndex.class, -1));
    label.setLemma(a.get(LabelLemma.class, "_"));
    label.set(LVFullTagAnnotation.class, a.get(LabelPosTag.class, "_"));
    label.setTag(a.get(LabelPosTagSimple.class, "_"));
    label.set(MorphologyFeatureStringAnnotation.class, a.get(LabelMorphoFeatures.class, "_"));

    // The parent index is stored as its decimal string form.
    Integer parentIndex = (Integer) a.get(LabelParent.class, -1);
    label.set(ParentAnnotation.class, Integer.toString(parentIndex));

    label.set(LabelAnnotation.class, a.get(LabelDependency.class, "_"));
    return label;
}
From source file:org.exist.xquery.corenlp.TrainClassifier.java
License:Open Source License
private Collection<List<CoreLabel>> readODSSpreadsheet(final String localFilePath) throws XPathException { Collection<List<CoreLabel>> documents = new ArrayList<>(); List<CoreLabel> document = new ArrayList<>(); //try (InputStream is = Files.newInputStream(tempInFile)) { try (InputStream is = uploadedFileBase64String != null ? uploadedFileBase64String.getInputStream() : new Resource(localFilePath).getInputStream()) { SpreadSheet spreadSheet = ODPackage.createFromStream(is, "UserAnnotatedDocument").getSpreadSheet(); Sheet sheet = spreadSheet.getSheet(0); for (int i = 0; i < sheet.getRowCount(); i++) { CoreLabel row = new CoreLabel(); String value1 = sheet.getValueAt(0, i).toString(); String value2 = sheet.getValueAt(1, i).toString(); row.setWord(value1);//from w w w . j a v a2 s . c o m row.setNER(value2); row.set(CoreAnnotations.AnswerAnnotation.class, value2); if (sheet.getColumnCount() > 2) { String value3 = sheet.getValueAt(2, i).toString(); if (!"".equals(value3) && tagCol > -1) { row.setTag(value3); } } if (!"".equals(value1)) { document.add(row); } else { documents.add(document); document = new ArrayList<>(); } } } catch (IOException ioe) { throw new XPathException(this, "Error while reading spreadsheet document: " + ioe.getMessage(), ioe); } return documents; }
From source file:org.exist.xquery.corenlp.TrainClassifier.java
License:Open Source License
private Collection<List<CoreLabel>> readXLSXSpreadsheet(final String localFilePath, final InputDocType inputFormat) throws XPathException { Workbook workbook = null;/*www .ja va 2 s. c o m*/ Collection<List<CoreLabel>> documents = new ArrayList<>(); List<CoreLabel> document = new ArrayList<>(); String fileName = "localFilePath"; String extraSuffix = (inputFormat != InputDocType.XLSX) ? "" : "x"; //try (InputStream is = Files.newInputStream(tempInFile)) { try (InputStream is = uploadedFileBase64String == null ? uploadedFileBase64String.getInputStream() : new Resource(fileName + extraSuffix).getInputStream()) { if (inputFormat == InputDocType.XLSX) { workbook = new XSSFWorkbook(is); } else { workbook = new HSSFWorkbook(is); } } catch (FileNotFoundException fe) { LOG.error(fe); } catch (IOException ioe) { LOG.error(ioe); throw new XPathException(this, "Error while reading spreadsheet document: " + ioe.getMessage(), ioe); } org.apache.poi.ss.usermodel.Sheet sheet = workbook.getSheetAt(0); Row row; Cell cell; Iterator rows = sheet.rowIterator(); while (rows.hasNext()) { CoreLabel tok = new CoreLabel(); row = (Row) rows.next(); Iterator cells = row.cellIterator(); int cellPos = 0; while (cells.hasNext()) { cell = (Cell) cells.next(); //if (cell.getCellType() == Cell.CELL_TYPE_STRING) { switch (cellPos) { case 0: tok.setWord(cell.getStringCellValue()); break; case 1: tok.setNER(cell.getStringCellValue()); tok.set(CoreAnnotations.AnswerAnnotation.class, cell.getStringCellValue()); break; case 2: tok.setTag(cell.getStringCellValue()); break; default: break; } //} else if(cell.getCellType() == Cell.CELL_TYPE_NUMERIC) { //LOG.error("Cell has numeric value:" + cell.getNumericCellValue()); //} cellPos++; } if (!"".equals(tok.word())) { document.add(tok); } else { documents.add(document); document = new ArrayList<>(); } } return documents; }
From source file:org.exist.xquery.corenlp.TrainClassifier.java
License:Open Source License
private Collection<List<CoreLabel>> readTSVSpreadsheet(final String localFilePath) throws XPathException { String separator = "\t"; String line;//w w w. jav a 2 s . com Collection<List<CoreLabel>> documents = new ArrayList<>(); List<CoreLabel> document = new ArrayList<>(); //try (BufferedReader tsv = Files.newBufferedReader(tempInFile)) { try (BufferedReader tsv = uploadedFileBase64String == null ? new BufferedReader(new InputStreamReader(uploadedFileBase64String.getInputStream(), "UTF-8")) : new Resource(localFilePath).getBufferedReader()) { while ((line = tsv.readLine()) != null) { CoreLabel tok = new CoreLabel(); List<String> cells = Arrays.asList(line.split(separator)); if (cells.size() > 0 && !"".equals(cells.get(0))) { tok.setWord(cells.get(0)); tok.setNER(cells.get(1)); tok.set(CoreAnnotations.AnswerAnnotation.class, cells.get(1)); if (cells.size() > 2 && !"".equals(cells.get(2))) { tok.setTag(cells.get(2)); } document.add(tok); } else { documents.add(document); document = new ArrayList<>(); } } } catch (FileNotFoundException fe) { LOG.error(fe); } catch (IOException ioe) { LOG.error(ioe); throw new XPathException(this, "Error while reading spreadsheet document: " + ioe.getMessage(), ioe); } return documents; }