Usage examples for the toString() method of edu.stanford.nlp.pipeline.Annotation
@Override
public String toString()
From source file: org.aksw.simba.cetus.annotator.CetusSurfaceFormExtractor.java
License:Open Source License
private void getTypeSurfaceForms(Annotation document, int neStartPos, int neEndPos, List<ExtendedTypedNamedEntity> results) { boolean entityFound = false; StringBuilder parseableTextBuilder = new StringBuilder(); List<CoreLabel> tokens = document.get(TokensAnnotation.class); int tokenPositions[][] = new int[tokens.size()][4]; int id = 0;/*from w ww . j av a 2 s .c o m*/ for (CoreLabel token : tokens) { tokenPositions[id][ORIG_TEXT_START] = token.get(CharacterOffsetBeginAnnotation.class); tokenPositions[id][ORIG_TEXT_END] = token.get(CharacterOffsetEndAnnotation.class); if ((tokenPositions[id][ORIG_TEXT_END] <= neStartPos) || (tokenPositions[id][ORIG_TEXT_START] >= neEndPos)) { if (parseableTextBuilder.length() > 0) { parseableTextBuilder.append(' '); } tokenPositions[id][GEN_TEXT_START] = parseableTextBuilder.length(); parseableTextBuilder.append(token.getString(TextAnnotation.class)); parseableTextBuilder.append('_'); parseableTextBuilder.append(token.getString(LemmaAnnotation.class)); parseableTextBuilder.append('_'); parseableTextBuilder.append(token.getString(PartOfSpeechAnnotation.class)); tokenPositions[id][GEN_TEXT_END] = parseableTextBuilder.length(); } else { if (!entityFound) { if (parseableTextBuilder.length() > 0) { parseableTextBuilder.append(' '); } tokenPositions[id][GEN_TEXT_START] = parseableTextBuilder.length(); parseableTextBuilder.append(TypeExtractor.ENTITY_MARKING); entityFound = true; } else { tokenPositions[id][GEN_TEXT_START] = parseableTextBuilder.length(); } tokenPositions[id][GEN_TEXT_END] = parseableTextBuilder.length(); } ++id; } if (!entityFound) { LOGGER.error("Couldn't find the named entity (" + neStartPos + ", " + neEndPos + ") inside the document \"" + document.toString() + "\"."); return; } String parseableText = parseableTextBuilder.toString(); List<String> types = extractor.extractTypeStrings(parseableText); if (types != null) { if (LOGGER.isInfoEnabled()) { LOGGER.info("Found types " + Arrays.toString(types.toArray()) + " 
inside the sentence \"" + parseableText + "\"."); } generateNEsForTypes(document.get(TokensAnnotation.class), tokenPositions, parseableText, types, results); } else { LOGGER.warn("Extractor was not able to process the text \"" + parseableText + "\"."); } }