List of usage examples for the edu.stanford.nlp.ling.CoreLabel constructor CoreLabel
@SuppressWarnings("rawtypes") public CoreLabel(Class[] keys, String[] values)
From source file:tr.edu.gsu.nerwip.recognition.internal.modelbased.stanford.StanfordTrainer.java
License:Open Source License
/** * Split the text and create a list of CoreLabel objects corresponding * to the words it contains. All of them have the specified type. * /* w w w . j a va2 s . c o m*/ * @param text * Text to be split. * @param type * Type of the CoreLabel object to create. * @return * A list of newly created CoreLabel objects. */ private List<CoreLabel> extractCoreLabels(String text, EntityType type) { List<CoreLabel> result = new ArrayList<CoreLabel>(); // String splitText[] = text.split("\\W+"); // \W represents all non-word characters, i.e not letters, digits or underscore // String splitText[] = text.split("[^\\p{L}0-9]+"); // this one handles diacritics but separators disappear from the array String splitText[] = text.split("((?!^)\\b)| "); // this ones uses word boundaries (\b) and keeps separators for (String word : splitText) { word = word.trim(); if (!word.isEmpty()) { String typeStr = CONVERSION_MAP.get(type); String values[] = { word, typeStr }; CoreLabel coreLabel = new CoreLabel(KEYS, values); result.add(coreLabel); } } return result; }