List of usage examples for the edu.stanford.nlp.ling.StringLabel constructor StringLabel(Label)
public StringLabel(Label label)
Creates a new StringLabel with the value() of another label as its label.
From source file:qmul.align.TurnConcatSimilarityMeasure.java
License:Open Source License
/** * @param t/*from ww w . j a va 2s . c o m*/ * @return the result of concatenating all sentences linearly for transcription, as daughters of a "TURN" mother for * syntax (unless there's just one, in which case it's just copied without a TURN mother, to prevent false * positive similarity between single-sent turns) */ private DialogueSentence concatTurn(DialogueTurn t) { DialogueSentence cs = new DialogueSentence(null, 0, t, ""); System.out.print("Concatenating sentences for turn " + t.getId()); for (DialogueSentence s : t.getSents()) { System.out.print("."); if (s.getTranscription() != null) { cs.setTranscription((cs.getTranscription() + " " + s.getTranscription()).trim()); } if (s.getTokens() != null) { if (cs.getTokens() == null) { cs.setTokens(s.getTokens()); } else { cs.getTokens().addAll(s.getTokens()); } } if (s.getSyntax() != null) { Tree tree; if (cs.getSyntax() == null) { tree = s.getSyntax(); } else { ArrayList<Tree> dtrs = new ArrayList<Tree>(); if (cs.getSyntax().label().value().equals("TURN")) { for (Tree child : cs.getSyntax().getChildrenAsList()) { dtrs.add(child); } } else { dtrs.add(cs.getSyntax()); } dtrs.add(s.getSyntax()); tree = new LabeledScoredTreeNode(new StringLabel("TURN"), dtrs); } cs.setSyntax(tree); } if (!Double.isNaN(s.getSyntaxProb())) { cs.setSyntaxProb(Double.isNaN(cs.getSyntaxProb()) ? s.getSyntaxProb() : (cs.getSyntaxProb() * s.getSyntaxProb())); } } System.out.println(" done."); return cs; }
From source file:qmul.util.parse.CreateTreeFromClarkCurranCCGProlog.java
License:Open Source License
/** * @param reader/* ww w. ja v a2 s . c o m*/ * a {@link BufferedReader} * @return the Stanford {@link Tree} */ public static Tree makeTree(BufferedReader reader) { if (options == null) { setDefaultOptions(); } NodeFilter nodeFilter = new NodeFilter(); String line = null; boolean doingTree = false; boolean doingWords = false; HashMap<Integer, Tree> leaves = new HashMap<Integer, Tree>(); Tree currentNode = null; Tree rootNode = null; int treeLevel = 0; try { while ((line = reader.readLine()) != null) { line = line.trim(); // first we need to get the ccg/2 tree structure if (line.startsWith("ccg(")) { doingTree = true; doingWords = false; treeLevel = 1; // nothing useful on the actual ccg functor line continue; } // next the w/8 word definitions if (line.startsWith("w(")) { if (!doingTree && !doingWords) { // if we've hit the word definitions without seeing a tree, stop return null; } doingTree = false; doingWords = true; } if (doingTree) { Matcher m = LEAF_PAT.matcher(line); if (m.find()) { // System.out.println("matched leaf " + line); Tree nonTerminal = tf.newTreeNode(getSynLabel(m.group(3)), new ArrayList<Tree>()); if (rootNode == null) { rootNode = nonTerminal; } else { currentNode.addChild(nonTerminal); } Tree leaf = tf.newLeaf("DUMMY"); nonTerminal.addChild(leaf); leaves.put(Integer.parseInt(m.group(2)), leaf); // adjust currentNode int numOpening = line.replaceAll("[^(]", "").length(); int numClosing = line.replaceAll("\\)\\.$", "").replaceAll("[^)]", "").length(); int levelChange = numOpening - numClosing; if (levelChange > 0) { throw new RuntimeException("deepening with leaf node!"); } else if (levelChange < 0) { do { // System.out.println("cu node " + currentNode.label()); currentNode = currentNode.parent(rootNode); // System.out.println("up node " + (currentNode == null ? 
null : currentNode.label())); treeLevel--; levelChange++; } while (levelChange < 0); } continue; } m = RULE_PAT.matcher(line); if (m.find()) { // System.out.println("matched rule " + line); treeLevel++; Tree node = tf.newTreeNode(getSynLabel(m.group(2)), new ArrayList<Tree>()); if (rootNode == null) { rootNode = node; } if (currentNode != null) { currentNode.addChild(node); } currentNode = node; // System.out.println("current node " + node.label()); continue; } m = LEXR_PAT.matcher(line); if (m.find()) { // System.out.println("matched lexr " + line); treeLevel++; Tree node = tf.newTreeNode(getSynLabel(m.group(3)), new ArrayList<Tree>()); if (rootNode == null) { rootNode = node; } if (currentNode != null) { currentNode.addChild(node); } currentNode = node; // System.out.println("current node " + node.label()); continue; } m = CONJ_PAT.matcher(line); if (m.find()) { // System.out.println("matched conj " + line); treeLevel++; Tree node = tf.newTreeNode(getSynLabel(m.group(4)), new ArrayList<Tree>()); if (rootNode == null) { rootNode = node; } if (currentNode != null) { currentNode.addChild(node); } currentNode = node; // System.out.println("current node " + node.label()); continue; } throw new RuntimeException("no match for line " + line); } if (doingWords) { Matcher m = WORD_PAT.matcher(line); if (m.find()) { Tree leaf = leaves.get(Integer.parseInt(m.group(2))); if (leaf == null) { throw new RuntimeException("Missing leaf " + m.group(2)); } leaf.setLabel(new StringLabel(m.group(3))); leaves.remove(Integer.parseInt(m.group(2))); } else { if (line.isEmpty()) { doingWords = false; if (!leaves.isEmpty()) { throw new RuntimeException("unmatched leaves " + leaves); } continue; } else { throw new RuntimeException("strange word line " + line); } } continue; } } } catch (IOException e) { // TODO Auto-generated catch block e.printStackTrace(); System.exit(0); } // prune to (optionally) remove punctuation nodes etc, then flatten to remove their dedicated parents if (rootNode != 
null) { // System.out.println(); // System.out.println("raw tree " + rootNode.pennString()); // System.out.println("pru tree " + rootNode.prune(nodeFilter).pennString()); // System.out.println("fla tree " + rootNode.prune(nodeFilter).flatten().pennString()); // rootNode = rootNode.prune(nodeFilter).flatten(); } return rootNode; }
From source file:qmul.util.parse.CreateTreeFromClarkCurranCCGProlog.java
License:Open Source License
/**
 * Wraps a syntactic category string in a {@link StringLabel}.
 *
 * @param str
 *            the category string
 * @return a {@link StringLabel} based on the string; when the REMOVE_SQUARE_BRACKET_SUBCATS option is set,
 *         every [subcat] is stripped first, i.e. S[dcl]/S[b] -> S/S
 */
private static Label getSynLabel(String str) {
    if (!getOption(REMOVE_SQUARE_BRACKET_SUBCATS)) {
        return new StringLabel(str);
    }
    String withoutSubcats = str.replaceAll("\\[(.*?)\\]", "");
    return new StringLabel(withoutSubcats);
}
From source file:qmul.util.parse.StanfordParser.java
License:Open Source License
/** * Convenience method: splits utt into sentences, uses {@link LexicalizedParser}'s parse() to tokenize and parse * each sentence/*from w w w . j av a2s.co m*/ * * @param utt * @return a {@link Tree} with ROOT node, with the getBestParse() trees for each sentence as children */ public Tree parse(String utt) { String[] sentences = utt.split("[.!?]"); // System.out.println("there are sentences:" + sentences.length); // LinkedList<Tree> list=new LinkedList<Tree>(); Label rootLabel = new StringLabel("ROOT"); Tree concat = new LabeledScoredTreeNode(rootLabel, new LinkedList<Tree>()); try { for (int i = 0; i < sentences.length; i++) { boolean parsed = false; if (sentences[i].length() > 0) parsed = lp.parse(sentences[i]); else continue; Tree t = lp.getBestParse(); Tree rootChild; if (t.children().length == 1) rootChild = t.removeChild(0); else rootChild = t; concat.addChild(rootChild); } if (concat.children().length > 1) return concat; else return concat.removeChild(0); } catch (Throwable t) { System.out.println(t.getMessage()); System.out.println("Reinitializing parser because of trying to parse error " + utt); this.lp = null; Runtime r = Runtime.getRuntime(); r.gc(); lp = new LexicalizedParser(System.getProperty("user.dir") + File.separator + "utils" + File.separator + "englishPCFG.ser.gz"); this.lp.setOptionFlags(new String[] { "-maxLength", "100", "-retainTmpSubcategories" }); return null; } }
From source file:reck.corpora.DocumentImpl.java
License:Open Source License
/**
 * Locates the head word of a mention among the leaves of the dependency parse tree and wraps that leaf in
 * a new node labelled with the mention's entity type.
 * <p>
 * Side effects: sets the mention's head word and head-word position, replaces the head leaf inside the
 * dependency tree by the new wrapper node, and registers the wrapper in the parse tree's DP entity map if
 * the mention has no entry there yet.
 *
 * @param parseTree
 *            the parse tree whose dependency tree and leaf list are searched and modified
 * @param mention
 *            the mention whose head span (character offsets) is to be matched against the leaves
 * @return the same {@code parseTree} instance that was passed in
 */
public RECKParseTreeImpl findDPHeadWord(RECKParseTreeImpl parseTree, MentionImpl mention) {

    /*if (mention.getId().equals("54-86") || mention.getId().equals("52-85"))
        System.out.println();*/
    RECKDPTreeNodeImpl DPTreeNode = parseTree.getDPParseTree();

    // character span of the mention head
    int start = mention.getHead().getStart().intValue();
    int end = mention.getHead().getEnd().intValue();

    ArrayList leaves = parseTree.getDPTreeList();

    // i / j are leaf indices; leftIndex / rightIndex are the character offsets of leaves i / j.
    // During the scans below, leftID / rightID temporarily hold the offset of the previously visited leaf.
    int n = leaves.size(), i = 0, j = n - 1, k;
    int leftIndex = ((RECKDPTreeNodeImpl) leaves.get(i)).getPosition().getStart().intValue();
    int rightIndex = ((RECKDPTreeNodeImpl) leaves.get(j)).getPosition().getEnd().intValue();
    int leftID = leftIndex, rightID = rightIndex;
    RECKDPTreeNodeImpl leftNode = (RECKDPTreeNodeImpl) leaves.get(i),
            rightNode = (RECKDPTreeNodeImpl) leaves.get(j);

    // advance from the left until a leaf starts at or after the head start (or the last leaf is reached)
    while (i < n - 1 && leftIndex < start) {
        i++;
        leftID = leftIndex;
        leftNode = (RECKDPTreeNodeImpl) leaves.get(i);
        leftIndex = leftNode.getPosition().getStart().intValue();
    }

    // retreat from the right until a leaf ends at or before the head end (or the first leaf is reached)
    while (j > 0 && end < rightIndex) {
        j--;
        rightID = rightIndex;
        rightNode = (RECKDPTreeNodeImpl) leaves.get(j);
        rightIndex = rightNode.getPosition().getEnd().intValue();
    }

    // overshoot correction: if the previous leaf starts exactly one character before the head, use it
    if ((leftIndex > start) && (leftID == start - 1)) {
        i--;
        leftNode = (RECKDPTreeNodeImpl) leaves.get(i);
        leftIndex = leftNode.getPosition().getStart().intValue();
    }

    // same correction on the right: the previous leaf ends exactly one character after the head
    if ((end > rightIndex) && (rightID == end + 1)) {
        j++;
        rightNode = (RECKDPTreeNodeImpl) leaves.get(j);
        rightIndex = rightNode.getPosition().getEnd().intValue();
    }

    // from here on leftID / rightID are leaf indices, not character offsets
    leftID = i;
    rightID = j;
    RECKDPTreeNodeImpl terminal = null;

    if (leftID < rightID) {
        // scan right-to-left; if no preposition is found the loop ends with k == leftID - 1
        for (k = rightID; k >= leftID; k--) {
            terminal = (RECKDPTreeNodeImpl) leaves.get(k);
            // re-define the head word of the mention when a preposition exists
            if (terminal.role().equals("prep") && (k > leftID)) {
                // the head becomes the leaf just left of the preposition
                k--;
                break;
            }
        }
    } else {
        k = leftID;
    }

    // k >= leftID: head is leaf k; otherwise (no preposition found) the rightmost leaf of the span.
    // The if/else below repeats this choice and additionally narrows rightIndex / rightID.
    RECKDPTreeNodeImpl origin = (k >= leftID) ?
            (RECKDPTreeNodeImpl) leaves.get(k) : (RECKDPTreeNodeImpl) leaves.get(rightID);
    if (k >= leftID) {
        origin = (RECKDPTreeNodeImpl) leaves.get(k);
        rightIndex = origin.getPosition().getEnd().intValue();
        rightID = k;
    } else {
        origin = (RECKDPTreeNodeImpl) leaves.get(rightID);
    }

    StringLabel newLabel = new StringLabel(mention.getEntity().getType());
    Charseq newPosition = null;
    RECKDPTreeNodeImpl newNode = null;

    newPosition = origin.getPosition().clone();

    // head word text is cut out of noTaggedContent; the stored position ends one before the leaf's end
    // (presumably Charseq ends are exclusive on leaves and inclusive on head-word positions - TODO confirm)
    mention.setHeadword(RECKConstants.trimReturn(noTaggedContent
            .substring(origin.getPosition().getStart().intValue(), origin.getPosition().getEnd().intValue())));
    mention.setHwPosition(new Charseq(origin.getPosition().getStart().intValue(),
            origin.getPosition().getEnd().intValue() - 1));

    // insert the entity-type node between the head leaf and its parent
    RECKDPTreeNodeImpl upper = origin.parent(DPTreeNode);
    Tree[] newChildren = { origin };
    int index = upper.indexOf(origin);
    newNode = new RECKDPTreeNodeImpl(newLabel, newChildren, newPosition);
    upper.setChild(index, newNode);

    // NOTE(review): the new node is recorded only when the mention has no list yet; if a list already
    // exists the node is not appended - confirm this is intended
    ArrayList nodeList = (ArrayList) parseTree.getDPEntityTrees().get(mention);
    if (nodeList == null) {
        nodeList = new ArrayList();
        nodeList.add(newNode);
        parseTree.getDPEntityTrees().put(mention, nodeList);
    }

    return parseTree;
}
From source file:reck.corpora.DocumentImpl.java
License:Open Source License
/**
 * Locates the head word of a mention in the constituent parse tree and inserts a new node labelled with
 * the mention's entity type around the constituent(s) that cover the head.
 * <p>
 * Side effects: sets the mention's head word and head-word position, restructures the constituent tree to
 * contain the new entity node, and registers that node in the parse tree's CT entity map if the mention
 * has no entry there yet.
 *
 * @param parseTree
 *            the parse tree whose constituent tree is searched and modified
 * @param mention
 *            the mention whose head span (character offsets) is to be matched against the leaves
 * @return the same {@code parseTree} instance that was passed in
 */
public RECKParseTreeImpl findCTHeadWord(RECKParseTreeImpl parseTree, MentionImpl mention) {

    RECKCTTreeNodeImpl CTTreeNode = parseTree.getCTParseTree();

    // character span of the mention head
    int start = mention.getHead().getStart().intValue();
    int end = mention.getHead().getEnd().intValue();

    ArrayList leaves = new ArrayList(CTTreeNode.getLeaves());

    // i / j are leaf indices; leftIndex / rightIndex are the character offsets of leaves i / j.
    // During the scans below, leftID / rightID temporarily hold the offset of the previously visited leaf.
    int n = leaves.size(), i = 0, j = n - 1, k;
    int leftIndex = ((RECKCTTreeNodeImpl) leaves.get(i)).getPosition().getStart().intValue();
    int rightIndex = ((RECKCTTreeNodeImpl) leaves.get(j)).getPosition().getEnd().intValue();
    int leftID = leftIndex, rightID = rightIndex;
    RECKCTTreeNodeImpl leftNode = (RECKCTTreeNodeImpl) leaves.get(i),
            rightNode = (RECKCTTreeNodeImpl) leaves.get(j);

    // advance from the left until a leaf starts at or after the head start (or the last leaf is reached)
    while (i < n - 1 && leftIndex < start) {
        i++;
        leftID = leftIndex;
        leftNode = (RECKCTTreeNodeImpl) leaves.get(i);
        leftIndex = leftNode.getPosition().getStart().intValue();
    }

    // retreat from the right until a leaf ends at or before the head end (or the first leaf is reached)
    while (j > 0 && end < rightIndex) {
        j--;
        rightID = rightIndex;
        rightNode = (RECKCTTreeNodeImpl) leaves.get(j);
        rightIndex = rightNode.getPosition().getEnd().intValue();
    }

    // overshoot correction: if the previous leaf starts exactly one character before the head, use it
    if ((leftIndex > start) && (leftID == start - 1)) {
        i--;
        leftNode = (RECKCTTreeNodeImpl) leaves.get(i);
        leftIndex = leftNode.getPosition().getStart().intValue();
    }

    // same correction on the right: the previous leaf ends exactly one character after the head
    if ((end > rightIndex) && (rightID == end + 1)) {
        j++;
        rightNode = (RECKCTTreeNodeImpl) leaves.get(j);
        rightIndex = rightNode.getPosition().getEnd().intValue();
    }

    // from here on leftID / rightID are leaf indices, not character offsets
    leftID = i;
    rightID = j;
    RECKCTTreeNodeImpl terminal = null;
    RECKCTTreeNodeImpl preTerminal = null;
    RECKCTTreeNodeImpl prePreTerminal = null;

    if (leftID < rightID) {
        // scan right-to-left; if no preposition is found the loop ends with k == leftID - 1
        for (k = rightID; k >= leftID; k--) {
            terminal = (RECKCTTreeNodeImpl) leaves.get(k);
            preTerminal = terminal.parent(CTTreeNode);
            prePreTerminal = preTerminal.parent(CTTreeNode);
            // re-define the head word of the mention when a preposition exists
            // (a leaf tagged "IN" directly inside a "PP")
            if ((preTerminal.label().value().equals("IN")) && (prePreTerminal.label().value().equals("PP"))
                    && k > leftID) {
                // the head becomes the leaf just left of the preposition
                k--;
                break;
            }
        }
    } else {
        k = leftID;
    }

    // k >= leftID: head is leaf k; otherwise (no preposition found) the rightmost leaf of the span.
    // The if/else below repeats this choice and additionally narrows rightIndex / rightID.
    RECKCTTreeNodeImpl origin = (k >=
            leftID) ? (RECKCTTreeNodeImpl) leaves.get(k) : (RECKCTTreeNodeImpl) leaves.get(rightID);
    if (k >= leftID) {
        origin = (RECKCTTreeNodeImpl) leaves.get(k);
        rightIndex = origin.getPosition().getEnd().intValue();
        rightID = k;
    } else {
        origin = (RECKCTTreeNodeImpl) leaves.get(rightID);
    }

    // climb while the parent still ends at the head's right edge and does not start left of the head:
    // afterwards origin is the highest such constituent and upper is its parent
    // NOTE(review): no null check on upper - confirm the climb cannot run past the root
    RECKCTTreeNodeImpl upper = origin.parent(CTTreeNode);
    while ((upper.getPosition().getStart().intValue() >= leftIndex)
            && (upper.getPosition().getEnd().intValue() == rightIndex)) {
        origin = upper;
        upper = upper.parent(CTTreeNode);
    }

    StringLabel newLabel = new StringLabel(mention.getEntity().getType());
    Charseq newPosition = null;
    RECKCTTreeNodeImpl newNode = null;

    /** The case where upper covers more than mention head */
    if ((upper.getPosition().getStart().intValue() < leftIndex)
            && (origin.getPosition().getStart().intValue() > leftIndex)) {
        // the head spans several siblings under upper: walk left from origin (sibling index r)
        // to the sibling l whose start coincides with the head's left edge
        RECKCTTreeNodeImpl child = origin;
        int r = upper.indexOf(child), l = r;
        while (child.getPosition().getStart().intValue() > leftIndex) {
            l--;
            child = (RECKCTTreeNodeImpl) upper.getChild(l);
        }

        // only regroup when a sibling starts exactly at the head's left edge
        if (child.getPosition().getStart().intValue() == leftIndex) {
            RECKCTTreeNodeImpl leftChild = (RECKCTTreeNodeImpl) upper.getChild(l);
            RECKCTTreeNodeImpl rightChild = (RECKCTTreeNodeImpl) upper.getChild(r);
            leftIndex = leftChild.getPosition().getStart().intValue();
            rightIndex = rightChild.getPosition().getEnd().intValue();
            newPosition = new Charseq(leftIndex, rightIndex);

            mention.setHeadword(RECKConstants.trimReturn(noTaggedContent.substring(leftIndex, rightIndex)));
            mention.setHwPosition(new Charseq(leftIndex, rightIndex - 1));

            // siblings l..r become the children of the new entity node
            Tree[] children = new Tree[r - l + 1];
            for (int m = l; m < r + 1; m++)
                children[m - l] = upper.getChild(m);

            newNode = new RECKCTTreeNodeImpl(newLabel, children, newPosition);

            // rebuild upper's child array: [0, l) unchanged, the new node at l, then everything after r
            Tree[] newChildren = new Tree[upper.numChildren() - newNode.numChildren() + 1];
            for (int m = 0; m < l; m++) {
                newChildren[m] = upper.getChild(m);
            }
            newChildren[l] = newNode;
            for (int m = r + 1; m < upper.numChildren(); m++) {
                newChildren[m - r + l] = upper.getChild(m);
            }
            upper.setChildren(newChildren);
        }
    }

    // default case: the head is the single constituent origin
    if (newNode == null) {
        newPosition = origin.getPosition().clone();

        mention.setHeadword(RECKConstants.trimReturn(noTaggedContent.substring(
                origin.getPosition().getStart().intValue(), origin.getPosition().getEnd().intValue())));
        mention.setHwPosition(new Charseq(origin.getPosition().getStart().intValue(),
                origin.getPosition().getEnd().intValue() - 1));

        if (origin.isPreTerminal()) {
            // pre-terminal: put the entity node above origin
            Tree[] newChildren = { origin };
            int index = upper.indexOf(origin);
            newNode = new RECKCTTreeNodeImpl(newLabel, newChildren, newPosition);
            upper.setChild(index, newNode);
        } else {
            // phrase: put the entity node below origin, taking over all of origin's children
            newNode = new RECKCTTreeNodeImpl(newLabel, origin.children(), newPosition);
            Tree[] newChildren = { newNode };
            origin.setChildren(newChildren);
        }
    }

    // NOTE(review): the new node is recorded only when the mention has no list yet; if a list already
    // exists the node is not appended - confirm this is intended
    ArrayList nodeList = (ArrayList) parseTree.getCTEntityTrees().get(mention);
    if (nodeList == null) {
        nodeList = new ArrayList();
        nodeList.add(newNode);
        parseTree.getCTEntityTrees().put(mention, nodeList);
    }

    return parseTree;
}
From source file:reck.parser.lexparser.RECKLexicalizedParser.java
License:Open Source License
public void computePosition(int start, Sentence sentence, String content) { int docIndex = start; String st = null;/*from w ww . j av a 2 s . co m*/ reckTreeList = new ArrayList(); for (int i = 0; i < sentence.size(); i++) { st = ((Word) sentence.get(i)).toString(); int index = content.indexOf(st, docIndex); if (index == -1 || index - docIndex > maxDistanceBetweenLeaves) { if (st.indexOf("&") != -1) { String tmp = st.replaceAll("&", "&"); index = content.indexOf(tmp, docIndex); if (index == -1 || index - docIndex > maxDistanceBetweenLeaves) { tmp = st.replaceAll("&", "&"); index = content.indexOf(tmp, docIndex); } } if (index != -1 && index - docIndex <= maxDistanceBetweenLeaves) { docIndex = index + st.length() + 4; } else { st = reConvert(st); index = content.indexOf(st, docIndex); if (index == -1 || index - docIndex > maxDistanceBetweenLeaves) { if (st.equals("-LRB-") || st.equals("-LCB-")) { int i1 = content.indexOf("(", docIndex); int i2 = content.indexOf("[", docIndex); int i3 = content.indexOf("{", docIndex); if (i1 == -1) i1 = content.length(); if (i2 == -1) i2 = content.length(); if (i3 == -1) i3 = content.length(); if ((i1 == i2) && (i1 == i3)) System.out.println("Come here !"); else if (i1 < i2) { if (i3 < i1) { // st = "{"; index = i3; } else { // st = "("; index = i1; } } else { if (i3 < i2) { // st = "{"; index = i3; } else { // st = "["; index = i2; } } docIndex = index + 1; } else if (st.equals("-RRB-") || st.equals("-RCB-")) { int i1 = content.indexOf(")", docIndex); int i2 = content.indexOf("]", docIndex); int i3 = content.indexOf("}", docIndex); if (i1 == -1) i1 = content.length(); if (i2 == -1) i2 = content.length(); if (i3 == -1) i3 = content.length(); if ((i1 == i2) && (i1 == i3)) System.out.println("Come here !"); else if (i1 < i2) { if (i3 < i1) { // st = "}"; index = i3; } else { // st = ")"; index = i1; } } else { if (i3 < i2) { // st = "}"; index = i3; } else { // st = "]"; index = i2; } } docIndex = index + 1; } else { for (int k = 0; k 
< newStrings.length; k++) { st = st.replace(newStrings[k], oldStrings[k]); } String oldSubSt1 = new String(new char[] { (char) 39, (char) 39 }); String oldSubSt2 = new String(new char[] { (char) 96, (char) 96 }); String newSubSt = new String(new char[] { (char) 34 }); if (st.indexOf(oldSubSt1) != -1 && content.substring(docIndex).indexOf(newSubSt) != -1) st = st.replace(oldSubSt1, newSubSt); else if (st.indexOf(oldSubSt2) != -1 && content.substring(docIndex).indexOf(newSubSt) != -1) st = st.replace(oldSubSt2, newSubSt); int i39 = content.indexOf(39, docIndex); int i96 = content.indexOf(96, docIndex); if ((st.indexOf(39) != -1) && (i96 != -1 && i96 - docIndex <= maxDistanceBetweenLeaves)) st = st.replace((char) 39, (char) 96); else if ((st.indexOf(96) != -1) && (i39 != -1 && i39 - docIndex <= maxDistanceBetweenLeaves)) st = st.replace((char) 96, (char) 39); index = content.indexOf(st, docIndex); if (index == -1 || index - docIndex > maxDistanceBetweenLeaves) System.out.println("Come here !"); else docIndex = index + st.length(); } } else docIndex = index + st.length(); } } else docIndex = index + st.length(); // Test if next node is a sentence splitter, means "." if (st.endsWith(".") && i < sentence.size() - 1) { String nextLabel = ((Word) sentence.get(i + 1)).toString(); int nextIndex = content.indexOf(nextLabel, docIndex); if (nextLabel.equals(".") && (nextIndex == -1 || nextIndex - docIndex > maxDistanceBetweenLeaves)) { docIndex--; st = st.substring(0, st.length() - 2); } } // ((Word)sentence.get(i)).setWord(st); RECKDPTreeNodeImpl reckNode = new RECKDPTreeNodeImpl(new StringLabel(st), new Charseq(index, docIndex)); reckTreeList.add(reckNode); } sentencePosition = new Charseq(start, docIndex); }
From source file:reck.parser.lexparser.RECKLexicalizedParser.java
License:Open Source License
public RECKCTTreeNodeImpl convertToRECKTree(Tree root, int startSentence, String content) { RECKCTTreeNodeImpl newRoot = null;/*from w ww . j a v a 2s . c o m*/ Charseq pos = null; List nodeList = root.getLeaves(); HashSet parentSet = new HashSet(); int docIndex = startSentence; String st = null; // compute leaves' positions for (int i = 0; i < nodeList.size(); i++) { Tree oldNode = (Tree) nodeList.get(i); st = oldNode.toString(); int start = content.indexOf(st, docIndex); if (start == -1 || start - docIndex > maxDistanceBetweenLeaves) { if (st.indexOf("&") != -1) { String tmp = st.replaceAll("&", "&"); start = content.indexOf(tmp, docIndex); if (start == -1 || start - docIndex > maxDistanceBetweenLeaves) { tmp = st.replaceAll("&", "&"); start = content.indexOf(tmp, docIndex); } } if (start != -1 && start - docIndex <= maxDistanceBetweenLeaves) { docIndex = start + st.length() + 4; } else { st = reConvert(st); start = content.indexOf(st, docIndex); if (start == -1 || start - docIndex > maxDistanceBetweenLeaves) { if (st.equals("-LRB-") || st.equals("-LCB-")) { int i1 = content.indexOf("(", docIndex); int i2 = content.indexOf("[", docIndex); int i3 = content.indexOf("{", docIndex); if (i1 == -1) i1 = content.length(); if (i2 == -1) i2 = content.length(); if (i3 == -1) i3 = content.length(); if ((i1 == i2) && (i1 == i3)) System.out.println("Come here !"); else if (i1 < i2) { if (i3 < i1) { // st = "{"; start = i3; } else { // st = "("; start = i1; } } else { if (i3 < i2) { // st = "{"; start = i3; } else { // st = "["; start = i2; } } docIndex = start + 1; } else if (st.equals("-RRB-") || st.equals("-RCB-")) { int i1 = content.indexOf(")", docIndex); int i2 = content.indexOf("]", docIndex); int i3 = content.indexOf("}", docIndex); if (i1 == -1) i1 = content.length(); if (i2 == -1) i2 = content.length(); if (i3 == -1) i3 = content.length(); if ((i1 == i2) && (i1 == i3)) System.out.println("Come here !"); else if (i1 < i2) { if (i3 < i1) { // st = "}"; start = i3; } 
else { // st = ")"; start = i1; } } else { if (i3 < i2) { // st = "}"; start = i3; } else { // st = "]"; start = i2; } } docIndex = start + 1; } else { for (int k = 0; k < newStrings.length; k++) { st = st.replace(newStrings[k], oldStrings[k]); } String oldSubSt1 = new String(new char[] { (char) 39, (char) 39 }); String oldSubSt2 = new String(new char[] { (char) 96, (char) 96 }); String newSubSt = new String(new char[] { (char) 34 }); if (st.indexOf(oldSubSt1) != -1 && content.substring(docIndex).indexOf(newSubSt) != -1) st = st.replace(oldSubSt1, newSubSt); else if (st.indexOf(oldSubSt2) != -1 && content.substring(docIndex).indexOf(newSubSt) != -1) st = st.replace(oldSubSt2, newSubSt); int i39 = content.indexOf(39, docIndex); int i96 = content.indexOf(96, docIndex); if ((st.indexOf(39) != -1) && (i96 != -1 && i96 - docIndex <= maxDistanceBetweenLeaves)) st = st.replace((char) 39, (char) 96); else if ((st.indexOf(96) != -1) && (i39 != -1 && i39 - docIndex <= maxDistanceBetweenLeaves)) st = st.replace((char) 96, (char) 39); start = content.indexOf(st, docIndex); if (start == -1 || start - docIndex > maxDistanceBetweenLeaves) System.out.println("Come here !"); else docIndex = start + st.length(); } } else docIndex = start + st.length(); } } else docIndex = start + st.length(); // Test if next node is a sentence splitter, means "." 
if (st.endsWith(".") && i < nodeList.size() - 1) { Tree nextNode = (Tree) nodeList.get(i + 1); String nextLabel = nextNode.label().value(); int nextStart = content.indexOf(nextLabel, docIndex); if (nextLabel.equals(".") && (nextStart == -1 || nextStart - docIndex > maxDistanceBetweenLeaves)) { docIndex--; oldNode.setLabel(new StringLabel(st.substring(0, st.length() - 1))); } } pos = new Charseq(start, docIndex); RECKCTTreeNodeImpl newNode = new RECKCTTreeNodeImpl(new StringLabel(st), (List) oldNode.getChildrenAsList(), pos); Tree parent = oldNode.parent(root); parent.setChild(parent.indexOf(oldNode), newNode); parentSet.add(parent); } nodeList.clear(); nodeList.addAll(parentSet); // compute upper nodes' positions while (!nodeList.isEmpty()) { parentSet = new HashSet(); for (int i = 0; i < nodeList.size(); i++) { Tree oldNode = (Tree) nodeList.get(i); Iterator nodeIter = oldNode.getChildrenAsList().iterator(); Tree node = (Tree) nodeIter.next(); while (node instanceof RECKCTTreeNodeImpl && nodeIter.hasNext()) { node = (Tree) nodeIter.next(); } if (node instanceof RECKCTTreeNodeImpl) { Long start = ((RECKCTTreeNodeImpl) oldNode.firstChild()).getPosition().getStart(); Long end = ((RECKCTTreeNodeImpl) oldNode.lastChild()).getPosition().getEnd(); pos = new Charseq(start, end); RECKCTTreeNodeImpl newNode = new RECKCTTreeNodeImpl(oldNode.label(), (List) oldNode.getChildrenAsList(), pos); Tree parent = oldNode.parent(root); parent.setChild(parent.indexOf(oldNode), newNode); parentSet.add(parent); // if oldNode is in parentSet, remove it if (parentSet.contains(oldNode)) { parentSet.remove(oldNode); } } else { parentSet.add(oldNode); } } nodeList.clear(); if (parentSet.size() == 1 && parentSet.contains(root)) { Long start = ((RECKCTTreeNodeImpl) root.firstChild()).getPosition().getStart(); Long end = ((RECKCTTreeNodeImpl) root.lastChild()).getPosition().getEnd(); pos = new Charseq(start, end); newRoot = new RECKCTTreeNodeImpl(root.label(), (List) root.getChildrenAsList(), 
pos); } else { nodeList.addAll(parentSet); } } return newRoot; }
From source file:reck.parser.lexparser.RECKLexicalizedParser.java
License:Open Source License
public RECKCTTreeNodeImpl[] splitHyphenSt_Constituent(RECKCTTreeNodeImpl node, RECKCTTreeNodeImpl parent) { String label = node.label().value(); String subSt[] = label.split("-"); int n = subSt.length; long index = node.getPosition().getStart(); RECKCTTreeNodeImpl preTerminalNode[] = new RECKCTTreeNodeImpl[2 * n - 1]; for (int i = 0; i < n; i++) { StringLabel leafLb = new StringLabel(subSt[i]); Charseq leafPos = new Charseq(index, index + subSt[i].length()); RECKCTTreeNodeImpl leafNode = new RECKCTTreeNodeImpl(leafLb, leafPos); preTerminalNode[2 * i] = new RECKCTTreeNodeImpl(new StringLabel(parent.label().value()), new RECKCTTreeNodeImpl[] { leafNode }, leafPos); index += subSt[i].length();//w w w . j ava2 s. co m if (i < n - 1) { StringLabel hyphenLb = new StringLabel("-"); Charseq hyphenPos = new Charseq(index, index + 1); RECKCTTreeNodeImpl hyphenNode = new RECKCTTreeNodeImpl(hyphenLb, hyphenPos); preTerminalNode[2 * i + 1] = new RECKCTTreeNodeImpl(new StringLabel(parent.label().value()), new RECKCTTreeNodeImpl[] { hyphenNode }, hyphenPos); index++; } } return preTerminalNode; }
From source file:reck.parser.lexparser.RECKLexicalizedParser.java
License:Open Source License
/**
 * Splits a leaf into two pre-terminal nodes: one for the label without its last character and one for a
 * "." occupying the last character position of the leaf. Both pre-terminals are labelled with the label
 * value of {@code parent}.
 *
 * @param node
 *            the leaf to split; its last character is treated as the point
 * @param parent
 *            the node whose label value is given to both pre-terminals
 * @return a two-element array: the word pre-terminal followed by the point pre-terminal
 */
public RECKCTTreeNodeImpl[] splitPointSt_Constituent(RECKCTTreeNodeImpl node, RECKCTTreeNodeImpl parent) {
    String text = node.label().value();
    int from = node.getPosition().getStart().intValue();
    int to = node.getPosition().getEnd().intValue();
    int pointAt = to - 1;

    // everything before the final character
    Charseq wordSpan = new Charseq(from, pointAt);
    RECKCTTreeNodeImpl wordLeaf = new RECKCTTreeNodeImpl(new StringLabel(text.substring(0, text.length() - 1)),
            wordSpan);
    RECKCTTreeNodeImpl wordPreTerminal = new RECKCTTreeNodeImpl(new StringLabel(parent.label().value()),
            new RECKCTTreeNodeImpl[] { wordLeaf }, wordSpan);

    // the final character, relabelled "."
    Charseq pointSpan = new Charseq(pointAt, to);
    RECKCTTreeNodeImpl pointLeaf = new RECKCTTreeNodeImpl(new StringLabel("."), pointSpan);
    RECKCTTreeNodeImpl pointPreTerminal = new RECKCTTreeNodeImpl(new StringLabel(parent.label().value()),
            new RECKCTTreeNodeImpl[] { pointLeaf }, pointSpan);

    return new RECKCTTreeNodeImpl[] { wordPreTerminal, pointPreTerminal };
}