Java tutorial
/* * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information * regarding copyright ownership. The ASF licenses this file * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY * KIND, either express or implied. See the License for the * specific language governing permissions and limitations * under the License. */ package org.apache.ctakes.temporal.ae.feature; import java.io.IOException; import java.net.URL; import java.util.ArrayList; import java.util.List; import org.apache.ctakes.relationextractor.ae.features.RelationFeaturesExtractor; import org.apache.ctakes.typesystem.type.textsem.IdentifiedAnnotation; import org.apache.uima.analysis_engine.AnalysisEngineProcessException; import org.apache.uima.jcas.JCas; import org.cleartk.ml.Feature; import org.cleartk.timeml.util.TimeWordsExtractor; import org.springframework.util.StringUtils; import com.google.common.base.Charsets; import com.google.common.collect.ArrayListMultimap; import com.google.common.collect.Multimap; import com.google.common.io.Resources; public class CheckSpecialWordRelationExtractor implements RelationFeaturesExtractor<IdentifiedAnnotation, IdentifiedAnnotation> { //final static List<String> specialWd = Arrays.asList("before","prior","previous","previously","ago","soon","earlier","early","after","later","subsequent","follow","following","followed","post","since","back","start","started","by","past","starting"); private static final String LOOKUP_PATH = "/org/apache/ctakes/temporal/TimeLexicon.csv"; private Multimap<String, String> specialWd; public CheckSpecialWordRelationExtractor() { this.specialWd = ArrayListMultimap.create(); URL url = TimeWordsExtractor.class.getResource(LOOKUP_PATH); try { for (String line : Resources.readLines(url, Charsets.US_ASCII)) { String[] WordAndType = line.split(","); if (WordAndType.length != 2) { throw new IllegalArgumentException("Expected '<word>,<type>', found: " + line); } this.specialWd.put(WordAndType[0], WordAndType[1]); } } catch (IOException e) { System.err.println("TimeLexicon resource initialization error."); } } @Override public List<Feature> extract(JCas jcas, IdentifiedAnnotation arg1, IdentifiedAnnotation arg2) throws AnalysisEngineProcessException { List<Feature> feats = new ArrayList<>(); // swap the order if necessary: if (isBefore(arg2, arg1)) { IdentifiedAnnotation temp = arg1; arg1 = arg2; arg2 = temp; } else if (isBefore(arg1, arg2)) { //keep the order of arg1 arg2 } else { return feats; //don't do anything if arg1 overlap arg2 } //1 get covering sentence: // Map<IdentifiedAnnotation, Collection<Sentence>> coveringMap = // JCasUtil.indexCovering(jcas, IdentifiedAnnotation.class, Sentence.class); // int begin = arg1.getEnd(); int end = arg2.getBegin(); // int window = 30; // // //get two covering sentences for arg1 and arg2, two arguments could come from different sentences. // List<Sentence> sentList = new ArrayList<>(); // sentList.addAll(coveringMap.get(arg1)); // if(sentList.isEmpty()) return feats; // Sentence arg1Sent = sentList.get(0); // // sentList = new ArrayList<>(); // sentList.addAll(coveringMap.get(arg2)); // if(sentList.isEmpty()) return feats; // Sentence arg2Sent = sentList.get(0); String textInBetween = null; // String textAfterArg1 = null; // String textBeforeArg2 = null; // if(end-begin <= 2* window){ textInBetween = jcas.getDocumentText().substring(begin, end).replaceAll("[\r\n]", " ").toLowerCase(); // }else{ // int arg1tail = Math.min(begin + window, arg1Sent.getEnd()); // textAfterArg1 = jcas.getDocumentText().substring(begin, arg1tail).replaceAll("[\r\n]", " ").toLowerCase(); // int arg2head = Math.max(end - window, arg2Sent.getBegin()); // textBeforeArg2 = jcas.getDocumentText().substring(arg2head, end).replaceAll("[\r\n]", " ").toLowerCase(); // } // int arg1head = Math.max(arg1.getBegin()-window, arg1Sent.getBegin()); // String textBeforeArg1 = jcas.getDocumentText().substring(arg1head, arg1.getBegin()).replaceAll("[\r\n]", " ").toLowerCase(); // int arg2tail = Math.min(arg2.getEnd()+window, arg2Sent.getEnd()); // String textAfterArg2 = jcas.getDocumentText().substring(arg2.getEnd(), arg2tail).replaceAll("[\r\n]", " ").toLowerCase(); String textInArg1 = jcas.getDocumentText().substring(arg1.getBegin(), arg1.getEnd()) .replaceAll("[\r\n]", " ").toLowerCase(); String textInArg2 = jcas.getDocumentText().substring(arg2.getBegin(), arg2.getEnd()) .replaceAll("[\r\n]", " ").toLowerCase(); for (String lexicon : specialWd.keySet()) { if (textInBetween != null && textInBetween.matches(".*\\b" + lexicon + "\\b.*")) { String type = StringUtils.collectionToCommaDelimitedString(specialWd.get(lexicon)); Feature feature = new Feature("SpecialWd_InBetween", type); feats.add(feature); } // if( textBeforeArg1.matches(".*\\b"+lexicon+"\\b.*")){ // String type = StringUtils.collectionToCommaDelimitedString(specialWd.get(lexicon)); // Feature feature = new Feature("SpecialWd_BeforeArg1", type); // feats.add(feature); // } if (textInArg1.matches(".*\\b" + lexicon + "\\b.*")) { String type = StringUtils.collectionToCommaDelimitedString(specialWd.get(lexicon)); Feature feature = new Feature("SpecialWd_InArg1", type); feats.add(feature); } // if( textAfterArg1 != null && textAfterArg1.matches(".*\\b"+lexicon+"\\b.*")){ // String type = StringUtils.collectionToCommaDelimitedString(specialWd.get(lexicon)); // Feature feature = new Feature("SpecialWd_AfterArg1", type); // feats.add(feature); // } // if( textBeforeArg2 != null && textBeforeArg2.matches(".*\\b"+lexicon+"\\b.*")){ // String type = StringUtils.collectionToCommaDelimitedString(specialWd.get(lexicon)); // Feature feature = new Feature("SpecialWd_BeforeArg2", type); // feats.add(feature); // } if (textInArg2.matches(".*\\b" + lexicon + "\\b.*")) { String type = StringUtils.collectionToCommaDelimitedString(specialWd.get(lexicon)); Feature feature = new Feature("SpecialWd_InArg2", type); feats.add(feature); } // if( textAfterArg2.matches(".*\\b"+lexicon+"\\b.*")){ // String type = StringUtils.collectionToCommaDelimitedString(specialWd.get(lexicon)); // Feature feature = new Feature("SpecialWd_AfterArg2", type); // feats.add(feature); // } } //logger.info("found nearby verb's pos tag: "+ verbTP); return feats; } private static boolean isBefore(IdentifiedAnnotation arg1, IdentifiedAnnotation arg2) { if (arg1.getBegin() < arg2.getBegin()) { if (arg1.getEnd() < arg2.getBegin()) { return true; } } return false; } }