Java tutorial
/* * Copyright: (c) 2004-2006 Mayo Foundation for Medical Education and * Research (MFMER). All rights reserved. MAYO, MAYO CLINIC, and the * triple-shield Mayo logo are trademarks and service marks of MFMER. * * Except as contained in the copyright notice above, the trade names, * trademarks, service marks, or product names of the copyright holder shall * not be used in advertising, promotion or otherwise in connection with * this Software without prior written authorization of the copyright holder. * * Licensed under the Eclipse Public License, Version 1.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.eclipse.org/legal/epl-v10.html * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.barcelonamedia.uima; import java.util.ArrayList; import java.util.Arrays; import java.util.Enumeration; import java.util.Iterator; import java.util.List; import weka.core.Attribute; import weka.core.DenseInstance; import weka.core.Instances; import weka.core.SparseInstance; import org.apache.uima.cas.CAS; import org.apache.uima.cas.CASException; import org.apache.uima.cas.FSIndex; import org.apache.uima.cas.FSIterator; import org.apache.uima.jcas.JFSIndexRepository; import org.apache.uima.jcas.JCas; import org.apache.uima.jcas.tcas.Annotation; import org.barcelonamedia.uima.types.AttributeValue; import org.barcelonamedia.uima.types.DateAttributeValue; import org.barcelonamedia.uima.types.NominalAttributeValue; import org.barcelonamedia.uima.types.NumericAttributeValue; import org.barcelonamedia.uima.types.StringAttributeValue; /** * @author Philip Ogren */ public class CAS2WekaInstance { /** * This method creates a weka data Instance based on the AttributeValue annotations * that are in the passed in CAS. The AttributeValue annotations must also have corresponding * attributes defined in the passed Weka Instances object. * @param cas * @param wekaInstances this should be instantiated using an ARFF Header file generated by ARFFHeaderFileCasConsumer * @return a Weka instance populated with features that corresponds to the cas's AttributeValue annotations. * @throws CASException * @see edu.mayo.bmi.uima.weka.cc.ARFFHeaderFileCasConsumer */ public static DenseInstance toWekaInstance(CAS cas, Instances wekaInstances) throws CASException { JCas jcas; jcas = cas.getJCas(); JFSIndexRepository indexes = jcas.getJFSIndexRepository(); FSIndex<Annotation> fsIndex = indexes.getAnnotationIndex(AttributeValue.type); FSIterator<Annotation> attributeIterator = fsIndex.iterator(); List<AttributeValue> attributeValues = new ArrayList<AttributeValue>(); while (attributeIterator.hasNext()) { AttributeValue attributeValue = (AttributeValue) attributeIterator.next(); attributeValues.add(attributeValue); } return toWekaInternalInstance(attributeValues, wekaInstances); } /** * This method creates a weka data Instance based on the AttributeValue annotations * that are in the passed in CAS but constrained to a given span. The AttributeValue annotations must also have corresponding * attributes defined in the passed Weka Instances object. * @param cas * @param wekaInstances this should be instantiated using an ARFF Header file generated by ARFFHeaderFileCasConsumer * @param begin Span begin * @param end Span End * @return a Weka instance populated with features that corresponds to the cas's AttributeValue annotations. * @throws CASException * @see edu.mayo.bmi.uima.weka.cc.ARFFHeaderFileCasConsumer */ public static DenseInstance toWekaInstance(CAS cas, Instances wekaInstances, int begin, int end) throws CASException { JCas jcas; jcas = cas.getJCas(); JFSIndexRepository indexes = jcas.getJFSIndexRepository(); FSIndex<Annotation> fsIndex = indexes.getAnnotationIndex(AttributeValue.type); FSIterator<Annotation> attributeIterator = fsIndex.iterator(); List<AttributeValue> attributeValues = new ArrayList<AttributeValue>(); while (attributeIterator.hasNext()) { AttributeValue attributeValue = (AttributeValue) attributeIterator.next(); if (attributeValue.getBegin() >= begin && attributeValue.getEnd() <= end) attributeValues.add(attributeValue); } return toWekaInternalInstance(attributeValues, wekaInstances); } private static DenseInstance toWekaInternalInstance(List<AttributeValue> attributeValues, Instances wekaInstances) throws CASException { double[] zeroValues = new double[wekaInstances.numAttributes()]; Arrays.fill(zeroValues, 0.0d); DenseInstance wekaInstance = new DenseInstance(1.0d, zeroValues); wekaInstance.setDataset(wekaInstances); Iterator<AttributeValue> attributeValuesIterator = attributeValues.iterator(); while (attributeValuesIterator.hasNext()) { String value = null; String attributeName = null; AttributeValue attributeValue = attributeValuesIterator.next(); attributeName = attributeValue.getAttributeName(); Attribute attribute = wekaInstances.attribute(attributeName); if (attribute == null) continue; if (attributeValue instanceof NumericAttributeValue) { value = ((NumericAttributeValue) attributeValue).getValue(); wekaInstance.setValue(attribute, Double.parseDouble(value)); } else if (attributeValue instanceof DateAttributeValue) { //this isn't actually very smart.... I need to understand this better //any volunteers for the four lines of code I need here? value = ((DateAttributeValue) attributeValue).getValue(); wekaInstance.setValue(attribute, value); } else if (attributeValue instanceof NominalAttributeValue) { value = ((NominalAttributeValue) attributeValue).getValue(); int valueIndex = attribute.indexOfValue(value); wekaInstance.setValue(attribute, (double) valueIndex); } else if (attributeValue instanceof StringAttributeValue) { value = ((StringAttributeValue) attributeValue).getValue(); wekaInstance.setValue(attribute, value); } } Enumeration attributes = wekaInstances.enumerateAttributes(); while (attributes.hasMoreElements()) { Attribute attribute = (Attribute) attributes.nextElement(); if (attribute.isNumeric() && wekaInstance.isMissing(attribute)) { wekaInstance.setValue(attribute, 0); } } return wekaInstance; } }