List of usage examples for weka.core Instances numAttributes
public int numAttributes()
From source file:es.ubu.XRayDetector.modelo.ventana.VentanaAbstracta.java
License:Open Source License
/**
 * Builds an empty {@link Instances} header containing one numeric attribute per
 * selected feature, plus a final nominal class attribute "Defecto" ({"true","false"}).
 * The class index is set to that last attribute.
 *
 * @param features feature names to include, or {@code null} to include every feature
 * @return an empty Instances object carrying only the attribute declarations
 */
public Instances getHeader(List<String> features) {
    int capacity = 100000;
    List<String> featuresCopy = null;
    ArrayList<Attribute> atts = new ArrayList<Attribute>();
    ArrayList<String> defect = new ArrayList<String>();
    defect.add("true");
    defect.add("false");
    if (features != null) {
        // Normalize requested names: lower-case the first character and strip all spaces,
        // so lookups below match the internal head names.
        featuresCopy = new ArrayList<String>(features);
        for (int i = 0; i < featuresCopy.size(); i++) {
            String rest = featuresCopy.get(i).substring(1);
            char first = featuresCopy.get(i).charAt(0);
            first = Character.toLowerCase(first);
            featuresCopy.set(i, (first + rest).replaceAll(" ", ""));
        }
    }
    // Standard features.
    for (int j = 0; j < ftStandard.getHead().length; j++) {
        if (features == null || featuresCopy.contains(ftStandard.getHead()[j]))
            atts.add(new Attribute(ftStandard.getHead()[j]));
    }
    // Standard features over the saliency map.
    // NOTE(review): the membership test uses ftStandard's heads while the attribute is
    // built from ftStandardSaliency's heads — looks like a copy-paste slip unless both
    // arrays are guaranteed identical; confirm against the feature classes.
    for (int j = 0; j < ftStandardSaliency.getHead().length; j++) {
        if (features == null || featuresCopy.contains(ftStandard.getHead()[j] + "(S)"))
            atts.add(new Attribute(ftStandardSaliency.getHead()[j] + "(S)"));
    }
    // Haralick means: 5 columns (one per distance j) per selected base feature.
    // Selecting the base name enables all 5 "_mean" columns.
    for (int j = 1; j < 6; j++) {
        for (int i = 0; i < ftHaralick.getHead().length; i++) {
            if (features == null || featuresCopy.contains(ftHaralick.getHead()[i]))
                atts.add(new Attribute(ftHaralick.getHead()[i] + "_mean" + j));
        }
    }
    // Haralick ranges: same scheme with the "_range" suffix.
    for (int j = 1; j < 6; j++) {
        for (int i = 0; i < ftHaralick.getHead().length; i++) {
            if (features == null || featuresCopy.contains(ftHaralick.getHead()[i]))
                atts.add(new Attribute(ftHaralick.getHead()[i] + "_range" + j));
        }
    }
    // Haralick means over the saliency map.
    // NOTE(review): again the test reads ftHaralick's heads but the attribute is built
    // from ftHaralickSaliency's heads — verify the two head arrays really coincide.
    for (int j = 1; j < 6; j++) {
        for (int i = 0; i < ftHaralickSaliency.getHead().length; i++) {
            if (features == null || featuresCopy.contains(ftHaralick.getHead()[i] + "(S)"))
                atts.add(new Attribute(ftHaralickSaliency.getHead()[i] + "_mean" + j + "(S)"));
        }
    }
    // Haralick ranges over the saliency map (same mismatch as above).
    for (int j = 1; j < 6; j++) {
        for (int i = 0; i < ftHaralickSaliency.getHead().length; i++) {
            if (features == null || featuresCopy.contains(ftHaralick.getHead()[i] + "(S)"))
                atts.add(new Attribute(ftHaralickSaliency.getHead()[i] + "_range" + j + "(S)"));
        }
    }
    // LBP histogram bins 1..59.
    // NOTE(review): membership key uses "head_j" while the attribute is named "head(j)";
    // callers passing names copied from an existing header would never match — confirm
    // which naming the selection UI produces.
    for (int j = 1; j < 60; j++) {
        if (features == null || featuresCopy.contains(ftLbp.getHead() + "_" + j))
            atts.add(new Attribute(ftLbp.getHead() + "(" + j + ")"));
    }
    // LBP bins over the saliency map.
    for (int j = 1; j < 60; j++) {
        if (features == null || featuresCopy.contains(ftLbpSaliency.getHead() + "_" + j + "(S)"))
            atts.add(new Attribute(ftLbpSaliency.getHead() + "(" + j + ")(S)"));
    }
    // Nominal class attribute, always last.
    atts.add(new Attribute("Defecto", defect));
    // capacity is the expected number of instances (the header itself stays empty).
    Instances header = new Instances("NuevaInstancia", atts, capacity);
    // The class is the last attribute.
    header.setClassIndex(header.numAttributes() - 1);
    return header;
}
From source file:es.upm.dit.gsi.barmas.dataset.utils.DatasetSplitter.java
License:Open Source License
/**
 * Loads a dataset from a CSV file and marks its last column as the class attribute.
 *
 * @param csvFilePath path to the CSV file to load
 * @return the loaded dataset with the class index set
 * @throws Exception if the file cannot be read or parsed by weka
 */
private Instances getDataFromCSV(String csvFilePath) throws Exception {
    Instances dataset = new DataSource(csvFilePath).getDataSet();
    // By convention the class is the last CSV column.
    dataset.setClassIndex(dataset.numAttributes() - 1);
    return dataset;
}
From source file:es.upm.dit.gsi.barmas.launcher.WekaClassifiersValidator.java
License:Open Source License
/** * @param cls/* ww w .j a va 2 s . c om*/ * @param trainingData * @param testData * @param leba * @return [0] = pctCorrect, [1] = pctIncorrect * @throws Exception */ public double[] getValidation(Classifier cls, Instances trainingData, Instances testData, int leba) throws Exception { Instances testDataWithLEBA = new Instances(testData); for (int j = 0; j < leba; j++) { if (j < testDataWithLEBA.numAttributes() - 1) { for (int i = 0; i < testDataWithLEBA.numInstances(); i++) { testDataWithLEBA.instance(i).setMissing(j); } } } Evaluation eval; try { eval = new Evaluation(trainingData); logger.fine("Evaluating model with leba: " + leba); eval.evaluateModel(cls, testDataWithLEBA); double[] results = new double[2]; results[0] = eval.pctCorrect() / 100; results[1] = eval.pctIncorrect() / 100; return results; } catch (Exception e) { logger.severe("Problems evaluating model for " + cls.getClass().getSimpleName()); logger.severe(e.getMessage()); e.printStackTrace(); throw e; } }
From source file:es.upm.dit.gsi.barmas.launcher.WekaClassifiersValidator.java
License:Open Source License
/**
 * Reads a CSV file into a weka dataset, taking the last column as the class.
 *
 * @param csvFilePath path of the CSV file
 * @return the dataset with its class index pointing at the final attribute
 * @throws Exception on any read or parse failure
 */
public static Instances getDataFromCSV(String csvFilePath) throws Exception {
    DataSource csvSource = new DataSource(csvFilePath);
    Instances dataset = csvSource.getDataSet();
    int lastColumn = dataset.numAttributes() - 1;
    dataset.setClassIndex(lastColumn); // class label lives in the last CSV column
    return dataset;
}
From source file:etc.aloe.cscw2013.FeatureGenerationImpl.java
License:Open Source License
/**
 * Builds the feature-extraction pipeline from a copy of the given examples and
 * returns it as a FeatureSpecification. Filter order matters: the filters are
 * applied downstream in exactly the order they are added here.
 */
@Override
public FeatureSpecification generateFeatures(ExampleSet basicExamples) {
    // Copy so that configuring the filters cannot mutate the caller's examples.
    ExampleSet examples = basicExamples.copy();
    FeatureSpecification spec = new FeatureSpecification();
    System.out.print("Configuring features over " + examples.size() + " examples... ");
    try {
        // Lexical / surface feature filters, in fixed pipeline order.
        spec.addFilter(getPronounsFilter(examples));
        spec.addFilter(getPunctuationFilter(examples));
        spec.addFilter(getSpecialWordsFilter(examples));
        spec.addFilter(getSpellingFilter(examples));
        spec.addFilter(getEmoticonsFilter(examples));
        spec.addFilter(getBagOfWordsFilter(examples));
        spec.addFilter(getRemoveIDFilter(examples));
        // Either keep participant features or strip the participant column entirely.
        if (this.getParticipantFeatureCount() > 0) {
            spec.addFilter(getParticipantsFilter(examples));
        } else {
            spec.addFilter(getRemoveParticipantFilter(examples));
        }
        Instances output = spec.getOutputFormat();
        int numAttrs = output.numAttributes();
        // -1: the class attribute is not counted as a generated feature.
        System.out.println("generated " + (numAttrs - 1) + " features.");
    } catch (Exception e) {
        // NOTE(review): broad catch swallows the failure and still returns a possibly
        // half-built spec; callers get no signal beyond stderr. Confirm this
        // best-effort contract is intentional before tightening it.
        System.err.println("Error generating features.");
        System.err.println("\t" + e.getMessage());
    }
    return spec;
}
From source file:etc.aloe.data.SegmentSet.java
License:Open Source License
/**
 * Converts this segment set into an ExampleSet ready for feature extraction.
 * Each example carries an id, the concatenated message text, the (optional)
 * true label, the concatenated participants, and four basic rate features.
 *
 * @return an ExampleSet with one instance per segment
 */
public ExampleSet getBasicExamples() {
    // Declare the schema: id, message, label (class), participant, then rate features.
    ArrayList<Attribute> schema = new ArrayList<Attribute>();
    schema.add(new Attribute(ExampleSet.ID_ATTR_NAME));
    schema.add(new Attribute(ExampleSet.MESSAGE_ATTR_NAME, (List<String>) null));
    schema.add(new Attribute(ExampleSet.LABEL_ATTR_NAME, Arrays.asList(new String[] { "false", "true" })));
    schema.add(new Attribute(ExampleSet.PARTICIPANT_ATTR_NAME, (List<String>) null));
    schema.add(new Attribute(DURATION_ATTR_NAME));
    schema.add(new Attribute(LENGTH_ATTR_NAME));
    schema.add(new Attribute(CPS_ATTR_NAME));
    schema.add(new Attribute(RATE_ATTR_NAME));

    Instances examples = new Instances("BasicExamples", schema, 0);
    examples.setClassIndex(2); // the label is the third attribute declared above

    // Resolve the attributes once, outside the loop.
    Attribute idAttr = examples.attribute(ExampleSet.ID_ATTR_NAME);
    Attribute messageAttr = examples.attribute(ExampleSet.MESSAGE_ATTR_NAME);
    Attribute labelAttr = examples.attribute(ExampleSet.LABEL_ATTR_NAME);
    Attribute participantAttr = examples.attribute(ExampleSet.PARTICIPANT_ATTR_NAME);
    Attribute durationAttr = examples.attribute(DURATION_ATTR_NAME);
    Attribute lengthAttr = examples.attribute(LENGTH_ATTR_NAME);
    Attribute cpsAttr = examples.attribute(CPS_ATTR_NAME);
    Attribute rateAttr = examples.attribute(RATE_ATTR_NAME);

    for (int idx = 0; idx < size(); idx++) {
        Segment segment = get(idx);
        Instance row = new DenseInstance(examples.numAttributes());

        String messageText = segment.concatMessages();
        String participantText = segment.concatParticipants();

        row.setValue(idAttr, segment.getId());
        row.setValue(messageAttr, messageText);
        row.setValue(participantAttr, participantText);
        // Label only when the segment has one; otherwise it stays missing.
        if (segment.hasTrueLabel()) {
            row.setValue(labelAttr, segment.getTrueLabel() ? "true" : "false");
        }
        // Duration/length/cps/rate features derived from the segment.
        computeRateValues(segment, row, messageText, durationAttr, lengthAttr, cpsAttr, rateAttr);
        examples.add(row);
    }
    return new ExampleSet(examples);
}
From source file:etc.aloe.filters.AbstractRegexFilter.java
License:Open Source License
@Override protected Instances determineOutputFormat(Instances inputFormat) throws Exception { if (stringAttributeName == null) { throw new IllegalStateException("String attribute name not set"); }/*from w w w . j a v a2 s.c o m*/ Instances outputFormat = new Instances(inputFormat, 0); Attribute stringAttr = inputFormat.attribute(stringAttributeName); stringAttributeIndex = stringAttr.index(); //Add the new columns. There is one for each regex feature. NamedRegex[] regexFeatures = getRegexFeatures(); for (int i = 0; i < regexFeatures.length; i++) { String name = regexFeatures[i].getName(); Attribute attr = new Attribute(name); outputFormat.insertAttributeAt(attr, outputFormat.numAttributes()); if (countRegexLengths) { name = name + "_L"; attr = new Attribute(name); outputFormat.insertAttributeAt(attr, outputFormat.numAttributes()); } } return outputFormat; }
From source file:etc.aloe.filters.StringToDictionaryVector.java
License:Open Source License
@Override protected Instances determineOutputFormat(Instances inputFormat) throws Exception { if (getStringAttribute() == null) { throw new IllegalStateException("String attribute name not set"); }/*from ww w. j a v a 2s . co m*/ stringAttributeIndex = inputFormat.attribute(getStringAttribute()).index(); inputFormat = getInputFormat(); //This generates m_selectedTerms and m_DocsCounts int[] docsCountsByTermIdx = determineDictionary(inputFormat); //Initialize the output format to be just like the input Instances outputFormat = new Instances(inputFormat, 0); //Set up the map from attr index to document frequency m_DocsCounts = new int[m_selectedTerms.size()]; //And add the new attributes for (int i = 0; i < m_selectedTerms.size(); i++) { int attrIdx = outputFormat.numAttributes(); int docsCount = docsCountsByTermIdx[i]; m_DocsCounts[i] = docsCount; outputFormat.insertAttributeAt(new Attribute(m_Prefix + m_selectedTerms.get(i)), attrIdx); } return outputFormat; }
From source file:etc.aloe.filters.StringToDictionaryVector.java
License:Open Source License
public static void main(String[] args) { //Create a test dataset ArrayList<Attribute> attributes = new ArrayList<Attribute>(); attributes.add(new Attribute("message", (ArrayList<String>) null)); attributes.add(new Attribute("id")); {//from w w w . j a v a 2 s . c o m ArrayList<String> classValues = new ArrayList<String>(); classValues.add("0"); classValues.add("1"); attributes.add(new Attribute("class", classValues)); } Instances instances = new Instances("test", attributes, 0); instances.setClassIndex(2); String[] messages = new String[] { "No emoticons here", "I have a smiley :)", "Two smileys and a frownie :) :) :(", "Several emoticons :( :-( :) :-) ;-) 8-) :-/ :-P" }; for (int i = 0; i < messages.length; i++) { Instance instance = new DenseInstance(instances.numAttributes()); instance.setValue(instances.attribute(0), messages[i]); instance.setValue(instances.attribute(1), i); instance.setValue(instances.attribute(2), Integer.toString(i % 2)); instances.add(instance); } System.out.println("Before filter:"); for (int i = 0; i < instances.size(); i++) { System.out.println(instances.instance(i).toString()); } try { String dictionaryName = "emoticons.txt"; StringToDictionaryVector filter = new StringToDictionaryVector(); List<String> termList = StringToDictionaryVector.readDictionaryFile(new File(dictionaryName)); filter.setTermList(termList); filter.setMinTermFreq(1); filter.setTFTransform(true); filter.setIDFTransform(true); filter.setNormalizeDocLength(new SelectedTag(FILTER_NORMALIZE_TEST_ONLY, TAGS_FILTER)); filter.setOutputWordCounts(true); filter.setStringAttribute("message"); filter.setInputFormat(instances); Instances trans1 = Filter.useFilter(instances, filter); Instances trans2 = Filter.useFilter(instances, filter); System.out.println("\nFirst application:"); System.out.println(trans1.toString()); System.out.println("\nSecond application:"); System.out.println(trans2.toString()); } catch (Exception e) { e.printStackTrace(); } }
From source file:etc.aloe.filters.WordFeaturesExtractor.java
License:Open Source License
private Instances generateOutputFormat(Instances inputFormat) { Instances outputFormat = new Instances(inputFormat, 0); //Add the new columns. There is one for each unigram and each bigram. for (int i = 0; i < unigrams.size(); i++) { String name = "uni_" + unigrams.get(i); Attribute attr = new Attribute(name); outputFormat.insertAttributeAt(attr, outputFormat.numAttributes()); }/* w w w . j a va2 s . c o m*/ for (int i = 0; i < bigrams.size(); i++) { String name = "bi_" + bigrams.get(i); Attribute attr = new Attribute(name); outputFormat.insertAttributeAt(attr, outputFormat.numAttributes()); } return outputFormat; }