List of usage examples for weka.core.Instances.setClassIndex
public void setClassIndex(int classIndex)
From source file:activeSegmentation.feature.FeatureExtraction.java
License:Open Source License
/** * Add training samples from a rectangular roi * /*from w w w . j a va2 s .co m*/ * @param trainingData set of instances to add to * @param classIndex class index value * @param sliceNum number of 2d slice being processed * @param r shape roi * @return number of instances added */ private Instances addRectangleRoiInstances(int sliceNum, List<String> classLabels, int classes) { Instances testingData; ArrayList<Attribute> attributes = createFeatureHeader(); attributes.add(new Attribute(Common.CLASS, addClasstoHeader(classes, classLabels))); System.out.println(attributes.toString()); // create initial set of instances testingData = new Instances(Common.INSTANCE_NAME, attributes, 1); // Set the index of the class attribute testingData.setClassIndex(filterManager.getNumOfFeatures()); for (int x = 0; x < originalImage.getWidth(); x++) { for (int y = 0; y < originalImage.getHeight(); y++) { testingData.add(filterManager.createInstance(x, y, 0, sliceNum)); } } // increase number of instances for this class System.out.println("SIZe" + testingData.size()); System.out.println(testingData.get(1).toString()); return testingData; }
From source file:adams.data.conversion.SpreadSheetToWekaInstances.java
License:Open Source License
/** * Performs the actual conversion.//w ww. j ava 2 s . c o m * * @return the converted data * @throws Exception if something goes wrong with the conversion */ @Override protected Object doConvert() throws Exception { Instances result; SpreadSheet sheet; DenseInstance inst; ArrayList<Attribute> atts; HashSet<String> unique; ArrayList<String> labels; Row row; Cell cell; int i; int n; double[] values; Collection<ContentType> types; ContentType type; boolean added; int[] classIndices; sheet = (SpreadSheet) m_Input; // create header atts = new ArrayList<>(); for (i = 0; i < sheet.getColumnCount(); i++) { added = false; types = sheet.getContentTypes(i); if (types.contains(ContentType.DOUBLE)) types.remove(ContentType.LONG); if (types.contains(ContentType.LONG)) { types.add(ContentType.DOUBLE); types.remove(ContentType.LONG); } if (types.size() == 1) { type = (ContentType) types.toArray()[0]; if (type == ContentType.DOUBLE) { atts.add(new Attribute(sheet.getHeaderRow().getCell(i).getContent())); added = true; } else if (type == ContentType.DATE) { atts.add(new Attribute(sheet.getHeaderRow().getCell(i).getContent(), Constants.TIMESTAMP_FORMAT)); added = true; } else if (type == ContentType.TIME) { atts.add(new Attribute(sheet.getHeaderRow().getCell(i).getContent(), Constants.TIME_FORMAT)); added = true; } } if (!added) { unique = new HashSet<>(); for (n = 0; n < sheet.getRowCount(); n++) { row = sheet.getRow(n); cell = row.getCell(i); if ((cell != null) && !cell.isMissing()) unique.add(cell.getContent()); } if ((unique.size() > m_MaxLabels) || (m_MaxLabels < 1)) { atts.add(new Attribute(sheet.getHeaderRow().getCell(i).getContent(), (FastVector) null)); } else { labels = new ArrayList<>(unique); Collections.sort(labels); atts.add(new Attribute(sheet.getHeaderRow().getCell(i).getContent(), labels)); } } } result = new Instances(Environment.getInstance().getProject(), atts, sheet.getRowCount()); if (sheet.hasName()) result.setRelationName(sheet.getName()); // add data for 
(n = 0; n < sheet.getRowCount(); n++) { row = sheet.getRow(n); values = new double[result.numAttributes()]; for (i = 0; i < result.numAttributes(); i++) { cell = row.getCell(i); values[i] = weka.core.Utils.missingValue(); if ((cell != null) && !cell.isMissing()) { if (result.attribute(i).type() == Attribute.DATE) { if (cell.isTime()) values[i] = cell.toTime().getTime(); else values[i] = cell.toDate().getTime(); } else if (result.attribute(i).isNumeric()) { values[i] = Utils.toDouble(cell.getContent()); } else if (result.attribute(i).isString()) { values[i] = result.attribute(i).addStringValue(cell.getContent()); } else { values[i] = result.attribute(i).indexOfValue(cell.getContent()); } } } inst = new DenseInstance(1.0, values); result.add(inst); } if (sheet instanceof Dataset) { classIndices = ((Dataset) sheet).getClassAttributeIndices(); if (classIndices.length > 0) result.setClassIndex(classIndices[0]); } return result; }
From source file:adams.data.instancesanalysis.PCA.java
License:Open Source License
/** * Performs the actual analysis./*w ww .jav a2s .c o m*/ * * @param data the data to analyze * @return null if successful, otherwise error message * @throws Exception if analysis fails */ @Override protected String doAnalyze(Instances data) throws Exception { String result; Remove remove; PublicPrincipalComponents pca; int i; Capabilities caps; PartitionedMultiFilter2 part; Range rangeUnsupported; Range rangeSupported; TIntList listNominal; Range rangeNominal; ArrayList<ArrayList<Double>> coeff; Instances filtered; SpreadSheet transformed; WekaInstancesToSpreadSheet conv; String colName; result = null; m_Loadings = null; m_Scores = null; if (!m_AttributeRange.isAllRange()) { if (isLoggingEnabled()) getLogger().info("Filtering attribute range: " + m_AttributeRange.getRange()); remove = new Remove(); remove.setAttributeIndicesArray(m_AttributeRange.getIntIndices()); remove.setInvertSelection(true); remove.setInputFormat(data); data = Filter.useFilter(data, remove); } if (isLoggingEnabled()) getLogger().info("Performing PCA..."); listNominal = new TIntArrayList(); if (m_SkipNominal) { for (i = 0; i < data.numAttributes(); i++) { if (i == data.classIndex()) continue; if (data.attribute(i).isNominal()) listNominal.add(i); } } // check for unsupported attributes caps = new PublicPrincipalComponents().getCapabilities(); m_Supported = new TIntArrayList(); m_Unsupported = new TIntArrayList(); for (i = 0; i < data.numAttributes(); i++) { if (!caps.test(data.attribute(i)) || (i == data.classIndex()) || (listNominal.contains(i))) m_Unsupported.add(i); else m_Supported.add(i); } data.setClassIndex(-1); m_NumAttributes = m_Supported.size(); // the principal components will delete the attributes without any distinct values. // this checks which instances will be kept. 
m_Kept = new ArrayList<>(); for (i = 0; i < m_Supported.size(); i++) { if (data.numDistinctValues(m_Supported.get(i)) > 1) m_Kept.add(m_Supported.get(i)); } // build a model using the PublicPrincipalComponents pca = new PublicPrincipalComponents(); pca.setMaximumAttributes(m_MaxAttributes); pca.setVarianceCovered(m_Variance); pca.setMaximumAttributeNames(m_MaxAttributeNames); part = null; if (m_Unsupported.size() > 0) { rangeUnsupported = new Range(); rangeUnsupported.setMax(data.numAttributes()); rangeUnsupported.setIndices(m_Unsupported.toArray()); rangeSupported = new Range(); rangeSupported.setMax(data.numAttributes()); rangeSupported.setIndices(m_Supported.toArray()); part = new PartitionedMultiFilter2(); part.setFilters(new Filter[] { pca, new AllFilter(), }); part.setRanges(new weka.core.Range[] { new weka.core.Range(rangeSupported.getRange()), new weka.core.Range(rangeUnsupported.getRange()), }); } try { if (part != null) part.setInputFormat(data); else pca.setInputFormat(data); } catch (Exception e) { result = Utils.handleException(this, "Failed to set data format", e); } transformed = null; if (result == null) { try { if (part != null) filtered = weka.filters.Filter.useFilter(data, part); else filtered = weka.filters.Filter.useFilter(data, pca); } catch (Exception e) { result = Utils.handleException(this, "Failed to apply filter", e); filtered = null; } if (filtered != null) { conv = new WekaInstancesToSpreadSheet(); conv.setInput(filtered); result = conv.convert(); if (result == null) { transformed = (SpreadSheet) conv.getOutput(); // shorten column names again if (part != null) { for (i = 0; i < transformed.getColumnCount(); i++) { colName = transformed.getColumnName(i); colName = colName.replaceFirst("filtered-[0-9]*-", ""); transformed.getHeaderRow().getCell(i).setContentAsString(colName); } } } } } if (result == null) { // get the coefficients from the filter m_Scores = transformed; coeff = pca.getCoefficients(); m_Loadings = extractLoadings(data, 
coeff); m_Loadings.setName("Loadings for " + data.relationName()); } return result; }
From source file:adams.data.instancesanalysis.pls.AbstractMultiClassPLS.java
License:Open Source License
/** * Determines the output format based on the input format and returns this. * * @param input the input format to base the output format on * @return the output format/*w w w . j a va 2 s.co m*/ * @throws Exception in case the determination goes wrong */ @Override public Instances determineOutputFormat(Instances input) throws Exception { ArrayList<Attribute> atts; String prefix; int i; Instances result; List<String> classes; // collect classes m_ClassAttributeIndices = new TIntArrayList(); classes = new ArrayList<>(); for (i = 0; i < input.numAttributes(); i++) { if (m_ClassAttributes.isMatch(input.attribute(i).name())) { classes.add(input.attribute(i).name()); m_ClassAttributeIndices.add(i); } } if (!classes.contains(input.classAttribute().name())) { classes.add(input.classAttribute().name()); m_ClassAttributeIndices.add(input.classAttribute().index()); } // generate header atts = new ArrayList<>(); prefix = getClass().getSimpleName(); for (i = 0; i < getNumComponents(); i++) atts.add(new Attribute(prefix + "_" + (i + 1))); for (String cls : classes) atts.add(new Attribute(cls)); result = new Instances(prefix, atts, 0); result.setClassIndex(result.numAttributes() - 1); m_OutputFormat = result; return result; }
From source file:adams.data.instancesanalysis.pls.AbstractSingleClassPLS.java
License:Open Source License
/** * Determines the output format based on the input format and returns this. * * @param input the input format to base the output format on * @return the output format//ww w.j a v a2s. c o m * @throws Exception in case the determination goes wrong */ @Override public Instances determineOutputFormat(Instances input) throws Exception { ArrayList<Attribute> atts; String prefix; int i; Instances result; // generate header atts = new ArrayList<>(); prefix = getClass().getSimpleName(); for (i = 0; i < getNumComponents(); i++) atts.add(new Attribute(prefix + "_" + (i + 1))); atts.add(new Attribute(input.classAttribute().name())); result = new Instances(prefix, atts, 0); result.setClassIndex(result.numAttributes() - 1); m_OutputFormat = result; return result; }
From source file:adams.data.outlier.AbstractInstanceOutlierDetectorTestCase.java
License:Open Source License
/**
 * Loads the data to process.
 * <p>
 * The resource is copied to the tmp directory for reading and removed from
 * there again afterwards, whether or not loading succeeded.
 *
 * @param filename the filename to load (without path)
 * @return the first instance in the dataset, or null if it failed
 *         to load; class attribute is always the last
 */
protected Instance load(String filename) {
  Instance first = new Instance();

  try {
    m_TestHelper.copyResourceToTmp(filename);
    String path = new TmpFile(filename).getAbsolutePath();
    Instances dataset = DataSource.read(path);
    int lastAttribute = dataset.numAttributes() - 1;
    dataset.setClassIndex(lastAttribute);
    first.set(dataset.instance(0));
  }
  catch (Exception e) {
    e.printStackTrace();
    first = null;
  }
  finally {
    m_TestHelper.deleteFileFromTmp(filename);
  }

  return first;
}
From source file:adams.flow.source.WekaNewInstances.java
License:Open Source License
/** * Executes the flow item.// w ww.java 2 s.c o m * * @return null if everything is fine, otherwise error message */ @Override protected String doExecute() { String result; ArrayList<Attribute> atts; Instances data; int i; int index; String[] types; String[] names; String name; Attribute att; result = null; m_OutputToken = null; try { types = m_AttributeTypes.listValue(); names = m_AttributeNames.listValue(); m_ClassIndex.setMax(types.length); index = m_ClassIndex.getIntIndex(); atts = new ArrayList<Attribute>(); for (i = 0; i < types.length; i++) { // determine name of attribute if (i >= names.length) { if (i == index) { if (m_ClassName.length() == 0) name = DEFAULT_CLASS; else name = m_ClassName; } else { name = ATTRIBUTE_PREFIX + (i + 1); } } else { name = names[i]; } if (types[i].equals(AttributeTypeList.ATT_NUMERIC)) att = new Attribute(name); else if (types[i].equals(AttributeTypeList.ATT_NOMINAL)) att = new Attribute(name, new ArrayList<String>()); else if (types[i].equals(AttributeTypeList.ATT_STRING)) att = new Attribute(name, (ArrayList<String>) null); else if (types[i].equals(AttributeTypeList.ATT_DATE)) att = new Attribute(name, DEFAULT_DATE_FORMAT); else throw new IllegalStateException("Unhandled attribute type: " + types[i]); atts.add(att); } if (m_RelationName.length() == 0) name = getFullName(); else name = m_RelationName; data = new Instances(name, atts, 0); data.setClassIndex(index); m_OutputToken = new Token(data); updateProvenance(m_OutputToken); } catch (Exception e) { result = handleException("Failed to create new dataset: ", e); } return result; }
From source file:adams.flow.transformer.WekaBootstrapping.java
License:Open Source License
/** * Executes the flow item.//from w ww .j a va 2 s. c o m * * @return null if everything is fine, otherwise error message */ @Override protected String doExecute() { String result; SpreadSheet sheet; Row row; Evaluation evalAll; Evaluation eval; WekaEvaluationContainer cont; TIntList indices; Random random; int i; int iteration; int size; List<Prediction> preds; Instances header; Instances data; ArrayList<Attribute> atts; Instance inst; boolean numeric; int classIndex; Double[] errors; Double[] errorsRev; Percentile<Double> perc; Percentile<Double> percRev; TIntList subset; result = null; if (m_InputToken.getPayload() instanceof Evaluation) { evalAll = (Evaluation) m_InputToken.getPayload(); } else { cont = (WekaEvaluationContainer) m_InputToken.getPayload(); evalAll = (Evaluation) cont.getValue(WekaEvaluationContainer.VALUE_EVALUATION); } if ((evalAll.predictions() == null) || (evalAll.predictions().size() == 0)) result = "No predictions available!"; if (result == null) { // init spreadsheet sheet = new DefaultSpreadSheet(); row = sheet.getHeaderRow(); row.addCell("S").setContentAsString("Subsample"); for (EvaluationStatistic s : m_StatisticValues) row.addCell(s.toString()).setContentAsString(s.toString()); for (i = 0; i < m_Percentiles.length; i++) { switch (m_ErrorCalculation) { case ACTUAL_MINUS_PREDICTED: row.addCell("perc-AmP-" + i).setContentAsString("Percentile-AmP-" + m_Percentiles[i]); break; case PREDICTED_MINUS_ACTUAL: row.addCell("perc-PmA-" + i).setContentAsString("Percentile-PmA-" + m_Percentiles[i]); break; case ABSOLUTE: row.addCell("perc-Abs-" + i).setContentAsString("Percentile-Abs-" + m_Percentiles[i]); break; case BOTH: row.addCell("perc-AmP-" + i).setContentAsString("Percentile-AmP-" + m_Percentiles[i]); row.addCell("perc-PmA-" + i).setContentAsString("Percentile-PmA-" + m_Percentiles[i]); break; default: throw new IllegalStateException("Unhandled error calculation: " + m_ErrorCalculation); } } // set up bootstrapping preds = 
evalAll.predictions(); random = new Random(m_Seed); indices = new TIntArrayList(); size = (int) Math.round(preds.size() * m_Percentage); header = evalAll.getHeader(); numeric = header.classAttribute().isNumeric(); m_ClassIndex.setData(header.classAttribute()); if (numeric) classIndex = -1; else classIndex = m_ClassIndex.getIntIndex(); for (i = 0; i < preds.size(); i++) indices.add(i); // create fake evalutions subset = new TIntArrayList(); for (iteration = 0; iteration < m_NumSubSamples; iteration++) { if (isStopped()) { sheet = null; break; } // determine subset.clear(); if (m_WithReplacement) { for (i = 0; i < size; i++) subset.add(indices.get(random.nextInt(preds.size()))); } else { indices.shuffle(random); for (i = 0; i < size; i++) subset.add(indices.get(i)); } // create dataset from predictions errors = new Double[size]; errorsRev = new Double[size]; atts = new ArrayList<>(); atts.add(header.classAttribute().copy("Actual")); data = new Instances(header.relationName() + "-" + (iteration + 1), atts, size); data.setClassIndex(0); for (i = 0; i < subset.size(); i++) { inst = new DenseInstance(preds.get(subset.get(i)).weight(), new double[] { preds.get(subset.get(i)).actual() }); data.add(inst); switch (m_ErrorCalculation) { case ACTUAL_MINUS_PREDICTED: errors[i] = preds.get(subset.get(i)).actual() - preds.get(subset.get(i)).predicted(); break; case PREDICTED_MINUS_ACTUAL: errorsRev[i] = preds.get(subset.get(i)).predicted() - preds.get(subset.get(i)).actual(); break; case ABSOLUTE: errors[i] = Math .abs(preds.get(subset.get(i)).actual() - preds.get(subset.get(i)).predicted()); break; case BOTH: errors[i] = preds.get(subset.get(i)).actual() - preds.get(subset.get(i)).predicted(); errorsRev[i] = preds.get(subset.get(i)).predicted() - preds.get(subset.get(i)).actual(); break; default: throw new IllegalStateException("Unhandled error calculation: " + m_ErrorCalculation); } } // perform "fake" evaluation try { eval = new Evaluation(data); for (i = 0; i < subset.size(); 
i++) { if (numeric) eval.evaluateModelOnceAndRecordPrediction( new double[] { preds.get(subset.get(i)).predicted() }, data.instance(i)); else eval.evaluateModelOnceAndRecordPrediction( ((NominalPrediction) preds.get(subset.get(i))).distribution().clone(), data.instance(i)); } } catch (Exception e) { result = handleException( "Failed to create 'fake' Evaluation object (iteration: " + (iteration + 1) + ")!", e); break; } // add row row = sheet.addRow(); row.addCell("S").setContent(iteration + 1); for (EvaluationStatistic s : m_StatisticValues) { try { row.addCell(s.toString()).setContent(EvaluationHelper.getValue(eval, s, classIndex)); } catch (Exception e) { getLogger().log(Level.SEVERE, "Failed to calculate statistic in iteration #" + (iteration + 1) + ": " + s, e); row.addCell(s.toString()).setMissing(); } } for (i = 0; i < m_Percentiles.length; i++) { perc = new Percentile<>(); perc.addAll(errors); percRev = new Percentile<>(); percRev.addAll(errorsRev); switch (m_ErrorCalculation) { case ACTUAL_MINUS_PREDICTED: row.addCell("perc-AmP-" + i).setContent(perc.getPercentile(m_Percentiles[i].doubleValue())); break; case PREDICTED_MINUS_ACTUAL: row.addCell("perc-PmA-" + i) .setContent(percRev.getPercentile(m_Percentiles[i].doubleValue())); break; case ABSOLUTE: row.addCell("perc-Abs-" + i).setContent(perc.getPercentile(m_Percentiles[i].doubleValue())); break; case BOTH: row.addCell("perc-AmP-" + i).setContent(perc.getPercentile(m_Percentiles[i].doubleValue())); row.addCell("perc-PmA-" + i) .setContent(percRev.getPercentile(m_Percentiles[i].doubleValue())); break; default: throw new IllegalStateException("Unhandled error calculation: " + m_ErrorCalculation); } } } if ((result == null) && (sheet != null)) m_OutputToken = new Token(sheet); } return result; }
From source file:adams.flow.transformer.WekaClassSelector.java
License:Open Source License
/** * Executes the flow item./* w w w. jav a 2 s. co m*/ * * @return null if everything is fine, otherwise error message */ @Override protected String doExecute() { String result; Object o; weka.core.Instances inst; boolean isInstances; List<Attribute> atts; int i; result = null; // dataset o = m_InputToken.getPayload(); isInstances = false; inst = null; if (o instanceof weka.core.Instances) { inst = (weka.core.Instances) o; inst = new weka.core.Instances(inst); isInstances = true; } else if (o instanceof adams.data.instance.Instance) { inst = ((adams.data.instance.Instance) o).getDatasetHeader(); } else if (o instanceof weka.core.Instance) { inst = ((weka.core.Instance) o).dataset(); } else { result = "Cannot handle object of type " + o.getClass().getName() + "!"; } if (result == null) { if (m_Unset) { inst.setClassIndex(-1); } else { // determine the attributes that fit the regular expression atts = new ArrayList<Attribute>(); for (i = 0; i < inst.numAttributes(); i++) { if (m_RegexName.isEmpty() || m_RegexName.isMatch(inst.attribute(i).name())) atts.add(inst.attribute(i)); } // class index m_ClassIndex.setMax(atts.size()); if (m_Override || (inst.classIndex() == -1)) inst.setClassIndex(atts.get(m_ClassIndex.getIntIndex()).index()); } // output if (isInstances) m_OutputToken = new Token(inst); else m_OutputToken = new Token(o); updateProvenance(m_OutputToken); } return result; }
From source file:adams.flow.transformer.WekaInstancesMerge.java
License:Open Source License
/** * Prefixes the attributes./* w w w . j a v a 2s. c o m*/ * * @param index the index of the dataset * @param inst the data to process * @return the processed data */ protected Instances prefixAttributes(Instances inst, int index) { Instances result; String prefix; ArrayList<Attribute> atts; int i; prefix = createPrefix(inst, index); // header atts = new ArrayList<>(); for (i = 0; i < inst.numAttributes(); i++) atts.add(inst.attribute(i).copy(prefix + inst.attribute(i).name())); // data result = new Instances(inst.relationName(), atts, inst.numInstances()); result.setClassIndex(inst.classIndex()); for (i = 0; i < inst.numInstances(); i++) result.add((Instance) inst.instance(i).copy()); return result; }