List of usage examples for weka.filters.unsupervised.attribute Remove setInputFormat
@Override public boolean setInputFormat(Instances instanceInfo) throws Exception
From source file: ClusteringClass.java
/**
 * Exports the SONG_RATING2 table (minus column 2) to a CSV file, then runs two
 * clustering algorithms (SimpleKMeans and DBSCAN) over the exported data and
 * prints the cluster assignments together with the song titles.
 *
 * @param args ignored
 * @throws Exception declared for completeness; all failures are caught and printed
 */
public static void main(String[] args) throws Exception {
    String filename = "C:\\Users\\Daniele\\Desktop\\Humoradio2.csv";
    try {
        FileWriter fw = new FileWriter(filename);
        Class.forName("org.apache.derby.jdbc.ClientDriver").newInstance();
        Connection conn = DriverManager.getConnection("jdbc:derby://localhost:1527/HumoRadioDB", "dani", "dani");
        String query = "SELECT * FROM SONG_RATING2";
        Statement stmt = conn.createStatement();
        ResultSet rs = stmt.executeQuery(query);

        // Header row: column names 1..22, skipping column 2.
        for (int i = 1; i < 23; i++) {
            if (i != 2) {
                ResultSetMetaData rsmd = rs.getMetaData();
                String name = rsmd.getColumnName(i);
                fw.append(name);
                if (i != 22) {
                    fw.append(',');
                } else {
                    fw.append('\n');
                }
            }
        }

        // Song titles, used later to label the clustered instances.
        // NOTE(review): assumes SONG_DATA holds at least 150 rows — confirm.
        String query1 = "SELECT * FROM SONG_DATA";
        Statement stmt1 = conn.createStatement();
        ResultSet rs1 = stmt1.executeQuery(query1);
        String[] titles = new String[150];
        for (int ii = 0; ii < 150; ii++) {
            rs1.next();
            titles[ii] = rs1.getString("TITLE");
        }

        // Data rows. BUG FIX: the original loop emitted only the separators
        // (commas and newlines) and never appended the cell values, so the
        // CSV body contained no data. Mirror the header loop: write the value
        // of every included column, then the separator.
        while (rs.next()) {
            for (int i = 1; i < 23; i++) {
                if (i != 2) {
                    fw.append(rs.getString(i));
                    if (i != 22) {
                        fw.append(',');
                    } else {
                        fw.append('\n');
                    }
                }
            }
        }
        fw.flush();
        fw.close();
        // Close JDBC resources explicitly (previously leaked).
        rs.close();
        stmt.close();
        rs1.close();
        stmt1.close();
        conn.close();
        System.out.println("CSV File is created successfully.");

        /* Clustering part */
        DataSource source = new DataSource("C:\\Users\\Daniele\\Desktop\\Humoradio2.csv");
        Instances train = source.getDataSet();

        // Use Weka's Remove filter so that attribute 1 is not considered by
        // the clustering algorithms.
        Remove filter = new Remove();
        filter.setAttributeIndices("1");
        filter.setInputFormat(train);
        Instances train2 = Filter.useFilter(train, filter);
        System.out.println("Nominal attributes removed from computation.");

        // Normalize the dataset with Weka's Normalize filter.
        Normalize norm = new Normalize();
        norm.setInputFormat(train2);
        Instances train3 = Filter.useFilter(train2, norm);
        System.out.println("Dataset normalized.");

        /* First clustering algorithm: k-means with Euclidean distance. */
        EuclideanDistance df = new EuclideanDistance();
        SimpleKMeans clus1 = new SimpleKMeans();
        int k = 10;
        clus1.setNumClusters(k);
        clus1.setDistanceFunction(df);
        // Preserve order so getAssignments() lines up with instance indices.
        clus1.setPreserveInstancesOrder(true);
        clus1.buildClusterer(train3);

        /* First evaluation */
        ClusterEvaluation eval1 = new ClusterEvaluation();
        eval1.setClusterer(clus1);
        eval1.evaluateClusterer(train3);
        System.out.println(eval1.clusterResultsToString());

        // Table rows: index, instance values, cluster id, song title.
        int[] assignments = clus1.getAssignments();
        String[][] dati = new String[150][4];
        for (int kk = 0; kk < 150; kk++) {
            dati[kk][0] = String.valueOf(kk);
            dati[kk][1] = train2.instance(kk).toString();
            dati[kk][2] = String.valueOf(assignments[kk]);
            dati[kk][3] = titles[kk];
        }
        // Print the instances grouped by k-means cluster.
        for (int w = 0; w < 10; w++) {
            System.out.println();
            for (int i = 0; i < 150; i++) {
                if (dati[i][2].equals(String.valueOf(w))) {
                    for (int j = 0; j < 4; j++) {
                        if (j != 3) {
                            System.out.print(dati[i][j] + "-> \t");
                        } else {
                            System.out.println(dati[i][j]);
                        }
                    }
                }
            }
        }

        /* Second clustering algorithm: DBSCAN. */
        System.out.println();
        DBSCAN clus3 = new DBSCAN();
        clus3.setEpsilon(0.7);
        clus3.setMinPoints(2);
        clus3.buildClusterer(train3);

        /* Second evaluation */
        ClusterEvaluation eval3 = new ClusterEvaluation();
        eval3.setClusterer(clus3);
        eval3.evaluateClusterer(train3);
        System.out.println(eval3.clusterResultsToString());

        double[] assignments3 = eval3.getClusterAssignments();
        String[][] dati3 = new String[150][4];
        for (int kk = 0; kk < 150; kk++) {
            dati3[kk][0] = String.valueOf(kk);
            dati3[kk][1] = train2.instance(kk).toString();
            dati3[kk][2] = String.valueOf(assignments3[kk]);
            dati3[kk][3] = titles[kk];
        }
        // Print the instances grouped by DBSCAN cluster.
        for (int w = 0; w < eval3.getNumClusters(); w++) {
            System.out.println();
            for (int i = 0; i < 150; i++) {
                if (Double.parseDouble(dati3[i][2]) == w) {
                    for (int j = 0; j < 4; j++) {
                        if (j != 3) {
                            System.out.print(dati3[i][j] + "-> \t");
                        } else {
                            System.out.println(dati3[i][j]);
                        }
                    }
                }
            }
        }
        // Noise instances: DBSCAN assigns them cluster -1.
        System.out.println();
        for (int i = 0; i < 150; i++) {
            if (Double.parseDouble(dati3[i][2]) == -1.0) {
                for (int j = 0; j < 4; j++) {
                    if (j != 3) {
                        System.out.print(dati3[i][j] + "-> \t");
                    } else {
                        System.out.println(dati3[i][j]);
                    }
                }
            }
        }
    } catch (Exception e) {
        e.printStackTrace();
    }
}
From source file: WrapperSubset.java
License: Open Source License
/** * Evaluates a subset of attributes//from w w w . j a va2 s. c o m * * @param subset a bitset representing the attribute subset to be evaluated * @return the error rate * @throws Exception if the subset could not be evaluated */ @Override public double evaluateSubset(BitSet subset) throws Exception { // if (subset.isEmpty()) // return 0.0; double evalMetric = 0; double[] repError = new double[5]; int numAttributes = 0; int i, j; Random Rnd = new Random(m_seed); Remove delTransform = new Remove(); delTransform.setInvertSelection(true); // copy the instances Instances trainCopy = new Instances(m_trainInstances); // count attributes set in the BitSet for (i = 0; i < m_numAttribs; i++) { if (subset.get(i)) { numAttributes++; } } // set up an array of attribute indexes for the filter (+1 for the class) int[] featArray = new int[numAttributes + 1]; for (i = 0, j = 0; i < m_numAttribs; i++) { if (subset.get(i)) { featArray[j++] = i; } } featArray[j] = m_classIndex; delTransform.setAttributeIndicesArray(featArray); delTransform.setInputFormat(trainCopy); trainCopy = Filter.useFilter(trainCopy, delTransform); // max of 5 repetitions of cross validation for (i = 0; i < 5; i++) { m_Evaluation = new Evaluation(trainCopy); m_Evaluation.crossValidateModel(m_BaseClassifier, trainCopy, m_folds, Rnd); switch (m_evaluationMeasure) { case EVAL_DEFAULT: repError[i] = m_Evaluation.errorRate(); // if (m_trainInstances.classAttribute().isNominal()) { // repError[i] = 1.0 - repError[i]; // } break; case EVAL_ACCURACY: repError[i] = m_Evaluation.errorRate(); // if (m_trainInstances.classAttribute().isNominal()) { // repError[i] = 1.0 - repError[i]; // } break; case EVAL_RMSE: repError[i] = m_Evaluation.rootMeanSquaredError(); break; case EVAL_MAE: repError[i] = m_Evaluation.meanAbsoluteError(); break; case EVAL_FMEASURE: if (m_IRClassVal < 0) { repError[i] = m_Evaluation.weightedFMeasure(); } else { repError[i] = m_Evaluation.fMeasure(m_IRClassVal); } break; case EVAL_AUC: if 
(m_IRClassVal < 0) { repError[i] = m_Evaluation.weightedAreaUnderROC(); } else { repError[i] = m_Evaluation.areaUnderROC(m_IRClassVal); } break; case EVAL_AUPRC: if (m_IRClassVal < 0) { repError[i] = m_Evaluation.weightedAreaUnderPRC(); } else { repError[i] = m_Evaluation.areaUnderPRC(m_IRClassVal); } break; case EVAL_NEW: repError[i] = (1.0 - m_Evaluation.errorRate()) + m_IRfactor * m_Evaluation.weightedFMeasure(); break; } // check on the standard deviation if (!repeat(repError, i + 1)) { i++; break; } } for (j = 0; j < i; j++) { evalMetric += repError[j]; } evalMetric /= i; m_Evaluation = null; switch (m_evaluationMeasure) { case EVAL_DEFAULT: case EVAL_ACCURACY: case EVAL_RMSE: case EVAL_MAE: if (m_trainInstances.classAttribute().isNominal() && (m_evaluationMeasure == EVAL_DEFAULT || m_evaluationMeasure == EVAL_ACCURACY)) { evalMetric = 1 - evalMetric; } else { evalMetric = -evalMetric; // maximize } break; } return evalMetric; }
From source file: adams.data.instancesanalysis.FastICA.java
License: Open Source License
/**
 * Performs the actual analysis.
 *
 * @param data the data to analyze
 * @return null if successful, otherwise error message
 * @throws Exception if analysis fails
 */
@Override
protected String doAnalyze(Instances data) throws Exception {
    m_Components = null;
    m_Sources = null;

    // Restrict the dataset to the configured attribute range, if necessary.
    if (!m_AttributeRange.isAllRange()) {
        if (isLoggingEnabled())
            getLogger().info("Filtering attribute range: " + m_AttributeRange.getRange());
        Remove keepRange = new Remove();
        keepRange.setAttributeIndicesArray(m_AttributeRange.getIntIndices());
        keepRange.setInvertSelection(true);
        keepRange.setInputFormat(data);
        data = Filter.useFilter(data, keepRange);
    }

    if (isLoggingEnabled())
        getLogger().info("Performing ICA...");

    // Run the ICA transform over the full data matrix.
    Matrix transformed = m_ICA.transform(MatrixHelper.wekaToMatrixAlgo(MatrixHelper.getAll(data)));
    if (transformed == null)
        return "Failed to transform data!";

    // Expose components and sources as spreadsheets.
    m_Components = MatrixHelper.matrixToSpreadSheet(MatrixHelper.matrixAlgoToWeka(m_ICA.getComponents()),
            "Component-");
    m_Sources = MatrixHelper.matrixToSpreadSheet(MatrixHelper.matrixAlgoToWeka(m_ICA.getSources()), "Source-");
    return null;
}
From source file: adams.data.instancesanalysis.PCA.java
License: Open Source License
/**
 * Performs the actual analysis: runs PCA on the supported attributes of the
 * data (optionally skipping nominal ones), passing unsupported attributes
 * through unchanged, and stores the scores and loadings as spreadsheets.
 *
 * @param data the data to analyze
 * @return null if successful, otherwise error message
 * @throws Exception if analysis fails
 */
@Override
protected String doAnalyze(Instances data) throws Exception {
    String result;
    Remove remove;
    PublicPrincipalComponents pca;
    int i;
    Capabilities caps;
    PartitionedMultiFilter2 part;
    Range rangeUnsupported;
    Range rangeSupported;
    TIntList listNominal;
    Range rangeNominal;
    ArrayList<ArrayList<Double>> coeff;
    Instances filtered;
    SpreadSheet transformed;
    WekaInstancesToSpreadSheet conv;
    String colName;

    result = null;
    m_Loadings = null;
    m_Scores = null;

    // restrict the data to the user-selected attribute range
    if (!m_AttributeRange.isAllRange()) {
        if (isLoggingEnabled())
            getLogger().info("Filtering attribute range: " + m_AttributeRange.getRange());
        remove = new Remove();
        remove.setAttributeIndicesArray(m_AttributeRange.getIntIndices());
        remove.setInvertSelection(true);
        remove.setInputFormat(data);
        data = Filter.useFilter(data, remove);
    }
    if (isLoggingEnabled())
        getLogger().info("Performing PCA...");

    // collect nominal attributes to exclude from the PCA (if configured)
    listNominal = new TIntArrayList();
    if (m_SkipNominal) {
        for (i = 0; i < data.numAttributes(); i++) {
            if (i == data.classIndex())
                continue;
            if (data.attribute(i).isNominal())
                listNominal.add(i);
        }
    }

    // check for unsupported attributes: anything PCA cannot handle, the class
    // attribute, and the nominal attributes collected above
    caps = new PublicPrincipalComponents().getCapabilities();
    m_Supported = new TIntArrayList();
    m_Unsupported = new TIntArrayList();
    for (i = 0; i < data.numAttributes(); i++) {
        if (!caps.test(data.attribute(i)) || (i == data.classIndex()) || (listNominal.contains(i)))
            m_Unsupported.add(i);
        else
            m_Supported.add(i);
    }
    data.setClassIndex(-1);

    m_NumAttributes = m_Supported.size();

    // the principal components will delete the attributes without any distinct values.
    // this checks which instances will be kept.
    m_Kept = new ArrayList<>();
    for (i = 0; i < m_Supported.size(); i++) {
        if (data.numDistinctValues(m_Supported.get(i)) > 1)
            m_Kept.add(m_Supported.get(i));
    }

    // build a model using the PublicPrincipalComponents
    pca = new PublicPrincipalComponents();
    pca.setMaximumAttributes(m_MaxAttributes);
    pca.setVarianceCovered(m_Variance);
    pca.setMaximumAttributeNames(m_MaxAttributeNames);

    // if there are unsupported attributes, wrap the PCA in a partitioned
    // filter: PCA on the supported range, AllFilter (pass-through) on the rest
    part = null;
    if (m_Unsupported.size() > 0) {
        rangeUnsupported = new Range();
        rangeUnsupported.setMax(data.numAttributes());
        rangeUnsupported.setIndices(m_Unsupported.toArray());
        rangeSupported = new Range();
        rangeSupported.setMax(data.numAttributes());
        rangeSupported.setIndices(m_Supported.toArray());
        part = new PartitionedMultiFilter2();
        part.setFilters(new Filter[] { pca, new AllFilter(), });
        part.setRanges(new weka.core.Range[] { new weka.core.Range(rangeSupported.getRange()),
                new weka.core.Range(rangeUnsupported.getRange()), });
    }
    try {
        if (part != null)
            part.setInputFormat(data);
        else
            pca.setInputFormat(data);
    } catch (Exception e) {
        result = Utils.handleException(this, "Failed to set data format", e);
    }

    // apply the filter and convert the filtered data into a spreadsheet
    transformed = null;
    if (result == null) {
        try {
            if (part != null)
                filtered = weka.filters.Filter.useFilter(data, part);
            else
                filtered = weka.filters.Filter.useFilter(data, pca);
        } catch (Exception e) {
            result = Utils.handleException(this, "Failed to apply filter", e);
            filtered = null;
        }
        if (filtered != null) {
            conv = new WekaInstancesToSpreadSheet();
            conv.setInput(filtered);
            result = conv.convert();
            if (result == null) {
                transformed = (SpreadSheet) conv.getOutput();
                // shorten column names again (the partitioned filter prefixes
                // them with "filtered-<n>-")
                if (part != null) {
                    for (i = 0; i < transformed.getColumnCount(); i++) {
                        colName = transformed.getColumnName(i);
                        colName = colName.replaceFirst("filtered-[0-9]*-", "");
                        transformed.getHeaderRow().getCell(i).setContentAsString(colName);
                    }
                }
            }
        }
    }

    if (result == null) {
        // get the coefficients from the filter
        m_Scores = transformed;
        coeff = pca.getCoefficients();
        m_Loadings = extractLoadings(data, coeff);
        m_Loadings.setName("Loadings for " + data.relationName());
    }

    return result;
}
From source file: adams.data.instancesanalysis.PLS.java
License: Open Source License
/**
 * Performs the actual analysis.
 *
 * @param data the data to analyze
 * @return null if successful, otherwise error message
 * @throws Exception if analysis fails
 */
@Override
protected String doAnalyze(Instances data) throws Exception {
    m_Loadings = null;
    m_Scores = null;

    // work on a copy and drop rows without a class value
    data = new Instances(data);
    data.deleteWithMissingClass();

    // restrict to the configured attribute range, if necessary
    if (!m_AttributeRange.isAllRange()) {
        if (isLoggingEnabled())
            getLogger().info("Filtering attribute range: " + m_AttributeRange.getRange());
        Remove rangeFilter = new Remove();
        rangeFilter.setAttributeIndicesArray(m_AttributeRange.getIntIndices());
        rangeFilter.setInvertSelection(true);
        rangeFilter.setInputFormat(data);
        data = Filter.useFilter(data, rangeFilter);
    }

    if (isLoggingEnabled())
        getLogger().info("Performing PLS...");

    // apply the PLS filter with the configured algorithm
    weka.filters.supervised.attribute.PLS pls = new weka.filters.supervised.attribute.PLS();
    pls.setAlgorithm(m_Algorithm);
    pls.setInputFormat(data);
    data = Filter.useFilter(data, pls);

    // convert the transformed data (the scores) into a spreadsheet
    WekaInstancesToSpreadSheet conv = new WekaInstancesToSpreadSheet();
    conv.setInput(data);
    String result = conv.convert();
    if (result != null)
        return result;

    // assemble the loadings matrix as a spreadsheet
    Matrix matrix = pls.getLoadings();
    SpreadSheet loadings = new DefaultSpreadSheet();
    for (int col = 0; col < matrix.getColumnDimension(); col++)
        loadings.getHeaderRow().addCell("L-" + (col + 1)).setContentAsString("Loading-" + (col + 1));
    for (int rowIdx = 0; rowIdx < matrix.getRowDimension(); rowIdx++) {
        Row row = loadings.addRow();
        for (int col = 0; col < matrix.getColumnDimension(); col++)
            row.addCell("L-" + (col + 1)).setContent(matrix.get(rowIdx, col));
    }
    m_Loadings = loadings;
    m_Scores = (SpreadSheet) conv.getOutput();

    return null;
}
From source file: adams.flow.transformer.AbstractWekaPredictionsTransformer.java
License: Open Source License
/** * Filters the data accordingly to the selected attribute range. * * @param data the data to filter/*from w ww .j a va 2s . co m*/ * @return the filtered data, null if filtering failed */ protected Instances filterTestData(Instances data) { int[] indices; Remove remove; try { m_TestAttributes.setMax(data.numAttributes()); indices = m_TestAttributes.getIntIndices(); remove = new Remove(); remove.setAttributeIndicesArray(indices); remove.setInvertSelection(true); remove.setInputFormat(data); return Filter.useFilter(data, remove); } catch (Exception e) { getLogger().log(Level.SEVERE, "Failed to filter test data using range: " + m_TestAttributes, e); return null; } }
From source file: adams.flow.transformer.WekaInstancesMerge.java
License: Open Source License
/** * Excludes attributes from the data./*from www . ja v a 2s .com*/ * * @param inst the data to process * @return the processed data */ protected Instances excludeAttributes(Instances inst) { Instances result; StringBuilder atts; int i; Remove filter; // determine attribute indices atts = new StringBuilder(); for (i = 0; i < inst.numAttributes(); i++) { if (inst.attribute(i).name().matches(m_ExcludedAttributes)) { if (atts.length() > 0) atts.append(","); atts.append((i + 1)); } } // filter data try { filter = new Remove(); filter.setAttributeIndices(atts.toString()); filter.setInvertSelection(m_InvertMatchingSense); filter.setInputFormat(inst); result = weka.filters.Filter.useFilter(inst, filter); } catch (Exception e) { result = inst; handleException("Error filtering data:", e); } return result; }
From source file: adams.flow.transformer.WekaInstancesMerge.java
License: Open Source License
/**
 * Executes the flow item: merges multiple datasets (supplied as file names,
 * files, single instances or datasets) either side-by-side or keyed on a
 * unique row-ID attribute.
 *
 * @return null if everything is fine, otherwise error message
 */
@Override
protected String doExecute() {
    String result;
    String[] filesStr;
    File[] files;
    int i;
    Instances output;
    Instances[] orig;
    Instances[] inst;
    Instance[] rows;
    HashSet ids;
    int max;
    TIntList uniqueList;
    Remove remove;

    result = null;

    // normalize the input payload into either "files" or "orig" datasets
    files = null;
    orig = null;
    if (m_InputToken.getPayload() instanceof String[]) {
        filesStr = (String[]) m_InputToken.getPayload();
        files = new File[filesStr.length];
        for (i = 0; i < filesStr.length; i++)
            files[i] = new PlaceholderFile(filesStr[i]);
    } else if (m_InputToken.getPayload() instanceof File[]) {
        files = (File[]) m_InputToken.getPayload();
    } else if (m_InputToken.getPayload() instanceof Instance[]) {
        // wrap each single row in its own one-row dataset
        rows = (Instance[]) m_InputToken.getPayload();
        orig = new Instances[rows.length];
        for (i = 0; i < rows.length; i++) {
            orig[i] = new Instances(rows[i].dataset(), 1);
            orig[i].add((Instance) rows[i].copy());
        }
    } else if (m_InputToken.getPayload() instanceof Instances[]) {
        orig = (Instances[]) m_InputToken.getPayload();
    } else {
        throw new IllegalStateException("Unhandled input type: " + m_InputToken.getPayload().getClass());
    }

    try {
        output = null;
        // simple merge: plain side-by-side concatenation of attributes
        if (m_UniqueID.length() == 0) {
            if (files != null) {
                inst = new Instances[1];
                for (i = 0; i < files.length; i++) {
                    if (isStopped())
                        break;
                    inst[0] = DataSource.read(files[i].getAbsolutePath());
                    inst[0] = prepareData(inst[0], i);
                    if (i == 0) {
                        output = inst[0];
                    } else {
                        if (isLoggingEnabled())
                            getLogger().info("Merging with file #" + (i + 1) + ": " + files[i]);
                        output = Instances.mergeInstances(output, inst[0]);
                    }
                }
            } else if (orig != null) {
                inst = new Instances[1];
                for (i = 0; i < orig.length; i++) {
                    if (isStopped())
                        break;
                    inst[0] = prepareData(orig[i], i);
                    if (i == 0) {
                        output = inst[0];
                    } else {
                        if (isLoggingEnabled())
                            getLogger()
                                    .info("Merging with dataset #" + (i + 1) + ": " + orig[i].relationName());
                        output = Instances.mergeInstances(output, inst[0]);
                    }
                }
            }
        }
        // merge based on row IDs
        else {
            m_AttType = -1;
            max = 0;
            m_UniqueIDAtts = new ArrayList<>();
            // load all datasets up front and track the largest row count
            if (files != null) {
                orig = new Instances[files.length];
                for (i = 0; i < files.length; i++) {
                    if (isStopped())
                        break;
                    if (isLoggingEnabled())
                        getLogger().info("Loading file #" + (i + 1) + ": " + files[i]);
                    orig[i] = DataSource.read(files[i].getAbsolutePath());
                    max = Math.max(max, orig[i].numInstances());
                }
            } else if (orig != null) {
                for (i = 0; i < orig.length; i++)
                    max = Math.max(max, orig[i].numInstances());
            }
            // collect the row IDs and prepare each dataset for merging
            inst = new Instances[orig.length];
            ids = new HashSet(max);
            for (i = 0; i < orig.length; i++) {
                if (isStopped())
                    break;
                if (isLoggingEnabled())
                    getLogger().info("Updating IDs #" + (i + 1));
                updateIDs(i, orig[i], ids);
                if (isLoggingEnabled())
                    getLogger().info("Preparing dataset #" + (i + 1));
                inst[i] = prepareData(orig[i], i);
            }
            output = merge(orig, inst, ids);
            // remove unnecessary unique ID attributes
            if (m_KeepOnlySingleUniqueID) {
                uniqueList = new TIntArrayList();
                for (String att : m_UniqueIDAtts)
                    uniqueList.add(output.attribute(att).index());
                if (uniqueList.size() > 0) {
                    if (isLoggingEnabled())
                        getLogger().info("Removing duplicate unique ID attributes: " + m_UniqueIDAtts);
                    remove = new Remove();
                    remove.setAttributeIndicesArray(uniqueList.toArray());
                    remove.setInputFormat(output);
                    output = Filter.useFilter(output, remove);
                }
            }
        }
        // only emit a token if execution was not stopped mid-merge
        if (!isStopped()) {
            m_OutputToken = new Token(output);
            updateProvenance(m_OutputToken);
        }
    } catch (Exception e) {
        result = handleException("Failed to merge: ", e);
    }

    return result;
}
From source file: adams.flow.transformer.WekaMultiLabelSplitter.java
License: Open Source License
/**
 * Returns the generated token: the dataset reduced to a single class
 * attribute (the next one from the to-process list), with all other class
 * attributes removed and, optionally, the class moved to the last position.
 *
 * @return the generated token
 */
@Override
public Token output() {
    Token result;
    int index;
    Remove remove;
    Reorder reorder;
    StringBuilder indices;
    int i;
    int newIndex;
    Instances processed;

    result = null;
    // next class attribute to generate a dataset for
    index = m_AttributesToProcess.remove(0);

    // build a comma-separated list (1-based) of all OTHER class attributes
    // and remove them from the dataset
    remove = new Remove();
    indices = new StringBuilder();
    for (i = 0; i < m_ClassAttributes.size(); i++) {
        if (m_ClassAttributes.get(i) == index)
            continue;
        if (indices.length() > 0)
            indices.append(",");
        indices.append("" + (m_ClassAttributes.get(i) + 1));
    }
    remove.setAttributeIndices(indices.toString());
    try {
        remove.setInputFormat(m_Dataset);
        processed = weka.filters.Filter.useFilter(m_Dataset, remove);
        if (m_UpdateRelationName)
            processed.setRelationName(m_Dataset.attribute(index).name());
        result = new Token(processed);
    } catch (Exception e) {
        processed = null;
        handleException(
                "Failed to process dataset with following filter setup:\n" + OptionUtils.getCommandLine(remove),
                e);
    }

    // optionally move the remaining class attribute to the last position
    // (Reorder with the class index appended at the end of the index list)
    if (m_MakeClassLast && (processed != null)) {
        newIndex = processed.attribute(m_Dataset.attribute(index).name()).index();
        indices = new StringBuilder();
        for (i = 0; i < processed.numAttributes(); i++) {
            if (i == newIndex)
                continue;
            if (indices.length() > 0)
                indices.append(",");
            indices.append("" + (i + 1));
        }
        if (indices.length() > 0)
            indices.append(",");
        indices.append("" + (newIndex + 1));
        reorder = new Reorder();
        try {
            reorder.setAttributeIndices(indices.toString());
            reorder.setInputFormat(processed);
            processed = weka.filters.Filter.useFilter(processed, reorder);
            if (m_UpdateRelationName)
                processed.setRelationName(m_Dataset.attribute(index).name());
            result = new Token(processed);
        } catch (Exception e) {
            handleException("Failed to process dataset with following filter setup:\n"
                    + OptionUtils.getCommandLine(reorder), e);
        }
    }

    return result;
}
From source file: adams.ml.data.InstancesView.java
License: Open Source License
/**
 * Returns a spreadsheet containing only output columns, i.e., the class
 * columns.
 *
 * @return the output features, null if data has no class columns
 */
@Override
public SpreadSheet getOutputs() {
    // no class attribute means there are no output columns
    if (m_Data.classIndex() == -1)
        return null;

    // work on a copy with the class index cleared so Remove can drop freely
    Instances copy = new Instances(m_Data);
    copy.setClassIndex(-1);

    // inverted Remove keeps only the (original) class column
    Remove keepClass = new Remove();
    keepClass.setAttributeIndicesArray(new int[] { m_Data.classIndex() });
    keepClass.setInvertSelection(true);
    try {
        keepClass.setInputFormat(copy);
        return new InstancesView(Filter.useFilter(copy, keepClass));
    } catch (Exception e) {
        throw new IllegalStateException("Failed to apply Remove filter!", e);
    }
}