Usage examples for the weka.attributeSelection.AttributeSelection default constructor
public AttributeSelection()
From source file:FeatureSelectionClass.java
public AttributeSelection withGainRatio(String path) throws Exception { int N;//from ww w. j av a 2 s .c o m PreparingSteps pr = new PreparingSteps(); N = pr.getReadFileData(path).numAttributes(); Instances data = pr.getReadFileData(path); AttributeSelection selector = new AttributeSelection(); InfoGainAttributeEval evaluator = new InfoGainAttributeEval(); Ranker ranker = new Ranker(); ranker.setNumToSelect(Math.min(500, N - 1)); selector.setEvaluator(evaluator); selector.setSearch(ranker); selector.SelectAttributes(data); return selector; }
From source file:FeatureSelectionClass.java
/**
 * Runs attribute selection on the dataset at {@code path} using the
 * information-gain evaluator with a Ranker search, keeping at most 500
 * attributes (the class attribute is excluded from the count).
 *
 * @param path path of the data file to load
 * @return the configured {@link AttributeSelection} after selection has run
 * @throws Exception if loading the data or running the selection fails
 */
public AttributeSelection withInfoGain(String path) throws Exception {
    PreparingSteps pr = new PreparingSteps();
    // Load once and reuse (the original read the file twice).
    Instances data = pr.getReadFileData(path);
    int numAttrs = data.numAttributes();

    AttributeSelection selector = new AttributeSelection();
    // BUG FIX: this method previously instantiated GainRatioAttributeEval,
    // which contradicts its name (and was swapped with withGainRatio).
    InfoGainAttributeEval evaluator = new InfoGainAttributeEval();
    Ranker ranker = new Ranker();
    // Never request more attributes than exist besides the class attribute.
    ranker.setNumToSelect(Math.min(500, numAttrs - 1));

    selector.setEvaluator(evaluator);
    selector.setSearch(ranker);
    selector.SelectAttributes(data);
    return selector;
}
From source file:FeatureSelectionClass.java
/**
 * Runs attribute selection on the dataset at {@code path} using the
 * chi-squared evaluator with a Ranker search, keeping at most 500
 * attributes (the class attribute is excluded from the count).
 *
 * @param path path of the data file to load
 * @return the configured {@link AttributeSelection} after selection has run
 * @throws Exception if loading the data or running the selection fails
 */
public AttributeSelection withChiSquare(String path) throws Exception {
    PreparingSteps pr = new PreparingSteps();
    // Load once and reuse (the original read the file twice).
    Instances data = pr.getReadFileData(path);
    int numAttrs = data.numAttributes();

    AttributeSelection selector = new AttributeSelection();
    ChiSquaredAttributeEval evaluator = new ChiSquaredAttributeEval();
    Ranker ranker = new Ranker();
    // Never request more attributes than exist besides the class attribute.
    ranker.setNumToSelect(Math.min(500, numAttrs - 1));

    selector.setEvaluator(evaluator);
    selector.setSearch(ranker);
    selector.SelectAttributes(data);
    return selector;
}
From source file:RunExhaustiveSearch.java
License:Open Source License
protected static void runAttributeSelection(Instances data, int n) throws Exception { AttributeSelection attsel = new AttributeSelection(); CfsSubsetEval cost_function = new CfsSubsetEval(); // CFS cost function. ExhaustiveSearch algorithm = new ExhaustiveSearch(); // ES algorithm. cost_function.buildEvaluator(data);//from w w w .j a v a2 s . com attsel.setEvaluator(cost_function); attsel.setSearch(algorithm); attsel.SelectAttributes(data); int[] indices = attsel.selectedAttributes(); System.out.println("Selected features:\n" + Utils.arrayToString(indices)); }
From source file:task2.java
/** * Processes requests for both HTTP <code>GET</code> and <code>POST</code> * methods./* ww w .j a v a 2 s. c om*/ * * @param request servlet request * @param response servlet response * @throws ServletException if a servlet-specific error occurs * @throws IOException if an I/O error occurs */ protected void processRequest(HttpServletRequest request, HttpServletResponse response) throws ServletException, IOException { response.setContentType("text/html;charset=UTF-8"); try (PrintWriter out = response.getWriter()) { /* TODO output your page here. You may use following sample code. */ out.println("<!DOCTYPE html>"); out.println("<html>"); out.println("<head>"); out.println("<title>Servlet selection</title>"); out.println("</head>"); out.println("<body>"); CSVLoader loader = new CSVLoader(); loader.setSource(new File("C:/Users//Raguvinoth/Desktop/5339.csv")); Instances data = loader.getDataSet(); //Save ARFF ArffSaver saver = new ArffSaver(); saver.setInstances(data); saver.setFile(new File("\"C:/Users/Raguvinoth/Desktop/5339_converted.arff")); saver.writeBatch(); BufferedReader reader = new BufferedReader( new FileReader("C://Users//Raguvinoth//Desktop//weka1//5339_nominal.arff")); Instances data1 = new Instances(reader); if (data1.classIndex() == -1) data1.setClassIndex(data1.numAttributes() - 14); // 1. meta-classifier // useClassifier(data); // 2. AttributeSelector try { AttributeSelection attsel = new AttributeSelection(); GreedyStepwise search = new GreedyStepwise(); CfsSubsetEval eval = new CfsSubsetEval(); attsel.setEvaluator(eval); attsel.setSearch(search); attsel.SelectAttributes(data); int[] indices = attsel.selectedAttributes(); System.out.println("selected attribute indices:\n" + Utils.arrayToString(indices)); System.out.println("\n 4. 
Linear-Regression on above selected attributes"); long time1 = System.currentTimeMillis(); long sec1 = time1 / 1000; BufferedReader reader1 = new BufferedReader( new FileReader("C://Users//Raguvinoth//Desktop//weka1//5339_linear2.arff")); Instances data2 = new Instances(reader1); data2.setClassIndex(0); LinearRegression lr = new LinearRegression(); lr.buildClassifier(data2); System.out.println(lr.toString()); long time2 = System.currentTimeMillis(); long sec2 = time2 / 1000; long timeTaken = sec2 - sec1; System.out.println("Total time taken for building the model: " + timeTaken + " seconds"); for (int i = 0; i < 5; i++) { out.println("<p>" + "selected attribute indices:\n" + Utils.arrayToString(indices[i]) + "</p>"); } out.println("<p>" + "\n 4. Linear-Regression on above selected attributes" + "</p>"); out.println("<p>" + lr.toString() + "</p>"); out.println("<p>" + "Total time taken for building the model: " + timeTaken + " seconds" + "</p>"); out.println("</body>"); out.println("</html>"); } catch (Exception e) { // TODO Auto-generated catch block e.printStackTrace(); } } }
From source file:RunBestFirstSearch.java
License:Open Source License
protected static void runAttributeSelection(Instances data, int n) throws Exception { AttributeSelection attsel = new AttributeSelection(); CfsSubsetEval cost_function = new CfsSubsetEval(); // CFS cost function. BestFirst algorithm = new BestFirst(); // BFS algorithm. cost_function.buildEvaluator(data);/*from ww w .j a v a 2s . co m*/ algorithm.setLookupCacheSize(n); // BFS with forward direction and terminating search after five // non-improving nodes. // String[] parameters = { "-D 1", "-N 5" }; algorithm.setOptions(parameters); cost_function.setLocallyPredictive(false); attsel.setEvaluator(cost_function); attsel.setSearch(algorithm); attsel.SelectAttributes(data); int[] indices = attsel.selectedAttributes(); System.out.println("Selected features:\n" + Utils.arrayToString(indices)); }
From source file:PCADetector.java
License:Apache License
public boolean runPCA(ArrayList<Double> newData, int slidewdSz, double cAlpha, int nAttrs) { try {/* w w w . j av a 2 s. co m*/ if (m_nDims == 0) { m_nDims = nAttrs; for (int i = 0; i < this.m_nDims; i++) { m_oriDataMatrix.add(new ArrayList<Double>()); // one list for each attribute } } verifyData(newData); this.c_alpha = cAlpha; if (false == prepareData(newData, slidewdSz)) return false; Instances oriDataInsts = getInstances(); if (oriDataInsts != null) { // standardization + PCA covariance matrix m_scaledInstances = new Instances(oriDataInsts); Standardize filter = new Standardize(); filter.setInputFormat(m_scaledInstances); m_scaledInstances = Standardize.useFilter(m_scaledInstances, filter); // standardization PrincipalComponents PCA = new PrincipalComponents(); PCA.setVarianceCovered(1.0); // means 100% PCA.setMaximumAttributeNames(-1); PCA.setCenterData(true); Ranker ranker = new Ranker(); AttributeSelection selector = new AttributeSelection(); selector.setSearch(ranker); selector.setEvaluator(PCA); selector.SelectAttributes(m_scaledInstances); // Instances transformedData = selector.reduceDimensionality(m_scaledInstances); // get sorted eigens double[] eigenValues = PCA.getEigenValues(); // eigenVectors[i][j] i: rows; j: cols double[][] eigenVectors = PCA.getUnsortedEigenVectors(); Sort(eigenValues, eigenVectors); setEigens(eigenValues); // get residual start dimension int residualStartDimension = -1; double sum = 0; double major = 0; for (int ss = 0; ss < eigenValues.length; ss++) { sum += eigenValues[ss]; } for (int ss = 0; ss < eigenValues.length; ss++) { major += eigenValues[ss]; if ((residualStartDimension < 0) && (major / sum > 0.95)) { residualStartDimension = ss + 1; break; } } // System.out.println("residualStartDim: "+residualStartDimension); m_threshold = computeThreshold(eigenValues, residualStartDimension); // check new data abnormal or not boolean bAbnormal = checkSPE(eigenVectors, residualStartDimension, newData); computeProjPCs(eigenVectors, 
residualStartDimension, newData); // only for demo if (bAbnormal) { // anomaly, now to diagnosis // check original space using all the lists diagnosis(eigenVectors, residualStartDimension, newData); } } } catch (Exception exc) { } return true; }
From source file:adams.flow.transformer.WekaAttributeSelection.java
License:Open Source License
// NOTE(review): scraped one-line formatting preserved below; only comments added.
// Flow transformer step: takes Instances (or the train split of a
// WekaTrainTestSetContainer) from the input token and runs Weka attribute
// selection with the configured m_Evaluator/m_Search. With m_Folds >= 2 it
// cross-validates the selection (randomize, stratify nominal-class data,
// selectAttributesCVSplit per fold); otherwise it selects once, builds the
// reduced (and, for AttributeTransformer evaluators, transformed) dataset,
// a per-attribute stats spreadsheet (ranked scores or 0/1 selected flags)
// and the selected-attribute range string. Emits the results wrapped in a
// WekaAttributeSelectionContainer. Returns null on success, otherwise an
// error message (exceptions go through handleException).
/** * Executes the flow item.// w ww.j a v a2 s . c om * * @return null if everything is fine, otherwise error message */ @Override protected String doExecute() { String result; Instances data; Instances reduced; Instances transformed; AttributeSelection eval; boolean crossValidate; int fold; Instances train; WekaAttributeSelectionContainer cont; SpreadSheet stats; int i; Row row; int[] selected; double[][] ranked; Range range; String rangeStr; boolean useReduced; result = null; try { if (m_InputToken.getPayload() instanceof Instances) data = (Instances) m_InputToken.getPayload(); else data = (Instances) ((WekaTrainTestSetContainer) m_InputToken.getPayload()) .getValue(WekaTrainTestSetContainer.VALUE_TRAIN); if (result == null) { crossValidate = (m_Folds >= 2); // setup evaluation eval = new AttributeSelection(); eval.setEvaluator(m_Evaluator); eval.setSearch(m_Search); eval.setFolds(m_Folds); eval.setSeed((int) m_Seed); eval.setXval(crossValidate); // select attributes if (crossValidate) { Random random = new Random(m_Seed); data = new Instances(data); data.randomize(random); if ((data.classIndex() > -1) && data.classAttribute().isNominal()) { if (isLoggingEnabled()) getLogger().info("Stratifying instances..."); data.stratify(m_Folds); } for (fold = 0; fold < m_Folds; fold++) { if (isLoggingEnabled()) getLogger().info("Creating splits for fold " + (fold + 1) + "..."); train = data.trainCV(m_Folds, fold, random); if (isLoggingEnabled()) getLogger().info("Selecting attributes using all but fold " + (fold + 1) + "..."); eval.selectAttributesCVSplit(train); } } else { eval.SelectAttributes(data); } // generate reduced/transformed dataset reduced = null; transformed = null; if (!crossValidate) { reduced = eval.reduceDimensionality(data); if (m_Evaluator instanceof AttributeTransformer) transformed = ((AttributeTransformer) m_Evaluator).transformedData(data); } // generated stats stats = null; if (!crossValidate) { stats = new DefaultSpreadSheet(); row = 
stats.getHeaderRow(); useReduced = false; if (m_Search instanceof RankedOutputSearch) { i = reduced.numAttributes(); if (reduced.classIndex() > -1) i--; ranked = eval.rankedAttributes(); useReduced = (ranked.length == i); } if (useReduced) { for (i = 0; i < reduced.numAttributes(); i++) row.addCell("" + i).setContent(reduced.attribute(i).name()); row = stats.addRow(); for (i = 0; i < reduced.numAttributes(); i++) row.addCell(i).setContent(0.0); } else { for (i = 0; i < data.numAttributes(); i++) row.addCell("" + i).setContent(data.attribute(i).name()); row = stats.addRow(); for (i = 0; i < data.numAttributes(); i++) row.addCell(i).setContent(0.0); } if (m_Search instanceof RankedOutputSearch) { ranked = eval.rankedAttributes(); for (i = 0; i < ranked.length; i++) row.getCell((int) ranked[i][0]).setContent(ranked[i][1]); } else { selected = eval.selectedAttributes(); for (i = 0; i < selected.length; i++) row.getCell(selected[i]).setContent(1.0); } } // selected attributes rangeStr = null; if (!crossValidate) { range = new Range(); range.setIndices(eval.selectedAttributes()); rangeStr = range.getRange(); } // setup container if (crossValidate) cont = new WekaAttributeSelectionContainer(data, reduced, transformed, eval, m_Seed, m_Folds); else cont = new WekaAttributeSelectionContainer(data, reduced, transformed, eval, stats, rangeStr); m_OutputToken = new Token(cont); } } catch (Exception e) { m_OutputToken = null; result = handleException("Failed to process data:", e); } return result; }
From source file:ca.uottawa.balie.WekaAttributeSelection.java
License:Open Source License
/** * Select the top attributes/*from w ww.j a va2s. c om*/ */ public void Select(boolean pi_Debug) { Instances insts = m_DummyLearner.GetTrainInstances(); try { ASEvaluation eval = null; ASSearch search = null; if (m_Evaluator == WEKA_CHI_SQUARE) { eval = new ChiSquaredAttributeEval(); search = new Ranker(); ((Ranker) search).setNumToSelect(m_NumAttributes); } else if (m_Evaluator == WEKA_INFO_GAIN) { eval = new InfoGainAttributeEval(); search = new Ranker(); ((Ranker) search).setNumToSelect(m_NumAttributes); } else if (m_Evaluator == WEKA_WRAPPER) { eval = new ClassifierSubsetEval(); ((ClassifierSubsetEval) eval).setClassifier(new NaiveBayes()); search = new Ranker(); // TODO: use something else than ranker ((Ranker) search).setNumToSelect(m_NumAttributes); } else if (m_Evaluator == WEKA_SYM_UNCERT) { eval = new SymmetricalUncertAttributeEval(); search = new Ranker(); ((Ranker) search).setNumToSelect(m_NumAttributes); } else if (m_Evaluator == WEKA_SVM) { eval = new SVMAttributeEval(); search = new Ranker(); ((Ranker) search).setNumToSelect(m_NumAttributes); } else if (m_Evaluator == WEKA_RELIEF) { eval = new ReliefFAttributeEval(); search = new Ranker(); ((Ranker) search).setNumToSelect(m_NumAttributes); } else if (m_Evaluator == WEKA_ONER) { eval = new OneRAttributeEval(); search = new Ranker(); ((Ranker) search).setNumToSelect(m_NumAttributes); } m_AttributeSelection = new AttributeSelection(); m_AttributeSelection.setEvaluator(eval); m_AttributeSelection.setSearch(search); m_AttributeSelection.SelectAttributes(insts); if (pi_Debug) System.out.println(m_AttributeSelection.toResultsString()); } catch (Exception e) { System.err.println(e.getMessage()); } }
From source file:de.ugoe.cs.cpdp.dataprocessing.TopMetricFilter.java
License:Apache License
// NOTE(review): scraped one-line formatting preserved below; only comments added.
// Determines a "top-k" metric subset: (1) counts how often each non-class
// attribute appears in a J48 tree built per training set and ranks attributes
// by that count; (2) computes a CFS (CfsSubsetEval + backward GreedyStepwise)
// subset per training set, retrying once after upscaling any nominal attribute
// whose duplicate labels raise an IllegalArgumentException; (3) picks the
// count cutoff whose top-k set best covers the CFS sets; (4) over the power
// set of the first min(cutoff, 30) ranked attributes, picks the mutually
// uncorrelated combination (|Spearman| via isUncorrelated) with the best CFS
// coverage; (5) deletes every non-selected, non-class attribute from both
// testdata and all training sets.
// NOTE(review): bestCoverageValue and bestOptCoverage are initialised to
// Double.MIN_VALUE, which is the smallest POSITIVE double, not the most
// negative value. If every candidate coverage is 0.0, no combination is ever
// accepted and opttopkSetIndexSet stays null, causing an NPE in the loop that
// builds opttopkIndex — Double.NEGATIVE_INFINITY looks intended; confirm
// before changing.
private void determineTopKAttributes(Instances testdata, SetUniqueList<Instances> traindataSet) throws Exception { Integer[] counts = new Integer[traindataSet.get(0).numAttributes() - 1]; IntStream.range(0, counts.length).forEach(val -> counts[val] = 0); for (Instances traindata : traindataSet) { J48 decisionTree = new J48(); decisionTree.buildClassifier(traindata); int k = 0; for (int j = 0; j < traindata.numAttributes(); j++) { if (j != traindata.classIndex()) { if (decisionTree.toString().contains(traindata.attribute(j).name())) { counts[k] = counts[k] + 1; }// w w w . j a va2s . c o m k++; } } } int[] topkIndex = new int[counts.length]; IntStream.range(0, counts.length).forEach(val -> topkIndex[val] = val); SortUtils.quicksort(counts, topkIndex, true); // get CFSs for each training set List<Set<Integer>> cfsSets = new LinkedList<>(); for (Instances traindata : traindataSet) { boolean selectionSuccessful = false; boolean secondAttempt = false; Instances traindataCopy = null; do { try { if (secondAttempt) { AttributeSelection attsel = new AttributeSelection(); CfsSubsetEval eval = new CfsSubsetEval(); GreedyStepwise search = new GreedyStepwise(); search.setSearchBackwards(true); attsel.setEvaluator(eval); attsel.setSearch(search); attsel.SelectAttributes(traindataCopy); Set<Integer> cfsSet = new HashSet<>(); for (int attr : attsel.selectedAttributes()) { cfsSet.add(attr); } cfsSets.add(cfsSet); selectionSuccessful = true; } else { AttributeSelection attsel = new AttributeSelection(); CfsSubsetEval eval = new CfsSubsetEval(); GreedyStepwise search = new GreedyStepwise(); search.setSearchBackwards(true); attsel.setEvaluator(eval); attsel.setSearch(search); attsel.SelectAttributes(traindata); Set<Integer> cfsSet = new HashSet<>(); for (int attr : attsel.selectedAttributes()) { cfsSet.add(attr); } cfsSets.add(cfsSet); selectionSuccessful = true; } } catch (IllegalArgumentException e) { String regex = "A nominal attribute \\((.*)\\) cannot have duplicate labels.*"; 
Pattern p = Pattern.compile(regex); Matcher m = p.matcher(e.getMessage()); if (!m.find()) { // cannot treat problem, rethrow exception throw e; } String attributeName = m.group(1); int attrIndex = traindata.attribute(attributeName).index(); if (secondAttempt) { traindataCopy = WekaUtils.upscaleAttribute(traindataCopy, attrIndex); } else { traindataCopy = WekaUtils.upscaleAttribute(traindata, attrIndex); } Console.traceln(Level.FINE, "upscaled attribute " + attributeName + "; restarting training"); secondAttempt = true; continue; } } while (!selectionSuccessful); // dummy loop for internal continue } double[] coverages = new double[topkIndex.length]; for (Set<Integer> cfsSet : cfsSets) { Set<Integer> topkSet = new HashSet<>(); for (int k = 0; k < topkIndex.length; k++) { topkSet.add(topkIndex[k]); coverages[k] += (coverage(topkSet, cfsSet) / traindataSet.size()); } } double bestCoverageValue = Double.MIN_VALUE; int bestCoverageIndex = 0; for (int i = 0; i < coverages.length; i++) { if (coverages[i] > bestCoverageValue) { bestCoverageValue = coverages[i]; bestCoverageIndex = i; } } // build correlation matrix SpearmansCorrelation corr = new SpearmansCorrelation(); double[][] correlationMatrix = new double[bestCoverageIndex][bestCoverageIndex]; for (Instances traindata : traindataSet) { double[][] vectors = new double[bestCoverageIndex][traindata.size()]; for (int i = 0; i < traindata.size(); i++) { for (int j = 0; j < bestCoverageIndex; j++) { vectors[j][i] = traindata.get(i).value(topkIndex[j]); } } for (int j = 0; j < bestCoverageIndex; j++) { for (int k = j + 1; k < bestCoverageIndex; k++) { correlationMatrix[j][k] = Math.abs(corr.correlation(vectors[j], vectors[k])); } } } Set<Integer> topkSetIndexSet = new TreeSet<>(); // j<30 ensures that the computational time does not explode since the powerset is 2^n in // complexity for (int j = 0; j < bestCoverageIndex && j < 30; j++) { topkSetIndexSet.add(j); } Set<Set<Integer>> allCombinations = 
Sets.powerSet(topkSetIndexSet); double bestOptCoverage = Double.MIN_VALUE; Set<Integer> opttopkSetIndexSet = null; for (Set<Integer> combination : allCombinations) { if (isUncorrelated(correlationMatrix, combination)) { double currentCoverage = 0.0; Set<Integer> topkCombination = new TreeSet<>(); for (Integer index : combination) { topkCombination.add(topkIndex[index]); } for (Set<Integer> cfsSet : cfsSets) { currentCoverage += (coverage(topkCombination, cfsSet) / traindataSet.size()); } if (currentCoverage > bestOptCoverage) { bestOptCoverage = currentCoverage; opttopkSetIndexSet = combination; } } } Set<Integer> opttopkIndex = new TreeSet<>(); for (Integer index : opttopkSetIndexSet) { opttopkIndex.add(topkIndex[index]); } Console.traceln(Level.FINE, "selected the following metrics:"); for (Integer index : opttopkIndex) { Console.traceln(Level.FINE, traindataSet.get(0).attribute(index).name()); } // finally remove attributes for (int j = testdata.numAttributes() - 1; j >= 0; j--) { if (j != testdata.classIndex() && !opttopkIndex.contains(j)) { testdata.deleteAttributeAt(j); for (Instances traindata : traindataSet) { traindata.deleteAttributeAt(j); } } } }