Java tutorial
/* * Copyright 2013 SciFY NPO <info@scify.org>. * * This product is part of the NewSum Free Software. * For more information about NewSum visit * * http://www.scify.gr/site/en/our-projects/completed-projects/newsum-menu-en * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. * * If this code or its output is used, extended, re-engineered, integrated, * or embedded to any extent in another software or hardware, there MUST be * an explicit attribution to this work in the resulting source code, * the packaging (where such packaging exists), or user interface * (where such an interface exists). 
* The attribution must be of the form "Powered by NewSum, SciFY"
 */
package org.scify.NewSumServer.Server.MachineLearning;

import org.scify.NewSumServer.Server.MachineLearning.vector;
import gr.demokritos.iit.jinsect.storage.INSECTDB;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashSet;
import weka.core.Attribute;
import weka.core.DenseInstance;
import weka.core.Instances;
import weka.core.Utils;

/**
 * Builds the Weka datasets used for category classification: a training set
 * (one instance per stored training vector) and a single-instance labeling
 * set for a vector that still has to be classified.
 *
 * Both datasets share the same schema: one numeric attribute per class graph
 * name, followed by a nominal attribute named "Class" whose values are those
 * same names.
 *
 * @author panagiotis giotis
 */
public class dataSets {

    /**
     * Generates the training dataset.
     *
     * Each string returned by {@code vector.trainingVector(file)} is split on
     * commas; every token except the last is parsed as a numeric feature and
     * the last token is looked up among the class names to obtain the class
     * value.
     *
     * @param file the INSECTDB storage that is queried for the class graph
     *             names (category "cg") and handed to
     *             {@code vector.trainingVector}
     * @return the training dataset, one instance per training vector
     * @throws IllegalArgumentException if a vector carries more feature values
     *         than there are feature attributes
     * @throws NumberFormatException if a feature token is not a valid double
     */
    public static Instances trainingSet(INSECTDB file) {
        ArrayList<String> attVals = new ArrayList<String>();
        ArrayList<Attribute> atts = buildAttributes(file, attVals);
        Instances data = new Instances("train Set for Category Classification ", atts, 0);
        // The nominal "Class" attribute is always the last one in the schema.
        int classSlot = data.numAttributes() - 1;

        ArrayList<String> vectors = vector.trainingVector(file);
        for (String vi : vectors) {
            String[] vectorTable = vi.trim().split(",");
            // Everything but the last token is a feature; the last token is the label.
            int featureCount = vectorTable.length - 1;
            if (featureCount > classSlot) {
                throw new IllegalArgumentException("Training vector has " + featureCount
                        + " feature values but the dataset has only " + classSlot
                        + " feature attributes: " + vi);
            }

            // Start from "all missing" so that any slot the vector does not
            // supply is marked as unknown instead of silently becoming 0.0.
            double[] vals = new double[data.numAttributes()];
            Arrays.fill(vals, Utils.missingValue());

            // Parse ALL features. The previous bound (length - 2) skipped the
            // last feature, which therefore stayed 0.0 in every instance.
            for (int i = 0; i < featureCount; i++) {
                vals[i] = Double.parseDouble(vectorTable[i]);
            }

            // Store the label in the class attribute's own slot. A label that
            // is not a known class name (indexOf == -1) is left missing rather
            // than stored as the invalid nominal index -1.
            int labelIndex = attVals.indexOf(vectorTable[vectorTable.length - 1]);
            if (labelIndex >= 0) {
                vals[classSlot] = labelIndex;
            }

            data.add(new DenseInstance(1.0, vals));
        }
        return data;
    }

    /**
     * Generates the labeling dataset: a single instance whose feature values
     * come from {@code Ivector} and whose class value is missing.
     *
     * @param file    the INSECTDB storage that is queried for the class graph
     *                names (category "cg")
     * @param Ivector comma-separated similarity values between the item to be
     *                labeled and the class graphs, one value per class graph
     * @return the labeling dataset containing exactly one instance
     * @throws IllegalArgumentException if {@code Ivector} carries more values
     *         than there are feature attributes
     * @throws NumberFormatException if a token is not a valid double
     */
    public static Instances labelingSet(INSECTDB file, String Ivector) {
        ArrayList<String> attVals = new ArrayList<String>();
        ArrayList<Attribute> atts = buildAttributes(file, attVals);
        Instances data = new Instances("label Set for Category Classification ", atts, 0);
        // The nominal "Class" attribute is always the last one in the schema.
        int classSlot = data.numAttributes() - 1;

        String[] vectorTable = Ivector.trim().split(",");
        if (vectorTable.length > classSlot) {
            throw new IllegalArgumentException("Labeling vector has " + vectorTable.length
                    + " values but the dataset has only " + classSlot
                    + " feature attributes: " + Ivector);
        }

        // Start from "all missing": the class slot must stay missing, and so
        // does any feature the vector does not supply. Previously the missing
        // marker was written right after the last parsed value, which is the
        // class slot only when the vector length matches the schema exactly.
        double[] vals = new double[data.numAttributes()];
        Arrays.fill(vals, Utils.missingValue());
        for (int i = 0; i < vectorTable.length; i++) {
            vals[i] = Double.parseDouble(vectorTable[i]);
        }

        data.add(new DenseInstance(1.0, vals));
        return data;
    }

    /**
     * Builds the attribute schema shared by the training and labeling
     * datasets: one numeric attribute per distinct class graph name, then a
     * nominal "Class" attribute listing those names.
     *
     * NOTE(review): attribute order follows the HashSet's iteration order.
     * Presumably the vector producers emit their values in the same order;
     * confirm against vector.trainingVector. The set is deliberately built
     * with the no-arg constructor plus addAll, because the copy constructor
     * may size the table differently and so change the iteration order.
     *
     * @param file       the INSECTDB storage queried for the "cg" object list
     * @param classNames output list that receives the class names in attribute
     *                   order; expected to be empty on entry
     * @return the attribute list, with the "Class" attribute last
     */
    private static ArrayList<Attribute> buildAttributes(INSECTDB file, ArrayList<String> classNames) {
        ArrayList<Attribute> atts = new ArrayList<Attribute>();
        HashSet<String> hasGnames = new HashSet<String>();
        hasGnames.addAll(Arrays.asList(file.getObjectList("cg")));
        for (String name : hasGnames) {
            atts.add(new Attribute(name));
            classNames.add(name);
        }
        atts.add(new Attribute("Class", classNames));
        return atts;
    }
}