Java tutorial
/* * To change this license header, choose License Headers in Project Properties. * To change this template file, choose Tools | Templates * and open the template in the editor. */ package DiversifyTopKShaepelet; import java.io.File; import java.io.FileReader; import java.io.FileWriter; import java.awt.Point; import java.lang.Math; import java.util.Iterator; import java.util.ArrayList; import java.util.Arrays; import java.util.Collections; import java.util.Scanner; import java.util.TreeMap; import java.util.HashSet; import java.util.HashMap; import java.util.Map; import java.util.AbstractMap; import java.util.Comparator; import java.util.Random; import weka.associations.CARuleMiner; import weka.core.*; import weka.core.shapelet.*; import weka.filters.SimpleBatchFilter; /** * * @author sun */ public class DiversifyTopKShaepelet extends SimpleBatchFilter { @Override public String globalInfo() { throw new UnsupportedOperationException("Not supported yet."); } protected boolean supressOutput = false; protected int minShapeletLength; protected int maxShapeletLength; protected int numShapelets; protected boolean shapeletsTrained; protected ArrayList<LegacyShapelet> shapelets; protected String outputFileLocation = "defaultShapeletOutput.txt"; protected boolean recordShapelets = true; public static int DEFAULT_NUMSHAPELETS = 10; public static int DEFAULT_MINSHAPELETLENGTH = 3; public static int DEFAULT_MAXSHAPELETLENGTH = 30; public static HashMap<Integer, USAXElmentType> USAXMap = new HashMap<Integer, USAXElmentType>(); public static ArrayList<Map.Entry<Integer, Double>> scoreList = new ArrayList<>(); public static ArrayList<Dresult> DResultSet = new ArrayList<Dresult>(); protected QualityMeasures.ShapeletQualityMeasure qualityMeasure; protected QualityMeasures.ShapeletQualityChoice qualityChoice; protected boolean useCandidatePruning; protected int candidatePruningStartPercentage; protected static final double ROUNDING_ERROR_CORRECTION = 0.000000000000001; protected int[] dataSourceIDs; //Variables for experiments private static long subseqDistOpCount; /** * Default constructor; Quality measure defaults to information gain. */ public DiversifyTopKShaepelet() { this(DEFAULT_NUMSHAPELETS, DEFAULT_MINSHAPELETLENGTH, DEFAULT_MAXSHAPELETLENGTH, QualityMeasures.ShapeletQualityChoice.INFORMATION_GAIN); } /** * Single param constructor: Quality measure defaults to information gain. * * @param k the number of shapelets to be generated */ public DiversifyTopKShaepelet(int k) { this(k, DEFAULT_MINSHAPELETLENGTH, DEFAULT_MAXSHAPELETLENGTH, QualityMeasures.ShapeletQualityChoice.INFORMATION_GAIN); } /** * Full constructor to create a usable filter. Quality measure defaults to * information gain. * * @param k the number of shapelets to be generated * @param minShapeletLength minimum length of shapelets * @param maxShapeletLength maximum length of shapelets */ public DiversifyTopKShaepelet(int k, int minShapeletLength, int maxShapeletLength) { this(k, minShapeletLength, maxShapeletLength, QualityMeasures.ShapeletQualityChoice.INFORMATION_GAIN); } /** * Full, exhaustive, constructor for a filter. Quality measure set via enum, * invalid selection defaults to information gain. * * @param k the number of shapelets to be generated * @param minShapeletLength minimum length of shapelets * @param maxShapeletLength maximum length of shapelets * @param qualityChoice the shapelet quality measure to be used with this * filter */ public DiversifyTopKShaepelet(int k, int minShapeletLength, int maxShapeletLength, weka.core.shapelet.QualityMeasures.ShapeletQualityChoice qualityChoice) { this.minShapeletLength = minShapeletLength; this.maxShapeletLength = maxShapeletLength; this.numShapelets = k; this.shapelets = new ArrayList<>(); this.shapeletsTrained = false; this.useCandidatePruning = false; this.qualityChoice = qualityChoice; switch (qualityChoice) { case F_STAT: this.qualityMeasure = new QualityMeasures.FStat(); break; case KRUSKALL_WALLIS: this.qualityMeasure = new QualityMeasures.KruskalWallis(); break; case MOODS_MEDIAN: this.qualityMeasure = new QualityMeasures.MoodsMedian(); break; default: this.qualityMeasure = new QualityMeasures.InformationGain(); } } /** * Supresses filter output to the console; useful when running timing * experiments. */ public void supressOutput() { this.supressOutput = true; } /** * Use candidate pruning technique when checking candidate quality. This * speeds up the transform processing time. */ public void useCandidatePruning() { this.useCandidatePruning = true; this.candidatePruningStartPercentage = 10; } /** * * @param f */ public void setCandidatePruning(boolean f) { this.useCandidatePruning = f; if (f) { this.candidatePruningStartPercentage = 10; } else //Not necessary { this.candidatePruningStartPercentage = 100; } } /** * Use candidate pruning technique when checking candidate quality. This * speeds up the transform processing time. * * @param percentage the percentage of data to be precocessed before pruning * is initiated. In most cases the higher the percentage the less effective * pruning becomes */ public void useCandidatePruning(int percentage) { this.useCandidatePruning = true; this.candidatePruningStartPercentage = percentage; } /** * Mutator method to set the number of shapelets to be stored by the filter. * * @param k the number of shapelets to be generated */ public void setNumberOfShapelets(int k) { this.numShapelets = k; } /** * * @return */ public int getNumberOfShapelets() { return numShapelets; } /** * Mutator method to set the minimum and maximum shapelet lengths for the * filter. * * @param minShapeletLength minimum length of shapelets * @param maxShapeletLength maximum length of shapelets */ public void setShapeletMinAndMax(int minShapeletLength, int maxShapeletLength) { this.minShapeletLength = minShapeletLength; this.maxShapeletLength = maxShapeletLength; } /** * Mutator method to set the quality measure used by the filter. As with * constructors, default selection is information gain unless another valid * selection is specified. * * @return */ public QualityMeasures.ShapeletQualityChoice getQualityMeasure() { return qualityChoice; } /** * * @param qualityChoice */ public void setQualityMeasure(QualityMeasures.ShapeletQualityChoice qualityChoice) { this.qualityChoice = qualityChoice; switch (qualityChoice) { case F_STAT: this.qualityMeasure = new QualityMeasures.FStat(); break; case KRUSKALL_WALLIS: this.qualityMeasure = new QualityMeasures.KruskalWallis(); break; case MOODS_MEDIAN: this.qualityMeasure = new QualityMeasures.MoodsMedian(); break; default: this.qualityMeasure = new QualityMeasures.InformationGain(); } } /** * Sets the format of the filtered instances that are output. I.e. will * include k attributes each shapelet distance and a class value * * @param inputFormat the format of the input data * @return a new Instances object in the desired output format * @throws Exception if all required parameters of the filter are not * initialised correctly */ @Override protected Instances determineOutputFormat(Instances inputFormat) throws Exception { if (this.numShapelets < 1) { throw new Exception( "ShapeletFilter not initialised correctly - please specify a value of k that is greater than or equal to 1"); } //Set up instances size and format. //int length = this.numShapelets; int length = this.shapelets.size(); FastVector atts = new FastVector(); String name; for (int i = 0; i < length; i++) { name = "Shapelet_" + i; atts.addElement(new Attribute(name)); } if (inputFormat.classIndex() >= 0) { //Classification set, set class //Get the class values as a fast vector Attribute target = inputFormat.attribute(inputFormat.classIndex()); FastVector vals = new FastVector(target.numValues()); for (int i = 0; i < target.numValues(); i++) { vals.addElement(target.value(i)); } atts.addElement(new Attribute(inputFormat.attribute(inputFormat.classIndex()).name(), vals)); } Instances result = new Instances("Shapelets" + inputFormat.relationName(), atts, inputFormat.numInstances()); if (inputFormat.classIndex() >= 0) { result.setClassIndex(result.numAttributes() - 1); } return result; } @Override public Instances process(Instances data) throws Exception { if (this.numShapelets < 1) { throw new Exception( "Number of shapelets initialised incorrectly - please select value of k greater than or equal to 1 (Usage: setNumberOfShapelets"); } int maxPossibleLength = data.instance(0).numAttributes() - 1; if (data.classIndex() < 0) { throw new Exception("Require that the class be set for the ShapeletTransform"); } if (this.minShapeletLength < 1 || this.maxShapeletLength < 1 || this.maxShapeletLength < this.minShapeletLength || this.maxShapeletLength > maxPossibleLength) { throw new Exception("Shapelet length parameters initialised incorrectly"); } //Sort data in round robin order dataSourceIDs = new int[data.numInstances()]; for (int i = 0; i < data.numInstances(); i++) { dataSourceIDs[i] = i; } // data = roundRobinData(data, dataSourceIDs); if (this.shapeletsTrained == false) { // shapelets discovery has not yet been caried out, so do so this.shapelets = findDiversityTopKShapelets(this.numShapelets, data, this.minShapeletLength, this.maxShapeletLength); // get k shapelets ATTENTION this.shapeletsTrained = true; if (!supressOutput) { System.out.println(shapelets.size() + " Shapelets have been generated"); } } Instances output = determineOutputFormat(data); // for each data, get distance to each shapelet and create new instance for (int i = 0; i < data.numInstances(); i++) { // for each data Instance toAdd = new Instance(this.shapelets.size() + 1); int shapeletNum = 0; for (LegacyShapelet s : this.shapelets) { double dist = subseqDistance(s.content, data.instance(i)); toAdd.setValue(shapeletNum++, dist); } toAdd.setValue(this.shapelets.size(), data.instance(i).classValue()); output.add(toAdd); } return output; } /** * Set file path for the filter log. Filter log includes shapelet quality, * seriesId, startPosition, and content for each shapelet. * * @param fileName the updated file path of the filter log */ public void setLogOutputFile(String fileName) { this.recordShapelets = true; this.outputFileLocation = fileName; } /** * Turns off log saving; useful for timing experiments where speed is * essential. */ public void turnOffLog() { this.recordShapelets = false; } public ArrayList<LegacyShapelet> findDiversityTopKShapelets(int numShapelets, Instances data, int minShaepeletLength, int maxShapeletLength) throws Exception { ArrayList<LegacyShapelet> kShapelets = new ArrayList<LegacyShapelet>(); //store up to k shapeles overall ArrayList<LegacyShapelet> tempKShapelets; //store temporary k shapelets each iteration ArrayList<LegacyShapelet> seriesShapelets = new ArrayList<LegacyShapelet>(); //store all temporary k shapelets each itreration for diversifying process int saxLENGTH = 15; int w = 4; int R = 10; double percentMask = 0.25; int topK = 10; TreeMap<Double, Integer> classDistributions = getClassDistributions(data); //calc info gain//calc info gain//calc info gain//calc info gain int numClass = classDistributions.size(); if (!supressOutput) { System.out.println("Processing data: "); } int numInstances = data.numInstances(); for (int length = minShaepeletLength; length <= maxShapeletLength; length++) { createSAXList(length, saxLENGTH, w, data); randomProjection(R, percentMask, saxLENGTH); scoreAllSAX(R, numClass, data); tempKShapelets = findBestTopKSAX(length, topK, data, numClass); for (int i = 0; i < tempKShapelets.size(); i++) { seriesShapelets.add(tempKShapelets.get(i)); } USAXMap.clear(); scoreList.clear(); } ArrayList<GraphNode> Graph = new ArrayList<GraphNode>(); Graph = constructShapeletGraph(seriesShapelets, data); kShapelets = DiversifyTopKQuery(Graph, numShapelets); return kShapelets; // return seriesShapelets; } public ArrayList<LegacyShapelet> DiversifyTopKQuery(ArrayList<GraphNode> graph, int k) { ArrayList<Dresult> resultsList = new ArrayList<Dresult>(); resultsList = divAstar(graph, k); for (int i = k; i <= 1; i--) { if (resultsList.get(i).resultShapelets.size() == k) { return resultsList.get(i).resultShapelets; } } return null; } public ArrayList<LegacyShapelet> findBestTopKSAX(int subsequenceLength, int top_k, Instances data, int numClass) { int numObject = data.numInstances(); ArrayList<Point> Dist = new ArrayList<>(numObject); int word; int kk; double gain, distanceThreshold, gap; int qObject, qPosition; USAXElmentType usax; TreeMap<Double, Integer> classDistributions = getClassDistributions(data); // used to calc info gain double[] candidate = new double[subsequenceLength]; ArrayList<LegacyShapelet> shapelets = new ArrayList<LegacyShapelet>(); if (top_k > 0) { Collections.sort(scoreList, new Comparator<Map.Entry<Integer, Double>>() { @Override public int compare(Map.Entry<Integer, Double> a, Map.Entry<Integer, Double> b) { return ((Double) b.getValue()).compareTo((Double) a.getValue()); } }); } for (int k = 0; k < Math.min(top_k, (int) scoreList.size()); k++) { word = scoreList.get(k).getKey(); usax = USAXMap.get(word); for (kk = 0; kk < Math.min((int) usax.SAXIdArrayList.size(), 1); kk++) { qObject = usax.SAXIdArrayList.get(kk).x; qPosition = usax.SAXIdArrayList.get(kk).y; for (int i = 0; i < subsequenceLength; i++) { candidate[i] = data.instance(qObject).value(qPosition + i); } candidate = zNorm(candidate, false); LegacyShapelet candidateShapelet = checkCandidate(candidate, data, qObject, qPosition, classDistributions, null); shapelets.add(candidateShapelet); } } return shapelets; } public int sortScore(Map.Entry a, Map.Entry b) { return ((Double) a.getValue()).compareTo((Double) b.getValue()); } public ArrayList<Dresult> divAstar(ArrayList<GraphNode> G, int k) { MaxHeap<Entry> H = new MaxHeap<Entry>(); H.insert(new Entry()); for (int i = 0; i < 1110; i++) { Dresult d = new Dresult(); d.score = -1; DResultSet.add(d); } // for (int j = k; j >= 1; j--) { AStarSearch(G, H, k); // ArrayList<Entry> arrayEntrys = H.getArray(); // for (int m = 0; m < H.getCurrentSize(); m++) { // Entry entry=arrayEntrys.get(m); // // if(entry==null) continue; // // double bound = AstarBound(G,entry , k); // entry.setBound(bound); // H.update(arrayEntrys, m, entry); // } // } return DResultSet; } public void AStarSearch(ArrayList<GraphNode> G, MaxHeap<Entry> H, int k) { while ((!H.isEmpty()) && H.getMax().getBound() > maxDresultSet(DResultSet)) { Entry e = new Entry(); e = H.deleteMax(); for (int i = e.pos + 1; i < G.size(); i++) { if (!andSet(G.get(i).getAdjShapelets(), e.solution)) { Entry e_ = new Entry(); e_.solution = e.solution; e_.solution.add(G.get(i).getVertexShapelet()); e_.pos = i; e_.score = e.score + G.get(i).getVertexShapelet().qualityValue; e_.bound = AstarBound(G, e_, k); H.insert(e_); if (DResultSet.get(e_.solution.size()).score < e_.score) { DResultSet.get(e_.solution.size()).resultShapelets = e_.solution; DResultSet.get(e_.solution.size()).score = e_.score; } } } } } public double AstarBound(ArrayList<GraphNode> G, Entry e, int k) { int p, i; double bound; p = e.solution.size(); i = e.pos + 1; bound = e.score; while (p < k && i < G.size()) { if (!andSet(G.get(i).getAdjShapelets(), e.solution)) { bound = bound + G.get(i).getVertexShapelet().qualityValue; p = p + 1; } i = i + 1; } return bound; } public double maxDresultSet(ArrayList<Dresult> dresultSet) { double max = -2; for (int i = 0; i < dresultSet.size(); i++) { if (max < dresultSet.get(i).score) { max = dresultSet.get(i).score; } } return max; } public boolean andSet(ArrayList<LegacyShapelet> a, ArrayList<LegacyShapelet> b) { if (a == null || b == null) { return false; } for (int i = 0; i < a.size(); i++) { for (int j = 0; j < b.size(); j++) { if (a.get(i) == b.get(j)) { return true; } } } return false; } //shapelet??shapelets??? public ArrayList<GraphNode> constructShapeletGraph(ArrayList<LegacyShapelet> seriesShapelets, Instances data) { ArrayList<GraphNode> Graph = new ArrayList<GraphNode>(); Collections.sort(seriesShapelets); //??? for (int i = 0; i < seriesShapelets.size(); i++) { GraphNode node = new GraphNode(); node.setVertexShapelet(seriesShapelets.get(i)); Graph.add(node); } for (int i = 0; i < seriesShapelets.size(); i++) { for (int j = i + 1; j < seriesShapelets.size(); j++) { if (seriesShapelets.get(i).isSimilar(seriesShapelets.get(j), data)) { if (Graph.get(i).getAdjShapelets() == null) { ArrayList<LegacyShapelet> adjecentShapelets = new ArrayList<LegacyShapelet>(); adjecentShapelets.add(seriesShapelets.get(j)); Graph.get(i).setAdjShapelet(adjecentShapelets); } else { Graph.get(i).getAdjShapelets().add(seriesShapelets.get(j)); } if (Graph.get(j).getAdjShapelets() == null) { ArrayList<LegacyShapelet> adjecentShapelets = new ArrayList<LegacyShapelet>(); adjecentShapelets.add(seriesShapelets.get(i)); Graph.get(j).setAdjShapelet(adjecentShapelets); } else { Graph.get(j).getAdjShapelets().add(seriesShapelets.get(i)); } } } } return Graph; } protected void createSAXList(int subsequenceLength, int saxLength, int w, Instances data) { w = (int) Math.ceil((double) subsequenceLength / saxLength); saxLength = (int) Math.ceil((double) subsequenceLength / w); double ex, ex2, mean, std; double[] sumSegment = new double[saxLength]; //sumsegment?? int[] elementSegment = new int[saxLength]; int j, jSt, k, slot, objectId; double dataPoint; int word, previousWord; for (k = 0; k < saxLength; k++) { elementSegment[k] = w; } elementSegment[saxLength - 1] = subsequenceLength - (saxLength - 1) * w; // w for (objectId = 0; objectId < data.numInstances(); objectId++) { ex = ex2 = 0; previousWord = -1; for (k = 0; k < saxLength; k++) { sumSegment[k] = 0; } double[] timeSeriesObject = data.instance(objectId).toDoubleArray(); //case 1: Initial for (j = 0; (j < timeSeriesObject.length - 1) && (j < subsequenceLength); j++) { dataPoint = timeSeriesObject[j]; ex += dataPoint; ex2 += dataPoint * dataPoint; slot = (int) Math.floor(j / w); //slotw? sumSegment[slot] += dataPoint; // } //case 2: slightly update for (j = j; j <= timeSeriesObject.length - 1; j++) { jSt = j - subsequenceLength; mean = ex / subsequenceLength; std = Math.sqrt(ex2 / subsequenceLength - mean * mean); //create SAX from sumSegment word = createSAXWord(sumSegment, elementSegment, mean, std, saxLength); if (word != previousWord) { previousWord = word; if (!(USAXMap.containsKey(word))) { USAXMap.put(word, null); USAXElmentType usax = new USAXElmentType(); usax.objectHashSet.add(objectId); usax.SAXIdArrayList.add(new Point(objectId, jSt)); USAXMap.put(word, usax); } else { USAXMap.get(word).objectHashSet.add(objectId); USAXMap.get(word).SAXIdArrayList.add(new Point(objectId, jSt)); //////// } } /// for next updata if (j < timeSeriesObject.length - 1) { ex -= timeSeriesObject[jSt]; ex2 -= timeSeriesObject[jSt] * timeSeriesObject[jSt]; for (k = 0; k < saxLength - 1; k++) { sumSegment[k] -= timeSeriesObject[jSt + k * w]; sumSegment[k] += timeSeriesObject[jSt + (k + 1) * w]; } sumSegment[k] -= timeSeriesObject[jSt + k * w]; sumSegment[k] += timeSeriesObject[jSt + Math.min((k + 1) * w, subsequenceLength)]; dataPoint = timeSeriesObject[j]; ex += dataPoint; ex2 += dataPoint * dataPoint; } } } } protected int createSAXWord(double[] sumSegment, int[] eleSegment, double mean, double std, int saxLength) { int word = 0, val = 0; double d = 0; for (int i = 0; i < saxLength; i++) { d = (sumSegment[i] / eleSegment[i] - mean) / std; if (d < 0) { if (d < -0.67) { val = 0; } else { val = 1; } } else if (d < 0.67) { val = 2; } else { val = 3; } word = (word << 2) | (val); } return word; } protected int createMaskWord(int numMask, int wordLength) { int a, b; a = 0; for (int i = 0; i < numMask; i++) { Random random = new Random(); b = 1 << (random.nextInt(wordLength)); a = a | b; } return a; } protected void randomProjection(int R, double percentMask, int saxLength) { HashMap<Integer, HashSet<Integer>> hashMarkMap = new HashMap<>(); Iterator it = USAXMap.entrySet().iterator(); int word, maskWord, newWord; HashSet<Integer> objectSet; int numMask = (int) Math.ceil(percentMask * saxLength); for (int r = 0; r < R; r++) { maskWord = createMaskWord(numMask, saxLength); /// random projection and mark non-duplicate boject for (Map.Entry<Integer, USAXElmentType> entrySet : USAXMap.entrySet()) { word = entrySet.getKey(); objectSet = entrySet.getValue().objectHashSet; newWord = word | maskWord; if (!(hashMarkMap.containsKey(newWord))) { HashSet<Integer> temp = new HashSet<>(); temp.addAll(objectSet); hashMarkMap.put(newWord, temp); } else { hashMarkMap.get(newWord).addAll(objectSet); } } /// hash again for keep the count for (Map.Entry<Integer, USAXElmentType> entrySet : USAXMap.entrySet()) { word = entrySet.getKey(); newWord = word | maskWord; objectSet = hashMarkMap.get(newWord); Iterator objIt = objectSet.iterator(); while (objIt.hasNext()) { int mappedValue = (Integer) objIt.next(); if (entrySet.getValue().objectCountHashMap.containsKey(mappedValue)) { int temp = entrySet.getValue().objectCountHashMap.get(mappedValue); temp++; entrySet.getValue().objectCountHashMap.put(mappedValue, temp); } else { entrySet.getValue().objectCountHashMap.put(mappedValue, 1); } } } hashMarkMap.clear(); } } public void scoreAllSAX(int R, int numClass, Instances data) { Iterator it = USAXMap.entrySet().iterator(); int word; double score; USAXElmentType usax; while (it.hasNext()) { Map.Entry entry = (Map.Entry) it.next(); word = (Integer) entry.getKey(); usax = (USAXElmentType) entry.getValue(); score = calcScore(usax, R, numClass, data); Map.Entry<Integer, Double> tempPair = new AbstractMap.SimpleEntry<>(word, score); scoreList.add(tempPair); } } public double calcScore(USAXElmentType usax, int R, int numClass, Instances data) { // double score = -1; int cid, count; Iterator objectIt = usax.getObjectCountHashMap().entrySet().iterator(); ArrayList<Double> cIn = new ArrayList<>(); ArrayList<Double> cOut = new ArrayList<>(); for (int i = 0; i < numClass; i++) { cIn.add(0.0); cOut.add(0.0); } while (objectIt.hasNext()) { Map.Entry entry = (Map.Entry) objectIt.next(); cid = (int) data.instance((int) entry.getKey()).classValue(); count = (int) entry.getValue(); cIn.set(cid, cIn.get(cid) + count); cOut.set(cid, cOut.get(cid) + (R - count)); } score = calScoreFromObjectCount(cIn, cOut, numClass); return score; } public double calScoreFromObjectCount(ArrayList<Double> cIn, ArrayList<Double> cOut, int numClass) { //2 classes only //return Math.abs((cIn.get(0)+cOut.get(1))-(cOut.get(0)+cIn.get(1))); //multi-class double diff, sum = 0, maxValue = -Double.MAX_VALUE, minValue = Double.MIN_VALUE; for (int i = 0; i < numClass; i++) { diff = cIn.get(i) - cOut.get(i); if (diff > maxValue) { maxValue = diff; } if (diff < minValue) { minValue = diff; } sum += Math.abs(diff); } return (sum - Math.abs(maxValue) - Math.abs(minValue) + Math.abs(maxValue - minValue)); } /** * protected method to check a candidate shapelet. Functions by passing in * the raw data, and returning an assessed ShapeletTransform object. * * @param candidate the data from the candidate ShapeletTransform * @param data the entire data set to compare the candidate to * @param seriesId series id from the dataset that the candidate came from * @param startPos start position in the series where the candidate came * from * @param classDistribution a TreeMap<Double, Integer> in the form of * <Class Value, Frequency> to describe the dataset composition * @param qualityBound * @return a fully-computed ShapeletTransform, including the quality of this * candidate */ protected LegacyShapelet checkCandidate(double[] candidate, Instances data, int seriesId, int startPos, TreeMap classDistribution, QualityBound.ShapeletQualityBound qualityBound) { // create orderline by looping through data set and calculating the subsequence // distance from candidate to all data, inserting in order. ArrayList<OrderLineObj> orderline = new ArrayList<OrderLineObj>(); boolean pruned = false; for (int i = 0; i < data.numInstances(); i++) { //Check if it is possible to prune the candidate if (qualityBound != null) { if (qualityBound.pruneCandidate()) { pruned = true; break; } } double distance = 0.0; if (i != seriesId) { distance = subseqDistance(candidate, data.instance(i)); } double classVal = data.instance(i).classValue(); // without early abandon, it is faster to just add and sort at the end orderline.add(new OrderLineObj(distance, classVal)); //Update qualityBound - presumably each bounding method for different quality measures will have a different update procedure. if (qualityBound != null) { qualityBound.updateOrderLine(orderline.get(orderline.size() - 1)); } } // note: early abandon entropy pruning would appear here, but has been ommitted // in favour of a clear multi-class information gain calculation. Could be added in // this method in the future for speed up, but distance early abandon is more important //If shapelet is pruned then it should no longer be considered in further processing if (pruned) { return null; } else { // create a shapelet object to store all necessary info, i.e. LegacyShapelet shapelet = new LegacyShapelet(candidate, seriesId, startPos, this.qualityMeasure); shapelet.calculateQuality(orderline, classDistribution); shapelet.calcInfoGainAndThreshold(orderline, classDistribution); return shapelet; } } /** * Calculate the distance between a candidate series and an Instance object * * @param candidate a double[] representation of a shapelet candidate * @param timeSeriesIns an Instance object of a whole time series * @return the distance between a candidate and a time series */ protected double subseqDistance(double[] candidate, Instance timeSeriesIns) { return subsequenceDistance(candidate, timeSeriesIns); } /** * * @param candidate * @param timeSeriesIns * @return */ public static double subsequenceDistance(double[] candidate, Instance timeSeriesIns) { double[] timeSeries = timeSeriesIns.toDoubleArray(); return subsequenceDistance(candidate, timeSeries); } /** * Calculate the distance between a shapelet candidate and a full time * series (both double[]). * * @param candidate a double[] representation of a shapelet candidate * @param timeSeries a double[] representation of a whole time series (inc. * class value) * @return the distance between a candidate and a time series */ public static double subsequenceDistance(double[] candidate, double[] timeSeries) { double bestSum = Double.MAX_VALUE; double sum; double[] subseq; // for all possible subsequences of two for (int i = 0; i <= timeSeries.length - candidate.length - 1; i++) { sum = 0; // get subsequence of two that is the same lenght as one subseq = new double[candidate.length]; for (int j = i; j < i + candidate.length; j++) { subseq[j - i] = timeSeries[j]; //Keep count of fundamental ops for experiment subseqDistOpCount++; } subseq = zNormalise(subseq, false); // Z-NORM HERE //Keep count of fundamental ops for experiment subseqDistOpCount += 3 * subseq.length; for (int j = 0; j < candidate.length; j++) { sum += (candidate[j] - subseq[j]) * (candidate[j] - subseq[j]); //Keep count of fundamental ops for experiment subseqDistOpCount++; } if (sum < bestSum) { bestSum = sum; } } return (bestSum == 0.0) ? 0.0 : (1.0 / candidate.length * bestSum); } /** * * @param input * @param classValOn * @return */ protected double[] zNorm(double[] input, boolean classValOn) { return DiversifyTopKShaepelet.zNormalise(input, classValOn); } /** * Z-Normalise a time series * * @param input the input time series to be z-normalised * @param classValOn specify whether the time series includes a class value * (e.g. an full instance might, a candidate shapelet wouldn't) * @return a z-normalised version of input */ public static double[] zNormalise(double[] input, boolean classValOn) { double mean; double stdv; double classValPenalty = 0; if (classValOn) { classValPenalty = 1; } double[] output = new double[input.length]; double seriesTotal = 0; for (int i = 0; i < input.length - classValPenalty; i++) { seriesTotal += input[i]; } mean = seriesTotal / (input.length - classValPenalty); stdv = 0; for (int i = 0; i < input.length - classValPenalty; i++) { stdv += (input[i] - mean) * (input[i] - mean); } stdv = stdv / (input.length - classValPenalty); if (stdv < ROUNDING_ERROR_CORRECTION) { stdv = 0.0; } else { stdv = Math.sqrt(stdv); } for (int i = 0; i < input.length - classValPenalty; i++) { if (stdv == 0.0) { output[i] = 0.0; } else { output[i] = (input[i] - mean) / stdv; } } if (classValOn == true) { output[output.length - 1] = input[input.length - 1]; } return output; } /** * Private method to calculate the class distributions of a dataset. Main * purpose is for computing shapelet qualities. * * @param data the input data set that the class distributions are to be * derived from * @return a TreeMap<Double, Integer> in the form of * <Class Value, Frequency> */ public static TreeMap<Double, Integer> getClassDistributions(Instances data) { TreeMap<Double, Integer> classDistribution = new TreeMap<Double, Integer>(); double classValue; for (int i = 0; i < data.numInstances(); i++) { classValue = data.instance(i).classValue(); boolean classExists = false; for (Double d : classDistribution.keySet()) { if (d == classValue) { int temp = classDistribution.get(d); temp++; classDistribution.put(classValue, temp); classExists = true; } } if (classExists == false) { classDistribution.put(classValue, 1); } } return classDistribution; } /** * Load a set of Instances from an ARFF * * @param fileName the file name of the ARFF * @return a set of Instances from the ARFF */ public static Instances loadData(String fileName) { Instances data = null; try { FileReader r; r = new FileReader(fileName); data = new Instances(r); data.setClassIndex(data.numAttributes() - 1); } catch (Exception e) { System.out.println(" Error =" + e + " in method loadData"); e.printStackTrace(); } return data; } /** * An example use of a ShapeletTransform * * @param args command line args. arg[0] should spcify a set of training * instances to transform */ public static void main(String[] args) { try { // mandatory requirements: numShapelets (k), min shapelet length, max shapelet length, input data // additional information: log output dir // example filter, k = 10, minLength = 20, maxLength = 40, data = , output = exampleOutput.txt int k = 10; int minLength = 20; int maxLength = 4; // Instances data= ShapeletTransform.loadData("ItalyPowerDemand_TRAIN.arff"); // for example Instances data = DiversifyTopKShaepelet.loadData(args[0]); DiversifyTopKShaepelet dtks = new DiversifyTopKShaepelet(k, minLength, maxLength); dtks.setQualityMeasure(QualityMeasures.ShapeletQualityChoice.INFORMATION_GAIN); dtks.setLogOutputFile("exampleOutput.txt"); // log file stores shapelet output // Note: sf.process returns a transformed set of Instances. The first time that // thisFilter.process(data) is called, shapelet extraction occurs. Subsequent calls to process // uses the previously extracted shapelets to transform the data. For example: // ArrayList<LegacyShapelet> finalShapelets = dtks.findDiversityTopKShapelets(k, data, minLength, maxLength); // Instances transformedTrain = sf.process(trainingData); -> extracts shapelets and can be used to transform training data // Instances transformedTest = sf.process(testData); -> uses shapelets extracted from trainingData to transform testData System.out.println("-------------------------shapelets---------------------\n"); for (int i = 0; i < finalShapelets.size(); i++) { System.out.println("" + i + "shapelets\n"); for (int j = 0; j < finalShapelets.get(i).content.length; j++) { System.out.println(finalShapelets.get(i).content[j]); } } } catch (Exception e) { e.printStackTrace(); } } }