acromusashi.stream.ml.anomaly.lof.LofCalculator.java Source code

Java tutorial

Introduction

Here is the source code for acromusashi.stream.ml.anomaly.lof.LofCalculator.java

Source

/**
* Copyright (c) Acroquest Technology Co, Ltd. All Rights Reserved.
* Please read the associated COPYRIGHTS file for more details.
*
* THE SOFTWARE IS PROVIDED BY Acroquest Technolog Co., Ltd.,
* WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING
* BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDER BE LIABLE FOR ANY
* CLAIM, DAMAGES SUFFERED BY LICENSEE AS A RESULT OF USING, MODIFYING
* OR DISTRIBUTING THIS SOFTWARE OR ITS DERIVATIVES.
*/
package acromusashi.stream.ml.anomaly.lof;

import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;

import org.apache.commons.collections.ComparatorUtils;
import org.apache.commons.lang.StringUtils;
import org.apache.commons.math.util.MathUtils;

import acromusashi.stream.ml.anomaly.lof.entity.DistanceResult;
import acromusashi.stream.ml.anomaly.lof.entity.DistanceResultComparator;
import acromusashi.stream.ml.anomaly.lof.entity.KDistanceResult;
import acromusashi.stream.ml.anomaly.lof.entity.LofDataSet;
import acromusashi.stream.ml.anomaly.lof.entity.LofPoint;
import acromusashi.stream.ml.anomaly.lof.entity.LofPointComparator;

/**
 * Utility class providing static methods for Local Outlier Factor (LOF) calculation.
 * 
 * @author kimura
 */
public class LofCalculator {
    /**
     * Private constructor; every member of this class is static, so it is not meant to be instantiated.
     */
    private LofCalculator() {
    }

    /**
     * Calculates the LOF score of a point without relying on intermediate values
     * (k-distance, k-distance neighbors, LRD) already stored in the data set.<br>
     * The target point and the data set are both copied with {@code deepCopy()}, and the intermediate
     * values of every point in the copied data set are recomputed with
     * {@link #initDataSet(int, LofDataSet)} before scoring.<br>
     * NOTE(review): this assumes {@code deepCopy()} returns independent copies, so that the arguments
     * themselves stay untouched - confirm against LofPoint / LofDataSet.
     * 
     * @param kn K (number of neighbors)
     * @param targetPoint point to score
     * @param dataSet data set the point is scored against
     * @return LOF score of the target point
     */
    public static double calculateLofNoIntermediate(int kn, LofPoint targetPoint, LofDataSet dataSet) {
        // Neighborhood of the target point, measured against the caller's data set.
        KDistanceResult neighborhood = calculateKDistance(kn, targetPoint, dataSet);

        // Store the neighborhood on a copy of the target point.
        LofPoint scoredPoint = targetPoint.deepCopy();
        scoredPoint.setkDistanceNeighbor(neighborhood.getkDistanceNeighbor());
        scoredPoint.setkDistance(neighborhood.getkDistance());

        // Rebuild all intermediate values on a copy of the data set.
        LofDataSet workingSet = dataSet.deepCopy();
        initDataSet(kn, workingSet);

        // The LRD of the scored point is needed by calculateLof, so set it first.
        updateLrd(scoredPoint, workingSet);
        return calculateLof(scoredPoint, workingSet);
    }

    /**
     * Adds a point to the data set.<br>
     * If the data set already holds {@code max} or more entries, the entry whose ID is first in
     * {@code dataSet.getDataIdList()} is deleted before the new point is added
     * (presumably this is the oldest entry - confirm against LofDataSet).
     * 
     * @param max maximum number of entries to keep
     * @param addedPoint point to add
     * @param dataSet data set to add the point to
     * @return ID of the deleted entry, or {@code null} if nothing was deleted
     */
    public static String addPointToDataSet(int max, LofPoint addedPoint, LofDataSet dataSet) {
        List<String> dataIdList = dataSet.getDataIdList();

        String deleteId = null;
        // The emptiness check matters when max <= 0: the size comparison alone would then be true
        // for an empty data set and get(0) would throw IndexOutOfBoundsException.
        if (max <= dataIdList.size() && !dataIdList.isEmpty()) {
            deleteId = dataIdList.get(0);
            dataSet.deleteData(deleteId);
        }

        dataSet.addData(addedPoint);

        return deleteId;
    }

    /**
     * Calculates the LOF score of a point using the intermediate values already stored in the data set.<br>
     * The k-distance, k-distance neighbors and LRD of the target point are computed on a copy of the
     * point obtained with {@code deepCopy()}.<br>
     * This method does not add the point to the data set and does not recompute any value stored
     * on the data set's points.
     * 
     * @param kn K (number of neighbors)
     * @param targetPoint point to score
     * @param dataSet data set the point is scored against
     * @return LOF score of the target point
     */
    public static double calculateLofWithoutUpdate(int kn, LofPoint targetPoint, LofDataSet dataSet) {
        // Neighborhood of the target point within the current data set.
        KDistanceResult neighborhood = calculateKDistance(kn, targetPoint, dataSet);

        // Keep the caller's point untouched: write the results to a copy.
        LofPoint scoredPoint = targetPoint.deepCopy();
        scoredPoint.setkDistanceNeighbor(neighborhood.getkDistanceNeighbor());
        scoredPoint.setkDistance(neighborhood.getkDistance());

        // The LRD of the scored point is needed by calculateLof, so set it first.
        updateLrd(scoredPoint, dataSet);
        return calculateLof(scoredPoint, dataSet);
    }

    /**
     * Adds a point to the data set, refreshes the intermediate values affected by the change and
     * returns the LOF score of the added point.<br>
     * The k-distance and k-distance neighbors are recomputed only for the points selected by
     * {@link #generateUpdateTargets(LofPoint, LofDataSet, String)}; the LRD is recomputed for every
     * point in the data set.
     * 
     * @param kn K (number of neighbors)
     * @param max maximum number of entries to keep in the data set
     * @param addedPoint point to add and score
     * @param dataSet data set to update
     * @return LOF score of the added point
     */
    public static double calculateLofWithUpdate(int kn, int max, LofPoint addedPoint, LofDataSet dataSet) {
        // Register the new point; an existing entry may be deleted to respect the size limit.
        String removedId = addPointToDataSet(max, addedPoint, dataSet);

        // IDs of the points whose k-distance / neighbors have to be recomputed.
        Set<String> staleIds = generateUpdateTargets(addedPoint, dataSet, removedId);

        Collection<LofPoint> allPoints = dataSet.getDataMap().values();

        // First pass: refresh k-distance and neighbors of the affected points only.
        // This has to finish before any LRD is computed, because the LRD of a point reads the
        // k-distance of its neighbors.
        for (LofPoint point : allPoints) {
            if (!staleIds.contains(point.getDataId())) {
                continue;
            }
            updateKDistance(kn, point, dataSet);
        }

        // Second pass: refresh the LRD of every point.
        for (LofPoint point : allPoints) {
            updateLrd(point, dataSet);
        }

        // NOTE(review): calculateLof reads the neighbors and LRD from addedPoint itself, so this relies
        // on addedPoint being the same instance that addData stored in the data set - confirm.
        return calculateLof(addedPoint, dataSet);
    }

    /**
     * Recomputes the intermediate values of every point in the data set.<br>
     * The following values are updated on each point:
     * <ol>
     * <li>k-distance</li>
     * <li>IDs of the k-distance neighbors</li>
     * <li>local reachability density (LRD)</li>
     * </ol>
     * 
     * @param kn K (number of neighbors)
     * @param dataSet data set whose points are updated in place
     */
    public static void initDataSet(int kn, LofDataSet dataSet) {
        Collection<LofPoint> allPoints = dataSet.getDataMap().values();

        // Two separate passes are required: the LRD of a point reads the k-distance of its
        // neighbors, so every k-distance must be up to date before the first LRD is computed.
        for (LofPoint point : allPoints) {
            updateKDistance(kn, point, dataSet);
        }

        for (LofPoint point : allPoints) {
            updateLrd(point, dataSet);
        }
    }

    /**
     * Merges two data sets into a new one.<br>
     * The points of both data sets are sorted with {@link LofPointComparator}, the order is reversed,
     * and the points are then added to the result in that order until {@code max} points have been
     * added. A point whose ID has already been added is skipped.<br>
     * The intermediate values (k-distance, neighbors, LRD) are not recomputed here; call
     * {@link #initDataSet(int, LofDataSet)} on the result if they are needed.
     * 
     * @param baseDataSet first data set
     * @param targetDataSet second data set
     * @param max maximum number of points in the merged data set
     * @return newly created merged data set
     */
    public static LofDataSet mergeDataSet(LofDataSet baseDataSet, LofDataSet targetDataSet, int max) {
        // Collect the points of both data sets; base points come first.
        List<LofPoint> candidates = new ArrayList<>();
        candidates.addAll(baseDataSet.getDataMap().values());
        candidates.addAll(targetDataSet.getDataMap().values());

        // Sort, then flip the order so the entries the comparator ranks last are handled first
        // (presumably the newest points - confirm against LofPointComparator).
        Collections.sort(candidates, new LofPointComparator());
        Collections.reverse(candidates);

        LofDataSet merged = new LofDataSet();
        // IDs already copied to the result; its size equals the number of points added so far.
        Set<String> seenIds = new HashSet<>();

        for (LofPoint candidate : candidates) {
            // Both inputs may contain a point with the same ID; only the first one seen is kept.
            if (!seenIds.add(candidate.getDataId())) {
                continue;
            }

            merged.addData(candidate);

            // The limit is checked after adding, so the first candidate is always added.
            if (seenIds.size() >= max) {
                break;
            }
        }

        return merged;
    }

    /**
     * Builds the set of data IDs whose k-distance and k-distance neighbors must be recomputed after
     * a point has been added (and possibly another one deleted).<br>
     * The added point itself is always included. Any other point is included when at least one of
     * the following holds:
     * <ol>
     * <li>its k-distance neighbors contain the deleted ID</li>
     * <li>its distance to the added point is smaller than its current k-distance</li>
     * </ol>
     * 
     * @param addedPoint point that was added
     * @param dataSet data set after the addition
     * @param deleteId ID of the deleted point, or {@code null} if nothing was deleted
     * @return set of data IDs to recompute
     */
    protected static Set<String> generateUpdateTargets(LofPoint addedPoint, LofDataSet dataSet, String deleteId) {
        String addedId = addedPoint.getDataId();

        Set<String> updateTargets = new HashSet<>();
        updateTargets.add(addedId);

        for (LofPoint candidate : dataSet.getDataMap().values()) {
            // The added point is already registered above.
            if (StringUtils.equals(addedId, candidate.getDataId())) {
                continue;
            }

            // Condition 1: the deleted point was one of this point's neighbors.
            boolean lostNeighbor = deleteId != null && candidate.getkDistanceNeighbor().contains(deleteId);

            // Condition 2: the added point lies strictly inside this point's k-distance.
            double distanceToAdded = MathUtils.distance(addedPoint.getDataPoint(), candidate.getDataPoint());
            boolean gainedNeighbor = distanceToAdded < candidate.getkDistance();

            if (lostNeighbor || gainedNeighbor) {
                updateTargets.add(candidate.getDataId());
            }
        }

        return updateTargets;
    }

    /**
     * Recomputes the local reachability density (LRD) of a point and stores it on the point.
     * 
     * @param targetPoint point to update
     * @param dataSet data set used for the calculation
     */
    protected static void updateLrd(LofPoint targetPoint, LofDataSet dataSet) {
        targetPoint.setLrd(calculateLrd(targetPoint, dataSet));
    }

    /**
     * Recomputes the k-distance and the k-distance neighbor IDs of a point and stores both on the point.
     * 
     * @param kn K (number of neighbors)
     * @param targetPoint point to update
     * @param dataSet data set used for the calculation
     */
    protected static void updateKDistance(int kn, LofPoint targetPoint, LofDataSet dataSet) {
        KDistanceResult neighborhood = calculateKDistance(kn, targetPoint, dataSet);
        targetPoint.setkDistanceNeighbor(neighborhood.getkDistanceNeighbor());
        targetPoint.setkDistance(neighborhood.getkDistance());
    }

    /**
     * Calculates the k-distance and the k-distance neighbor IDs of a point within a data set.<br>
     * The distances to the other points are obtained from
     * {@link #calculateDistances(LofPoint, LofDataSet)} and reduced with
     * {@link #calculateKDistance(int, List)}.
     * 
     * @param kn K (number of neighbors)
     * @param targetPoint point whose neighborhood is calculated
     * @param dataSet data set to search
     * @return k-distance and neighbor IDs; if fewer than K other points exist, the result covers only those points
     */
    protected static KDistanceResult calculateKDistance(int kn, LofPoint targetPoint, LofDataSet dataSet) {
        List<DistanceResult> sortedDistances = calculateDistances(targetPoint, dataSet);
        return calculateKDistance(kn, sortedDistances);
    }

    /**
     * Calculates the reachability distance from {@code basePoint} to {@code targetPoint}.<br>
     * The result is the larger of the distance between the two points and the k-distance stored on
     * {@code targetPoint}.
     * 
     * @param basePoint point the distance is measured from
     * @param targetPoint point the distance is measured to; its stored k-distance is the lower bound of the result
     * @return reachability distance
     */
    protected static double calculateReachDistance(LofPoint basePoint, LofPoint targetPoint) {
        double distance = MathUtils.distance(basePoint.getDataPoint(), targetPoint.getDataPoint());

        // Math.max avoids the boxing, raw-typed comparator and Object-to-double cast of the generic
        // ComparatorUtils.max call while producing the same value.
        return Math.max(distance, targetPoint.getkDistance());
    }

    /**
     * Calculates the local reachability density (LRD) of {@code basePoint}.<br>
     * The LRD is the number of counted neighbors divided by the sum of the reachability distances
     * from {@code basePoint} to those neighbors. Neighbor IDs that are not present in the data set
     * are ignored.
     * 
     * @param basePoint point whose LRD is calculated; its stored k-distance neighbors are used
     * @param dataSet data set the neighbors are looked up in
     * @return LRD, or {@code 0.0} when the sum of the reachability distances is zero
     *         (which includes the case where no neighbor was counted)
     */
    protected static double calculateLrd(LofPoint basePoint, LofDataSet dataSet) {
        double reachDistanceSum = 0.0d;
        int neighborCount = 0;

        for (String neighborId : basePoint.getkDistanceNeighbor()) {
            LofPoint neighbor = dataSet.getDataMap().get(neighborId);

            // Only neighbors that are still present in the data set contribute.
            if (neighbor != null) {
                reachDistanceSum = reachDistanceSum + calculateReachDistance(basePoint, neighbor);
                neighborCount++;
            }
        }

        // Avoid dividing by zero; the (zero) sum itself is returned in that case.
        return (reachDistanceSum == 0.0d) ? reachDistanceSum : (neighborCount) / reachDistanceSum;
    }

    /**
     * Calculates the Local Outlier Factor (LOF) of {@code basePoint}.<br>
     * The LOF is the mean of {@code neighbor LRD / basePoint LRD} over the k-distance neighbors stored
     * on {@code basePoint}. The LRD values are read from the points as stored; they are not
     * recomputed here.<br>
     * Neighbor IDs that are not present in the data set are ignored, consistent with
     * {@link #calculateLrd(LofPoint, LofDataSet)}.
     * 
     * @param basePoint point whose LOF is calculated
     * @param dataSet data set the neighbors are looked up in
     * @return LOF score, or {@code 0.0} when no neighbor was counted
     */
    protected static double calculateLof(LofPoint basePoint, LofDataSet dataSet) {
        int countedData = 0;
        double totalAmount = 0.0d;
        // NOTE(review): an LRD of 0.0 on basePoint makes the divisions below produce Infinity or NaN
        // (floating-point division); calculateLrd returns 0.0 when the reach-distance sum is zero.
        double baseLrd = basePoint.getLrd();

        for (String targetDataId : basePoint.getkDistanceNeighbor()) {
            LofPoint targetPoint = dataSet.getDataMap().get(targetDataId);

            // A neighbor ID may refer to a point that is no longer in the data set; skip it
            // instead of failing with a NullPointerException.
            if (targetPoint == null) {
                continue;
            }

            totalAmount = totalAmount + (targetPoint.getLrd() / baseLrd);
            countedData++;
        }

        if (countedData == 0) {
            return totalAmount;
        }

        return totalAmount / (countedData);
    }

    /**
     * Calculates the distance from {@code targetPoint} to every other point in the data set.<br>
     * The entry whose key equals the ID of {@code targetPoint} is excluded. The result is sorted
     * with {@link DistanceResultComparator} (presumably ascending by distance - confirm against the
     * comparator).
     * 
     * @param targetPoint point the distances are measured from
     * @param dataSet data set containing the other points
     * @return sorted list of data ID / distance pairs
     */
    protected static List<DistanceResult> calculateDistances(LofPoint targetPoint, LofDataSet dataSet) {
        List<DistanceResult> results = new ArrayList<>();

        for (Map.Entry<String, LofPoint> entry : dataSet.getDataMap().entrySet()) {
            String otherId = entry.getKey();

            // Skip the target point itself.
            if (StringUtils.equals(otherId, targetPoint.getDataId())) {
                continue;
            }

            double distance = MathUtils.distance(entry.getValue().getDataPoint(), targetPoint.getDataPoint());
            results.add(new DistanceResult(otherId, distance));
        }

        Collections.sort(results, new DistanceResultComparator());
        return results;
    }

    /**
     * Derives the k-distance and the k-distance neighbor IDs from a list of distances.<br>
     * The first K entries of {@code distances} are taken in list order: their IDs become the neighbor
     * list and the distance of the last entry taken becomes the k-distance. The list is expected to
     * be sorted already; this method does not sort it.
     * 
     * @param kn K (number of neighbors); values below 1 are treated like 1, because the first entry is always taken
     * @param distances distance list, as produced by {@link #calculateDistances(LofPoint, LofDataSet)}
     * @return k-distance and neighbor IDs; if fewer than K entries exist, all entries are used, and an
     *         empty list yields a k-distance of {@code 0.0} with no neighbors
     */
    protected static KDistanceResult calculateKDistance(int kn, List<DistanceResult> distances) {
        // Number of leading entries to consume: at least one (when available), at most all of them.
        int takeCount = Math.min(distances.size(), Math.max(kn, 1));

        List<String> neighborIds = new ArrayList<>();
        double kDistance = 0.0d;

        for (DistanceResult nearest : distances.subList(0, takeCount)) {
            neighborIds.add(nearest.getDataId());
            // Ends up holding the distance of the last entry taken.
            kDistance = nearest.getDistance();
        }

        KDistanceResult kResult = new KDistanceResult();
        kResult.setkDistanceNeighbor(neighborIds);
        kResult.setkDistance(kDistance);
        return kResult;
    }
}