org.deidentifier.arx.ARFF2ARX.java Source code

Java tutorial

Introduction

Here is the source code for org.deidentifier.arx.ARFF2ARX.java

Source

/*
 * ARX: Weka Anonymization Filter
 * Copyright (C) 2014 Christian Windolf, Simon Koennecke, Andre Breitenfeld
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
 */
package org.deidentifier.arx;

import java.io.File;
import java.io.FilenameFilter;
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.LinkedList;
import java.util.List;
import java.util.ListIterator;
import java.util.Map;

import org.deidentifier.arx.AttributeType.Hierarchy;

import cern.colt.Arrays;
import weka.core.Attribute;
import weka.core.Instance;
import weka.core.Instances;

/**
 * Converts a Weka {@link Instances} data set (as loaded from an ARFF file) into
 * the {@link Data} representation that ARX takes as input.
 *
 * <p>Typical usage: construct the converter with the instances, declare the
 * quasi-identifying and sensitive attributes via {@code setQi}/{@code setSensitive},
 * call {@link #init(File, String)} and finally read the result with
 * {@link #getData()} and {@link #getHierarchyMap()}.
 *
 * @author Andre Breitenfeld
 * @author Simon Koennecke
 * @author Christian Windolf
 *
 */
public class ARFF2ARX {

    /** The Weka data set to convert; {@code null} if the test-only constructor was used. */
    protected Instances instances;

    /** Names of the quasi-identifying attributes; empty until one of the {@code setQi} methods is called. */
    protected String[] qi = new String[] {};

    /** Names of the sensitive attributes; empty until one of the {@code setSensitive} methods is called. */
    protected String[] sensitive = new String[] {};

    /** Attribute names of {@link #instances} in attribute-index order. */
    protected String[] header;

    /** Hierarchy per quasi-identifying attribute name; populated by {@link #init(File, String)}. */
    protected Map<String, Hierarchy> hierarchyMap;

    /** The ARX data object; populated by {@link #init(File, String)}. */
    protected Data data;

    /**
     * This constructor exists only for JUnit tests.
     * Do not invoke it if you want to work productively with this class:
     * it leaves {@link #instances} and {@link #header} unset.
     */
    protected ARFF2ARX() {
    }

    /**
     * Default constructor of this converter. Captures the attribute names of
     * the given data set as the header row.
     *
     * @param instances The ARFF instances to convert.
     */
    public ARFF2ARX(Instances instances) {
        this.instances = instances;

        header = new String[instances.numAttributes()];
        for (int i = 0; i < header.length; i++) {
            header[i] = instances.attribute(i).name();
        }
    }

    /**
     * Sets the quasi-identifying attributes from a comma-separated list of names.
     *
     * @param qi Comma-separated attribute names; {@code null} or blank clears the list.
     * @return This converter, to allow call chaining.
     * @throws IllegalArgumentException if instances are set and a name is not one of their attributes.
     */
    public ARFF2ARX setQi(String qi) {
        this.qi = splitToArray(qi);
        return this;
    }

    /**
     * Sets the quasi-identifying attributes. The array is stored as given
     * (no copy, no validation against the instances).
     *
     * @param qi The attribute names.
     * @return This converter, to allow call chaining.
     */
    public ARFF2ARX setQi(String[] qi) {
        this.qi = qi;
        return this;
    }

    /**
     * Sets the sensitive attributes from a comma-separated list of names.
     *
     * @param sensitive Comma-separated attribute names; {@code null} or blank clears the list.
     * @return This converter, to allow call chaining.
     * @throws IllegalArgumentException if instances are set and a name is not one of their attributes.
     */
    public ARFF2ARX setSensitive(String sensitive) {
        this.sensitive = splitToArray(sensitive);
        return this;
    }

    /**
     * Sets the sensitive attributes. The array is stored as given
     * (no copy, no validation against the instances).
     *
     * @param sensitive The attribute names.
     * @return This converter, to allow call chaining.
     */
    public ARFF2ARX setSensitive(String[] sensitive) {
        this.sensitive = sensitive;
        return this;
    }

    /**
     * Builds the hierarchies for all quasi-identifiers, converts every instance
     * into a string row and creates the ARX {@link Data} object with each
     * attribute typed as quasi-identifier (its hierarchy), sensitive or insensitive.
     *
     * @param dir Directory handed to the {@link HierarchyBuilder}
     *            (presumably where hierarchy definitions are looked up; see HierarchyBuilder).
     * @param relation Relation name handed to the {@link HierarchyBuilder};
     *                 {@code null} is treated as the empty string and the value is trimmed.
     * @return This converter, to allow call chaining.
     * @throws IOException Declared for I/O failures raised by the operations called here.
     * @throws IllegalStateException if the quasi-identifiers have been set to {@code null}.
     */
    public ARFF2ARX init(File dir, String relation) throws IOException {
        if (this.qi == null) {
            throw new IllegalStateException("please set quasi identifying attributes first!");
        }

        if (relation == null) {
            relation = "";
        }
        relation = relation.trim();

        hierarchyMap = new HashMap<String, Hierarchy>();
        HierarchyBuilder hBuilder = new HierarchyBuilder(dir, relation);
        for (String q : qi) {
            hierarchyMap.put(q, hBuilder.getHierarchy(instances.attribute(q), instances));
        }

        // First row is the header, followed by one row per instance.
        List<String[]> rawData = new ArrayList<String[]>(instances.numInstances() + 1);
        rawData.add(header);

        for (int i = 0; i < instances.numInstances(); i++) {
            String[] row = convertRow(instances.instance(i));
            // convertRow is overridable; a subclass may return null to drop a row.
            if (row != null) {
                rawData.add(row);
            }
        }

        data = Data.create(rawData);
        DataDefinition definition = data.getDefinition();
        for (String attr : header) {
            // Quasi-identifier takes precedence if a name is listed as both QI and sensitive.
            if (isQiAttribute(attr)) {
                definition.setAttributeType(attr, hierarchyMap.get(attr));
            } else if (isSensitive(attr)) {
                definition.setAttributeType(attr, AttributeType.SENSITIVE_ATTRIBUTE);
            } else {
                definition.setAttributeType(attr, AttributeType.INSENSITIVE_ATTRIBUTE);
            }
        }
        return this;
    }

    /**
     * Converts one Weka instance into a row of strings, one entry per attribute.
     * Nominal and string attributes use their string value; every other
     * attribute type is written as its value cast to {@code int}.
     *
     * @param instance The instance to convert.
     * @return The row, in attribute-index order.
     */
    protected String[] convertRow(Instance instance) {
        int columns = instance.numAttributes();
        String[] row = new String[columns];
        for (int i = 0; i < columns; i++) {
            int type = instance.attribute(i).type();
            if (type == Attribute.NOMINAL || type == Attribute.STRING) {
                row[i] = instance.stringValue(i);
            } else {
                // NOTE(review): the int cast drops any fractional part; kept as-is because
                // the hierarchies built elsewhere may rely on this exact string form.
                row[i] = String.valueOf((int) instance.value(i));
            }
        }
        return row;
    }

    /**
     * Determines if an attribute is a sensitive attribute.
     * @param attr The attribute to check.
     * @return Returns true if the given attribute is a sensitive attribute.
     */
    public boolean isSensitive(String attr) {
        return contains(attr, sensitive);
    }

    /**
     * Determines if an attribute is a quasi-identifier.
     * @param attr The attribute to check.
     * @return Returns true if the given attribute is a quasi-identifier.
     */
    public boolean isQiAttribute(String attr) {
        return contains(attr, qi);
    }

    /**
     *
     * @return Returns the instances this converter was created with.
     */
    public Instances getInstances() {
        return instances;
    }

    /**
     *
     * @return Returns the header as string array.
     */
    public String[] getHeader() {
        return header;
    }

    /**
     *
     * @return Returns the quasi-identifiers as string array.
     */
    public String[] getQi() {
        return qi;
    }

    /**
     *
     * @return Returns the hierarchies keyed by quasi-identifier name,
     *         or {@code null} if {@link #init(File, String)} has not been called.
     */
    public Map<String, Hierarchy> getHierarchyMap() {
        return hierarchyMap;
    }

    /**
     *
     * @return Returns the ARX data object,
     *         or {@code null} if {@link #init(File, String)} has not been called.
     */
    public Data getData() {
        return data;
    }

    /**
     *
     * @return Returns the sensitive attributes as string array.
     */
    public String[] getSensitive() {
        return sensitive;
    }

    /**
     * Splits a comma-separated list of attribute names into an array. Leading
     * and trailing whitespace of each entry is removed and empty entries are
     * skipped. If instances are available, every name must be one of their attributes.
     *
     * @param s The comma-separated list; may be {@code null} or blank.
     * @return The names in their original order; an empty array for {@code null} or blank input.
     * @throws IllegalArgumentException if instances are set and a name is not one of their attributes.
     */
    protected String[] splitToArray(String s) {
        if (s == null || s.trim().equals("")) {
            return new String[0];
        }

        List<String> names = new ArrayList<String>();
        for (String part : s.split(",")) {
            String value = part.replaceAll("^\\s+", "").replaceAll("\\s+$", "");
            if (value.trim().equals("")) {
                continue;
            }
            // Validation is only possible when the converter was created with instances.
            if (instances != null && instances.attribute(value) == null) {
                throw new IllegalArgumentException("The attribute \"" + value + "\" does not exist!");
            }
            names.add(value);
        }
        return names.toArray(new String[names.size()]);
    }

    @Override
    public String toString() {
        StringBuilder builder = new StringBuilder("ARFF2ARX{instances: ");
        if (instances == null) {
            builder.append("null, ");
        } else {
            builder.append("{rows: ").append(instances.numInstances());
            builder.append(", columns: ").append(instances.numAttributes()).append("}, ");
        }
        builder.append("qi: ");
        if (qi == null || qi.length == 0) {
            builder.append("null, ");
        } else {
            builder.append(Arrays.toString(qi)).append(", ");
        }
        builder.append("sensitive: ");
        if (sensitive == null || sensitive.length == 0) {
            builder.append("null}");
        } else {
            builder.append(Arrays.toString(sensitive)).append("}");
        }

        return builder.toString();
    }

    /**
     * Determines if a string is an element of an array.
     * @param s Element to locate in the array.
     * @param array Array with string entries; {@code null} is treated as an empty array.
     * @return Returns true if the given string is in the array.
     */
    protected boolean contains(String s, String[] array) {
        // The array setters accept null, so a null array must not blow up here.
        if (array == null) {
            return false;
        }
        for (String value : array) {
            if (value != null && value.equals(s)) {
                return true;
            }
        }
        return false;
    }

}