eu.amidst.huginlink.examples.learning.ParallelPCExample.java Source code

Introduction

Here is the source code for eu.amidst.huginlink.examples.learning.ParallelPCExample.java
Source

/*
 *
 *
 *    Licensed to the Apache Software Foundation (ASF) under one or more contributor license agreements.
 *    See the NOTICE file distributed with this work for additional information regarding copyright ownership.
 *    The ASF licenses this file to You under the Apache License, Version 2.0 (the "License"); you may not use
 *    this file except in compliance with the License.  You may obtain a copy of the License at
 *
 *            http://www.apache.org/licenses/LICENSE-2.0
 *
 *    Unless required by applicable law or agreed to in writing, software distributed under the License is
 *    distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 *    See the License for the specific language governing permissions and limitations under the License.
 *
 *
 */

package eu.amidst.huginlink.examples.learning;

import com.google.common.base.Stopwatch;
import eu.amidst.core.datastream.DataInstance;
import eu.amidst.core.datastream.DataStream;
import eu.amidst.core.io.BayesianNetworkLoader;
import eu.amidst.core.learning.parametric.ParallelMaximumLikelihood;
import eu.amidst.core.models.BayesianNetwork;
import eu.amidst.core.utils.BayesianNetworkSampler;
import eu.amidst.huginlink.learning.ParallelPC;

import java.util.ArrayList;
import java.util.Arrays;

/**
 * Example showing how to use {@link ParallelPC}, the link to the <a href="https://www.hugin.com">Hugin</a>'s
 * functionality to learn in parallel the structure of a Bayesian network model from data using the PC algorithm.
 *
 * <p> An important remark is that Hugin only allows applying the PC algorithm over a data set completely loaded
 * into RAM memory. The case where the data set does not fit into memory is solved in AMIDST in the following way:
 * the structure is learnt using a smaller data set produced by
 * <a href="https://en.wikipedia.org/wiki/Reservoir_sampling">Reservoir sampling</a> and, then, AMIDST's
 * {@link ParallelMaximumLikelihood} is used to learn the parameters of the BN model over the whole data set. </p>
 *
 * <p> For further details about the implementation of the parallel PC algorithm look at the following paper: </p>
 *
 * <i> Madsen, A. L., Jensen, F., Salmer&oacute;n, A., Langseth, H., Nielsen, T. D. (2015). Parallelization of the PC
 * Algorithm. The XVI Conference of the Spanish Association for Artificial Intelligence (CAEPIA'15), pages 14-24 </i>
 */
public class ParallelPCExample {

    /**
     * Samples a data stream from a Bayesian network loaded from file and, for several numbers of cores, times the
     * structural learning performed by {@link ParallelPC}. The elapsed time of each trial is printed to standard
     * output.
     *
     * @param args command-line arguments (not used).
     * @throws Exception any exception raised while loading the network or learning is propagated to the caller.
     */
    public static void main(String[] args) throws Exception {

        //We load a Bayesian network to generate a data stream
        //using the BayesianNetworkSampler class.
        int sampleSize = 100000;
        BayesianNetwork bn = BayesianNetworkLoader.loadFromFile("networks/dataWeka/Pigs.bn");
        BayesianNetworkSampler sampler = new BayesianNetworkSampler(bn);

        //We fix the number of samples in memory used for performing the structural learning.
        //They are randomly sub-sampled using Reservoir sampling.
        int samplesOnMemory = 5000;

        //We make different trials with different number of cores
        ArrayList<Integer> vNumCores = new ArrayList<>(Arrays.asList(1, 2, 3, 4));

        for (Integer numCores : vNumCores) {
            System.out
                    .println("Learning PC: " + samplesOnMemory + " samples on memory, " + numCores + " core/s ...");

            //A new data stream is sampled for every trial
            DataStream<DataInstance> data = sampler.sampleToDataStream(sampleSize);

            //The ParallelPC learner is created
            ParallelPC parallelPC = new ParallelPC();

            //We activate the parallel mode.
            parallelPC.setParallelMode(true);

            //We set the number of cores to be used for the structural learning
            parallelPC.setNumCores(numCores);

            //We set the number of samples to be used for learning the structure
            parallelPC.setNumSamplesOnMemory(samplesOnMemory);

            //Only the call to learn is timed; the configuration above is left out of the measurement
            Stopwatch watch = Stopwatch.createStarted();

            //We just invoke this method to learn a BN model from the data stream. The returned
            //BayesianNetwork is not kept because this example only reports the elapsed time.
            parallelPC.learn(data);

            System.out.println(watch.stop());
        }
    }
}