at.illecker.hama.hybrid.examples.onlinecf.OnlineCFTrainHybridBSP.java Source code

Java tutorial

Introduction

Here is the source code for at.illecker.hama.hybrid.examples.onlinecf.OnlineCFTrainHybridBSP.java

Source

/**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package at.illecker.hama.hybrid.examples.onlinecf;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileReader;
import java.io.IOException;
import java.util.AbstractMap;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Random;
import java.util.Set;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IOUtils;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.io.SequenceFile.CompressionType;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.Writable;
import org.apache.hama.HamaConfiguration;
import org.apache.hama.bsp.BSPJob;
import org.apache.hama.bsp.BSPPeer;
import org.apache.hama.bsp.FileOutputFormat;
import org.apache.hama.bsp.SequenceFileInputFormat;
import org.apache.hama.bsp.SequenceFileOutputFormat;
import org.apache.hama.bsp.gpu.HybridBSP;
import org.apache.hama.bsp.sync.SyncException;
import org.apache.hama.commons.io.PipesVectorWritable;
import org.apache.hama.commons.io.VectorWritable;
import org.apache.hama.commons.math.DenseDoubleVector;
import org.apache.hama.commons.math.DoubleVector;
import org.apache.hama.ml.recommendation.Preference;
import org.trifort.rootbeer.runtime.Context;
import org.trifort.rootbeer.runtime.Rootbeer;
import org.trifort.rootbeer.runtime.StatsRow;
import org.trifort.rootbeer.runtime.ThreadConfig;
import org.trifort.rootbeer.runtime.util.Stopwatch;

public class OnlineCFTrainHybridBSP
        extends HybridBSP<LongWritable, PipesVectorWritable, Text, PipesVectorWritable, ItemMessage> {

    private static final Log LOG = LogFactory.getLog(OnlineCFTrainHybridBSP.class);

    private static final Path CONF_TMP_DIR = new Path(
            "output/hama/hybrid/examples/onlinecf/hybrid-" + System.currentTimeMillis());
    private static final Path CONF_INPUT_DIR = new Path(CONF_TMP_DIR, "input");
    private static final Path CONF_OUTPUT_DIR = new Path(CONF_TMP_DIR, "output");

    public static final String CONF_BLOCKSIZE = "onlinecf.hybrid.blockSize";
    public static final String CONF_GRIDSIZE = "onlinecf.hybrid.gridSize";
    public static final String CONF_DEBUG = "onlinecf.is.debugging";

    // gridSize = amount of blocks and multiprocessors
    public static final int GRID_SIZE = 14;
    // blockSize = amount of threads
    public static final int BLOCK_SIZE = 256; // 1024;

    public static final double ALPHA = 0.01;

    public long m_setupTimeCpu = 0;
    public long m_setupTimeGpu = 0;
    public long m_bspTimeCpu = 0;
    public long m_bspTimeGpu = 0;

    private Configuration m_conf;
    private boolean m_isDebuggingEnabled;
    private FSDataOutputStream m_logger;

    private int m_gridSize;
    private int m_blockSize;

    // OnlineCF members
    private int m_maxIterations = 0;
    private int m_matrixRank = 0;
    private int m_skipCount = 0;

    // Input Preferences
    private ArrayList<Preference<Long, Long>> m_preferences = new ArrayList<Preference<Long, Long>>();
    private ArrayList<Integer> m_indexes = new ArrayList<Integer>();

    // Randomly generated depending on matrix rank,
    // will be computed runtime and represents trained model
    // userId, factorized value
    private HashMap<Long, PipesVectorWritable> m_usersMatrix = new HashMap<Long, PipesVectorWritable>();
    // itemId, factorized value
    private HashMap<Long, PipesVectorWritable> m_itemsMatrix = new HashMap<Long, PipesVectorWritable>();

    private Random m_rand = new Random(32L);

    /********************************* CPU *********************************/
    // **********************************************************************
    // setup
    // **********************************************************************
    @Override
    public void setup(BSPPeer<LongWritable, PipesVectorWritable, Text, PipesVectorWritable, ItemMessage> peer)
            throws IOException {

        long startTime = System.currentTimeMillis();

        this.m_conf = peer.getConfiguration();
        this.m_isDebuggingEnabled = m_conf.getBoolean(CONF_DEBUG, false);

        this.m_maxIterations = m_conf.getInt(OnlineCF.CONF_ITERATION_COUNT, OnlineCF.DFLT_ITERATION_COUNT);

        this.m_matrixRank = m_conf.getInt(OnlineCF.CONF_MATRIX_RANK, OnlineCF.DFLT_MATRIX_RANK);

        this.m_skipCount = m_conf.getInt(OnlineCF.CONF_SKIP_COUNT, OnlineCF.DFLT_SKIP_COUNT);

        // Init logging
        if (m_isDebuggingEnabled) {
            try {
                FileSystem fs = FileSystem.get(m_conf);
                m_logger = fs.create(new Path(FileOutputFormat.getOutputPath(new BSPJob((HamaConfiguration) m_conf))
                        + "/BSP_" + peer.getTaskId() + ".log"));

            } catch (IOException e) {
                e.printStackTrace();
            }
        }

        this.m_setupTimeCpu = System.currentTimeMillis() - startTime;
    }

    // **********************************************************************
    // bsp
    // **********************************************************************
    @Override
    public void bsp(BSPPeer<LongWritable, PipesVectorWritable, Text, PipesVectorWritable, ItemMessage> peer)
            throws IOException, SyncException, InterruptedException {

        long startTime = System.currentTimeMillis();

        // Fetch inputs
        collectInput(peer);

        // Sync tasks after input has been collected
        peer.sync();

        // DEBUG
        if (m_isDebuggingEnabled) {
            m_logger.writeChars("collected: " + this.m_usersMatrix.size() + " users, " + this.m_itemsMatrix.size()
                    + " items, " + this.m_preferences.size() + " preferences\n");

            m_logger.writeChars("preferences: length: " + this.m_preferences.size() + "\n");
            for (Preference<Long, Long> p : this.m_preferences) {
                m_logger.writeChars("userId: '" + p.getUserId() + "' itemId: '" + p.getItemId() + "' value: '"
                        + p.getValue().get() + "'\n");
            }
            m_logger.writeChars("indexes: length: " + this.m_indexes.size() + " indexes: "
                    + Arrays.toString(this.m_indexes.toArray()) + "\n");

            m_logger.writeChars("usersMatrix: length: " + this.m_usersMatrix.size() + "\n");
            for (Map.Entry<Long, PipesVectorWritable> e : this.m_usersMatrix.entrySet()) {
                m_logger.writeChars("key: '" + e.getKey() + "' value: '" + e.getValue().toString() + "'\n");
            }
            m_logger.writeChars("itemsMatrix: length: " + this.m_itemsMatrix.size() + "\n");
            for (Map.Entry<Long, PipesVectorWritable> e : this.m_itemsMatrix.entrySet()) {
                m_logger.writeChars("key: '" + e.getKey() + "' value: '" + e.getValue().toString() + "'\n");
            }
        }

        // calculation steps
        for (int i = 0; i < m_maxIterations; i++) {

            computeAllValues();

            if ((i + 1) % m_skipCount == 0) {
                normalizeWithBroadcastingValues(peer);
            }
        }

        // save users
        if (m_isDebuggingEnabled) {
            m_logger.writeChars("saving " + m_usersMatrix.size() + " users\n");
        }
        for (Map.Entry<Long, PipesVectorWritable> user : m_usersMatrix.entrySet()) {
            if (m_isDebuggingEnabled) {
                m_logger.writeChars("user: " + user.getKey() + " vector: " + user.getValue().getVector() + "\n");
            }
            peer.write(new Text("u" + user.getKey()), user.getValue());
        }
        // save items
        // TODO duplicated item saves, but one item may belong to one task only
        if (m_isDebuggingEnabled) {
            m_logger.writeChars("saving " + m_itemsMatrix.size() + " items\n");
        }
        for (Map.Entry<Long, PipesVectorWritable> item : m_itemsMatrix.entrySet()) {
            if (m_isDebuggingEnabled) {
                m_logger.writeChars("item: " + item.getKey() + " vector: " + item.getValue().getVector() + "\n");
            }
            peer.write(new Text("i" + item.getKey()), item.getValue());
        }

        this.m_bspTimeCpu = System.currentTimeMillis() - startTime;

        // Logging
        if (m_isDebuggingEnabled) {
            m_logger.writeChars("OnlineCFTrainHybridBSP,setupTimeCpu=" + this.m_setupTimeCpu + " ms\n");
            m_logger.writeChars(
                    "OnlineCFTrainHybridBSP,setupTimeCpu=" + (this.m_setupTimeCpu / 1000.0) + " seconds\n");
            m_logger.writeChars("OnlineCFTrainHybridBSP,bspTimeCpu=" + this.m_bspTimeCpu + " ms\n");
            m_logger.writeChars("OnlineCFTrainHybridBSP,bspTimeCpu=" + (this.m_bspTimeCpu / 1000.0) + " seconds\n");
            m_logger.close();
        }
        LOG.info("OnlineCFTrainHybridBSP,setupTimeCpu=" + this.m_setupTimeCpu + " ms");
        LOG.info("OnlineCFTrainHybridBSP,setupTimeCpu=" + (this.m_setupTimeCpu / 1000.0) + " seconds");
        LOG.info("OnlineCFTrainHybridBSP,bspTimeCpu=" + this.m_bspTimeCpu + " ms");
        LOG.info("OnlineCFTrainHybridBSP,bspTimeCpu=" + (this.m_bspTimeCpu / 1000.0) + " seconds");

    }

    // **********************************************************************
    // collectInput
    // **********************************************************************
    private void collectInput(
            BSPPeer<LongWritable, PipesVectorWritable, Text, PipesVectorWritable, ItemMessage> peer)
            throws IOException {

        LongWritable key = new LongWritable();
        PipesVectorWritable value = new PipesVectorWritable();
        int counter = 0;

        while (peer.readNext(key, value)) {
            long actualId = key.get();

            // parse as <k:userId, v:(itemId, score)>
            long itemId = (long) value.getVector().get(0);
            double score = value.getVector().get(1);

            if (m_usersMatrix.containsKey(actualId) == false) {
                DenseDoubleVector vals = new DenseDoubleVector(m_matrixRank);
                for (int i = 0; i < m_matrixRank; i++) {
                    vals.set(i, m_rand.nextDouble());
                }
                m_usersMatrix.put(actualId, new PipesVectorWritable(vals));
            }

            if (m_itemsMatrix.containsKey(itemId) == false) {
                DenseDoubleVector vals = new DenseDoubleVector(m_matrixRank);
                for (int i = 0; i < m_matrixRank; i++) {
                    vals.set(i, m_rand.nextDouble());
                }
                m_itemsMatrix.put(itemId, new PipesVectorWritable(vals));
            }
            m_preferences.add(new Preference<Long, Long>(actualId, itemId, score));
            m_indexes.add(counter);
            counter++;
        }
    }

    // **********************************************************************
    // computeAllValues
    // **********************************************************************
    private void computeAllValues() throws IOException {
        // shuffling indexes
        int idx = 0;
        int idxValue = 0;
        int tmp = 0;
        for (int i = m_indexes.size(); i > 0; i--) {
            idx = Math.abs(m_rand.nextInt()) % i;
            idxValue = m_indexes.get(idx);
            tmp = m_indexes.get(i - 1);
            m_indexes.set(i - 1, idxValue);
            m_indexes.set(idx, tmp);
        }

        // compute values
        for (Integer prefIdx : m_indexes) {
            Preference<Long, Long> pref = m_preferences.get(prefIdx);
            DoubleVector alpha = m_usersMatrix.get(pref.getUserId()).getVector();
            DoubleVector beta = m_itemsMatrix.get(pref.getItemId()).getVector();

            // calculated score
            double calculatedScore = alpha.multiply(beta).sum();
            double expectedScore = pref.getValue().get();
            double loss = expectedScore - calculatedScore;
            // DEBUG
            // m_logger.writeChars("expectedScore: " + expectedScore
            // + " calculatedScore: " + calculatedScore + " loss: " + loss + "\n");

            // update A
            DoubleVector newAlpha = alpha.add(beta.multiply(2 * ALPHA * loss));
            // DEBUG
            // m_logger.writeChars("UPDATE alpa: " + newBeta.toString() + "\n");
            m_usersMatrix.put(pref.getUserId(), new PipesVectorWritable(new VectorWritable(newAlpha)));

            // update B
            DoubleVector newBeta = beta.add(alpha.multiply(2 * ALPHA * loss));
            // DEBUG
            // m_logger.writeChars("UPDATE beta: " + newBeta.toString() + "\n");
            m_itemsMatrix.put(pref.getItemId(), new PipesVectorWritable(new VectorWritable(newBeta)));
        }
    }

    // **********************************************************************
    // normalize and broadcast values
    // **********************************************************************
    private void normalizeWithBroadcastingValues(
            BSPPeer<LongWritable, PipesVectorWritable, Text, PipesVectorWritable, ItemMessage> peer)
            throws IOException, SyncException, InterruptedException {

        int peerCount = peer.getNumPeers();
        int peerId = peer.getPeerIndex();
        String[] allPeerNames = peer.getAllPeerNames();

        if (peerCount > 1) {
            // DEBUG
            // m_logger.writeChars("normalizeWithBroadcastingValues peerCount: "
            // + peerCount + " peerId: " + peerId + "\n");

            HashMap<Long, LinkedList<Integer>> senderList = new HashMap<Long, LinkedList<Integer>>();
            HashMap<Long, DoubleVector> normalizedValues = new HashMap<Long, DoubleVector>();
            HashMap<Long, Integer> normalizedValueCount = new HashMap<Long, Integer>();

            // Step 1)
            // send item matrices to selected peers
            for (Map.Entry<Long, PipesVectorWritable> item : m_itemsMatrix.entrySet()) {

                int toPeerId = item.getKey().hashCode() % peerCount;
                // don't send item to itself
                if (toPeerId != peerId) {
                    // m_logger.writeChars("sendItem itemId: " + item.getKey()
                    // + " toPeerId: " + toPeerId + " value: "
                    // + item.getValue().getVector() + "\n");

                    peer.send(allPeerNames[toPeerId],
                            new ItemMessage(peerId, item.getKey().longValue(), item.getValue().getVector()));

                } else {
                    normalizedValues.put(item.getKey(), item.getValue().getVector());
                    normalizedValueCount.put(item.getKey(), 1);
                    senderList.put(item.getKey(), new LinkedList<Integer>());
                }
            }
            peer.sync();

            // Step 2)
            // receive item matrices if this peer is selected
            ItemMessage msg;
            while ((msg = peer.getCurrentMessage()) != null) {
                int senderId = msg.getSenderId();
                long itemId = msg.getItemId();
                DoubleVector vector = msg.getVector();

                // m_logger.writeChars("receiveItem itemId: " + itemId + " fromPeerId: "
                // + senderId + " value: " + vector + "\n");

                if (normalizedValues.get(itemId) != null) {
                    normalizedValues.put(itemId, normalizedValues.get(itemId).add(vector));
                    normalizedValueCount.put(itemId, normalizedValueCount.get(itemId) + 1);
                    senderList.get(itemId).add(senderId);
                }
            }

            // Step 3)
            // normalize
            for (Map.Entry<Long, DoubleVector> e : normalizedValues.entrySet()) {
                double count = normalizedValueCount.get(e.getKey());
                e.setValue(e.getValue().multiply(1.0 / count));
                // m_logger.writeChars("normalize itemId: " + e.getKey() + " NewValue: "
                // + e.getValue() + "\n");
            }

            // Step 4)
            // send back normalized values to senders
            for (Map.Entry<Long, DoubleVector> e : normalizedValues.entrySet()) {
                msg = new ItemMessage(peerId, e.getKey(), e.getValue());

                // send to interested peers
                Iterator<Integer> iter = senderList.get(e.getKey()).iterator();
                while (iter.hasNext()) {
                    int toPeerId = iter.next();
                    // m_logger.writeChars("sendNormalizedBack itemId: " + e.getKey()
                    // + " toPeerId: " + toPeerId + " value: " + e.getValue() + "\n");
                    peer.send(allPeerNames[toPeerId], msg);
                }

                // update items matrix
                m_itemsMatrix.put(e.getKey(), new PipesVectorWritable(e.getValue()));
                // m_logger.writeChars("updateItems itemId: " + e.getKey() + " value: "
                // + e.getValue() + "\n");
            }
            peer.sync();

            // Step 5)
            // receive already normalized and update data
            while ((msg = peer.getCurrentMessage()) != null) {
                // m_logger.writeChars("updateItems itemId: " + msg.getItemId()
                // + " fromPeerId: " + msg.getSenderId() + " value: "
                // + msg.getVector() + "\n");
                m_itemsMatrix.put(msg.getItemId(), new PipesVectorWritable(msg.getVector()));
            }

        }
    }

    /********************************* GPU *********************************/
    // **********************************************************************
    // setupGpu
    // **********************************************************************
    @Override
    public void setupGpu(BSPPeer<LongWritable, PipesVectorWritable, Text, PipesVectorWritable, ItemMessage> peer)
            throws IOException, SyncException, InterruptedException {

        long startTime = System.currentTimeMillis();

        this.m_conf = peer.getConfiguration();
        this.m_isDebuggingEnabled = m_conf.getBoolean(CONF_DEBUG, false);

        this.m_maxIterations = m_conf.getInt(OnlineCF.CONF_ITERATION_COUNT, OnlineCF.DFLT_ITERATION_COUNT);

        this.m_matrixRank = m_conf.getInt(OnlineCF.CONF_MATRIX_RANK, OnlineCF.DFLT_MATRIX_RANK);

        this.m_skipCount = m_conf.getInt(OnlineCF.CONF_SKIP_COUNT, OnlineCF.DFLT_SKIP_COUNT);

        this.m_blockSize = Integer.parseInt(this.m_conf.get(CONF_BLOCKSIZE));
        this.m_gridSize = Integer.parseInt(this.m_conf.get(CONF_GRIDSIZE));

        // Init logging
        if (m_isDebuggingEnabled) {
            try {
                FileSystem fs = FileSystem.get(m_conf);
                m_logger = fs.create(new Path(FileOutputFormat.getOutputPath(new BSPJob((HamaConfiguration) m_conf))
                        + "/BSP_" + peer.getTaskId() + ".log"));

            } catch (IOException e) {
                e.printStackTrace();
            }
        }

        this.m_setupTimeGpu = System.currentTimeMillis() - startTime;
    }

    // **********************************************************************
    // bspGpu
    // **********************************************************************
    @Override
    public void bspGpu(BSPPeer<LongWritable, PipesVectorWritable, Text, PipesVectorWritable, ItemMessage> peer,
            Rootbeer rootbeer) throws IOException, SyncException, InterruptedException {

        long startTime = System.currentTimeMillis();

        // **********************************************************************
        // Collect inputs
        // **********************************************************************
        Map<Long, HashMap<Long, Double>> preferencesMap = new HashMap<Long, HashMap<Long, Double>>();
        Map<Long, Long> userRatingCount = new HashMap<Long, Long>();
        Map<Long, Long> itemRatingCount = new HashMap<Long, Long>();

        LongWritable key = new LongWritable();
        PipesVectorWritable value = new PipesVectorWritable();
        int counter = 0;

        while (peer.readNext(key, value)) {
            // parse as <k:userId, v:(itemId, score)>
            long userId = key.get();
            long itemId = (long) value.getVector().get(0);
            double score = value.getVector().get(1);

            // Add User vector
            if (m_usersMatrix.containsKey(userId) == false) {
                DenseDoubleVector vals = new DenseDoubleVector(m_matrixRank);
                for (int i = 0; i < m_matrixRank; i++) {
                    vals.set(i, m_rand.nextDouble());
                }
                m_usersMatrix.put(userId, new PipesVectorWritable(vals));
                userRatingCount.put(userId, 1l);
            } else {
                userRatingCount.put(userId, userRatingCount.get(userId) + 1);
            }

            // Add Item vector
            if (m_itemsMatrix.containsKey(itemId) == false) {
                DenseDoubleVector vals = new DenseDoubleVector(m_matrixRank);
                for (int i = 0; i < m_matrixRank; i++) {
                    vals.set(i, m_rand.nextDouble());
                }
                m_itemsMatrix.put(itemId, new PipesVectorWritable(vals));
                itemRatingCount.put(itemId, 1l);
            } else {
                itemRatingCount.put(itemId, itemRatingCount.get(itemId) + 1);
            }

            // Add preference
            m_preferences.add(new Preference<Long, Long>(userId, itemId, score));

            if (preferencesMap.containsKey(userId) == false) {
                HashMap<Long, Double> map = new HashMap<Long, Double>();
                map.put(itemId, score);
                preferencesMap.put(userId, map);
            } else {
                preferencesMap.get(userId).put(itemId, score);
            }

            // Add counter
            m_indexes.add(counter);
            counter++;
        }

        // DEBUG
        if (m_isDebuggingEnabled) {
            m_logger.writeChars("collected: " + m_usersMatrix.size() + " users, " + m_itemsMatrix.size()
                    + " items, " + m_preferences.size() + " preferences\n");
        }

        // **********************************************************************
        // Prepare input for GPU
        // **********************************************************************
        Map<Long, Long> sortedUserRatingCount = sortByValues(userRatingCount);
        Map<Long, Long> sortedItemRatingCount = sortByValues(itemRatingCount);

        // Convert preferences to userItemMatrix double[][]
        // sortedUserRatingCount.size() x sortedItemRatingCount.size()
        double[][] userItemMatrix = new double[m_usersMatrix.size()][m_itemsMatrix.size()];

        // Mappers
        Map<Long, Integer> userItemMatrixUserRowMap = new HashMap<Long, Integer>();
        GpuIntegerMap userItemMatrixItemColMap = new GpuIntegerMap(m_itemsMatrix.size() + 1); // +1 because we are starting with 0
        GpuIntegerMap userItemMatrixColItemMap = new GpuIntegerMap(m_itemsMatrix.size() + 1); // +1 because we are starting with 0

        // Create userHelper to int[][]
        // userHelper[userId][0] = userRatingCount
        // userHelper[userId][1] = colId of userItemMatrix
        int[][] userHelper = null;
        // Create itemHelper to int[][]
        // itemHelper[itemId][0] = itemRatingCount
        // itemHelper[userId][1] = rowId of userItemMatrix
        int[][] itemHelper = null;
        Map<Long, Integer> itemHelperId = new HashMap<Long, Integer>();

        // Debug
        if (m_isDebuggingEnabled) {
            m_logger.writeChars(
                    "userItemMatrix: (m x n): " + m_usersMatrix.size() + " x " + m_itemsMatrix.size() + "\n");
        }

        int rowId = 0;
        for (Long userId : sortedUserRatingCount.keySet()) {

            // Map userId to rowId in userItemMatrixUserRowMap
            userItemMatrixUserRowMap.put(userId, rowId);

            // Setup userHelper
            if (userHelper == null) {
                // TODO sortedUserRatingCount.size()
                userHelper = new int[m_usersMatrix.size()][sortedUserRatingCount.get(userId).intValue() + 1];
            }
            userHelper[rowId][0] = sortedUserRatingCount.get(userId).intValue();

            int colId = 0;
            int userHelperId = 1;
            for (Long itemId : sortedItemRatingCount.keySet()) {

                // Map itemId to colId in userItemMatrixItemColMap
                if (rowId == 0) {
                    userItemMatrixItemColMap.put(itemId.intValue(), colId);
                    userItemMatrixColItemMap.put(colId, itemId.intValue());
                }

                // Setup itemHelper
                if (itemHelper == null) {
                    // TODO sortedItemRatingCount.size()
                    itemHelper = new int[m_itemsMatrix.size()][sortedItemRatingCount.get(itemId).intValue() + 1];
                }
                itemHelper[colId][0] = sortedItemRatingCount.get(itemId).intValue();

                if (preferencesMap.get(userId).containsKey(itemId)) {
                    // Add userItemMatrix
                    userItemMatrix[rowId][colId] = preferencesMap.get(userId).get(itemId);

                    // Add userHelper
                    userHelper[rowId][userHelperId] = colId;
                    userHelperId++;

                    // Add itemHelper
                    if (itemHelperId.containsKey(itemId)) {
                        int idx = itemHelperId.get(itemId);
                        itemHelper[colId][idx] = rowId;
                        itemHelperId.put(itemId, idx + 1);
                    } else {
                        itemHelper[colId][1] = rowId;
                        itemHelperId.put(itemId, 2);
                    }

                }

                colId++;
            }

            // Debug userItemMatrix
            if (m_isDebuggingEnabled) {
                m_logger.writeChars("userItemMatrix userId: " + userId + " row[" + rowId + "]: "
                        + Arrays.toString(userItemMatrix[rowId]) + " userRatings: "
                        + sortedUserRatingCount.get(userId) + "\n");
            }
            rowId++;
        }

        // Debug userHelper and itemHelper
        if (m_isDebuggingEnabled) {
            // TODO sortedUserRatingCount.size()
            for (int i = 0; i < m_usersMatrix.size(); i++) {
                m_logger.writeChars("userHelper row " + i + ": " + Arrays.toString(userHelper[i]) + "\n");
            }
            // TODO sortedItemRatingCount.size()
            for (int i = 0; i < m_itemsMatrix.size(); i++) {
                m_logger.writeChars("itemHelper row " + i + ": " + Arrays.toString(itemHelper[i]) + "\n");
            }
        }

        // Convert usersMatrix to double[][]
        double[][] userMatrix = new double[m_usersMatrix.size()][m_matrixRank];
        rowId = 0;
        if (m_isDebuggingEnabled) {
            m_logger.writeChars("userMatrix: length: " + m_usersMatrix.size() + "\n");
        }
        for (Long userId : sortedUserRatingCount.keySet()) {
            DoubleVector vector = m_usersMatrix.get(userId).getVector();
            for (int i = 0; i < m_matrixRank; i++) {
                userMatrix[rowId][i] = vector.get(i);
            }
            if (m_isDebuggingEnabled) {
                m_logger.writeChars("userId: " + userId + " " + Arrays.toString(vector.toArray()) + "\n");
            }
            rowId++;
        }

        // Convert itemsMatrix to double[][]
        double[][] itemMatrix = new double[m_itemsMatrix.size()][m_matrixRank];
        rowId = 0;
        GpuIntegerMap counterMap = new GpuIntegerMap(m_itemsMatrix.size());
        if (m_isDebuggingEnabled) {
            m_logger.writeChars("itemMatrix: length: " + m_itemsMatrix.size() + "\n");
        }
        for (Long itemId : sortedItemRatingCount.keySet()) {
            counterMap.put(itemId.intValue(), 0);

            DoubleVector vector = m_itemsMatrix.get(itemId).getVector();
            for (int i = 0; i < m_matrixRank; i++) {
                itemMatrix[rowId][i] = vector.get(i);
            }
            if (m_isDebuggingEnabled) {
                m_logger.writeChars("itemId: " + itemId + " " + Arrays.toString(vector.toArray()) + "\n");
            }
            rowId++;
        }

        // Sync tasks after input has been collected
        peer.sync();

        // **********************************************************************
        // Run GPU Kernels
        // **********************************************************************
        OnlineCFTrainHybridKernel kernel = new OnlineCFTrainHybridKernel(userItemMatrix, userHelper, itemHelper,
                userItemMatrixItemColMap, userItemMatrixColItemMap, userMatrix, itemMatrix, m_usersMatrix.size(),
                m_itemsMatrix.size(), ALPHA, m_matrixRank, m_maxIterations, counterMap, m_skipCount,
                peer.getNumPeers(), peer.getPeerIndex(), peer.getAllPeerNames());

        Context context = rootbeer.createDefaultContext();
        Stopwatch watch = new Stopwatch();
        watch.start();
        rootbeer.run(kernel, new ThreadConfig(m_blockSize, m_gridSize, m_blockSize * m_gridSize), context);
        watch.stop();

        // **********************************************************************
        // Save Model
        // **********************************************************************
        // save users
        for (Entry<Long, Integer> userMap : userItemMatrixUserRowMap.entrySet()) {
            if (m_isDebuggingEnabled) {
                m_logger.writeChars("user: " + userMap.getKey() + " vector: "
                        + Arrays.toString(kernel.m_usersMatrix[userMap.getValue()]) + "\n");
            }
            peer.write(new Text("u" + userMap.getKey()),
                    new PipesVectorWritable(new DenseDoubleVector(kernel.m_usersMatrix[userMap.getValue()])));
        }
        // TODO duplicated item saves, but one item may belong to one task only
        // save items
        for (GpuIntIntPair itemMap : userItemMatrixItemColMap.getList()) {
            if (itemMap != null) {
                if (m_isDebuggingEnabled) {
                    m_logger.writeChars("item: " + itemMap.getKey() + " vector: "
                            + Arrays.toString(kernel.m_itemsMatrix[itemMap.getValue()]) + "\n");
                }
                peer.write(new Text("i" + itemMap.getKey()),
                        new PipesVectorWritable(new DenseDoubleVector(kernel.m_itemsMatrix[itemMap.getValue()])));
            }
        }

        this.m_bspTimeGpu = System.currentTimeMillis() - startTime;

        // **********************************************************************
        // Logging
        // **********************************************************************
        if (m_isDebuggingEnabled) {
            m_logger.writeChars("OnlineCFTrainHybridBSP.bspGpu executed on GPU!\n");
            m_logger.writeChars(
                    "OnlineCFTrainHybridBSP.bspGpu blockSize: " + m_blockSize + " gridSize: " + m_gridSize + "\n");
            m_logger.writeChars("OnlineCFTrainHybridBSP,setupTimeGpu=" + this.m_setupTimeGpu + " ms\n");
            m_logger.writeChars(
                    "OnlineCFTrainHybridBSP,setupTimeGpu=" + (this.m_setupTimeGpu / 1000.0) + " seconds\n");
            m_logger.writeChars("OnlineCFTrainHybridBSP,bspTimeGpu=" + this.m_bspTimeGpu + " ms\n");
            m_logger.writeChars("OnlineCFTrainHybridBSP,bspTimeGpu=" + (this.m_bspTimeGpu / 1000.0) + " seconds\n");

            List<StatsRow> stats = context.getStats();
            for (StatsRow row : stats) {
                m_logger.writeChars("  StatsRow:\n");
                m_logger.writeChars("    serial time: " + row.getSerializationTime() + "\n");
                m_logger.writeChars("    exec time: " + row.getExecutionTime() + "\n");
                m_logger.writeChars("    deserial time: " + row.getDeserializationTime() + "\n");
                m_logger.writeChars("    num blocks: " + row.getNumBlocks() + "\n");
                m_logger.writeChars("    num threads: " + row.getNumThreads() + "\n");
                m_logger.writeChars("GPUTime: " + watch.elapsedTimeMillis() + " ms" + "\n");
            }

            m_logger.close();
        }

        // Logging
        List<StatsRow> stats = context.getStats();
        for (StatsRow row : stats) {
            LOG.info("  StatsRow:");
            LOG.info("    serial time: " + row.getSerializationTime());
            LOG.info("    exec time: " + row.getExecutionTime());
            LOG.info("    deserial time: " + row.getDeserializationTime());
            LOG.info("    num blocks: " + row.getNumBlocks());
            LOG.info("    num threads: " + row.getNumThreads());
            LOG.info("GPUTime: " + watch.elapsedTimeMillis() + " ms");
        }
        LOG.info("OnlineCFTrainHybridBSP.bspGpu executed on GPU!");
        LOG.info("OnlineCFTrainHybridBSP.bspGpu blockSize: " + m_blockSize + " gridSize: " + m_gridSize);
        LOG.info("OnlineCFTrainHybridBSP,setupTimeGpu=" + this.m_setupTimeGpu + " ms");
        LOG.info("OnlineCFTrainHybridBSP,setupTimeGpu=" + (this.m_setupTimeGpu / 1000.0) + " seconds");
        LOG.info("OnlineCFTrainHybridBSP,bspTimeGpu=" + this.m_bspTimeGpu + " ms");
        LOG.info("OnlineCFTrainHybridBSP,bspTimeGpu=" + (this.m_bspTimeGpu / 1000.0) + " seconds");

    }

    // **********************************************************************
    // sortByValues(Map)
    // **********************************************************************
    public static <K extends Comparable, V extends Comparable> Map<K, V> sortByValues(Map<K, V> map) {

        List<Map.Entry<K, V>> entries = new LinkedList<Map.Entry<K, V>>(map.entrySet());

        Collections.sort(entries, new Comparator<Map.Entry<K, V>>() {
            @Override
            public int compare(Entry<K, V> o1, Entry<K, V> o2) {
                return o2.getValue().compareTo(o1.getValue());
            }
        });

        // LinkedHashMap will keep the keys in the order they are inserted
        // which is currently sorted on natural ordering
        Map<K, V> sortedMap = new LinkedHashMap<K, V>();
        for (Map.Entry<K, V> entry : entries) {
            sortedMap.put(entry.getKey(), entry.getValue());
        }

        return sortedMap;
    }

    // **********************************************************************
    // createJobConfiguration
    // **********************************************************************
    public static BSPJob createOnlineCFTrainHybridBSPConf(Path inPath, Path outPath) throws IOException {
        return createOnlineCFTrainHybridBSPConf(new HamaConfiguration(), inPath, outPath);
    }

    public static BSPJob createOnlineCFTrainHybridBSPConf(Configuration conf, Path inPath, Path outPath)
            throws IOException {

        if (conf.getInt(OnlineCF.CONF_MATRIX_RANK, -1) == -1) {
            conf.setInt(OnlineCF.CONF_MATRIX_RANK, OnlineCF.DFLT_MATRIX_RANK);
        }

        if (conf.getInt(OnlineCF.CONF_ITERATION_COUNT, -1) == -1) {
            conf.setInt(OnlineCF.CONF_ITERATION_COUNT, OnlineCF.DFLT_ITERATION_COUNT);
        }

        if (conf.getInt(OnlineCF.CONF_SKIP_COUNT, -1) == -1) {
            conf.setInt(OnlineCF.CONF_SKIP_COUNT, OnlineCF.DFLT_SKIP_COUNT);
        }

        BSPJob job = new BSPJob(new HamaConfiguration(conf), OnlineCFTrainHybridBSP.class);
        // Set the job name
        job.setJobName("Online Collaboration Filtering");
        // set the BSP class which shall be executed
        job.setBspClass(OnlineCFTrainHybridBSP.class);
        // help Hama to locale the jar to be distributed
        job.setJarByClass(OnlineCFTrainHybridBSP.class);

        job.setInputPath(inPath);
        job.setInputFormat(SequenceFileInputFormat.class);
        job.setInputKeyClass(IntWritable.class);
        job.setInputValueClass(PipesVectorWritable.class);

        job.setOutputPath(outPath);
        job.setOutputFormat(SequenceFileOutputFormat.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(PipesVectorWritable.class);

        job.setMessageClass(ItemMessage.class);

        // Enable Partitioning
        // job.setBoolean(Constants.ENABLE_RUNTIME_PARTITIONING, true);
        // job.setPartitioner(HashPartitioner.class);

        job.set("bsp.child.java.opts", "-Xmx8G");

        return job;
    }

    // **********************************************************************
    // Main
    // **********************************************************************
    public static void main(String[] args) throws Exception {
        // Defaults
        int numBspTask = 1; // CPU + GPU tasks
        int numGpuBspTask = 1; // GPU tasks
        int blockSize = BLOCK_SIZE;
        int gridSize = GRID_SIZE;

        int maxIteration = 3; // 150;
        int matrixRank = 3;
        int skipCount = 1;

        double alpha = ALPHA;
        int userCount = 0;
        int itemCount = 0;
        int percentNonZeroValues = 0;

        int GPUPercentage = 20;

        boolean useTestExampleInput = true;
        boolean isDebugging = true;
        String inputFile = "";
        String separator = "\\t";

        Configuration conf = new HamaConfiguration();
        FileSystem fs = FileSystem.get(conf);

        // Set numBspTask to maxTasks
        // BSPJobClient jobClient = new BSPJobClient(conf);
        // ClusterStatus cluster = jobClient.getClusterStatus(true);
        // numBspTask = cluster.getMaxTasks();

        if (args.length > 0) {
            if (args.length >= 14) {
                numBspTask = Integer.parseInt(args[0]);
                numGpuBspTask = Integer.parseInt(args[1]);
                blockSize = Integer.parseInt(args[2]);
                gridSize = Integer.parseInt(args[3]);

                maxIteration = Integer.parseInt(args[4]);
                matrixRank = Integer.parseInt(args[5]);
                skipCount = Integer.parseInt(args[6]);

                alpha = Double.parseDouble(args[7]);
                userCount = Integer.parseInt(args[8]);
                itemCount = Integer.parseInt(args[9]);
                percentNonZeroValues = Integer.parseInt(args[10]);

                GPUPercentage = Integer.parseInt(args[11]);

                useTestExampleInput = Boolean.parseBoolean(args[12]);
                isDebugging = Boolean.parseBoolean(args[13]);

                // optional parameters
                if (args.length > 14) {
                    inputFile = args[14];
                }
                if (args.length > 15) {
                    separator = args[15];
                }

            } else {
                System.out.println("Wrong argument size!");
                System.out.println("    Argument1=numBspTask");
                System.out.println("    Argument2=numGpuBspTask");
                System.out.println("    Argument3=blockSize");
                System.out.println("    Argument4=gridSize");
                System.out.println(
                        "    Argument5=maxIterations | Number of maximal iterations (" + maxIteration + ")");
                System.out.println("    Argument6=matrixRank | matrixRank (" + matrixRank + ")");
                System.out.println("    Argument7=skipCount | skipCount (" + skipCount + ")");
                System.out.println("    Argument8=alpha | alpha (" + alpha + ")");
                System.out.println("    Argument9=userCount | userCount (" + userCount + ")");
                System.out.println("    Argument10=itemCount | itemCount (" + itemCount + ")");
                System.out.println("    Argument11=percentNonZeroValues | percentNonZeroValues ("
                        + percentNonZeroValues + ")");
                System.out.println("    Argument12=GPUPercentage (percentage of input)");
                System.out.println("    Argument13=testExample | Use testExample input (true|false=default)");
                System.out.println("    Argument14=debug | Enable debugging (true|false=default)");
                System.out.println("    Argument15=inputFile (optional) | MovieLens inputFile");
                System.out.println("    Argument16=separator (optional) | default '" + separator + "' ");
                return;
            }
        }

        // Check if inputFile exists
        if ((!inputFile.isEmpty()) && (!new File(inputFile).exists())) {
            System.out.println("Error: inputFile: " + inputFile + " does not exist!");
            return;
        }

        // Check parameters
        if ((inputFile.isEmpty()) && (!useTestExampleInput) && (userCount <= 0) && (itemCount <= 0)
                && (percentNonZeroValues <= 0)) {
            System.out.println("Invalid parameter: userCount: " + userCount + " itemCount: " + itemCount
                    + " percentNonZeroValues: " + percentNonZeroValues);
            return;
        }

        // Check if blockSize < matrixRank when using GPU
        if ((numGpuBspTask > 0) && (blockSize < matrixRank)) {
            System.out.println("Error: BlockSize < matrixRank");
            return;
        }

        // Check GPUPercentage
        if ((GPUPercentage < 0) && (GPUPercentage > 100)) {
            System.out.println("Error: GPUPercentage must be between 0 and 100 percent");
            return;
        }

        // Set config variables
        conf.setBoolean(CONF_DEBUG, isDebugging);
        conf.setBoolean("hama.pipes.logging", isDebugging);
        // Set CPU tasks
        conf.setInt("bsp.peers.num", numBspTask);
        // Set GPU tasks
        conf.setInt("bsp.peers.gpu.num", numGpuBspTask);
        // Set GPU blockSize and gridSize
        conf.set(CONF_BLOCKSIZE, "" + blockSize);
        conf.set(CONF_GRIDSIZE, "" + gridSize);

        conf.setInt(OnlineCF.CONF_ITERATION_COUNT, maxIteration);
        conf.setInt(OnlineCF.CONF_MATRIX_RANK, matrixRank);
        conf.setInt(OnlineCF.CONF_SKIP_COUNT, skipCount);

        // Debug output
        LOG.info("NumBspTask: " + conf.getInt("bsp.peers.num", 0));
        LOG.info("NumGpuBspTask: " + conf.getInt("bsp.peers.gpu.num", 0));
        LOG.info("bsp.tasks.maximum: " + conf.get("bsp.tasks.maximum"));
        LOG.info("BlockSize: " + conf.get(CONF_BLOCKSIZE));
        LOG.info("GridSize: " + conf.get(CONF_GRIDSIZE));
        LOG.info("GPUPercentage: " + GPUPercentage);

        LOG.info("isDebugging: " + isDebugging);
        LOG.info("useTestExampleInput: " + useTestExampleInput);
        LOG.info("inputPath: " + CONF_INPUT_DIR);
        LOG.info("outputPath: " + CONF_OUTPUT_DIR);

        LOG.info("maxIteration: " + maxIteration);
        LOG.info("matrixRank: " + matrixRank);
        LOG.info("skipCount: " + skipCount);

        LOG.info("alpha: " + alpha);
        LOG.info("userCount: " + userCount);
        LOG.info("itemCount: " + itemCount);
        LOG.info("percentNonZeroValues: " + percentNonZeroValues);

        if (!inputFile.isEmpty()) {
            LOG.info("inputFile: " + inputFile);
            LOG.info("separator: " + separator);
        }

        // prepare Input
        int maxTestPrefs = 10;
        Path preferencesIn = new Path(CONF_INPUT_DIR, "preferences_in.seq");
        List<Preference<Long, Long>> testPrefs = null;
        if (useTestExampleInput) {

            testPrefs = prepareTestInputData(conf, fs, CONF_INPUT_DIR, preferencesIn);

        } else if (inputFile.isEmpty()) {

            testPrefs = generateRandomInputData(conf, fs, CONF_INPUT_DIR, numBspTask, numGpuBspTask, userCount,
                    itemCount, percentNonZeroValues, GPUPercentage, maxTestPrefs);

        } else if (!inputFile.isEmpty()) {
            // parse inputFile and return first entries for testing
            testPrefs = convertInputData(conf, fs, CONF_INPUT_DIR, preferencesIn, inputFile, separator,
                    maxTestPrefs);
        }

        // Generate Job config
        BSPJob job = createOnlineCFTrainHybridBSPConf(conf, CONF_INPUT_DIR, CONF_OUTPUT_DIR);

        // Execute Job
        long startTime = System.currentTimeMillis();
        if (job.waitForCompletion(true)) {

            LOG.info("Job Finished in " + (System.currentTimeMillis() - startTime) / 1000.0 + " seconds");

            // Load Job results for testing
            OnlineCF recommender = new OnlineCF();
            recommender.load(CONF_OUTPUT_DIR.toString(), false);

            // Test results
            int error = 0;
            double totalError = 0;
            for (Preference<Long, Long> test : testPrefs) {
                double expected = test.getValue().get();
                double estimated = recommender.estimatePreference(test.getUserId(), test.getItemId());

                if (testPrefs.size() <= 20) {
                    LOG.info("(" + test.getUserId() + ", " + test.getItemId() + ", " + expected + "): " + estimated
                            + " error: " + Math.abs(expected - estimated));
                }
                totalError += Math.abs(expected - estimated);
                error += (Math.abs(expected - estimated) < 0.5) ? 1 : 0;
            }

            LOG.info("totalError: " + totalError);
            LOG.info("assertEquals(expected: " + (testPrefs.size() * 0.75) + " == " + error
                    + " actual) with delta: 1");

            if (isDebugging) {
                printOutput(conf, fs, ".log", new IntWritable(), new PipesVectorWritable());
            }
        }

    }

    // **********************************************************************
    // prepareTestInputData
    // **********************************************************************
    public static List<Preference<Long, Long>> prepareTestInputData(Configuration conf, FileSystem fs, Path in,
            Path preferencesIn) throws IOException {

        Preference[] train_prefs = { new Preference<Integer, Integer>(1, 0, 4),
                new Preference<Integer, Integer>(1, 1, 2.5), new Preference<Integer, Integer>(1, 2, 3.5),

                new Preference<Integer, Integer>(2, 0, 4), new Preference<Integer, Integer>(2, 1, 2.5),
                new Preference<Integer, Integer>(2, 2, 3.5), new Preference<Integer, Integer>(2, 3, 1),
                new Preference<Integer, Integer>(2, 4, 3.5),

                new Preference<Integer, Integer>(3, 0, 4), new Preference<Integer, Integer>(3, 1, 2.5),
                new Preference<Integer, Integer>(3, 2, 3.5), new Preference<Integer, Integer>(3, 3, 1),
                new Preference<Integer, Integer>(3, 4, 3.5) };

        List<Preference<Long, Long>> test_prefs = new ArrayList<Preference<Long, Long>>();
        test_prefs.add(new Preference<Long, Long>(1l, 0l, 4));
        test_prefs.add(new Preference<Long, Long>(1l, 1l, 2.5));
        test_prefs.add(new Preference<Long, Long>(1l, 2l, 3.5));
        test_prefs.add(new Preference<Long, Long>(1l, 3l, 1));
        test_prefs.add(new Preference<Long, Long>(1l, 4l, 3.5));

        // Delete input files if already exist
        if (fs.exists(in)) {
            fs.delete(in, true);
        }
        if (fs.exists(preferencesIn)) {
            fs.delete(preferencesIn, true);
        }

        final SequenceFile.Writer prefWriter = SequenceFile.createWriter(fs, conf, preferencesIn,
                LongWritable.class, PipesVectorWritable.class, CompressionType.NONE);

        for (Preference<Integer, Integer> taste : train_prefs) {
            double values[] = new double[2];
            values[0] = taste.getItemId();
            values[1] = taste.getValue().get();
            prefWriter.append(new LongWritable(taste.getUserId()),
                    new PipesVectorWritable(new DenseDoubleVector(values)));
        }
        prefWriter.close();

        return test_prefs;
    }

    // **********************************************************************
    // generateRandomInputData and return test data
    // **********************************************************************
    public static List<Preference<Long, Long>> generateRandomInputData(Configuration conf, FileSystem fs, Path in,
            int numBspTask, int numGPUBspTask, int userCount, int itemCount, int percentNonZeroValues,
            int GPUPercentage, int maxTestPrefs) throws IOException {

        // Delete input directory if already exist
        if (fs.exists(in)) {
            fs.delete(in, true);
        }

        Random rand = new Random(32L);
        Set<Map.Entry<Long, Long>> userItemPairs = new HashSet<Map.Entry<Long, Long>>();
        List<Preference<Long, Long>> testItems = new ArrayList<Preference<Long, Long>>();

        int possibleUserItemRatings = userCount * itemCount;
        int userItemRatings = possibleUserItemRatings * percentNonZeroValues / 100;
        System.out.println("generateRandomInputData possibleRatings: " + possibleUserItemRatings + " ratings: "
                + userItemRatings);

        // Compute work distributions
        int cpuTaskNum = numBspTask - numGPUBspTask;
        long ratingsPerGPUTask = 0;
        long ratingsPerCPU = 0;
        long ratingsPerCPUTask = 0;
        if ((numGPUBspTask > 0) && (GPUPercentage > 0) && (GPUPercentage <= 100)) {
            ratingsPerGPUTask = (userItemRatings * GPUPercentage) / 100;
            ratingsPerCPU = userItemRatings - ratingsPerGPUTask;
        } else {
            ratingsPerCPU = userItemRatings;
        }
        if (cpuTaskNum > 0) {
            ratingsPerCPUTask = ratingsPerCPU / cpuTaskNum;
        }

        System.out.println("generateRandomInputData ratingsPerGPUTask: " + ratingsPerGPUTask + " ratingsPerCPU: "
                + ratingsPerCPU + " ratingsPerCPUTask: " + ratingsPerCPUTask);

        for (int part = 0; part < numBspTask; part++) {
            Path partIn = new Path(in, "part" + part + ".seq");
            final SequenceFile.Writer dataWriter = SequenceFile.createWriter(fs, conf, partIn, LongWritable.class,
                    PipesVectorWritable.class, CompressionType.NONE);

            long interval = 0;
            if (part > cpuTaskNum) {
                interval = ratingsPerGPUTask;
            } else {
                interval = ratingsPerCPUTask;
            }
            long start = interval * part;
            long end = start + interval - 1;
            if ((numBspTask - 1) == part) {
                end = userItemRatings;
            }
            LOG.info("Partition " + part + ": from " + start + " to " + end);

            for (long i = start; i <= end; i++) {

                // Find new user item rating which was not used before
                Map.Entry<Long, Long> userItemPair;
                do {
                    long userId = rand.nextInt(userCount);
                    long itemId = rand.nextInt(itemCount);
                    userItemPair = new AbstractMap.SimpleImmutableEntry<Long, Long>(userId, itemId);
                } while (userItemPairs.contains(userItemPair));

                // Add user item rating
                userItemPairs.add(userItemPair);

                // Generate rating
                int rating = rand.nextInt(5) + 1; // values between 1 and 5

                // Add user item rating to test data
                if (i < maxTestPrefs) {
                    testItems.add(
                            new Preference<Long, Long>(userItemPair.getKey(), userItemPair.getValue(), rating));
                }

                // Write out user item rating
                dataWriter.append(new LongWritable(userItemPair.getKey()), new PipesVectorWritable(
                        new DenseDoubleVector(new double[] { userItemPair.getValue(), rating })));
            }
            dataWriter.close();
        }

        return testItems;
    }

    // **********************************************************************
    // convertInputData (MovieLens input files)
    // **********************************************************************
    public static List<Preference<Long, Long>> convertInputData(Configuration conf, FileSystem fs, Path in,
            Path preferencesIn, String inputFile, String separator, int maxTestPrefs) throws IOException {

        List<Preference<Long, Long>> test_prefs = new ArrayList<Preference<Long, Long>>();

        // Delete input files if already exist
        if (fs.exists(in)) {
            fs.delete(in, true);
        }
        if (fs.exists(preferencesIn)) {
            fs.delete(preferencesIn, true);
        }

        final SequenceFile.Writer prefWriter = SequenceFile.createWriter(fs, conf, preferencesIn,
                LongWritable.class, PipesVectorWritable.class, CompressionType.NONE);

        BufferedReader br = new BufferedReader(new FileReader(inputFile));
        String line;
        while ((line = br.readLine()) != null) {
            String[] values = line.split(separator);
            long userId = Long.parseLong(values[0]);
            long itemId = Long.parseLong(values[1]);
            double rating = Double.parseDouble(values[2]);
            // System.out.println("userId: " + userId + " itemId: " + itemId
            // + " rating: " + rating);

            double vector[] = new double[2];
            vector[0] = itemId;
            vector[1] = rating;
            prefWriter.append(new LongWritable(userId), new PipesVectorWritable(new DenseDoubleVector(vector)));

            // Add test preferences
            maxTestPrefs--;
            if (maxTestPrefs > 0) {
                test_prefs.add(new Preference<Long, Long>(userId, itemId, rating));
            }

        }
        br.close();
        prefWriter.close();

        return test_prefs;
    }

    // **********************************************************************
    // printOutput
    // **********************************************************************
    static void printOutput(Configuration conf, FileSystem fs, String extensionFilter, Writable key, Writable value)
            throws IOException {
        FileStatus[] files = fs.listStatus(CONF_OUTPUT_DIR);
        for (int i = 0; i < files.length; i++) {
            if ((files[i].getLen() > 0) && (files[i].getPath().getName().endsWith(extensionFilter))) {
                printFile(conf, fs, files[i].getPath(), key, value);
            }
        }
        // fs.delete(FileOutputFormat.getOutputPath(job), true);
    }

    // **********************************************************************
    // printFile
    // **********************************************************************
    static void printFile(Configuration conf, FileSystem fs, Path file, Writable key, Writable value)
            throws IOException {
        System.out.println("File " + file.toString());
        SequenceFile.Reader reader = null;
        try {
            reader = new SequenceFile.Reader(fs, file, conf);

            while (reader.next(key, value)) {
                System.out.println("key: '" + key.toString() + "' value: '" + value.toString() + "'\n");
            }
        } catch (IOException e) {
            FSDataInputStream in = fs.open(file);
            IOUtils.copyBytes(in, System.out, conf, false);
            in.close();
        } catch (NullPointerException e) {
            LOG.error(e);
        } finally {
            if (reader != null) {
                reader.close();
            }
        }
    }

}