br.com.autonomiccs.cloudTraces.main.GoogleTracesToCloudTracesParser.java Source code

Java tutorial

Introduction

Here is the source code for br.com.autonomiccs.cloudTraces.main.GoogleTracesToCloudTracesParser.java

Source

/*
 * Cloud traces
 * Copyright (C) 2016 Autonomiccs, Inc.
 *
 * Licensed to the Autonomiccs, Inc. under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The Autonomiccs, Inc. licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *  http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */
package br.com.autonomiccs.cloudTraces.main;

import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileOutputStream;
import java.io.FileReader;
import java.io.IOException;
import java.io.OutputStreamWriter;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Set;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.apache.commons.collections.CollectionUtils;
import org.apache.commons.lang3.math.NumberUtils;
import org.apache.log4j.Logger;

import br.com.autonomiccs.cloudTraces.beans.GoogleJob;
import br.com.autonomiccs.cloudTraces.beans.GoogleTask;
import br.com.autonomiccs.cloudTraces.beans.GoogleTrace;
import br.com.autonomiccs.cloudTraces.beans.VirtualMachine;
import br.com.autonomiccs.cloudTraces.beans.VmServiceOffering;
import br.com.autonomiccs.cloudTraces.exceptions.GoogleTracesToCloudTracesException;
import br.com.autonomiccs.cloudTraces.service.VmServiceOfferingService;

public class GoogleTracesToCloudTracesParser {

    /**
     * Log4j logger used to report the progress and the statistics of the conversion.
     */
    private final static Logger logger = Logger.getLogger(GoogleTracesToCloudTracesParser.class);

    /**
     * Name of the file that receives the generated VM traces; it is handed unchanged to the
     * {@link FileOutputStream} that is opened when the traces are written.
     */
    private static String cloudTracesFileName = "cloudVmTraces.csv";

    /**
     * CPU amount, in MHz, that a normalized core usage of '0' is mapped to when the usage of a
     * task is de-normalized (lower bound of the linear mapping).
     */
    private static int minimumCoreUsageInMhz = 100;

    /**
     * CPU amount, in MHz, that a normalized core usage of '1' is mapped to when the usage of a
     * task is de-normalized (upper bound of the linear mapping).
     */
    private static int maximumCoreUsageInMhz = 3400;

    /**
     * Memory amount, in MB, that a normalized memory usage of '0' is mapped to when the usage of
     * a task is de-normalized (lower bound of the linear mapping).
     */
    private static int minimumMemoryUsageInMb = 25;

    /**
     * Memory amount, in MB, that a normalized memory usage of '1' is mapped to when the usage of
     * a task is de-normalized (upper bound of the linear mapping).
     */
    private static int maximumMemoryUsageInMb = 2048;

    /**
     * Converts a Google traces data set into the cloud VM traces file.
     *
     * <p>The steps are: validate the arguments, load the traces, group them into jobs, index the
     * tasks of each job by time, compute the time span and the peak usage of each job, log the
     * jobs with the extreme peak usages, create one VM per job and write the VM traces.
     *
     * @param args exactly one element: the path of the Google traces data set
     * @throws GoogleTracesToCloudTracesException
     *             e.g. if the arguments are invalid or if the data set does not yield any job
     */
    public static void main(String[] args) {
        validateArguments(args);
        List<GoogleTrace> googleTraces = readAllGoogleTracesFromDataset(args[0]);

        logger.info(String.format("#Google traces loaded [%d]", googleTraces.size()));
        Collection<GoogleJob> googleJobs = buildTasksHierachyAndCreateJobList(googleTraces);
        if (googleJobs.isEmpty()) {
            // Without this guard the statistics below would fail with a bare NoSuchElementException.
            throw new GoogleTracesToCloudTracesException(
                    String.format("The data set [%s] does not contain any job.", args[0]));
        }
        buildJobsTaksByTimeMap(googleJobs);
        fillOutStartAndEndTimeOfJobs(googleJobs);
        calculateThePeakJobResourceUsage(googleJobs);
        logJobsWithExtremeResourceUsage(googleJobs);

        List<VirtualMachine> virtualMachines = createVmsToExecuteJobs(googleJobs);
        writeVmTracesToFile(virtualMachines);
    }

    /**
     * Logs the jobs that have the highest and the lowest peak CPU and peak memory usage.
     *
     * @param googleJobs the jobs to inspect; must contain at least one job
     */
    private static void logJobsWithExtremeResourceUsage(Collection<GoogleJob> googleJobs) {
        GoogleJob firstJob = googleJobs.iterator().next();
        GoogleJob biggestCpuUsageJob = firstJob;
        GoogleJob biggestMemoryUsageJob = firstJob;
        GoogleJob lowestCpuUsageJob = firstJob;
        GoogleJob lowestMemoryUsageJob = firstJob;

        for (GoogleJob googleJob : googleJobs) {
            if (biggestCpuUsageJob.getMaximumCpuUsageAtTime() < googleJob.getMaximumCpuUsageAtTime()) {
                biggestCpuUsageJob = googleJob;
            }
            if (biggestMemoryUsageJob.getMaximumMemoryUsageAtTime() < googleJob.getMaximumMemoryUsageAtTime()) {
                biggestMemoryUsageJob = googleJob;
            }
            if (lowestCpuUsageJob.getMaximumCpuUsageAtTime() > googleJob.getMaximumCpuUsageAtTime()) {
                lowestCpuUsageJob = googleJob;
            }
            if (lowestMemoryUsageJob.getMaximumMemoryUsageAtTime() > googleJob.getMaximumMemoryUsageAtTime()) {
                lowestMemoryUsageJob = googleJob;
            }
        }
        logger.info("Max job cpu usage: " + biggestCpuUsageJob);
        logger.info("Max job memory usage: " + biggestMemoryUsageJob);
        logger.info("Min job cpu usage: " + lowestCpuUsageJob);
        logger.info("Min job memory usage: " + lowestMemoryUsageJob);
    }

    /**
     * Writes the VM traces file: one header line followed by one line per VM and per time at
     * which the job hosted by that VM has tasks.
     *
     * <p>Each line holds the time, the VM id, its deploy and destroy times, the name, number of
     * cores, total CPU (core speed times number of cores) and memory of its service offering, and
     * the CPU and memory used by the job's tasks at that time.
     *
     * <p>Lines are formatted with {@link Locale#ROOT} so the decimal separator is always a dot;
     * with the default locale a decimal comma could be written, which would clash with the comma
     * used as column separator. The file is written as UTF-8 regardless of the platform charset.
     *
     * @param virtualMachines the VMs to describe; the list is sorted in place by deploy time
     * @throws GoogleTracesToCloudTracesException if the file cannot be written
     */
    private static void writeVmTracesToFile(List<VirtualMachine> virtualMachines) {
        List<Integer> allTimeThatHaveTasks = getAllTimesThatWeExecuteTask(virtualMachines);

        Collections.sort(virtualMachines, new Comparator<VirtualMachine>() {
            @Override
            public int compare(VirtualMachine o1, VirtualMachine o2) {
                // Integer.compare cannot overflow, unlike subtracting the two values.
                return Integer.compare(o1.getDeployTime(), o2.getDeployTime());
            }
        });
        try (BufferedWriter bfw = new BufferedWriter(
                new OutputStreamWriter(new FileOutputStream(cloudTracesFileName), StandardCharsets.UTF_8))) {
            bfw.write(
                    "#Time, vmName, deployTime, remove time, service offering name, service offering number of cores, total allocated cpu, allocated memory, used cpu, used memory");
            bfw.newLine();
            int lines = 0;
            for (Integer time : allTimeThatHaveTasks) {
                for (VirtualMachine virtualMachine : virtualMachines) {
                    // Skip the VMs that do not exist at this time.
                    if (virtualMachine.getDeployTime() > time || virtualMachine.getDestroyTime() < time) {
                        continue;
                    }
                    VmServiceOffering vmServiceOffering = virtualMachine.getVmServiceOffering();
                    List<GoogleTask> vmTasksForTime = virtualMachine.getGoogleJob().getMapTimeByTasks().get(time);
                    if (CollectionUtils.isEmpty(vmTasksForTime)) {
                        continue;
                    }
                    double vmUsedMemoryForTime = 0;
                    double vmUsedCpuForTime = 0;
                    for (GoogleTask googleTask : vmTasksForTime) {
                        vmUsedCpuForTime += googleTask.getCpuUsage();
                        vmUsedMemoryForTime += googleTask.getMemoryUsage();
                    }
                    String vmTraceForTime = String.format(Locale.ROOT,
                            "%d, %s, %d, %d, %s, %d, %d, %d, %.10f, %.10f", time,
                            virtualMachine.getVmId(), virtualMachine.getDeployTime(),
                            virtualMachine.getDestroyTime(), vmServiceOffering.getName(),
                            vmServiceOffering.getNumberOfCores(),
                            vmServiceOffering.getCoreSpeed() * vmServiceOffering.getNumberOfCores(),
                            vmServiceOffering.getMemoryInMegaByte(), vmUsedCpuForTime, vmUsedMemoryForTime);
                    bfw.write(vmTraceForTime);
                    bfw.newLine();
                    lines++;
                }
            }
            logger.info(String.format("#lines [%d] written to the cloud data traces.", lines));
        } catch (IOException e) {
            throw new GoogleTracesToCloudTracesException(e);
        }

    }

    /**
     * Collects every time at which the job of at least one of the given VMs has tasks.
     *
     * @param virtualMachines the VMs whose jobs are inspected
     * @return the distinct times, sorted in ascending order
     */
    private static List<Integer> getAllTimesThatWeExecuteTask(List<VirtualMachine> virtualMachines) {
        Set<Integer> distinctTimes = new HashSet<>();
        for (VirtualMachine vm : virtualMachines) {
            // The keys of the time-by-tasks map are the times at which the job has tasks.
            distinctTimes.addAll(vm.getGoogleJob().getMapTimeByTasks().keySet());
        }
        List<Integer> sortedTimes = new ArrayList<>(distinctTimes);
        Collections.sort(sortedTimes);
        return sortedTimes;
    }

    /**
     * Creates one VM per job and names the VMs "VM-1", "VM-2", ... following the iteration order
     * of the given collection.
     *
     * @param googleJobs the jobs that need a VM
     * @return the created VMs, in the same order as the jobs were iterated
     */
    private static List<VirtualMachine> createVmsToExecuteJobs(Collection<GoogleJob> googleJobs) {
        List<VirtualMachine> vms = new ArrayList<>(googleJobs.size());
        for (GoogleJob job : googleJobs) {
            VirtualMachine vmForJob = createVirtualMachineForJob(job);
            // The numbering starts at 1: the n-th created VM gets the id "VM-n".
            vmForJob.setVmId("VM-" + (vms.size() + 1));
            vms.add(vmForJob);
        }
        return vms;
    }

    /**
     * Builds the VM that hosts the given job: it is deployed at the job start time, destroyed at
     * the job end time and uses a service offering that supports the peak load of the job.
     *
     * @param googleJob the job to host
     * @return the VM configured for the job (its id is not set here)
     * @throws GoogleTracesToCloudTracesException if no service offering supports the job
     */
    private static VirtualMachine createVirtualMachineForJob(GoogleJob googleJob) {
        VmServiceOffering offeringForPeakLoad = getServiceOfferingForVmToSupportJobsPeakLoad(googleJob);

        VirtualMachine vm = new VirtualMachine();
        vm.setDeployTime(googleJob.getStartTime());
        vm.setDestroyTime(googleJob.getEndTime());
        vm.setGoogleJob(googleJob);
        vm.setVmServiceOffering(offeringForPeakLoad);
        return vm;
    }

    /**
     * Returns the first service offering, in the order given by
     * {@link VmServiceOfferingService#getVmServiceOfferings()}, whose total CPU (core speed times
     * number of cores) and memory are not lower than the peak CPU and memory usage of the job.
     *
     * <p>NOTE(review): this picks the smallest suitable offering only if the service returns the
     * offerings from the smallest to the largest -- confirm against the service.
     *
     * @param googleJob the job whose peak load has to be supported
     * @return the first offering that is big enough
     * @throws GoogleTracesToCloudTracesException if no offering is big enough
     */
    private static VmServiceOffering getServiceOfferingForVmToSupportJobsPeakLoad(GoogleJob googleJob) {
        for (VmServiceOffering candidate : VmServiceOfferingService.getVmServiceOfferings()) {
            if (candidate.getCoreSpeed() * candidate.getNumberOfCores() < googleJob.getMaximumCpuUsageAtTime()
                    || candidate.getMemoryInMegaByte() < googleJob.getMaximumMemoryUsageAtTime()) {
                // Not enough CPU or not enough memory: try the next offering.
                continue;
            }
            return candidate;
        }
        throw new GoogleTracesToCloudTracesException(
                "Could not find a suitable service offering for a VM that is supposed to host the Job: "
                        + googleJob);
    }

    /**
     * Computes, for every job, the peak CPU and the peak memory usage and the times at which
     * they happen. The usage of a job at a given time is the sum of the usages of its tasks at
     * that time.
     *
     * <p>Times are visited in ascending order and a peak is only replaced by a strictly greater
     * value, so when the same peak value occurs more than once the earliest time is kept.
     *
     * @param googleJobs the jobs to update; their time-by-tasks maps must already be filled
     */
    private static void calculateThePeakJobResourceUsage(Collection<GoogleJob> googleJobs) {
        for (GoogleJob job : googleJobs) {
            Map<Integer, List<GoogleTask>> tasksByTime = job.getMapTimeByTasks();
            List<Integer> orderedTimes = new ArrayList<>(tasksByTime.keySet());
            Collections.sort(orderedTimes);

            for (Integer time : orderedTimes) {
                double cpuSum = 0;
                double memorySum = 0;
                for (GoogleTask task : tasksByTime.get(time)) {
                    cpuSum += task.getCpuUsage();
                    memorySum += task.getMemoryUsage();
                }

                boolean isNewCpuPeak = cpuSum > job.getMaximumCpuUsageAtTime();
                if (isNewCpuPeak) {
                    job.setMaximumCpuUsageAtTime(cpuSum);
                    job.setTimeWithPeakCpuUsage(time);
                }
                boolean isNewMemoryPeak = memorySum > job.getMaximumMemoryUsageAtTime();
                if (isNewMemoryPeak) {
                    job.setMaximumMemoryUsageAtTime(memorySum);
                    job.setTimeWithPeakMemoryUsage(time);
                }
            }
        }
    }

    /**
     * Sets the start and the end time of every job to, respectively, the lowest and the highest
     * time at which the job has tasks.
     *
     * @param googleJobs the jobs to update; their time-by-tasks maps must already be filled
     * @throws GoogleTracesToCloudTracesException if a job has no task at any time
     */
    private static void fillOutStartAndEndTimeOfJobs(Collection<GoogleJob> googleJobs) {
        for (GoogleJob googleJob : googleJobs) {
            Set<Integer> timesWithTasks = googleJob.getMapTimeByTasks().keySet();
            if (timesWithTasks.isEmpty()) {
                // A job without tasks has no time span; fail with a message that identifies it.
                throw new GoogleTracesToCloudTracesException(
                        "Could not determine the start and end time of a job that has no tasks: " + googleJob);
            }
            // Only the extremes are needed, so there is no reason to sort all the times.
            googleJob.setStartTime(Collections.min(timesWithTasks));
            googleJob.setEndTime(Collections.max(timesWithTasks));
        }

    }

    /**
     * Fills the time-by-tasks map of every job: each task of the job is appended to the list
     * stored under the time of the task, and the list is created on first use.
     *
     * @param googleJobs the jobs whose maps are filled
     */
    private static void buildJobsTaksByTimeMap(Collection<GoogleJob> googleJobs) {
        for (GoogleJob job : googleJobs) {
            Map<Integer, List<GoogleTask>> tasksByTime = job.getMapTimeByTasks();
            for (GoogleTask task : job.getTasks()) {
                List<GoogleTask> tasksAtSameTime = tasksByTime.get(task.getTime());
                if (tasksAtSameTime != null) {
                    tasksAtSameTime.add(task);
                    continue;
                }
                // First task seen for this time: start a new list for it.
                tasksAtSameTime = new ArrayList<>();
                tasksAtSameTime.add(task);
                tasksByTime.put(task.getTime(), tasksAtSameTime);
            }
        }
    }

    /**
     * Groups the traces into tasks and the tasks into jobs.
     *
     * <p>The first trace seen for a task id becomes the "parent" task of that id; every later
     * trace with the same task id is registered on the parent through
     * {@code addExecutionOfTaskThroughTime}. Afterwards one job is created per job id found in the
     * parent tasks, and it receives everything returned by {@code getExecutionThroughTime()} of
     * each of its parent tasks.
     *
     * <p>NOTE(review): whether {@code getExecutionThroughTime()} also contains the parent task
     * itself cannot be told from this class -- confirm in {@code GoogleTask}.
     *
     * @param googleTraces the traces read from the data set
     * @return the jobs that were created (the values view of an internal map)
     */
    private static Collection<GoogleJob> buildTasksHierachyAndCreateJobList(List<GoogleTrace> googleTraces) {
        Map<Integer, Integer> jobTypeByJobId = new HashMap<>();
        Map<Integer, GoogleTask> parentTaskByTaskId = new HashMap<>();
        for (GoogleTrace trace : googleTraces) {
            jobTypeByJobId.put(trace.getJobId(), trace.getJobType());

            int taskId = trace.getTaskId();
            GoogleTask execution = createGoogleTaskFromGoogleTrace(trace);
            GoogleTask parentTask = parentTaskByTaskId.get(taskId);
            if (parentTask != null) {
                parentTask.addExecutionOfTaskThroughTime(execution);
            } else {
                parentTaskByTaskId.put(taskId, execution);
            }
        }
        logger.info(
                String.format("#Tasks after we created the task hierarchy [%d]", parentTaskByTaskId.size()));
        logger.info(String.format("#Jobs ids found [%d]", jobTypeByJobId.size()));

        Map<Integer, GoogleJob> jobsById = new HashMap<>();
        for (GoogleTask parentTask : parentTaskByTaskId.values()) {
            int jobId = parentTask.getJobId();
            if (!jobsById.containsKey(jobId)) {
                GoogleJob newJob = new GoogleJob(jobId);
                newJob.setJobType(jobTypeByJobId.get(jobId));
                jobsById.put(jobId, newJob);
            }
            jobsById.get(jobId).getTasks().addAll(parentTask.getExecutionThroughTime());
        }
        logger.info(String.format("#Jobs with tasks [%d]", jobsById.size()));
        return jobsById.values();
    }

    /**
     * Creates a task from a trace: copies the task id, time, job id and normalized usages, and
     * derives the CPU usage (MHz) and memory usage (MB) by de-normalizing the normalized values
     * against the configured minimum and maximum usages.
     *
     * @param g the trace to convert
     * @return the task that represents the trace
     */
    private static GoogleTask createGoogleTaskFromGoogleTrace(GoogleTrace g) {
        GoogleTask task = new GoogleTask(g.getTaskId(), g.getTime(), g.getJobId());

        double coresShare = g.getNormalizedTaskCores();
        task.setNormalizedTaskCores(coresShare);
        double cpuUsageInMhz = deNormalizeData(coresShare, maximumCoreUsageInMhz, minimumCoreUsageInMhz);
        task.setCpuUsage(cpuUsageInMhz);

        double memoryShare = g.getNormalizedTaskMemory();
        task.setNormalizedTaskMemory(memoryShare);
        double memoryUsageInMb = deNormalizeData(memoryShare, maximumMemoryUsageInMb, minimumMemoryUsageInMb);
        task.setMemoryUsage(memoryUsageInMb);

        return task;
    }

    /**
     * Maps a normalized value linearly onto the interval [min, max]: '0' becomes {@code min} and
     * '1' becomes {@code max}.
     *
     * @param normalizedValue the value to map
     * @param max the value that a normalized '1' represents
     * @param min the value that a normalized '0' represents
     * @return {@code min + normalizedValue * (max - min)}
     */
    private static double deNormalizeData(double normalizedValue, double max, double min) {
        double range = max - min;
        return min + range * normalizedValue;
    }

    /**
     * Distinct times found in the traces parsed so far. It is filled while the traces are
     * created and is only read to log how many times have some task execution.
     */
    private static Set<Integer> times = new HashSet<>();

    /**
     * Reads every trace of the given data set. The first line is treated as the header and is
     * discarded; blank lines are skipped instead of aborting the conversion.
     *
     * @param googleTracesDataSet the path of the data set file
     * @return the traces, in the order they appear in the file
     * @throws GoogleTracesToCloudTracesException
     *             if the file cannot be read or a non-blank line is not a valid trace
     */
    private static List<GoogleTrace> readAllGoogleTracesFromDataset(String googleTracesDataSet) {
        List<GoogleTrace> googleTraces = new ArrayList<>();
        try (BufferedReader bf = new BufferedReader(new FileReader(googleTracesDataSet))) {
            //ignore the header, the metadata line.
            bf.readLine();
            String line;
            while ((line = bf.readLine()) != null) {
                if (line.trim().isEmpty()) {
                    continue;
                }
                googleTraces.add(createGoogleTrace(line));
            }
        } catch (IOException e) {
            throw new GoogleTracesToCloudTracesException(e);
        }
        logger.info(String.format("#Times that have some task execution [%d]", times.size()));
        return googleTraces;
    }

    /**
     * Expected layout of a trace line: time, job id, task id and a single-digit job type, each
     * followed by one whitespace character, and then the rest of the line (the normalized CPU
     * and memory usages).
     */
    private static final Pattern patternMatchGoogleTracesGroups = Pattern
            .compile("(\\d+)\\s(\\d+)\\s(\\d+)\\s(\\d)\\s(.+)");

    /**
     * Parses one line of the data set into a trace and records its time in the set of known
     * times.
     *
     * <p>The normalized usages are split on any run of whitespace, i.e. the same kind of
     * separator that the pattern accepts between the other columns; a line that does not provide
     * both usages is rejected.
     *
     * <p>NOTE(review): {@code NumberUtils.toInt} and {@code NumberUtils.toDouble} return 0 when a
     * value cannot be converted (for instance an id that does not fit in an {@code int}), so such
     * values are silently read as 0 -- confirm this is acceptable for the data sets in use.
     *
     * @param line the line to parse
     * @return the trace described by the line
     * @throws GoogleTracesToCloudTracesException if the line does not have the expected layout
     */
    private static GoogleTrace createGoogleTrace(String line) {
        Matcher matcher = patternMatchGoogleTracesGroups.matcher(line);
        if (!matcher.matches()) {
            throw new GoogleTracesToCloudTracesException(
                    String.format("The trace [%s] does not meet the expected pattern.", line));
        }
        String[] normalizedCpuAndMemory = matcher.group(5).trim().split("\\s+");
        if (normalizedCpuAndMemory.length < 2) {
            throw new GoogleTracesToCloudTracesException(
                    String.format("The trace [%s] does not meet the expected pattern.", line));
        }

        int time = NumberUtils.toInt(matcher.group(1));
        times.add(time);

        GoogleTrace googleTrace = new GoogleTrace();
        googleTrace.setTime(time);
        googleTrace.setJobId(NumberUtils.toInt(matcher.group(2)));
        googleTrace.setTaskId(NumberUtils.toInt(matcher.group(3)));
        googleTrace.setJobType(NumberUtils.toInt(matcher.group(4)));
        googleTrace.setNormalizedTaskCores(NumberUtils.toDouble(normalizedCpuAndMemory[0]));
        googleTrace.setNormalizedTaskMemory(NumberUtils.toDouble(normalizedCpuAndMemory[1]));
        return googleTrace;
    }

    /**
     * Checks that exactly one argument was given and that it is the path of an existing,
     * readable, regular file.
     *
     * @param args the command line arguments
     * @throws GoogleTracesToCloudTracesException if any of the checks fails
     */
    private static void validateArguments(String[] args) {
        if (args.length != 1) {
            throw new GoogleTracesToCloudTracesException(
                    "You should inform the full qualified path to the Google traces data set.");
        }
        File file = new File(args[0]);
        if (!file.exists()) {
            throw new GoogleTracesToCloudTracesException(String.format("File [%s] does not exist.", args[0]));
        }
        if (!file.isFile()) {
            // A directory exists and may be readable, but it cannot be opened as a data set.
            throw new GoogleTracesToCloudTracesException(String.format("[%s] is not a regular file.", args[0]));
        }
        if (!file.canRead()) {
            throw new GoogleTracesToCloudTracesException(String.format("Cannot read file [%s] .", args[0]));
        }
    }
}