Java tutorial
/* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. * The ASF licenses this file to You under the Apache License, Version 2.0 * (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.apache.eagle.jpm.spark.history.crawl; import org.apache.eagle.jpm.spark.entity.*; import org.apache.eagle.jpm.spark.history.SparkHistoryJobAppConfig; import org.apache.eagle.jpm.util.*; import org.apache.eagle.log.base.taggedlog.TaggedLogAPIEntity; import org.apache.eagle.service.client.EagleServiceClientException; import org.apache.eagle.service.client.impl.EagleServiceBaseClient; import org.apache.eagle.service.client.impl.EagleServiceClientImpl; import com.typesafe.config.Config; import com.typesafe.config.ConfigFactory; import org.apache.commons.lang.ArrayUtils; import org.json.simple.JSONArray; import org.json.simple.JSONObject; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import java.io.IOException; import java.util.*; public class JHFSparkEventReader { private static final Logger LOG = LoggerFactory.getLogger(JHFSparkEventReader.class); private static final int FLUSH_LIMIT = 500; private long firstTaskLaunchTime; private long lastEventTime; private Map<String, SparkExecutor> executors; private SparkApp app; private Map<Integer, SparkJob> jobs; private Map<String, SparkStage> stages; private Map<Integer, Set<String>> jobStageMap; private Map<Long, SparkTask> tasks; private EagleServiceClientImpl client; private Map<String, Map<Integer, Boolean>> stageTaskStatusMap; private List<TaggedLogAPIEntity> createEntities; private SparkHistoryJobAppConfig config; private Config conf; public JHFSparkEventReader(SparkHistoryJobAppConfig config, Map<String, String> baseTags, SparkApplicationInfo info) { app = new SparkApp(); app.setTags(new HashMap<String, String>(baseTags)); app.setYarnState(info.getState()); app.setYarnStatus(info.getFinalStatus()); createEntities = new ArrayList<>(); jobs = new HashMap<Integer, SparkJob>(); stages = new HashMap<String, SparkStage>(); jobStageMap = new HashMap<Integer, Set<String>>(); tasks = new HashMap<Long, SparkTask>(); executors = new HashMap<String, SparkExecutor>(); stageTaskStatusMap = new HashMap<>(); conf = config.getConfig(); this.config = config; this.initiateClient(); } public SparkApp getApp() { return this.app; } public void read(JSONObject eventObj) { String eventType = (String) eventObj.get("Event"); if (eventType.equalsIgnoreCase(SparkEventType.SparkListenerApplicationStart.toString())) { handleAppStarted(eventObj); } else if (eventType.equalsIgnoreCase(SparkEventType.SparkListenerEnvironmentUpdate.toString())) { handleEnvironmentSet(eventObj); } else if (eventType.equalsIgnoreCase(SparkEventType.SparkListenerExecutorAdded.toString())) { handleExecutorAdd(eventObj); } else if (eventType.equalsIgnoreCase(SparkEventType.SparkListenerBlockManagerAdded.toString())) { handleBlockManagerAdd(eventObj); } else if (eventType.equalsIgnoreCase(SparkEventType.SparkListenerJobStart.toString())) { handleJobStart(eventObj); } else if (eventType.equalsIgnoreCase(SparkEventType.SparkListenerStageSubmitted.toString())) { handleStageSubmit(eventObj); } else if (eventType.equalsIgnoreCase(SparkEventType.SparkListenerTaskStart.toString())) { handleTaskStart(eventObj); } else if (eventType.equalsIgnoreCase(SparkEventType.SparkListenerTaskEnd.toString())) { handleTaskEnd(eventObj); } else if (eventType.equalsIgnoreCase(SparkEventType.SparkListenerStageCompleted.toString())) { handleStageComplete(eventObj); } else if (eventType.equalsIgnoreCase(SparkEventType.SparkListenerJobEnd.toString())) { handleJobEnd(eventObj); } else if (eventType.equalsIgnoreCase(SparkEventType.SparkListenerExecutorRemoved.toString())) { handleExecutorRemoved(eventObj); } else if (eventType.equalsIgnoreCase(SparkEventType.SparkListenerApplicationEnd.toString())) { handleAppEnd(eventObj); } else if (eventType.equalsIgnoreCase(SparkEventType.SparkListenerBlockManagerRemoved.toString())) { //nothing to do now } else { LOG.info("Not registered event type:" + eventType); } } private void handleEnvironmentSet(JSONObject event) { app.setConfig(new JobConfig()); JSONObject sparkProps = (JSONObject) event.get("Spark Properties"); String[] props = { "spark.yarn.app.id", "spark.executor.memory", "spark.driver.host", "spark.driver.port", "spark.driver.memory", "spark.scheduler.pool", "spark.executor.cores", "spark.yarn.am.memory", "spark.yarn.am.cores", "spark.yarn.executor.memoryOverhead", "spark.yarn.driver.memoryOverhead", "spark.yarn.am.memoryOverhead", "spark.master" }; String[] additionalJobConf = null; if (conf.hasPath("spark.jobConf.additional.info")) { additionalJobConf = conf.getString("spark.jobConf.additional.info").split(",\\s*"); } String[] jobConf = (String[]) ArrayUtils.addAll(additionalJobConf, props); for (String prop : jobConf) { if (sparkProps.containsKey(prop)) { app.getConfig().getConfig().put(prop, (String) sparkProps.get(prop)); } } } private Object getConfigVal(JobConfig config, String configName, String type) { if (config.getConfig().containsKey(configName)) { Object val = config.getConfig().get(configName); if (type.equalsIgnoreCase(Integer.class.getName())) { return Integer.parseInt((String) val); } else { return val; } } else { if (type.equalsIgnoreCase(Integer.class.getName())) { return conf.getInt("spark.defaultVal." + configName); } else { return conf.getString("spark.defaultVal." + configName); } } } private boolean isClientMode(JobConfig config) { return config.getConfig().get("spark.master").equalsIgnoreCase("yarn-client"); } private void handleAppStarted(JSONObject event) { //need update all entities tag before app start List<TaggedLogAPIEntity> entities = new ArrayList<TaggedLogAPIEntity>(); entities.addAll(this.executors.values()); entities.add(this.app); long appStartTime = JSONUtils.getLong(event, "Timestamp", lastEventTime); for (TaggedLogAPIEntity entity : entities) { entity.getTags().put(SparkJobTagName.SPARK_APP_ID.toString(), JSONUtils.getString(event, "App ID")); entity.getTags().put(SparkJobTagName.SPARK_APP_NAME.toString(), JSONUtils.getString(event, "App Name")); // In yarn-client mode, attemptId is not available in the log, so we set attemptId = 1. String attemptId = isClientMode(this.app.getConfig()) ? "1" : JSONUtils.getString(event, "App Attempt ID"); entity.getTags().put(SparkJobTagName.SPARK_APP_ATTEMPT_ID.toString(), attemptId); // the second argument of getNormalizeName() is changed to null because the original code contains sensitive text // original second argument looks like: this.app.getConfig().getConfig().get("xxx"), "xxx" is the sensitive text entity.getTags().put(SparkJobTagName.SPARK_APP_NORM_NAME.toString(), this.getNormalizedName(JSONUtils.getString(event, "App Name"), null)); entity.getTags().put(SparkJobTagName.SPARK_USER.toString(), JSONUtils.getString(event, "User")); entity.setTimestamp(appStartTime); } this.app.setStartTime(appStartTime); this.lastEventTime = appStartTime; } private void handleExecutorAdd(JSONObject event) { String executorID = (String) event.get("Executor ID"); long executorAddTime = JSONUtils.getLong(event, "Timestamp", lastEventTime); this.lastEventTime = executorAddTime; SparkExecutor executor = this.initiateExecutor(executorID, executorAddTime); JSONObject executorInfo = JSONUtils.getJSONObject(event, "Executor Info"); } private void handleBlockManagerAdd(JSONObject event) { long maxMemory = JSONUtils.getLong(event, "Maximum Memory"); long timestamp = JSONUtils.getLong(event, "Timestamp", lastEventTime); this.lastEventTime = timestamp; JSONObject blockInfo = JSONUtils.getJSONObject(event, "Block Manager ID"); String executorID = JSONUtils.getString(blockInfo, "Executor ID"); String hostAndPort = JSONUtils.getString(blockInfo, "Host") + ":" + JSONUtils.getLong(blockInfo, "Port"); SparkExecutor executor = this.initiateExecutor(executorID, timestamp); executor.setMaxMemory(maxMemory); executor.setHostPort(hostAndPort); } private void handleTaskStart(JSONObject event) { this.initializeTask(event); } private void handleTaskEnd(JSONObject event) { JSONObject taskInfo = JSONUtils.getJSONObject(event, "Task Info"); long taskId = JSONUtils.getLong(taskInfo, "Task ID"); SparkTask task = tasks.get(taskId); if (task == null) { return; } task.setFailed(JSONUtils.getBoolean(taskInfo, "Failed")); JSONObject taskMetrics = JSONUtils.getJSONObject(event, "Task Metrics"); if (null != taskMetrics) { task.setExecutorDeserializeTime( JSONUtils.getLong(taskMetrics, "Executor Deserialize Time", lastEventTime)); task.setExecutorRunTime(JSONUtils.getLong(taskMetrics, "Executor Run Time", lastEventTime)); task.setJvmGcTime(JSONUtils.getLong(taskMetrics, "JVM GC Time", lastEventTime)); task.setResultSize(JSONUtils.getLong(taskMetrics, "Result Size")); task.setResultSerializationTime( JSONUtils.getLong(taskMetrics, "Result Serialization Time", lastEventTime)); task.setMemoryBytesSpilled(JSONUtils.getLong(taskMetrics, "Memory Bytes Spilled")); task.setDiskBytesSpilled(JSONUtils.getLong(taskMetrics, "Disk Bytes Spilled")); JSONObject inputMetrics = JSONUtils.getJSONObject(taskMetrics, "Input Metrics"); if (null != inputMetrics) { task.setInputBytes(JSONUtils.getLong(inputMetrics, "Bytes Read")); task.setInputRecords(JSONUtils.getLong(inputMetrics, "Records Read")); } JSONObject outputMetrics = JSONUtils.getJSONObject(taskMetrics, "Output Metrics"); if (null != outputMetrics) { task.setOutputBytes(JSONUtils.getLong(outputMetrics, "Bytes Written")); task.setOutputRecords(JSONUtils.getLong(outputMetrics, "Records Written")); } JSONObject shuffleWriteMetrics = JSONUtils.getJSONObject(taskMetrics, "Shuffle Write Metrics"); if (null != shuffleWriteMetrics) { task.setShuffleWriteBytes(JSONUtils.getLong(shuffleWriteMetrics, "Shuffle Bytes Written")); task.setShuffleWriteRecords(JSONUtils.getLong(shuffleWriteMetrics, "Shuffle Records Written")); } JSONObject shuffleReadMetrics = JSONUtils.getJSONObject(taskMetrics, "Shuffle Read Metrics"); if (null != shuffleReadMetrics) { task.setShuffleReadLocalBytes(JSONUtils.getLong(shuffleReadMetrics, "Local Bytes Read")); task.setShuffleReadRemoteBytes(JSONUtils.getLong(shuffleReadMetrics, "Remote Bytes Read")); task.setShuffleReadRecords(JSONUtils.getLong(shuffleReadMetrics, "Total Records Read")); } } else { //for tasks success without task metrics, save in the end if no other information if (!task.isFailed()) { return; } } aggregateToStage(task); aggregateToExecutor(task); tasks.remove(taskId); this.flushEntities(task, false); } private SparkTask initializeTask(JSONObject event) { SparkTask task = new SparkTask(); task.setTags(new HashMap<>(this.app.getTags())); task.setTimestamp(app.getTimestamp()); task.getTags().put(SparkJobTagName.SPARK_SATGE_ID.toString(), Long.toString(JSONUtils.getLong(event, "Stage ID"))); task.getTags().put(SparkJobTagName.SPARK_STAGE_ATTEMPT_ID.toString(), Long.toString(JSONUtils.getLong(event, "Stage Attempt ID"))); JSONObject taskInfo = JSONUtils.getJSONObject(event, "Task Info"); long taskId = JSONUtils.getLong(taskInfo, "Task ID"); task.setTaskId(taskId); task.getTags().put(SparkJobTagName.SPARK_TASK_INDEX.toString(), Long.toString(JSONUtils.getLong(taskInfo, "Index"))); task.getTags().put(SparkJobTagName.SPARK_TASK_ATTEMPT_ID.toString(), Integer.toString(JSONUtils.getInt(taskInfo, "Attempt"))); long launchTime = JSONUtils.getLong(taskInfo, "Launch Time", lastEventTime); this.lastEventTime = launchTime; if (taskId == 0) { this.setFirstTaskLaunchTime(launchTime); } task.setLaunchTime(launchTime); task.setExecutorId(JSONUtils.getString(taskInfo, "Executor ID")); task.setHost(JSONUtils.getString(taskInfo, "Host")); task.setTaskLocality(JSONUtils.getString(taskInfo, "Locality")); task.setSpeculative(JSONUtils.getBoolean(taskInfo, "Speculative")); tasks.put(task.getTaskId(), task); return task; } private void setFirstTaskLaunchTime(long launchTime) { this.firstTaskLaunchTime = launchTime; } private void handleJobStart(JSONObject event) { SparkJob job = new SparkJob(); job.setTags(new HashMap<>(this.app.getTags())); job.setTimestamp(app.getTimestamp()); int jobId = JSONUtils.getInt(event, "Job ID"); job.getTags().put(SparkJobTagName.SPARK_JOB_ID.toString(), Integer.toString(jobId)); long submissionTime = JSONUtils.getLong(event, "Submission Time", lastEventTime); job.setSubmissionTime(submissionTime); this.lastEventTime = submissionTime; //for complete application, no active stages/tasks job.setNumActiveStages(0); job.setNumActiveTasks(0); this.jobs.put(jobId, job); this.jobStageMap.put(jobId, new HashSet<String>()); JSONArray stages = JSONUtils.getJSONArray(event, "Stage Infos"); int stagesSize = (stages == null ? 0 : stages.size()); job.setNumStages(stagesSize); for (int i = 0; i < stagesSize; i++) { JSONObject stageInfo = (JSONObject) stages.get(i); int stageId = JSONUtils.getInt(stageInfo, "Stage ID"); int stageAttemptId = JSONUtils.getInt(stageInfo, "Stage Attempt ID"); String stageName = JSONUtils.getString(stageInfo, "Stage Name"); int numTasks = JSONUtils.getInt(stageInfo, "Number of Tasks"); this.initiateStage(jobId, stageId, stageAttemptId, stageName, numTasks); } } private void handleStageSubmit(JSONObject event) { JSONObject stageInfo = JSONUtils.getJSONObject(event, "Stage Info"); int stageId = JSONUtils.getInt(stageInfo, "Stage ID"); int stageAttemptId = JSONUtils.getInt(stageInfo, "Stage Attempt ID"); String key = this.generateStageKey(Integer.toString(stageId), Integer.toString(stageAttemptId)); stageTaskStatusMap.put(key, new HashMap<Integer, Boolean>()); if (!stages.containsKey(key)) { //may be further attempt for one stage String baseAttempt = this.generateStageKey(Integer.toString(stageId), "0"); if (stages.containsKey(baseAttempt)) { SparkStage stage = stages.get(baseAttempt); String jobId = stage.getTags().get(SparkJobTagName.SPARK_JOB_ID.toString()); String stageName = JSONUtils.getString(event, "Stage Name"); int numTasks = JSONUtils.getInt(stageInfo, "Number of Tasks"); this.initiateStage(Integer.parseInt(jobId), stageId, stageAttemptId, stageName, numTasks); } } } private void handleStageComplete(JSONObject event) { JSONObject stageInfo = JSONUtils.getJSONObject(event, "Stage Info"); int stageId = JSONUtils.getInt(stageInfo, "Stage ID"); int stageAttemptId = JSONUtils.getInt(stageInfo, "Stage Attempt ID"); String key = this.generateStageKey(Integer.toString(stageId), Integer.toString(stageAttemptId)); SparkStage stage = stages.get(key); // If "Submission Time" is not available, use the "Launch Time" of "Task ID" = 0. Long submissionTime = JSONUtils.getLong(stageInfo, "Submission Time", firstTaskLaunchTime); stage.setSubmitTime(submissionTime); long completeTime = JSONUtils.getLong(stageInfo, "Completion Time", lastEventTime); stage.setCompleteTime(completeTime); this.lastEventTime = completeTime; if (stageInfo != null && stageInfo.containsKey("Failure Reason")) { stage.setStatus(SparkEntityConstant.SparkStageStatus.FAILED.toString()); } else { stage.setStatus(SparkEntityConstant.SparkStageStatus.COMPLETE.toString()); } } private void handleExecutorRemoved(JSONObject event) { String executorID = JSONUtils.getString(event, "Executor ID"); SparkExecutor executor = executors.get(executorID); long removedTime = JSONUtils.getLong(event, "Timestamp", lastEventTime); executor.setEndTime(removedTime); this.lastEventTime = removedTime; } private void handleJobEnd(JSONObject event) { int jobId = JSONUtils.getInt(event, "Job ID"); SparkJob job = jobs.get(jobId); long completionTime = JSONUtils.getLong(event, "Completion Time", lastEventTime); job.setCompletionTime(completionTime); this.lastEventTime = completionTime; JSONObject jobResult = JSONUtils.getJSONObject(event, "Job Result"); String result = JSONUtils.getString(jobResult, "Result"); if (result.equalsIgnoreCase("JobSucceeded")) { job.setStatus(SparkEntityConstant.SparkJobStatus.SUCCEEDED.toString()); } else { job.setStatus(SparkEntityConstant.SparkJobStatus.FAILED.toString()); } } private void handleAppEnd(JSONObject event) { long endTime = JSONUtils.getLong(event, "Timestamp", lastEventTime); app.setEndTime(endTime); this.lastEventTime = endTime; } public void clearReader() throws Exception { //clear tasks for (SparkTask task : tasks.values()) { LOG.info("Task {} does not have result or no task metrics.", task.getTaskId()); task.setFailed(true); aggregateToStage(task); aggregateToExecutor(task); this.flushEntities(task, false); } List<SparkStage> needStoreStages = new ArrayList<>(); for (SparkStage stage : this.stages.values()) { int jobId = Integer.parseInt(stage.getTags().get(SparkJobTagName.SPARK_JOB_ID.toString())); if (stage.getSubmitTime() == 0 || stage.getCompleteTime() == 0) { SparkJob job = this.jobs.get(jobId); job.setNumSkippedStages(job.getNumSkippedStages() + 1); job.setNumSkippedTasks(job.getNumSkippedTasks() + stage.getNumTasks()); } else { this.aggregateToJob(stage); this.aggregateStageToApp(stage); needStoreStages.add(stage); } String stageId = stage.getTags().get(SparkJobTagName.SPARK_SATGE_ID.toString()); String stageAttemptId = stage.getTags().get(SparkJobTagName.SPARK_STAGE_ATTEMPT_ID.toString()); this.jobStageMap.get(jobId).remove(this.generateStageKey(stageId, stageAttemptId)); } this.flushEntities(needStoreStages, false); for (SparkJob job : jobs.values()) { this.aggregateJobToApp(job); } this.flushEntities(jobs.values(), false); app.setExecutors(executors.values().size()); long executorMemory = Utils.parseMemory( (String) this.getConfigVal(this.app.getConfig(), "spark.executor.memory", String.class.getName())); long driverMemory = Utils.parseMemory(this.isClientMode(app.getConfig()) ? (String) this.getConfigVal(this.app.getConfig(), "spark.yarn.am.memory", String.class.getName()) : (String) this.getConfigVal(app.getConfig(), "spark.driver.memory", String.class.getName())); int executorCore = (Integer) this.getConfigVal(app.getConfig(), "spark.executor.cores", Integer.class.getName()); int driverCore = this.isClientMode(app.getConfig()) ? (Integer) this.getConfigVal(app.getConfig(), "spark.yarn.am.cores", Integer.class.getName()) : (Integer) this.getConfigVal(app.getConfig(), "spark.driver.cores", Integer.class.getName()); long executorMemoryOverhead = this.getMemoryOverhead(app.getConfig(), executorMemory, "spark.yarn.executor.memoryOverhead"); long driverMemoryOverhead = this.isClientMode(app.getConfig()) ? this.getMemoryOverhead(app.getConfig(), driverMemory, "spark.yarn.am.memoryOverhead") : this.getMemoryOverhead(app.getConfig(), driverMemory, "spark.yarn.driver.memoryOverhead"); app.setExecMemoryBytes(executorMemory); app.setDriveMemoryBytes(driverMemory); app.setExecutorCores(executorCore); app.setDriverCores(driverCore); app.setExecutorMemoryOverhead(executorMemoryOverhead); app.setDriverMemoryOverhead(driverMemoryOverhead); for (SparkExecutor executor : executors.values()) { String executorID = executor.getTags().get(SparkJobTagName.SPARK_EXECUTOR_ID.toString()); if (executorID.equalsIgnoreCase("driver")) { executor.setExecMemoryBytes(driverMemory); executor.setCores(driverCore); executor.setMemoryOverhead(driverMemoryOverhead); } else { executor.setExecMemoryBytes(executorMemory); executor.setCores(executorCore); executor.setMemoryOverhead(executorMemoryOverhead); } if (app.getEndTime() <= 0L) { app.setEndTime(this.lastEventTime); } if (executor.getEndTime() <= 0L) { executor.setEndTime(app.getEndTime()); } this.aggregateExecutorToApp(executor); } this.flushEntities(executors.values(), false); //spark code...tricky app.setSkippedTasks(app.getCompleteTasks()); this.flushEntities(app, true); } private long getMemoryOverhead(JobConfig config, long executorMemory, String fieldName) { long result = 0L; String fieldValue = config.getConfig().get(fieldName); if (fieldValue != null) { result = Utils.parseMemory(fieldValue + "m"); if (result == 0L) { result = Utils.parseMemory(fieldValue); } } if (result == 0L) { result = Math.max(Utils.parseMemory(conf.getString("spark.defaultVal.spark.yarn.overhead.min")), executorMemory * conf.getInt("spark.defaultVal." + fieldName + ".factor") / 100); } return result; } private void aggregateExecutorToApp(SparkExecutor executor) { long totalExecutorTime = app.getTotalExecutorTime() + executor.getEndTime() - executor.getStartTime(); if (totalExecutorTime < 0L) { totalExecutorTime = 0L; } app.setTotalExecutorTime(totalExecutorTime); } private void aggregateJobToApp(SparkJob job) { //aggregate job level metrics app.setNumJobs(app.getNumJobs() + 1); app.setTotalTasks(app.getTotalTasks() + job.getNumTask()); app.setCompleteTasks(app.getCompleteTasks() + job.getNumCompletedTasks()); app.setSkippedTasks(app.getSkippedTasks() + job.getNumSkippedTasks()); app.setFailedTasks(app.getFailedTasks() + job.getNumFailedTasks()); app.setTotalStages(app.getTotalStages() + job.getNumStages()); app.setFailedStages(app.getFailedStages() + job.getNumFailedStages()); app.setSkippedStages(app.getSkippedStages() + job.getNumSkippedStages()); } private void aggregateStageToApp(SparkStage stage) { //aggregate task level metrics app.setDiskBytesSpilled(app.getDiskBytesSpilled() + stage.getDiskBytesSpilled()); app.setMemoryBytesSpilled(app.getMemoryBytesSpilled() + stage.getMemoryBytesSpilled()); app.setExecutorRunTime(app.getExecutorRunTime() + stage.getExecutorRunTime()); app.setJvmGcTime(app.getJvmGcTime() + stage.getJvmGcTime()); app.setExecutorDeserializeTime(app.getExecutorDeserializeTime() + stage.getExecutorDeserializeTime()); app.setResultSerializationTime(app.getResultSerializationTime() + stage.getResultSerializationTime()); app.setResultSize(app.getResultSize() + stage.getResultSize()); app.setInputRecords(app.getInputRecords() + stage.getInputRecords()); app.setInputBytes(app.getInputBytes() + stage.getInputBytes()); app.setOutputRecords(app.getOutputRecords() + stage.getOutputRecords()); app.setOutputBytes(app.getOutputBytes() + stage.getOutputBytes()); app.setShuffleWriteRecords(app.getShuffleWriteRecords() + stage.getShuffleWriteRecords()); app.setShuffleWriteBytes(app.getShuffleWriteBytes() + stage.getShuffleWriteBytes()); app.setShuffleReadRecords(app.getShuffleReadRecords() + stage.getShuffleReadRecords()); app.setShuffleReadBytes(app.getShuffleReadBytes() + stage.getShuffleReadBytes()); } private void aggregateToStage(SparkTask task) { String stageId = task.getTags().get(SparkJobTagName.SPARK_SATGE_ID.toString()); String stageAttemptId = task.getTags().get(SparkJobTagName.SPARK_STAGE_ATTEMPT_ID.toString()); String key = this.generateStageKey(stageId, stageAttemptId); SparkStage stage = stages.get(key); stage.setDiskBytesSpilled(stage.getDiskBytesSpilled() + task.getDiskBytesSpilled()); stage.setMemoryBytesSpilled(stage.getMemoryBytesSpilled() + task.getMemoryBytesSpilled()); stage.setExecutorRunTime(stage.getExecutorRunTime() + task.getExecutorRunTime()); stage.setJvmGcTime(stage.getJvmGcTime() + task.getJvmGcTime()); stage.setExecutorDeserializeTime(stage.getExecutorDeserializeTime() + task.getExecutorDeserializeTime()); stage.setResultSerializationTime(stage.getResultSerializationTime() + task.getResultSerializationTime()); stage.setResultSize(stage.getResultSize() + task.getResultSize()); stage.setInputRecords(stage.getInputRecords() + task.getInputRecords()); stage.setInputBytes(stage.getInputBytes() + task.getInputBytes()); stage.setOutputRecords(stage.getOutputRecords() + task.getOutputRecords()); stage.setOutputBytes(stage.getOutputBytes() + task.getOutputBytes()); stage.setShuffleWriteRecords(stage.getShuffleWriteRecords() + task.getShuffleWriteRecords()); stage.setShuffleWriteBytes(stage.getShuffleWriteBytes() + task.getShuffleWriteBytes()); stage.setShuffleReadRecords(stage.getShuffleReadRecords() + task.getShuffleReadRecords()); long taskShuffleReadBytes = task.getShuffleReadLocalBytes() + task.getShuffleReadRemoteBytes(); stage.setShuffleReadBytes(stage.getShuffleReadBytes() + taskShuffleReadBytes); boolean success = !task.isFailed(); Integer taskIndex = Integer.parseInt(task.getTags().get(SparkJobTagName.SPARK_TASK_INDEX.toString())); if (stageTaskStatusMap.get(key).containsKey(taskIndex)) { //has previous task attempt, retrieved from task index in one stage boolean previousResult = stageTaskStatusMap.get(key).get(taskIndex); success = previousResult || success; if (previousResult != success) { stage.setNumFailedTasks(stage.getNumFailedTasks() - 1); stage.setNumCompletedTasks(stage.getNumCompletedTasks() + 1); stageTaskStatusMap.get(key).put(taskIndex, success); } } else { if (success) { stage.setNumCompletedTasks(stage.getNumCompletedTasks() + 1); } else { stage.setNumFailedTasks(stage.getNumFailedTasks() + 1); } stageTaskStatusMap.get(key).put(taskIndex, success); } } private void aggregateToExecutor(SparkTask task) { String executorId = task.getExecutorId(); SparkExecutor executor = executors.get(executorId); if (null != executor) { executor.setTotalTasks(executor.getTotalTasks() + 1); if (task.isFailed()) { executor.setFailedTasks(executor.getFailedTasks() + 1); } else { executor.setCompletedTasks(executor.getCompletedTasks() + 1); } long taskShuffleReadBytes = task.getShuffleReadLocalBytes() + task.getShuffleReadRemoteBytes(); executor.setTotalShuffleRead(executor.getTotalShuffleRead() + taskShuffleReadBytes); executor.setTotalDuration(executor.getTotalDuration() + task.getExecutorRunTime()); executor.setTotalInputBytes(executor.getTotalInputBytes() + task.getInputBytes()); executor.setTotalShuffleWrite(executor.getTotalShuffleWrite() + task.getShuffleWriteBytes()); executor.setTotalDuration(executor.getTotalDuration() + task.getExecutorRunTime()); } } private void aggregateToJob(SparkStage stage) { int jobId = Integer.parseInt(stage.getTags().get(SparkJobTagName.SPARK_JOB_ID.toString())); SparkJob job = jobs.get(jobId); job.setNumCompletedTasks(job.getNumCompletedTasks() + stage.getNumCompletedTasks()); job.setNumFailedTasks(job.getNumFailedTasks() + stage.getNumFailedTasks()); job.setNumTask(job.getNumTask() + stage.getNumTasks()); if (stage.getStatus().equalsIgnoreCase(SparkEntityConstant.SparkStageStatus.COMPLETE.toString())) { //if multiple attempts succeed, just count one if (!hasStagePriorAttemptSuccess(stage)) { job.setNumCompletedStages(job.getNumCompletedStages() + 1); } } else { job.setNumFailedStages(job.getNumFailedStages() + 1); } } private boolean hasStagePriorAttemptSuccess(SparkStage stage) { int stageAttemptId = Integer .parseInt(stage.getTags().get(SparkJobTagName.SPARK_STAGE_ATTEMPT_ID.toString())); for (int i = 0; i < stageAttemptId; i++) { SparkStage previousStage = stages.get(this.generateStageKey( stage.getTags().get(SparkJobTagName.SPARK_SATGE_ID.toString()), Integer.toString(i))); if (previousStage.getStatus() .equalsIgnoreCase(SparkEntityConstant.SparkStageStatus.COMPLETE.toString())) { return true; } } return false; } private String generateStageKey(String stageId, String stageAttemptId) { return stageId + "-" + stageAttemptId; } private void initiateStage(int jobId, int stageId, int stageAttemptId, String name, int numTasks) { SparkStage stage = new SparkStage(); stage.setTags(new HashMap<>(this.app.getTags())); stage.setTimestamp(app.getTimestamp()); stage.getTags().put(SparkJobTagName.SPARK_JOB_ID.toString(), Integer.toString(jobId)); stage.getTags().put(SparkJobTagName.SPARK_SATGE_ID.toString(), Integer.toString(stageId)); stage.getTags().put(SparkJobTagName.SPARK_STAGE_ATTEMPT_ID.toString(), Integer.toString(stageAttemptId)); stage.setName(name); stage.setNumActiveTasks(0); stage.setNumTasks(numTasks); stage.setSchedulingPool(this.app.getConfig().getConfig().get("spark.scheduler.pool") == null ? "default" : this.app.getConfig().getConfig().get("spark.scheduler.pool")); String stageKey = this.generateStageKey(Integer.toString(stageId), Integer.toString(stageAttemptId)); stages.put(stageKey, stage); this.jobStageMap.get(jobId).add(stageKey); } private SparkExecutor initiateExecutor(String executorID, long startTime) { if (!executors.containsKey(executorID)) { SparkExecutor executor = new SparkExecutor(); executor.setTags(new HashMap<>(this.app.getTags())); executor.getTags().put(SparkJobTagName.SPARK_EXECUTOR_ID.toString(), executorID); executor.setStartTime(startTime); executor.setTimestamp(app.getTimestamp()); this.executors.put(executorID, executor); } return this.executors.get(executorID); } private String getNormalizedName(String jobName, String assignedName) { if (null != assignedName) { return assignedName; } else { return JobNameNormalization.getInstance(this.config.getConfig()).normalize(jobName); } } private void flushEntities(Object entity, boolean forceFlush) { this.flushEntities(Collections.singletonList(entity), forceFlush); } private void flushEntities(Collection entities, boolean forceFlush) { this.createEntities.addAll(entities); if (forceFlush || this.createEntities.size() >= config.eagleInfo.flushLimit) { try { this.doFlush(this.createEntities); this.createEntities.clear(); } catch (Exception e) { LOG.error("Fail to flush entities", e); } } } private EagleServiceBaseClient initiateClient() { client = new EagleServiceClientImpl(config.eagleInfo.host, config.eagleInfo.port, config.eagleInfo.basePath, config.eagleInfo.username, config.eagleInfo.password); int timeout = config.eagleInfo.timeout; client.setReadTimeout(timeout * 1000); return client; } private void doFlush(List entities) throws IOException, EagleServiceClientException { client.create(entities); int size = (entities == null ? 0 : entities.size()); LOG.info("finish flushing entities of total number " + size); } }