Java tutorial
/******************************************************************************* * Copyright (c) 2015 Unicon (R) Licensed under the * Educational Community License, Version 2.0 (the "License"); you may * not use this file except in compliance with the License. You may * obtain a copy of the License at * * http://www.osedu.org/licenses/ECL-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an "AS IS" * BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express * or implied. See the License for the specific language governing * permissions and limitations under the License. *******************************************************************************/ package org.apereo.lap.model; import java.util.ArrayList; import java.util.Iterator; import java.util.List; import java.util.Map; import java.util.concurrent.ConcurrentHashMap; import org.apache.commons.configuration.HierarchicalConfiguration; import org.apache.commons.configuration.XMLConfiguration; import org.apache.commons.lang.StringUtils; import org.apereo.lap.model.Output.OutputField; import org.apereo.lap.services.configuration.ConfigurationService; import org.apereo.lap.services.input.BaseInputHandlerService; import org.apereo.lap.services.storage.StorageService; import org.slf4j.Logger; import org.slf4j.LoggerFactory; /** * This is an object that represents all configuration settings for a specific pipeline * * Each pipeline will be defined by a set of metadata which includes: * - name * - description (and recommendations for running the model) * - stat indicators (accuracy, confidence interval, etc.) * - required input fields * - processors (kettle ktr and kjb files, pmml files, etc.) * - output result definition * * @author Aaron Zeckoski (azeckoski @ unicon.net) (azeckoski @ vt.edu) */ //@JsonIgnoreProperties({"configuration"}) public class PipelineConfig { private static final Logger logger = LoggerFactory.getLogger(PipelineConfig.class); /** * The pipeline XML file which was loaded to create this config */ String filename; /** * the type of pipeline (e.g. marist_student_risk) this is the config for * (should be unique and should only use lowercase alphanums) */ String type; /** * the display name for this pipeline (used in logging as well) */ String name; String description; Map<String, Float> stats; List<BaseInputHandlerService> inputHandlers; List<InputField> inputs; List<Processor> processors; List<Output> outputs; /** * The list of reasons why the loaded pipeline config is not valid */ List<String> invalidReasons; /** * The LAP config service */ private PipelineConfig() { stats = new ConcurrentHashMap<>(); } /** * Add an InputField to this config * @param inputField the InputField * @return the list of all current InputField */ public List<InputField> addInputField(InputField inputField) { if (this.inputs == null) { this.inputs = new ArrayList<>(); } for (InputField input : this.inputs) { if (inputField.name.equals(input.name)) { throw new IllegalArgumentException( "Duplicate input field (" + inputField.name + "), input field can only be defined once"); } } this.inputs.add(inputField); return this.inputs; } public List<BaseInputHandlerService> addInputHandlerField(String type, HierarchicalConfiguration sourceConfiguration, ConfigurationService configurationService, StorageService storage) { if (this.inputHandlers == null) { this.inputHandlers = new ArrayList<>(); } this.inputHandlers.add( BaseInputHandlerService.getInputHandler(type, sourceConfiguration, configurationService, storage)); return this.inputHandlers; } /** * Add a Processor to this config * @param processor the Processor * @return the list of all current Output */ public List<Processor> addProcessor(Processor processor) { if (this.processors == null) { this.processors = new ArrayList<>(); } this.processors.add(processor); return this.processors; } /** * Add an Output to this config * @param output the Output * @return the list of all current Output */ public List<Output> addOutput(Output output) { if (this.outputs == null) { this.outputs = new ArrayList<>(); } if (output.fields == null || output.fields.isEmpty()) { throw new IllegalArgumentException( "Output must contain at LEAST 1 field (is empty or null currently) before it can be added"); } this.outputs.add(output); return this.outputs; } // GETTERS public String getType() { return type; } public String getName() { return name; } public String getDescription() { return description; } public Map<String, ?> getStats() { return stats; } public List<InputField> getInputs() { return inputs; } public List<Processor> getProcessors() { return processors; } public List<BaseInputHandlerService> getInputHandlers() { return inputHandlers; } public List<Output> getOutputs() { return outputs; } /** * @return true if the pipeline config is valid, false otherwise * If not valid, the reasons are indicated in the #invalidReasons variable */ public boolean isValid() { boolean valid = true; invalidReasons = new ArrayList<>(); if (StringUtils.isBlank(type)) { invalidReasons.add("Missing <type> (must not be blank)"); valid = false; } if (StringUtils.isBlank(name)) { invalidReasons.add("Missing <name> (must not be blank)"); valid = false; } if (inputs == null) { invalidReasons.add("Missing <inputs> (must be included and at least one input defined)"); valid = false; } else if (inputs.isEmpty()) { invalidReasons.add("No <input> in <inputs> (must have at least 1 input defined)"); valid = false; } if (processors == null) { invalidReasons.add("Missing <processors> (must be included and at least one processor defined)"); valid = false; } else if (processors.isEmpty()) { invalidReasons.add("No <processor> in <processors> (must have at least 1 processor defined)"); valid = false; } if (outputs == null) { invalidReasons.add("Missing <outputs> (must be included and at least one output defined)"); valid = false; } else if (outputs.isEmpty()) { invalidReasons.add("No <output> in <outputs> (must have at least 1 output defined)"); valid = false; } return valid; } public List<String> getInvalidReasons() { return invalidReasons; } // BUILDER public static PipelineConfig makeConfigFromXML(ConfigurationService configurationService, StorageService storage, XMLConfiguration xmlConfig) { PipelineConfig pc = new PipelineConfig(); pc.filename = xmlConfig.getFileName(); pc.name = xmlConfig.getString("name"); pc.type = xmlConfig.getString("type"); pc.description = xmlConfig.getString("description"); // special handling for stats metadata HierarchicalConfiguration stats = xmlConfig.configurationAt("stats"); Iterator<String> statsKeys = stats.getKeys(); while (statsKeys.hasNext()) { String next = statsKeys.next(); try { Float f = stats.getFloat(next); pc.stats.put(next, f); } catch (Exception e) { // skip this float and warn logger.warn("Unable to get float from " + next + " <stats> field (skipping it): " + e); } } // load the lists // sources List<HierarchicalConfiguration> sourceFields = xmlConfig.configurationsAt("sources.source"); for (HierarchicalConfiguration field : sourceFields) { try { pc.addInputHandlerField(field.getString("type"), field, configurationService, storage); } catch (Exception e) { // skip this input and warn logger.warn("Unable to load input field (" + field.toString() + ") (skipping it): " + e); } } // load the lists // inputs List<HierarchicalConfiguration> inputFields = xmlConfig.configurationsAt("inputs.fields.field"); for (HierarchicalConfiguration field : inputFields) { try { pc.addInputField(InputField.make(field.getString("name"), field.getBoolean("required", false))); } catch (Exception e) { // skip this input and warn logger.warn("Unable to load input field (" + field.toString() + ") (skipping it): " + e); } } // processors List<HierarchicalConfiguration> processors = xmlConfig.configurationsAt("processors.processor"); for (HierarchicalConfiguration processor : processors) { try { String pType = processor.getString("type"); Processor.ProcessorType pt = Processor.ProcessorType.fromString(pType); // IllegalArgumentException if invalid if (pt == Processor.ProcessorType.KETTLE_JOB) { pc.addProcessor( Processor.makeKettleJob(processor.getString("name"), processor.getString("file"))); } else if (pt == Processor.ProcessorType.KETTLE_TRANSFORM) { pc.addProcessor(Processor.makeKettleTransform(processor.getString("name"), processor.getString("file"))); } else if (pt == Processor.ProcessorType.KETTLE_DATA) { Processor p = new Processor(); p.type = Processor.ProcessorType.KETTLE_DATA; p.name = processor.getString("name"); p.count = processor.getInt("count"); pc.addProcessor(p); logger.warn("KETTLE DATA processor loaded (" + p.toString() + ")"); } // Add other types here as needed } catch (Exception e) { // skip this processor and warn logger.warn("Unable to load processor (" + processor.toString() + ") (skipping it): " + e); } } // outputs List<HierarchicalConfiguration> outputs = xmlConfig.configurationsAt("outputs.output"); for (HierarchicalConfiguration output : outputs) { // TODO - we need to rethink output handling // don't want to add code every time we need to support a new output type try { String oType = output.getString("type"); Output.OutputType ot = Output.OutputType.fromString(oType); // IllegalArgumentException if invalid if (ot == Output.OutputType.CSV) { Output o = Output.makeCSV(output.getString("from"), output.getString("filename")); // load the output fields List<HierarchicalConfiguration> outputFields = output.configurationsAt("fields.field"); for (HierarchicalConfiguration outputField : outputFields) { o.addFieldCSV(outputField.getString("source"), outputField.getString("header")); } pc.addOutput(o); } else if (ot == Output.OutputType.STORAGE) { Output o = Output.makeStorage(output.getString("from"), output.getString("to")); // load the output fields List<HierarchicalConfiguration> outputFields = output.configurationsAt("fields.field"); for (HierarchicalConfiguration outputField : outputFields) { o.addFieldStorage(outputField.getString("source"), outputField.getString("target")); } pc.addOutput(o); } else if (ot == Output.OutputType.SSPEARLYALERT) { Output o = new Output(); o.type = Output.OutputType.SSPEARLYALERT; o.from = output.getString("from"); o.to = output.getString("to"); List<HierarchicalConfiguration> outputFields = output.configurationsAt("fields.field"); for (HierarchicalConfiguration outputField : outputFields) { OutputField field = new OutputField(o.type, outputField.getString("source"), outputField.getString("target"), null); o.fields.add(field); } pc.addOutput(o); } // Add other types here as needed } catch (Exception e) { // skip this processor and warn logger.warn("Unable to load output (" + output.toString() + ") (skipping it): " + e); } } return pc; } // Objects to hold specialized data /** * Represents a single field of input for a pipeline * * All inputs are defined in the resources/extracts/README.md file * There are 5 inputs types: PERSONAL, COURSE, ENROLLMENT, GRADE, ACTIVITY * A field is specified using a combination of the type and the name, for example: COURSE.COURSE_ID or PERSONAL.AGE */ public static class InputField { public BaseInputHandlerService.InputCollection collection; public String name; public boolean required = false; private InputField() { } /** * For making input fields * @param collectionAndName the collection and name (period separated) of the temp storage field (e.g. PERSONAL.AGE) * @param required true if this field is required input * @return the input field object */ public static InputField make(String collectionAndName, boolean required) { assert StringUtils.isNotBlank(collectionAndName); String[] parts = StringUtils.split(StringUtils.trim(collectionAndName), '.'); if (parts == null || parts.length != 2) { throw new IllegalArgumentException( "Cannot extract collection and name from input field (must follow format {COLL}.{NAME}): " + collectionAndName); } assert StringUtils.isNotBlank(parts[0]) : "InputField Collection part is blank: " + collectionAndName; assert StringUtils.isNotBlank(parts[1]) : "InputField Name part is blank: " + collectionAndName; InputField field = new InputField(); field.collection = BaseInputHandlerService.InputCollection.fromString(parts[0]); field.name = parts[1]; field.required = required; return field; } /** * @return the collection for this field (e.g. PERSONAL from PERSONAL.AGE) */ public BaseInputHandlerService.InputCollection getCollection() { return collection; } /** * @return the collection for this field (e.g. PERSONAL from PERSONAL.AGE) */ public String getName() { return name; } @Override public String toString() { return (required ? "Required" : "optional") + " input (" + collection + "." + name + ")"; } } }