List of usage examples for the org.apache.commons.digester.NodeCreateRule constructor NodeCreateRule()
public NodeCreateRule() throws ParserConfigurationException
From source file:eu.planets_project.pp.plato.services.characterisation.jhove.JHoveAdaptor.java
/** * Process any kind of XML String and returns a {@link JHoveTreeNode} * this seemingly absurd method stems from the fact that jhove might include ENCODED xml in a response, * so that within the xml tag we have another encoded, embedded, XML string. * For example, XMP info is embedded in that way. * (see data/testfiles-characterisation/jhove-output-with-xmpinfo.xml and sample-pdf-including-xmp.pdf) * So we create another xml root node, copy the encoded stuff in and send it to an XML extraction method. * /*from w w w .ja v a 2s .co m*/ * @param XMPasString * @param name * @return * @see #getJHoveTreeNodeFromXPathNode(Node) */ private JHoveTreeNode getJHoveTreeNodeFromXML(String XMPasString, String name) { try { //tested with digester: XML displayed as a jhoveTreeNode XMPasString = XMPasString.substring(1, XMPasString.length() - 1); Digester digester = new Digester(); digester.addRule("XMPInfo", new NodeCreateRule()); Node rootNode = (Node) digester.parse(new StringReader("<XMPInfo>" + XMPasString + "</XMPInfo>")); return getJHoveTreeNodeFromXPathNode(rootNode); } catch (Exception e) { log.warn("Error in JHove identification: " + e.getMessage(), e); return new JHoveTreeNode("Error in Processing XMP", "leaf"); } }
From source file:eu.planets_project.pp.plato.xml.ProjectImporter.java
/** * Imports the XML representation of plans from the given inputstream. * /*from w w w . ja v a 2 s . c o m*/ * @return list of read plans */ public List<Plan> importProjects(InputStream in) throws IOException, SAXException { String tempPath = OS.getTmpPath() + "import_xml" + System.currentTimeMillis() + "/"; File tempDir = new File(tempPath); tempDir.mkdirs(); try { String currentVersionFile = getCurrentVersionData(in, tempPath); if (currentVersionFile == null) { log.error("Failed to migrate plans."); return this.plans; } Digester digester = new Digester(); // digester.setValidating(true); StrictErrorHandler errorHandler = new StrictErrorHandler(); digester.setErrorHandler(errorHandler); digester.setNamespaceAware(true); // digester.setSchemaLanguage("http://www.w3.org/2001/XMLSchema"); // digester.setSchema("http://localhost:8080/plato/schema/plato-2.1.xsd"); /* * It is NOT sufficient to use setValidating(true) and digester.setSchema("data/schemas/plato.xsd")! * the following parameters have to be set and a special error handler is necessary */ try { digester.setFeature("http://xml.org/sax/features/validation", true); digester.setFeature("http://apache.org/xml/features/validation/schema", true); // digester.setFeature("http://xml.org/sax/features/namespaces", true); // digester.setFeature("http://apache.org/xml/features/validation/schema-full-checking", true); /* * And provide the relative path to the xsd-schema: */ digester.setProperty("http://java.sun.com/xml/jaxp/properties/schemaLanguage", "http://www.w3.org/2001/XMLSchema"); URL platoSchema = Thread.currentThread().getContextClassLoader() .getResource("data/schemas/plato-3.0.xsd"); URL wdtSchema = Thread.currentThread().getContextClassLoader() .getResource("data/schemas/planets_wdt-1.0.xsd"); digester.setProperty("http://apache.org/xml/properties/schema/external-schemaLocation", "http://www.planets-project.eu/plato " + platoSchema + " http://www.planets-project.eu/wdt " + wdtSchema); 
//http://localhost:8080/plato/schema/planets_wdt-1.0.xsd } catch (ParserConfigurationException e) { log.debug("Cannot import XML file: Configuration of parser failed.", e); throw new SAXException("Cannot import XML file: Configuration of parser failed."); } digester.push(this); // start with a new file digester.addObjectCreate("*/plan", Plan.class); digester.addSetProperties("*/plan"); digester.addSetRoot("*/plan", "setProject"); digester.addFactoryCreate("*/changelog", ChangeLogFactory.class); digester.addSetNext("*/changelog", "setChangeLog"); digester.addObjectCreate("*/plan/state", PlanState.class); digester.addSetProperties("*/plan/state"); digester.addSetNext("*/plan/state", "setState"); digester.addObjectCreate("*/plan/properties", PlanProperties.class); digester.addSetProperties("*/plan/properties"); digester.addSetNext("*/plan/properties", "setPlanProperties"); digester.addCallMethod("*/plan/properties/description", "setDescription", 0); digester.addCallMethod("*/plan/properties/owner", "setOwner", 0); addCreateUpload(digester, "*/plan/properties/report", "setReportUpload", DigitalObject.class); digester.addObjectCreate("*/plan/basis", ProjectBasis.class); digester.addSetProperties("*/plan/basis"); digester.addSetNext("*/plan/basis", "setProjectBasis"); digester.addCallMethod("*/plan/basis/applyingPolicies", "setApplyingPolicies", 0); digester.addCallMethod("*/plan/basis/designatedCommunity", "setDesignatedCommunity", 0); digester.addCallMethod("*/plan/basis/mandate", "setMandate", 0); digester.addCallMethod("*/plan/basis/documentTypes", "setDocumentTypes", 0); digester.addCallMethod("*/plan/basis/identificationCode", "setIdentificationCode", 0); digester.addCallMethod("*/plan/basis/organisationalProcedures", "setOrganisationalProcedures", 0); digester.addCallMethod("*/plan/basis/planningPurpose", "setPlanningPurpose", 0); digester.addCallMethod("*/plan/basis/planRelations", "setPlanRelations", 0); digester.addCallMethod("*/plan/basis/preservationRights", 
"setPreservationRights", 0); digester.addCallMethod("*/plan/basis/referenceToAgreements", "setReferenceToAgreements", 0); // define common rule for triggers, for all */triggers/...! // also used for PlanDefinition digester.addObjectCreate("*/triggers", TriggerDefinition.class); digester.addSetNext("*/triggers", "setTriggers"); // every time a */triggers/trigger is encountered: digester.addFactoryCreate("*/triggers/trigger", TriggerFactory.class); digester.addSetNext("*/triggers/trigger", "setTrigger"); // // Policy Tree // digester.addObjectCreate("*/plan/basis/policyTree", PolicyTree.class); digester.addSetProperties("*/plan/basis/policyTree"); digester.addSetNext("*/plan/basis/policyTree", "setPolicyTree"); digester.addObjectCreate("*/plan/basis/policyTree/policyNode", PolicyNode.class); digester.addSetProperties("*/plan/basis/policyTree/policyNode"); digester.addSetNext("*/plan/basis/policyTree/policyNode", "setRoot"); digester.addObjectCreate("*/policyNode/policyNode", PolicyNode.class); digester.addSetProperties("*/policyNode/policyNode"); digester.addSetNext("*/policyNode/policyNode", "addChild"); digester.addObjectCreate("*/policyNode/policy", Policy.class); digester.addSetProperties("*/policyNode/policy"); digester.addSetNext("*/policyNode/policy", "addChild"); // // Sample Records // digester.addObjectCreate("*/plan/sampleRecords", SampleRecordsDefinition.class); digester.addSetProperties("*/plan/sampleRecords"); digester.addSetNext("*/plan/sampleRecords", "setSampleRecordsDefinition"); digester.addCallMethod("*/plan/sampleRecords/samplesDescription", "setSamplesDescription", 0); // - records digester.addObjectCreate("*/record", SampleObject.class); digester.addSetProperties("*/record"); digester.addSetNext("*/record", "addRecord"); digester.addCallMethod("*/record/description", "setDescription", 0); digester.addCallMethod("*/record/originalTechnicalEnvironment", "setOriginalTechnicalEnvironment", 0); digester.addObjectCreate("*/record/data", 
BinaryDataWrapper.class); digester.addSetTop("*/record/data", "setData"); digester.addCallMethod("*/record/data", "setFromBase64Encoded", 0); // set up an general rule for all jhove strings! digester.addObjectCreate("*/jhoveXML", BinaryDataWrapper.class); digester.addSetTop("*/jhoveXML", "setString"); digester.addCallMethod("*/jhoveXML", "setFromBase64Encoded", 0); digester.addCallMethod("*/jhoveXML", "setMethodName", 1, new String[] { "java.lang.String" }); digester.addObjectParam("*/jhoveXML", 0, "setJhoveXMLString"); // set up general rule for all fitsXMLs digester.addObjectCreate("*/fitsXML", BinaryDataWrapper.class); digester.addSetTop("*/fitsXML", "setString"); digester.addCallMethod("*/fitsXML", "setFromBase64Encoded", 0); digester.addCallMethod("*/fitsXML", "setMethodName", 1, new String[] { "java.lang.String" }); digester.addObjectParam("*/fitsXML", 0, "setFitsXMLString"); digester.addObjectCreate("*/record/formatInfo", FormatInfo.class); digester.addSetProperties("*/record/formatInfo"); digester.addSetNext("*/record/formatInfo", "setFormatInfo"); addCreateUpload(digester, "*/record/xcdlDescription", "setXcdlDescription", XcdlDescription.class); // - collection profile digester.addObjectCreate("*/plan/sampleRecords/collectionProfile", CollectionProfile.class); digester.addSetProperties("*/plan/sampleRecords/collectionProfile"); digester.addSetNext("*/plan/sampleRecords/collectionProfile", "setCollectionProfile"); digester.addCallMethod("*/plan/sampleRecords/collectionProfile/collectionID", "setCollectionID", 0); digester.addCallMethod("*/plan/sampleRecords/collectionProfile/description", "setDescription", 0); digester.addCallMethod("*/plan/sampleRecords/collectionProfile/numberOfObjects", "setNumberOfObjects", 0); digester.addCallMethod("*/plan/sampleRecords/collectionProfile/typeOfObjects", "setTypeOfObjects", 0); digester.addCallMethod("*/plan/sampleRecords/collectionProfile/expectedGrowthRate", "setExpectedGrowthRate", 0); 
digester.addCallMethod("*/plan/sampleRecords/collectionProfile/retentionPeriod", "setRetentionPeriod", 0); // requirements definition digester.addObjectCreate("*/plan/requirementsDefinition", RequirementsDefinition.class); digester.addSetProperties("*/plan/requirementsDefinition"); digester.addSetNext("*/plan/requirementsDefinition", "setRequirementsDefinition"); digester.addCallMethod("*/plan/requirementsDefinition/description", "setDescription", 0); // - uploads digester.addObjectCreate("*/plan/requirementsDefinition/uploads", ArrayList.class); digester.addSetNext("*/plan/requirementsDefinition/uploads", "setUploads"); addCreateUpload(digester, "*/plan/requirementsDefinition/uploads/upload", "add", DigitalObject.class); // alternatives digester.addObjectCreate("*/plan/alternatives", AlternativesDefinition.class); digester.addSetProperties("*/plan/alternatives"); digester.addCallMethod("*/plan/alternatives/description", "setDescription", 0); digester.addSetNext("*/plan/alternatives", "setAlternativesDefinition"); digester.addObjectCreate("*/plan/alternatives/alternative", Alternative.class); digester.addSetProperties("*/plan/alternatives/alternative"); digester.addSetNext("*/plan/alternatives/alternative", "addAlternative"); // - action digester.addObjectCreate("*/plan/alternatives/alternative/action", PreservationActionDefinition.class); digester.addSetProperties("*/plan/alternatives/alternative/action"); digester.addBeanPropertySetter("*/plan/alternatives/alternative/action/descriptor"); digester.addBeanPropertySetter("*/plan/alternatives/alternative/action/parameterInfo"); digester.addSetNext("*/plan/alternatives/alternative/action", "setAction"); digester.addCallMethod("*/plan/alternatives/alternative/description", "setDescription", 0); // - - params digester.addObjectCreate("*/plan/alternatives/alternative/action/params", LinkedList.class); digester.addSetNext("*/plan/alternatives/alternative/action/params", "setParams"); 
digester.addObjectCreate("*/plan/alternatives/alternative/action/params/param", Parameter.class); digester.addSetProperties("*/plan/alternatives/alternative/action/params/param"); digester.addSetNext("*/plan/alternatives/alternative/action/params/param", "add"); // - resource description digester.addObjectCreate("*/resourceDescription", ResourceDescription.class); digester.addSetProperties("*/resourceDescription"); digester.addSetNext("*/resourceDescription", "setResourceDescription"); digester.addCallMethod("*/resourceDescription/configSettings", "setConfigSettings", 0); digester.addCallMethod("*/resourceDescription/necessaryResources", "setNecessaryResources", 0); digester.addCallMethod("*/resourceDescription/reasonForConsidering", "setReasonForConsidering", 0); // - experiment digester.addObjectCreate("*/experiment", ExperimentWrapper.class); digester.addSetProperties("*/experiment"); digester.addSetNext("*/experiment", "setExperiment"); digester.addCallMethod("*/experiment/description", "setDescription", 0); digester.addCallMethod("*/experiment/settings", "setSettings", 0); addCreateUpload(digester, "*/experiment/results/result", null, DigitalObject.class); addCreateUpload(digester, "*/result/xcdlDescription", "setXcdlDescription", XcdlDescription.class); // call function addUpload of ExperimentWrapper CallMethodRule r = new CallMethodRule(1, "addResult", 2); //method with two params // every time */experiment/uploads/upload is encountered digester.addRule("*/experiment/results/result", r); // use attribute "key" as first param digester.addCallParam("*/experiment/results/result", 0, "key"); // and the object on stack (DigitalObject) as the second digester.addCallParam("*/experiment/results/result", 1, true); // addCreateUpload(digester, "*/experiment/xcdlDescriptions/xcdlDescription", null, XcdlDescription.class); // // call function addXcdlDescription of ExperimentWrapper // r = new CallMethodRule(1, "addXcdlDescription", 2); //method with two params // // 
every time */experiment/xcdlDescriptions/xcdlDescription is encountered // digester.addRule("*/experiment/xcdlDescriptions/xcdlDescription", r); // // use attribute "key" as first param // digester.addCallParam("*/experiment/xcdlDescriptions/xcdlDescription", 0 , "key"); // // and the object on stack (DigitalObject) as the second // digester.addCallParam("*/experiment/xcdlDescriptions/xcdlDescription",1,true); digester.addObjectCreate("*/experiment/detailedInfos/detailedInfo", DetailedExperimentInfo.class); digester.addSetProperties("*/experiment/detailedInfos/detailedInfo"); digester.addBeanPropertySetter("*/experiment/detailedInfos/detailedInfo/programOutput"); digester.addBeanPropertySetter("*/experiment/detailedInfos/detailedInfo/cpr"); // call function "addDetailedInfo" of ExperimentWrapper r = new CallMethodRule(1, "addDetailedInfo", 2); //method with two params // every time */experiment/detailedInfos/detailedInfo is encountered digester.addRule("*/experiment/detailedInfos/detailedInfo", r); // use attribute "key" as first param digester.addCallParam("*/experiment/detailedInfos/detailedInfo", 0, "key"); // and the object on stack as second parameter digester.addCallParam("*/experiment/detailedInfos/detailedInfo", 1, true); // read contained measurements: digester.addObjectCreate("*/detailedInfo/measurements/measurement", Measurement.class); digester.addSetNext("*/detailedInfo/measurements/measurement", "put"); // values are defined with wild-cards, and therefore set automatically digester.addObjectCreate("*/measurement/property", MeasurableProperty.class); digester.addSetProperties("*/measurement/property"); digester.addSetNext("*/measurement/property", "setProperty"); // scales are defined with wild-cards, and therefore set automatically /* * for each value type a set of rules * because of FreeStringValue we need to store the value as XML-element * instead of an attribute * naming them "ResultValues" wasn't nice too */ addCreateValue(digester, 
BooleanValue.class, "setValue"); addCreateValue(digester, FloatRangeValue.class, "setValue"); addCreateValue(digester, IntegerValue.class, "setValue"); addCreateValue(digester, IntRangeValue.class, "setValue"); addCreateValue(digester, OrdinalValue.class, "setValue"); addCreateValue(digester, PositiveFloatValue.class, "setValue"); addCreateValue(digester, PositiveIntegerValue.class, "setValue"); addCreateValue(digester, YanValue.class, "setValue"); addCreateValue(digester, FreeStringValue.class, "setValue"); // go no go decision digester.addObjectCreate("*/plan/decision", Decision.class); digester.addSetProperties("*/plan/decision"); digester.addSetNext("*/plan/decision", "setDecision"); digester.addCallMethod("*/plan/decision/actionNeeded", "setActionNeeded", 0); digester.addCallMethod("*/plan/decision/reason", "setReason", 0); digester.addFactoryCreate("*/plan/decision/goDecision", GoDecisionFactory.class); digester.addSetNext("*/plan/decision/goDecision", "setDecision"); // evaluation digester.addObjectCreate("*/plan/evaluation", Evaluation.class); digester.addSetProperties("*/plan/evaluation"); digester.addSetNext("*/plan/evaluation", "setEvaluation"); digester.addCallMethod("*/plan/evaluation/comment", "setComment", 0); // importance weighting digester.addObjectCreate("*/plan/importanceWeighting", ImportanceWeighting.class); digester.addSetProperties("*/plan/importanceWeighting"); digester.addSetNext("*/plan/importanceWeighting", "setImportanceWeighting"); digester.addCallMethod("*/plan/importanceWeighting/comment", "setComment", 0); // recommendation digester.addObjectCreate("*/plan/recommendation", RecommendationWrapper.class); digester.addSetProperties("*/plan/recommendation"); digester.addSetNext("*/plan/recommendation", "setRecommendation"); digester.addCallMethod("*/plan/recommendation/reasoning", "setReasoning", 0); digester.addCallMethod("*/plan/recommendation/effects", "setEffects", 0); // transformation 
digester.addObjectCreate("*/plan/transformation", Transformation.class); digester.addSetProperties("*/plan/transformation"); digester.addSetNext("*/plan/transformation", "setTransformation"); digester.addCallMethod("*/plan/transformation/comment", "setComment", 0); // Tree /* Some rules for tree parsing are necessary for importing templates too, * that's why they are added by this static method. */ ProjectImporter.addTreeParsingRulesToDigester(digester); digester.addObjectCreate("*/leaf/evaluation", HashMap.class); digester.addSetNext("*/leaf/evaluation", "setValueMap"); /* * The valueMap has an entry for each (considered) alternative ... * and for each alternative there is a list of values, one per SampleObject. * Note: The digester uses a stack, therefore the rule to put the list of values to the valueMap * must be added after the rule for adding the values to the list. */ /* * 2. and for each alternative there is a list of values, one per SampleObject */ digester.addObjectCreate("*/leaf/evaluation/alternative", Values.class); digester.addCallMethod("*/leaf/evaluation/alternative/comment", "setComment", 0); /* * for each result-type a set of rules * they are added to the valueMap by the rules above */ addCreateResultValue(digester, BooleanValue.class); addCreateResultValue(digester, FloatValue.class); addCreateResultValue(digester, FloatRangeValue.class); addCreateResultValue(digester, IntegerValue.class); addCreateResultValue(digester, IntRangeValue.class); addCreateResultValue(digester, OrdinalValue.class); addCreateResultValue(digester, PositiveFloatValue.class); addCreateResultValue(digester, PositiveIntegerValue.class); addCreateResultValue(digester, YanValue.class); addCreateResultValue(digester, FreeStringValue.class); /* * 1. The valueMap has an entry for each (considered) alternative ... 
*/ // call put of the ValueMap (HashMap) r = new CallMethodRule(1, "put", 2); digester.addRule("*/leaf/evaluation/alternative", r); digester.addCallParam("*/leaf/evaluation/alternative", 0, "key"); digester.addCallParam("*/leaf/evaluation/alternative", 1, true); // digester.addObjectCreate("*/plan/executablePlan/planWorkflow", ExecutablePlanContentWrapper.class); // digester.addSetProperties("*/plan/executablePlan/planWorkflow"); // digester.addSetNext("*/plan/executablePlan/planWorkflow", "setRecommendation"); // Executable plan definition digester.addObjectCreate("*/plan/executablePlan", ExecutablePlanDefinition.class); digester.addSetProperties("*/plan/executablePlan"); digester.addSetNext("*/plan/executablePlan", "setExecutablePlanDefinition"); // // Import Planets executable plan if present // try { // object-create rules are called at the beginning element-tags, in the same order as defined // first create the wrapper digester.addObjectCreate("*/plan/executablePlan/planWorkflow", NodeContentWrapper.class); // then an element for workflowConf digester.addRule("*/plan/executablePlan/planWorkflow/workflowConf", new NodeCreateRule()); // CallMethod and SetNext rules are called at closing element-tags, (last in - first out!) 
CallMethodRule rr = new CallMethodRule(1, "setNodeContent", 2); digester.addRule("*/plan/executablePlan/planWorkflow/workflowConf", rr); // right below the wrapper is an instance of ExecutablePlanDefinition digester.addCallParam("*/plan/executablePlan/planWorkflow/workflowConf", 0, 1); // provide the name of the setter method digester.addObjectParam("*/plan/executablePlan/planWorkflow/workflowConf", 1, "setExecutablePlan"); // the generated node is not accessible as CallParam (why?!?), but available for addSetNext digester.addSetNext("*/plan/executablePlan/planWorkflow/workflowConf", "setNode"); } catch (ParserConfigurationException e) { PlatoLogger.getLogger(this.getClass()).error(e.getMessage(), e); } // // Import EPrints executable plan if present // try { digester.addObjectCreate("*/plan/executablePlan/eprintsPlan", NodeContentWrapper.class); // then an element for workflowConf digester.addRule("*/plan/executablePlan/eprintsPlan", new NodeCreateRule()); CallMethodRule rr2 = new CallMethodRule(1, "setNodeContentEPrintsPlan", 2); digester.addRule("*/plan/executablePlan/eprintsPlan", rr2); // right below the wrapper is an instance of ExecutablePlanDefinition digester.addCallParam("*/plan/executablePlan/eprintsPlan", 0, 1); // provide the name of the setter method digester.addObjectParam("*/plan/executablePlan/eprintsPlan", 1, "setEprintsExecutablePlan"); digester.addSetNext("*/plan/executablePlan/eprintsPlan", "setNode"); } catch (ParserConfigurationException e) { PlatoLogger.getLogger(this.getClass()).error(e.getMessage(), e); } digester.addCallMethod("*/plan/executablePlan/objectPath", "setObjectPath", 0); digester.addCallMethod("*/plan/executablePlan/toolParameters", "setToolParameters", 0); digester.addCallMethod("*/plan/executablePlan/triggersConditions", "setTriggersConditions", 0); digester.addCallMethod("*/plan/executablePlan/validateQA", "setValidateQA", 0); // Plan definition digester.addObjectCreate("*/plan/planDefinition", PlanDefinition.class); 
digester.addSetProperties("*/plan/planDefinition"); digester.addSetNext("*/plan/planDefinition", "setPlanDefinition"); digester.addCallMethod("*/plan/planDefinition/costsIG", "setCostsIG", 0); digester.addCallMethod("*/plan/planDefinition/costsPA", "setCostsPA", 0); digester.addCallMethod("*/plan/planDefinition/costsPE", "setCostsPE", 0); digester.addCallMethod("*/plan/planDefinition/costsQA", "setCostsQA", 0); digester.addCallMethod("*/plan/planDefinition/costsREI", "setCostsREI", 0); digester.addCallMethod("*/plan/planDefinition/costsRemarks", "setCostsRemarks", 0); digester.addCallMethod("*/plan/planDefinition/costsRM", "setCostsRM", 0); digester.addCallMethod("*/plan/planDefinition/costsTCO", "setCostsTCO", 0); digester.addCallMethod("*/plan/planDefinition/responsibleExecution", "setResponsibleExecution", 0); digester.addCallMethod("*/plan/planDefinition/responsibleMonitoring", "setResponsibleMonitoring", 0); digester.addObjectCreate("*/plan/planDefinition/triggers", TriggerDefinition.class); digester.addSetNext("*/plan/planDefinition/triggers", "setTriggers"); // every time a */plan/basis/triggers/trigger is encountered: digester.addFactoryCreate("*/plan/planDefinition/triggers/trigger", TriggerFactory.class); digester.addSetNext("*/plan/planDefinition/triggers/trigger", "setTrigger"); digester.setUseContextClassLoader(true); this.plans = new ArrayList<Plan>(); // finally parse the XML representation with all created rules digester.parse(new FileInputStream(currentVersionFile)); for (Plan plan : plans) { String projectName = plan.getPlanProperties().getName(); if ((projectName != null) && (!"".equals(projectName))) { /* * establish links from values to scales */ plan.getTree().initValues(plan.getAlternativesDefinition().getConsideredAlternatives(), plan.getSampleRecordsDefinition().getRecords().size(), true); /* * establish references of Experiment.uploads */ HashMap<String, SampleObject> records = new HashMap<String, SampleObject>(); for (SampleObject record 
: plan.getSampleRecordsDefinition().getRecords()) { records.put(record.getShortName(), record); } for (Alternative alt : plan.getAlternativesDefinition().getAlternatives()) { if ((alt.getExperiment() != null) && (alt.getExperiment() instanceof ExperimentWrapper)) { alt.setExperiment(((ExperimentWrapper) alt.getExperiment()).getExperiment(records)); } } // DESCRIBE all DigitalObjects with Jhove. for (SampleObject record : plan.getSampleRecordsDefinition().getRecords()) { if (record.isDataExistent()) { // characterise try { record.setJhoveXMLString(new JHoveAdaptor().describe(record)); } catch (Throwable e) { log.error("Error running Jhove for record " + record.getShortName() + ". " + e.getMessage(), e); } for (Alternative alt : plan.getAlternativesDefinition().getAlternatives()) { DigitalObject result = alt.getExperiment().getResults().get(record); if (result != null && result.isDataExistent()) { try { result.setJhoveXMLString(new JHoveAdaptor().describe(result)); } catch (Throwable e) { log.error( "Error running Jhove for record " + record.getShortName() + ", alternative " + alt.getName() + ". 
" + e.getMessage(), e); } } } } } // CHECK NUMERIC TRANSFORMER THRESHOLDS for (Leaf l : plan.getTree().getRoot().getAllLeaves()) { eu.planets_project.pp.plato.model.transform.Transformer t = l.getTransformer(); if (t != null && t instanceof NumericTransformer) { NumericTransformer nt = (NumericTransformer) t; if (!nt.checkOrder()) { StringBuffer sb = new StringBuffer("NUMERICTRANSFORMER THRESHOLD ERROR "); sb.append(l.getName()).append("::NUMERICTRANSFORMER:: "); sb.append(nt.getThreshold1()).append(" ").append(nt.getThreshold2()).append(" ") .append(nt.getThreshold3()).append(" ").append(nt.getThreshold4()) .append(" ").append(nt.getThreshold5()); log.error(sb.toString()); } } } /* * establish references to selected alternative */ HashMap<String, Alternative> alternatives = new HashMap<String, Alternative>(); for (Alternative alt : plan.getAlternativesDefinition().getAlternatives()) { alternatives.put(alt.getName(), alt); } if ((plan.getRecommendation() != null) && (plan.getRecommendation() instanceof RecommendationWrapper)) { plan.setRecommendation( ((RecommendationWrapper) plan.getRecommendation()).getRecommendation(alternatives)); } if ((plan.getState().getValue() == PlanState.ANALYSED) && ((plan.getRecommendation() == null) || (plan.getRecommendation().getAlternative() == null))) { /* * This project is NOT completely analysed */ plan.getState().setValue(PlanState.ANALYSED - 1); } } else { throw new SAXException("Could not find any project data."); } } } finally { OS.deleteDirectory(tempDir); /* * Importing big plans results in an increasing memory consumption * strange: The rise of memory consumption occurs when persisting the loaded project * NOT during parsing with the digester */ System.gc(); } return this.plans; }
From source file:org.gbif.occurrence.OccurrenceParser.java
/** * This parses a stream of uncompressed ABCD or DwC Occurrences into {@link RawXmlOccurrence}s. * * No care is taken to handle wrong encodings or character sets in general. This might be changed later on. * * @param is stream to parse//w ww.j a va 2 s.c om * @return list of parsed occurrences * @throws ParsingException if there were any problems during parsing the stream */ // TODO: Optionally handle compressed streams public List<RawXmlOccurrence> parseStream(InputStream is) throws ParsingException { checkNotNull(is, "is can't be null"); try { ParsedSearchResponse responseBody = new ParsedSearchResponse(); InputSource inputSource = new InputSource(is); Digester digester = new Digester(); digester.setNamespaceAware(true); digester.setValidating(false); digester.push(responseBody); NodeCreateRule rawAbcd = new NodeCreateRule(); digester.addRule(ExtractionSimpleXPaths.ABCD_RECORD_XPATH, rawAbcd); digester.addSetNext(ExtractionSimpleXPaths.ABCD_RECORD_XPATH, "addRecordAsXml"); NodeCreateRule rawAbcd1Header = new NodeCreateRule(); digester.addRule(ExtractionSimpleXPaths.ABCD_HEADER_XPATH, rawAbcd1Header); digester.addSetNext(ExtractionSimpleXPaths.ABCD_HEADER_XPATH, "setAbcd1Header"); NodeCreateRule rawDwc1_0 = new NodeCreateRule(); digester.addRule(ExtractionSimpleXPaths.DWC_1_0_RECORD_XPATH, rawDwc1_0); digester.addSetNext(ExtractionSimpleXPaths.DWC_1_0_RECORD_XPATH, "addRecordAsXml"); NodeCreateRule rawDwc1_4 = new NodeCreateRule(); digester.addRule(ExtractionSimpleXPaths.DWC_1_4_RECORD_XPATH, rawDwc1_4); digester.addSetNext(ExtractionSimpleXPaths.DWC_1_4_RECORD_XPATH, "addRecordAsXml"); // NodeCreateRule rawDwcManis = new NodeCreateRule(); // digester.addRule(ExtractionSimpleXPaths.DWC_MANIS_RECORD_XPATH, rawDwcManis); // digester.addSetNext(ExtractionSimpleXPaths.DWC_MANIS_RECORD_XPATH, "addRecordAsXml"); NodeCreateRule rawDwc2009 = new NodeCreateRule(); digester.addRule(ExtractionSimpleXPaths.DWC_2009_RECORD_XPATH, rawDwc2009); 
digester.addSetNext(ExtractionSimpleXPaths.DWC_2009_RECORD_XPATH, "addRecordAsXml"); digester.parse(inputSource); return responseBody.getRecords(); } catch (ParserConfigurationException e) { throw new ServiceUnavailableException("Error setting up Commons Digester", e); } catch (SAXException e) { throw new ParsingException("Parsing failed", e); } catch (IOException e) { throw new ParsingException("Parsing failed", e); } catch (TransformerException e) { throw new ServiceUnavailableException("Error setting up Commons Digester", e); } }
From source file:org.gbif.occurrence.OccurrenceParser.java
/**
 * Parses a single gzipped response file and returns a list of the contained RawXmlOccurrences.
 *
 * <p>Charsets are a nightmare and users can't be trusted, so the strategy is to try these
 * encodings in order until one of them (hopefully) works (note the last two could be repeats of
 * the first two):
 * <ul>
 *   <li>UTF-8</li>
 *   <li>Latin-1 (ISO-8859-1)</li>
 *   <li>the encoding declared in the first lines of the file itself</li>
 *   <li>a guess at detecting the charset from the raw file bytes</li>
 * </ul>
 * Every attempt parses into its own response holder, so records collected by an attempt that
 * fails half-way are discarded instead of being duplicated by a later, successful attempt. All
 * streams opened for an attempt are closed again before the next one starts.
 *
 * <p>Problems are logged, not thrown: if no attempt succeeds the file is skipped and an empty
 * response's records are returned.
 *
 * @param gzipFile the gzip-compressed response file
 * @return the records parsed with the first working charset; the records of an empty response if
 *         the file could not be read or parsed; {@code null} only if the response holder itself
 *         could not be created
 */
public List<RawXmlOccurrence> parseResponseFileToRawXml(File gzipFile) {
    if (LOG.isDebugEnabled()) {
        LOG.debug(">> parseResponseFileToRawXml [{}]", gzipFile.getAbsolutePath());
    }

    ParsedSearchResponse responseBody = null;
    try {
        responseBody = new ParsedSearchResponse();

        List<String> charsets = new ArrayList<String>();
        charsets.add("UTF-8");
        charsets.add("ISO-8859-1");

        // read parsing declaration
        String encoding = readDeclaredEncoding(gzipFile);
        if (encoding != null) {
            LOG.debug("Found encoding [{}] in parsing declaration", encoding);
            try {
                Charset.forName(encoding);
                charsets.add(encoding);
            } catch (Exception e) {
                LOG.debug(
                        "Could not find supported charset matching detected encoding of [{}] - trying other guesses instead",
                        encoding);
            }
        }

        // attempt detection from bytes
        Charset charset = CharsetDetection.detectEncoding(gzipFile);
        if (charset != null) {
            charsets.add(charset.name());
        }

        String goodCharset = null;
        boolean encodingError = false;
        for (String charsetName : charsets) {
            LOG.debug("Trying charset [{}]", charsetName);
            // fresh holder per attempt: a failed attempt may already have added some records
            ParsedSearchResponse attempt = new ParsedSearchResponse();
            FileInputStream attemptStream = null;
            BufferedReader inputReader = null;
            try {
                attemptStream = new FileInputStream(gzipFile);
                inputReader = new BufferedReader(new XmlSanitizingReader(
                        new InputStreamReader(new GZIPInputStream(attemptStream), charsetName)));

                newRawOccurrenceDigester(attempt).parse(new InputSource(inputReader));

                LOG.debug("Success with charset [{}] - skipping any others", charsetName);
                responseBody = attempt;
                goodCharset = charsetName;
                break;
            } catch (SAXException e) {
                String msg = "SAX exception when parsing parsing from response gzipFile [" + gzipFile.getAbsolutePath()
                        + "] using encoding [" + charsetName + "] - trying another charset";
                LOG.debug(msg, e);
            } catch (IOException e) {
                if (e instanceof MalformedByteSequenceException) {
                    LOG.debug("Malformed utf-8 byte when parsing with encoding [{}] - trying another charset",
                            charsetName);
                    encodingError = true;
                } else {
                    // previously swallowed without a trace; keep going but leave a hint in the log
                    String msg = "I/O exception when parsing from response gzipFile [" + gzipFile.getAbsolutePath()
                            + "] using encoding [" + charsetName + "] - trying another charset";
                    LOG.debug(msg, e);
                }
            } finally {
                // closing the reader closes the whole chain; fall back to the raw stream if the
                // chain could not be built (e.g. the file is not valid gzip)
                if (inputReader != null) {
                    closeResponseStream(inputReader);
                } else {
                    closeResponseStream(attemptStream);
                }
            }
        }

        if (goodCharset == null) {
            if (encodingError) {
                LOG.warn(
                        "Could not parse gzipFile - none of the encoding attempts worked (failed with malformed utf8) - skipping gzipFile [{}]",
                        gzipFile.getAbsolutePath());
            } else {
                LOG.warn("Could not parse gzipFile (malformed parsing) - skipping gzipFile [{}]",
                        gzipFile.getAbsolutePath());
            }
        }
    } catch (FileNotFoundException e) {
        LOG.warn("Could not find response gzipFile [{}] - skipping gzipFile", gzipFile.getAbsolutePath(), e);
    } catch (IOException e) {
        LOG.warn("Could not read response gzipFile [{}] - skipping gzipFile", gzipFile.getAbsolutePath(), e);
    } catch (TransformerException e) {
        LOG.warn("Could not create parsing transformer for [{}] - skipping gzipFile", gzipFile.getAbsolutePath(),
                e);
    } catch (ParserConfigurationException e) {
        LOG.warn("Failed to pull raw parsing from response gzipFile [{}] - skipping gzipFile",
                gzipFile.getAbsolutePath(), e);
    }

    if (LOG.isDebugEnabled()) {
        LOG.debug("<< parseResponseFileToRawXml [{}]", gzipFile.getAbsolutePath());
    }

    return (responseBody == null) ? null : responseBody.getRecords();
}

/**
 * Matches an {@code encoding} declaration such as {@code encoding="UTF-8"} or
 * {@code encoding = 'ISO-8859-1'}; group 1 is the bare encoding name.
 */
private static final java.util.regex.Pattern XML_ENCODING_DECLARATION =
        java.util.regex.Pattern.compile("encoding\\s*=\\s*[\"']?([A-Za-z0-9][A-Za-z0-9._:+-]*)");

/**
 * Extracts the encoding name from a line containing an {@code encoding=...} declaration.
 *
 * @param line a line of text, may be {@code null}
 * @return the declared encoding name without quotes, or {@code null} if the line declares none
 */
private static String extractDeclaredEncoding(String line) {
    if (line == null) {
        return null;
    }
    java.util.regex.Matcher declaration = XML_ENCODING_DECLARATION.matcher(line);
    return declaration.find() ? declaration.group(1) : null;
}

/**
 * Looks for an encoding declaration in the first five lines of the gzipped file.
 *
 * @param gzipFile the gzip-compressed file to inspect
 * @return the declared encoding name, or {@code null} if none was found
 * @throws IOException if the file cannot be opened or read
 */
private String readDeclaredEncoding(File gzipFile) throws IOException {
    FileInputStream headerStream = new FileInputStream(gzipFile);
    BufferedReader headerReader = null;
    try {
        // ISO-8859-1 maps every byte to a character, so scanning the (ASCII) declaration never
        // fails on malformed input and does not depend on the platform default charset
        headerReader = new BufferedReader(
                new InputStreamReader(new GZIPInputStream(headerStream), "ISO-8859-1"));
        for (int lineCount = 0; lineCount < 5 && headerReader.ready(); lineCount++) {
            String declared = extractDeclaredEncoding(headerReader.readLine());
            if (declared != null) {
                return declared;
            }
        }
        return null;
    } finally {
        if (headerReader != null) {
            closeResponseStream(headerReader);
        } else {
            closeResponseStream(headerStream);
        }
    }
}

/**
 * Creates a digester that turns every ABCD/DwC record element (and the ABCD header) into a DOM
 * node and hands it to the given response holder.
 *
 * @param target receives the created nodes via {@code addRecordAsXml} / {@code setAbcd1Header}
 * @return the configured digester
 * @throws ParserConfigurationException if a {@link NodeCreateRule} cannot be created
 */
private Digester newRawOccurrenceDigester(ParsedSearchResponse target) throws ParserConfigurationException {
    Digester digester = new Digester();
    digester.setNamespaceAware(true);
    digester.setValidating(false);
    digester.push(target);

    digester.addRule(ExtractionSimpleXPaths.ABCD_RECORD_XPATH, new NodeCreateRule());
    digester.addSetNext(ExtractionSimpleXPaths.ABCD_RECORD_XPATH, "addRecordAsXml");

    digester.addRule(ExtractionSimpleXPaths.ABCD_HEADER_XPATH, new NodeCreateRule());
    digester.addSetNext(ExtractionSimpleXPaths.ABCD_HEADER_XPATH, "setAbcd1Header");

    digester.addRule(ExtractionSimpleXPaths.DWC_1_0_RECORD_XPATH, new NodeCreateRule());
    digester.addSetNext(ExtractionSimpleXPaths.DWC_1_0_RECORD_XPATH, "addRecordAsXml");

    digester.addRule(ExtractionSimpleXPaths.DWC_1_4_RECORD_XPATH, new NodeCreateRule());
    digester.addSetNext(ExtractionSimpleXPaths.DWC_1_4_RECORD_XPATH, "addRecordAsXml");

    // TODO: dwc_manis appears to work without a NodeCreateRule here - why?

    digester.addRule(ExtractionSimpleXPaths.DWC_2009_RECORD_XPATH, new NodeCreateRule());
    digester.addSetNext(ExtractionSimpleXPaths.DWC_2009_RECORD_XPATH, "addRecordAsXml");

    return digester;
}

/**
 * Closes the given stream or reader, logging (not propagating) any failure.
 *
 * @param resource the resource to close, may be {@code null}
 */
private void closeResponseStream(java.io.Closeable resource) {
    if (resource == null) {
        return;
    }
    try {
        resource.close();
    } catch (IOException e) {
        LOG.debug("Failed to close input files", e);
    }
}