List of usage examples for javax.xml.xpath XPath evaluate
public Object evaluate(String expression, InputSource source, QName returnType) throws XPathExpressionException;
From source file:edu.uams.clara.webapp.xml.processor.impl.DefaultXmlProcessorImpl.java
@Override public synchronized Map<String, Object> deleteElementByPathById(String path, final String originalXml, String elementId) throws SAXException, IOException, XPathExpressionException { Assert.hasText(path);//from w w w . j a va2 s .com Assert.hasText(originalXml); Assert.hasText(elementId); Document originalDom = parse(originalXml); Document finalDom = originalDom; String xPathString = path + "[@id='" + elementId + "']"; XPath xPath = getXPathInstance(); // find all the nodes specified by xPathString in the finalDom, and // delete them all NodeList existingNodeList = (NodeList) (xPath.evaluate(xPathString, finalDom, XPathConstants.NODESET)); int el = existingNodeList.getLength(); logger.trace("find '" + el + "' in originalDom using xPath: " + xPathString); for (int i = 0; i < el; i++) { Node c = existingNodeList.item(i); Node cp = c.getParentNode(); // remove this node from its parent... cp.removeChild(c); logger.trace("node has child : " + cp.getChildNodes().getLength() + ":" + cp.hasChildNodes()); } logger.trace(DomUtils.elementToString(finalDom)); Map<String, Object> resultMap = new HashMap<String, Object>(3); resultMap.put("finalXml", DomUtils.elementToString(finalDom)); resultMap.put("isDeleted", true); return resultMap; }
From source file:gov.nih.nci.ncicb.tcga.dcc.qclive.common.action.validation.ClinicalXmlValidator.java
private Boolean checkAllBarcodesAndUuids(final File xmlFile, final QcContext context, final XPath xpath, final Document document) throws XPathExpressionException, ProcessorException { boolean valid = true; final List<ClinicalTable> clinicalTables = clinicalLoaderQueries.getAllClinicalTables(); final Map<String, String> barcodeToUuid = new HashMap<String, String>(); final Map<String, String> uuidToBarcode = new HashMap<String, String>(); // for every clinical table... for (final ClinicalTable clinicalTable : clinicalTables) { // if it stores barcodes and uuids... if (clinicalTable.getUuidElementName() != null && clinicalTable.getBarcodeElementName() != null) { // get all barcode/uuid pairs for the element representing that table final NodeList uuidNodes = (NodeList) xpath.evaluate( "//" + clinicalTable.getElementNodeName() + "/" + clinicalTable.getUuidElementName(), document, XPathConstants.NODESET); final NodeList barcodeNodes = (NodeList) xpath.evaluate( "//" + clinicalTable.getElementNodeName() + "/" + clinicalTable.getBarcodeElementName(), document, XPathConstants.NODESET); // pull out the barcode/uuid values... for (int i = 0; i < barcodeNodes.getLength(); i++) { final Node barcodeNode = barcodeNodes.item(i); final String barcode = barcodeNode.getTextContent().trim(); String uuid = null; final Node uuidNode = uuidNodes.item(i); // if there is no uuid node, then it will be null, API doesn't // throw exception if index is invalid. weird! if (uuidNode != null) { uuid = uuidNode.getTextContent().trim(); }// w w w . j a v a2s . c o m if ((uuid == null || uuid.length() == 0) && areUuidsRequired()) { context.addError("No uuid given for " + clinicalTable.getElementNodeName() + " " + barcode + " but UUIDs are required"); valid = false; } String barcodeType = clinicalTable.getElementNodeName(); barcodeType = convertNodeNameToBarcodeType(barcodeType); boolean ignoreBarcodeValidation = false; // for legacy slide barcodes, if it already exists in the database do not validate the barcode if (CommonBarcodeAndUUIDValidatorImpl.SLIDE_ITEM_TYPE_NAME.equals(barcodeType)) { ignoreBarcodeValidation = bcridProcessor.slideBarcodeExists(barcode); } if (!ignoreBarcodeValidation && !qcLiveBarcodeAndUUIDValidator.validateAnyBarcode(barcode, context, xmlFile.getName(), false, barcodeType)) { valid = false; } else { valid = checkForUuidConflicts(barcode, uuid, clinicalTable.getElementNodeName(), barcodeToUuid, uuidToBarcode, context) && valid; // only do barcode-tumor validation if the barcode itself is valid -- otherwise we know for sure we won't find it // also, only verify aliquot barcodes for now, since the code doesn't support other types if (clinicalTable.getElementNodeName().equals("aliquot") && barcodeTumorValidator != null && !barcodeTumorValidator.barcodeIsValidForTumor(barcode, context.getArchive().getTumorType())) { context.addError("Barcode '" + barcode + "' in file '" + xmlFile.getName() + "' is not part of disease set for " + context.getArchive().getTumorType()); valid = false; } } } } } return valid; }
From source file:org.ala.harvester.PpmlHarvester.java
/** * Process a single image, do the document mapping etc * /*w w w . j a va 2 s . co m*/ * @param infosourceId * @param imageIndex * @param currentResDom * @throws Exception */ private void processSingleImage(int infosourceId, int imageIndex, Document currentResDom) throws Exception { XPathFactory factory = XPathFactory.newInstance(); XPath xpath = factory.newXPath(); ParsedDocument pd = new ParsedDocument(); String subject = MappingUtils.getSubject(); String xPathToIdentifier = "//root/item[" + imageIndex + "]/url/text()"; String xPathToScientificName = "//root/item[" + imageIndex + "]/scientificName/text()"; String xPathToCommonName = "//root/item[" + imageIndex + "]/commonName/text()"; String xPathToPhylum = "//root/item[" + imageIndex + "]/phylum/text()"; String xPathToOrder = "//root/item[" + imageIndex + "]/order/text()"; String xPathToGenus = "//root/item[" + imageIndex + "]/genus/text()"; String xPathToSpecies = "//root/item[" + imageIndex + "]/species/text()"; // String xPathToImageUrl = "/response/object[" + imageIndex + "]/thumbUrl/text()"; // String xPathToLicense = "/response/object[" + imageIndex + "]/copyrightText/text()"; // String xPathToSpecificEpithet = "/response/object[" + imageIndex + "]/SpecificEpithet/text()"; // String xPathToCountry = "/response/object[" + imageIndex + "]/Country/text()"; // String xPathToLocality = "/response/object[" + imageIndex + "]/Locality/text()"; String identifier = null; String scientificName = null; String phylum = null; String order = null; String genus = null; String commonName = null; String species = null; try { identifier = (String) xpath.evaluate(xPathToIdentifier, currentResDom, XPathConstants.STRING); scientificName = (String) xpath.evaluate(xPathToScientificName, currentResDom, XPathConstants.STRING); phylum = (String) xpath.evaluate(xPathToPhylum, currentResDom, XPathConstants.STRING); order = (String) xpath.evaluate(xPathToOrder, currentResDom, XPathConstants.STRING); genus = (String) xpath.evaluate(xPathToGenus, currentResDom, XPathConstants.STRING); commonName = (String) xpath.evaluate(xPathToCommonName, currentResDom, XPathConstants.STRING); species = (String) xpath.evaluate(xPathToSpecies, currentResDom, XPathConstants.STRING); } catch (XPathExpressionException getPageFragmentationError) { String errMsg = "Failed to obtain Detail Page Url"; logger.error(errMsg); throw new Exception(errMsg, getPageFragmentationError); } // System.out.println("Index: " + imageIndex); System.out.println(imageIndex + ", PHOTO URL:" + identifier); List<Triple<String, String, String>> triples = pd.getTriples(); Map<String, String> dcs = pd.getDublinCore(); pd.setGuid(identifier); pd.setContent(getContent(identifier)); pd.setContentType(contentType); dcs.put(Predicates.DC_TITLE.toString(), scientificName); dcs.put(Predicates.DC_IDENTIFIER.toString(), identifier); // dcs.put(Predicates.DC_LICENSE.toString(), "Creative Commons Attribution-Non Commercial 3.0 Australia License, http://creativecommons.org/licenses/by-nc/3.0/au/deed.en"); // dcs.put(Predicates.DC_CREATOR.toString(), license); dcs.put(Predicates.COUNTRY.toString(), "Australia"); triples.add(new Triple(subject, Predicates.SCIENTIFIC_NAME.toString(), scientificName)); triples.add(new Triple(subject, Predicates.PHYLUM.toString(), phylum)); triples.add(new Triple(subject, Predicates.ORDER.toString(), order)); triples.add(new Triple(subject, Predicates.GENUS.toString(), genus)); triples.add(new Triple(subject, Predicates.SPECIES.toString(), species)); triples.add(new Triple(subject, Predicates.COMMON_NAME.toString(), commonName)); if (pd != null) { this.repository.storeDocument(infosourceId, pd); debugParsedDoc(pd); } String xpathToImageCount = "count(//root/item[" + imageIndex + "]/medium/media)"; int imageCount = getCount(currentResDom, xpathToImageCount); System.out.println("item: " + imageIndex + ", counts: " + imageCount); for (int imgCounter = 1; imgCounter <= imageCount; imgCounter++) { String xPathToImageUrl = "//root/item[" + imageIndex + "]/medium/media[" + imgCounter + "]/filename/text()"; String xPathToImageLicense = "//root/item[" + imageIndex + "]/medium/media[" + imgCounter + "]/rights/text()"; String xPathToImageCreator = "//root/item[" + imageIndex + "]/medium/media[" + imgCounter + "]/acknowledgement/text()"; String xPathToImageType = "//root/item[" + imageIndex + "]/medium/media[" + imgCounter + "]/type/text()"; String imageUrl = null; String license = null; String creator = null; String type = null; try { imageUrl = (String) xpath.evaluate(xPathToImageUrl, currentResDom, XPathConstants.STRING); license = (String) xpath.evaluate(xPathToImageLicense, currentResDom, XPathConstants.STRING); creator = (String) xpath.evaluate(xPathToImageCreator, currentResDom, XPathConstants.STRING); type = (String) xpath.evaluate(xPathToImageType, currentResDom, XPathConstants.STRING); } catch (XPathExpressionException getPageFragmentationError) { String errMsg = "Failed to obtain Image details"; logger.error(errMsg); throw new Exception(errMsg, getPageFragmentationError); } if (license != null && license.contains("CC BY") && imageUrl != null && !"".equals(imageUrl) && (type != null && (!type.contains("Thumb") && !type.contains("List")))) { ParsedDocument imageDoc = new ParsedDocument(); System.out.println(type); imageDoc = MappingUtils.retrieveImageDocument(pd, imageUrl); if (imageDoc != null) { imageDoc.getDublinCore().put(Predicates.DC_LICENSE.toString(), license); if (creator != null && !"".equals(creator)) { imageDoc.getDublinCore().put(Predicates.DC_CREATOR.toString(), creator); imageDoc.getDublinCore().put(Predicates.DC_RIGHTS.toString(), creator); debugParsedDoc(imageDoc); this.repository.storeDocument(infosourceId, imageDoc); } } } } }
From source file:edu.uams.clara.webapp.xml.processor.impl.DefaultXmlProcessorImpl.java
/** * If the xpath identifies multiple elements, it will only add to the first * element, if there is no such parent element, it will just add it... *///from w w w. j ava2 s . c o m @Override public synchronized Map<String, Object> addSubElementToElementIdentifiedByXPath(final String parentElementXPath, final String originalXml, final String elementXml, boolean generateId) throws SAXException, IOException, XPathExpressionException { Assert.hasText(parentElementXPath); Assert.hasText(originalXml); Assert.hasText(elementXml); Document originalDom = parse(originalXml); Document finalDom = originalDom; Document elementDom = parse(elementXml); Element elementRoot = (Element) elementDom.getFirstChild(); XPath xPath = getXPathInstance(); // find all the nodes specified by xPathString in the finalDom, and // delete them all NodeList existingNodeList = (NodeList) (xPath.evaluate(parentElementXPath, finalDom, XPathConstants.NODESET)); int el = existingNodeList.getLength(); String id = ""; Element currentNode = finalDom.getDocumentElement(); if (el == 0) { // doesn't exist, create the parent... List<String> nodeList = getNodeList(parentElementXPath); // remove first one, should be protocol nodeList.remove(0); int c = 0; for (String n : nodeList) { NodeList cur = currentNode.getElementsByTagName(n); String curName = currentNode.getNodeName(); c = cur.getLength(); if (c > 1) { throw new RuntimeException("illeagl xml structure; find " + c + " elements with name " + n); } if (c == 0) { logger.debug("empty node...; " + n + " doesn't exist under " + curName); Element newN = finalDom.createElement(n); currentNode.appendChild(newN); currentNode = newN; continue; } currentNode = (Element) cur.item(0); } } else if (el > 0) { currentNode = (Element) existingNodeList.item(0); // only the first // one } if (generateId) { // using jdk UUID as uuid generator... id = UUID.randomUUID().toString(); elementRoot.setAttribute("id", id); } currentNode.appendChild(finalDom.importNode(elementRoot, true)); /* * for(int i = 0; i < el; i++){ Node c = existingNodeList.item(i); * * if (generateId) { // using jdk UUID as uuid generator... String id = * UUID.randomUUID().toString(); * * elementRoot.setAttribute("id", id); } * * c.appendChild(finalDom.importNode(elementRoot, true)); * * } */ logger.trace(DomUtils.elementToString(finalDom)); Map<String, Object> resultMap = new HashMap<String, Object>(3); resultMap.put("finalXml", DomUtils.elementToString(finalDom)); resultMap.put("elementXml", DomUtils.elementToString(elementDom)); resultMap.put("elementId", id); return resultMap; }
From source file:edu.uams.clara.webapp.xml.processor.impl.DefaultXmlProcessorImpl.java
/** * replace elements in originalDom with modifiedDom according to listed * xPaths, if the originalDom has elements not listed in the xPath, it will * be kept untouched. in the HashMap<String, String> xPathPairs, the key is * the path in the source xml, and the value is the xpath for the final * note*: the if the xpath has attributes, it's not going to work... need to * do a custom implementation when that use case happened... * //from ww w . j a v a 2 s . c o m * @param originalDom * @param modifiedDom * @param xPaths * @return * @throws XPathExpressionException */ private Document replaceByXPaths(final Document originalDom, final Document modifiedDom, Map<String, String> xPathPairs) throws XPathExpressionException { Document finalDom = originalDom; Element finalDomRoot = (Element) finalDom.getFirstChild(); Element lastChild = null; for (Entry<String, String> xPathPair : xPathPairs.entrySet()) { /** * basically, this is to copy the element specified in srcXPath, and * replace/add it to the position pointed by destXPath... */ String srcXPath = xPathPair.getKey(); logger.debug("srcXPath: " + srcXPath); String destXPath = xPathPair.getValue(); logger.debug("destXPath: " + destXPath); XPath xPath = getXPathInstance(); // find all the nodes specified by destXPath in the originalDom, and // delete them all NodeList existingNodeList = (NodeList) (xPath.evaluate(destXPath, finalDom, XPathConstants.NODESET)); int el = existingNodeList.getLength(); logger.debug("find '" + el + "' in originalDom using xPath: " + destXPath); for (int i = 0; i < el; i++) { Node c = existingNodeList.item(i); // remove this node from its parent... c.getParentNode().removeChild(c); } // create the node structure first. and return the last child of the // path... the right most node... lastChild = createElementStructureByPath(finalDomRoot, destXPath); List<String> nodeNameList = getNodeList(destXPath); String lastNodeName = nodeNameList.get(nodeNameList.size() - 1); xPath.reset(); // find all the nodes specified by srcXPath in the modifiedDom NodeList nodeList = (NodeList) (xPath.evaluate(srcXPath, modifiedDom, XPathConstants.NODESET)); int l = nodeList.getLength(); logger.debug("find '" + l + "' in modifiedXml using xPath: " + srcXPath); Node currentNode = null; for (int i = 0; i < l; i++) { currentNode = nodeList.item(i); // the name of the last node in srcXPath might not be the same // as the name of the last node in destXPath Element lastElement = finalDom.createElement(lastNodeName); // NodeList currentNodeChildNodes = currentNode.getChildNodes(); // int s = currentNodeChildNodes.getLength(); // for(int j = 0; j < s; j++){ // lastElement.appendChild(finalDom.importNode(currentNodeChildNodes.item(j), // true)); // } if (currentNode.hasAttributes()) { NamedNodeMap attributes = currentNode.getAttributes(); for (int j = 0; j < attributes.getLength(); j++) { String attribute_name = attributes.item(j).getNodeName(); String attribute_value = attributes.item(j).getNodeValue(); lastElement.setAttribute(attribute_name, attribute_value); } } while (currentNode.hasChildNodes()) { Node kid = currentNode.getFirstChild(); currentNode.removeChild(kid); lastElement.appendChild(finalDom.importNode(kid, true)); } lastChild.appendChild(lastElement); } } return finalDom; }
From source file:edu.uams.clara.webapp.xml.processor.impl.DefaultXmlProcessorImpl.java
private Document replaceIfExistingByXPaths(final Document originalDom, final Document modifiedDom, Map<String, String> xPathPairs) throws XPathExpressionException { Document finalDom = originalDom; Element finalDomRoot = (Element) finalDom.getFirstChild(); //Element modifiedDomRoot = (Element) modifiedDom.getFirstChild(); Element lastChild = null;// ww w. j a va 2 s. c o m for (Entry<String, String> xPathPair : xPathPairs.entrySet()) { /** * basically, this is to copy the element specified in srcXPath, and * replace/add it to the position pointed by destXPath... */ String srcXPath = xPathPair.getKey(); logger.debug("srcXPath: " + srcXPath); String destXPath = xPathPair.getValue(); logger.debug("destXPath: " + destXPath); XPath xPath = getXPathInstance(); // find all the nodes specified by destXPath in the originalDom, and // delete them all NodeList existingNodeList = (NodeList) (xPath.evaluate(destXPath, finalDom, XPathConstants.NODESET)); xPath.reset(); // find all the nodes specified by srcXPath in the modifiedDom NodeList nodeList = (NodeList) (xPath.evaluate(srcXPath, modifiedDom, XPathConstants.NODESET)); int el = existingNodeList.getLength(); logger.debug("find '" + el + "' in originalDom using xPath: " + destXPath); int l = nodeList.getLength(); logger.debug("find '" + l + "' in modifiedXml using xPath: " + srcXPath); for (int i = 0; i < el; i++) { Node c = existingNodeList.item(i); //xPathExpression = xPath.compile(srcXPath); //NodeList srcNodeLst = (NodeList) (xPathExpression.evaluate( //modifiedDom, XPathConstants.NODESET)); //NodeList srcNodeLst = modifiedDomRoot.getElementsByTagName(c.getNodeName()); if (l > 0) { // remove this node from its parent... c.getParentNode().removeChild(c); logger.debug("Node:" + c.getNodeName() + " is removed!"); } } // create the node structure first. and return the last child of the // path... the right most node... lastChild = createElementStructureByPath(finalDomRoot, destXPath); List<String> nodeNameList = getNodeList(destXPath); String lastNodeName = nodeNameList.get(nodeNameList.size() - 1); Node currentNode = null; for (int i = 0; i < l; i++) { currentNode = nodeList.item(i); // the name of the last node in srcXPath might not be the same // as the name of the last node in destXPath Element lastElement = finalDom.createElement(lastNodeName); // NodeList currentNodeChildNodes = currentNode.getChildNodes(); // int s = currentNodeChildNodes.getLength(); // for(int j = 0; j < s; j++){ // lastElement.appendChild(finalDom.importNode(currentNodeChildNodes.item(j), // true)); // } if (currentNode.hasAttributes()) { NamedNodeMap attributes = currentNode.getAttributes(); for (int j = 0; j < attributes.getLength(); j++) { String attribute_name = attributes.item(j).getNodeName(); String attribute_value = attributes.item(j).getNodeValue(); lastElement.setAttribute(attribute_name, attribute_value); } } while (currentNode.hasChildNodes()) { Node kid = currentNode.getFirstChild(); currentNode.removeChild(kid); lastElement.appendChild(finalDom.importNode(kid, true)); } lastChild.appendChild(lastElement); } } return finalDom; }
From source file:com.ikanow.infinit.e.harvest.enrichment.custom.UnstructuredAnalysisHarvester.java
/** * processMeta - handle an individual field *///from w w w. ja v a2 s.c o m private void processMeta(DocumentPojo f, metaField m, String text, SourcePojo source, UnstructuredAnalysisConfigPojo uap) { boolean bAllowDuplicates = false; if ((null != m.flags) && m.flags.contains("U")) { bAllowDuplicates = true; } if ((null == m.scriptlang) || m.scriptlang.equalsIgnoreCase("regex")) { Pattern metaPattern = createRegex(m.script, m.flags); int timesToRun = 1; Object[] currField = null; if ((null != m.flags) && m.flags.contains("c")) { currField = f.getMetadata().get(m.fieldName); } if (null != currField) { // chained metadata timesToRun = currField.length; text = (String) currField[0]; } //TESTED Matcher matcher = metaPattern.matcher(text); LinkedList<String> Llist = null; for (int ii = 0; ii < timesToRun; ++ii) { if (ii > 0) { // (else either just text, or in the above "chained metadata" initialization above) text = (String) currField[ii]; matcher = metaPattern.matcher(text); } //TESTED StringBuffer prefix = new StringBuffer(m.fieldName).append(':'); int nFieldNameLen = m.fieldName.length() + 1; try { while (matcher.find()) { if (null == Llist) { Llist = new LinkedList<String>(); } if (null == m.groupNum) { m.groupNum = 0; } String toAdd = matcher.group(m.groupNum); if (null != m.replace) { toAdd = metaPattern.matcher(toAdd).replaceFirst(m.replace); } if ((null != m.flags) && m.flags.contains("H")) { toAdd = StringEscapeUtils.unescapeHtml(toAdd); } prefix.setLength(nFieldNameLen); prefix.append(toAdd); String dupCheck = prefix.toString(); if (!regexDuplicates.contains(dupCheck)) { Llist.add(toAdd); if (!bAllowDuplicates) { regexDuplicates.add(dupCheck); } } } } catch (Exception e) { this._context.getHarvestStatus().logMessage("processMeta1: " + e.getMessage(), true); } } //(end metadata chaining handling) if (null != Llist) { if (null != currField) { // (overwrite) f.getMetadata().put(m.fieldName, Llist.toArray()); } else { f.addToMetadata(m.fieldName, Llist.toArray()); } } //TESTED } else if (m.scriptlang.equalsIgnoreCase("javascript")) { if (null == f.getMetadata()) { f.setMetadata(new LinkedHashMap<String, Object[]>()); } //set the script engine up if necessary if ((null != source) && (null != uap)) { //(these are null if called from new processing pipeline vs legacy code) intializeScriptEngine(source, uap); } try { //TODO (INF-2488): in new format, this should only happen in between contentMeta blocks/docs // (also should be able to use SAH _document object I think?) // Javascript: the user passes in Object[] currField = f.getMetadata().get(m.fieldName); if ((null == m.flags) || m.flags.isEmpty()) { if (null == currField) { engine.put("text", text); engine.put("_iterator", null); } //(otherwise will just pass the current fields in there) } else { // flags specified if (m.flags.contains("t")) { // text engine.put("text", text); } if (m.flags.contains("d")) { // entire document (minus ents and assocs) GsonBuilder gb = new GsonBuilder(); Gson g = gb.create(); List<EntityPojo> ents = f.getEntities(); List<AssociationPojo> assocs = f.getAssociations(); try { f.setEntities(null); f.setAssociations(null); engine.put("document", g.toJson(f)); securityManager.eval(engine, JavaScriptUtils.initScript); } finally { f.setEntities(ents); f.setAssociations(assocs); } } if (m.flags.contains("m")) { // metadata GsonBuilder gb = new GsonBuilder(); Gson g = gb.create(); engine.put("_metadata", g.toJson(f.getMetadata())); securityManager.eval(engine, JavaScriptUtils.iteratorMetaScript); } } //(end flags processing) if (null != currField) { f.getMetadata().remove(m.fieldName); GsonBuilder gb = new GsonBuilder(); Gson g = gb.create(); engine.put("_iterator", g.toJson(currField)); securityManager.eval(engine, JavaScriptUtils.iteratorDocScript); } //TESTED (handling of flags, and replacing of existing fields, including when field is null but specified) Object returnVal = securityManager.eval(engine, m.script); if (null != returnVal) { if (returnVal instanceof String) { // The only easy case Object[] array = new Object[1]; if ((null != m.flags) && m.flags.contains("H")) { returnVal = StringEscapeUtils.unescapeHtml((String) returnVal); } array[0] = returnVal; f.addToMetadata(m.fieldName, array); } else { // complex object or array - in either case the engine turns these into // internal.NativeArray or internal.NativeObject BasicDBList outList = JavaScriptUtils.parseNativeJsObject(returnVal, engine); f.addToMetadata(m.fieldName, outList.toArray()); } } } catch (ScriptException e) { _context.getHarvestStatus().logMessage(HarvestExceptionUtils.createExceptionMessage(e).toString(), true); // Just do nothing and log // e.printStackTrace(); //DEBUG (don't output log messages per doc) //logger.error(e.getMessage()); } catch (Exception e) { _context.getHarvestStatus().logMessage(HarvestExceptionUtils.createExceptionMessage(e).toString(), true); // Just do nothing and log // e.printStackTrace(); //DEBUG (don't output log messages per doc) //logger.error(e.getMessage()); } } else if (m.scriptlang.equalsIgnoreCase("xpath")) { String xpath = m.script; try { createHtmlCleanerIfNeeded(); int timesToRun = 1; Object[] currField = null; if ((null != m.flags) && m.flags.contains("c")) { currField = f.getMetadata().get(m.fieldName); } if (null != currField) { // chained metadata f.getMetadata().remove(m.fieldName); // (so will add to the end) timesToRun = currField.length; text = (String) currField[0]; } //TESTED for (int ii = 0; ii < timesToRun; ++ii) { if (ii > 0) { // (else either just text, or in the above "chained metadata" initialization above) text = (String) currField[ii]; } //TESTED TagNode node = cleaner.clean(new ByteArrayInputStream(text.getBytes())); //NewCode : Only use html cleaner for cleansing //use JAXP for full Xpath lib Document doc = new DomSerializer(new CleanerProperties()).createDOM(node); String extraRegex = extractRegexFromXpath(xpath); if (extraRegex != null) xpath = xpath.replace(extraRegex, ""); XPath xpa = XPathFactory.newInstance().newXPath(); NodeList res = (NodeList) xpa.evaluate(xpath, doc, XPathConstants.NODESET); if (res.getLength() > 0) { if ((null != m.flags) && (m.flags.contains("o"))) { // "o" for object m.groupNum = -1; // (see bConvertToObject below) } StringBuffer prefix = new StringBuffer(m.fieldName).append(':'); int nFieldNameLen = m.fieldName.length() + 1; ArrayList<Object> Llist = new ArrayList<Object>(res.getLength()); boolean bConvertToObject = ((m.groupNum != null) && (m.groupNum == -1)); boolean convertToXml = ((null != m.flags) && (m.flags.contains("x"))); for (int i = 0; i < res.getLength(); i++) { Node info_node = res.item(i); if ((null != m.flags) && (m.flags.contains("g"))) { Llist.add(parseHtmlTable(info_node, m.replace)); } else if (bConvertToObject || convertToXml) { // Try to create a JSON object out of this StringWriter writer = new StringWriter(); try { Transformer transformer = TransformerFactory.newInstance().newTransformer(); transformer.transform(new DOMSource(info_node), new StreamResult(writer)); } catch (TransformerException e1) { continue; } if (bConvertToObject) { try { JSONObject subObj = XML.toJSONObject(writer.toString()); if (xpath.endsWith("*")) { // (can have any number of different names here) Llist.add(XmlToMetadataParser.convertJsonObjectToLinkedHashMap(subObj)); } //TESTED else { String[] rootNames = JSONObject.getNames(subObj); if (1 == rootNames.length) { // (don't think it can't be any other number in fact) subObj = subObj.getJSONObject(rootNames[0]); } boolean bUnescapeHtml = ((null != m.flags) && m.flags.contains("H")); Llist.add(XmlToMetadataParser.convertJsonObjectToLinkedHashMap(subObj, bUnescapeHtml)); } //TESTED } catch (JSONException e) { // Just carry on continue; } //TESTED } else { // leave in XML form Llist.add(writer.toString().substring(38)); // +38: (step over <?xml version="1.0" encoding="UTF-8"?>) } //TESTED (xpath_test.json) } else { // Treat this as string, either directly or via regex String info = info_node.getTextContent().trim(); if (extraRegex == null || extraRegex.isEmpty()) { prefix.setLength(nFieldNameLen); prefix.append(info); String dupCheck = prefix.toString(); if (!regexDuplicates.contains(dupCheck)) { if ((null != m.flags) && m.flags.contains("H")) { info = StringEscapeUtils.unescapeHtml(info); } Llist.add(info); if (!bAllowDuplicates) { regexDuplicates.add(dupCheck); } } } else { // Apply regex to the string Pattern dataRegex = createRegex(extraRegex, m.flags); Matcher dataMatcher = dataRegex.matcher(info); boolean result = dataMatcher.find(); while (result) { String toAdd; if (m.groupNum != null) toAdd = dataMatcher.group(m.groupNum); else toAdd = dataMatcher.group(); prefix.setLength(nFieldNameLen); prefix.append(toAdd); String dupCheck = prefix.toString(); if (!regexDuplicates.contains(dupCheck)) { if ((null != m.flags) && m.flags.contains("H")) { toAdd = StringEscapeUtils.unescapeHtml(toAdd); } Llist.add(toAdd); if (!bAllowDuplicates) { regexDuplicates.add(dupCheck); } } result = dataMatcher.find(); } } //(regex vs no regex) } //(end string vs object) } if (Llist.size() > 0) { f.addToMetadata(m.fieldName, Llist.toArray()); } } } //(end loop over metadata objects if applicable) } catch (IOException ioe) { _context.getHarvestStatus().logMessage(HarvestExceptionUtils.createExceptionMessage(ioe).toString(), true); // Just do nothing and log //DEBUG (don't output log messages per doc) //logger.error(ioe.getMessage()); } catch (ParserConfigurationException e1) { _context.getHarvestStatus().logMessage(HarvestExceptionUtils.createExceptionMessage(e1).toString(), true); // Just do nothing and log //DEBUG (don't output log messages per doc) //logger.error(e1.getMessage()); } catch (XPathExpressionException e1) { _context.getHarvestStatus().logMessage("Error evaluating xpath expression: " + xpath, true); } } else if (m.scriptlang.equalsIgnoreCase("stream")) { // XML or JSON streaming interface // which one? try { boolean json = false; boolean xml = false; for (int i = 0; i < 128; ++i) { if ('<' == text.charAt(i)) { xml = true; break; } if ('{' == text.charAt(i) || '[' == text.charAt(i)) { json = true; break; } if (!Character.isSpaceChar(text.charAt(i))) { break; } } //TESTED (too many spaces: meta_stream_test, test4; incorrect chars: test3, xml: test1, json: test2) boolean textNotObject = m.flags == null || !m.flags.contains("o"); List<DocumentPojo> docs = new LinkedList<DocumentPojo>(); List<String> levelOneFields = null; if (null != m.script) { levelOneFields = Arrays.asList(m.script.split("\\s*,\\s*")); if ((1 == levelOneFields.size()) && levelOneFields.get(0).isEmpty()) { // convert [""] to null levelOneFields = null; } } //TESTED (json and xml) if (xml) { XmlToMetadataParser parser = new XmlToMetadataParser(levelOneFields, null, null, null, null, null, Integer.MAX_VALUE); XMLInputFactory factory = XMLInputFactory.newInstance(); factory.setProperty(XMLInputFactory.IS_COALESCING, true); factory.setProperty(XMLInputFactory.SUPPORT_DTD, false); XMLStreamReader reader = null; try { reader = factory.createXMLStreamReader(new ByteArrayInputStream(text.getBytes())); docs = parser.parseDocument(reader, textNotObject); } finally { if (null != reader) reader.close(); } } //TESTED (meta_stream_test, test1) if (json) { JsonReader jsonReader = null; try { JsonToMetadataParser parser = new JsonToMetadataParser(null, levelOneFields, null, null, Integer.MAX_VALUE); jsonReader = new JsonReader( new InputStreamReader(new ByteArrayInputStream(text.getBytes()), "UTF-8")); jsonReader.setLenient(true); docs = parser.parseDocument(jsonReader, textNotObject); } finally { if (null != jsonReader) jsonReader.close(); } } //TESTED (meta_stream_test test2) if (!docs.isEmpty()) { ArrayList<String> Llist = null; ArrayList<Object> LlistObj = null; if (textNotObject) { Llist = new ArrayList<String>(docs.size()); } else { LlistObj = new ArrayList<Object>(docs.size()); } for (DocumentPojo doc : docs) { if ((null != doc.getFullText()) || (null != doc.getMetadata())) { if (textNotObject) { Llist.add(doc.getFullText()); } //TESTED else if (xml) { LlistObj.add(doc.getMetadata()); } //TESTED else if (json) { Object o = doc.getMetadata(); if (null != o) { o = doc.getMetadata().get("json"); if (o instanceof Object[]) { LlistObj.addAll(Arrays.asList((Object[]) o)); } else if (null != o) { LlistObj.add(o); } //TESTED } } //TESTED } } //TESTED if ((null != Llist) && !Llist.isEmpty()) { f.addToMetadata(m.fieldName, Llist.toArray()); } //TESTED if ((null != LlistObj) && !LlistObj.isEmpty()) { f.addToMetadata(m.fieldName, LlistObj.toArray()); } //TESTED } //TESTED (meta_stream_test test1,test2) } //(end try) catch (Exception e) { // various parsing errors _context.getHarvestStatus().logMessage(HarvestExceptionUtils.createExceptionMessage(e).toString(), true); } } //TESTED (meta_stream_test) // (don't currently support other script types) }
From source file:com.ephesoft.gxt.systemconfig.server.SystemConfigServiceImpl.java
/** * @param pluginXmlDTO/*from w w w . j ava2 s . c o m*/ * @param xpath * @param pluginPropertyNode * @throws */ private void extractPluginConfigs(PluginXmlDTO pluginXmlDTO, XPath xpath, NodeList pluginNodeList) throws UIException { try { NodeList pluginPropertyNode = (NodeList) xpath.evaluate( SystemConfigSharedConstants.PLUGIN_PROPERTY_EXPR, pluginNodeList.item(0), XPathConstants.NODESET); LOGGER.info("Extracting plugin Configs from the xml"); List<PluginConfigXmlDTO> pluginConfigXmlDTOs = new ArrayList<PluginConfigXmlDTO>(0); int numberOfPluginConfigs = pluginPropertyNode.getLength(); LOGGER.info(numberOfPluginConfigs + " plugin configs found: "); for (int index = 0; index < numberOfPluginConfigs; index++) { LOGGER.info("Plugin config " + index + ": "); boolean isMandetory; boolean isMultiValue; String configName = SystemConfigSharedConstants.EMPTY_STRING; String propertyType = SystemConfigSharedConstants.EMPTY_STRING; String propertyDescription = SystemConfigSharedConstants.EMPTY_STRING; String operation = SystemConfigSharedConstants.EMPTY_STRING; isMandetory = Boolean.parseBoolean( (String) xpath.evaluate(SystemConfigSharedConstants.PLUGIN_PROPERTY_IS_MANDETORY_EXPR, pluginPropertyNode.item(index), XPathConstants.STRING)); isMultiValue = Boolean.parseBoolean( (String) xpath.evaluate(SystemConfigSharedConstants.PLUGIN_PROPERTY_IS_MULTI_VALUES_EXPR, pluginPropertyNode.item(index), XPathConstants.STRING)); configName = (String) xpath.evaluate(SystemConfigSharedConstants.PLUGIN_PROPERTY_NAME_EXPR, pluginPropertyNode.item(index), XPathConstants.STRING); propertyType = (String) xpath.evaluate(SystemConfigSharedConstants.PLUGIN_PROPERTY_TYPE_EXPR, pluginPropertyNode.item(index), XPathConstants.STRING); propertyDescription = (String) xpath.evaluate(SystemConfigSharedConstants.PLUGIN_PROPERTY_DESC_EXPR, pluginPropertyNode.item(index), XPathConstants.STRING); operation = (String) xpath.evaluate(SystemConfigSharedConstants.OPERATION, pluginPropertyNode.item(index), XPathConstants.STRING); LOGGER.info("Extracting values for config: " + index); LOGGER.info("Is Mandatory" + isMandetory); LOGGER.info("Is Multivalue" + isMultiValue); LOGGER.info("Config Name" + configName); LOGGER.info("Property Type" + propertyType); LOGGER.info("Property Description" + propertyDescription); if (!configName.isEmpty() && !propertyType.isEmpty() && !propertyDescription.isEmpty()) { PluginConfigXmlDTO pluginConfigXmlDTO = new PluginConfigXmlDTO(); pluginConfigXmlDTO.setPluginPropertyIsMandatory(isMandetory); pluginConfigXmlDTO.setPluginPropertyIsMultiValues(isMultiValue); pluginConfigXmlDTO.setPluginPropertyName(configName); pluginConfigXmlDTO.setPluginPropertyType(propertyType); pluginConfigXmlDTO.setPluginPropertyDesc(propertyDescription); pluginConfigXmlDTO.setOperation(operation); NodeList sampleValuesNode = (NodeList) xpath.evaluate( SystemConfigSharedConstants.PLUGIN_PROPERTY_SAMPLE_VALUES_EXPR, pluginPropertyNode.item(index), XPathConstants.NODESET); List<String> sampleValuesList = new ArrayList<String>(0); LOGGER.info("Extracting sample values: "); int numberOfSampleValues = sampleValuesNode.getLength(); LOGGER.info(numberOfSampleValues + " sample values found"); for (int sampleValueIndex = 0; sampleValueIndex < numberOfSampleValues; sampleValueIndex++) { String sampleValue = (String) xpath.evaluate( SystemConfigSharedConstants.PLUGIN_PROPERTY_SAMPLE_VALUE_EXPR, sampleValuesNode.item(sampleValueIndex), XPathConstants.STRING); LOGGER.info("Sample value " + sampleValueIndex + " :" + sampleValue); sampleValuesList.add(sampleValue); } pluginConfigXmlDTO.setPluginPropertySampleValues(sampleValuesList); pluginConfigXmlDTOs.add(pluginConfigXmlDTO); } else { String errorMsg = SystemConfigSharedConstants.INVALID_XML_CONTENT_MESSAGE; LOGGER.error(errorMsg); throw new UIException(errorMsg); } } pluginXmlDTO.setConfigXmlDTOs(pluginConfigXmlDTOs); } catch (XPathExpressionException e) { String errorMsg = SystemConfigSharedConstants.INVALID_XML_CONTENT_MESSAGE; LOGGER.error(errorMsg, e); throw new UIException(errorMsg); } }
From source file:com.ephesoft.gxt.systemconfig.server.SystemConfigServiceImpl.java
/** * @param pluginXmlDTO//w w w. jav a2s. c o m * @param xpath * @param pluginNodeList * @throws */ private void extractPluginDependenciesFromXml(PluginXmlDTO pluginXmlDTO, XPath xpath, NodeList pluginNodeList) throws UIException { NodeList pluginDependenciesNode; try { pluginDependenciesNode = (NodeList) xpath.evaluate( SystemConfigSharedConstants.DEPENDENCIES_LIST_DEPENDENCY, pluginNodeList.item(0), XPathConstants.NODESET); } catch (XPathExpressionException e) { String errorMsg = "Invalid xml content. A mandatory field is missing."; LOGGER.error(errorMsg, e); throw new UIException(errorMsg); } LOGGER.info("Extracting Dependencies from xml:"); List<PluginDependencyXmlDTO> pluginDependencyXmlDTOs = new ArrayList<PluginDependencyXmlDTO>(0); int numberOfDependencies = pluginDependenciesNode.getLength(); LOGGER.info(numberOfDependencies + " dependencies found"); for (int index = 0; index < numberOfDependencies; index++) { PluginDependencyXmlDTO pluginDependencyXmlDTO = new PluginDependencyXmlDTO(); LOGGER.info("Plugin Dependency " + index + ":"); String dependencyType = SystemConfigSharedConstants.EMPTY_STRING; String dependencyValue = SystemConfigSharedConstants.EMPTY_STRING; String operation = SystemConfigSharedConstants.EMPTY_STRING; try { dependencyType = (String) xpath.evaluate(SystemConfigSharedConstants.PLUGIN_DEPENDENCY_TYPE, pluginDependenciesNode.item(index), XPathConstants.STRING); dependencyValue = (String) xpath.evaluate(SystemConfigSharedConstants.PLUGIN_DEPENDENCY_VALUE, pluginDependenciesNode.item(index), XPathConstants.STRING); operation = (String) xpath.evaluate(SystemConfigSharedConstants.OPERATION, pluginDependenciesNode.item(index), XPathConstants.STRING); } catch (XPathExpressionException e) { String errorMsg = "Error in xml content. A mandatory field is missing."; LOGGER.error(errorMsg, e); throw new UIException(errorMsg); } if (!dependencyType.isEmpty() && !dependencyValue.isEmpty()) { LOGGER.info("Type: " + dependencyType); LOGGER.info("Value: " + dependencyValue); pluginDependencyXmlDTO.setPluginDependencyType(dependencyType); pluginDependencyXmlDTO.setPluginDependencyValue(dependencyValue); pluginDependencyXmlDTO.setOperation(operation); pluginDependencyXmlDTOs.add(pluginDependencyXmlDTO); } } pluginXmlDTO.setDependencyXmlDTOs(pluginDependencyXmlDTOs); }