List of usage examples for org.jdom2.filter Filters comment
public static final Filter<Comment> comment()
From source file:es.upm.dit.xsdinferencer.extraction.extractorImpl.TypesExtractorImpl.java
License:Apache License
/** * Recursive method that traverses an element to extract all the possible information from it. * It is recursive because it calls itself for each child of the element (obviously, infinite recursion * is not possible as there are not, or there should not be, parent-child loops). * The index of the current document is necessary in order to add well some information to * the statistics.//w w w. j a v a2 s . com * @param documentIndex index of current document * @param element the element to traverse (as a JDOM2 {@link Element}) * @param enclosingComplexType the complex type which will contain the current element */ private void traverseElement(int documentIndex, Element element, String enclosingComplexType) { //Elements in the XSI namespace should be ignored if (element.getNamespaceURI().equalsIgnoreCase(XSI_NAMESPACE_URI)) return; List<String> realPathUnfiltered = getRealPathOfElementUnfiltered(element, configuration, false, solvedNamespaceToPrefixMapping); String realPathFiltered = filterAndJoinRealPath(realPathUnfiltered);//Path for the statistics List<String> typePathUnfiltered = getRealPathOfElementUnfiltered(element, configuration, false, solvedNamespaceToPrefixMapping); List<String> suitablePath = getSuitablePath(typePathUnfiltered);//Path for type name inferencing //First, we will register the information of width and depth //The root is in a level whose width is 1, if we did not do the following, that width would be never registered if (element.isRootElement()) { statistics.registerWidth(documentIndex, 1); } statistics.registerDepth(documentIndex, realPathUnfiltered.size()); int width = element.getChildren().size(); if (width > 0) { statistics.registerWidth(documentIndex, width); } TypeNameInferencer typeNameInferencer = configuration.getTypeNameInferencer(); String complexTypeName = typeNameInferencer.inferTypeName(suitablePath, configuration);//Complex type of this element // //Little workaround that ensures that the same complex type is used // //when the elements on its path are the same (same name and namespace) but some of them // //use different namespace prefixes // List<String> realPathUnfilteredKey=getRealPathOfElementUnfiltered(element, configuration, false, solvedNamespaceToPrefixMapping); // List<String> suitablePathKey=getSuitablePath(realPathUnfilteredKey);//Path for type name inferencing // String complexTypeNameKey = typeNameInferencer.inferTypeName(suitablePathKey, configuration);//Complex type of this element String complexTypeNameKey = complexTypeName; //The complex type object of this element. ComplexType complexType = complexTypes.get(complexTypeNameKey); if (complexType == null) { complexType = new ComplexType(complexTypeName, null, null, null); complexTypes.put(complexTypeNameKey, complexType); //New complex type } complexType.addSourceNodeNamespaceAndName(element.getNamespaceURI(), element.getName()); //Comment processing for (Comment comment : element.getDescendants(Filters.comment())) { if (comment.getParentElement().equals(element)) complexType.getComments().add(comment.getText()); } //Key to find the corresponding SchemaElement //This key is: if the SchemaElement has an enclosing complex type (i.e., it is not a valid root), its name will be: //enclosingComplexType+typeNamesSeparator+elementName //If the element is a suitable root, the key is the name of the element. String schemaElementKey = (!enclosingComplexType.equals("")) ? enclosingComplexType + configuration.getTypeNamesAncestorsSeparator() + element.getName() : element.getName(); if (configuration.getTypeNameInferencer() instanceof NameTypeNameInferencer) { schemaElementKey = element.getName(); //If we use a name-based type inferencer, the key is the name and we avoid problems. } SchemaElement schemaElement = elements.get(element.getNamespaceURI(), schemaElementKey); if (schemaElement == null) { schemaElement = new SchemaElement(element.getName(), element.getNamespaceURI(), complexType);//Complex type already not known. elements.put(element.getNamespaceURI(), schemaElementKey, schemaElement); } boolean wasAlreadyValidRoot = schemaElement.isValidRoot(); schemaElement.setValidRoot(wasAlreadyValidRoot || element.isRootElement()); ComplexTypeStatisticsEntry complexTypeStatisticsEntry = statistics.getComplexTypeInfo().get(complexType); if (complexTypeStatisticsEntry == null) { complexTypeStatisticsEntry = new ComplexTypeStatisticsEntry(xmlDocuments.size()); statistics.getComplexTypeInfo().put(complexType, complexTypeStatisticsEntry); } AttributeListInferencer attributeListInferencer = attributeListInferencers.get(complexTypeName); if (attributeListInferencer == null) { attributeListInferencer = inferencersFactory.getAttributeListInferencerInstance(complexTypeName, configuration, solvedNamespaceToPrefixMapping, statistics); attributeListInferencers.put(complexTypeName, attributeListInferencer); } attributeListInferencer.learnAttributeList(element.getAttributes(), documentIndex); SimpleTypeInferencer simpleTypeInferencer = simpleTypeInferencersOfComplexTypes.get(complexTypeName); if (simpleTypeInferencer == null) { simpleTypeInferencer = inferencersFactory.getSimpleTypeInferencerInstance(complexTypeName, configuration); simpleTypeInferencersOfComplexTypes.put(complexTypeName, simpleTypeInferencer); } simpleTypeInferencer.learnValue(element.getText(), element.getNamespaceURI(), element.getName()); // SchemaElement previousChildSchemaElement=null; //We need to store the previous child in order to add the edge between it and the current child. List<SchemaElement> schemaElementChildren = new ArrayList<>(element.getChildren().size()); for (int i = 0; i < element.getChildren().size(); i++) { Element child = element.getChildren().get(i); traverseElement(documentIndex, child, complexTypeName); String childSchemaElementKey = complexTypeName + configuration.getTypeNamesAncestorsSeparator() + child.getName(); if (configuration.getTypeNameInferencer() instanceof NameTypeNameInferencer) { childSchemaElementKey = child.getName(); // If we use the name-based type name inferencer, the name is the key } SchemaElement childSchemaElement = elements.get(child.getNamespaceURI(), childSchemaElementKey);//The SchemaElement object does exist because the method traverseElement is called before this. // if(i==0){ // automaton.addEdge(automaton.getInitialState(), childSchemaElement); // } // else { // automaton.addEdge(previousChildSchemaElement, childSchemaElement); // if(i==(element.getChildren().size()-1)){ // automaton.addEdge(childSchemaElement, automaton.getFinalState()); // } // } complexTypeStatisticsEntry.registerElementCount(childSchemaElement, documentIndex); schemaElementChildren.add(childSchemaElement); // previousChildSchemaElement=childSchemaElement; } ExtendedAutomaton automaton = automatons.get(complexTypeName); if (automaton == null) { automaton = new ExtendedAutomaton(); SchemaElement initialState = new SchemaElement("initial", DEFAULT_PSEUDOELEMENTS_NAMESPACE, null); automaton.setInitialState(initialState); SchemaElement finalState = new SchemaElement("final", DEFAULT_PSEUDOELEMENTS_NAMESPACE, null); automaton.setFinalState(finalState); automatons.put(complexTypeName, automaton); } List<SchemaElement> schemaElementChildrenWithInitialAndFinal = new ArrayList<>(schemaElementChildren); schemaElementChildrenWithInitialAndFinal.add(0, automaton.getInitialState()); schemaElementChildrenWithInitialAndFinal.add(automaton.getFinalState()); automaton.learn(schemaElementChildrenWithInitialAndFinal); complexTypeStatisticsEntry.registerSubpatternsFromList(schemaElementChildren); complexTypeStatisticsEntry.registerValueOfNodeCount(element.getText(), schemaElement, documentIndex); statistics.registerElementAtPathCount(realPathFiltered, documentIndex); statistics.registerValueAtPathCount(realPathFiltered, element.getText(), documentIndex); if (enclosingComplexType.equals("")) { statistics.registerRootElementOccurrence(schemaElement); } }