Example usage for org.dom4j.io SAXReader setEntityResolver

List of usage examples for org.dom4j.io SAXReader setEntityResolver

Introduction

This page collects usage examples for the setEntityResolver method of org.dom4j.io.SAXReader.

Prototype

public void setEntityResolver(EntityResolver entityResolver) 

Source Link

Documentation

Sets the entity resolver used to resolve entities.

Usage

From source file:de.jwic.base.XmlApplicationSetup.java

License:Apache License

/**
 * Creates the application setup by parsing the given XML source.
 * <p>
 * The reader is configured with a {@code DTDEntityResolver} built from
 * {@code PUBLICID}, {@code SYSTEMID} and {@code DTD_RESOURCEPATH}, and
 * external DTD declarations are not included in the parsed document.
 *
 * @param source the XML input the application setup is read from
 * @throws RuntimeException if reading or processing the document fails for
 *         any reason; the original exception is attached as the cause
 */
public XmlApplicationSetup(InputSource source) {

    try {
        SAXReader reader = new SAXReader();
        // NOTE(review): presumably resolves the setup DTD from a bundled
        // resource instead of fetching it remotely - confirm in DTDEntityResolver.
        reader.setEntityResolver(new DTDEntityResolver(PUBLICID, SYSTEMID, DTD_RESOURCEPATH));
        reader.setIncludeExternalDTDDeclarations(false);

        Document document = reader.read(source);

        readDocument(document);

    } catch (Exception e1) {
        // Constructors of this class expose no checked exceptions, so every
        // failure is rethrown unchecked with the cause preserved.
        throw new RuntimeException("Error reading applicationSetup: " + e1, e1);
    }
}

From source file:de.tudarmstadt.ukp.dkpro.wsd.io.reader.MASCReader.java

License:Apache License

/**
 * Fills the given CAS with the next node of the current annotation file,
 * opening the next file first when the current one is exhausted.
 * <p>
 * Opening a file means: reading the raw corpus text that sits next to the
 * "-wn.xml" annotation file (from the file system, or from the classpath as
 * a fallback), parsing the annotation XML, and preparing the region/node
 * iterators. If the raw corpus file is missing or empty a message is printed
 * and the method returns without touching the CAS.
 *
 * @param jCas the CAS to populate
 * @throws IOException if the annotation file cannot be opened
 * @throws CollectionException if the annotation XML cannot be parsed or
 *         lacks the expected root, region or node elements
 */
@SuppressWarnings("unchecked")
@Override
public void getNext(JCas jCas) throws IOException, CollectionException {
    if (nodeIterator == null || nodeIterator.hasNext() == false) {
        Resource nextFile = nextFile();
        String fileLocation = nextFile.getLocation();
        // ignore the initial "file:" or "classpath:" prefix of the location
        int offset = fileLocation.indexOf(":") + 1;
        int extLength = 7; // ignore the extension "-wn.xml"
        String baseName = fileLocation.substring(offset, fileLocation.length() - extLength);
        logger.info("Processing " + fileLocation);
        // get the target word
        String targetItem = baseName.substring(baseName.lastIndexOf("/") + 1);

        setPosLemma(targetItem);

        String txtFileLocation = baseName + ".txt";
        logger.info("Raw corpus file: " + txtFileLocation);

        // open the txt file containing the raw corpus for this targetItem
        File txtFile = new File(txtFileLocation);
        String corpus;
        Scanner scanner;
        try {
            scanner = new Scanner(txtFile);
        } catch (FileNotFoundException fne) { // txt file is on the classpath
            InputStream txtStream = this.getClass().getResourceAsStream(txtFileLocation);
            if (txtStream == null) {
                System.out.println("File " + txtFileLocation + " not found! Skipping...");
                return;
            }
            scanner = new Scanner(txtStream);
        }

        try {
            // "\\Z" matches the end of input, so next() yields the whole file
            scanner.useDelimiter("\\Z");
            corpus = scanner.next();
        } catch (NoSuchElementException e) {
            System.out.println("File " + txtFileLocation + " might be empty! Skipping...");
            return;
        } finally {
            // close on every path, including the empty-file early return
            scanner.close();
        }

        // open the file containing WordNet annotations
        SAXReader reader = new SAXReader();
        NullEntityResolver resolver = new NullEntityResolver();
        reader.setEntityResolver(resolver);
        InputStream annotationStream = new BufferedInputStream(nextFile.getInputStream());
        try {
            document = reader.read(annotationStream);
        } catch (DocumentException e) {
            throw new CollectionException(e);
        } finally {
            try {
                annotationStream.close();
            } catch (IOException ignored) {
                // failing to close an input stream must not mask a parse error
            }
        }

        root = document.getRootElement();
        if (root.getName().equals(ELEMENT_GRAPH) == false) {
            throw new CollectionException("unknown_element", new Object[] { root.getName() });
        }

        Iterator<Element> regionIterator = root.elementIterator(ELEMENT_REGION);
        if (regionIterator.hasNext() == false) {
            throw new CollectionException("element_not_found", new Object[] { ELEMENT_REGION, root });
        }

        targetItems2sentences = mapTargetItems2sentences(regionIterator, corpus);

        nodeIterator = root.elementIterator(ELEMENT_NODE);
        if (nodeIterator.hasNext() == false) {
            throw new CollectionException("element_not_found", new Object[] { ELEMENT_NODE, root });
        }
    }

    Element node = nodeIterator.next();
    String documentText = processNode(jCas, node, root);

    // A null text means the node is skipped and the CAS is reset.
    // NOTE(review): the original comment ties this to "no tie between
    // annotators" - confirm against processNode.
    if (documentText != null) {
        setDocumentMetadata(jCas, node);
        jCas.setDocumentText(documentText);
    } else {
        jCas.reset();
    }

    // after all files are processed, output the annotation stats if an
    // output location was specified
    if (!hasNext() && !outputAnnotation.equals("")) {
        logger.info("Outputting annotation info to " + outputAnnotation);
        outputAnnotations();
    }
}

From source file:de.tudarmstadt.ukp.dkpro.wsd.io.reader.SemCorXMLReader.java

License:Apache License

/**
 * Reads the next corpus file and stores its text and metadata in the CAS.
 * <p>
 * The file is parsed with entity resolution delegated to a
 * {@code NullEntityResolver}. The root element must be
 * {@code ELEMENT_CONTEXTFILE} and must contain at least one
 * {@code ELEMENT_CONTEXT} child; only the first context is processed.
 *
 * @param jCas the CAS to populate
 * @throws IOException if the next file cannot be opened
 * @throws CollectionException if the XML cannot be parsed or the expected
 *         elements are missing
 */
@Override
@SuppressWarnings("unchecked")
public void getNext(JCas jCas) throws IOException, CollectionException {
    mappingProvider.configure(jCas.getCas());

    // Open the next file
    Document document;
    SAXReader reader = new SAXReader();
    NullEntityResolver resolver = new NullEntityResolver();
    reader.setEntityResolver(resolver);
    InputStream is = new BufferedInputStream(nextFile().getInputStream());
    try {
        document = reader.read(is);
    } catch (DocumentException e) {
        throw new CollectionException(e);
    } finally {
        // The document is fully materialised by read(), so the stream can be
        // released here.
        try {
            is.close();
        } catch (IOException ignored) {
            // failing to close an input stream must not mask a parse error
        }
    }

    // Get metadata from the top two elements
    Element contextFile = document.getRootElement();
    if (contextFile.getName().equals(ELEMENT_CONTEXTFILE) == false) {
        throw new CollectionException("unknown_element", new Object[] { contextFile.getName() });
    }
    Iterator<Element> contextIterator = contextFile.elementIterator(ELEMENT_CONTEXT);
    if (contextIterator.hasNext() == false) {
        throw new CollectionException("element_not_found",
                new Object[] { ELEMENT_CONTEXT, ELEMENT_CONTEXTFILE });
    }
    Element context = contextIterator.next();
    setDocumentMetadata(jCas, contextFile, context);
    String documentId = context.attributeValue(ATTR_FILENAME);
    logger.debug("Found context filename: " + documentId);

    // Process document text; fall back to sentence-level processing when
    // paragraph-level processing yields no text
    StringBuffer documentText = processParagraphs(jCas, context, documentId);
    if (documentText.length() == 0) {
        documentText = processSentences(jCas, context, 0, documentId);
    }
    jCas.setDocumentText(documentText.toString());
    logger.info("Read " + validWordFormCount + " valid word forms; skipped "
            + (totalWordFormCount - validWordFormCount));
}

From source file:de.tudarmstadt.ukp.dkpro.wsd.io.reader.WebCAGeXMLReader.java

License:Apache License

/**
 * Stores the next {@code ELEMENT_TEXT} element of the corpus in the CAS,
 * opening and parsing the next corpus file when the current one has no more
 * text elements.
 *
 * @param jCas the CAS to populate
 * @throws IOException if the next file cannot be opened
 * @throws CollectionException if the XML cannot be parsed, the root element
 *         is not {@code ELEMENT_CORPUS}, or it contains no text elements
 */
@Override
@SuppressWarnings("unchecked")
public void getNext(JCas jCas) throws IOException, CollectionException {
    if (textIterator == null || textIterator.hasNext() == false) {
        // Open the next file
        Document document;
        SAXReader reader = new SAXReader();
        NullEntityResolver resolver = new NullEntityResolver();
        reader.setEntityResolver(resolver);
        Resource nextFile = nextFile();

        logger.info("Reading " + nextFile.getLocation());
        InputStream is = new BufferedInputStream(nextFile.getInputStream());
        try {
            document = reader.read(is);
        } catch (DocumentException e) {
            throw new CollectionException(e);
        } finally {
            // The document is fully materialised by read(), so the stream
            // can be released here.
            try {
                is.close();
            } catch (IOException ignored) {
                // failing to close an input stream must not mask a parse error
            }
        }

        // Get metadata from the top two elements
        corpus = document.getRootElement();
        if (corpus.getName().equals(ELEMENT_CORPUS) == false) {
            throw new CollectionException("unknown_element", new Object[] { corpus.getName() });
        }
        textIterator = corpus.elementIterator(ELEMENT_TEXT);
        if (textIterator.hasNext() == false) {
            throw new CollectionException("element_not_found", new Object[] { ELEMENT_TEXT, ELEMENT_CORPUS });
        }
    }
    Element text = textIterator.next();

    setDocumentMetadata(jCas, corpus, text);

    // Process document text
    StringBuffer documentText = processText(jCas, text);
    jCas.setDocumentText(documentText.toString());
}

From source file:de.tudarmstadt.ukp.dkpro.wsd.senseval.reader.SensevalReader.java

License:Apache License

/**
 * Initialises the reader: resets the text counter, resolves the configured
 * file location, parses it and keeps the root element in {@code corpus}.
 *
 * @param context the UIMA context, also used to resolve the file location
 * @throws ResourceInitializationException if the file cannot be opened or
 *         parsed, or if its root element is not {@code CORPUS_ELEMENT_NAME}
 */
@Override
public void initialize(UimaContext context) throws ResourceInitializationException {
    super.initialize(context);

    textCount = 0;

    // TODO: There is no known way to make the XML parser find the DTDs in
    // every deployment (directory as well as JAR; an earlier attempt based
    // on ResourceUtils.resolveLocation was abandoned). Every external
    // entity is therefore answered with an empty document, which makes the
    // parser ignore DTDs - at the price of not validating the XML files.
    final EntityResolver emptyEntityResolver = new EntityResolver() {
        @Override
        public InputSource resolveEntity(String publicId, String systemId) {
            return new InputSource(new StringReader(""));
        }
    };
    final SAXReader saxReader = new SAXReader();
    saxReader.setEntityResolver(emptyEntityResolver);

    Document parsedCollection = null;
    InputStream corpusStream = null;
    try {
        fileURL = ResourceUtils.resolveLocation(fileName, this, context);
        corpusStream = fileURL.openStream();
        // Reading from the stream rather than from fileURL.getFile():
        // the latter was observed to fail on Jenkins but not locally.
        parsedCollection = saxReader.read(corpusStream);
    } catch (IOException e) {
        throw new ResourceInitializationException(e);
    } catch (DocumentException e) {
        throw new ResourceInitializationException(e);
    } finally {
        IOUtils.closeQuietly(corpusStream);
    }

    // Keep the (root) corpus element so its children can be iterated later
    corpus = parsedCollection.getRootElement();
    if (!corpus.getName().equals(CORPUS_ELEMENT_NAME)) {
        throw new ResourceInitializationException("unknown_element", new Object[] { corpus.getName() });
    }
}

From source file:de.tudarmstadt.ukp.lmf.transform.XMLToDBTransformer.java

License:Apache License

/**
 * Reads an XML file and saves its contents to the database.
 * <p>
 * A session is opened for the duration of the call. The file is parsed with
 * this object registered as the reader's default handler; after a successful
 * parse the work is committed. The session is closed whether or not parsing
 * succeeds. Elapsed time and the number of committed entries are printed to
 * standard output on success.
 *
 * @param xmlFile the XML file to read
 * @param lexicalResourceName if not {@code null}, the identifier used to
 *        load an existing {@code LexicalResource} from the session before
 *        parsing
 * @throws DocumentException if the XML file cannot be parsed
 * @throws IllegalArgumentException declared by the signature;
 *         NOTE(review): the raising condition is not visible in this
 *         method - confirm against the element handlers
 */
public void transform(File xmlFile, String lexicalResourceName)
        throws DocumentException, IllegalArgumentException {
    long startTime = System.currentTimeMillis();

    openSession();
    try {
        if (lexicalResourceName != null) {
            lexicalResource = (LexicalResource) session.get(LexicalResource.class, lexicalResourceName);
        }

        SAXReader reader = new SAXReader(false);
        reader.setEntityResolver(new EntityResolver() {
            @Override
            public InputSource resolveEntity(String publicId, String systemId) throws SAXException, IOException {
                // Answer DTD requests with an empty document so no DTD is
                // fetched; returning null leaves every other entity to the
                // parser's default resolution.
                if (systemId != null && systemId.endsWith(".dtd")) {
                    return new InputSource(new StringReader(""));
                }
                return null;
            }
        });
        reader.setDefaultHandler(this);
        reader.read(xmlFile);

        commit();
    } finally {
        // Release the session even when parsing or committing fails.
        closeSession();
    }

    System.out.println("TOTAL TIME: " + (System.currentTimeMillis() - startTime));
    System.out.println("NUM ENTRIES: " + commitCounter);
}

From source file:dkpro.similarity.uima.io.RTECorpusReader.java

License:Apache License

/**
 * Reads the configured input file and returns one pair per {@code pair}
 * element found anywhere in the document.
 * <p>
 * For each pair the text of its {@code t} child becomes text 1, the text of
 * its {@code h} child becomes text 2, and the value of its {@code value} or
 * {@code entailment} attribute (matched case-insensitively) becomes the
 * entailment outcome. Missing parts are left as empty strings. Pair ids are
 * {@code t1-<i>} and {@code t2-<i>} with {@code i} counting from 1.
 *
 * @return the pairs in document order
 * @throws ResourceInitializationException if the file cannot be resolved,
 *         opened or parsed, or an XPath expression fails
 */
@Override
public List<CombinationPair> getAlignedPairs() throws ResourceInitializationException {
    List<CombinationPair> pairs = new ArrayList<CombinationPair>();

    InputStream is = null;
    try {
        SAXReader reader = new SAXReader(false);

        // Disable DTD resolution (which fails due to relative path to DTD file)
        NullEntityResolver resolver = new NullEntityResolver();
        reader.setEntityResolver(resolver);

        URL url = ResourceUtils.resolveLocation(inputFile, this, this.getUimaContext());
        // Keep a handle on the stream so the finally block really closes it.
        is = new BufferedInputStream(url.openStream());
        Document document = reader.read(is);
        Element root = document.getRootElement();

        // The expressions are constant, so they are built once up front.
        final XPath pairXPath = new Dom4jXPath("//pair");
        final XPath textXPath = new Dom4jXPath("child::t");
        final XPath hypothesisXPath = new Dom4jXPath("child::h");

        int i = 0;
        for (Object element : pairXPath.selectNodes(root)) {
            i++;
            String text1 = "";
            String text2 = "";
            String entailmentOutcome = "";
            if (element instanceof Element) {
                Element node = (Element) element;

                // With several matching children the last one wins.
                for (Object tElement : textXPath.selectNodes(node)) {
                    if (tElement instanceof Element) {
                        text1 = ((Element) tElement).getText();
                    }
                }

                for (Object hElement : hypothesisXPath.selectNodes(node)) {
                    if (hElement instanceof Element) {
                        text2 = ((Element) hElement).getText();
                    }
                }

                // print out entailment value for use as gold standard
                for (Object o : node.attributes()) {
                    Attribute attribute = (Attribute) o;
                    String name = attribute.getName();
                    // equalsIgnoreCase does not depend on the default locale,
                    // unlike toLowerCase() (e.g. the Turkish dotless i).
                    if ("value".equalsIgnoreCase(name) || "entailment".equalsIgnoreCase(name)) {
                        entailmentOutcome = attribute.getValue();
                        System.out.println(i + ":" + entailmentOutcome);
                    }
                }
            }

            EntailmentPair pair = new EntailmentPair(url.toString());
            pair.setID1("t1-" + i);
            pair.setID2("t2-" + i);
            pair.setText1(text1);
            pair.setText2(text2);
            pair.setEntailmentOutcome(entailmentOutcome);

            pairs.add(pair);
        }
    } catch (JaxenException e) {
        throw new ResourceInitializationException(e);
    } catch (DocumentException e) {
        throw new ResourceInitializationException(e);
    } catch (IOException e) {
        throw new ResourceInitializationException(e);
    } finally {
        IOUtils.closeQuietly(is);
    }

    return pairs;
}

From source file:edu.ku.brc.specify.tools.AppendHelp.java

License:Open Source License

/**
 * Reads an XML file into a dom4j document and returns its root element.
 * <p>
 * The reader does not validate, strips whitespace-only text and ignores
 * comments. Every external entity is answered with empty content.
 * NOTE(review): the empty answer is presumably there so that no DTD has to
 * be located - confirm with the callers' input files.
 *
 * @param file the XML file to be read
 * @return the root element of the parsed document
 * @throws IOException if the file cannot be opened
 * @throws DocumentException if the file content cannot be parsed
 */
public Element readFileToDOM4J(final File file) throws IOException, DocumentException {
    SAXReader saxReader = new SAXReader();

    try {
        saxReader.setValidation(false);
        saxReader.setStripWhitespaceText(true);
        saxReader.setIgnoreComments(true);

        EntityResolver entityResolver = new EntityResolver() {
            public InputSource resolveEntity(String publicId, String systemId) {
                // An empty character stream is real (empty) content.
                // InputSource("") would only set an empty system id, which
                // the parser would still have to open.
                return new InputSource(new java.io.StringReader(""));
            }
        };
        saxReader.setEntityResolver(entityResolver);

    } catch (Exception ex) {
        // Configuration is best effort: report the problem and still try to read.
        ex.printStackTrace();
    }

    FileInputStream fileStream = new FileInputStream(file);
    try {
        org.dom4j.Document document = saxReader.read(fileStream);
        return document.getRootElement();
    } finally {
        try {
            fileStream.close();
        } catch (IOException ignored) {
            // failing to close an input stream must not mask a parse error
        }
    }
}

From source file:es.caib.seycon.ng.servei.PuntEntradaServiceImpl.java

/**
 * Validates the XML attached to an entry point against the schema
 * {@code /es/caib/seycon/mazinger/Mazinger.xsd}.
 *
 * @param puntEntrada the entry point whose XML ({@code getXmlPUE()}) is
 *        checked
 * @return the empty string when there is no XML to validate or the XML is
 *         valid; otherwise a non-empty description of the validation failure
 * @throws Exception declared by the signature; failures raised while
 *         validating are reported through the return value instead
 */
protected String handleValidaXMLPUE(PuntEntrada puntEntrada) throws Exception {
    String contingut = puntEntrada.getXmlPUE();

    // Nothing to validate: a missing document is treated like an empty one.
    if (contingut == null || "".equals(contingut)) { //$NON-NLS-1$
        return ""; //$NON-NLS-1$
    }

    try {
        // Validate the document
        org.dom4j.io.SAXReader reader = new org.dom4j.io.SAXReader(true);
        // set the validation feature to true to report validation
        // errors
        reader.setFeature("http://xml.org/sax/features/validation", true); //$NON-NLS-1$

        // set the validation/schema feature to true to report
        // validation errors against a schema
        reader.setFeature("http://apache.org/xml/features/validation/schema", true); //$NON-NLS-1$
        // set the validation/schema-full-checking feature to true to
        // enable full schema, grammar-constraint checking
        reader.setFeature("http://apache.org/xml/features/validation/schema-full-checking", //$NON-NLS-1$
                true);
        // set the schema
        reader.setProperty("http://apache.org/xml/properties/schema/external-noNamespaceSchemaLocation", //$NON-NLS-1$
                "/es/caib/seycon/mazinger/Mazinger.xsd"); //$NON-NLS-1$
        // set the entity resolver (to load the schema with
        // getResourceAsStream)
        reader.getXMLReader().setEntityResolver(new SchemaLoader());
        reader.setEntityResolver(new SchemaLoader());

        // Only the side effect matters: read() throws on invalid input.
        reader.read(new ByteArrayInputStream(contingut.getBytes("UTF-8"))); //$NON-NLS-1$

    } catch (Exception ex) {
        // Return the exception as the validation error. An exception
        // without a message must not look like the "valid" result, so fall
        // back to its string form.
        String message = ex.getMessage();
        if (message == null || message.length() == 0) {
            return ex.toString();
        }
        return message;
    }
    return ""; //$NON-NLS-1$
}

From source file:eu.scape_project.planning.services.taverna.parser.T2FlowParser.java

License:Apache License

/**
 * Initialises the parser by reading the the t2flow from the input stream
 * and parsing it./*from ww  w.ja  v  a2s.c  o  m*/
 * 
 * @param t2flow
 *            the t2flow
 * @throws TavernaParserException
 *             if the parser could not be initialized
 */
protected void initialise(InputStream t2flow) throws TavernaParserException {

    log.debug("Parsing inputstream");

    T2FLOW_NAMESPACE_MAP.put("t2f", "http://taverna.sf.net/2008/xml/t2flow");

    ValidatingParserFactory vpf = new ValidatingParserFactory();
    try {
        SAXParser parser = vpf.getValidatingParser();
        parser.setProperty(ValidatingParserFactory.JAXP_SCHEMA_SOURCE, ProjectImporter.TAVERNA_SCHEMA_URI);

        SAXReader reader = new SAXReader(parser.getXMLReader());
        reader.setValidation(false);

        SchemaResolver schemaResolver = new SchemaResolver();
        schemaResolver.addSchemaLocation(ProjectImporter.TAVERNA_SCHEMA_URI,
                SCHEMA_LOCATION + ProjectImporter.TAVERNA_SCHEMA);
        reader.setEntityResolver(schemaResolver);

        doc = reader.read(t2flow);
    } catch (DocumentException e) {
        log.error("Error initialising T2FlowParser: {}", e.getMessage());
        throw new TavernaParserException("Error parsing workflow.", e);
    } catch (ParserConfigurationException e) {
        log.error("Error initialising T2FlowParser: {}", e.getMessage());
        throw new TavernaParserException("Error parsing workflow.", e);
    } catch (SAXException e) {
        log.error("Error initialising T2FlowParser: {}", e.getMessage());
        throw new TavernaParserException("Error parsing workflow.", e);
    }
}