Example usage for org.xml.sax XMLReader parse

List of usage examples for org.xml.sax XMLReader parse

Introduction

On this page you can find example usages of the org.xml.sax.XMLReader parse method.

Prototype

public void parse(String systemId) throws IOException, SAXException;

Source Link

Document

Parse an XML document from a system identifier (URI).

Usage

From source file:org.toobsframework.transformpipeline.domain.ChainedXSLTransletTransformer.java

/**
 * Runs an XML document through a chain of XSLTC-compiled stylesheets and returns the
 * serialized output.
 *
 * One TransformerHandler is created per entry of {@code inputXSLs}; the handlers are linked
 * in list order (reader -> handler 0 -> handler 1 -> ... -> serializer) and the document is
 * pushed through the chain as SAX events.
 *
 * NOTE(review): an empty {@code inputXSLs} reaches {@code tHandlers.get(-1)} when the last
 * handler is linked to the serializer; the resulting exception is wrapped by the outer catch.
 *
 * @param inputXSLs stylesheet names; each entry is cast to String and resolved as
 *        {@code name + ".xsl"} through {@code uriResolver}
 * @param inputXML the XML document text; it is converted to UTF-8 bytes before parsing
 * @param inputParams optional parameters (may be null); keys and values are cast to String
 *        and set on every transformer in the chain
 * @param transformerHelper optional helper (may be null); when present it is passed to every
 *        transformer under the {@code TRANSFORMER_HELPER} parameter name
 * @return the serializer output decoded as UTF-8
 * @throws XMLTransformerException wrapping any exception raised while building or running
 *         the chain
 */
public String transform(List inputXSLs, String inputXML, Map inputParams,
        IXMLTransformerHelper transformerHelper) throws XMLTransformerException {

    String outputXML = null;
    ByteArrayInputStream xmlInputStream = null;
    ByteArrayOutputStream xmlOutputStream = null;
    try {
        TransformerFactory tFactory = new org.apache.xalan.xsltc.trax.TransformerFactoryImpl();
        try {
            //tFactory.setAttribute("use-classpath", Boolean.TRUE);
            tFactory.setAttribute("auto-translet", Boolean.TRUE);
        } catch (IllegalArgumentException iae) {
            // This exception is itself caught by the outer catch (Exception) below, so it is
            // logged a second time and wrapped again before reaching the caller.
            log.error("Error setting XSLTC specific attribute", iae);
            throw new XMLTransformerException(iae);
        }
        setFactoryResolver(tFactory);

        // Same factory instance, viewed through the XSLTC implementation type.
        TransformerFactoryImpl traxFactory = (TransformerFactoryImpl) tFactory;

        // Create a TransformerHandler for each stylesheet.
        ArrayList tHandlers = new ArrayList();
        TransformerHandler tHandler = null;

        // Create a SAX XMLReader.
        XMLReader reader = new org.apache.xerces.parsers.SAXParser();

        // Fall back to the default "html" output properties when none have been set, then
        // create the serializer that terminates the chain.
        if (outputProperties == null) {
            outputProperties = OutputPropertiesFactory.getDefaultMethodProperties("html");
        }
        Serializer serializer = SerializerFactory.getSerializer(outputProperties);
        for (int it = 0; it < inputXSLs.size(); it++) {
            String xslTranslet = (String) inputXSLs.get(it);
            Source source = uriResolver.resolve(xslTranslet + ".xsl", "");

            // Derive a package name from the stylesheet's system id: everything before the
            // last "/" with "/" turned into "." and "-" into "_".
            String tPkg = source.getSystemId().substring(0, source.getSystemId().lastIndexOf("/"))
                    .replaceAll("/", ".").replaceAll("-", "_");

            // Package name needs to be set for each TransformerHandler instance
            tFactory.setAttribute("package-name", tPkg);
            tHandler = traxFactory.newTransformerHandler(source);

            // Set parameters and output encoding on each handler's transformer
            Transformer transformer = tHandler.getTransformer();
            transformer.setOutputProperty("encoding", "UTF-8");
            transformer.setErrorListener(tFactory.getErrorListener());
            if (inputParams != null) {
                Iterator paramIt = inputParams.entrySet().iterator();
                while (paramIt.hasNext()) {
                    Map.Entry thisParam = (Map.Entry) paramIt.next();
                    // Both key and value are cast to String; any other type fails here.
                    transformer.setParameter((String) thisParam.getKey(), (String) thisParam.getValue());
                }
            }
            if (transformerHelper != null) {
                transformer.setParameter(TRANSFORMER_HELPER, transformerHelper);
            }
            tHandlers.add(tHandler);
        }
        tHandler = null;
        // Link the handlers to each other and to the reader
        for (int th = 0; th < tHandlers.size(); th++) {
            tHandler = (TransformerHandler) tHandlers.get(th);
            if (th == 0) {
                // The first handler receives the reader's content and lexical events.
                reader.setContentHandler(tHandler);
                reader.setProperty("http://xml.org/sax/properties/lexical-handler", tHandler);
            } else {
                // Every later handler consumes the result of its predecessor.
                ((TransformerHandler) tHandlers.get(th - 1)).setResult(new SAXResult(tHandler));
            }
        }
        // Wrap the input text as a UTF-8 byte stream and capture the serializer output in
        // memory.
        InputSource xmlSource = null;
        xmlInputStream = new ByteArrayInputStream((inputXML).getBytes("UTF-8"));
        xmlSource = new InputSource(xmlInputStream);
        xmlOutputStream = new ByteArrayOutputStream();
        serializer.setOutputStream(xmlOutputStream);
        // Link the last handler to the serializer
        ((TransformerHandler) tHandlers.get(tHandlers.size() - 1))
                .setResult(new SAXResult(serializer.asContentHandler()));
        // Parsing drives the whole chain; the output is complete when parse() returns.
        reader.parse(xmlSource);
        outputXML = xmlOutputStream.toString("UTF-8");
    } catch (Exception ex) {
        log.error("Error performing chained transformation: " + ex.getMessage(), ex);
        throw new XMLTransformerException(ex);
    } finally {
        try {
            if (xmlInputStream != null) {
                xmlInputStream.close();
                xmlInputStream = null;
            }
            if (xmlOutputStream != null) {
                xmlOutputStream.close();
                xmlOutputStream = null;
            }
        } catch (IOException ex) {
            // Deliberately ignored: close() on the byte-array streams is documented as
            // having no effect.
        }
    }
    return outputXML;
}

From source file:org.unitils.dbunit.datasetfactory.impl.MultiSchemaXmlDataSetFactory.java

/**
 * Feeds the content of a single data set file to the given reader.
 *
 * @param xmlReader the reader that parses the file content
 * @param dataSetFile the file to read
 * @throws UnitilsException wrapping any exception raised while opening or parsing the file
 */
protected void readDataSetFile(XMLReader xmlReader, File dataSetFile) {
    InputStream in = null;
    try {
        in = new FileInputStream(dataSetFile);
        InputSource fileSource = new InputSource(in);
        xmlReader.parse(fileSource);
    } catch (Exception e) {
        String message = "Unable to read data set file " + dataSetFile.getName();
        throw new UnitilsException(message, e);
    } finally {
        // The stream is closed on success and on failure alike.
        closeQuietly(in);
    }
}

From source file:org.unitils.dbunit.util.MultiSchemaXmlDataSetReader.java

/**
 * Parses the datasets from the given files.
 * Each schema is given its own dataset and each row is given its own table.
 *
 * @param dataSetFiles The dataset files, not null
 * @return The read data set, not null/*from   w w w  .  j  av a  2s . co m*/
 */
public MultiSchemaDataSet readDataSetXml(File... dataSetFiles) {
    try {
        DataSetContentHandler dataSetContentHandler = new DataSetContentHandler(defaultSchemaName);
        XMLReader xmlReader = createXMLReader();
        xmlReader.setContentHandler(dataSetContentHandler);
        xmlReader.setErrorHandler(dataSetContentHandler);

        for (File dataSetFile : dataSetFiles) {
            InputStream dataSetInputStream = null;
            try {
                dataSetInputStream = new FileInputStream(dataSetFile);
                xmlReader.parse(new InputSource(dataSetInputStream));
            } finally {
                closeQuietly(dataSetInputStream);
            }
        }
        return dataSetContentHandler.getMultiSchemaDataSet();

    } catch (Exception e) {
        throw new UnitilsException("Unable to parse data set xml.", e);
    }

}

From source file:org.webcurator.core.profiles.HeritrixProfile.java

/**
 * Reconstruct a profile from an XML string.
 *
 * @param str The XML string to create the profile from.
 * @return The object representation of the profile string.
 * @throws WCTRuntimeException wrapping any SAX, parser configuration, I/O or
 *         invalid-attribute-value failure raised while reading the string.
 */
public static HeritrixProfile fromString(String str) {
    try {
        XMLReader xmlReader = SAXParserFactory.newInstance().newSAXParser().getXMLReader();
        StringReader xmlText = new StringReader(str);

        // The settings handler needs a File to be constructed; "dummy_file" is only a
        // placeholder for that purpose.
        File placeholder = new File("dummy_file");
        XMLSettingsHandler handler = new XMLSettingsHandler(placeholder);

        // SAX events from the string are applied to the handler's settings object.
        CrawlSettingsSAXHandler saxHandler = new CrawlSettingsSAXHandler(handler.getSettings(null));
        xmlReader.setContentHandler(saxHandler);
        xmlReader.parse(new InputSource(xmlText));

        return new HeritrixProfile(handler, null);
    } catch (SAXException saxFailure) {
        throw new WCTRuntimeException(saxFailure);
    } catch (ParserConfigurationException configFailure) {
        throw new WCTRuntimeException(configFailure);
    } catch (IOException ioFailure) {
        throw new WCTRuntimeException(ioFailure);
    } catch (InvalidAttributeValueException attributeFailure) {
        throw new WCTRuntimeException(attributeFailure);
    }
}

From source file:org.wyona.yanel.impl.resources.TestingControlResource.java

/**
 * Builds the view for this resource.
 *
 * For the "source" view the raw screen XML is returned as application/xml. Otherwise, when
 * XSLT paths are configured for the current path, the screen XML is sent through a SAX
 * pipeline (XSLT handlers -> XInclude -> i18n -> XHTML serializer) and the serialized result
 * is returned. If no XSLT is configured, or the pipeline fails, the raw screen XML is
 * returned with the mime type computed for the view id.
 *
 * @param viewId the requested view id, may be null
 * @return the populated view, never null
 */
public View getView(String viewId) {
    // A missing User-Agent header cannot contain "rv:1.7", so it is handled like any other
    // non-matching agent instead of causing a NullPointerException.
    String userAgent = request.getHeader("User-Agent");
    if (userAgent == null || userAgent.indexOf("rv:1.7") < 0) {
        ajaxBrowser = true;
    }
    try {
        setLocations();
    } catch (Exception e) {
        // Best effort: the view is still rendered when the locations cannot be determined.
        log.error(e.getMessage(), e);
    }
    View view = new View();
    String mimeType = getMimeType(viewId);
    view.setMimeType(mimeType);

    // NOTE(review): the raw-screen fallbacks below still use the deprecated
    // StringBufferInputStream, which keeps only the low 8 bits of each character. The right
    // byte encoding depends on what getScreen() declares - confirm before replacing them.
    try {
        org.wyona.yarep.core.Repository repo = getRealm().getRepository();

        if (viewId != null && viewId.equals("source")) {
            view.setInputStream(new java.io.StringBufferInputStream(getScreen()));
            view.setMimeType("application/xml");
            return view;
        }

        String[] xsltPath = getXSLTPath(getPath());
        if (xsltPath != null) {

            // create reader:
            XMLReader xmlReader = XMLReaderFactory.createXMLReader();
            CatalogResolver catalogResolver = new CatalogResolver();
            xmlReader.setEntityResolver(catalogResolver);

            // create xslt transformer:
            SAXTransformerFactory tf = (SAXTransformerFactory) TransformerFactory.newInstance();

            TransformerHandler[] xsltHandlers = new TransformerHandler[xsltPath.length];
            for (int i = 0; i < xsltPath.length; i++) {
                xsltHandlers[i] = tf
                        .newTransformerHandler(new StreamSource(repo.getNode(xsltPath[i]).getInputStream()));
                xsltHandlers[i].getTransformer().setParameter("yanel.path.name",
                        org.wyona.commons.io.PathUtil.getName(getPath()));
                xsltHandlers[i].getTransformer().setParameter("yanel.path", getPath());
                xsltHandlers[i].getTransformer().setParameter("yanel.back2context",
                        PathUtil.backToContext(realm, getPath()));
                xsltHandlers[i].getTransformer().setParameter("yarep.back2realm",
                        PathUtil.backToRealm(getPath()));

                xsltHandlers[i].getTransformer().setParameter("language", getRequestedLanguage());
            }

            // create i18n transformer:
            I18nTransformer2 i18nTransformer = new I18nTransformer2("global", getRequestedLanguage(),
                    getRealm().getDefaultLanguage());
            i18nTransformer.setEntityResolver(catalogResolver);

            // create xinclude transformer:
            XIncludeTransformer xIncludeTransformer = new XIncludeTransformer();
            ResourceResolver resolver = new ResourceResolver(this);
            xIncludeTransformer.setResolver(resolver);

            // create serializer:
            Serializer serializer = SerializerFactory.getSerializer(SerializerFactory.XHTML_STRICT);
            ByteArrayOutputStream baos = new ByteArrayOutputStream();

            // chain everything together (create a pipeline):
            xmlReader.setContentHandler(xsltHandlers[0]);
            for (int i = 0; i < xsltHandlers.length - 1; i++) {
                xsltHandlers[i].setResult(new SAXResult(xsltHandlers[i + 1]));
            }
            xsltHandlers[xsltHandlers.length - 1].setResult(new SAXResult(xIncludeTransformer));
            xIncludeTransformer.setResult(new SAXResult(i18nTransformer));
            i18nTransformer.setResult(new SAXResult(serializer.asContentHandler()));
            serializer.setOutputStream(baos);

            // execute pipeline: hand the parser characters rather than bytes, so that
            // non-ASCII text in the screen XML is not damaged by a lossy char-to-byte step.
            xmlReader.parse(new InputSource(new java.io.StringReader(getScreen())));

            // write result into view:
            view.setInputStream(new ByteArrayInputStream(baos.toByteArray()));
            return view;
        }
        log.debug("Mime-Type: " + mimeType);
        view.setInputStream(new java.io.StringBufferInputStream(getScreen()));
        return view;
    } catch (Exception e) {
        // Fall through to the raw-screen fallback below.
        log.error(e + " (" + getPath() + ", " + getRealm() + ")", e);
    }
    view.setInputStream(new java.io.StringBufferInputStream(getScreen()));
    return view;
}

From source file:org.xwiki.extension.xar.internal.handler.packager.DefaultPackager.java

/**
 * Parses a document from the given stream, routing the content of the "xwikidoc" root
 * element to the supplied handler.
 *
 * @param in the stream to read; it is wrapped in a CloseShieldInputStream before being handed
 *        to the parser (presumably so that the caller's stream is left open - confirm)
 * @param documentHandler the handler registered for the "xwikidoc" element
 * @throws ParserConfigurationException if the SAX parser cannot be created
 * @throws SAXException if parsing fails
 * @throws IOException if reading the stream fails
 * @throws NotADocumentException if the parser reports an unknown root element
 */
public void parseDocument(InputStream in, ContentHandler documentHandler)
        throws ParserConfigurationException, SAXException, IOException, NotADocumentException {
    XMLReader reader = this.parserFactory.newSAXParser().getXMLReader();

    RootHandler rootHandler = new RootHandler(this.componentManager);
    rootHandler.setHandler("xwikidoc", documentHandler);
    reader.setContentHandler(rootHandler);

    InputSource shielded = new InputSource(new CloseShieldInputStream(in));
    try {
        reader.parse(shielded);
    } catch (UnknownRootElement e) {
        throw new NotADocumentException("Failed to parse stream", e);
    }
}

From source file:org.xwiki.gadgets.internal.GoogleGadgetService.java

/**
 * {@inheritDoc}
 * 
 * Any failure is logged and reported to the caller as a {@code null} result.
 * 
 * @see GadgetService#parseUserPrefs(String)
 */
public List<UserPref> parseUserPrefs(String gadgetUri) {
    try {
        XMLReader reader = xmlReaderFactory.createXMLReader();
        UserPrefsHandler handler = new UserPrefsHandler();
        reader.setContentHandler(handler);

        // The URI is passed as the system identifier of the input source.
        InputSource gadgetSource = new InputSource(gadgetUri);
        reader.parse(gadgetSource);

        return handler.getResult();
    } catch (Exception e) {
        String message = String.format(
                "Exception while parsing User Preferences from gadget XML at location %s.", gadgetUri);
        LOG.error(message, e);
        return null;
    }
}

From source file:org.xwiki.gadgets.internal.GoogleGadgetService.java

/**
 * {@inheritDoc}/*from w ww.  j ava  2  s.  c om*/
 * 
 * @see GadgetService#parseModulePrefs(String)
 */
public ModulePrefs parseModulePrefs(String gadgetUri) {
    try {
        XMLReader xr = xmlReaderFactory.createXMLReader();
        ModulePrefsHandler mpHandler = new ModulePrefsHandler();
        xr.setContentHandler(mpHandler);
        xr.parse(new InputSource(gadgetUri));

        return mpHandler.getResult();
    } catch (Exception e) {
        LOG.error(String.format("Exception while parsing Module Preferences from gadget XML at location %s.",
                gadgetUri), e);
        return null;
    }
}

From source file:org.yawlfoundation.yawl.unmarshal.YawlXMLSpecificationValidator.java

/**
 * Parses the input with the checker configured for the given version and returns the
 * content of the error buffer afterwards.
 *
 * NOTE(review): an exception thrown by the parse is only printed to stderr; unless the
 * checker's error handler also recorded it, the returned string does not mention it.
 *
 * @param input the document to check
 * @param version the version passed to {@code setUpChecker}
 * @return the text held in the error buffer once parsing has finished or failed
 */
public String checkSchema(InputSource input, String version) {
    // Start from an empty buffer so that earlier checks do not leak into this result.
    _errorsString.delete(0, _errorsString.length());
    try {
        final XMLReader schemaChecker = setUpChecker(version);
        schemaChecker.parse(input);
    } catch (Exception e) {
        e.printStackTrace();
    }
    final String collectedErrors = _errorsString.toString();
    return collectedErrors;
}

From source file:org.zuinnote.hadoop.office.format.common.parser.msexcel.MSExcelLowFootprintParser.java

/**
 * Processes a OPCPackage (new Excel format, .xlsx) in Streaming Mode
 * /*ww  w .  ja  v  a2 s .  c  om*/
 * @param pkg
 * @throws OpenXML4JException 
 * @throws IOException 
 */
private void processOPCPackage(OPCPackage pkg) throws FormatNotUnderstoodException {
    LOG.debug("Processing OPCPackage in low footprint mode");
    // check if signature should be verified
    if (this.hocr.getVerifySignature()) {
        LOG.info("Verifying signature of document");
        SignatureConfig sic = new SignatureConfig();
        sic.setOpcPackage(pkg);
        SignatureInfo si = new SignatureInfo();
        si.setSignatureConfig(sic);
        if (!si.verifySignature()) {
            throw new FormatNotUnderstoodException(
                    "Cannot verify signature of OOXML (.xlsx) file: " + this.hocr.getFileName());
        } else {
            LOG.info("Successfully verifed first part signature of OXXML (.xlsx) file: "
                    + this.hocr.getFileName());
        }
        Iterator<SignaturePart> spIter = si.getSignatureParts().iterator();
        while (spIter.hasNext()) {
            SignaturePart currentSP = spIter.next();
            if (!(currentSP.validate())) {
                throw new FormatNotUnderstoodException(
                        "Could not validate all signature parts for file: " + this.hocr.getFileName());
            } else {
                X509Certificate currentCertificate = currentSP.getSigner();
                try {
                    if ((this.hocr.getX509CertificateChain().size() > 0) && (!CertificateChainVerificationUtil
                            .verifyCertificateChain(currentCertificate, this.hocr.getX509CertificateChain()))) {
                        throw new FormatNotUnderstoodException(
                                "Could not validate signature part for principal \""
                                        + currentCertificate.getSubjectX500Principal().getName() + "\" : "
                                        + this.hocr.getFileName());
                    }
                } catch (CertificateException | NoSuchAlgorithmException | NoSuchProviderException
                        | InvalidAlgorithmParameterException e) {
                    LOG.error("Could not validate signature part for principal \""
                            + currentCertificate.getSubjectX500Principal().getName() + "\" : "
                            + this.hocr.getFileName(), e);
                    throw new FormatNotUnderstoodException("Could not validate signature part for principal \""
                            + currentCertificate.getSubjectX500Principal().getName() + "\" : "
                            + this.hocr.getFileName());

                }
            }
        }
        LOG.info("Successfully verifed all signatures of OXXML (.xlsx) file: " + this.hocr.getFileName());
    }
    // continue in lowfootprint mode
    XSSFReader r;
    try {
        r = new XSSFReader(pkg);
    } catch (IOException | OpenXML4JException e) {
        LOG.error(e);
        throw new FormatNotUnderstoodException("Error cannot parse new Excel file (.xlsx)");
    }
    try {
        // read date format
        InputStream workbookDataXML = r.getWorkbookData();
        WorkbookDocument wd = WorkbookDocument.Factory.parse(workbookDataXML);
        this.isDate1904 = wd.getWorkbook().getWorkbookPr().getDate1904();

        // read shared string tables
        if (HadoopOfficeReadConfiguration.OPTION_LOWFOOTPRINT_PARSER_SAX
                .equalsIgnoreCase(this.hocr.getLowFootprintParser())) {
            this.pushSST = new ReadOnlySharedStringsTable(pkg);
        } else if (HadoopOfficeReadConfiguration.OPTION_LOWFOOTPRINT_PARSER_STAX
                .equalsIgnoreCase(this.hocr.getLowFootprintParser())) {
            List<PackagePart> pkgParts = pkg
                    .getPartsByContentType(XSSFRelation.SHARED_STRINGS.getContentType());
            if (pkgParts.size() > 0) {
                this.pullSST = new EncryptedCachedDiskStringsTable(pkgParts.get(0), this.hocr.getSstCacheSize(),
                        this.hocr.getCompressSST(), this.ca, this.cm);
            }
        }
        this.styles = r.getStylesTable();
        XSSFReader.SheetIterator iter = (XSSFReader.SheetIterator) r.getSheetsData();
        int sheetNumber = 0;
        while (iter.hasNext()) {

            // check if we need to parse this sheet?
            boolean parse = false;
            if (this.sheets != null) {
                for (int i = 0; i < this.sheets.length; i++) {
                    if (iter.getSheetName().equals(this.sheets[i])) {
                        parse = true;
                        break;
                    }
                }
            } else {
                parse = true;
            }
            // sheet is supposed to be parsed
            if (parse) {

                InputStream rawSheetInputStream = iter.next();
                this.sheetNameList.add(iter.getSheetName());
                InputSource rawSheetInputSource = new InputSource(rawSheetInputStream);
                if (HadoopOfficeReadConfiguration.OPTION_LOWFOOTPRINT_PARSER_SAX
                        .equalsIgnoreCase(this.hocr.getLowFootprintParser())) {
                    this.event = true;
                    LOG.info("Using SAX parser for low footprint Excel parsing");
                    XMLReader sheetParser = SAXHelper.newXMLReader();
                    XSSFEventParser xssfp = new XSSFEventParser(sheetNumber, iter.getSheetName(),
                            this.spreadSheetCellDAOCache);

                    ContentHandler handler = new XSSFSheetXMLHandler(this.styles, iter.getSheetComments(),
                            this.pushSST, xssfp, this.useDataFormatter, false);
                    sheetParser.setContentHandler(handler);
                    sheetParser.parse(rawSheetInputSource);
                    sheetNumber++;
                } else if (HadoopOfficeReadConfiguration.OPTION_LOWFOOTPRINT_PARSER_STAX
                        .equalsIgnoreCase(this.hocr.getLowFootprintParser())) {
                    LOG.info("Using STAX parser for low footprint Excel parsing");
                    this.event = false;
                    this.pullSheetInputList.add(rawSheetInputStream);
                    this.pullSheetNameList.add(iter.getSheetName());
                    // make shared string table available

                    // everything else is in the getNext method
                } else {
                    LOG.error("Unknown XML parser configured for low footprint mode: \""
                            + this.hocr.getLowFootprintParser() + "\"");
                    throw new FormatNotUnderstoodException(
                            "Unknown XML parser configured for low footprint mode: \""
                                    + this.hocr.getLowFootprintParser() + "\"");
                }

            }
        }
    } catch (InvalidFormatException | IOException e) {
        LOG.error(e);
        throw new FormatNotUnderstoodException("Error cannot parse new Excel file (.xlsx)");
    } catch (SAXException e) {
        LOG.error(e);
        throw new FormatNotUnderstoodException(
                "Parsing Excel sheet in .xlsx format failed. Cannot read XML content");
    } catch (ParserConfigurationException e) {
        LOG.error(e);
        throw new FormatNotUnderstoodException(
                "Parsing Excel sheet in .xlsx format failed. Cannot read XML content");
    } catch (XmlException e) {
        LOG.error(e);
        throw new FormatNotUnderstoodException(
                "Parsing Excel sheet in .xlsx format failed. Cannot read XML content");
    }
    // check skipping of additional lines
    for (int i = 0; i < this.hocr.getSkipLines(); i++) {
        this.getNext();
    }
    // check header
    if (this.hocr.getReadHeader()) {

        LOG.debug("Reading header...");
        Object[] firstRow = this.getNext();
        if (firstRow != null) {
            this.header = new String[firstRow.length];
            for (int i = 0; i < firstRow.length; i++) {
                if ((firstRow[i] != null)
                        && (!"".equals(((SpreadSheetCellDAO) firstRow[i]).getFormattedValue()))) {
                    this.header[i] = ((SpreadSheetCellDAO) firstRow[i]).getFormattedValue();
                }
            }
            this.header = MSExcelParser.sanitizeHeaders(this.header, this.hocr.getColumnNameRegex(),
                    this.hocr.getColumnNameReplace());
        } else {
            this.header = new String[0];
        }
    }
    this.headerParsed = true;

}