Example usage of org.apache.commons.io.input.XmlStreamReader.getEncoding()

Introduction

This page collects example usages of the getEncoding() method of org.apache.commons.io.input.XmlStreamReader.

Prototype

public String getEncoding()

Source Link

Document

Returns the charset encoding of the XmlStreamReader.

Usage

From source file:cn.wanghaomiao.maven.plugin.seimi.packaging.AbstractWarPackagingTask.java

/**
 * Determines the character encoding of an XML file.
 *
 * @param webXml the XML file to inspect
 * @return the encoding of the XML file, or UTF-8 if it's not specified in the file
 * @throws java.io.IOException if an error occurred while reading the file
 */
protected String getEncoding(File webXml) throws IOException {
    final XmlStreamReader encodingReader = new XmlStreamReader(webXml);
    final String detectedEncoding;
    try {
        detectedEncoding = encodingReader.getEncoding();
    } finally {
        // The reader is only needed for detection; release it on every path.
        IOUtil.close(encodingReader);
    }
    return detectedEncoding;
}

From source file:net.yacy.document.parser.GenericXMLParser.java

/**
 * Parses an XML stream with a SAX parser and wraps the result in a single {@link Document}.
 * The charset is detected by a commons-io XmlStreamReader, and the SAX content handler is given
 * an in-memory character buffer and the list receiving detected URLs.
 *
 * @param location the URL of the resource; also attached to any thrown failure
 * @param mimeType the content type hint, forwarded to the charset detection and the resulting document
 * @param charset the default charset name handed to the charset detection
 * @param scraper not used by this implementation
 * @param timezoneOffset not used by this implementation
 * @param source the stream to parse
 * @return an array holding exactly one parsed document
 * @throws Failure when the in-memory buffer overflowed or when any other error occurred while parsing
 */
@Override
public Document[] parse(final DigestURL location, final String mimeType, final String charset,
        final VocabularyScraper scraper, final int timezoneOffset, final InputStream source) throws Failure {

    /* Limit the size of the in-memory buffer to at most 25% of the available memory :
     * because some room is needed, and before being garbage collected the buffer will be converted to a String, then to a byte array.
     * Eventual stricter limits should be handled by the caller (see for example crawler.[protocol].maxFileSize configuration setting). */
    final long availableMemory = MemoryControl.available();
    final long maxBytes = (long) (availableMemory * 0.25);
    /* Divide as a long BEFORE narrowing to int : casting maxBytes first would silently truncate
     * any value above Integer.MAX_VALUE and could even produce a negative buffer size. */
    final int maxChars = (int) Math.min(Integer.MAX_VALUE, maxBytes / Character.BYTES);

    try (/* Automatically closed by this try-with-resources statement*/ CharBuffer writer = new CharBuffer(
            maxChars)) {

        /* Use commons-io XmlStreamReader advanced rules to help with charset detection when source contains no BOM or XML declaration
         * (detection algorithm notably also include ContentType transmitted by HTTP headers, here eventually present as mimeType and charset parameters),  */
        final XmlStreamReader reader = new XmlStreamReader(source, mimeType, true, charset);
        final InputSource saxSource = new InputSource(reader);
        final String detectedCharset = reader.getEncoding();

        final List<AnchorURL> detectedURLs = new ArrayList<>();

        final GenericXMLContentHandler saxHandler = new GenericXMLContentHandler(writer, detectedURLs);
        final SAXParser saxParser = getParser();
        saxParser.parse(saxSource, saxHandler);

        if (writer.isOverflow()) {
            throw new Parser.Failure("Not enough Memory available for generic the XML parser : "
                    + Formatter.bytesToString(availableMemory), location);
        }

        /* create the parsed document */
        final byte[] contentBytes = UTF8.getBytes(writer.toString());
        return new Document[] { new Document(location, mimeType, detectedCharset, this, null, null, null, null,
                "", null, null, 0.0d, 0.0d, contentBytes, detectedURLs, null, null, false, new Date()) };
    } catch (Parser.Failure e) {
        /* Already a properly described failure (buffer overflow) : transmit it unchanged */
        throw e;
    } catch (final Exception e) {
        throw new Parser.Failure("Unexpected error while parsing XML file. " + e.getMessage(), location);
    }

}

From source file:net.yacy.document.parser.GenericXMLParser.java

/**
 * {@inheritDoc}/*from   ww  w  . ja v  a 2  s  . c o  m*/
 * @param maxBytes the maximum number of content bytes to process. Be careful with to small values : 
 *    a Failure exception can eventually be thrown when maxBytes value is so small that the parser can even not fill its buffers on input stream and parse the document declaration.
 */
@Override
public Document[] parseWithLimits(DigestURL location, String mimeType, String charsetName,
        VocabularyScraper scraper, int timezoneOffset, InputStream source, int maxLinks, long maxBytes)
        throws Failure, InterruptedException, UnsupportedOperationException {
    /* Limit the size of the in-memory buffer to at most 25% of the available memory :
     * because some room is needed, and before being garbage collected the buffer will be converted to a String, then to a byte array. 
     * Eventual stricter limits should be handled by the caller (see for example crawler.[protocol].maxFileSize configuration setting). */
    final long availableMemory = MemoryControl.available();
    final long maxTextBytes = (long) (availableMemory * 0.25);
    final int maxChars;
    if ((maxTextBytes / Character.BYTES) > Integer.MAX_VALUE) {
        maxChars = Integer.MAX_VALUE;
    } else {
        maxChars = ((int) maxTextBytes) / Character.BYTES;
    }

    try (/* Automatically closed by this try-with-resources statement*/ CharBuffer writer = new CharBuffer(
            maxChars);) {

        final Set<AnchorURL> detectedURLs = new HashSet<>();
        final GenericXMLContentHandler saxHandler = new GenericXMLContentHandler(writer, detectedURLs,
                maxLinks);

        StrictLimitInputStream limitedSource = new StrictLimitInputStream(source, maxBytes);

        /* Use commons-io XmlStreamReader advanced rules to help with charset detection when source contains no BOM or XML declaration
         * (detection algorithm notably also include ContentType transmitted by HTTP headers, here eventually present as mimeType and charset parameters),  */
        final XmlStreamReader reader = new XmlStreamReader(limitedSource, mimeType, true, charsetName);
        final InputSource saxSource = new InputSource(reader);
        final String detectedCharset = reader.getEncoding();

        final SAXParser saxParser = getParser();
        boolean limitExceeded = false;
        try {
            saxParser.parse(saxSource, saxHandler);
        } catch (SAXException e) {
            if (!(e.getCause() instanceof SizeLimitExceededException)) {
                /* Only transmit to upper layer exceptions that are not caused by the maxLinks limit being reached */
                throw e;
            }
            limitExceeded = true;
        } catch (StreamLimitException e) {
            limitExceeded = true;
        }

        if (writer.isOverflow()) {
            throw new Parser.Failure("Not enough Memory available for generic the XML parser : "
                    + Formatter.bytesToString(availableMemory), location);
        }

        /* Create the parsed document with eventually only partial part of the text and links */
        final byte[] contentBytes = UTF8.getBytes(writer.toString());
        Document[] docs = new Document[] {
                new Document(location, mimeType, detectedCharset, this, null, null, null, null, "", null, null,
                        0.0d, 0.0d, contentBytes, detectedURLs, null, null, false, new Date()) };
        docs[0].setPartiallyParsed(limitExceeded);
        return docs;
    } catch (final Exception e) {
        throw new Parser.Failure("Unexpected error while parsing XML file. " + e.getMessage(), location);
    }
}

From source file:org.apache.maven.plugin.acr.AcrMojo.java

/**
 * Determines the character encoding of an XML file.
 *
 * @param xmlFile the XML file to inspect
 * @return the encoding of the XML file, or UTF-8 if it's not specified in the file
 * @throws IOException if an error occurred while reading the file
 */
private String getEncoding(File xmlFile) throws IOException {
    // If construction fails there is nothing to close, so the reader can be opened before the try.
    final XmlStreamReader encodingReader = new XmlStreamReader(xmlFile);
    try {
        return encodingReader.getEncoding();
    } finally {
        IOUtils.closeQuietly(encodingReader);
    }
}

From source file:org.apache.maven.plugin.changes.ChangesMojo.java

/**
 * Parses the given changes.xml file, filtering it first when filtering is enabled.
 * When the file does not exist a warning is logged and <code>null</code> is returned.
 * When filtering is enabled, a filtered copy is written to the filtered output directory,
 * using the encoding detected from the file itself, and that copy is parsed instead.
 *
 * @param changesXml changes xml file to parse
 * @param project maven project to parse changes for
 * @param additionalProperties additional properties used for filtering
 * @return parsed <code>ChangesXML</code> instance or null if file doesn't exist
 * @throws MavenReportException if any error occurs while filtering the file
 */
private ChangesXML getChangesFromFile(File changesXml, MavenProject project, Properties additionalProperties)
        throws MavenReportException {
    if (!changesXml.exists()) {
        getLog().warn("changes.xml file " + changesXml.getAbsolutePath() + " does not exist.");
        return null;
    }

    // Without filtering the original file is parsed directly.
    if (!filteringChanges) {
        return new ChangesXML(changesXml, getLog());
    }

    if (!filteredOutputDirectory.exists()) {
        filteredOutputDirectory.mkdirs();
    }

    File filteredFile;
    XmlStreamReader encodingProbe = null;
    try {
        // so we get encoding from the file itself
        encodingProbe = new XmlStreamReader(changesXml);
        final String sourceEncoding = encodingProbe.getEncoding();

        final String filteredName = project.getGroupId() + "." + project.getArtifactId() + "-changes.xml";
        filteredFile = new File(filteredOutputDirectory, filteredName);

        final MavenFileFilterRequest filterRequest = new MavenFileFilterRequest(changesXml, filteredFile,
                true, project, Collections.EMPTY_LIST, false, sourceEncoding, session, additionalProperties);
        mavenFileFilter.copyFile(filterRequest);
    } catch (IOException e) {
        throw new MavenReportException("Exception during filtering changes file : " + e.getMessage(), e);
    } catch (MavenFilteringException e) {
        throw new MavenReportException("Exception during filtering changes file : " + e.getMessage(), e);
    } finally {
        if (encodingProbe != null) {
            IOUtil.close(encodingProbe);
        }
    }

    // From here on the filtered copy replaces the original file.
    return new ChangesXML(filteredFile, getLog());
}

From source file:org.apache.maven.plugins.pdf.DocumentModelBuilder.java

/**
 * Extract the encoding./* w ww  . j a  v a2  s  . co m*/
 *
 * @param project the MavenProject to extract the encoding name from.
 * @return the project encoding if defined, or UTF-8 otherwise, or null if project is null.
 */
private static String getProjectModelEncoding(MavenProject project) {
    if (project == null) {
        return null;
    }

    String encoding = project.getModel().getModelEncoding();
    // Workaround for MNG-4289
    XmlStreamReader reader = null;
    try {
        reader = new XmlStreamReader(project.getFile());
        encoding = reader.getEncoding();
    } catch (IOException e) {
        // nop
    } finally {
        IOUtil.close(reader);
    }

    if (StringUtils.isEmpty(encoding)) {
        return "UTF-8";
    }

    return encoding;
}

From source file:org.apache.maven.plugins.pdf.PdfMojo.java

/**
 * Returns the default decoration model, reading it from the site descriptor when it has not been set yet.
 * When the descriptor file does not exist the model stays <code>null</code>.
 *
 * @return the DecorationModel instance from <code>site.xml</code>
 * @throws MojoExecutionException if the site descriptor cannot be read, interpolated or parsed
 */
private DecorationModel getDefaultDecorationModel() throws MojoExecutionException {
    // Already resolved by a previous call: nothing to do.
    if (this.defaultDecorationModel != null) {
        return this.defaultDecorationModel;
    }

    final Locale defaultLocale = getDefaultLocale();
    final File projectBasedir = project.getBasedir();
    final String siteRelativePath = siteTool.getRelativePath(siteDirectory.getAbsolutePath(),
            projectBasedir.getAbsolutePath());
    final File siteDescriptor = siteTool.getSiteDescriptorFromBasedir(siteRelativePath, projectBasedir,
            defaultLocale);

    DecorationModel model = null;
    if (siteDescriptor.exists()) {
        XmlStreamReader descriptorReader = null;
        try {
            descriptorReader = new XmlStreamReader(siteDescriptor);
            // The detected encoding is passed as both encoding arguments of the interpolation.
            final String encoding = descriptorReader.getEncoding();

            final String rawContent = IOUtil.toString(descriptorReader);
            final String interpolatedContent = siteTool.getInterpolatedSiteDescriptorContent(
                    new HashMap<String, String>(2), project, rawContent, encoding, encoding);

            model = new DecorationXpp3Reader().read(new StringReader(interpolatedContent));
        } catch (XmlPullParserException e) {
            throw new MojoExecutionException("Error parsing site descriptor", e);
        } catch (IOException e) {
            throw new MojoExecutionException("Error reading site descriptor", e);
        } catch (SiteToolException e) {
            throw new MojoExecutionException("Error when interpoling site descriptor", e);
        } finally {
            IOUtil.close(descriptorReader);
        }
    }

    this.defaultDecorationModel = model;
    return this.defaultDecorationModel;
}

From source file:org.cee.net.impl.XmlStreamReaderFactory.java

/**
 * Wraps the stream in an XmlStreamReader and pairs it with the encoding that reader reports.
 *
 * @param inputStream the stream to read from
 * @param contentTypeHint the content type passed to the XmlStreamReader
 * @param characterEncodingHint the default encoding passed to the XmlStreamReader; "UTF-8" is used when null
 * @return a ReaderSource built from the reader and its encoding
 * @throws IOException if the XmlStreamReader cannot be created
 */
@Override
public ReaderSource createReader(InputStream inputStream, String contentTypeHint, String characterEncodingHint)
        throws IOException {
    final String fallbackEncoding = (characterEncodingHint != null) ? characterEncodingHint : "UTF-8";
    final XmlStreamReader xmlReader = new XmlStreamReader(inputStream, contentTypeHint, true,
            fallbackEncoding);
    final String detectedEncoding = xmlReader.getEncoding();
    return new ReaderSource(xmlReader, detectedEncoding);
}