List of usage examples for org.apache.commons.io.input XmlStreamReader getEncoding
public String getEncoding()
From source file:cn.wanghaomiao.maven.plugin.seimi.packaging.AbstractWarPackagingTask.java
/**
 * Determines the character encoding of an XML file.
 *
 * @param webXml the XML file to inspect
 * @return the encoding reported by {@link XmlStreamReader} for the file; UTF-8 is expected when the
 *         file does not specify one
 * @throws java.io.IOException if an error occurred while reading the file
 */
protected String getEncoding(File webXml) throws IOException {
    final XmlStreamReader reader = new XmlStreamReader(webXml);
    final String detectedEncoding;
    try {
        detectedEncoding = reader.getEncoding();
    } finally {
        // The reader is only needed for the detection itself; release it in every case.
        IOUtil.close(reader);
    }
    return detectedEncoding;
}
From source file:net.yacy.document.parser.GenericXMLParser.java
@Override public Document[] parse(final DigestURL location, final String mimeType, final String charset, final VocabularyScraper scraper, final int timezoneOffset, final InputStream source) throws Failure { /* Limit the size of the in-memory buffer to at most 25% of the available memory : * because some room is needed, and before being garbage collected the buffer will be converted to a String, then to a byte array. * Eventual stricter limits should be handled by the caller (see for example crawler.[protocol].maxFileSize configuration setting). */ final long availableMemory = MemoryControl.available(); final long maxBytes = (long) (availableMemory * 0.25); final int maxChars; if ((maxBytes / Character.BYTES) > Integer.MAX_VALUE) { maxChars = Integer.MAX_VALUE; } else {//from w w w .j a v a 2s . com maxChars = ((int) maxBytes) / Character.BYTES; } try (/* Automatically closed by this try-with-resources statement*/ CharBuffer writer = new CharBuffer( maxChars);) { /* Use commons-io XmlStreamReader advanced rules to help with charset detection when source contains no BOM or XML declaration * (detection algorithm notably also include ContentType transmitted by HTTP headers, here eventually present as mimeType and charset parameters), */ final XmlStreamReader reader = new XmlStreamReader(source, mimeType, true, charset); final InputSource saxSource = new InputSource(reader); final String detectedCharset = reader.getEncoding(); final List<AnchorURL> detectedURLs = new ArrayList<>(); final GenericXMLContentHandler saxHandler = new GenericXMLContentHandler(writer, detectedURLs); final SAXParser saxParser = getParser(); saxParser.parse(saxSource, saxHandler); if (writer.isOverflow()) { throw new Parser.Failure("Not enough Memory available for generic the XML parser : " + Formatter.bytesToString(availableMemory), location); } /* create the parsed document */ Document[] docs = null; final byte[] contentBytes = UTF8.getBytes(writer.toString()); docs = new Document[] { new 
Document(location, mimeType, detectedCharset, this, null, null, null, null, "", null, null, 0.0d, 0.0d, contentBytes, detectedURLs, null, null, false, new Date()) }; return docs; } catch (Parser.Failure e) { throw e; } catch (final Exception e) { throw new Parser.Failure("Unexpected error while parsing XML file. " + e.getMessage(), location); } }
From source file:net.yacy.document.parser.GenericXMLParser.java
/** * {@inheritDoc}/*from ww w . ja v a 2 s . c o m*/ * @param maxBytes the maximum number of content bytes to process. Be careful with to small values : * a Failure exception can eventually be thrown when maxBytes value is so small that the parser can even not fill its buffers on input stream and parse the document declaration. */ @Override public Document[] parseWithLimits(DigestURL location, String mimeType, String charsetName, VocabularyScraper scraper, int timezoneOffset, InputStream source, int maxLinks, long maxBytes) throws Failure, InterruptedException, UnsupportedOperationException { /* Limit the size of the in-memory buffer to at most 25% of the available memory : * because some room is needed, and before being garbage collected the buffer will be converted to a String, then to a byte array. * Eventual stricter limits should be handled by the caller (see for example crawler.[protocol].maxFileSize configuration setting). */ final long availableMemory = MemoryControl.available(); final long maxTextBytes = (long) (availableMemory * 0.25); final int maxChars; if ((maxTextBytes / Character.BYTES) > Integer.MAX_VALUE) { maxChars = Integer.MAX_VALUE; } else { maxChars = ((int) maxTextBytes) / Character.BYTES; } try (/* Automatically closed by this try-with-resources statement*/ CharBuffer writer = new CharBuffer( maxChars);) { final Set<AnchorURL> detectedURLs = new HashSet<>(); final GenericXMLContentHandler saxHandler = new GenericXMLContentHandler(writer, detectedURLs, maxLinks); StrictLimitInputStream limitedSource = new StrictLimitInputStream(source, maxBytes); /* Use commons-io XmlStreamReader advanced rules to help with charset detection when source contains no BOM or XML declaration * (detection algorithm notably also include ContentType transmitted by HTTP headers, here eventually present as mimeType and charset parameters), */ final XmlStreamReader reader = new XmlStreamReader(limitedSource, mimeType, true, charsetName); final InputSource saxSource = 
new InputSource(reader); final String detectedCharset = reader.getEncoding(); final SAXParser saxParser = getParser(); boolean limitExceeded = false; try { saxParser.parse(saxSource, saxHandler); } catch (SAXException e) { if (!(e.getCause() instanceof SizeLimitExceededException)) { /* Only transmit to upper layer exceptions that are not caused by the maxLinks limit being reached */ throw e; } limitExceeded = true; } catch (StreamLimitException e) { limitExceeded = true; } if (writer.isOverflow()) { throw new Parser.Failure("Not enough Memory available for generic the XML parser : " + Formatter.bytesToString(availableMemory), location); } /* Create the parsed document with eventually only partial part of the text and links */ final byte[] contentBytes = UTF8.getBytes(writer.toString()); Document[] docs = new Document[] { new Document(location, mimeType, detectedCharset, this, null, null, null, null, "", null, null, 0.0d, 0.0d, contentBytes, detectedURLs, null, null, false, new Date()) }; docs[0].setPartiallyParsed(limitExceeded); return docs; } catch (final Exception e) { throw new Parser.Failure("Unexpected error while parsing XML file. " + e.getMessage(), location); } }
From source file:org.apache.maven.plugin.acr.AcrMojo.java
/**
 * Determines the character encoding of an XML file.
 *
 * @param xmlFile the XML file to inspect
 * @return the encoding reported by {@link XmlStreamReader} for the file; UTF-8 is expected when the
 *         file does not specify one
 * @throws IOException if an error occurred while reading the file
 */
private String getEncoding(File xmlFile) throws IOException {
    // If opening the file fails there is nothing to close, so the reader can be created up front.
    final XmlStreamReader reader = new XmlStreamReader(xmlFile);
    try {
        return reader.getEncoding();
    } finally {
        IOUtils.closeQuietly(reader);
    }
}
From source file:org.apache.maven.plugin.changes.ChangesMojo.java
/** * Parses specified changes.xml file. It also makes filtering if needed. If specified file doesn't exist * it will log warning and return <code>null</code>. * * @param changesXml changes xml file to parse * @param project maven project to parse changes for * @param additionalProperties additional properties used for filtering * @return parsed <code>ChangesXML</code> instance or null if file doesn't exist * @throws MavenReportException if any errors occurs while parsing *//*w ww.j a v a 2 s .co m*/ private ChangesXML getChangesFromFile(File changesXml, MavenProject project, Properties additionalProperties) throws MavenReportException { if (!changesXml.exists()) { getLog().warn("changes.xml file " + changesXml.getAbsolutePath() + " does not exist."); return null; } if (filteringChanges) { if (!filteredOutputDirectory.exists()) { filteredOutputDirectory.mkdirs(); } XmlStreamReader xmlStreamReader = null; try { // so we get encoding from the file itself xmlStreamReader = new XmlStreamReader(changesXml); String encoding = xmlStreamReader.getEncoding(); File resultFile = new File(filteredOutputDirectory, project.getGroupId() + "." + project.getArtifactId() + "-changes.xml"); final MavenFileFilterRequest mavenFileFilterRequest = new MavenFileFilterRequest(changesXml, resultFile, true, project, Collections.EMPTY_LIST, false, encoding, session, additionalProperties); mavenFileFilter.copyFile(mavenFileFilterRequest); changesXml = resultFile; } catch (IOException e) { throw new MavenReportException("Exception during filtering changes file : " + e.getMessage(), e); } catch (MavenFilteringException e) { throw new MavenReportException("Exception during filtering changes file : " + e.getMessage(), e); } finally { if (xmlStreamReader != null) { IOUtil.close(xmlStreamReader); } } } return new ChangesXML(changesXml, getLog()); }
From source file:org.apache.maven.plugins.pdf.DocumentModelBuilder.java
/** * Extract the encoding./* w ww . j a v a2 s . co m*/ * * @param project the MavenProject to extract the encoding name from. * @return the project encoding if defined, or UTF-8 otherwise, or null if project is null. */ private static String getProjectModelEncoding(MavenProject project) { if (project == null) { return null; } String encoding = project.getModel().getModelEncoding(); // Workaround for MNG-4289 XmlStreamReader reader = null; try { reader = new XmlStreamReader(project.getFile()); encoding = reader.getEncoding(); } catch (IOException e) { // nop } finally { IOUtil.close(reader); } if (StringUtils.isEmpty(encoding)) { return "UTF-8"; } return encoding; }
From source file:org.apache.maven.plugins.pdf.PdfMojo.java
/**
 * Returns the decoration model read from the site descriptor, loading it on first use.
 * <p>
 * The result is stored in <code>defaultDecorationModel</code>. When no descriptor file exists the
 * stored value stays <code>null</code>, so the lookup is attempted again on the next call.
 * </p>
 *
 * @return the DecorationModel instance from <code>site.xml</code>, or <code>null</code> if the
 *         descriptor file does not exist
 * @throws MojoExecutionException if the site descriptor cannot be read, interpolated or parsed
 */
private DecorationModel getDefaultDecorationModel() throws MojoExecutionException {
    if (this.defaultDecorationModel != null) {
        return this.defaultDecorationModel;
    }

    final Locale defaultLocale = getDefaultLocale();
    final File projectBasedir = project.getBasedir();
    final String siteRelativePath = siteTool.getRelativePath(siteDirectory.getAbsolutePath(),
            projectBasedir.getAbsolutePath());
    final File siteDescriptor = siteTool.getSiteDescriptorFromBasedir(siteRelativePath, projectBasedir,
            defaultLocale);

    DecorationModel model = null;

    if (siteDescriptor.exists()) {
        XmlStreamReader descriptorReader = null;
        try {
            descriptorReader = new XmlStreamReader(siteDescriptor);
            // The detected encoding is passed as both encoding arguments of the interpolation.
            final String encoding = descriptorReader.getEncoding();
            final String rawContent = IOUtil.toString(descriptorReader);

            final String interpolatedContent = siteTool.getInterpolatedSiteDescriptorContent(
                    new HashMap<String, String>(2), project, rawContent, encoding, encoding);

            model = new DecorationXpp3Reader().read(new StringReader(interpolatedContent));
        } catch (XmlPullParserException e) {
            throw new MojoExecutionException("Error parsing site descriptor", e);
        } catch (IOException e) {
            throw new MojoExecutionException("Error reading site descriptor", e);
        } catch (SiteToolException e) {
            throw new MojoExecutionException("Error when interpoling site descriptor", e);
        } finally {
            IOUtil.close(descriptorReader);
        }
    }

    this.defaultDecorationModel = model;
    return this.defaultDecorationModel;
}
From source file:org.cee.net.impl.XmlStreamReaderFactory.java
@Override public ReaderSource createReader(InputStream inputStream, String contentTypeHint, String characterEncodingHint) throws IOException { if (characterEncodingHint == null) { characterEncodingHint = "UTF-8"; }//from w ww. j a v a 2 s .c o m XmlStreamReader reader = new XmlStreamReader(inputStream, contentTypeHint, true, characterEncodingHint); return new ReaderSource(reader, reader.getEncoding()); }