List of usage examples for org.xml.sax InputSource setEncoding
public void setEncoding(String encoding)
From source file:org.apache.marmotta.ucuenca.wk.provider.gs.GoogleScholarProvider.java
@Override public List<String> parseResponse(String resource, String requestUrl, Model triples, InputStream input, String contentType) throws DataRetrievalException { log.debug("Request Successful to {0}", requestUrl); final GSXMLHandler gsXMLHandler = new GSXMLHandler(); gsXMLHandler.clearGSresultList();/*w ww .j a va2 s . c om*/ try { XMLReader xr = XMLReaderFactory.createXMLReader("org.ccil.cowan.tagsoup.Parser"); xr.setContentHandler(gsXMLHandler); InputSource gsxml = new InputSource(input); gsxml.setEncoding("iso-8859-1"); xr.parse(gsxml); final Set<GSresult> gsresultlist = gsXMLHandler.getGSresultList(); Gson gson = new Gson(); JsonArray json = new JsonArray(); for (GSresult d : gsresultlist) { json.add(gson.toJsonTree(d).getAsJsonObject()); } JSONtoRDF parser = new JSONtoRDF(resource, MAPPINGSCHEMA, json, triples); try { parser.parse(); } catch (Exception e) { throw new DataRetrievalException("I/O exception while retrieving resource: " + requestUrl, e); } int numPages = (int) ((double) (gsXMLHandler.getNumResults() / 10)) + 1; int pagesLoaded = 1; Model model = null; while (pagesLoaded < numPages) { String pagenumquery = Integer.toString(pagesLoaded * 10); String moreDataUrl = String.format(API, pagenumquery, stringSearch, authorSearch, advancedSearch); ClientConfiguration conf = new ClientConfiguration(); LDClient ldClient = new LDClient(conf); ClientResponse response = ldClient.retrieveResource(moreDataUrl); Model pageModel = response.getData(); if (model == null) { model = pageModel; } else { model.addAll(pageModel); } pagesLoaded++; } triples.addAll(model); } catch (SAXException | IOException e) { throw new DataRetrievalException("I/O exception while retrieving resource: " + requestUrl, e); } // try { // List<String> candidates = new ArrayList<String>(); // ValueFactory factory = ValueFactoryImpl.getInstance(); // final Document doc = new SAXBuilder(XMLReaders.NONVALIDATING).build(input); // for(Element element: queryElements(doc, 
"/result/hits/hit/info/url")) { // String candidate = element.getText(); // triples.add(factory.createStatement(factory.createURI( resource ), FOAF.member, factory.createURI( candidate ) )); // candidates.add(candidate); // } // ClientConfiguration conf = new ClientConfiguration(); // LDClient ldClient = new LDClient(conf); // if(!candidates.isEmpty()) { // Model candidateModel = null; // for(String author: candidates) { // ClientResponse response = ldClient.retrieveResource(author); // Model authorModel = response.getData(); // if(candidateModel == null) { // candidateModel = authorModel; // } else { // candidateModel.addAll(authorModel); // } // } // triples.addAll(candidateModel); // } // }catch (IOException e) { // throw new DataRetrievalException("I/O error while parsing HTML response", e); // }catch (JDOMException e) { // throw new DataRetrievalException("could not parse XML response. It is not in proper XML format", e); // } return Collections.emptyList(); }
From source file:org.apache.myfaces.config.impl.FacesConfigEntityResolver.java
public InputSource resolveEntity(String publicId, String systemId) throws IOException { InputStream stream;/*www . j a v a2s. co m*/ if (systemId.equals(FACES_CONFIG_1_0_DTD_SYSTEM_ID)) { stream = ClassUtils.getResourceAsStream(FACES_CONFIG_1_0_DTD_RESOURCE); } else if (systemId.equals(FACES_CONFIG_1_1_DTD_SYSTEM_ID)) { stream = ClassUtils.getResourceAsStream(FACES_CONFIG_1_1_DTD_RESOURCE); } else if (systemId.startsWith("jar:")) { URL url = new URL(systemId); JarURLConnection conn = (JarURLConnection) url.openConnection(); JarEntry jarEntry = conn.getJarEntry(); if (jarEntry == null) { log.fatal("JAR entry '" + systemId + "' not found."); } //_jarFile.getInputStream(jarEntry); stream = conn.getJarFile().getInputStream(jarEntry); } else { if (_externalContext == null) { stream = ClassUtils.getResourceAsStream(systemId); } else { if (systemId.startsWith("file:")) { systemId = systemId.substring(7); // remove file:// } stream = _externalContext.getResourceAsStream(systemId); } } if (stream == null) { return null; } InputSource is = new InputSource(stream); is.setPublicId(publicId); is.setSystemId(systemId); is.setEncoding("ISO-8859-1"); return is; }
From source file:org.apache.nutch.parse.feed.FeedParser.java
/** * Parses the given feed and extracts out and parsers all linked items within * the feed, using the underlying ROME feed parsing library. * /* www . j a v a 2 s . co m*/ * @param content * A {@link Content} object representing the feed that is being * parsed by this {@link Parser}. * * @return A {@link ParseResult} containing all {@link Parse}d feeds that * were present in the feed file that this {@link Parser} dealt with. * */ public ParseResult getParse(Content content) { SyndFeed feed = null; ParseResult parseResult = new ParseResult(content.getUrl()); EncodingDetector detector = new EncodingDetector(conf); detector.autoDetectClues(content, true); String encoding = detector.guessEncoding(content, defaultEncoding); try { InputSource input = new InputSource(new ByteArrayInputStream(content.getContent())); input.setEncoding(encoding); SyndFeedInput feedInput = new SyndFeedInput(); feed = feedInput.build(input); } catch (Exception e) { // return empty parse LOG.warn( "Parse failed: url: " + content.getUrl() + ", exception: " + StringUtils.stringifyException(e)); return new ParseStatus(e).getEmptyParseResult(content.getUrl(), getConf()); } List entries = feed.getEntries(); String feedLink = feed.getLink(); try { feedLink = normalizers.normalize(feedLink, URLNormalizers.SCOPE_OUTLINK); if (feedLink != null) feedLink = filters.filter(feedLink); } catch (Exception e) { feedLink = null; } for (Iterator i = entries.iterator(); i.hasNext();) { SyndEntry entry = (SyndEntry) i.next(); addToMap(parseResult, feed, feedLink, entry, content); } String feedDesc = stripTags(feed.getDescriptionEx()); String feedTitle = stripTags(feed.getTitleEx()); parseResult.put(content.getUrl(), new ParseText(feedDesc), new ParseData( new ParseStatus(ParseStatus.SUCCESS), feedTitle, new Outlink[0], content.getMetadata())); return parseResult; }
From source file:org.apache.nutch.parse.html.HtmlParser.java
/**
 * Parses HTML {@code Content} into a {@link ParseResult}: detects the
 * character encoding, builds a DOM fragment, honours the page's meta
 * directives (noindex/nofollow/refresh/nocache), extracts text, title and
 * outlinks, and runs the configured HTML parse filters over the result.
 *
 * @param content fetched page (bytes + URL + metadata)
 * @return the filtered parse result, or an empty result on any parse failure
 */
public ParseResult getParse(Content content) {
    HTMLMetaTags metaTags = new HTMLMetaTags();
    URL base;
    try {
        base = new URL(content.getBaseUrl());
    } catch (MalformedURLException e) {
        return new ParseStatus(e).getEmptyParseResult(content.getUrl(), getConf());
    }

    String text = "";
    String title = "";
    Outlink[] outlinks = new Outlink[0];
    Metadata metadata = new Metadata();

    // parse the content
    DocumentFragment root;
    try {
        byte[] contentInOctets = content.getContent();
        InputSource input = new InputSource(new ByteArrayInputStream(contentInOctets));
        // Combine protocol-level clues with a sniff of the raw bytes to pick
        // the encoding before the bytes are handed to the DOM parser.
        EncodingDetector detector = new EncodingDetector(conf);
        detector.autoDetectClues(content, true);
        detector.addClue(sniffCharacterEncoding(contentInOctets), "sniffed");
        String encoding = detector.guessEncoding(content, defaultCharEncoding);
        // Record both the original and conversion encoding in the parse metadata.
        metadata.set(Metadata.ORIGINAL_CHAR_ENCODING, encoding);
        metadata.set(Metadata.CHAR_ENCODING_FOR_CONVERSION, encoding);
        input.setEncoding(encoding);
        if (LOG.isTraceEnabled()) {
            LOG.trace("Parsing...");
        }
        root = parse(input);
    } catch (IOException e) {
        return new ParseStatus(e).getEmptyParseResult(content.getUrl(), getConf());
    } catch (DOMException e) {
        return new ParseStatus(e).getEmptyParseResult(content.getUrl(), getConf());
    } catch (SAXException e) {
        return new ParseStatus(e).getEmptyParseResult(content.getUrl(), getConf());
    } catch (Exception e) {
        // Unexpected failure: only this branch logs a stack trace.
        e.printStackTrace(LogUtil.getWarnStream(LOG));
        return new ParseStatus(e).getEmptyParseResult(content.getUrl(), getConf());
    }

    // get meta directives
    HTMLMetaProcessor.getMetaTags(metaTags, root, base);
    if (LOG.isTraceEnabled()) {
        LOG.trace("Meta tags for " + base + ": " + metaTags.toString());
    }
    // check meta directives
    if (!metaTags.getNoIndex()) { // okay to index
        // The same buffer is reused for text and title extraction.
        StringBuffer sb = new StringBuffer();
        if (LOG.isTraceEnabled()) {
            LOG.trace("Getting text...");
        }
        utils.getText(sb, root); // extract text
        text = sb.toString();
        sb.setLength(0);
        if (LOG.isTraceEnabled()) {
            LOG.trace("Getting title...");
        }
        utils.getTitle(sb, root); // extract title
        title = sb.toString().trim();
    }
    if (!metaTags.getNoFollow()) { // okay to follow links
        ArrayList<Outlink> l = new ArrayList<Outlink>(); // extract outlinks
        // A <base href> tag in the page overrides the content's base URL.
        URL baseTag = utils.getBase(root);
        if (LOG.isTraceEnabled()) {
            LOG.trace("Getting links...");
        }
        utils.getOutlinks(baseTag != null ? baseTag : base, l, root);
        outlinks = l.toArray(new Outlink[l.size()]);
        if (LOG.isTraceEnabled()) {
            LOG.trace("found " + outlinks.length + " outlinks in " + content.getUrl());
        }
    }

    ParseStatus status = new ParseStatus(ParseStatus.SUCCESS);
    if (metaTags.getRefresh()) {
        // A meta-refresh is reported as a redirect with target URL and delay.
        status.setMinorCode(ParseStatus.SUCCESS_REDIRECT);
        status.setArgs(new String[] { metaTags.getRefreshHref().toString(),
                Integer.toString(metaTags.getRefreshTime()) });
    }
    ParseData parseData = new ParseData(status, title, outlinks, content.getMetadata(), metadata);
    ParseResult parseResult = ParseResult.createParseResult(content.getUrl(), new ParseImpl(text, parseData));

    // run filters on parse
    ParseResult filteredParse = this.htmlParseFilters.filter(content, parseResult, metaTags, root);
    if (metaTags.getNoCache()) { // not okay to cache
        for (Map.Entry<org.apache.hadoop.io.Text, Parse> entry : filteredParse)
            entry.getValue().getData().getParseMeta().set(Nutch.CACHING_FORBIDDEN_KEY, cachingPolicy);
    }
    return filteredParse;
}
From source file:org.apache.nutch.store.readable.StoreReadable.java
/**
 * Parses the HTML content stored on a Gora {@code WebPage}: detects the
 * encoding, builds a DOM fragment, honours meta directives, extracts text,
 * title and outlinks, attaches a "readable" JSON rendition to the page, and
 * runs the HTML parse filters.
 *
 * @param url  the page URL (used for logging and filters)
 * @param page the fetched page record; its metadata and readable field are
 *             mutated as a side effect
 * @return the filtered {@link Parse}, or an empty parse on failure
 */
public Parse getParse(String url, WebPage page) {
    HTMLMetaTags metaTags = new HTMLMetaTags();
    // NOTE(review): debug print to stdout — should probably be LOG.debug.
    System.out.println("[STORE-READABLE]getParse-------------------------------------------------------------");
    String baseUrl = TableUtil.toString(page.getBaseUrl());
    URL base;
    try {
        base = new URL(baseUrl);
    } catch (MalformedURLException e) {
        return ParseStatusUtils.getEmptyParse(e, getConf());
    }

    String text = "";
    String title = "";
    Outlink[] outlinks = new Outlink[0];

    // parse the content
    DocumentFragment root;
    try {
        ByteBuffer contentInOctets = page.getContent();
        // Wrap only the live window of the ByteBuffer (offset+position .. remaining)
        // so the parser never reads beyond the page content.
        InputSource input = new InputSource(new ByteArrayInputStream(contentInOctets.array(),
                contentInOctets.arrayOffset() + contentInOctets.position(), contentInOctets.remaining()));
        EncodingDetector detector = new EncodingDetector(conf);
        detector.autoDetectClues(page, true);
        detector.addClue(sniffCharacterEncoding(contentInOctets), "sniffed");
        String encoding = detector.guessEncoding(page, defaultCharEncoding);
        // Record the detected encodings in the page's metadata map.
        page.getMetadata().put(new Utf8(Metadata.ORIGINAL_CHAR_ENCODING),
                ByteBuffer.wrap(Bytes.toBytes(encoding)));
        page.getMetadata().put(new Utf8(Metadata.CHAR_ENCODING_FOR_CONVERSION),
                ByteBuffer.wrap(Bytes.toBytes(encoding)));
        input.setEncoding(encoding);
        if (LOG.isTraceEnabled()) {
            LOG.trace("Parsing...");
        }
        root = parse(input);
    } catch (IOException e) {
        LOG.error("Failed with the following IOException: ", e);
        return ParseStatusUtils.getEmptyParse(e, getConf());
    } catch (DOMException e) {
        LOG.error("Failed with the following DOMException: ", e);
        return ParseStatusUtils.getEmptyParse(e, getConf());
    } catch (SAXException e) {
        LOG.error("Failed with the following SAXException: ", e);
        return ParseStatusUtils.getEmptyParse(e, getConf());
    } catch (Exception e) {
        LOG.error("Failed with the following Exception: ", e);
        return ParseStatusUtils.getEmptyParse(e, getConf());
    }

    // get meta directives
    HTMLMetaProcessor.getMetaTags(metaTags, root, base);
    if (LOG.isTraceEnabled()) {
        LOG.trace("Meta tags for " + base + ": " + metaTags.toString());
    }
    // check meta directives
    if (!metaTags.getNoIndex()) { // okay to index
        // Buffer reused for both text and title extraction.
        StringBuilder sb = new StringBuilder();
        if (LOG.isTraceEnabled()) {
            LOG.trace("Getting text...");
        }
        utils.getText(sb, root); // extract text
        text = sb.toString();
        sb.setLength(0);
        if (LOG.isTraceEnabled()) {
            LOG.trace("Getting title...");
        }
        utils.getTitle(sb, root); // extract title
        title = sb.toString().trim();
    }
    if (!metaTags.getNoFollow()) { // okay to follow links
        ArrayList<Outlink> l = new ArrayList<Outlink>(); // extract outlinks
        // A <base href> tag in the page overrides the record's base URL.
        URL baseTag = utils.getBase(root);
        if (LOG.isTraceEnabled()) {
            LOG.trace("Getting links...");
        }
        utils.getOutlinks(baseTag != null ? baseTag : base, l, root);
        outlinks = l.toArray(new Outlink[l.size()]);
        if (LOG.isTraceEnabled()) {
            LOG.trace("found " + outlinks.length + " outlinks in " + url);
        }
    }

    ParseStatus status = ParseStatus.newBuilder().build();
    status.setMajorCode((int) ParseStatusCodes.SUCCESS);
    if (metaTags.getRefresh()) {
        // Meta-refresh is reported as a redirect with target URL and delay.
        status.setMinorCode((int) ParseStatusCodes.SUCCESS_REDIRECT);
        status.getArgs().add(new Utf8(metaTags.getRefreshHref().toString()));
        status.getArgs().add(new Utf8(Integer.toString(metaTags.getRefreshTime())));
    }

    // Build the "readable" JSON rendition and store it on the page record.
    String strJo = addJsonToPage(url, page);
    //        storeJsonToSchema(url, page ,strJo);
    page.setReadable(new Utf8(strJo));
    Parse parse = new Parse(text, title, outlinks, status, strJo);
    parse = htmlParseFilters.filter(url, page, parse, metaTags, root);
    if (metaTags.getNoCache()) { // not okay to cache
        page.getMetadata().put(new Utf8(Nutch.CACHING_FORBIDDEN_KEY),
                ByteBuffer.wrap(Bytes.toBytes(cachingPolicy)));
    }
    parse.setJsonRead(strJo);
    return parse;
}
From source file:org.apache.nutchbase.parse.html.HtmlParserHbase.java
/**
 * Parses the HTML content of an HBase row into a {@link ParseHbase}: detects
 * the encoding, builds a DOM fragment, honours meta directives, extracts
 * text, title and outlinks, and runs the HTML parse filters.
 *
 * @param url the page URL (used for logging and filters)
 * @param row the fetched row; its cache-forbidden metadata may be mutated
 * @return the filtered parse, or an empty parse on failure
 */
public ParseHbase getParse(String url, RowPart row) {
    HTMLMetaTags metaTags = new HTMLMetaTags();
    String baseUrl = row.getBaseUrl();
    URL base;
    try {
        base = new URL(baseUrl);
    } catch (MalformedURLException e) {
        return new ParseStatus(e).getEmptyParseHbase(getConf());
    }

    String text = "";
    String title = "";
    Outlink[] outlinks = new Outlink[0];
    Metadata metadata = new Metadata();

    // parse the content
    DocumentFragment root;
    try {
        byte[] contentInOctets = row.getContent();
        InputSource input = new InputSource(new ByteArrayInputStream(contentInOctets));
        // Combine protocol-level clues with a sniff of the raw bytes to pick
        // the encoding before parsing.
        EncodingDetector detector = new EncodingDetector(conf);
        detector.autoDetectClues(row, true);
        detector.addClue(sniffCharacterEncoding(contentInOctets), "sniffed");
        String encoding = detector.guessEncoding(row, defaultCharEncoding);
        metadata.set(Metadata.ORIGINAL_CHAR_ENCODING, encoding);
        metadata.set(Metadata.CHAR_ENCODING_FOR_CONVERSION, encoding);
        input.setEncoding(encoding);
        if (LOG.isTraceEnabled()) {
            LOG.trace("Parsing...");
        }
        root = parse(input);
    } catch (IOException e) {
        return new ParseStatus(e).getEmptyParseHbase(getConf());
    } catch (DOMException e) {
        return new ParseStatus(e).getEmptyParseHbase(getConf());
    } catch (SAXException e) {
        return new ParseStatus(e).getEmptyParseHbase(getConf());
    } catch (Exception e) {
        // Unexpected failure: only this branch logs a stack trace.
        e.printStackTrace(LogUtil.getWarnStream(LOG));
        return new ParseStatus(e).getEmptyParseHbase(getConf());
    }

    // get meta directives
    HTMLMetaProcessor.getMetaTags(metaTags, root, base);
    if (LOG.isTraceEnabled()) {
        LOG.trace("Meta tags for " + base + ": " + metaTags.toString());
    }
    // check meta directives
    if (!metaTags.getNoIndex()) { // okay to index
        // Buffer reused for both text and title extraction.
        StringBuffer sb = new StringBuffer();
        if (LOG.isTraceEnabled()) {
            LOG.trace("Getting text...");
        }
        utils.getText(sb, root); // extract text
        text = sb.toString();
        sb.setLength(0);
        if (LOG.isTraceEnabled()) {
            LOG.trace("Getting title...");
        }
        utils.getTitle(sb, root); // extract title
        title = sb.toString().trim();
    }
    // Unlike the plain HtmlParser, nofollow can be overridden by configuration.
    if (!metaTags.getNoFollow() || ignoreNoFollow) { // okay to follow links
        ArrayList<Outlink> l = new ArrayList<Outlink>(); // extract outlinks
        URL baseTag = utils.getBase(root);
        if (LOG.isTraceEnabled()) {
            LOG.trace("Getting links...");
        }
        utils.getOutlinks(baseTag != null ? baseTag : base, l, root);
        outlinks = l.toArray(new Outlink[l.size()]);
        if (LOG.isTraceEnabled()) {
            LOG.trace("found " + outlinks.length + " outlinks in " + url);
        }
    }

    ParseStatus status = new ParseStatus(ParseStatus.SUCCESS);
    if (metaTags.getRefresh()) {
        // Meta-refresh is reported as a redirect with target URL and delay.
        status.setMinorCode(ParseStatus.SUCCESS_REDIRECT);
        status.setArgs(new String[] { metaTags.getRefreshHref().toString(),
                Integer.toString(metaTags.getRefreshTime()) });
    }
    ParseHbase parse = new ParseHbase(text, title, outlinks, status);
    parse = htmlParseFilters.filter(url, row, parse, metaTags, root);
    if (metaTags.getNoCache()) { // not okay to cache
        row.putMeta(Nutch.CACHING_FORBIDDEN_KEY, Bytes.toBytes(cachingPolicy));
    }
    return parse;
}
From source file:org.apache.solr.analysis.HyphenationCompoundWordTokenFilterFactory.java
public void inform(ResourceLoader loader) { InputStream stream = null;//ww w . j a v a2 s .c om try { if (dictFile != null) // the dictionary can be empty. dictionary = getWordSet(loader, dictFile, false); // TODO: Broken, because we cannot resolve real system id // ResourceLoader should also supply method like ClassLoader to get resource URL stream = loader.openResource(hypFile); final InputSource is = new InputSource(stream); is.setEncoding(encoding); // if it's null let xml parser decide is.setSystemId(hypFile); hyphenator = HyphenationCompoundWordTokenFilter.getHyphenationTree(is); } catch (Exception e) { // TODO: getHyphenationTree really shouldn't throw "Exception" throw new RuntimeException(e); } finally { IOUtils.closeQuietly(stream); } }
From source file:org.apache.solr.handler.loader.XMLLoader.java
/**
 * Loads an XML update request. When the {@code tr} request parameter names an
 * XSL transform, the stream is first transformed into a DOM which is then fed
 * to the StAX update processor; otherwise the stream is parsed directly.
 *
 * @param req       the update request (supplies params such as {@code tr})
 * @param rsp       the response being built (unused directly here)
 * @param stream    the incoming content stream; its content-type charset is
 *                  only a hint for the XML parser
 * @param processor pipeline that receives the parsed update commands
 * @throws Exception parse/transform failures are wrapped in
 *                   {@code SolrException} with BAD_REQUEST
 */
@Override
public void load(SolrQueryRequest req, SolrQueryResponse rsp, ContentStream stream,
        UpdateRequestProcessor processor) throws Exception {
    final String charset = ContentStreamBase.getCharsetFromContentType(stream.getContentType());
    InputStream is = null;
    XMLStreamReader parser = null;

    String tr = req.getParams().get(CommonParams.TR, null);
    if (tr != null) {
        final Transformer t = getTransformer(tr, req);
        final DOMResult result = new DOMResult();

        // first step: read XML and build DOM using Transformer (this is no overhead, as XSL always produces
        // an internal result DOM tree, we just access it directly as input for StAX):
        try {
            is = stream.getStream();
            final InputSource isrc = new InputSource(is);
            isrc.setEncoding(charset);
            // Use our own SAX reader so error handling and entity resolution
            // are controlled (external entities are suppressed — XXE guard).
            final XMLReader xmlr = saxFactory.newSAXParser().getXMLReader();
            xmlr.setErrorHandler(xmllog);
            xmlr.setEntityResolver(EmptyEntityResolver.SAX_INSTANCE);
            final SAXSource source = new SAXSource(xmlr, isrc);
            t.transform(source, result);
        } catch (TransformerException te) {
            throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, te.getMessage(), te);
        } finally {
            IOUtils.closeQuietly(is);
        }
        // second step: feed the intermediate DOM tree into StAX parser:
        try {
            parser = inputFactory.createXMLStreamReader(new DOMSource(result.getNode()));
            this.processUpdate(req, processor, parser);
        } catch (XMLStreamException e) {
            throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, e.getMessage(), e);
        } finally {
            if (parser != null)
                parser.close();
        }
    }
    // Normal XML Loader
    else {
        try {
            is = stream.getStream();
            if (log.isTraceEnabled()) {
                // Buffer the whole body so it can be logged and then re-read.
                final byte[] body = IOUtils.toByteArray(is);
                // TODO: The charset may be wrong, as the real charset is later
                // determined by the XML parser, the content-type is only used as a hint!
                log.trace("body", new String(body, (charset == null) ? ContentStreamBase.DEFAULT_CHARSET : charset));
                IOUtils.closeQuietly(is);
                is = new ByteArrayInputStream(body);
            }
            parser = (charset == null) ? inputFactory.createXMLStreamReader(is)
                    : inputFactory.createXMLStreamReader(is, charset);
            this.processUpdate(req, processor, parser);
        } catch (XMLStreamException e) {
            throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, e.getMessage(), e);
        } finally {
            if (parser != null)
                parser.close();
            IOUtils.closeQuietly(is);
        }
    }
}
From source file:org.apache.solr.handler.XsltXMLLoader.java
@Override public void load(SolrQueryRequest req, SolrQueryResponse rsp, ContentStream stream) throws Exception { final DOMResult result = new DOMResult(); final Transformer t = getTransformer(req); InputStream is = null;/*from ww w.j a va 2 s .c o m*/ XMLStreamReader parser = null; // first step: read XML and build DOM using Transformer (this is no overhead, as XSL always produces // an internal result DOM tree, we just access it directly as input for StAX): try { is = stream.getStream(); final String charset = ContentStreamBase.getCharsetFromContentType(stream.getContentType()); final InputSource isrc = new InputSource(is); isrc.setEncoding(charset); final SAXSource source = new SAXSource(isrc); t.transform(source, result); } catch (TransformerException te) { throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, te.getMessage(), te); } finally { IOUtils.closeQuietly(is); } // second step feed the intermediate DOM tree into StAX parser: try { parser = inputFactory.createXMLStreamReader(new DOMSource(result.getNode())); this.processUpdate(processor, parser); } catch (XMLStreamException e) { throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, e.getMessage(), e); } finally { if (parser != null) parser.close(); } }
From source file:org.codelibs.fess.transformer.FessXpathTransformer.java
/**
 * Parses the crawled response body into a DOM, evaluates the configured
 * XPath field rules into a data map, attaches the raw body back to the
 * response, and serialises the map into {@code resultData}.
 *
 * <p>The body is first spooled to a temp file so it can be read twice (once
 * for parsing, once to re-attach as the response body); the file is deleted
 * in the outer finally.
 *
 * @param responseData crawl response; its body stream and charset are read,
 *                     and its response body is replaced
 * @param resultData   receives the serialised field map and encoding
 */
@Override
protected void storeData(final ResponseData responseData, final ResultData resultData) {
    final File tempFile = ResponseDataUtil.createResponseBodyFile(responseData);
    try {
        final DOMParser parser = getDomParser();
        BufferedInputStream bis = null;
        try {
            bis = new BufferedInputStream(new FileInputStream(tempFile));
            // Peek at the first bytes and skip a UTF-8 BOM if present;
            // otherwise rewind to the mark so the parser sees every byte.
            final byte[] bomBytes = new byte[UTF8_BOM_SIZE];
            bis.mark(UTF8_BOM_SIZE);
            // NOTE(review): the return value of read() is ignored — a short
            // read leaves bomBytes partially zero-filled, which simply fails
            // the BOM check and triggers the reset, so behavior is still safe.
            bis.read(bomBytes); // NOSONAR
            if (!isUtf8BomBytes(bomBytes)) {
                bis.reset();
            }
            final InputSource is = new InputSource(bis);
            if (responseData.getCharSet() != null) {
                is.setEncoding(responseData.getCharSet());
            }
            parser.parse(is);
        } catch (final Exception e) {
            throw new RobotCrawlAccessException("Could not parse " + responseData.getUrl(), e);
        } finally {
            IOUtils.closeQuietly(bis);
        }

        final Document document = parser.getDocument();
        final Map<String, Object> dataMap = new HashMap<String, Object>();
        // Evaluate each field rule (key -> XPath) against the document.
        for (final Map.Entry<String, String> entry : fieldRuleMap.entrySet()) {
            final String path = entry.getValue();
            try {
                final XObject xObj = getXPathAPI().eval(document, path);
                final int type = xObj.getType();
                // Scalar XPath results are stringified directly; node-ish
                // results fall through to a single-node text lookup.
                switch (type) {
                case XObject.CLASS_BOOLEAN:
                    final boolean b = xObj.bool();
                    putResultDataBody(dataMap, entry.getKey(), Boolean.toString(b));
                    break;
                case XObject.CLASS_NUMBER:
                    final double d = xObj.num();
                    putResultDataBody(dataMap, entry.getKey(), Double.toString(d));
                    break;
                case XObject.CLASS_STRING:
                    final String str = xObj.str();
                    putResultDataBody(dataMap, entry.getKey(), str);
                    break;
                case XObject.CLASS_NULL:
                case XObject.CLASS_UNKNOWN:
                case XObject.CLASS_NODESET:
                case XObject.CLASS_RTREEFRAG:
                case XObject.CLASS_UNRESOLVEDVARIABLE:
                default:
                    final Node value = getXPathAPI().selectSingleNode(document, entry.getValue());
                    putResultDataBody(dataMap, entry.getKey(), value != null ? value.getTextContent() : null);
                    break;
                }
            } catch (final TransformerException e) {
                // A single bad rule is logged and skipped; other rules proceed.
                logger.warn("Could not parse a value of " + entry.getKey() + ":" + entry.getValue());
            }
        }

        // Re-attach the raw body to the response; extraction of additional
        // data proceeds even when the temp file has vanished.
        FileInputStream fis = null;
        try {
            fis = new FileInputStream(tempFile);
            responseData.setResponseBody(fis);
            putAdditionalData(dataMap, responseData, document);
        } catch (final FileNotFoundException e) {
            logger.warn(tempFile + " does not exist.", e);
            putAdditionalData(dataMap, responseData, document);
        } finally {
            IOUtils.closeQuietly(fis);
        }

        try {
            resultData.setData(SerializeUtil.fromObjectToBinary(dataMap));
        } catch (final Exception e) {
            throw new RobotCrawlAccessException("Could not serialize object: " + responseData.getUrl(), e);
        }
        resultData.setEncoding(charsetName);
    } finally {
        if (!tempFile.delete()) {
            logger.warn("Could not delete a temp file: " + tempFile);
        }
    }
}