List of usage examples for org.apache.commons.io.input.BOMInputStream close
public void close() throws IOException
close()
method. From source file:de.uzk.hki.da.model.RightsSectionURNMetsXmlReader.java
/**
 * Reads the URN from the MODS section embedded in a METS file.
 *
 * <p>The lookup follows the first {@code dmdSec/mdWrap/xmlData/mods} chain below the root
 * element and inspects every MODS {@code identifier} child. If several identifiers carry
 * {@code type="urn"} (compared case-insensitively), the last one wins. Identifiers without a
 * {@code type} attribute are skipped.
 *
 * @param file the METS file
 * @return the URN specified in the METS file, or {@code null} if the file does not specify a
 *         (non-empty) URN or one of the expected wrapper elements is missing
 * @throws IOException if the file cannot be read, the SAX parser cannot be created, or the
 *         document cannot be parsed (parsing failures are wrapped as the cause)
 * @throws ParseException the parse exception
 * @author Thomas Kleinke
 */
public String readURN(File file) throws IOException, ParseException {
    // Closing the BOM stream also closes the FileInputStream it wraps, so a single
    // close in the finally block releases the file handle on every exit path.
    BOMInputStream bomInputStream = new BOMInputStream(new FileInputStream(file));
    try {
        XMLReader xmlReader;
        try {
            xmlReader = SAXParserFactory.newInstance().newSAXParser().getXMLReader();
        } catch (Exception e) {
            throw new IOException("Error creating SAX parser", e);
        }
        xmlReader.setErrorHandler(err);

        NodeFactory nodeFactory = new PremisXmlReaderNodeFactory();
        Builder parser = new Builder(xmlReader, false, nodeFactory);
        logger.trace("Successfully built builder and XML reader");

        Document doc = parser.build(bomInputStream);
        Element root = doc.getRootElement();

        Element dmdSecEl = root.getFirstChildElement("dmdSec", METS_NS);
        if (dmdSecEl == null)
            return null;

        Element mdWrapEl = dmdSecEl.getFirstChildElement("mdWrap", METS_NS);
        if (mdWrapEl == null)
            return null;

        Element xmlDataEl = mdWrapEl.getFirstChildElement("xmlData", METS_NS);
        if (xmlDataEl == null)
            return null;

        Element modsEl = xmlDataEl.getFirstChildElement("mods", MODS_NS);
        if (modsEl == null)
            return null;

        String urn = null;
        Elements identifierEls = modsEl.getChildElements("identifier", MODS_NS);
        for (int i = 0; i < identifierEls.size(); i++) {
            Element element = identifierEls.get(i);
            // getAttribute returns null when the identifier has no "type" attribute;
            // such identifiers cannot be URNs and must not abort the whole lookup.
            Attribute attribute = element.getAttribute("type");
            if (attribute != null && "urn".equalsIgnoreCase(attribute.getValue()))
                urn = element.getValue();
        }

        // An empty URN is treated the same as a missing one.
        if (urn != null && urn.equals(""))
            urn = null;

        return urn;
    } catch (ValidityException ve) {
        throw new IOException(ve);
    } catch (ParsingException pe) {
        throw new IOException(pe);
    } finally {
        bomInputStream.close();
    }
}
From source file:de.uzk.hki.da.metadata.XMPMetadataStructure.java
/**
 * Creates the structure by parsing the given XMP metadata file.
 *
 * <p>The file is resolved against {@code workPath}, read through a {@link BOMInputStream},
 * decoded as UTF-8 and parsed into {@code rdfDoc}; {@code descriptionElements} is then
 * populated from {@code getXMPDescriptionElements()}.
 *
 * @param workPath     base path the metadata file path is resolved against
 * @param metadataFile the XMP metadata file
 * @param documents    the documents associated with this metadata structure
 * @throws FileNotFoundException if the metadata file cannot be opened
 * @throws JDOMException         if the file cannot be parsed
 * @throws IOException           if reading or closing the file fails
 */
public XMPMetadataStructure(Path workPath, File metadataFile, List<de.uzk.hki.da.model.Document> documents)
        throws FileNotFoundException, JDOMException, IOException {
    super(workPath, metadataFile, documents);

    logger.debug("Instantiate new xmp metadata structure with metadata file " + metadataFile.getAbsolutePath()
            + " ... ");

    xmpFile = metadataFile;
    currentDocuments = documents;

    SAXBuilder builder = XMLUtils.createNonvalidatingSaxBuilder();
    FileInputStream fileInputStream = new FileInputStream(Path.makeFile(workPath, xmpFile.getPath()));
    try {
        Reader reader = new InputStreamReader(new BOMInputStream(fileInputStream), "UTF-8");
        InputSource is = new InputSource(reader);
        is.setEncoding("UTF-8");
        rdfDoc = builder.build(is);
        descriptionElements = getXMPDescriptionElements();
    } finally {
        // The wrappers hold no resources of their own; closing the file stream
        // releases the handle even when parsing fails.
        fileInputStream.close();
    }
}
From source file:de.uzk.hki.da.metadata.LidoMetadataStructure.java
public LidoMetadataStructure(Path workPath, File metadataFile, List<de.uzk.hki.da.model.Document> documents) throws FileNotFoundException, JDOMException, IOException { super(workPath, metadataFile, documents); lidoFile = metadataFile;//from www . j av a2s.c o m currentDocuments = documents; SAXBuilder builder = XMLUtils.createNonvalidatingSaxBuilder(); FileInputStream fileInputStream = new FileInputStream(Path.makeFile(workPath, metadataFile.getPath())); BOMInputStream bomInputStream = new BOMInputStream(fileInputStream); Reader reader = new InputStreamReader(bomInputStream, "UTF-8"); InputSource is = new InputSource(reader); is.setEncoding("UTF-8"); doc = builder.build(is); lidoParser = new LidoParser(doc); lidoLinkResources = lidoParser.getLidoLinkResources(); fileInputStream.close(); bomInputStream.close(); }
From source file:de.uzk.hki.da.metadata.MetsMetadataStructure.java
public MetsMetadataStructure(Path workPath, File metadataFile, List<de.uzk.hki.da.model.Document> documents) throws FileNotFoundException, JDOMException, IOException { super(workPath, metadataFile, documents); metsFile = metadataFile;//from w ww . j a va 2 s . c o m currentDocuments = documents; SAXBuilder builder = XMLUtils.createNonvalidatingSaxBuilder(); FileInputStream fileInputStream = new FileInputStream(Path.makeFile(workPath, metsFile.getPath())); BOMInputStream bomInputStream = new BOMInputStream(fileInputStream); Reader reader = new InputStreamReader(bomInputStream, "UTF-8"); InputSource is = new InputSource(reader); is.setEncoding("UTF-8"); metsDoc = builder.build(is); metsParser = new MetsParser(metsDoc); fileElements = metsParser.getFileElementsFromMetsDoc(metsDoc); fileInputStream.close(); bomInputStream.close(); reader.close(); }
From source file:cn.dreampie.resource.LessSource.java
/**
 * Reads the whole resource into a string.
 *
 * <p>When the stream starts with a byte order mark, the charset announced by the BOM is used
 * for decoding; otherwise the supplied charset is used. The stream is closed in every case.
 *
 * @param resource the resource to read
 * @param charset  the charset to fall back to when no BOM is present
 * @return the decoded content of the resource
 * @throws IOException if the resource cannot be opened or read
 */
private String loadResource(Resource resource, Charset charset) throws IOException {
    BOMInputStream bomStream = new BOMInputStream(resource.getInputStream());
    try {
        final String charsetName;
        if (!bomStream.hasBOM()) {
            logger.debug("Using charset " + charset.name());
            charsetName = charset.name();
        } else {
            charsetName = bomStream.getBOMCharsetName();
            logger.debug("BOM found %s", charsetName);
        }
        return IOUtils.toString(bomStream, charsetName);
    } finally {
        bomStream.close();
    }
}
From source file:de.uzk.hki.da.metadata.LidoMetadataStructure.java
/** * Append to each administrativeMetadata in a Lido-File one RightsResourceType-Element and save it. * /* w ww . ja v a2s.c o m*/ * @param targetLidoFile * @param licenseHref * @param displayLabel * @param text * @throws IOException * @throws JDOMException */ public void appendRightsResource(File targetLidoFile, String licenseHref, String displayLabel) throws IOException, JDOMException { SAXBuilder builder = XMLUtils.createNonvalidatingSaxBuilder(); FileInputStream fileInputStream = new FileInputStream(Path.makeFile(workPath, targetLidoFile.getPath())); BOMInputStream bomInputStream = new BOMInputStream(fileInputStream); Reader reader = new InputStreamReader(bomInputStream, "UTF-8"); InputSource is = new InputSource(reader); is.setEncoding("UTF-8"); Document lidoDoc = builder.build(is); List<Element> lidoElems = lidoDoc.getRootElement().getChildren("lido", C.LIDO_NS); for (int i = 0; i < lidoElems.size(); i++) { appendRightsResourceToLido(lidoElems.get(i), licenseHref, displayLabel); } fileInputStream.close(); bomInputStream.close(); reader.close(); writeDocumentToFile(lidoDoc, Path.makeFile(workPath, targetLidoFile.getPath())); }
From source file:crawlercommons.sitemaps.SiteMapParser.java
/** * Decompress the gzipped content and process the resulting XML Sitemap. * //from w w w . ja va2 s . c o m * @param url * - URL of the gzipped content * @param response * - Gzipped content * @return the site map * @throws MalformedURLException * @throws IOException * @throws UnknownFormatException */ protected AbstractSiteMap processGzip(URL url, byte[] response) throws MalformedURLException, IOException, UnknownFormatException { LOG.debug("Processing gzip"); AbstractSiteMap smi; InputStream is = new ByteArrayInputStream(response); // Remove .gz ending String xmlUrl = url.toString().replaceFirst("\\.gz$", ""); LOG.debug("XML url = {}", xmlUrl); BOMInputStream decompressed = new BOMInputStream(new GZIPInputStream(is)); InputSource in = new InputSource(decompressed); in.setSystemId(xmlUrl); smi = processXml(url, in); decompressed.close(); return smi; }
From source file:com.andyasprou.webcrawler.Utilities.GenericSiteMapParser.java
/** * Decompress the gzipped content and process the resulting XML Sitemap. * * @param url// w w w . j a v a 2 s . c om * - URL of the gzipped content * @param response * - Gzipped content * @return the site map * @throws UnknownFormatException if there is an error parsing the gzip * @throws IOException if there is an error reading in the gzip {@link java.net.URL} */ protected AbstractSiteMap processGzip(URL url, byte[] response) throws IOException, UnknownFormatException { AbstractSiteMap smi; InputStream is = new ByteArrayInputStream(response); // Remove .gz ending String xmlUrl = url.toString().replaceFirst("\\.gz$", ""); BOMInputStream decompressed = new BOMInputStream(new GZIPInputStream(is)); InputSource in = new InputSource(decompressed); in.setSystemId(xmlUrl); smi = processXml(url, in); decompressed.close(); return smi; }
From source file:de.uzk.hki.da.metadata.MetsMetadataStructure.java
public void makeReplacementsHrefInMetsFile(File targetMetsFile, String currentHref, String targetHref, String mimetype, String loctype) throws IOException, JDOMException { SAXBuilder builder = XMLUtils.createNonvalidatingSaxBuilder(); logger.debug(":::" + workPath + ":::" + targetMetsFile.getPath()); FileInputStream fileInputStream = new FileInputStream(Path.makeFile(workPath, targetMetsFile.getPath())); BOMInputStream bomInputStream = new BOMInputStream(fileInputStream); Reader reader = new InputStreamReader(bomInputStream, "UTF-8"); InputSource is = new InputSource(reader); is.setEncoding("UTF-8"); Document metsDoc = builder.build(is); List<Element> metsFileElements = metsParser.getFileElementsFromMetsDoc(metsDoc); for (int i = 0; i < metsFileElements.size(); i++) { Element fileElement = (Element) metsFileElements.get(i); if (metsParser.getHref(fileElement).equals(currentHref)) { setHref(fileElement, targetHref); setMimetype(fileElement, mimetype); setLoctype(fileElement, loctype); }//from w w w . jav a 2 s.c o m } fileInputStream.close(); bomInputStream.close(); reader.close(); writeDocumentToFile(metsDoc, Path.makeFile(workPath, targetMetsFile.getPath())); }
From source file:com.thetdgroup.TextExtractionAdapter.java
private ContentInformation processFile(File fileName) throws IOException { ContentInformation extractedContent = new ContentInformation(); ContentHandler contenthandler = new BodyContentHandler(); Metadata metadata = new Metadata(); ////from w w w .j a va2 s .com InputStream inputStream = null; BOMInputStream bomInputStream = null; try { inputStream = new FileInputStream(fileName); bomInputStream = new BOMInputStream(inputStream, false); contenthandler = new BodyContentHandler(); metadata.set(Metadata.RESOURCE_NAME_KEY, fileName.getName()); Parser parser = new AutoDetectParser(); parser.parse(bomInputStream, contenthandler, metadata); } catch (Exception exception) { extractedContent.hasException(); extractedContent.setException(exception.toString()); } finally { if (bomInputStream != null) { bomInputStream.close(); } if (inputStream != null) { inputStream.close(); } } // // extractedContent.setImportedFileName(fileName.getName()); if (contenthandler != null) { String content = contenthandler.toString().replace("\n", " "); extractedContent.setContentData(content); } if (metadata != null) { // CREATIVE COMMONS extractedContent.setLicenseLocation(metadata.get(Metadata.LICENSE_LOCATION)); extractedContent.setLicenceURL(metadata.get(Metadata.LICENSE_URL)); extractedContent.setWorkType(metadata.get(Metadata.WORK_TYPE)); // DUBLIN CORE extractedContent.setContributor(metadata.get(Metadata.CONTRIBUTOR)); extractedContent.setCoverage(metadata.get(Metadata.COVERAGE)); extractedContent.setCreator(metadata.get(Metadata.CREATOR)); extractedContent.setDate(metadata.get(Metadata.DATE)); extractedContent.setDescription(metadata.get(Metadata.DESCRIPTION)); extractedContent.setFormat(metadata.get(Metadata.FORMAT)); extractedContent.setIdentifier(metadata.get(Metadata.IDENTIFIER)); extractedContent.setLanguage(metadata.get(Metadata.LANGUAGE)); extractedContent.setModified(metadata.get(Metadata.MODIFIED)); extractedContent.setPublisher(metadata.get(Metadata.PUBLISHER)); 
extractedContent.setRelation(metadata.get(Metadata.RELATION)); extractedContent.setRights(metadata.get(Metadata.RIGHTS)); extractedContent.setDublinSource(metadata.get(org.apache.tika.metadata.DublinCore.SOURCE)); extractedContent.setSubject(metadata.get(Metadata.SUBJECT)); extractedContent.setTitle(metadata.get(Metadata.TITLE)); extractedContent.setType(metadata.get(Metadata.TYPE)); // GEOGRAPHIC //extractedContent.setAltitude(metadata.get(Metadata.ALTITUDE)); //extractedContent.setLatitude(metadata.get(Metadata.LATITUDE)); //extractedContent.setLongitude(metadata.get(Metadata.LONGITUDE)); // HTTP HEADERS extractedContent.setContentDisposition(metadata.get(Metadata.CONTENT_DISPOSITION)); extractedContent.setContentEncoding(metadata.get(Metadata.CONTENT_ENCODING)); extractedContent.setContentLanguage(metadata.get(Metadata.CONTENT_LANGUAGE)); extractedContent.setContentLength(metadata.get(Metadata.CONTENT_LENGTH)); extractedContent.setContentLocation(metadata.get(Metadata.CONTENT_LOCATION)); extractedContent.setContentMD5(metadata.get(Metadata.CONTENT_MD5)); extractedContent.setContentType(metadata.get(Metadata.CONTENT_TYPE)); extractedContent.setLastModifier(metadata.get(Metadata.LAST_MODIFIED)); extractedContent.setLocation(metadata.get(Metadata.LOCATION)); // MESSAGE (EMAIL) //extractedContent.setMessageBCC(metadata.get(Metadata.MESSAGE_BCC)); //extractedContent.setMessageCC(metadata.get(Metadata.MESSAGE_CC)); //extractedContent.setMessageFrom(metadata.get(Metadata.MESSAGE_FROM)); //extractedContent.setMessageRecipientAddress(metadata.get(Metadata.MESSAGE_RECIPIENT_ADDRESS)); //extractedContent.setMessageTo(metadata.get(Metadata.MESSAGE_TO)); // MS OFFICE extractedContent.setApplicationName(metadata.get(Metadata.APPLICATION_NAME)); extractedContent.setApplicationVersion(metadata.get(Metadata.APPLICATION_VERSION)); extractedContent.setAuthor(metadata.get(Metadata.AUTHOR)); extractedContent.setCategory(metadata.get(Metadata.CATEGORY)); 
extractedContent.setCharacterCount(metadata.get(Metadata.CHARACTER_COUNT)); extractedContent.setCharacterCountWithSpace(metadata.get(Metadata.CHARACTER_COUNT_WITH_SPACES)); extractedContent.setComments(metadata.get(Metadata.COMMENTS)); extractedContent.setCompany(metadata.get(Metadata.COMPANY)); extractedContent.setContentStatus(metadata.get(Metadata.CONTENT_STATUS)); extractedContent.setCreationDate(metadata.get(Metadata.CREATION_DATE)); extractedContent.setEditTime(metadata.get(Metadata.EDIT_TIME)); extractedContent.setKeywords(metadata.get(Metadata.KEYWORDS)); extractedContent.setLastAuthor(metadata.get(Metadata.LAST_AUTHOR)); extractedContent.setLastPrinted(metadata.get(Metadata.LAST_PRINTED)); extractedContent.setLastSaved(metadata.get(Metadata.LAST_SAVED)); extractedContent.setLineCount(metadata.get(Metadata.LINE_COUNT)); extractedContent.setManager(metadata.get(Metadata.MANAGER)); extractedContent.setNotes(metadata.get(Metadata.NOTES)); extractedContent.setPageCount(metadata.get(Metadata.PAGE_COUNT)); extractedContent.setParagraphCount(metadata.get(Metadata.PARAGRAPH_COUNT)); extractedContent.setPresentationFormat(metadata.get(Metadata.PRESENTATION_FORMAT)); extractedContent.setRevisionNumber(metadata.get(Metadata.REVISION_NUMBER)); extractedContent.setSecurity(metadata.get(Metadata.SECURITY)); extractedContent.setSlideCount(metadata.get(Metadata.SLIDE_COUNT)); extractedContent.setTemplate(metadata.get(Metadata.TEMPLATE)); extractedContent.setTotalTime(metadata.get(Metadata.TOTAL_TIME)); extractedContent.setVersion(metadata.get(Metadata.VERSION)); extractedContent.setWordCount(metadata.get(Metadata.WORD_COUNT)); // CLIMATEFORCAST //extractedContent.setClimateForcastAcknowledgement(metadata.get(org.apache.tika.metadata.ClimateForcast.ACKNOWLEDGEMENT)); //extractedContent.setClimateForcastCommandLine(metadata.get(org.apache.tika.metadata.ClimateForcast.COMMAND_LINE)); 
//extractedContent.setClimateForcastComment(metadata.get(org.apache.tika.metadata.ClimateForcast.COMMENT)); //extractedContent.setClimateForcastContact(metadata.get(org.apache.tika.metadata.ClimateForcast.CONTACT)); //extractedContent.setClimateForcastConvention(metadata.get(org.apache.tika.metadata.ClimateForcast.CONVENTIONS)); //extractedContent.setClimateForcastExperimentID(metadata.get(org.apache.tika.metadata.ClimateForcast.EXPERIMENT_ID)); //extractedContent.setClimateForcastHistory(metadata.get(org.apache.tika.metadata.ClimateForcast.HISTORY)); //extractedContent.setClimateForcastInstitution(metadata.get(org.apache.tika.metadata.ClimateForcast.INSTITUTION)); //extractedContent.setClimateForcastModelName(metadata.get(org.apache.tika.metadata.ClimateForcast.MODEL_NAME_ENGLISH)); //extractedContent.setClimateForcastProgramID(metadata.get(org.apache.tika.metadata.ClimateForcast.PROGRAM_ID)); //extractedContent.setClimateForcastProjectID(metadata.get(org.apache.tika.metadata.ClimateForcast.PROJECT_ID)); //extractedContent.setClimateForcastRealization(metadata.get(org.apache.tika.metadata.ClimateForcast.REALIZATION)); //extractedContent.setClimateForcastReferences(metadata.get(org.apache.tika.metadata.ClimateForcast.REFERENCES)); //extractedContent.setClimateForcastSource(metadata.get(org.apache.tika.metadata.ClimateForcast.SOURCE)); //extractedContent.setClimateForcastTableID(metadata.get(org.apache.tika.metadata.ClimateForcast.TABLE_ID)); // TIFF //extractedContent.setTIFFBitsPerSample(metadata.get(Metadata.BITS_PER_SAMPLE)); //extractedContent.setTIFFEquipmentMake(metadata.get(Metadata.EQUIPMENT_MAKE)); //extractedContent.setTIFFEquipmentModel(metadata.get(Metadata.EQUIPMENT_MODEL)); //extractedContent.setTIFFExposureLimit(metadata.get(Metadata.EXPOSURE_TIME)); //extractedContent.setTIFFFNumber(metadata.get(Metadata.F_NUMBER)); //extractedContent.setTIFFFlashFired(metadata.get(Metadata.FLASH_FIRED)); 
//extractedContent.setTIFFFocalLength(metadata.get(Metadata.FOCAL_LENGTH)); //extractedContent.setTIFFImageLength(metadata.get(Metadata.IMAGE_LENGTH)); //extractedContent.setTIFFImageWidth(metadata.get(Metadata.IMAGE_WIDTH)); //extractedContent.setTIFFISOSpeedRating(metadata.get(Metadata.ISO_SPEED_RATINGS)); //extractedContent.setTIFFOrientation(metadata.get(Metadata.ORIENTATION)); //extractedContent.setTIFFOriginalDate(metadata.get(Metadata.ORIGINAL_DATE)); //extractedContent.setTIFFResolutionHorizontal(metadata.get(Metadata.RESOLUTION_HORIZONTAL)); //extractedContent.setTIFFResolutionUnit(metadata.get(Metadata.RESOLUTION_UNIT)); //extractedContent.setTIFFResolutionVertical(metadata.get(Metadata.RESOLUTION_VERTICAL)); //extractedContent.setTIFFSamplePerPixel(metadata.get(Metadata.SAMPLES_PER_PIXEL)); //extractedContent.setTIFFSoftware(metadata.get(Metadata.SOFTWARE)); // TIKA METADATA KEYS extractedContent.setResourceNameKey(metadata.get(Metadata.RESOURCE_NAME_KEY)); // TIKA MIME KEYS extractedContent.setMimeTypeMagic(metadata.get(Metadata.MIME_TYPE_MAGIC)); extractedContent.setTikaMimeType(metadata.get(Metadata.TIKA_MIME_FILE)); } // return extractedContent; }