Example usage of org.apache.commons.io.input.BOMInputStream.close()

List of usage examples for org.apache.commons.io.input BOMInputStream close

Introduction

This page collects usage examples of the close() method of org.apache.commons.io.input.BOMInputStream.

Prototype

public void close() throws IOException 

Source Link

Document

Invokes the delegate's close() method.

Usage

From source file:de.uzk.hki.da.model.RightsSectionURNMetsXmlReader.java

/**
 * Reads the URN from the MODS section of a METS file.
 * <p>
 * The lookup follows the first {@code dmdSec/mdWrap/xmlData/mods} chain below the root
 * element and inspects every {@code identifier} child of {@code mods}. If several
 * identifiers carry a {@code type} of "urn" (compared case-insensitively), the value of
 * the last one is returned. Identifiers without a {@code type} attribute are ignored.
 *
 * @param file the file
 * @return The URN specified in the METS file or null if the METS file doesn't specify an URN
 * @throws IOException Signals that an I/O exception has occurred, that the SAX parser could
 *         not be created, or that the document could not be parsed.
 * @throws ParseException the parse exception
 * @author Thomas Kleinke
 */
public String readURN(File file) throws IOException, ParseException {

    // close() on the BOM wrapper is forwarded to the wrapped FileInputStream, so the single
    // close() in the finally block releases the file on every exit path, including
    // failures while the parser is being set up.
    BOMInputStream bomInputStream = new BOMInputStream(new FileInputStream(file));
    try {
        XMLReader xmlReader;
        try {
            xmlReader = SAXParserFactory.newInstance().newSAXParser().getXMLReader();
        } catch (Exception e) {
            throw new IOException("Error creating SAX parser", e);
        }
        xmlReader.setErrorHandler(err);
        NodeFactory nodeFactory = new PremisXmlReaderNodeFactory();
        Builder parser = new Builder(xmlReader, false, nodeFactory);
        logger.trace("Successfully built builder and XML reader");

        Document doc = parser.build(bomInputStream);
        Element root = doc.getRootElement();

        Element dmdSecEl = root.getFirstChildElement("dmdSec", METS_NS);
        if (dmdSecEl == null) {
            return null;
        }

        Element mdWrapEl = dmdSecEl.getFirstChildElement("mdWrap", METS_NS);
        if (mdWrapEl == null) {
            return null;
        }

        Element xmlDataEl = mdWrapEl.getFirstChildElement("xmlData", METS_NS);
        if (xmlDataEl == null) {
            return null;
        }

        Element modsEl = xmlDataEl.getFirstChildElement("mods", MODS_NS);
        if (modsEl == null) {
            return null;
        }

        String urn = null;
        Elements identifierEls = modsEl.getChildElements("identifier", MODS_NS);
        for (int i = 0; i < identifierEls.size(); i++) {
            Element element = identifierEls.get(i);
            // getAttribute yields null when the identifier has no "type" attribute;
            // such identifiers are skipped instead of raising a NullPointerException.
            Attribute attribute = element.getAttribute("type");
            if (attribute != null && "urn".equalsIgnoreCase(attribute.getValue())) {
                urn = element.getValue();
            }
        }

        // An empty URN is reported the same way as a missing one.
        if ("".equals(urn)) {
            urn = null;
        }

        return urn;
    } catch (ValidityException ve) {
        throw new IOException(ve);
    } catch (ParsingException pe) {
        throw new IOException(pe);
    } finally {
        bomInputStream.close();
    }
}

From source file:de.uzk.hki.da.metadata.XMPMetadataStructure.java

/**
 * Creates the structure by parsing the XMP metadata file as UTF-8 XML.
 * <p>
 * The file is resolved via {@code Path.makeFile(workPath, metadataFile.getPath())} and read
 * through a {@link BOMInputStream}. The parsed document is stored in {@code rdfDoc} and
 * {@code descriptionElements} is filled from {@code getXMPDescriptionElements()}.
 *
 * @param workPath base path the metadata file path is resolved against
 * @param metadataFile the XMP metadata file
 * @param documents the documents stored as {@code currentDocuments}
 * @throws FileNotFoundException if the resolved file cannot be opened
 * @throws JDOMException if the file cannot be parsed
 * @throws IOException if reading or closing the file fails
 */
public XMPMetadataStructure(Path workPath, File metadataFile, List<de.uzk.hki.da.model.Document> documents)
        throws FileNotFoundException, JDOMException, IOException {
    super(workPath, metadataFile, documents);

    logger.debug("Instantiate new xmp metadata structure with metadata file " + metadataFile.getAbsolutePath()
            + " ... ");

    xmpFile = metadataFile;
    currentDocuments = documents;

    SAXBuilder builder = XMLUtils.createNonvalidatingSaxBuilder();
    BOMInputStream bomInputStream = new BOMInputStream(
            new FileInputStream(Path.makeFile(workPath, xmpFile.getPath())));
    try {
        Reader reader = new InputStreamReader(bomInputStream, "UTF-8");
        InputSource is = new InputSource(reader);
        is.setEncoding("UTF-8");
        rdfDoc = builder.build(is);

        descriptionElements = getXMPDescriptionElements();
    } finally {
        // Forwarded to the wrapped FileInputStream; runs even when parsing fails, so the
        // file handle is no longer leaked on the error path.
        bomInputStream.close();
    }
}

From source file:de.uzk.hki.da.metadata.LidoMetadataStructure.java

/**
 * Creates the structure by parsing the LIDO metadata file as UTF-8 XML.
 * <p>
 * The file is resolved via {@code Path.makeFile(workPath, metadataFile.getPath())} and read
 * through a {@link BOMInputStream}. The parsed document is stored in {@code doc}, wrapped in
 * a {@code LidoParser}, and {@code lidoLinkResources} is filled from that parser.
 *
 * @param workPath base path the metadata file path is resolved against
 * @param metadataFile the LIDO metadata file
 * @param documents the documents stored as {@code currentDocuments}
 * @throws FileNotFoundException if the resolved file cannot be opened
 * @throws JDOMException if the file cannot be parsed
 * @throws IOException if reading or closing the file fails
 */
public LidoMetadataStructure(Path workPath, File metadataFile, List<de.uzk.hki.da.model.Document> documents)
        throws FileNotFoundException, JDOMException, IOException {
    super(workPath, metadataFile, documents);

    lidoFile = metadataFile;
    currentDocuments = documents;

    SAXBuilder builder = XMLUtils.createNonvalidatingSaxBuilder();
    BOMInputStream bomInputStream = new BOMInputStream(
            new FileInputStream(Path.makeFile(workPath, metadataFile.getPath())));
    try {
        Reader reader = new InputStreamReader(bomInputStream, "UTF-8");
        InputSource is = new InputSource(reader);
        is.setEncoding("UTF-8");
        doc = builder.build(is);
        lidoParser = new LidoParser(doc);

        lidoLinkResources = lidoParser.getLidoLinkResources();
    } finally {
        // Forwarded to the wrapped FileInputStream; runs even when parsing fails, so the
        // file handle is no longer leaked on the error path.
        bomInputStream.close();
    }
}

From source file:de.uzk.hki.da.metadata.MetsMetadataStructure.java

/**
 * Creates the structure by parsing the METS metadata file as UTF-8 XML.
 * <p>
 * The file is resolved via {@code Path.makeFile(workPath, metadataFile.getPath())} and read
 * through a {@link BOMInputStream}. The parsed document is stored in {@code metsDoc}, wrapped
 * in a {@code MetsParser}, and {@code fileElements} is filled from that parser.
 *
 * @param workPath base path the metadata file path is resolved against
 * @param metadataFile the METS metadata file
 * @param documents the documents stored as {@code currentDocuments}
 * @throws FileNotFoundException if the resolved file cannot be opened
 * @throws JDOMException if the file cannot be parsed
 * @throws IOException if reading or closing the file fails
 */
public MetsMetadataStructure(Path workPath, File metadataFile, List<de.uzk.hki.da.model.Document> documents)
        throws FileNotFoundException, JDOMException, IOException {
    super(workPath, metadataFile, documents);

    metsFile = metadataFile;
    currentDocuments = documents;

    SAXBuilder builder = XMLUtils.createNonvalidatingSaxBuilder();
    BOMInputStream bomInputStream = new BOMInputStream(
            new FileInputStream(Path.makeFile(workPath, metsFile.getPath())));
    try {
        Reader reader = new InputStreamReader(bomInputStream, "UTF-8");
        InputSource is = new InputSource(reader);
        is.setEncoding("UTF-8");
        metsDoc = builder.build(is);
        metsParser = new MetsParser(metsDoc);

        fileElements = metsParser.getFileElementsFromMetsDoc(metsDoc);
    } finally {
        // Forwarded to the wrapped FileInputStream; runs even when parsing fails, so the
        // file handle is no longer leaked on the error path.
        bomInputStream.close();
    }
}

From source file:cn.dreampie.resource.LessSource.java

/**
 * Reads the whole resource into a string.
 * <p>
 * When the stream starts with a byte order mark, the charset named by that mark is used;
 * otherwise the supplied {@code charset} is used. The stream is closed before returning.
 *
 * @param resource the resource whose input stream is read
 * @param charset charset used when no byte order mark is present
 * @return the content of the resource
 * @throws IOException if reading or closing the stream fails
 */
private String loadResource(Resource resource, Charset charset) throws IOException {
    BOMInputStream bomStream = new BOMInputStream(resource.getInputStream());
    try {
        final String encoding;
        if (!bomStream.hasBOM()) {
            logger.debug("Using charset " + charset.name());
            encoding = charset.name();
        } else {
            logger.debug("BOM found %s", bomStream.getBOMCharsetName());
            encoding = bomStream.getBOMCharsetName();
        }
        return IOUtils.toString(bomStream, encoding);
    } finally {
        bomStream.close();
    }
}

From source file:de.uzk.hki.da.metadata.LidoMetadataStructure.java

/**
 * Append to each administrativeMetadata in a Lido-File one RightsResourceType-Element and save it.
 * /* w  ww  . ja  v a2s.c o  m*/
 * @param targetLidoFile
 * @param licenseHref
 * @param displayLabel
 * @param text
 * @throws IOException
 * @throws JDOMException
 */
public void appendRightsResource(File targetLidoFile, String licenseHref, String displayLabel)
        throws IOException, JDOMException {
    SAXBuilder builder = XMLUtils.createNonvalidatingSaxBuilder();

    FileInputStream fileInputStream = new FileInputStream(Path.makeFile(workPath, targetLidoFile.getPath()));
    BOMInputStream bomInputStream = new BOMInputStream(fileInputStream);
    Reader reader = new InputStreamReader(bomInputStream, "UTF-8");
    InputSource is = new InputSource(reader);
    is.setEncoding("UTF-8");
    Document lidoDoc = builder.build(is);

    List<Element> lidoElems = lidoDoc.getRootElement().getChildren("lido", C.LIDO_NS);

    for (int i = 0; i < lidoElems.size(); i++) {
        appendRightsResourceToLido(lidoElems.get(i), licenseHref, displayLabel);
    }

    fileInputStream.close();
    bomInputStream.close();
    reader.close();

    writeDocumentToFile(lidoDoc, Path.makeFile(workPath, targetLidoFile.getPath()));
}

From source file:crawlercommons.sitemaps.SiteMapParser.java

/**
 * Decompress the gzipped content and process the resulting XML Sitemap.
 * <p>
 * The system id handed to the XML parser is the URL with a trailing ".gz" removed.
 *
 * @param url
 *            - URL of the gzipped content
 * @param response
 *            - Gzipped content
 * @return the site map
 * @throws MalformedURLException
 * @throws IOException
 *             if the content is not valid gzip data or cannot be read
 * @throws UnknownFormatException
 */
protected AbstractSiteMap processGzip(URL url, byte[] response)
        throws MalformedURLException, IOException, UnknownFormatException {

    LOG.debug("Processing gzip");

    InputStream is = new ByteArrayInputStream(response);

    // Remove .gz ending
    String xmlUrl = url.toString().replaceFirst("\\.gz$", "");

    LOG.debug("XML url = {}", xmlUrl);

    BOMInputStream decompressed = new BOMInputStream(new GZIPInputStream(is));
    try {
        InputSource in = new InputSource(decompressed);
        in.setSystemId(xmlUrl);
        return processXml(url, in);
    } finally {
        // Forwarded to the GZIPInputStream; runs even when processXml throws, so the
        // decompressor is always released.
        decompressed.close();
    }
}

From source file:com.andyasprou.webcrawler.Utilities.GenericSiteMapParser.java

/**
 * Decompress the gzipped content and process the resulting XML Sitemap.
 * <p>
 * The system id handed to the XML parser is the URL with a trailing ".gz" removed.
 *
 * @param url
 *            - URL of the gzipped content
 * @param response
 *            - Gzipped content
 * @return the site map
 * @throws UnknownFormatException if there is an error parsing the gzip
 * @throws IOException if there is an error reading in the gzip {@link java.net.URL}
 */
protected AbstractSiteMap processGzip(URL url, byte[] response) throws IOException, UnknownFormatException {

    InputStream is = new ByteArrayInputStream(response);

    // Remove .gz ending
    String xmlUrl = url.toString().replaceFirst("\\.gz$", "");

    BOMInputStream decompressed = new BOMInputStream(new GZIPInputStream(is));
    try {
        InputSource in = new InputSource(decompressed);
        in.setSystemId(xmlUrl);
        return processXml(url, in);
    } finally {
        // Forwarded to the GZIPInputStream; runs even when processXml throws, so the
        // decompressor is always released.
        decompressed.close();
    }
}

From source file:de.uzk.hki.da.metadata.MetsMetadataStructure.java

/**
 * Rewrites file references in a METS file and saves it.
 * <p>
 * The file is parsed as UTF-8 XML. For every file element whose href equals
 * {@code currentHref}, the href, mimetype and loctype are replaced. The document is then
 * written back to the same file.
 *
 * @param targetMetsFile the METS file, resolved against {@code workPath}, that is read and
 *        then overwritten
 * @param currentHref href value identifying the file elements to change
 * @param targetHref new href value
 * @param mimetype new mimetype value
 * @param loctype new loctype value
 * @throws IOException if reading, closing or writing the file fails
 * @throws JDOMException if the file cannot be parsed
 */
public void makeReplacementsHrefInMetsFile(File targetMetsFile, String currentHref, String targetHref,
        String mimetype, String loctype) throws IOException, JDOMException {
    SAXBuilder builder = XMLUtils.createNonvalidatingSaxBuilder();
    logger.debug(":::" + workPath + ":::" + targetMetsFile.getPath());

    Document metsDoc;
    BOMInputStream bomInputStream = new BOMInputStream(
            new FileInputStream(Path.makeFile(workPath, targetMetsFile.getPath())));
    try {
        Reader reader = new InputStreamReader(bomInputStream, "UTF-8");
        InputSource is = new InputSource(reader);
        is.setEncoding("UTF-8");
        metsDoc = builder.build(is);
    } finally {
        // Forwarded to the wrapped FileInputStream. The input must be closed before the
        // same file is rewritten below, and must not leak when parsing fails.
        bomInputStream.close();
    }

    List<Element> metsFileElements = metsParser.getFileElementsFromMetsDoc(metsDoc);
    for (Element fileElement : metsFileElements) {
        if (metsParser.getHref(fileElement).equals(currentHref)) {
            setHref(fileElement, targetHref);
            setMimetype(fileElement, mimetype);
            setLoctype(fileElement, loctype);
        }
    }

    writeDocumentToFile(metsDoc, Path.makeFile(workPath, targetMetsFile.getPath()));
}

From source file:com.thetdgroup.TextExtractionAdapter.java

/**
 * Extracts text content and metadata from a file with Tika's auto-detecting parser.
 * <p>
 * Any exception raised while opening or parsing the file is not propagated; its string form
 * is stored on the returned object. Whatever content and metadata were collected up to that
 * point are still copied into the result.
 *
 * @param fileName the file to parse
 * @return the extracted content and metadata
 * @throws IOException if closing the input stream fails
 */
private ContentInformation processFile(File fileName) throws IOException {
    ContentInformation extractedContent = new ContentInformation();
    ContentHandler contenthandler = new BodyContentHandler();
    Metadata metadata = new Metadata();

    InputStream inputStream = null;
    BOMInputStream bomInputStream = null;

    try {
        inputStream = new FileInputStream(fileName);
        bomInputStream = new BOMInputStream(inputStream, false);

        metadata.set(Metadata.RESOURCE_NAME_KEY, fileName.getName());

        Parser parser = new AutoDetectParser();
        parser.parse(bomInputStream, contenthandler, metadata);
    } catch (Exception exception) {
        // NOTE(review): the result of hasException() is discarded and its effect is not
        // visible from here - confirm that this call is what flags the failure.
        extractedContent.hasException();
        extractedContent.setException(exception.toString());
    } finally {
        // close() on the BOM wrapper is forwarded to the wrapped file stream, so the raw
        // stream only needs closing itself when the wrapper was never created.
        if (bomInputStream != null) {
            bomInputStream.close();
        } else if (inputStream != null) {
            inputStream.close();
        }
    }

    extractedContent.setImportedFileName(fileName.getName());

    // The extracted text is flattened onto a single line.
    String content = contenthandler.toString().replace("\n", " ");
    extractedContent.setContentData(content);

    copyMetadata(metadata, extractedContent);

    return extractedContent;
}

/**
 * Copies the supported Tika metadata properties into the content information object.
 * <p>
 * Not copied: geographic, e-mail message, ClimateForcast and TIFF properties.
 *
 * @param metadata the metadata collected by the parser
 * @param extractedContent the object receiving the values
 */
private void copyMetadata(Metadata metadata, ContentInformation extractedContent) {
    // CREATIVE COMMONS
    extractedContent.setLicenseLocation(metadata.get(Metadata.LICENSE_LOCATION));
    extractedContent.setLicenceURL(metadata.get(Metadata.LICENSE_URL));
    extractedContent.setWorkType(metadata.get(Metadata.WORK_TYPE));

    // DUBLIN CORE
    extractedContent.setContributor(metadata.get(Metadata.CONTRIBUTOR));
    extractedContent.setCoverage(metadata.get(Metadata.COVERAGE));
    extractedContent.setCreator(metadata.get(Metadata.CREATOR));
    extractedContent.setDate(metadata.get(Metadata.DATE));
    extractedContent.setDescription(metadata.get(Metadata.DESCRIPTION));
    extractedContent.setFormat(metadata.get(Metadata.FORMAT));
    extractedContent.setIdentifier(metadata.get(Metadata.IDENTIFIER));
    extractedContent.setLanguage(metadata.get(Metadata.LANGUAGE));
    extractedContent.setModified(metadata.get(Metadata.MODIFIED));
    extractedContent.setPublisher(metadata.get(Metadata.PUBLISHER));
    extractedContent.setRelation(metadata.get(Metadata.RELATION));
    extractedContent.setRights(metadata.get(Metadata.RIGHTS));
    extractedContent.setDublinSource(metadata.get(org.apache.tika.metadata.DublinCore.SOURCE));
    extractedContent.setSubject(metadata.get(Metadata.SUBJECT));
    extractedContent.setTitle(metadata.get(Metadata.TITLE));
    extractedContent.setType(metadata.get(Metadata.TYPE));

    // HTTP HEADERS
    extractedContent.setContentDisposition(metadata.get(Metadata.CONTENT_DISPOSITION));
    extractedContent.setContentEncoding(metadata.get(Metadata.CONTENT_ENCODING));
    extractedContent.setContentLanguage(metadata.get(Metadata.CONTENT_LANGUAGE));
    extractedContent.setContentLength(metadata.get(Metadata.CONTENT_LENGTH));
    extractedContent.setContentLocation(metadata.get(Metadata.CONTENT_LOCATION));
    extractedContent.setContentMD5(metadata.get(Metadata.CONTENT_MD5));
    extractedContent.setContentType(metadata.get(Metadata.CONTENT_TYPE));
    extractedContent.setLastModifier(metadata.get(Metadata.LAST_MODIFIED));
    extractedContent.setLocation(metadata.get(Metadata.LOCATION));

    // MS OFFICE
    extractedContent.setApplicationName(metadata.get(Metadata.APPLICATION_NAME));
    extractedContent.setApplicationVersion(metadata.get(Metadata.APPLICATION_VERSION));
    extractedContent.setAuthor(metadata.get(Metadata.AUTHOR));
    extractedContent.setCategory(metadata.get(Metadata.CATEGORY));
    extractedContent.setCharacterCount(metadata.get(Metadata.CHARACTER_COUNT));
    extractedContent.setCharacterCountWithSpace(metadata.get(Metadata.CHARACTER_COUNT_WITH_SPACES));
    extractedContent.setComments(metadata.get(Metadata.COMMENTS));
    extractedContent.setCompany(metadata.get(Metadata.COMPANY));
    extractedContent.setContentStatus(metadata.get(Metadata.CONTENT_STATUS));
    extractedContent.setCreationDate(metadata.get(Metadata.CREATION_DATE));
    extractedContent.setEditTime(metadata.get(Metadata.EDIT_TIME));
    extractedContent.setKeywords(metadata.get(Metadata.KEYWORDS));
    extractedContent.setLastAuthor(metadata.get(Metadata.LAST_AUTHOR));
    extractedContent.setLastPrinted(metadata.get(Metadata.LAST_PRINTED));
    extractedContent.setLastSaved(metadata.get(Metadata.LAST_SAVED));
    extractedContent.setLineCount(metadata.get(Metadata.LINE_COUNT));
    extractedContent.setManager(metadata.get(Metadata.MANAGER));
    extractedContent.setNotes(metadata.get(Metadata.NOTES));
    extractedContent.setPageCount(metadata.get(Metadata.PAGE_COUNT));
    extractedContent.setParagraphCount(metadata.get(Metadata.PARAGRAPH_COUNT));
    extractedContent.setPresentationFormat(metadata.get(Metadata.PRESENTATION_FORMAT));
    extractedContent.setRevisionNumber(metadata.get(Metadata.REVISION_NUMBER));
    extractedContent.setSecurity(metadata.get(Metadata.SECURITY));
    extractedContent.setSlideCount(metadata.get(Metadata.SLIDE_COUNT));
    extractedContent.setTemplate(metadata.get(Metadata.TEMPLATE));
    extractedContent.setTotalTime(metadata.get(Metadata.TOTAL_TIME));
    extractedContent.setVersion(metadata.get(Metadata.VERSION));
    extractedContent.setWordCount(metadata.get(Metadata.WORD_COUNT));

    // TIKA METADATA KEYS
    extractedContent.setResourceNameKey(metadata.get(Metadata.RESOURCE_NAME_KEY));

    // TIKA MIME KEYS
    extractedContent.setMimeTypeMagic(metadata.get(Metadata.MIME_TYPE_MAGIC));
    extractedContent.setTikaMimeType(metadata.get(Metadata.TIKA_MIME_FILE));
}