List of usage examples for org.apache.pdfbox.pdmodel.common.filespecification.PDEmbeddedFile#getSize()
public int getSize()
From source file:mj.ocraptor.extraction.tika.parser.pdf.PDF2XHTML.java
License:Apache License
private void processEmbeddedDocNames(Map<String, COSObjectable> embeddedFileNames, EmbeddedDocumentExtractor embeddedExtractor) throws IOException, SAXException, TikaException { if (embeddedFileNames == null) { return;//from w w w. j av a2s. c o m } for (Map.Entry<String, COSObjectable> ent : embeddedFileNames.entrySet()) { PDComplexFileSpecification spec = (PDComplexFileSpecification) ent.getValue(); PDEmbeddedFile file = spec.getEmbeddedFile(); Metadata metadata = new Metadata(); // TODO: other metadata? metadata.set(Metadata.RESOURCE_NAME_KEY, ent.getKey()); metadata.set(Metadata.CONTENT_TYPE, file.getSubtype()); metadata.set(Metadata.CONTENT_LENGTH, Long.toString(file.getSize())); if (embeddedExtractor.shouldParseEmbedded(metadata)) { TikaInputStream stream = TikaInputStream.get(file.createInputStream()); try { embeddedExtractor.parseEmbedded(stream, new EmbeddedContentHandler(handler), metadata, false); } finally { stream.close(); } } } }
From source file:org.apache.tika.parser.pdf.AbstractPDF2XHTML.java
License:Apache License
private void extractPDEmbeddedFile(String displayName, String unicodeFileName, String fileName, PDEmbeddedFile file, AttributesImpl attributes) throws SAXException, IOException, TikaException { if (file == null) { //skip silently return;//from w w w .j a v a2 s. co m } fileName = (fileName == null || "".equals(fileName.trim())) ? unicodeFileName : fileName; fileName = (fileName == null || "".equals(fileName.trim())) ? displayName : fileName; // TODO: other metadata? Metadata embeddedMetadata = new Metadata(); embeddedMetadata.set(Metadata.RESOURCE_NAME_KEY, fileName); embeddedMetadata.set(Metadata.CONTENT_TYPE, file.getSubtype()); embeddedMetadata.set(Metadata.CONTENT_LENGTH, Long.toString(file.getSize())); embeddedMetadata.set(TikaCoreProperties.EMBEDDED_RESOURCE_TYPE, TikaCoreProperties.EmbeddedResourceType.ATTACHMENT.toString()); embeddedMetadata.set(TikaCoreProperties.ORIGINAL_RESOURCE_NAME, fileName); if (!embeddedDocumentExtractor.shouldParseEmbedded(embeddedMetadata)) { return; } TikaInputStream stream = null; try { stream = TikaInputStream.get(file.createInputStream()); } catch (IOException e) { //store this exception in the parent's metadata EmbeddedDocumentUtil.recordEmbeddedStreamException(e, metadata); return; } try { embeddedDocumentExtractor.parseEmbedded(stream, new EmbeddedContentHandler(xhtml), embeddedMetadata, false); attributes.addAttribute("", "class", "class", "CDATA", "embedded"); attributes.addAttribute("", "id", "id", "CDATA", fileName); xhtml.startElement("div", attributes); xhtml.endElement("div"); } finally { IOUtils.closeQuietly(stream); } }
From source file:org.apache.tika.parser.pdf.EnhancedPDF2XHTML.java
License:Apache License
private void extractPDEmbeddedFile(String defaultName, String fileName, PDEmbeddedFile file, EmbeddedDocumentExtractor extractor) throws SAXException, IOException, TikaException { if (file == null) { //skip silently return;/*w w w. j a v a 2 s. c o m*/ } fileName = (fileName == null) ? defaultName : fileName; // TODO: other metadata? Metadata metadata = new Metadata(); metadata.set(Metadata.RESOURCE_NAME_KEY, fileName); metadata.set(Metadata.CONTENT_TYPE, file.getSubtype()); metadata.set(Metadata.CONTENT_LENGTH, Long.toString(file.getSize())); metadata.set(TikaCoreProperties.EMBEDDED_RESOURCE_TYPE, TikaCoreProperties.EmbeddedResourceType.ATTACHMENT.toString()); if (extractor.shouldParseEmbedded(metadata)) { TikaInputStream stream = null; try { stream = TikaInputStream.get(file.createInputStream()); extractor.parseEmbedded(stream, new EmbeddedContentHandler(handler), metadata, false); AttributesImpl attributes = new AttributesImpl(); attributes.addAttribute("", "class", "class", "CDATA", "embedded"); attributes.addAttribute("", "id", "id", "CDATA", fileName); handler.startElement("div", attributes); handler.endElement("div"); } finally { IOUtils.closeQuietly(stream); } } }