Usage examples for the method org.apache.pdfbox.pdmodel.common.filespecification.PDComplexFileSpecification.getFile()
@Override
public String getFile()
From source file:algorithm.PDFFileAttacher.java
License:Apache License
@Override public List<RestoredFile> restore(File originalPdf) throws IOException { RestoredFile copiedPdf = getRestoredCarrier(originalPdf); List<RestoredFile> restoredFiles = new ArrayList<RestoredFile>(); PDDocument document = PDDocument.load(copiedPdf); PDDocumentNameDictionary namesDictionary = new PDDocumentNameDictionary(document.getDocumentCatalog()); PDEmbeddedFilesNameTreeNode filesTree = namesDictionary.getEmbeddedFiles(); if (filesTree != null) { int i = 0; while (true) { PDComplexFileSpecification fileSpecification = (PDComplexFileSpecification) filesTree .getValue("PericlesMetadata-" + i); if (fileSpecification == null) { break; }//from ww w . jav a 2 s . co m File oldAttachedFile = new File(fileSpecification.getFile()); RestoredFile restoredPayload = new RestoredFile(RESTORED_DIRECTORY + oldAttachedFile.getName()); PDEmbeddedFile embeddedFile = fileSpecification.getEmbeddedFile(); InputStream inputStream = embeddedFile.createInputStream(); FileOutputStream outputStream = new FileOutputStream(restoredPayload); IOUtils.copy(inputStream, outputStream); removeBuggyLineEnding(restoredPayload); restoredPayload.wasPayload = true; restoredPayload.checksumValid = true; restoredPayload.restorationNote = "Checksum wasn't calculated, because this algorithm isn't using restoration metadata. The original payload file survives the encapsulation with this algorithm."; restoredFiles.add(restoredPayload); i++; } } document.close(); copiedPdf.wasCarrier = true; copiedPdf.checksumValid = false; copiedPdf.restorationNote = "Checksum can't be valid, because attached payload files can't be removed from carrier."; restoredFiles.add(copiedPdf); for (RestoredFile file : restoredFiles) { file.algorithm = this; for (RestoredFile relatedFile : restoredFiles) { if (file != relatedFile) { file.relatedFiles.add(relatedFile); } } } return restoredFiles; }
From source file:org.apache.tika.parser.pdf.AbstractPDF2XHTML.java
License:Apache License
private void extractMultiOSPDEmbeddedFiles(String displayName, PDComplexFileSpecification spec, AttributesImpl attributes) throws IOException, SAXException, TikaException { if (spec == null) { return;/*from w w w. j av a2 s.c o m*/ } //current strategy is to pull all, not just first non-null extractPDEmbeddedFile(displayName, spec.getFileUnicode(), spec.getFile(), spec.getEmbeddedFile(), attributes); extractPDEmbeddedFile(displayName, spec.getFileUnicode(), spec.getFileMac(), spec.getEmbeddedFileMac(), attributes); extractPDEmbeddedFile(displayName, spec.getFileUnicode(), spec.getFileDos(), spec.getEmbeddedFileDos(), attributes); extractPDEmbeddedFile(displayName, spec.getFileUnicode(), spec.getFileUnix(), spec.getEmbeddedFileUnix(), attributes); }
From source file:org.apache.tika.parser.pdf.EnhancedPDF2XHTML.java
License:Apache License
private void extractMultiOSPDEmbeddedFiles(String defaultName, PDComplexFileSpecification spec, EmbeddedDocumentExtractor extractor) throws IOException, SAXException, TikaException { if (spec == null) { return;//from w w w . ja va 2s.c o m } //current strategy is to pull all, not just first non-null extractPDEmbeddedFile(defaultName, spec.getFile(), spec.getEmbeddedFile(), extractor); extractPDEmbeddedFile(defaultName, spec.getFileMac(), spec.getEmbeddedFileMac(), extractor); extractPDEmbeddedFile(defaultName, spec.getFileDos(), spec.getEmbeddedFileDos(), extractor); extractPDEmbeddedFile(defaultName, spec.getFileUnix(), spec.getEmbeddedFileUnix(), extractor); }
From source file:org.paxle.parser.pdf.impl.PdfParser.java
License:Open Source License
/** * A function to extract the content of embedded files from a PDF document. *//*from www .j ava 2s . c o m*/ protected void extractEmbeddedFiles(URI location, IParserDocument parserDoc, PDDocument pddDoc) throws IOException { final PDDocumentCatalog pddDocCatalog = pddDoc.getDocumentCatalog(); if (pddDocCatalog == null) return; final PDDocumentNameDictionary nameDic = pddDocCatalog.getNames(); if (nameDic == null) return; final PDEmbeddedFilesNameTreeNode embeddedFiles = nameDic.getEmbeddedFiles(); if (embeddedFiles == null) return; @SuppressWarnings("unchecked") final Map<String, Object> names = embeddedFiles.getNames(); if (names == null || names.isEmpty()) return; final IParserContext context = this.contextLocal.getCurrentContext(); for (Entry<String, Object> name : names.entrySet()) { // final String fileDesc = name.getKey(); final Object fileObj = name.getValue(); if (fileObj == null) continue; if (fileObj instanceof PDComplexFileSpecification) { final PDComplexFileSpecification embeddedFileSpec = (PDComplexFileSpecification) fileObj; final PDEmbeddedFile embeddedFile = embeddedFileSpec.getEmbeddedFile(); // getting the embedded file name and mime-type final String fileName = embeddedFileSpec.getFile(); final String fileMimeType = embeddedFile.getSubtype(); if (fileMimeType == null) { this.logger.warn(String.format("No mime-type specified form embedded file '%s#%s'.", location, fileName)); continue; } // getting a parser to parse the content final ISubParser sp = context.getParser(fileMimeType); if (sp == null) { this.logger.warn(String.format("No parser found to parse embedded file '%s#%s' with type '%s'.", location, fileName, fileMimeType)); continue; } // parsing content InputStream embeddedFileStream = null; try { embeddedFileStream = embeddedFile.createInputStream(); final IParserDocument subParserDoc = sp.parse(location, "UTF-8", embeddedFileStream); if (subParserDoc.getMimeType() == null) { subParserDoc.setMimeType(fileMimeType); } 
parserDoc.addSubDocument(fileName, subParserDoc); } catch (ParserException e) { this.logger.error(String.format( "Unexpected error while parsing parse embedded file '%s#%s' with type '%s': %s", location, fileName, fileMimeType, e.getMessage())); } finally { if (embeddedFileStream != null) try { embeddedFileStream.close(); } catch (Exception e) { this.logger.error(e); } } } } }