List of usage examples for the org.apache.pdfbox.io.RandomAccessBuffer constructor RandomAccessBuffer()
public RandomAccessBuffer()
From source file:eu.europa.ec.markt.dss.signature.pdf.pdfbox.PdfBoxStream.java
License:Open Source License
/**
 * Creates a wrapper around a new {@code COSStream}, backed by a
 * {@code RandomAccessBuffer}, and writes the given bytes to its unfiltered stream.
 *
 * @param bytes the content to write into the wrapped stream
 * @throws IOException if the unfiltered stream cannot be created, written, flushed or closed
 */
public PdfBoxStream(byte[] bytes) throws IOException {
    RandomAccessBuffer storage = new RandomAccessBuffer();
    this.wrapped = new COSStream(storage);
    final OutputStream unfilteredStream = this.wrapped.createUnfilteredStream();
    try {
        unfilteredStream.write(bytes);
        unfilteredStream.flush();
    } finally {
        // Always release the output stream, including when the write fails.
        unfilteredStream.close();
    }
}
From source file:eu.europa.esig.dss.pdf.pdfbox.PdfBoxSignatureService.java
License:Open Source License
private COSStream getStream(Map<String, COSStream> streams, Token token) throws IOException { COSStream stream = streams.get(token.getDSSIdAsString()); if (stream == null) { RandomAccessBuffer storage = new RandomAccessBuffer(); stream = new COSStream(storage); OutputStream unfilteredStream = stream.createUnfilteredStream(); unfilteredStream.write(token.getEncoded()); unfilteredStream.flush();//w ww.j a v a 2s. com streams.put(token.getDSSIdAsString(), stream); } return stream; }
From source file:eu.europa.esig.dss.pdf.pdfbox.PdfBoxStream.java
License:Open Source License
/**
 * Creates a wrapper around a new {@code COSStream}, backed by a
 * {@code RandomAccessBuffer}, and writes the given bytes to its unfiltered stream.
 *
 * @param bytes the content to write into the wrapped stream
 * @throws DSSException wrapping any exception raised while creating, writing,
 *         flushing or closing the stream
 */
public PdfBoxStream(byte[] bytes) {
    try {
        RandomAccessBuffer storage = new RandomAccessBuffer();
        this.wrapped = new COSStream(storage);
        final OutputStream unfilteredStream = this.wrapped.createUnfilteredStream();
        try {
            unfilteredStream.write(bytes);
            unfilteredStream.flush();
        } finally {
            // Always release the output stream, including when the write fails.
            unfilteredStream.close();
        }
    } catch (Exception e) {
        // Callers see a single unchecked exception type for every failure.
        throw new DSSException(e);
    }
}
From source file:mj.ocraptor.extraction.tika.parser.pdf.PDFParser.java
License:Apache License
public void parse(InputStream stream, ContentHandler handler, Metadata metadata, ParseContext context) throws IOException, SAXException, TikaException { PDDocument pdfDocument = null;/* w ww . j ava2s . c om*/ TemporaryResources tmp = new TemporaryResources(); // config from context, or default if not set via context PDFParserConfig localConfig = context.get(PDFParserConfig.class, defaultConfig); try { // PDFBox can process entirely in memory, or can use a temp file // for unpacked / processed resources // Decide which to do based on if we're reading from a file or not // already TikaInputStream tstream = TikaInputStream.cast(stream); if (tstream != null && tstream.hasFile()) { // File based, take that as a cue to use a temporary file RandomAccess scratchFile = new RandomAccessFile(tmp.createTemporaryFile(), "rw"); if (localConfig.getUseNonSequentialParser() == true) { pdfDocument = PDDocument.loadNonSeq(new CloseShieldInputStream(stream), scratchFile); } else { pdfDocument = PDDocument.load(new CloseShieldInputStream(stream), scratchFile, true); } } else { // Go for the normal, stream based in-memory parsing if (localConfig.getUseNonSequentialParser() == true) { pdfDocument = PDDocument.loadNonSeq(new CloseShieldInputStream(stream), new RandomAccessBuffer()); } else { pdfDocument = PDDocument.load(new CloseShieldInputStream(stream), true); } } if (pdfDocument.isEncrypted()) { String password = null; // Did they supply a new style Password Provider? 
PasswordProvider passwordProvider = context.get(PasswordProvider.class); if (passwordProvider != null) { password = passwordProvider.getPassword(metadata); } // Fall back on the old style metadata if set if (password == null && metadata.get(PASSWORD) != null) { password = metadata.get(PASSWORD); } // If no password is given, use an empty string as the default if (password == null) { password = ""; } try { pdfDocument.decrypt(password); } catch (Exception e) { // Ignore } } metadata.set(Metadata.CONTENT_TYPE, "application/pdf"); extractMetadata(pdfDocument, metadata); PDF2XHTML.process(pdfDocument, handler, context, metadata, localConfig); } catch (Exception e) { // TODO: logging e.printStackTrace(); } finally { if (pdfDocument != null) { pdfDocument.close(); } if (tmp != null) { tmp.dispose(); tmp.close(); } } handler.endDocument(); }
From source file:org.apache.tika.parser.pdf.PDFParser.java
License:Apache License
public void parse(InputStream stream, ContentHandler handler, Metadata metadata, ParseContext context) throws IOException, SAXException, TikaException { PDDocument pdfDocument = null;/*from w ww . ja v a 2s.co m*/ TemporaryResources tmp = new TemporaryResources(); //config from context, or default if not set via context PDFParserConfig localConfig = context.get(PDFParserConfig.class, defaultConfig); String password = ""; try { // PDFBox can process entirely in memory, or can use a temp file // for unpacked / processed resources // Decide which to do based on if we're reading from a file or not already TikaInputStream tstream = TikaInputStream.cast(stream); password = getPassword(metadata, context); if (tstream != null && tstream.hasFile()) { // File based, take that as a cue to use a temporary file RandomAccess scratchFile = new RandomAccessFile(tmp.createTemporaryFile(), "rw"); if (localConfig.getUseNonSequentialParser() == true) { pdfDocument = PDDocument.loadNonSeq(new CloseShieldInputStream(stream), scratchFile, password); } else { pdfDocument = PDDocument.load(new CloseShieldInputStream(stream), scratchFile, true); } } else { // Go for the normal, stream based in-memory parsing if (localConfig.getUseNonSequentialParser() == true) { pdfDocument = PDDocument.loadNonSeq(new CloseShieldInputStream(stream), new RandomAccessBuffer(), password); } else { pdfDocument = PDDocument.load(new CloseShieldInputStream(stream), true); } } metadata.set("pdf:encrypted", Boolean.toString(pdfDocument.isEncrypted())); //if using the classic parser and the doc is encrypted, we must manually decrypt if (!localConfig.getUseNonSequentialParser() && pdfDocument.isEncrypted()) { pdfDocument.decrypt(password); } metadata.set(Metadata.CONTENT_TYPE, "application/pdf"); extractMetadata(pdfDocument, metadata); AccessChecker checker = localConfig.getAccessChecker(); checker.check(metadata); if (handler != null) { if (shouldHandleXFAOnly(pdfDocument, localConfig)) { handleXFAOnly(pdfDocument, 
handler, metadata); } else { PDF2XHTML.process(pdfDocument, handler, context, metadata, localConfig); } } } catch (CryptographyException e) { //seq parser throws CryptographyException for bad password throw new EncryptedDocumentException(e); } catch (IOException e) { //nonseq parser throws IOException for bad password //At the Tika level, we want the same exception to be thrown if (e.getMessage() != null && e.getMessage().contains("Error (CryptographyException)")) { metadata.set("pdf:encrypted", Boolean.toString(true)); throw new EncryptedDocumentException(e); } //rethrow any other IOExceptions throw e; } finally { if (pdfDocument != null) { pdfDocument.close(); } tmp.dispose(); //TODO: once we migrate to PDFBox 2.0, remove this (PDFBOX-2200) PDFont.clearResources(); } }
From source file:org.lockss.pdf.MockPdfTokenStream.java
License:Open Source License
/**
 * <p>
 * Builds a fake PDF token stream by parsing the given input stream and
 * converting each token the parser produces.
 * </p>
 *
 * @param inputStream
 *          An input stream of PDF token stream source.
 * @throws IOException
 *           if parsing fails or an I/O error occurs.
 * @since 1.67
 */
public MockPdfTokenStream(InputStream inputStream) throws IOException {
    PDFStreamParser streamParser = new PDFStreamParser(inputStream, new RandomAccessBuffer());
    streamParser.parse();
    List<Object> rawTokens = streamParser.getTokens();
    final int tokenCount = rawTokens.size();
    // Size the list up front: exactly one converted token is added per parsed token.
    this.pdfTokens = new ArrayList<PdfToken>(tokenCount);
    for (int index = 0; index < tokenCount; index++) {
        this.pdfTokens.add(convert(rawTokens.get(index)));
    }
}