Example usage for the org.apache.pdfbox.io.RandomAccessBuffer constructor RandomAccessBuffer()

List of usage examples for the org.apache.pdfbox.io.RandomAccessBuffer constructor RandomAccessBuffer()

Introduction

On this page you can find example usages of the default constructor of org.apache.pdfbox.io.RandomAccessBuffer.

Prototype

public RandomAccessBuffer() 

Source Link

Document

Default constructor.

Usage

From source file:eu.europa.ec.markt.dss.signature.pdf.pdfbox.PdfBoxStream.java

License:Open Source License

/**
 * Creates a stream wrapper whose {@link COSStream} is backed by a new
 * {@link RandomAccessBuffer} and contains the given bytes as unfiltered content.
 *
 * @param bytes the unfiltered content to write into the wrapped stream
 * @throws IOException if the unfiltered stream cannot be created, written, flushed or closed
 */
public PdfBoxStream(byte[] bytes) throws IOException {
    RandomAccessBuffer storage = new RandomAccessBuffer();
    this.wrapped = new COSStream(storage);
    final OutputStream unfilteredStream = this.wrapped.createUnfilteredStream();
    try {
        unfilteredStream.write(bytes);
        unfilteredStream.flush();
    } finally {
        // Release the output stream even when the write fails.
        unfilteredStream.close();
    }
}

From source file:eu.europa.esig.dss.pdf.pdfbox.PdfBoxSignatureService.java

License:Open Source License

/**
 * Returns the {@link COSStream} registered for the given token, creating and
 * registering it first when the map does not contain one yet.
 *
 * @param streams cache of already created streams, keyed by the token's DSS id string;
 *                a newly created stream is added to it
 * @param token   the token whose encoded form is written into a newly created stream
 * @return the cached or newly created stream for the token
 * @throws IOException if the unfiltered stream cannot be created, written, flushed or closed
 */
private COSStream getStream(Map<String, COSStream> streams, Token token) throws IOException {
    final String key = token.getDSSIdAsString();
    COSStream stream = streams.get(key);
    if (stream == null) {
        RandomAccessBuffer storage = new RandomAccessBuffer();
        stream = new COSStream(storage);
        OutputStream unfilteredStream = stream.createUnfilteredStream();
        try {
            unfilteredStream.write(token.getEncoded());
            unfilteredStream.flush();
        } finally {
            // Release the output stream even when the write fails.
            unfilteredStream.close();
        }
        // Only register the stream once its content has been written successfully.
        streams.put(key, stream);
    }
    return stream;
}

From source file:eu.europa.esig.dss.pdf.pdfbox.PdfBoxStream.java

License:Open Source License

/**
 * Creates a stream wrapper whose {@link COSStream} is backed by a new
 * {@link RandomAccessBuffer} and contains the given bytes as unfiltered content.
 *
 * @param bytes the unfiltered content to write into the wrapped stream
 * @throws DSSException wrapping any exception raised while creating, writing,
 *                      flushing or closing the stream
 */
public PdfBoxStream(byte[] bytes) {
    try {
        RandomAccessBuffer storage = new RandomAccessBuffer();
        this.wrapped = new COSStream(storage);
        final OutputStream unfilteredStream = this.wrapped.createUnfilteredStream();
        try {
            unfilteredStream.write(bytes);
            unfilteredStream.flush();
        } finally {
            // Release the output stream even when the write fails.
            unfilteredStream.close();
        }
    } catch (Exception e) {
        throw new DSSException(e);
    }
}

From source file:mj.ocraptor.extraction.tika.parser.pdf.PDFParser.java

License:Apache License

/**
 * Loads a PDF from the given stream, decrypts it when necessary, and passes
 * its metadata and content on to the supplied handler.
 * <p>
 * Parsing is best-effort: any exception raised while loading or processing
 * the document is printed and swallowed, and {@code handler.endDocument()}
 * is still called afterwards. Exceptions raised during cleanup are not
 * swallowed.
 *
 * @param stream   the PDF input; when it is a {@link TikaInputStream} that has a file,
 *                 a temporary scratch file is used instead of an in-memory buffer
 * @param handler  receiver of the extracted content
 * @param metadata metadata to populate; may also carry the {@code PASSWORD} entry
 * @param context  parse context supplying an optional {@link PDFParserConfig}
 *                 and {@link PasswordProvider}
 * @throws IOException   if closing the document or the temporary resources fails
 * @throws SAXException  if {@code handler.endDocument()} fails
 * @throws TikaException if disposing the temporary resources fails
 */
public void parse(InputStream stream, ContentHandler handler, Metadata metadata, ParseContext context)
        throws IOException, SAXException, TikaException {

    PDDocument pdfDocument = null;
    TemporaryResources tmp = new TemporaryResources();
    // config from context, or default if not set via context
    PDFParserConfig localConfig = context.get(PDFParserConfig.class, defaultConfig);

    try {
        // PDFBox can process entirely in memory, or can use a temp file
        // for unpacked / processed resources
        // Decide which to do based on if we're reading from a file or not
        // already
        TikaInputStream tstream = TikaInputStream.cast(stream);
        if (tstream != null && tstream.hasFile()) {
            // File based, take that as a cue to use a temporary file
            RandomAccess scratchFile = new RandomAccessFile(tmp.createTemporaryFile(), "rw");
            if (localConfig.getUseNonSequentialParser()) {
                pdfDocument = PDDocument.loadNonSeq(new CloseShieldInputStream(stream), scratchFile);
            } else {
                pdfDocument = PDDocument.load(new CloseShieldInputStream(stream), scratchFile, true);
            }
        } else {
            // Go for the normal, stream based in-memory parsing
            if (localConfig.getUseNonSequentialParser()) {
                pdfDocument = PDDocument.loadNonSeq(new CloseShieldInputStream(stream),
                        new RandomAccessBuffer());
            } else {
                pdfDocument = PDDocument.load(new CloseShieldInputStream(stream), true);
            }
        }

        if (pdfDocument.isEncrypted()) {
            String password = null;

            // Did they supply a new style Password Provider?
            PasswordProvider passwordProvider = context.get(PasswordProvider.class);
            if (passwordProvider != null) {
                password = passwordProvider.getPassword(metadata);
            }

            // Fall back on the old style metadata if set
            if (password == null && metadata.get(PASSWORD) != null) {
                password = metadata.get(PASSWORD);
            }

            // If no password is given, use an empty string as the default
            if (password == null) {
                password = "";
            }

            try {
                pdfDocument.decrypt(password);
            } catch (Exception ignored) {
                // Deliberately ignored: a failed decryption attempt does not
                // abort parsing; processing continues with the document as loaded.
            }
        }

        metadata.set(Metadata.CONTENT_TYPE, "application/pdf");
        extractMetadata(pdfDocument, metadata);
        PDF2XHTML.process(pdfDocument, handler, context, metadata, localConfig);

    } catch (Exception e) {
        // Best-effort parsing: report the failure but do not propagate it.
        // TODO: logging
        e.printStackTrace();
    } finally {
        // Nested finally so the temporary resources are released even when
        // closing the document itself throws.
        try {
            if (pdfDocument != null) {
                pdfDocument.close();
            }
        } finally {
            tmp.dispose();
            tmp.close();
        }
    }
    handler.endDocument();
}

From source file:org.apache.tika.parser.pdf.PDFParser.java

License:Apache License

/**
 * Loads a PDF from the given stream, records whether it is encrypted,
 * extracts its metadata and, when a handler is supplied, emits its content.
 *
 * @param stream   the PDF input; when it is a {@link TikaInputStream} that has a file,
 *                 a temporary scratch file is used instead of an in-memory buffer
 * @param handler  receiver of the extracted content; content extraction is skipped when {@code null}
 * @param metadata metadata to populate (content type and {@code pdf:encrypted} among others)
 * @param context  parse context supplying an optional {@link PDFParserConfig}
 * @throws IOException   if reading or closing the document fails for a reason
 *                       other than a bad password
 * @throws SAXException  declared for the content handler callbacks
 * @throws TikaException declared for parser failures; an {@link EncryptedDocumentException}
 *                       is thrown when a {@code CryptographyException} is caught, or an
 *                       {@code IOException} whose message contains
 *                       {@code "Error (CryptographyException)"}
 */
public void parse(InputStream stream, ContentHandler handler, Metadata metadata, ParseContext context)
        throws IOException, SAXException, TikaException {

    PDDocument pdfDocument = null;
    TemporaryResources tmp = new TemporaryResources();
    //config from context, or default if not set via context
    PDFParserConfig localConfig = context.get(PDFParserConfig.class, defaultConfig);
    try {
        // PDFBox can process entirely in memory, or can use a temp file
        //  for unpacked / processed resources
        // Decide which to do based on if we're reading from a file or not already
        TikaInputStream tstream = TikaInputStream.cast(stream);
        String password = getPassword(metadata, context);
        if (tstream != null && tstream.hasFile()) {
            // File based, take that as a cue to use a temporary file
            RandomAccess scratchFile = new RandomAccessFile(tmp.createTemporaryFile(), "rw");
            if (localConfig.getUseNonSequentialParser()) {
                pdfDocument = PDDocument.loadNonSeq(new CloseShieldInputStream(stream), scratchFile, password);
            } else {
                pdfDocument = PDDocument.load(new CloseShieldInputStream(stream), scratchFile, true);
            }
        } else {
            // Go for the normal, stream based in-memory parsing
            if (localConfig.getUseNonSequentialParser()) {
                pdfDocument = PDDocument.loadNonSeq(new CloseShieldInputStream(stream),
                        new RandomAccessBuffer(), password);
            } else {
                pdfDocument = PDDocument.load(new CloseShieldInputStream(stream), true);
            }
        }
        metadata.set("pdf:encrypted", Boolean.toString(pdfDocument.isEncrypted()));

        //if using the classic parser and the doc is encrypted, we must manually decrypt
        if (!localConfig.getUseNonSequentialParser() && pdfDocument.isEncrypted()) {
            pdfDocument.decrypt(password);
        }

        metadata.set(Metadata.CONTENT_TYPE, "application/pdf");
        extractMetadata(pdfDocument, metadata);

        AccessChecker checker = localConfig.getAccessChecker();
        checker.check(metadata);
        if (handler != null) {
            if (shouldHandleXFAOnly(pdfDocument, localConfig)) {
                handleXFAOnly(pdfDocument, handler, metadata);
            } else {
                PDF2XHTML.process(pdfDocument, handler, context, metadata, localConfig);
            }
        }

    } catch (CryptographyException e) {
        //seq parser throws CryptographyException for bad password
        throw new EncryptedDocumentException(e);
    } catch (IOException e) {
        //nonseq parser throws IOException for bad password
        //At the Tika level, we want the same exception to be thrown
        if (e.getMessage() != null && e.getMessage().contains("Error (CryptographyException)")) {
            metadata.set("pdf:encrypted", Boolean.toString(true));
            throw new EncryptedDocumentException(e);
        }
        //rethrow any other IOExceptions
        throw e;
    } finally {
        // Nested finally blocks so each cleanup step runs even when an
        // earlier one throws.
        try {
            if (pdfDocument != null) {
                pdfDocument.close();
            }
        } finally {
            try {
                tmp.dispose();
            } finally {
                //TODO: once we migrate to PDFBox 2.0, remove this (PDFBOX-2200)
                PDFont.clearResources();
            }
        }
    }
}

From source file:org.lockss.pdf.MockPdfTokenStream.java

License:Open Source License

/**
 * <p>
 * Makes a fake PDF token stream from parsing the given input stream.
 * </p>
 * <p>
 * The input is parsed with a {@link PDFStreamParser} backed by a new
 * {@link RandomAccessBuffer}, and each resulting PDFBox token is converted
 * with {@code convert} before being stored.
 * </p>
 * 
 * @param inputStream
 *          An input stream of PDF token stream source.
 * @throws IOException
 *           if parsing fails or an I/O error occurs.
 * @since 1.67
 */
public MockPdfTokenStream(InputStream inputStream) throws IOException {
    PDFStreamParser parser = new PDFStreamParser(inputStream, new RandomAccessBuffer());
    parser.parse();
    List<Object> pdfBoxTokens = parser.getTokens();
    // Presize the list to the number of parsed tokens.
    this.pdfTokens = new ArrayList<PdfToken>(pdfBoxTokens.size());
    for (Object pdfBoxToken : pdfBoxTokens) {
        this.pdfTokens.add(convert(pdfBoxToken));
    }
}