Example usage for org.apache.pdfbox.pdmodel PDDocument close

List of usage examples for org.apache.pdfbox.pdmodel.PDDocument.close()

Introduction

This page collects usage examples for the close() method of org.apache.pdfbox.pdmodel.PDDocument.

Prototype

@Override
public void close() throws IOException 

Source Link

Document

This will close the underlying COSDocument object.

Usage

From source file:org.wandora.application.tools.extractors.files.SimplePDFExtractor.java

License:Open Source License

/**
 * Extracts PDF metadata and text content from the document found at
 * {@code locator} and attaches them to {@code pdfTopic}.
 *
 * <p>The document is always loaded from {@code locator}; {@code inputStream}
 * is not read by this method. Any failure is printed to stderr and otherwise
 * swallowed. The document is closed in every case.
 *
 * @param locator     URL (when it starts with {@code http://}) or file path of the PDF
 * @param inputStream not used by this implementation
 * @param topicMap    topic map that receives the created topics and associations
 * @param pdfTopic    topic representing the PDF document
 */
public void _extractTopicsFromStream(String locator, InputStream inputStream, TopicMap topicMap,
        Topic pdfTopic) {
    PDDocument doc = null;
    try {
        if (locator.startsWith("http://")) {
            doc = PDDocument.load(new URL(locator));
        } else {
            doc = PDDocument.load(new File(locator));
        }
        PDDocumentInformation info = doc.getDocumentInformation();
        DateFormat dateFormatter = new SimpleDateFormat(DEFAULT_DATE_FORMAT);

        // --- PDF PRODUCER ---
        addPdfOccurrenceIfPresent(topicMap, pdfTopic, "pdf-producer", info.getProducer());

        // --- PDF MODIFICATION DATE ---
        Calendar mCal = info.getModificationDate();
        if (mCal != null) {
            addPdfOccurrenceIfPresent(topicMap, pdfTopic, "pdf-modification-date",
                    dateFormatter.format(mCal.getTime()));
        }

        // --- PDF CREATOR ---
        addPdfOccurrenceIfPresent(topicMap, pdfTopic, "pdf-creator", info.getCreator());

        // --- PDF CREATION DATE ---
        Calendar cCal = info.getCreationDate();
        if (cCal != null) {
            addPdfOccurrenceIfPresent(topicMap, pdfTopic, "pdf-creation-date",
                    dateFormatter.format(cCal.getTime()));
        }

        // --- PDF AUTHOR ---
        addPdfOccurrenceIfPresent(topicMap, pdfTopic, "pdf-author", info.getAuthor());

        // --- PDF SUBJECT ---
        addPdfOccurrenceIfPresent(topicMap, pdfTopic, "pdf-subject", info.getSubject());

        // --- PDF TITLE ---
        // Fixed: the title was previously read with getSubject().
        String title = info.getTitle();
        if (title != null && title.length() > 0) {
            if (makeVariantFromTitle) {
                pdfTopic.setDisplayName(defaultLang, title);
            } else {
                addPdfOccurrenceIfPresent(topicMap, pdfTopic, "pdf-title", title);
            }
        }

        // --- PDF KEYWORDS (SEPARATED WITH SEMICOLON) ---
        String keywords = info.getKeywords();
        if (keywords != null && keywords.length() > 0) {
            Topic keywordType = createTopic(topicMap, "pdf-keyword");
            for (String rawKeyword : keywords.split(";")) {
                String keyword = Textbox.trimExtraSpaces(rawKeyword);
                if (keyword != null && keyword.length() > 0) {
                    Topic keywordTopic = createTopic(topicMap, keyword, keywordType);
                    createAssociation(topicMap, keywordType, new Topic[] { pdfTopic, keywordTopic });
                }
            }
        }

        // --- PDF TEXT CONTENT ---
        PDFTextStripper stripper = new PDFTextStripper();
        if (makePageTopics) {
            int pages = doc.getNumberOfPages();
            // PDFTextStripper page numbers are 1-based. Iterating 0..pages-1
            // produced an empty "page 0" and never extracted the last page.
            for (int pageNo = 1; pageNo <= pages; pageNo++) {
                stripper.setStartPage(pageNo);
                stripper.setEndPage(pageNo);
                String pageContent = stripper.getText(doc);
                Topic pageType = createTopic(topicMap, "pdf-page");
                Topic pageTopic = createTopic(topicMap, pdfTopic.getBaseName() + " (page " + pageNo + ")",
                        pageType);
                Topic orderType = createTopic(topicMap, "order");
                Topic orderTopic = createTopic(topicMap, pageNo + ".", orderType);
                Topic contentType = createTopic(topicMap, "pdf-text");
                setData(pageTopic, contentType, defaultLang, pageContent.trim());
                createAssociation(topicMap, pageType, new Topic[] { pdfTopic, pageTopic, orderTopic });
            }
        } else {
            addPdfOccurrenceIfPresent(topicMap, pdfTopic, "pdf-text", stripper.getText(doc));
        }
    } catch (Exception e) {
        e.printStackTrace();
    } finally {
        // Close exactly once, whether extraction succeeded or failed.
        if (doc != null) {
            try {
                doc.close();
            } catch (Exception ix) {
                // Fixed: the close failure itself is reported (previously 'e' was printed again).
                ix.printStackTrace();
            }
        }
    }
}

/**
 * Stores {@code value} (trimmed) on {@code target} under the type topic named
 * {@code typeName}; does nothing when {@code value} is null or empty.
 */
private void addPdfOccurrenceIfPresent(TopicMap topicMap, Topic target, String typeName, String value)
        throws Exception {
    if (value != null && value.length() > 0) {
        Topic type = createTopic(topicMap, typeName);
        setData(target, type, defaultLang, value.trim());
    }
}

From source file:org.wandora.application.tools.extractors.fng.ExtractFNGTextEnrichment.java

License:Open Source License

/**
 * Extracts text (and, for PDFs, basic metadata) from the resource and stores it
 * on {@code textTopic}. The handler is chosen by the suffix of {@code locator}.
 *
 * <p>PDF documents are loaded from the {@code locator} URL; all other formats
 * are read from {@code inputStream}. Any exception is passed to {@code log}.
 *
 * @param locator     URL of the resource; its lower-cased suffix selects the handler
 * @param inputStream content stream used for the non-PDF formats
 * @param topicMap    topic map that receives the created topics
 * @param textTopic   topic that is enriched with the extracted data
 */
public void _extractTopicsFromStream(String locator, InputStream inputStream, TopicMap topicMap,
        Topic textTopic) {
    try {
        String lowerCaseLocator = locator.toLowerCase();

        // --- HANDLE PDF ENRICHMENT TEXT ---
        if (lowerCaseLocator.endsWith("pdf")) {

            PDDocument doc = PDDocument.load(new URL(locator));
            try {
                PDDocumentInformation info = doc.getDocumentInformation();

                // --- PDF SUBJECT ---
                String subject = info.getSubject();
                if (subject != null && subject.length() > 0) {
                    Topic subjectType = createTopic(topicMap, "subject");
                    setData(textTopic, subjectType, defaultLang, subject.trim());
                }

                // --- PDF TITLE ---
                String title = info.getTitle();
                if (title != null && title.length() > 0) {
                    Topic titleType = createTopic(topicMap, "title");
                    setData(textTopic, titleType, defaultLang, title.trim());
                }

                // --- PDF KEYWORDS ---
                String keywords = info.getKeywords();
                if (keywords != null && keywords.length() > 0) {
                    Topic keywordType = createTopic(topicMap, "keywords");
                    setData(textTopic, keywordType, defaultLang, keywords.trim());
                }

                // --- PDF TEXT CONTENT ---
                PDFTextStripper stripper = new PDFTextStripper();
                String content = stripper.getText(doc);
                setTextEnrichment(textTopic, topicMap, content);
            } finally {
                // Fixed: the document used to leak when any step above threw.
                doc.close();
            }
        }

        // --- HANDLE RTF DOCUMENTS ---
        else if (lowerCaseLocator.endsWith("rtf")) {
            String content = Textbox.RTF2PlainText(inputStream);
            setTextEnrichment(textTopic, topicMap, content);
        }

        // --- HANDLE OFFICE DOCUMENTS ---
        // NOTE(review): "xsl" looks like a typo for "xls" (Excel) -- confirm before changing.
        else if (lowerCaseLocator.endsWith("doc") || lowerCaseLocator.endsWith("docx")
                || lowerCaseLocator.endsWith("ppt") || lowerCaseLocator.endsWith("xsl")
                || lowerCaseLocator.endsWith("vsd")) {
            String content = MSOfficeBox.getText(inputStream);
            if (content != null) {
                setTextEnrichment(textTopic, topicMap, content);
            }
        }

        // --- HANDLE TXT DOCUMENTS ---
        else {
            String content = IObox.loadFile(new InputStreamReader(inputStream));
            setTextEnrichment(textTopic, topicMap, content);
        }
    } catch (Exception e) {
        log(e);
    }
}

From source file:org.wandora.piccolo.utils.crawler.handlers.PDFHandler.java

License:Open Source License

/**
 * Loads the PDF at {@code page}, extracts its subject, title, keywords and
 * text, and hands the resulting index document to {@code crawler}.
 *
 * <p>The PDF is loaded from the {@code page} URL; {@code in} and {@code depth}
 * are not used by this implementation. Failures are printed to stderr.
 *
 * @param crawler receiver of the created index document
 * @param in      not used by this implementation
 * @param depth   not used by this implementation
 * @param page    URL of the PDF to index
 */
public void handle(CrawlerAccess crawler, InputStream in, int depth, URL page) {
    try {
        String subject;
        String title;
        String keywords;
        String content;

        PDDocument doc = PDDocument.load(page);
        try {
            PDDocumentInformation info = doc.getDocumentInformation();
            PDFTextStripper stripper = new PDFTextStripper();
            content = stripper.getText(doc);
            // Read the metadata while the document is still open.
            subject = info.getSubject();
            title = info.getTitle();
            keywords = info.getKeywords();
        } finally {
            // Fixed: the document used to leak when text extraction threw.
            doc.close();
        }

        Document d = new Document();
        d.add(LuceneCrawler.subject(subject));
        d.add(LuceneCrawler.title(title));
        d.add(LuceneCrawler.keywords(keywords));
        d.add(LuceneCrawler.content(content));
        d.add(LuceneCrawler.location(page.toString()));

        crawler.addObject(d);
    } catch (Exception e) {
        // A single handler suffices: IOException was handled identically.
        e.printStackTrace();
    }
}

From source file:org.wandora.utils.PDFbox.java

License:Open Source License

/**
 * Extracts the plain text of the PDF addressed by {@code url}.
 *
 * @param url a {@code file:} URI or any other URL understood by {@link URL}
 * @return the extracted text, or {@code null} if the document could not be
 *         loaded or parsed (the error is printed to stderr)
 */
public static String extractTextOutOfPDF(String url) {
    PDDocument doc = null;
    try {
        if (url.startsWith("file:")) {
            // Fixed: new File("file:...") treated the whole URL string as a path.
            // Convert the file URI to a real file-system path instead.
            doc = PDDocument.load(new File(java.net.URI.create(url)));
        } else {
            doc = PDDocument.load(new URL(url));
        }
        PDFTextStripper stripper = new PDFTextStripper();
        return stripper.getText(doc);
    } catch (Exception e) {
        e.printStackTrace();
        return null;
    } finally {
        // Fixed: the document used to leak when text extraction threw.
        if (doc != null) {
            try {
                doc.close();
            } catch (Exception closeError) {
                closeError.printStackTrace();
            }
        }
    }
}

From source file:org.wangwei.pdf.AddImageToPDF.java

License:Apache License

/**
 * Add an image to the first page of an existing PDF document and save the
 * result to a new file.
 *
 * <p>The image type is chosen by file extension: {@code .jpg} is embedded as
 * JPEG, {@code .tif}/{@code .tiff} as CCITT, anything else is decoded with
 * {@link ImageIO}.
 *
 * @param inputFile The input PDF to add the image to.
 * @param image The filename of the image to put in the PDF.
 * @param outputFile The file to write the PDF to.
 * @throws IOException If there is an error reading the image or writing the data.
 * @throws COSVisitorException If there is an error writing the PDF.
 */
public void createPDFFromImage(String inputFile, String image, String outputFile)
        throws IOException, COSVisitorException {
    // the document and the image sources that must be released afterwards
    PDDocument doc = null;
    FileInputStream jpegSource = null;
    RandomAccessFile tiffSource = null;
    try {
        doc = PDDocument.load(inputFile);

        // we will add the image to the first page.
        PDPage page = (PDPage) doc.getDocumentCatalog().getAllPages().get(0);

        String lowerCaseName = image.toLowerCase();
        PDXObjectImage ximage;
        if (lowerCaseName.endsWith(".jpg")) {
            jpegSource = new FileInputStream(image);
            ximage = new PDJpeg(doc, jpegSource);
        } else if (lowerCaseName.endsWith(".tif") || lowerCaseName.endsWith(".tiff")) {
            tiffSource = new RandomAccessFile(new File(image), "r");
            ximage = new PDCcitt(doc, tiffSource);
        } else {
            BufferedImage awtImage = ImageIO.read(new File(image));
            if (awtImage == null) {
                // ImageIO.read returns null when no registered reader understands the file.
                throw new IOException("Unsupported or unreadable image format: " + image);
            }
            ximage = new PDPixelMap(doc, awtImage);
        }

        PDPageContentStream contentStream = new PDPageContentStream(doc, page, true, true);
        try {
            // better method inspired by http://stackoverflow.com/a/22318681/535646
            float scale = 1f; // reduce this value if the image is too large
            contentStream.drawXObject(ximage, 20, 20, ximage.getWidth() * scale, ximage.getHeight() * scale);
        } finally {
            // The content stream must be closed before saving, and also on failure.
            contentStream.close();
        }
        doc.save(outputFile);
    } finally {
        // Release the image sources only after the document has been saved,
        // then close the document even if releasing a source fails.
        try {
            if (jpegSource != null) {
                jpegSource.close();
            }
            if (tiffSource != null) {
                tiffSource.close();
            }
        } finally {
            if (doc != null) {
                doc.close();
            }
        }
    }
}

From source file:org.wso2.carbon.apimgt.impl.reportgen.ReportGenerator.java

License:Open Source License

/**
 * Generate PDF file for API microgateway request summary
 *
 * @param table object containing table headers and row data
 * @return InputStream pdf as a stream/*from   w  w  w  .  ja v a 2s  .  c om*/
 * @throws IOException
 * @throws COSVisitorException
 */
public InputStream generateMGRequestSummeryPDF(TableData table) throws IOException, COSVisitorException {

    String[] columnHeaders = table.getColumnHeaders();

    PDDocument document = new PDDocument();
    PDPage page = new PDPage();
    page.setMediaBox(PDPage.PAGE_SIZE_A4);
    page.setRotation(0);
    document.addPage(page);

    PDPageContentStream contentStream = new PDPageContentStream(document, page, false, false);

    // add logo
    InputStream in = APIManagerComponent.class.getResourceAsStream("/report/wso2-logo.jpg");
    PDJpeg img = new PDJpeg(document, in);
    contentStream.drawImage(img, 375, 755);

    // Add topic
    contentStream.setFont(PDType1Font.HELVETICA_BOLD, 16);
    writeContent(contentStream, CELL_MARGIN, 770, "API Microgateway request summary");

    // Add generated time
    contentStream.setFont(PDType1Font.HELVETICA_BOLD, FONT_SIZE);
    writeContent(contentStream, CELL_MARGIN, 730, "Report generated on: " + new Date().toString());

    contentStream.setFont(TEXT_FONT, FONT_SIZE);

    // add table with data
    drowTableGrid(contentStream, table.getRows().size());
    writeRowsContent(contentStream, columnHeaders, table.getRows());

    // Add meta data
    // Whenever the summary report structure is updated this should be changed
    String requestCount = table.getRows().get(0).getEntries().get(2);
    document.getDocumentInformation().setCustomMetadataValue(MGW_META, getMetaCount(requestCount));

    contentStream.close();
    ByteArrayOutputStream out = new ByteArrayOutputStream();
    document.save(out);
    document.close();

    return new ByteArrayInputStream(out.toByteArray());

}

From source file:org.xcmis.renditions.impl.PDFDocumentRenditionProvider.java

License:Open Source License

/**
 * {@inheritDoc}/*from   w  w  w  .ja  v a2  s.  com*/
 */
public RenditionContentStream getRenditionStream(ContentStream stream) throws IOException {
    PDDocument pdf = null;
    try {
        pdf = PDDocument.load(stream.getStream());
        PDPage page = (PDPage) pdf.getDocumentCatalog().getAllPages().get(0);
        BufferedImage image = page.convertToImage();
        // Determine scale and be sure both width and height are not greater the max
        int scale = (int) Math.max(Math.floor((image.getHeight() / maxHeight) + 1.0d),
                Math.floor((image.getWidth() / maxWidth) + 1.0d));
        int height = image.getHeight() / scale;
        int width = image.getWidth() / scale;
        BufferedImage scaledImage = new BufferedImage(width, height, BufferedImage.TYPE_INT_RGB);
        Graphics2D graphics2D = scaledImage.createGraphics();
        graphics2D.setRenderingHint(RenderingHints.KEY_INTERPOLATION,
                RenderingHints.VALUE_INTERPOLATION_BILINEAR);
        graphics2D.drawImage(image, 0, 0, width, height, null);
        graphics2D.dispose();

        ByteArrayOutputStream out = new ByteArrayOutputStream();
        ImageIO.write(scaledImage, "png", out);
        RenditionContentStream renditionStream = new RenditionContentStream(out.toByteArray(), null,
                new MimeType("image", " png"), getKind(), height, width);
        return renditionStream;
    } finally {
        if (pdf != null) {
            pdf.close();
        }
    }
}

From source file:org.xstudiosys.pdfxmp.AddMetadataFromDocInfo.java

License:Apache License

/**
 * Copies the document-information entries of a PDF into a new XMP metadata
 * stream, attaches that stream to the document catalog and saves the result.
 *
 * <p>Expects exactly two arguments: the input PDF ({@code args[0]}) and the
 * output file ({@code args[1]}). With any other argument count, {@code usage()}
 * is called instead.
 *
 * @param args The command line arguments.
 *
 * @throws Exception If there is an error parsing the document.
 */
public static void main(String[] args) throws Exception {
    if (args.length != 2) {
        usage();
        return;
    }

    PDDocument pdf = null;
    try {
        pdf = PDDocument.load(args[0]);
        if (pdf.isEncrypted()) {
            System.err.println("Error: Cannot add metadata to encrypted document.");
            System.exit(1);
        }
        PDDocumentCatalog docCatalog = pdf.getDocumentCatalog();
        PDDocumentInformation docInfo = pdf.getDocumentInformation();

        XMPMetadata xmp = new XMPMetadata();

        // PDF schema: keywords and producer
        XMPSchemaPDF pdfPart = xmp.addPDFSchema();
        pdfPart.setKeywords(docInfo.getKeywords());
        pdfPart.setProducer(docInfo.getProducer());

        // Basic schema: dates and creator tool
        XMPSchemaBasic basicPart = xmp.addBasicSchema();
        basicPart.setModifyDate(docInfo.getModificationDate());
        basicPart.setCreateDate(docInfo.getCreationDate());
        basicPart.setCreatorTool(docInfo.getCreator());
        basicPart.setMetadataDate(new GregorianCalendar());

        // Dublin Core schema: title, creator, description
        XMPSchemaDublinCore dublinCorePart = xmp.addDublinCoreSchema();
        dublinCorePart.setTitle(docInfo.getTitle());
        dublinCorePart.addCreator("PDFBox");
        dublinCorePart.setDescription(docInfo.getSubject());

        // Attach the assembled XMP packet to the catalog and write the file.
        PDMetadata xmpStream = new PDMetadata(pdf);
        xmpStream.importXMPMetadata(xmp);
        docCatalog.setMetadata(xmpStream);

        pdf.save(args[1]);
    } finally {
        if (pdf != null) {
            pdf.close();
        }
    }
}

From source file:org.xstudiosys.pdfxmp.Main.java

License:Open Source License

/**
 * Parses the PDF read from {@code in}, re-applies its document information
 * dictionary and saves the document to {@code outputFile}.
 *
 * <p>NOTE: {@code xmp} is currently unused because the code that would copy
 * its entries into the information dictionary is commented out below.
 *
 * @param in         stream positioned at the start of the source PDF
 * @param outputFile path of the file to write
 * @param xmp        XMP packet bytes (currently ignored)
 * @throws IOException         if the PDF cannot be parsed or written
 * @throws COSVisitorException if the PDF structure cannot be serialized
 */
public static void writeInfoDictionary(FileInputStream in, String outputFile, byte[] xmp)
        throws IOException, COSVisitorException {

    PDFParser parser = new PDFParser(in);
    parser.parse();

    PDDocument document = parser.getPDDocument();
    try {
        PDDocumentInformation info = document.getDocumentInformation();
        /*
        for (Entry<String, String> entry : XmpUtils.toInfo(xmp).entrySet()) {
           info.setCustomMetadataValue(entry.getKey(), entry.getValue());
        }
        */
        document.setDocumentInformation(info);
        document.save(outputFile);
    } finally {
        // Fixed: the document used to stay open when save() threw.
        document.close();
    }
}

From source file:org.xstudiosys.pdfxmp.XMPUtil.java

License:Open Source License

/**
 * Reads BibTeX entries from the metadata of the PDF supplied on the given
 * input stream.
 *
 * <p>Sources are consulted in this order: BibTeX XMP schemas, then Dublin Core
 * XMP schemas (only if no BibTeX schema produced an entry), then the document
 * information dictionary (only if nothing has been found so far).
 *
 * @param inputStream
 *            The inputstream to read the PDF from.
 * @return the entries found, or {@code null} if no metadata was found
 *
 * @throws IOException
 *             Throws an IOException if the file cannot be read, so the user
 *             can remove a lock or cancel the operation.
 */
@SuppressWarnings("unchecked")
public static List<BibtexEntry> readXMP(InputStream inputStream) throws IOException {

    final List<BibtexEntry> entries = new LinkedList<BibtexEntry>();
    PDDocument pdf = null;

    try {
        pdf = PDDocument.load(inputStream);
        if (pdf.isEncrypted()) {
            throw new EncryptionNotSupportedException("Error: Cannot read metadata from encrypted document.");
        }

        XMPMetadata xmp = getXMPMetadata(pdf);
        if (xmp != null) {
            // First choice: dedicated BibTeX schemas.
            List<XMPSchema> bibtexSchemas = xmp.getSchemasByNamespaceURI(XMPSchemaBibtex.NAMESPACE);
            for (XMPSchema candidate : bibtexSchemas) {
                entries.add(((XMPSchemaBibtex) candidate).getBibtexEntry());
            }

            // Second choice: derive entries from Dublin Core schemas.
            if (entries.isEmpty()) {
                List<XMPSchema> dublinCoreSchemas = xmp.getSchemasByNamespaceURI(XMPSchemaDublinCore.NAMESPACE);
                for (XMPSchema candidate : dublinCoreSchemas) {
                    BibtexEntry fromDublinCore = getBibtexEntryFromDublinCore((XMPSchemaDublinCore) candidate);
                    if (fromDublinCore != null) {
                        entries.add(fromDublinCore);
                    }
                }
            }
        }

        // Last resort: the non-XMP document information dictionary.
        if (entries.isEmpty()) {
            BibtexEntry fromDocInfo = getBibtexEntryFromDocumentInformation(pdf.getDocumentInformation());
            if (fromDocInfo != null) {
                entries.add(fromDocInfo);
            }
        }
    } finally {
        if (pdf != null) {
            pdf.close();
        }
    }

    // null (not an empty list) signals that no metadata was found
    return entries.isEmpty() ? null : entries;
}