Example usage for org.apache.pdfbox.pdmodel.PDDocument#getDocumentInformation()

Introduction

On this page you can find example usages of the method org.apache.pdfbox.pdmodel.PDDocument#getDocumentInformation().

Prototype

public PDDocumentInformation getDocumentInformation()

Source Link

Document

This will get the document info dictionary.

Usage

From source file:fr.univ_tours.etu.searcher.LucenePDFDocument.java

License:Apache License

/**
 * Extracts the text and the document information entries of a PDF and stores
 * them as fields of the given lucene document.
 *
 * @param document The lucene document that receives the fields.
 * @param is The stream the PDF is loaded from.
 * @param documentLocation The location of the document; not referenced by this method's body.
 *
 * @throws IOException If the PDF cannot be loaded or its text cannot be extracted.
 */
private void addContent(Document document, InputStream is, String documentLocation) throws IOException {
    PDDocument pdf = null;
    try {
        pdf = PDDocument.load(is);

        // The stripper is created on first use and then reused.
        if (stripper == null) {
            stripper = new PDFTextStripper();
        }

        // Collect the complete text of the PDF in memory.
        StringWriter extracted = new StringWriter();
        stripper.writeText(pdf, extracted);
        String contents = extracted.toString();

        // Hand the text over as a Reader so that it is added through the
        // Reader-valued variant of addTextField.
        addTextField(document, "contents", new StringReader(contents));

        // Copy every entry of the document information dictionary, if the PDF has one.
        PDDocumentInformation metadata = pdf.getDocumentInformation();
        if (metadata != null) {
            addTextField(document, "Author", metadata.getAuthor());
            addTextField(document, "CreationDate", metadata.getCreationDate());
            addTextField(document, "Creator", metadata.getCreator());
            addTextField(document, "Keywords", metadata.getKeywords());
            addTextField(document, "ModificationDate", metadata.getModificationDate());
            addTextField(document, "Producer", metadata.getProducer());
            addTextField(document, "Subject", metadata.getSubject());
            addTextField(document, "Title", metadata.getTitle());
            addTextField(document, "Trapped", metadata.getTrapped());
        }

        // The summary is the first 1500 characters of the text (or all of it when
        // the text is shorter); it is added through addUnindexedField.
        String summary = contents.substring(0, Math.min(contents.length(), 1500));
        addUnindexedField(document, "summary", summary);
    } finally {
        if (pdf != null) {
            pdf.close();
        }
    }
}

From source file:io.konik.carriage.pdfbox.PDFBoxInvoiceAppender.java

License:Open Source License

/**
 * Attaches a fresh XMP metadata stream to the document and fills it with the
 * PDF/A identification (part 3, conformance "B"), Dublin Core, XMP basic,
 * Adobe PDF, PDF extension and ZUGFeRD schemas. The document information
 * dictionary and the catalog's mark info are updated as well.
 *
 * @param doc the document that receives the metadata
 * @param appendParameter supplies the ZUGFeRD conformance level and version
 * @throws IOException if the metadata stream cannot be written or closed
 * @throws TransformerException if the XMP packet cannot be serialized
 * @throws BadFieldValueException declared by the schema setters used here
 * @throws XmpSerializationException declared for XMP serialization failures
 */
private void setMetadata(PDDocument doc, AppendParameter appendParameter)
        throws IOException, TransformerException, BadFieldValueException, XmpSerializationException {
    Calendar now = Calendar.getInstance();
    PDDocumentCatalog catalog = doc.getDocumentCatalog();

    PDMetadata metadata = new PDMetadata(doc);
    catalog.setMetadata(metadata);

    XMPMetadata xmp = XMPMetadata.createXMPMetadata();
    PDFAIdentificationSchema pdfaid = new PDFAIdentificationSchema(xmp);
    pdfaid.setPart(Integer.valueOf(3));
    pdfaid.setConformance("B");
    xmp.addSchema(pdfaid);

    DublinCoreSchema dublicCore = new DublinCoreSchema(xmp);
    xmp.addSchema(dublicCore);

    XMPBasicSchema basicSchema = new XMPBasicSchema(xmp);
    basicSchema.setCreatorTool(PRODUCER);
    basicSchema.setCreateDate(now);
    xmp.addSchema(basicSchema);

    // Keep the classic document information dictionary in step with the XMP values.
    PDDocumentInformation pdi = doc.getDocumentInformation();
    pdi.setModificationDate(now);
    pdi.setProducer(PRODUCER);
    pdi.setAuthor(getAuthor());
    doc.setDocumentInformation(pdi);

    AdobePDFSchema pdf = new AdobePDFSchema(xmp);
    pdf.setProducer(PRODUCER);
    xmp.addSchema(pdf);

    PDMarkInfo markinfo = new PDMarkInfo();
    markinfo.setMarked(true);
    catalog.setMarkInfo(markinfo);

    xmp.addSchema(zfDefaultXmp.getPDFExtensionSchema());
    XMPSchemaZugferd1p0 zf = new XMPSchemaZugferd1p0(xmp);
    zf.setConformanceLevel(appendParameter.zugferdConformanceLevel());
    zf.setVersion(appendParameter.zugferdVersion());
    xmp.addSchema(zf);

    // The stream is closed explicitly here instead of relying on the serializer
    // to do it, so that everything written is committed to the metadata stream
    // and the stream is released even when serialization fails.
    try (java.io.OutputStream out = metadata.createOutputStream()) {
        new XmpSerializer().serialize(xmp, out, true);
    }
}

From source file:merge_split.MergeSplit.java

License:Apache License

/**
 * Merges the selected pages of every PDF listed in the table model into one
 * new PDF, sets its author and saves it under the chosen directory and name.
 * Each table row supplies the file path (column 0), the page selection
 * (column 2) and a code (column 3) that is either "ok" or the value passed as
 * password when loading the file.
 *
 * @param evt the action event that triggered the merge (not used)
 */
private void MergeButtonActionPerformed(java.awt.event.ActionEvent evt) {//GEN-FIRST:event_MergeButtonActionPerformed
    // Every loaded source document is remembered here so that it is closed even
    // when loading, merging or saving fails part-way through.
    ArrayList<PDDocument> list = new ArrayList<>();
    try (PDDocument samplePdf = new PDDocument()) {
        for (int i = 0; i < dtm.getRowCount(); i++) {
            File file = new File((String) dtm.getValueAt(i, 0));
            String code = (String) dtm.getValueAt(i, 3);
            PDDocument doc1 = code.equals("ok") ? PDDocument.load(file) : PDDocument.load(file, code);
            list.add(doc1);
            doc1.setAllSecurityToBeRemoved(true);
            TreeSet<?> tree = findPages((String) dtm.getValueAt(i, 2));
            for (int j = 0; j < doc1.getNumberOfPages(); j++) {
                // The selection is looked up with 1-based page numbers.
                if (tree.contains(j + 1)) {
                    samplePdf.addPage(doc1.getPage(j));
                }
            }
        }
        System.out.println("Number:" + samplePdf.getNumberOfPages());

        PDDocumentInformation info = samplePdf.getDocumentInformation();
        info.setAuthor(jTextField3.getText());

        // Let File join directory and name so the platform's separator is used.
        File output = new File(jTextField1.getText(), jTextField2.getText() + ".pdf");
        samplePdf.save(output);
    } catch (IOException ex) {
        java.util.logging.Logger.getLogger(MergeSplit.class.getName()).log(java.util.logging.Level.SEVERE, null,
                ex);
        JOptionPane.showMessageDialog(null, "Your input is incorrect. Please fill all the fields.",
                "Input warning", JOptionPane.WARNING_MESSAGE);
    } finally {
        // The sources are closed only after the merged document has been saved and
        // closed, as in the original order; presumably the added pages still refer
        // to data owned by their source documents.
        for (PDDocument source : list) {
            try {
                source.close();
            } catch (IOException ex) {
                java.util.logging.Logger.getLogger(MergeSplit.class.getName())
                        .log(java.util.logging.Level.WARNING, null, ex);
            }
        }
    }

}

From source file:merge_split.MergeSplit.java

License:Apache License

/**
 * Copies all pages of the chosen PDF into a new PDF, applying the selected
 * rotation (90, 180 or 270) to the pages named in the page selection field,
 * then sets the author and saves the result under the chosen directory and name.
 *
 * @param evt the action event that triggered the rotation (not used)
 */
private void RotateButtonActionPerformed(java.awt.event.ActionEvent evt) {//GEN-FIRST:event_RotateButtonActionPerformed
    File file = new File(RotateFileField.getText());

    // Both documents are closed automatically, the new one first and the source
    // afterwards, whether or not an exception occurs.
    try (PDDocument doc1 = rotatecode.equals("ok") ? PDDocument.load(file) : PDDocument.load(file, rotatecode);
            PDDocument samplePdf = new PDDocument()) {
        doc1.setAllSecurityToBeRemoved(true);
        TreeSet<?> tree = findPages(RotatePagesField.getText());

        // -1 means that none of the three rotation options is selected.
        int rotation = -1;
        if (Rotate90.isSelected()) {
            rotation = 90;
        } else if (Rotate180.isSelected()) {
            rotation = 180;
        } else if (Rotate270.isSelected()) {
            rotation = 270;
        }

        for (int j = 0; j < doc1.getNumberOfPages(); j++) {
            PDPage page = doc1.getPage(j);

            // The selection is looked up with 1-based page numbers.
            if (!tree.contains(j + 1)) {
                samplePdf.addPage(page);
                continue;
            }
            // NOTE(review): as before, a selected page is left out of the result when
            // no rotation option is selected - confirm that this is intended.
            if (rotation >= 0) {
                page.setRotation(rotation);
                samplePdf.addPage(page);
            }
        }

        System.out.println("Number:" + samplePdf.getNumberOfPages());

        PDDocumentInformation info = samplePdf.getDocumentInformation();
        info.setAuthor(RotateAuthorField.getText());

        // Let File join directory and name so the platform's separator is used.
        File output = new File(RotateDestinationField.getText(), RotateNameField.getText() + ".pdf");
        samplePdf.save(output);
    } catch (IOException ex) {
        Logger.getLogger(MergeSplit.class.getName()).log(Level.SEVERE, null, ex);

        JOptionPane.showMessageDialog(null, "Your input is incorrect. Please fill all the fields.",
                "Input warning", JOptionPane.WARNING_MESSAGE);
    }
}

From source file:merge_split.MergeSplit.java

License:Apache License

/**
 * Converts the image named in the image file field into a one-page PDF whose
 * page has the pixel dimensions of the image, sets the author and saves the
 * PDF under the chosen directory and name. The handler stops after the first
 * failing step instead of continuing with incomplete data.
 *
 * @param evt the action event that triggered the conversion (not used)
 */
private void RotateButton1ActionPerformed(java.awt.event.ActionEvent evt) {//GEN-FIRST:event_RotateButton1ActionPerformed
    String imagePath = ImageFileField.getText();

    // The image is decoded here only to learn its width and height.
    BufferedImage bimg = null;
    try (InputStream in = new FileInputStream(imagePath)) {
        bimg = ImageIO.read(in);
    } catch (IOException ex) {
        java.util.logging.Logger.getLogger(MergeSplit.class.getName()).log(java.util.logging.Level.SEVERE, null,
                ex);
    }
    // ImageIO.read returns null when no registered reader can decode the input,
    // so both that case and an I/O failure end up here.
    if (bimg == null) {
        JOptionPane.showMessageDialog(null, "Image could not be read.", "Image could not be read",
                JOptionPane.WARNING_MESSAGE);
        return;
    }

    float width = bimg.getWidth();
    float height = bimg.getHeight();

    try (PDDocument document = new PDDocument()) {
        PDPage page = new PDPage(new PDRectangle(width, height));
        document.addPage(page);

        try {
            PDImageXObject imgpdf = PDImageXObject.createFromFile(imagePath, document);
            try (PDPageContentStream contentStream = new PDPageContentStream(document, page)) {
                contentStream.drawImage(imgpdf, 0, 0);
            }
        } catch (IOException ex) {
            java.util.logging.Logger.getLogger(MergeSplit.class.getName()).log(java.util.logging.Level.SEVERE,
                    null, ex);
            JOptionPane.showMessageDialog(null, "Image could not be converted.", "Proccess could not be finished",
                    JOptionPane.WARNING_MESSAGE);
            // Do not save a PDF that only contains an empty page.
            return;
        }

        PDDocumentInformation info = document.getDocumentInformation();
        info.setAuthor(ImageAuthorField.getText());

        // Let File join directory and name so the platform's separator is used.
        File output = new File(ImageDestinationField.getText(), ImageNameField.getText() + ".pdf");
        document.save(output);
    } catch (IOException ex) {
        // Reached when saving or closing the document fails.
        java.util.logging.Logger.getLogger(MergeSplit.class.getName()).log(java.util.logging.Level.SEVERE, null,
                ex);
        JOptionPane.showMessageDialog(null, "Not all fields were filled.", "Input Problem",
                JOptionPane.WARNING_MESSAGE);
    }
}

From source file:merge_split.MergeSplit.java

License:Apache License

/**
 * Splits the chosen PDF. With the "multiple" option the pages named in the page
 * selection field go to {@code <name>1.pdf} and all other pages to
 * {@code <name>2.pdf} (a file is only written when it has at least one page).
 * With the "single" option every page is written to its own file
 * {@code <name><pageNumber>.pdf}. The author is set on every written document.
 *
 * @param evt the action event that triggered the split (not used)
 */
private void SplitButtonActionPerformed(java.awt.event.ActionEvent evt) {//GEN-FIRST:event_SplitButtonActionPerformed
    File file = new File(SplitFileField.getText());

    // All documents are opened in try-with-resources blocks so that they are
    // closed even when a later step throws.
    try (PDDocument doc1 = splitcode.equals("ok") ? PDDocument.load(file) : PDDocument.load(file, splitcode)) {
        doc1.setAllSecurityToBeRemoved(true);

        String directory = SplitDestinationField.getText();
        String baseName = SplitNameField.getText();
        String author = SplitAuthorField.getText();

        if (MultipleButton.isSelected()) {
            try (PDDocument pdf1 = new PDDocument(); PDDocument pdf2 = new PDDocument()) {
                TreeSet<?> tree = findPages(SplitPagesField.getText());
                for (int j = 0; j < doc1.getNumberOfPages(); j++) {
                    PDPage page = doc1.getPage(j);
                    // The selection is looked up with 1-based page numbers.
                    if (tree.contains(j + 1)) {
                        pdf1.addPage(page);
                    } else {
                        pdf2.addPage(page);
                    }
                }

                pdf1.getDocumentInformation().setAuthor(author);
                pdf2.getDocumentInformation().setAuthor(author);

                // File joins directory and name with the platform's separator.
                if (pdf1.getNumberOfPages() > 0) {
                    pdf1.save(new File(directory, baseName + "1.pdf"));
                }
                if (pdf2.getNumberOfPages() > 0) {
                    pdf2.save(new File(directory, baseName + "2.pdf"));
                }
            }
        } else if (SingleButton.isSelected()) {
            for (int j = 0; j < doc1.getNumberOfPages(); j++) {
                try (PDDocument pdf1 = new PDDocument()) {
                    pdf1.addPage(doc1.getPage(j));
                    pdf1.getDocumentInformation().setAuthor(author);

                    // File names carry the 1-based page number.
                    int pagenumber = j + 1;
                    pdf1.save(new File(directory, baseName + pagenumber + ".pdf"));
                }
            }
        }
    } catch (IOException ex) {
        JOptionPane.showMessageDialog(null, "Your input is incorrect. Please fill all the fields.",
                "Input warning", JOptionPane.WARNING_MESSAGE);
        java.util.logging.Logger.getLogger(MergeSplit.class.getName()).log(java.util.logging.Level.SEVERE, null,
                ex);

    }
}

From source file:mj.ocraptor.extraction.tika.parser.pdf.PDFParser.java

License:Apache License

/**
 * Copies the page count and the entries of the PDF's document information
 * dictionary into the given metadata object. Entries with a fixed mapping are
 * copied first; every other dictionary entry is copied under its own key name.
 *
 * @param document the PDF whose information is read
 * @param metadata the metadata object that receives the values
 * @throws TikaException declared by the signature; not thrown directly by this body
 */
private void extractMetadata(PDDocument document, Metadata metadata) throws TikaException {
    PDDocumentInformation docInfo = document.getDocumentInformation();

    metadata.set(PagedText.N_PAGES, document.getNumberOfPages());
    addMetadata(metadata, TikaCoreProperties.TITLE, docInfo.getTitle());
    addMetadata(metadata, TikaCoreProperties.CREATOR, docInfo.getAuthor());
    addMetadata(metadata, TikaCoreProperties.CREATOR_TOOL, docInfo.getCreator());
    addMetadata(metadata, TikaCoreProperties.KEYWORDS, docInfo.getKeywords());
    addMetadata(metadata, "producer", docInfo.getProducer());
    // TODO: Move to description in Tika 2.0
    addMetadata(metadata, TikaCoreProperties.TRANSITION_SUBJECT_TO_OO_SUBJECT, docInfo.getSubject());
    addMetadata(metadata, "trapped", docInfo.getTrapped());

    // Creation date: stored under the legacy key and the core property.
    try {
        // TODO Remove these in Tika 2.0
        addMetadata(metadata, "created", docInfo.getCreationDate());
        addMetadata(metadata, TikaCoreProperties.CREATED, docInfo.getCreationDate());
    } catch (IOException ignored) {
        // An unparsable date is skipped on purpose.
    }

    // Modification date: stored under the legacy key and the core property.
    try {
        Calendar lastModified = docInfo.getModificationDate();
        addMetadata(metadata, Metadata.LAST_MODIFIED, lastModified);
        addMetadata(metadata, TikaCoreProperties.MODIFIED, lastModified);
    } catch (IOException ignored) {
        // An unparsable date is skipped on purpose.
    }

    // Keys that were already copied above; everything else is treated as custom
    // metadata and copied over unchanged.
    List<String> standardKeys = Arrays.asList("Author", "Creator", "CreationDate", "ModDate", "Keywords",
            "Producer", "Subject", "Title", "Trapped");
    for (COSName key : docInfo.getDictionary().keySet()) {
        String name = key.getName();
        if (standardKeys.contains(name)) {
            continue;
        }
        addMetadata(metadata, name, docInfo.getDictionary().getDictionaryObject(key));
    }
}

From source file:net.homeip.donaldm.doxmentor4j.indexers.PDFIndexer.java

License:Open Source License

/**
 * Extracts the text of a PDF into a temporary file and returns a reader on it.
 * The temporary file is written and read as UTF-8 and is deleted when the
 * returned reader is closed, or at once when extraction fails.
 *
 * @param uri   location of the PDF
 * @param page  page to extract when {@code >= 0}; a negative value extracts pages
 *              1 to the document's page count
 * @param title if not null, receives the document title, or the URI path when the
 *              document has no title
 * @return a reader over the extracted text; closing it deletes the temporary file
 * @throws FileNotFoundException declared by the signature
 * @throws MalformedURLException if the URI cannot be converted to a URL
 * @throws IOException if the PDF cannot be loaded or the text cannot be written
 */
@Override
public Reader getText(URI uri, int page, StringBuilder title)
        throws FileNotFoundException, MalformedURLException, IOException
//-----------------------------------------------------------------------------------------
{
    java.io.Writer writer = null;
    PDDocument pdf = null;
    PDFTextStripper stripper = null;
    java.io.File tmpPdf = null;
    java.io.File textFile = null;
    // True once the reader that owns textFile has been created.
    boolean handedOver = false;
    try {
        tmpPdf = Utils.uri2File(uri);
        if (tmpPdf != null)
            pdf = PDDocument.load(tmpPdf.getAbsolutePath(), true);
        else
            pdf = PDDocument.load(uri.toURL(), true);
        PDDocumentInformation pdfInfo = pdf.getDocumentInformation();
        String s = pdfInfo.getTitle();
        if ((s == null) || (s.length() == 0))
            s = uri.getPath();
        if (title != null)
            title.append(s);
        stripper = new PDFTextStripper();
        if (page >= 0) {
            stripper.setStartPage(page);
            stripper.setEndPage(page);
        } else {
            stripper.setStartPage(1);
            stripper.setEndPage(pdf.getNumberOfPages());
        }
        textFile = java.io.File.createTempFile("pdf", ".tmp");
        // A fixed charset is used for writing and reading so that characters the
        // platform default charset cannot encode are not lost on the round trip.
        writer = new java.io.OutputStreamWriter(new java.io.FileOutputStream(textFile), "UTF-8");
        stripper.writeText(pdf, writer);
        // A failing close means the text was not fully flushed; let it propagate
        // instead of returning truncated text.
        writer.close();
        writer = null;

        final java.io.File toDelete = textFile;
        Reader result = new java.io.InputStreamReader(new java.io.FileInputStream(textFile), "UTF-8") {
            @Override
            public void close() throws IOException {
                try {
                    super.close();
                } finally {
                    toDelete.delete();
                }
            }
        };
        handedOver = true;
        return result;
    } finally {
        if (stripper != null)
            try {
                stripper.resetEngine();
            } catch (Exception _e) {
                // best-effort cleanup
            }
        if (pdf != null)
            try {
                pdf.close();
            } catch (Exception _e) {
                // best-effort cleanup
            }
        if (writer != null)
            try {
                writer.close();
            } catch (Exception _e) {
                // best-effort cleanup
            }
        if ((textFile != null) && !handedOver)
            textFile.delete();
        // Delete the source file only when it lives in the temp directory. Both
        // sides are lower-cased; lower-casing only the path never matches a temp
        // directory whose name contains upper-case characters.
        String tmpDir = System.getProperty("java.io.tmpdir");
        if ((tmpPdf != null) && (tmpDir != null)
                && (tmpPdf.getAbsolutePath().toLowerCase(java.util.Locale.ROOT)
                        .indexOf(tmpDir.toLowerCase(java.util.Locale.ROOT)) >= 0))
            tmpPdf.delete();
    }
}

From source file:net.homeip.donaldm.doxmentor4j.indexers.PDFIndexer.java

License:Open Source License

/**
 * Indexes the text of a PDF. When the page count is known, one index document is
 * added per page (fields "path", "title", "contents", "page"). Otherwise the whole
 * text is extracted into a temporary file and added as a single document with
 * page "-1"; that temporary file is deleted before the method returns.
 *
 * @param href        value stored in the "path" field
 * @param uri         location of the PDF
 * @param followLinks not referenced by this method's body
 * @param extraParams not referenced by this method's body
 * @return 1 on success, -1 when the index writer is null or any error occurs
 * @throws IOException declared by the signature; errors inside the body are logged
 *                     and reported through the return value
 */
@Override
public long index(String href, URI uri, boolean followLinks, Object... extraParams) throws IOException
//-----------------------------------------------------------------------------------------------------
{
    if (m_indexWriter == null) {
        logger.error("PDFIndexer: index writer is null");
        return -1;
    }
    PDDocument pdf = null;
    PDFTextStripper stripper = null;
    Reader reader = null;
    Writer writer = null;
    java.io.File tmpPdf = null;
    java.io.File textFile = null;
    try {
        tmpPdf = Utils.uri2File(uri);
        if (tmpPdf != null)
            pdf = PDDocument.load(tmpPdf.getAbsolutePath(), true);
        else
            pdf = PDDocument.load(uri.toURL(), true);
        PDDocumentInformation pdfInfo = pdf.getDocumentInformation();
        String title = pdfInfo.getTitle();
        if ((title == null) || (title.isEmpty()))
            title = uri.getPath();
        stripper = new PDFTextStripper();
        int noPages = pdf.getNumberOfPages();
        stripper.setSuppressDuplicateOverlappingText(false);
        if (noPages != PDDocument.UNKNOWN_NUMBER_OF_PAGES) {
            for (int page = 1; page <= noPages; page++) {
                // Restrict the stripper to the current page and index it on its own.
                stripper.setStartPage(page);
                stripper.setEndPage(page);
                writer = new StringWriter();
                stripper.writeText(pdf, writer);
                reader = new StringReader(writer.toString());
                Document doc = new Document();
                doc.add(new Field("path", href, Field.Store.YES, Field.Index.NO));
                doc.add(new Field("title", title, Field.Store.YES, Field.Index.ANALYZED));
                doc.add(new Field("contents", reader, Field.TermVector.WITH_POSITIONS_OFFSETS));
                doc.add(new Field("page", Integer.toString(page), Field.Store.YES, Field.Index.NO));
                if (addDocument(doc))
                    AjaxIndexer.incrementCount();
                try {
                    writer.close();
                    writer = null;
                } catch (Exception _e) {
                    // best-effort; the finally block retries the close
                }
                try {
                    reader.close();
                    reader = null;
                } catch (Exception _e) {
                    // best-effort; the finally block retries the close
                }
                if ((page % 50) == 0) {
                    try {
                        System.runFinalization();
                        System.gc();
                    } catch (Exception _e) {
                        // best-effort memory hint only
                    }
                }
            }
        } else {
            textFile = java.io.File.createTempFile("pdf", ".tmp");
            // A fixed charset is used for writing and reading so that characters the
            // platform default charset cannot encode are not lost on the round trip.
            writer = new java.io.OutputStreamWriter(new java.io.FileOutputStream(textFile), "UTF-8");
            stripper.writeText(pdf, writer);
            // A failing close means the text was not fully flushed; it is handled by
            // the catch block below rather than indexing truncated text.
            writer.close();
            writer = null;
            reader = new java.io.InputStreamReader(new java.io.FileInputStream(textFile), "UTF-8");
            Document doc = new Document();
            doc.add(new Field("path", href, Field.Store.YES, Field.Index.NO));
            doc.add(new Field("title", title, Field.Store.YES, Field.Index.ANALYZED));
            doc.add(new Field("contents", reader, Field.TermVector.WITH_POSITIONS_OFFSETS));
            doc.add(new Field("page", "-1", Field.Store.YES, Field.Index.NO));
            if (addDocument(doc))
                AjaxIndexer.incrementCount();
            try {
                reader.close();
                reader = null;
            } catch (Exception _e) {
                // best-effort; the finally block retries the close
            }
            try {
                System.runFinalization();
                System.gc();
            } catch (Exception _e) {
                // best-effort memory hint only
            }
        }
        return 1;
    } catch (Exception e) {
        logger.error("Error indexing PDF text from " + uri.toString(), e);
        return -1;
    } finally {
        if (stripper != null)
            try {
                stripper.resetEngine();
            } catch (Exception _e) {
                // best-effort cleanup
            }
        if (pdf != null)
            try {
                pdf.close();
            } catch (Exception _e) {
                // best-effort cleanup
            }
        if (writer != null)
            try {
                writer.close();
            } catch (Exception _e) {
                // best-effort cleanup
            }
        if (reader != null)
            try {
                reader.close();
            } catch (Exception _e) {
                // best-effort cleanup
            }
        // The extracted-text file is only needed inside this method; remove it once
        // the reader and writer on it are closed.
        if (textFile != null)
            textFile.delete();
        // Delete the source file only when it lives in the temp directory. Both
        // sides are lower-cased; lower-casing only the path never matches a temp
        // directory whose name contains upper-case characters.
        String tmpDir = System.getProperty("java.io.tmpdir");
        if ((tmpPdf != null) && (tmpDir != null)
                && (tmpPdf.getAbsolutePath().toLowerCase(java.util.Locale.ROOT)
                        .indexOf(tmpDir.toLowerCase(java.util.Locale.ROOT)) >= 0))
            tmpPdf.delete();
    }
}

From source file:net.padaf.preflight.xmp.SynchronizedMetaDataValidation.java

License:Apache License

/**
 * Checks whether the document information entries and the XMP information
 * are synchronized.
 *
 * @param document
 *          the PDF Document
 * @param metadata
 *          the XMP MetaData
 * @return List of validation errors
 * @throws ValidationException
 *           if the document or the metadata is null
 */
public List<ValidationError> validateMetadataSynchronization(PDDocument document, XMPMetadata metadata)
        throws ValidationException {
    if (document == null) {
        throw new ValidationException("Document provided is null");
    }
    // The information dictionary is fetched before the metadata check so that the
    // sequence of calls on the document does not depend on the metadata argument.
    PDDocumentInformation dico = document.getDocumentInformation();
    if (metadata == null) {
        throw new ValidationException("Metadata provided are null");
    }

    List<ValidationError> errors = new ArrayList<ValidationError>();

    // Properties compared against the Dublin Core schema: title, author, subject.
    DublinCoreSchema dc = metadata.getDublinCoreSchema();
    analyzeTitleProperty(dico, dc, errors);
    analyzeAuthorProperty(dico, dc, errors);
    analyzeSubjectProperty(dico, dc, errors);

    // Properties compared against the Adobe PDF schema: keywords, producer.
    AdobePDFSchema pdf = metadata.getAdobePDFSchema();
    analyzeKeywordsProperty(dico, pdf, errors);
    analyzeProducerProperty(dico, pdf, errors);

    // Properties compared against the XMP basic schema: creator tool, creation
    // date, modification date.
    XMPBasicSchema xmp = metadata.getXMPBasicSchema();
    analyzeCreatorToolProperty(dico, xmp, errors);
    analyzeCreationDateProperty(dico, xmp, errors);
    analyzeModifyDateProperty(dico, xmp, errors);

    return errors;
}