Example usage for org.apache.pdfbox.pdmodel PDDocument getDocumentCatalog

List of usage examples for org.apache.pdfbox.pdmodel.PDDocument.getDocumentCatalog()

Introduction

On this page you can find example usages of org.apache.pdfbox.pdmodel.PDDocument.getDocumentCatalog().

Prototype

public PDDocumentCatalog getDocumentCatalog() 

Source Link

Document

This will get the document CATALOG.

Usage

From source file:hightlighting.PDFTextAnnotator.java

License:Apache License

/**
 * Highlights a pattern within the PDF with the default color 
 * Returns the list of added annotations for further modification
 * Note: it will process every page, but cannot process patterns that span multiple pages 
 * Note: it will not work for top-bottom text (such as Chinese)
 * /*from   w  w w  .  ja  va 2 s .  com*/
 * @param pdf
 *          PDDocument
 * @param pattern
 *          Pattern (regex)
 * @throws Exception
 */
public List<PDAnnotationTextMarkup> highlight(PDDocument pdf, Pattern pattern) throws Exception {
    if (textCache == null) {
        throw new Exception("TextCache was not initilized, please run initialize on the document first");
    }

    List<PDPage> pages = pdf.getDocumentCatalog().getAllPages();

    ArrayList<PDAnnotationTextMarkup> highligts = new ArrayList<PDAnnotationTextMarkup>();

    for (int pageIndex = getStartPage() - 1; pageIndex < getEndPage()
            && pageIndex < pages.size(); pageIndex++) {
        PDPage page = pages.get(pageIndex);
        List<PDAnnotation> annotations = page.getAnnotations();

        List<Match> matches = this.textCache.getTextPositions(pageIndex + 1, pattern);

        for (Match match : matches) {
            List<PDRectangle> textBoundingBoxes = getTextBoundingBoxes(match.positions);

            if (textBoundingBoxes.size() > 0) {

                float[] quads = new float[8];
                int cursor = 0;
                for (PDRectangle rect : textBoundingBoxes) {
                    PDAnnotationTextMarkup markup = new PDAnnotationTextMarkup(
                            PDAnnotationTextMarkup.SUB_TYPE_HIGHLIGHT);
                    markup.setRectangle(rect);
                    float[] tmp = computeQuads(rect);
                    for (int i = 0; i < tmp.length; i++) {
                        quads[cursor + i] = tmp[i];
                    }
                    //cursor = cursor + 8;
                    markup.setQuadPoints(quads);

                    markup.setConstantOpacity((float) 0.8);
                    markup.setColour(getDefaultColor());
                    markup.setPrinted(true);
                    markup.setContents(match.str);

                    annotations.add(markup);
                    highligts.add(markup);
                }
            }
        }
    }
    return highligts;
}

From source file:indexer.Indexer.java

/**
 * Loads the PDF at {@code file_location}, draws one {@code "key- value"} string per map
 * entry onto its first page, and saves the modified document into an in-memory buffer
 * that is registered as a source on a local {@link PDFMergerUtility}.
 * <p>
 * NOTE(review): the merger utility is never asked to merge and nothing is written back
 * to disk in this method, so the result is not observable by the caller — confirm
 * whether this is intentional.
 *
 * @param file_location path of the PDF file to load
 * @param map           entries to print; each value is rendered via {@code toString()}
 * @throws IOException if loading, drawing or closing the document fails
 */
public static void writePDF(String file_location, Map<String, Set> map) throws IOException {
    PDFMergerUtility finalDoc = new PDFMergerUtility();
    PDDocument document = PDDocument.load(file_location);
    try {
        PDPage page = (PDPage) document.getDocumentCatalog().getAllPages().get(0);
        PDPageContentStream contentStream = new PDPageContentStream(document, page, true, true);
        try {
            contentStream.beginText();
            contentStream.setFont(PDType1Font.HELVETICA, 12);

            for (Map.Entry<String, Set> entry : map.entrySet()) {
                contentStream.drawString(entry.getKey() + "- " + entry.getValue().toString());
            }

            contentStream.endText();
        } finally {
            // Always release the content stream, even if drawing fails part-way.
            contentStream.close();
        }

        ByteArrayOutputStream out = new ByteArrayOutputStream();
        try {
            document.save(out);
        } catch (COSVisitorException ex) {
            Logger.getLogger(Indexer.class.getName()).log(Level.SEVERE, null, ex);
        }
        finalDoc.addSource(new ByteArrayInputStream(out.toByteArray()));
    } finally {
        // Always release the document so its underlying resources do not leak.
        document.close();
    }
}

From source file:io.konik.carriage.pdfbox.PDFBoxInvoiceAppender.java

License:Open Source License

/**
 * Embeds the given ZUGFeRD stream into the document: builds a file specification for it,
 * registers it in the embedded-files name tree under {@code ZF_FILE_NAME}, and lists it
 * in the catalog's {@code AF} array.
 *
 * @param doc         document that receives the attachment
 * @param zugferdFile content of the file to embed
 * @throws IOException propagated from the embedded-file helper
 */
private static void attachZugferdFile(PDDocument doc, InputStream zugferdFile) throws IOException {
    PDEmbeddedFile embedded = createEmbeddedFile(doc, zugferdFile);
    PDComplexFileSpecification spec = createFileSpecification(embedded);

    // Extra entries written directly into the file specification dictionary.
    COSDictionary specDictionary = spec.getCOSDictionary();
    specDictionary.setName("AFRelationship", "Alternative");
    specDictionary.setString("UF", ZF_FILE_NAME);

    PDEmbeddedFilesNameTreeNode embeddedFilesTree = new PDEmbeddedFilesNameTreeNode();
    embeddedFilesTree.setNames(singletonMap(ZF_FILE_NAME, spec));
    setNamesDictionary(doc, embeddedFilesTree);

    // The same specification is also referenced from the catalog's "AF" array.
    COSArray associatedFiles = new COSArray();
    associatedFiles.add(spec);
    doc.getDocumentCatalog().getCOSDictionary().setItem("AF", associatedFiles);
}

From source file:io.konik.carriage.pdfbox.PDFBoxInvoiceAppender.java

License:Open Source License

/**
 * Creates a name dictionary for the document's catalog, stores the given embedded-files
 * tree in it, and sets that dictionary as the catalog's names.
 *
 * @param doc              document whose catalog is updated
 * @param fileNameTreeNode embedded-files name tree to register
 */
private static void setNamesDictionary(PDDocument doc, PDEmbeddedFilesNameTreeNode fileNameTreeNode) {
    final PDDocumentCatalog catalog = doc.getDocumentCatalog();

    final PDDocumentNameDictionary names = new PDDocumentNameDictionary(catalog);
    names.setEmbeddedFiles(fileNameTreeNode);

    catalog.setNames(names);
}

From source file:io.konik.carriage.pdfbox.PDFBoxInvoiceAppender.java

License:Open Source License

/**
 * Attaches a fresh XMP metadata stream to the document and updates its document
 * information.
 * <p>
 * The XMP packet receives a PDF/A identification schema (part 3, conformance "B"),
 * Dublin Core, XMP Basic, Adobe PDF, the PDF extension schema from {@code zfDefaultXmp}
 * and a ZUGFeRD schema filled from {@code appendParameter}. The catalog is also flagged
 * as marked.
 *
 * @param doc             document to update
 * @param appendParameter supplies the ZUGFeRD conformance level and version
 * @throws IOException               if the metadata stream cannot be created or closed
 * @throws TransformerException      propagated from XMP serialization
 * @throws BadFieldValueException    propagated from the XMP schema setters
 * @throws XmpSerializationException propagated from the XMP/ZUGFeRD helpers
 */
private void setMetadata(PDDocument doc, AppendParameter appendParameter)
        throws IOException, TransformerException, BadFieldValueException, XmpSerializationException {
    Calendar now = Calendar.getInstance();
    PDDocumentCatalog catalog = doc.getDocumentCatalog();

    PDMetadata metadata = new PDMetadata(doc);
    catalog.setMetadata(metadata);

    XMPMetadata xmp = XMPMetadata.createXMPMetadata();
    PDFAIdentificationSchema pdfaid = new PDFAIdentificationSchema(xmp);
    pdfaid.setPart(Integer.valueOf(3));
    pdfaid.setConformance("B");
    xmp.addSchema(pdfaid);

    DublinCoreSchema dublicCore = new DublinCoreSchema(xmp);
    xmp.addSchema(dublicCore);

    XMPBasicSchema basicSchema = new XMPBasicSchema(xmp);
    basicSchema.setCreatorTool(PRODUCER);
    basicSchema.setCreateDate(now);
    xmp.addSchema(basicSchema);

    PDDocumentInformation pdi = doc.getDocumentInformation();
    pdi.setModificationDate(now);
    pdi.setProducer(PRODUCER);
    pdi.setAuthor(getAuthor());
    doc.setDocumentInformation(pdi);

    AdobePDFSchema pdf = new AdobePDFSchema(xmp);
    pdf.setProducer(PRODUCER);
    xmp.addSchema(pdf);

    PDMarkInfo markinfo = new PDMarkInfo();
    markinfo.setMarked(true);
    doc.getDocumentCatalog().setMarkInfo(markinfo);

    xmp.addSchema(zfDefaultXmp.getPDFExtensionSchema());
    XMPSchemaZugferd1p0 zf = new XMPSchemaZugferd1p0(xmp);
    zf.setConformanceLevel(appendParameter.zugferdConformanceLevel());
    zf.setVersion(appendParameter.zugferdVersion());
    xmp.addSchema(zf);

    // Close the metadata stream explicitly so the serialized packet is flushed and the
    // stream is released even when serialization fails.
    java.io.OutputStream xmpOut = metadata.createOutputStream();
    try {
        new XmpSerializer().serialize(xmp, xmpOut, true);
    } finally {
        xmpOut.close();
    }
}

From source file:io.konik.carriage.pdfbox.PDFBoxInvoiceExtractor.java

License:Open Source License

/**
 * Builds a name dictionary over the document's catalog, lists its embedded files and
 * returns the ZUGFeRD XML attachment found by the extraction helper.
 *
 * @param doc document to read the attachment from
 * @return whatever {@code extractZugferdXmlAttachment} yields for the embedded files
 * @throws IOException propagated from the helper methods
 */
private static final InputStream extractZugferdFileAttachment(PDDocument doc) throws IOException {
    PDDocumentNameDictionary names = new PDDocumentNameDictionary(doc.getDocumentCatalog());
    PDEmbeddedFilesNameTreeNode attachments = listEmbeddedFiles(names);
    InputStream zugferdXml = extractZugferdXmlAttachment(attachments);
    return zugferdXml;
}

From source file:jeanderson.br.converte.Converte.java

/**
 * Converts every page of {@code <nome>.pdf} from the user's
 * "Google Drive/Notas Fiscais/PDF" folder into a PNG image written to the sibling
 * "PNG" folder as {@code <nome><pageNumber>.png} (page numbers start at 1).
 * <p>
 * Does nothing when the PDF does not exist. I/O failures are reported through
 * {@code Relatar.bug} and logged instead of being thrown.
 *
 * @param nome base name of the PDF file, without extension
 */
public static void converter(String nome) {
    String baseDir = "/Users/" + System.getProperty("user.name") + "/Google Drive/Notas Fiscais/";
    File arquivo = new File(baseDir + "PDF/" + nome + ".pdf");
    if (!arquivo.exists()) {
        return;
    }

    try {
        PDDocument documento = PDDocument.load(arquivo);
        try {
            List<PDPage> lista = documento.getDocumentCatalog().getAllPages();
            int numeroDePagina = 1;
            for (PDPage pagina : lista) {
                BufferedImage imagem = pagina.convertToImage();
                File saida = new File(baseDir + "PNG/" + nome + numeroDePagina + ".png");
                ImageIO.write(imagem, "png", saida);
                numeroDePagina++;
            }
        } finally {
            // Release the document even when rendering or writing a page fails.
            documento.close();
        }
    } catch (IOException ex) {
        Relatar.bug(Converte.class.getName(), ex.toString());
        Logger.getLogger(Converte.class.getName()).log(Level.SEVERE, null, ex);
    }
}

From source file:main.java.vasolsim.common.GenericUtils.java

License:Open Source License

/**
 * Renders every page of a PDF file to an image.
 *
 * @param file pdf file
 *
 * @return one image per page, in page order
 *
 * @throws IOException if the file cannot be loaded, rendered or closed
 */
public static BufferedImage[] renderPDF(File file) throws IOException {
    PDDocument doc = PDDocument.load(file);
    try {
        @SuppressWarnings("unchecked")
        List<PDPage> pages = doc.getDocumentCatalog().getAllPages();
        BufferedImage[] images = new BufferedImage[pages.size()];
        int index = 0;
        for (PDPage page : pages) {
            images[index++] = page.convertToImage();
        }
        return images;
    } finally {
        // Release the document even when a page fails to render.
        doc.close();
    }
}

From source file:main.java.vasolsim.tclient.form.QuestionSetNode.java

License:Open Source License

/**
 * Rebuilds the question-set editor node: name field with an "Apply" button, the current
 * resource file label, a progress bar, thumbnails of the loaded resource images and the
 * "Load"/"Remove" buttons, together with their listeners. The rebuilt tree is stored in
 * {@code questionSetNode}.
 *
 * @param apply when {@code true}, the rebuilt node is also installed as the content of
 *              {@code CenterNode}'s scroll root
 */
public void redrawNode(boolean apply) {
    HBox horizontalRoot = new HBox();

    VBox verticalRoot = new VBox();
    verticalRoot.getStyleClass().add("borders");
    horizontalRoot.getChildren().add(verticalRoot);

    Label questionSetInfoLabel = new Label(TeacherClient.QUESTION_SET_INFO_LABEL_TEXT);
    questionSetInfoLabel.getStyleClass().add("lbltext");

    Label questionSetNameLabel = new Label(TeacherClient.QUESTION_SET_NAME_LABEL_TEXT);
    questionSetNameLabel.getStyleClass().add("lbltext");

    final TextField questionSetNameField = new TextField();
    questionSetNameField.setPrefWidth(400);

    Button applyNameButton = new Button("Apply");

    HBox spacer = new HBox();
    spacer.setPrefHeight(2);
    spacer.setPrefWidth(2000);
    spacer.getStyleClass().add("lblspacer");

    // NOTE(review): this label is configured but never added to verticalRoot below —
    // confirm whether it was meant to be displayed.
    Label resourceFileInfoLabel = new Label(TeacherClient.RESOURCE_FILE_INFO_LABEL_TEXT);
    resourceFileInfoLabel.getStyleClass().add("lbltext");
    resourceFileInfoLabel.setWrapText(true);

    final Label resourceFileLabel = new Label(lastPath == null ? "File: none" : "File: " + lastPath);
    resourceFileLabel.getStyleClass().add("lbltext");
    resourceFileLabel.setWrapText(true);

    HBox buttonBox = new HBox();
    buttonBox.getStyleClass().add("helementspacing");

    Button loadResourceButton = new Button("Load");
    Button removeResourceButton = new Button("Remove");

    buttonBox.getChildren().addAll(loadResourceButton, removeResourceButton);

    TilePane imageContainer = new TilePane();
    imageContainer.setPrefWidth(2000);
    imageContainer.setVgap(10);
    imageContainer.setHgap(10);
    if (qSet.getResources() != null) {
        // One fixed-width thumbnail per already-converted resource image.
        for (Image i : qSet.getFxResources()) {
            ImageView iv = new ImageView(i);
            iv.setPreserveRatio(true);
            iv.setFitWidth(150);
            iv.getStyleClass().add("pic");
            imageContainer.getChildren().add(iv);
        }
    } else {
        Label noImg = new Label("no resource to display");
        noImg.getStyleClass().add("lbltext");
        noImg.setWrapText(true);
        imageContainer.getChildren().add(noImg);
    }

    // The bar only takes up layout space while it is visible.
    final ProgressBar bar = new ProgressBar();
    bar.managedProperty().bind(bar.visibleProperty());
    bar.setVisible(false);
    bar.setPrefWidth(2000);

    /*
     * add elements
     */
    verticalRoot.getChildren().addAll(questionSetInfoLabel, questionSetNameLabel, questionSetNameField,
            applyNameButton, spacer, resourceFileLabel, bar, imageContainer, buttonBox);

    /*
     * Init listeners
     */
    applyNameButton.setOnMouseClicked(new EventHandler<MouseEvent>() {
        @Override
        public void handle(MouseEvent mouseEvent) {
            // Ignore empty or whitespace-only names.
            if (questionSetNameField.getText() != null && questionSetNameField.getText().trim().length() > 0) {
                boundTreeElement.label.setText(questionSetNameField.getText());
                questionSetNameField.clear();
            }
        }
    });

    loadResourceButton.setOnMouseClicked(new EventHandler<MouseEvent>() {
        @Override
        public void handle(MouseEvent mouseEvent) {
            FileChooser fc = new FileChooser();
            File resource = fc.showOpenDialog(TeacherClient.stage);
            if (resource == null) {
                // The dialog was dismissed without a selection; nothing to load.
                return;
            }

            String tmpPath;
            try {
                tmpPath = resource.getCanonicalPath();
            } catch (IOException e) {
                tmpPath = resource.getAbsolutePath();
            }

            if (tmpPath.equals("")) {
                tmpPath = lastPath;
            } else {
                lastPath = tmpPath;
            }

            final String path = tmpPath;

            // Renders the PDF off the FX thread. Progress covers two passes over the
            // pages: rendering to BufferedImage, then conversion to FX images.
            Task<Void> pdfRender = new Task<Void>() {
                @Override
                protected Void call() throws Exception {
                    int maxProgress = getPDFPages(new File(path)) * 2;
                    PDDocument doc = PDDocument.load(new File(path));
                    BufferedImage[] images;
                    try {
                        @SuppressWarnings("unchecked")
                        List<PDPage> pages = doc.getDocumentCatalog().getAllPages();
                        Iterator<PDPage> iterator = pages.iterator();
                        images = new BufferedImage[pages.size()];
                        for (int i = 0; iterator.hasNext(); i++) {
                            images[i] = iterator.next().convertToImage();
                            updateProgress(i, maxProgress);
                        }
                    } finally {
                        // Release the document even when a page fails to render.
                        doc.close();
                    }
                    qSet.setResources(images);

                    Image[] fxImages = new Image[images.length];
                    for (int i = 0; i < images.length; i++) {
                        fxImages[i] = convertBufferedImageToFXImage(images[i]);
                        updateProgress(images.length + i, maxProgress);
                    }

                    qSet.setFxResources(fxImages);

                    return null;
                }
            };
            bar.setVisible(true);
            bar.progressProperty().bind(pdfRender.progressProperty());
            resourceFileLabel.setText("File: " + tmpPath);

            // Register the completion handlers before the worker thread is started.
            pdfRender.setOnSucceeded(new EventHandler<WorkerStateEvent>() {
                @Override
                public void handle(WorkerStateEvent workerStateEvent) {
                    bar.setVisible(false);
                    qSet.setResourceType(ResourceType.PNG);
                    redrawNode(true);
                }
            });

            pdfRender.setOnFailed(new EventHandler<WorkerStateEvent>() {
                @Override
                public void handle(WorkerStateEvent workerStateEvent) {
                    bar.setVisible(false);
                    qSet.setResources(null);
                    qSet.setFxResources(null);
                    PopupManager.showMessage("PDF failed to open");
                    redrawNode(false);
                }
            });

            new Thread(pdfRender).start();
        }
    });

    removeResourceButton.setOnMouseClicked(new EventHandler<MouseEvent>() {
        @Override
        public void handle(MouseEvent mouseEvent) {
            qSet.removeResource();
            lastPath = null;
            redrawNode(true);
        }
    });

    questionSetNode = horizontalRoot;

    if (apply) {
        CenterNode.addScrollRoot();
        CenterNode.getScrollRoot().setContent(questionSetNode);
    }
}

From source file:mj.ocraptor.extraction.tika.parser.pdf.PDF2XHTML.java

License:Apache License

/**
 * Hands the files embedded in the document to an embedded-document extractor.
 * <p>
 * Names are taken from the embedded-files tree itself when it has any; otherwise only
 * its direct kids are inspected (no fully recursive search). This follows
 * pdfbox/examples/pdmodel/ExtractEmbeddedFiles.java.
 *
 * @param document document to inspect
 * @param handler  content handler (not used directly in this method)
 * @throws IOException   propagated from PDFBox or the name-processing helper
 * @throws SAXException  propagated from the name-processing helper
 * @throws TikaException propagated from the name-processing helper
 */
private void extractEmbeddedDocuments(PDDocument document, ContentHandler handler)
        throws IOException, SAXException, TikaException {
    PDDocumentNameDictionary nameDictionary = document.getDocumentCatalog().getNames();
    if (nameDictionary == null) {
        return;
    }

    PDEmbeddedFilesNameTreeNode embeddedTree = nameDictionary.getEmbeddedFiles();
    if (embeddedTree == null) {
        return;
    }

    // Use the extractor supplied through the context, or fall back to the parsing one.
    EmbeddedDocumentExtractor extractor = context.get(EmbeddedDocumentExtractor.class);
    if (extractor == null) {
        extractor = new ParsingEmbeddedDocumentExtractor(context);
    }

    Map<String, COSObjectable> topLevelNames = embeddedTree.getNames();
    if (topLevelNames != null) {
        processEmbeddedDocNames(topLevelNames, extractor);
        return;
    }

    // No names on the root node: look one level down, at its kids.
    List<PDNameTreeNode> children = embeddedTree.getKids();
    if (children == null) {
        return;
    }
    for (PDNameTreeNode child : children) {
        Map<String, COSObjectable> childNames = child.getNames();
        if (childNames == null) {
            continue;
        }
        processEmbeddedDocNames(childNames, extractor);
    }
}