List of usage examples for org.apache.pdfbox.pdmodel PDDocument getDocumentCatalog
public PDDocumentCatalog getDocumentCatalog()
From source file:hightlighting.PDFTextAnnotator.java
License:Apache License
/** * Highlights a pattern within the PDF with the default color * Returns the list of added annotations for further modification * Note: it will process every page, but cannot process patterns that span multiple pages * Note: it will not work for top-bottom text (such as Chinese) * /*from w w w . ja va 2 s . com*/ * @param pdf * PDDocument * @param pattern * Pattern (regex) * @throws Exception */ public List<PDAnnotationTextMarkup> highlight(PDDocument pdf, Pattern pattern) throws Exception { if (textCache == null) { throw new Exception("TextCache was not initilized, please run initialize on the document first"); } List<PDPage> pages = pdf.getDocumentCatalog().getAllPages(); ArrayList<PDAnnotationTextMarkup> highligts = new ArrayList<PDAnnotationTextMarkup>(); for (int pageIndex = getStartPage() - 1; pageIndex < getEndPage() && pageIndex < pages.size(); pageIndex++) { PDPage page = pages.get(pageIndex); List<PDAnnotation> annotations = page.getAnnotations(); List<Match> matches = this.textCache.getTextPositions(pageIndex + 1, pattern); for (Match match : matches) { List<PDRectangle> textBoundingBoxes = getTextBoundingBoxes(match.positions); if (textBoundingBoxes.size() > 0) { float[] quads = new float[8]; int cursor = 0; for (PDRectangle rect : textBoundingBoxes) { PDAnnotationTextMarkup markup = new PDAnnotationTextMarkup( PDAnnotationTextMarkup.SUB_TYPE_HIGHLIGHT); markup.setRectangle(rect); float[] tmp = computeQuads(rect); for (int i = 0; i < tmp.length; i++) { quads[cursor + i] = tmp[i]; } //cursor = cursor + 8; markup.setQuadPoints(quads); markup.setConstantOpacity((float) 0.8); markup.setColour(getDefaultColor()); markup.setPrinted(true); markup.setContents(match.str); annotations.add(markup); highligts.add(markup); } } } } return highligts; }
From source file:indexer.Indexer.java
public static void writePDF(String file_location, Map<String, Set> map) throws IOException { PDFMergerUtility finalDoc = new PDFMergerUtility(); PDDocument document = PDDocument.load(file_location); PDPage page = (PDPage) document.getDocumentCatalog().getAllPages().get(0); PDPageContentStream contentStream = new PDPageContentStream(document, page, true, true); contentStream.beginText();//ww w . j a v a 2 s . c om contentStream.setFont(PDType1Font.HELVETICA, 12); for (Map.Entry<String, Set> entry : map.entrySet()) { contentStream.drawString(entry.getKey() + "- " + entry.getValue().toString()); } contentStream.endText(); contentStream.close(); ByteArrayOutputStream out = new ByteArrayOutputStream(); try { document.save(out); } catch (COSVisitorException ex) { Logger.getLogger(Indexer.class.getName()).log(Level.SEVERE, null, ex); } finalDoc.addSource(new ByteArrayInputStream(out.toByteArray())); document.close(); }
From source file:io.konik.carriage.pdfbox.PDFBoxInvoiceAppender.java
License:Open Source License
/**
 * Embeds the given ZUGFeRD file into the document.
 * <p>
 * The file specification built from the embedded file is tagged with
 * {@code AFRelationship = Alternative} and {@code UF = ZF_FILE_NAME}, registered under
 * {@code ZF_FILE_NAME} in an embedded-files name tree, and finally placed in an array
 * stored as the {@code AF} item of the catalog dictionary.
 *
 * @param doc         document that receives the attachment
 * @param zugferdFile content of the file to embed
 * @throws IOException if the embedded file cannot be created
 */
private static void attachZugferdFile(PDDocument doc, InputStream zugferdFile) throws IOException {
    PDComplexFileSpecification spec = createFileSpecification(createEmbeddedFile(doc, zugferdFile));

    COSDictionary specDictionary = spec.getCOSDictionary();
    specDictionary.setName("AFRelationship", "Alternative");
    specDictionary.setString("UF", ZF_FILE_NAME);

    PDEmbeddedFilesNameTreeNode nameTree = new PDEmbeddedFilesNameTreeNode();
    nameTree.setNames(singletonMap(ZF_FILE_NAME, spec));
    setNamesDictionary(doc, nameTree);

    COSArray associatedFiles = new COSArray();
    associatedFiles.add(spec);
    doc.getDocumentCatalog().getCOSDictionary().setItem("AF", associatedFiles);
}
From source file:io.konik.carriage.pdfbox.PDFBoxInvoiceAppender.java
License:Open Source License
/**
 * Creates a names dictionary for the document catalog, stores the given embedded-files
 * name tree in it and sets it as the catalog's names.
 *
 * @param doc              document whose catalog is updated
 * @param fileNameTreeNode embedded-files name tree to register
 */
private static void setNamesDictionary(PDDocument doc, PDEmbeddedFilesNameTreeNode fileNameTreeNode) {
    final PDDocumentCatalog catalog = doc.getDocumentCatalog();
    final PDDocumentNameDictionary names = new PDDocumentNameDictionary(catalog);

    names.setEmbeddedFiles(fileNameTreeNode);
    catalog.setNames(names);
}
From source file:io.konik.carriage.pdfbox.PDFBoxInvoiceAppender.java
License:Open Source License
private void setMetadata(PDDocument doc, AppendParameter appendParameter) throws IOException, TransformerException, BadFieldValueException, XmpSerializationException { Calendar now = Calendar.getInstance(); PDDocumentCatalog catalog = doc.getDocumentCatalog(); PDMetadata metadata = new PDMetadata(doc); catalog.setMetadata(metadata);//from www . j av a2 s . c o m XMPMetadata xmp = XMPMetadata.createXMPMetadata(); PDFAIdentificationSchema pdfaid = new PDFAIdentificationSchema(xmp); pdfaid.setPart(Integer.valueOf(3)); pdfaid.setConformance("B"); xmp.addSchema(pdfaid); DublinCoreSchema dublicCore = new DublinCoreSchema(xmp); xmp.addSchema(dublicCore); XMPBasicSchema basicSchema = new XMPBasicSchema(xmp); basicSchema.setCreatorTool(PRODUCER); basicSchema.setCreateDate(now); xmp.addSchema(basicSchema); PDDocumentInformation pdi = doc.getDocumentInformation(); pdi.setModificationDate(now); pdi.setProducer(PRODUCER); pdi.setAuthor(getAuthor()); doc.setDocumentInformation(pdi); AdobePDFSchema pdf = new AdobePDFSchema(xmp); pdf.setProducer(PRODUCER); xmp.addSchema(pdf); PDMarkInfo markinfo = new PDMarkInfo(); markinfo.setMarked(true); doc.getDocumentCatalog().setMarkInfo(markinfo); xmp.addSchema(zfDefaultXmp.getPDFExtensionSchema()); XMPSchemaZugferd1p0 zf = new XMPSchemaZugferd1p0(xmp); zf.setConformanceLevel(appendParameter.zugferdConformanceLevel()); zf.setVersion(appendParameter.zugferdVersion()); xmp.addSchema(zf); new XmpSerializer().serialize(xmp, metadata.createOutputStream(), true); }
From source file:io.konik.carriage.pdfbox.PDFBoxInvoiceExtractor.java
License:Open Source License
/**
 * Wraps the document catalog in a names dictionary, lists its embedded files and
 * returns the stream produced by {@code extractZugferdXmlAttachment} for them.
 *
 * @param doc document to read the attachment from
 * @return the stream returned by {@code extractZugferdXmlAttachment}
 * @throws IOException if the embedded files cannot be read
 */
private static final InputStream extractZugferdFileAttachment(PDDocument doc) throws IOException {
    return extractZugferdXmlAttachment(
            listEmbeddedFiles(new PDDocumentNameDictionary(doc.getDocumentCatalog())));
}
From source file:jeanderson.br.converte.Converte.java
/**
 * Renders every page of the PDF named {@code nome} to a PNG image.
 * <p>
 * The PDF is read from {@code /Users/<user.name>/Google Drive/Notas Fiscais/PDF/<nome>.pdf};
 * page {@code n} (starting at 1) is written to
 * {@code /Users/<user.name>/Google Drive/Notas Fiscais/PNG/<nome><n>.png}.
 * Nothing happens when the PDF does not exist. I/O failures are reported through
 * {@code Relatar.bug} and logged; they are not propagated to the caller.
 *
 * @param nome file name of the PDF without the {@code .pdf} extension
 */
public static void converter(String nome) {
    File arquivo = new File(
            "/Users/" + System.getProperty("user.name") + "/Google Drive/Notas Fiscais/PDF/" + nome + ".pdf");
    if (!arquivo.exists()) {
        return;
    }
    try {
        PDDocument documento = PDDocument.load(arquivo);
        try {
            @SuppressWarnings("unchecked")
            List<PDPage> lista = documento.getDocumentCatalog().getAllPages();
            int numeroDePagina = 1;
            for (PDPage pagina : lista) {
                BufferedImage imagem = pagina.convertToImage();
                File saida = new File("/Users/" + System.getProperty("user.name")
                        + "/Google Drive/Notas Fiscais/PNG/" + nome + numeroDePagina + ".png");
                ImageIO.write(imagem, "png", saida);
                numeroDePagina++;
            }
        } finally {
            // Release the document even when rendering or writing a page fails.
            documento.close();
        }
    } catch (IOException ex) {
        Relatar.bug(Converte.class.getName(), ex.toString());
        Logger.getLogger(Converte.class.getName()).log(Level.SEVERE, null, ex);
    }
}
From source file:main.java.vasolsim.common.GenericUtils.java
License:Open Source License
/**
 * Renders a PDF to images, one image per page in page order.
 *
 * @param file pdf file
 *
 * @return images
 *
 * @throws IOException if the file cannot be loaded, a page cannot be rendered, or the
 *                     document cannot be closed
 */
public static BufferedImage[] renderPDF(File file) throws IOException {
    PDDocument doc = PDDocument.load(file);
    try {
        @SuppressWarnings("unchecked")
        List<PDPage> pages = doc.getDocumentCatalog().getAllPages();
        BufferedImage[] images = new BufferedImage[pages.size()];
        int index = 0;
        for (PDPage page : pages) {
            images[index++] = page.convertToImage();
        }
        return images;
    } finally {
        // Release the document even when rendering a page fails.
        doc.close();
    }
}
From source file:main.java.vasolsim.tclient.form.QuestionSetNode.java
License:Open Source License
public void redrawNode(boolean apply) { HBox horizontalRoot = new HBox(); VBox verticalRoot = new VBox(); verticalRoot.getStyleClass().add("borders"); horizontalRoot.getChildren().add(verticalRoot); Label questionSetInfoLabel = new Label(TeacherClient.QUESTION_SET_INFO_LABEL_TEXT); questionSetInfoLabel.getStyleClass().add("lbltext"); Label questionSetNameLabel = new Label(TeacherClient.QUESTION_SET_NAME_LABEL_TEXT); questionSetNameLabel.getStyleClass().add("lbltext"); final TextField questionSetNameField = new TextField(); questionSetNameField.setPrefWidth(400); Button applyNameButton = new Button("Apply"); HBox spacer = new HBox(); spacer.setPrefHeight(2);//from w w w . j a v a2 s . com spacer.setPrefWidth(2000); spacer.getStyleClass().add("lblspacer"); Label resourceFileInfoLabel = new Label(TeacherClient.RESOURCE_FILE_INFO_LABEL_TEXT); resourceFileInfoLabel.getStyleClass().add("lbltext"); resourceFileInfoLabel.setWrapText(true); final Label resourceFileLabel = new Label(lastPath == null ? 
"File: none" : "File: " + lastPath); resourceFileLabel.getStyleClass().add("lbltext"); resourceFileLabel.setWrapText(true); HBox buttonBox = new HBox(); buttonBox.getStyleClass().add("helementspacing"); Button loadResourceButton = new Button("Load"); Button removeResourceButton = new Button("Remove"); buttonBox.getChildren().addAll(loadResourceButton, removeResourceButton); TilePane imageContainer = new TilePane(); imageContainer.setPrefWidth(2000); imageContainer.setVgap(10); imageContainer.setHgap(10); if (qSet.getResources() != null) for (Image i : qSet.getFxResources()) { ImageView iv = new ImageView(i); iv.setPreserveRatio(true); iv.setFitWidth(150); iv.getStyleClass().add("pic"); imageContainer.getChildren().add(iv); } else { Label noImg = new Label("no resource to display"); noImg.getStyleClass().add("lbltext"); noImg.setWrapText(true); imageContainer.getChildren().add(noImg); } final ProgressBar bar = new ProgressBar(); bar.managedProperty().bind(bar.visibleProperty()); bar.setVisible(false); bar.setPrefWidth(2000); /* * add elements */ verticalRoot.getChildren().addAll(questionSetInfoLabel, questionSetNameLabel, questionSetNameField, applyNameButton, spacer, resourceFileLabel, bar, imageContainer, buttonBox); /* * Init listeners */ applyNameButton.setOnMouseClicked(new EventHandler<MouseEvent>() { @Override public void handle(MouseEvent mouseEvent) { if (questionSetNameField.getText() != null && questionSetNameField.getText().trim().length() > 0) { boundTreeElement.label.setText(questionSetNameField.getText()); questionSetNameField.clear(); } } }); loadResourceButton.setOnMouseClicked(new EventHandler<MouseEvent>() { @Override public void handle(MouseEvent mouseEvent) { FileChooser fc = new FileChooser(); File resource = fc.showOpenDialog(TeacherClient.stage); String tmpPath; try { tmpPath = resource.getCanonicalPath(); } catch (IOException e) { tmpPath = resource.getAbsolutePath(); } if (tmpPath.equals("")) tmpPath = lastPath; else lastPath = tmpPath; 
final String path = tmpPath; Task pdfRender = new Task<Void>() { @Override protected Void call() throws Exception { int maxProgress = getPDFPages(new File(path)) * 2; PDDocument doc = PDDocument.load(new File(path)); @SuppressWarnings("unchecked") List<PDPage> pages = doc.getDocumentCatalog().getAllPages(); Iterator<PDPage> iterator = pages.iterator(); BufferedImage[] images = new BufferedImage[pages.size()]; for (int i = 0; iterator.hasNext(); i++) { images[i] = iterator.next().convertToImage(); updateProgress(i, maxProgress); } doc.close(); qSet.setResources(images); Image[] fxImages = new Image[images.length]; for (int i = 0; i < images.length; i++) { fxImages[i] = convertBufferedImageToFXImage(images[i]); updateProgress(images.length + i, maxProgress); } qSet.setFxResources(fxImages); return null; } }; bar.setVisible(true); bar.progressProperty().bind(pdfRender.progressProperty()); resourceFileLabel.setText("File: " + tmpPath); new Thread(pdfRender).start(); //qSet.loadPDFResource(tmpPath); //redrawNode(true); pdfRender.setOnSucceeded(new EventHandler<WorkerStateEvent>() { @Override public void handle(WorkerStateEvent workerStateEvent) { bar.setVisible(false); qSet.setResourceType(ResourceType.PNG); redrawNode(true); } }); pdfRender.setOnFailed(new EventHandler<WorkerStateEvent>() { @Override public void handle(WorkerStateEvent workerStateEvent) { bar.setVisible(false); qSet.setResources(null); qSet.setFxResources(null); PopupManager.showMessage("PDF failed to open"); redrawNode(false); } }); } }); removeResourceButton.setOnMouseClicked(new EventHandler<MouseEvent>() { @Override public void handle(MouseEvent mouseEvent) { qSet.removeResource(); lastPath = null; redrawNode(true); } }); questionSetNode = horizontalRoot; if (apply) { CenterNode.addScrollRoot(); CenterNode.getScrollRoot().setContent(questionSetNode); } }
From source file:mj.ocraptor.extraction.tika.parser.pdf.PDF2XHTML.java
License:Apache License
private void extractEmbeddedDocuments(PDDocument document, ContentHandler handler) throws IOException, SAXException, TikaException { PDDocumentCatalog catalog = document.getDocumentCatalog(); PDDocumentNameDictionary names = catalog.getNames(); if (names == null) { return;/*from ww w . ja v a 2 s. c o m*/ } PDEmbeddedFilesNameTreeNode embeddedFiles = names.getEmbeddedFiles(); if (embeddedFiles == null) { return; } EmbeddedDocumentExtractor embeddedExtractor = context.get(EmbeddedDocumentExtractor.class); if (embeddedExtractor == null) { embeddedExtractor = new ParsingEmbeddedDocumentExtractor(context); } Map<String, COSObjectable> embeddedFileNames = embeddedFiles.getNames(); // For now, try to get the embeddedFileNames out of embeddedFiles or its // kids. // This code follows: pdfbox/examples/pdmodel/ExtractEmbeddedFiles.java // If there is a need we could add a fully recursive search to find a // non-null // Map<String, COSObjectable> that contains the doc info. if (embeddedFileNames != null) { processEmbeddedDocNames(embeddedFileNames, embeddedExtractor); } else { List<PDNameTreeNode> kids = embeddedFiles.getKids(); if (kids == null) { return; } for (PDNameTreeNode n : kids) { Map<String, COSObjectable> childNames = n.getNames(); if (childNames != null) { processEmbeddedDocNames(childNames, embeddedExtractor); } } } }