List of usage examples for org.apache.pdfbox.pdmodel PDDocument getDocumentInformation
public PDDocumentInformation getDocumentInformation()
From source file:fr.univ_tours.etu.searcher.LucenePDFDocument.java
License:Apache License
/** * This will add the contents to the lucene document. * * @param document The document to add the contents to. * @param is The stream to get the contents from. * @param documentLocation The location of the document, used just for debug messages. * * @throws IOException If there is an error parsing the document. *///from w w w.jav a2 s . c o m private void addContent(Document document, InputStream is, String documentLocation) throws IOException { PDDocument pdfDocument = null; try { pdfDocument = PDDocument.load(is); // create a writer where to append the text content. StringWriter writer = new StringWriter(); if (stripper == null) { stripper = new PDFTextStripper(); } stripper.writeText(pdfDocument, writer); // Note: the buffer to string operation is costless; // the char array value of the writer buffer and the content string // is shared as long as the buffer content is not modified, which will // not occur here. String contents = writer.getBuffer().toString(); StringReader reader = new StringReader(contents); // Add the tag-stripped contents as a Reader-valued Text field so it will // get tokenized and indexed. addTextField(document, "contents", reader); PDDocumentInformation info = pdfDocument.getDocumentInformation(); if (info != null) { addTextField(document, "Author", info.getAuthor()); addTextField(document, "CreationDate", info.getCreationDate()); addTextField(document, "Creator", info.getCreator()); addTextField(document, "Keywords", info.getKeywords()); addTextField(document, "ModificationDate", info.getModificationDate()); addTextField(document, "Producer", info.getProducer()); addTextField(document, "Subject", info.getSubject()); addTextField(document, "Title", info.getTitle()); addTextField(document, "Trapped", info.getTrapped()); } int summarySize = Math.min(contents.length(), 1500); String summary = contents.substring(0, summarySize); // Add the summary as an UnIndexed field, so that it is stored and returned // with hit documents for display. addUnindexedField(document, "summary", summary); } finally { if (pdfDocument != null) { pdfDocument.close(); } } }
From source file:io.konik.carriage.pdfbox.PDFBoxInvoiceAppender.java
License:Open Source License
private void setMetadata(PDDocument doc, AppendParameter appendParameter) throws IOException, TransformerException, BadFieldValueException, XmpSerializationException { Calendar now = Calendar.getInstance(); PDDocumentCatalog catalog = doc.getDocumentCatalog(); PDMetadata metadata = new PDMetadata(doc); catalog.setMetadata(metadata);//from w w w. j a v a 2 s. co m XMPMetadata xmp = XMPMetadata.createXMPMetadata(); PDFAIdentificationSchema pdfaid = new PDFAIdentificationSchema(xmp); pdfaid.setPart(Integer.valueOf(3)); pdfaid.setConformance("B"); xmp.addSchema(pdfaid); DublinCoreSchema dublicCore = new DublinCoreSchema(xmp); xmp.addSchema(dublicCore); XMPBasicSchema basicSchema = new XMPBasicSchema(xmp); basicSchema.setCreatorTool(PRODUCER); basicSchema.setCreateDate(now); xmp.addSchema(basicSchema); PDDocumentInformation pdi = doc.getDocumentInformation(); pdi.setModificationDate(now); pdi.setProducer(PRODUCER); pdi.setAuthor(getAuthor()); doc.setDocumentInformation(pdi); AdobePDFSchema pdf = new AdobePDFSchema(xmp); pdf.setProducer(PRODUCER); xmp.addSchema(pdf); PDMarkInfo markinfo = new PDMarkInfo(); markinfo.setMarked(true); doc.getDocumentCatalog().setMarkInfo(markinfo); xmp.addSchema(zfDefaultXmp.getPDFExtensionSchema()); XMPSchemaZugferd1p0 zf = new XMPSchemaZugferd1p0(xmp); zf.setConformanceLevel(appendParameter.zugferdConformanceLevel()); zf.setVersion(appendParameter.zugferdVersion()); xmp.addSchema(zf); new XmpSerializer().serialize(xmp, metadata.createOutputStream(), true); }
From source file:merge_split.MergeSplit.java
License:Apache License
private void MergeButtonActionPerformed(java.awt.event.ActionEvent evt) {//GEN-FIRST:event_MergeButtonActionPerformed try {/*www. ja va2 s . c om*/ PDDocument samplePdf = new PDDocument(); ArrayList<PDDocument> list = new ArrayList<>(); for (int i = 0; i < dtm.getRowCount(); i++) { File file = new File((String) dtm.getValueAt(i, 0)); String code = (String) dtm.getValueAt(i, 3); PDDocument doc1; if (code.equals("ok")) { doc1 = PDDocument.load(file); } else { doc1 = PDDocument.load(file, code); } list.add(doc1); doc1.setAllSecurityToBeRemoved(true); TreeSet tree = findPages((String) dtm.getValueAt(i, 2)); for (int j = 0; j < doc1.getNumberOfPages(); j++) { if (tree.contains(j + 1)) { samplePdf.addPage(doc1.getPage(j)); } } } System.out.println("Number:" + samplePdf.getNumberOfPages()); String destination = jTextField1.getText() + "\\" + jTextField2.getText() + ".pdf"; PDDocumentInformation info = samplePdf.getDocumentInformation(); info.setAuthor(jTextField3.getText()); File output = new File(destination); samplePdf.save(output); samplePdf.close(); for (int i = 0; i < list.size(); i++) { list.get(i).close(); } } catch (IOException ex) { JOptionPane.showMessageDialog(null, "Your input is incorrect. Please fill all the fields.", "Input warning", JOptionPane.WARNING_MESSAGE); } }
From source file:merge_split.MergeSplit.java
License:Apache License
private void RotateButtonActionPerformed(java.awt.event.ActionEvent evt) {//GEN-FIRST:event_RotateButtonActionPerformed try {/*ww w.j av a 2s. c om*/ PDDocument samplePdf = new PDDocument(); File file = new File(RotateFileField.getText()); PDDocument doc1; if (rotatecode.equals("ok")) { doc1 = PDDocument.load(file); } else { doc1 = PDDocument.load(file, rotatecode); } doc1.setAllSecurityToBeRemoved(true); TreeSet tree = findPages(RotatePagesField.getText()); for (int j = 0; j < doc1.getNumberOfPages(); j++) { PDPage page = doc1.getPage(j); if (tree.contains(j + 1)) { if (Rotate90.isSelected()) { page.setRotation(90); samplePdf.addPage(page); } else if (Rotate180.isSelected()) { page.setRotation(180); samplePdf.addPage(page); } else if (Rotate270.isSelected()) { page.setRotation(270); samplePdf.addPage(page); } } else { samplePdf.addPage(page); } } System.out.println("Number:" + samplePdf.getNumberOfPages()); String destination = RotateDestinationField.getText() + "\\" + RotateNameField.getText() + ".pdf"; PDDocumentInformation info = samplePdf.getDocumentInformation(); info.setAuthor(RotateAuthorField.getText()); File output = new File(destination); samplePdf.save(output); samplePdf.close(); } catch (IOException ex) { Logger.getLogger(MergeSplit.class.getName()).log(Level.SEVERE, null, ex); JOptionPane.showMessageDialog(null, "Your input is incorrect. Please fill all the fields.", "Input warning", JOptionPane.WARNING_MESSAGE); } }
From source file:merge_split.MergeSplit.java
License:Apache License
private void RotateButton1ActionPerformed(java.awt.event.ActionEvent evt) {//GEN-FIRST:event_RotateButton1ActionPerformed PDDocument document = new PDDocument(); InputStream in = null;//from w ww. ja va 2s.c o m BufferedImage bimg = null; try { in = new FileInputStream((String) ImageFileField.getText()); bimg = ImageIO.read(in); } catch (IOException ex) { JOptionPane.showMessageDialog(null, "Image could not be read.", "Image could not be read", JOptionPane.WARNING_MESSAGE); } float width = bimg.getWidth(); float height = bimg.getHeight(); PDPage page = new PDPage(new PDRectangle(width, height)); document.addPage(page); PDImageXObject imgpdf; try { imgpdf = PDImageXObject.createFromFile((String) ImageFileField.getText(), document); try (PDPageContentStream contentStream = new PDPageContentStream(document, page)) { contentStream.drawImage(imgpdf, 0, 0); } in.close(); } catch (IOException ex) { JOptionPane.showMessageDialog(null, "Image could not be converted.", "Proccess could not be finished", JOptionPane.WARNING_MESSAGE); } String destination = ImageDestinationField.getText() + "\\" + ImageNameField.getText() + ".pdf"; PDDocumentInformation info = document.getDocumentInformation(); info.setAuthor(ImageAuthorField.getText()); File output = new File(destination); try { document.save(output); } catch (IOException ex) { JOptionPane.showMessageDialog(null, "Not all fields were filled.", "Input Problem", JOptionPane.WARNING_MESSAGE); } try { document.close(); } catch (IOException ex) { JOptionPane.showMessageDialog(null, "Not all fields were filled.", "Input Problem", JOptionPane.WARNING_MESSAGE); } }
From source file:merge_split.MergeSplit.java
License:Apache License
private void SplitButtonActionPerformed(java.awt.event.ActionEvent evt) {//GEN-FIRST:event_SplitButtonActionPerformed try {/*from ww w . j a va 2 s . c o m*/ File file = new File(SplitFileField.getText()); PDDocument doc1; if (splitcode.equals("ok")) { doc1 = PDDocument.load(file); } else { doc1 = PDDocument.load(file, splitcode); } doc1.setAllSecurityToBeRemoved(true); if (MultipleButton.isSelected()) { PDDocument pdf1 = new PDDocument(); PDDocument pdf2 = new PDDocument(); TreeSet tree = findPages(SplitPagesField.getText()); for (int j = 0; j < doc1.getNumberOfPages(); j++) { PDPage page = doc1.getPage(j); if (tree.contains(j + 1)) { pdf1.addPage(page); } else { pdf2.addPage(page); } } String destination1 = SplitDestinationField.getText() + "\\" + SplitNameField.getText() + "1.pdf"; String destination2 = SplitDestinationField.getText() + "\\" + SplitNameField.getText() + "2.pdf"; PDDocumentInformation info = pdf1.getDocumentInformation(); info.setAuthor(SplitAuthorField.getText()); PDDocumentInformation info2 = pdf2.getDocumentInformation(); info2.setAuthor(SplitAuthorField.getText()); if (pdf1.getNumberOfPages() > 0) { File output1 = new File(destination1); pdf1.save(output1); } if (pdf2.getNumberOfPages() > 0) { File output2 = new File(destination2); pdf2.save(output2); } pdf1.close(); pdf2.close(); } else if (SingleButton.isSelected()) { for (int j = 0; j < doc1.getNumberOfPages(); j++) { PDDocument pdf1 = new PDDocument(); PDPage page = doc1.getPage(j); pdf1.addPage(page); int pagenumber = j + 1; String destination1 = SplitDestinationField.getText() + "\\" + SplitNameField.getText() + pagenumber + ".pdf"; PDDocumentInformation info = pdf1.getDocumentInformation(); info.setAuthor(SplitAuthorField.getText()); if (pdf1.getNumberOfPages() > 0) { File output1 = new File(destination1); pdf1.save(output1); } pdf1.close(); } } doc1.close(); } catch (IOException ex) { JOptionPane.showMessageDialog(null, "Your input is incorrect. Please fill all the fields.", "Input warning", JOptionPane.WARNING_MESSAGE); java.util.logging.Logger.getLogger(MergeSplit.class.getName()).log(java.util.logging.Level.SEVERE, null, ex); } }
From source file:mj.ocraptor.extraction.tika.parser.pdf.PDFParser.java
License:Apache License
private void extractMetadata(PDDocument document, Metadata metadata) throws TikaException { PDDocumentInformation info = document.getDocumentInformation(); metadata.set(PagedText.N_PAGES, document.getNumberOfPages()); addMetadata(metadata, TikaCoreProperties.TITLE, info.getTitle()); addMetadata(metadata, TikaCoreProperties.CREATOR, info.getAuthor()); addMetadata(metadata, TikaCoreProperties.CREATOR_TOOL, info.getCreator()); addMetadata(metadata, TikaCoreProperties.KEYWORDS, info.getKeywords()); addMetadata(metadata, "producer", info.getProducer()); // TODO: Move to description in Tika 2.0 addMetadata(metadata, TikaCoreProperties.TRANSITION_SUBJECT_TO_OO_SUBJECT, info.getSubject()); addMetadata(metadata, "trapped", info.getTrapped()); try {//from w ww .ja v a2s .c om // TODO Remove these in Tika 2.0 addMetadata(metadata, "created", info.getCreationDate()); addMetadata(metadata, TikaCoreProperties.CREATED, info.getCreationDate()); } catch (IOException e) { // Invalid date format, just ignore } try { Calendar modified = info.getModificationDate(); addMetadata(metadata, Metadata.LAST_MODIFIED, modified); addMetadata(metadata, TikaCoreProperties.MODIFIED, modified); } catch (IOException e) { // Invalid date format, just ignore } // All remaining metadata is custom // Copy this over as-is List<String> handledMetadata = Arrays.asList(new String[] { "Author", "Creator", "CreationDate", "ModDate", "Keywords", "Producer", "Subject", "Title", "Trapped" }); for (COSName key : info.getDictionary().keySet()) { String name = key.getName(); if (!handledMetadata.contains(name)) { addMetadata(metadata, name, info.getDictionary().getDictionaryObject(key)); } } }
From source file:net.homeip.donaldm.doxmentor4j.indexers.PDFIndexer.java
License:Open Source License
@Override public Reader getText(URI uri, int page, StringBuilder title) throws FileNotFoundException, MalformedURLException, IOException //----------------------------------------------------------------------------------------- { FileWriter writer = null;/*from w w w . java 2s .com*/ PDDocument pdf = null; PDFTextStripper stripper = null; java.io.File tmpPdf = null; try { tmpPdf = Utils.uri2File(uri); if (tmpPdf != null) pdf = PDDocument.load(tmpPdf.getAbsolutePath(), true); else pdf = PDDocument.load(uri.toURL(), true); PDDocumentInformation pdfInfo = pdf.getDocumentInformation(); String s = pdfInfo.getTitle(); if ((s == null) || (s.length() == 0)) s = uri.getPath(); if (title != null) title.append(s); stripper = new PDFTextStripper(); if (page >= 0) { stripper.setStartPage(page); stripper.setEndPage(page); } else { stripper.setStartPage(1); stripper.setEndPage(pdf.getNumberOfPages()); } java.io.File f = java.io.File.createTempFile("pdf", ".tmp"); writer = new FileWriter(f); stripper.writeText(pdf, writer); try { writer.close(); writer = null; } catch (Exception _e) { } stripper.resetEngine(); return new FileReader(f); } finally { if (stripper != null) try { stripper.resetEngine(); } catch (Exception _e) { } if (pdf != null) try { pdf.close(); } catch (Exception _e) { } if (writer != null) try { writer.close(); } catch (Exception _e) { } if ((tmpPdf != null) && (tmpPdf.getAbsolutePath().toLowerCase().indexOf(System.getProperty("java.io.tmpdir")) >= 0)) tmpPdf.delete(); } }
From source file:net.homeip.donaldm.doxmentor4j.indexers.PDFIndexer.java
License:Open Source License
@Override public long index(String href, URI uri, boolean followLinks, Object... extraParams) throws IOException //----------------------------------------------------------------------------------------------------- { if (m_indexWriter == null) { logger.error("PDFIndexer: index writer is null"); return -1; }// w w w . j a v a 2s . c om PDDocument pdf = null; PDFTextStripper stripper = null; Reader reader = null; Writer writer = null; java.io.File tmpPdf = null; try { tmpPdf = Utils.uri2File(uri); if (tmpPdf != null) pdf = PDDocument.load(tmpPdf.getAbsolutePath(), true); else pdf = PDDocument.load(uri.toURL(), true); PDDocumentInformation pdfInfo = pdf.getDocumentInformation(); String title = pdfInfo.getTitle(); if ((title == null) || (title.isEmpty())) title = uri.getPath(); stripper = new PDFTextStripper(); int noPages = pdf.getNumberOfPages(); stripper.setSuppressDuplicateOverlappingText(false); if (noPages != PDDocument.UNKNOWN_NUMBER_OF_PAGES) { for (int page = 1; page <= noPages; page++) { stripper.setStartPage(page); stripper.setEndPage(page); writer = new StringWriter(); stripper.writeText(pdf, writer); reader = new StringReader(writer.toString()); Document doc = new Document(); doc.add(new Field("path", href, Field.Store.YES, Field.Index.NO)); doc.add(new Field("title", title.toString(), Field.Store.YES, Field.Index.ANALYZED)); doc.add(new Field("contents", reader, Field.TermVector.WITH_POSITIONS_OFFSETS)); doc.add(new Field("page", Integer.toString(page), Field.Store.YES, Field.Index.NO)); if (addDocument(doc)) AjaxIndexer.incrementCount(); try { writer.close(); writer = null; } catch (Exception _e) { } try { reader.close(); reader = null; } catch (Exception _e) { } if ((page % 50) == 0) { try { System.runFinalization(); System.gc(); } catch (Exception _e) { } } } } else { java.io.File f = java.io.File.createTempFile("pdf", ".tmp"); writer = new FileWriter(f); stripper.writeText(pdf, writer); try { writer.close(); writer = null; } catch (Exception _e) { } reader = new FileReader(f); Document doc = new Document(); doc.add(new Field("path", href, Field.Store.YES, Field.Index.NO)); doc.add(new Field("title", title.toString(), Field.Store.YES, Field.Index.ANALYZED)); doc.add(new Field("contents", reader, Field.TermVector.WITH_POSITIONS_OFFSETS)); doc.add(new Field("page", "-1", Field.Store.YES, Field.Index.NO)); if (addDocument(doc)) AjaxIndexer.incrementCount(); try { reader.close(); reader = null; } catch (Exception _e) { } try { System.runFinalization(); System.gc(); } catch (Exception _e) { } } return 1; } catch (Exception e) { logger.error("Error indexing PDF text from " + uri.toString(), e); return -1; } finally { if (stripper != null) try { stripper.resetEngine(); } catch (Exception _e) { } if (pdf != null) try { pdf.close(); } catch (Exception _e) { } if (writer != null) try { writer.close(); } catch (Exception _e) { } if (reader != null) try { reader.close(); } catch (Exception _e) { } if ((tmpPdf != null) && (tmpPdf.getAbsolutePath().toLowerCase().indexOf(System.getProperty("java.io.tmpdir")) >= 0)) tmpPdf.delete(); } }
From source file:net.padaf.preflight.xmp.SynchronizedMetaDataValidation.java
License:Apache License
/** * Check if document information entries and XMP information are synchronized * // w w w . j a va 2 s . c o m * @param document * the PDF Document * @param metadata * the XMP MetaData * @return List of validation errors * @throws ValidationException */ public List<ValidationError> validateMetadataSynchronization(PDDocument document, XMPMetadata metadata) throws ValidationException { List<ValidationError> ve = new ArrayList<ValidationError>(); if (document == null) { throw new ValidationException("Document provided is null"); } else { PDDocumentInformation dico = document.getDocumentInformation(); if (metadata == null) { throw new ValidationException("Metadata provided are null"); } else { DublinCoreSchema dc = metadata.getDublinCoreSchema(); // TITLE analyzeTitleProperty(dico, dc, ve); // AUTHOR analyzeAuthorProperty(dico, dc, ve); // SUBJECT analyzeSubjectProperty(dico, dc, ve); AdobePDFSchema pdf = metadata.getAdobePDFSchema(); // KEYWORDS analyzeKeywordsProperty(dico, pdf, ve); // PRODUCER analyzeProducerProperty(dico, pdf, ve); XMPBasicSchema xmp = metadata.getXMPBasicSchema(); // CREATOR TOOL analyzeCreatorToolProperty(dico, xmp, ve); // CREATION DATE analyzeCreationDateProperty(dico, xmp, ve); // MODIFY DATE analyzeModifyDateProperty(dico, xmp, ve); } } return ve; }