List of usage examples for org.apache.pdfbox.pdmodel.PDDocument.close()
@Override public void close() throws IOException
From source file:pdfcompressor.PDFCompressor.java
/**
 * Renders a compressed preview image of a single page of the source document.
 *
 * <p>The page is extracted into a temporary document, rendered at {@code dpi},
 * passed through {@code getImageByteArray} with {@code compressRate}, and decoded
 * back into a {@link BufferedImage}.
 *
 * @param pageNum the page number handed to {@code PageExtractor} as the start page
 * @return the decoded preview image
 * @throws IOException if extraction, rendering or decoding fails
 */
public BufferedImage getBytePreview(int pageNum) throws IOException {
    // NOTE(review): the end page passed here is pageNum + 1 although only the first
    // extracted page is used - confirm whether extracting a second page is intended.
    PageExtractor extractor = new PageExtractor(sourceDocument, pageNum, pageNum + 1);
    PDDocument extractedDoc = extractor.extract();
    try {
        PDPage page = (PDPage) extractedDoc.getDocumentCatalog().getAllPages().get(0);
        // Render while the temporary document is still open: the page's content
        // belongs to extractedDoc and must not be read after close().
        BufferedImage rendered = page.convertToImage(BufferedImage.TYPE_INT_BGR, dpi);
        return ImageIO.read(new ByteArrayInputStream(getImageByteArray(rendered, compressRate)));
    } finally {
        // Always release the temporary document, also when rendering fails.
        extractedDoc.close();
    }
}
From source file:pdfconverter.converter3.java
@SuppressWarnings("deprecation") public static void main(String[] args) throws IOException, WriteException { workbook = Workbook.createWorkbook(new File(output)); System.out.println("File created"); WritableSheet sheet = workbook.createSheet("Page", 0); ExcelStart(sheet);/*from w w w . ja v a2 s . c o m*/ //Scanner user_input = new Scanner( System.in ); File dir = new File(path); //System.out.println(dir.getPath()); File[] dirList = dir.listFiles(new FilenameFilter() { @Override public boolean accept(File dir, String name) { return name.endsWith(".pdf"); } }); int counter = 1; PDDocument pd; PDFTextStripper stripper = new PDFTextStripper(); PDFTextStripperByArea areaSearch = new PDFTextStripperByArea(); PDFTextStripperByArea stripper2 = new PDFTextStripperByArea(); PDFTextStripperByArea stripper3 = new PDFTextStripperByArea(); //PDRectangle rect = new PDRectangle(0, 0, 100, 100); stripper.setStartPage(1); //Start extracting from page 3 stripper.setEndPage(1); //Extract till page 5 File f = new File(dirList[0].getPath()); pd = PDDocument.load(f); //int curHeight = 136; //int rowCount = 37; int curHeight = 116; int rowCount = 39; int rowHeight = 9; int sheetRowCount = 0; int pageStop = 1491; for (int curpage = 800; curpage < pageStop; curpage++) { if (counter > 800) { break; } PDPage page = pd.getPage(curpage); System.out.println("Now parsing page " + curpage); for (int curRow = 0; curRow < 80; curRow++) { Rectangle2D.Float cell = new Rectangle2D.Float(0, curHeight, 80, rowHeight); String name = "cell-1-" + curRow; areaSearch.addRegion(name, cell); areaSearch.extractRegions(page); String text = areaSearch.getTextForRegion(name); areaSearch.removeRegion(name); AddCell(sheet, text, 0, sheetRowCount + 1); cell = new Rectangle2D.Float(80, curHeight, 30, rowHeight); name = "cell-2-" + curRow; areaSearch.addRegion(name, cell); areaSearch.extractRegions(page); text = areaSearch.getTextForRegion(name); areaSearch.removeRegion(name); AddCell(sheet, text, 1, sheetRowCount + 
1); cell = new Rectangle2D.Float(110, curHeight, 40, rowHeight); name = "cell-3-" + curRow; areaSearch.addRegion(name, cell); areaSearch.extractRegions(page); text = areaSearch.getTextForRegion(name); areaSearch.removeRegion(name); AddCell(sheet, text, 2, sheetRowCount + 1); cell = new Rectangle2D.Float(150, curHeight, 120, rowHeight); name = "cell-4-" + curRow; areaSearch.addRegion(name, cell); areaSearch.extractRegions(page); text = areaSearch.getTextForRegion(name); areaSearch.removeRegion(name); AddCell(sheet, text, 3, sheetRowCount + 1); cell = new Rectangle2D.Float(270, curHeight, 120, rowHeight); name = "cell-5-" + curRow; areaSearch.addRegion(name, cell); areaSearch.extractRegions(page); text = areaSearch.getTextForRegion(name); areaSearch.removeRegion(name); AddCell(sheet, text, 4, sheetRowCount + 1); cell = new Rectangle2D.Float(390, curHeight, 40, rowHeight); name = "cell-6-" + curRow; areaSearch.addRegion(name, cell); areaSearch.extractRegions(page); text = areaSearch.getTextForRegion(name); areaSearch.removeRegion(name); AddCell(sheet, text, 5, sheetRowCount + 1); cell = new Rectangle2D.Float(430, curHeight, 46, rowHeight); name = "cell-7-" + curRow; areaSearch.addRegion(name, cell); areaSearch.extractRegions(page); text = areaSearch.getTextForRegion(name); areaSearch.removeRegion(name); AddCell(sheet, text, 6, sheetRowCount + 1); cell = new Rectangle2D.Float(476, curHeight, 82, rowHeight); name = "cell-8-" + curRow; areaSearch.addRegion(name, cell); areaSearch.extractRegions(page); text = areaSearch.getTextForRegion(name); areaSearch.removeRegion(name); AddCell(sheet, text, 7, sheetRowCount + 1); cell = new Rectangle2D.Float(558, curHeight, 65, rowHeight); name = "cell-9-" + curRow; areaSearch.addRegion(name, cell); areaSearch.extractRegions(page); text = areaSearch.getTextForRegion(name); areaSearch.removeRegion(name); AddCell(sheet, text, 8, sheetRowCount + 1); cell = new Rectangle2D.Float(623, curHeight, 66, rowHeight); name = "cell-10-" + curRow; 
areaSearch.addRegion(name, cell); areaSearch.extractRegions(page); text = areaSearch.getTextForRegion(name); areaSearch.removeRegion(name); AddCell(sheet, text, 9, sheetRowCount + 1); cell = new Rectangle2D.Float(689, curHeight, 100, rowHeight); name = "cell-11-" + curRow; areaSearch.addRegion(name, cell); areaSearch.extractRegions(page); text = areaSearch.getTextForRegion(name); areaSearch.removeRegion(name); AddCell(sheet, text, 10, sheetRowCount + 1); sheetRowCount++; curHeight += rowHeight; } //Rectangle2D.Float issueDate = new Rectangle2D.Float(0, 0, 80, page.getMediaBox().getHeight()); //stripper2.addRegion("issueDate", issueDate); //Rectangle2D.Float amount = new Rectangle2D.Float(80, 0, 30, page.getMediaBox().getHeight()); //stripper2.addRegion("amount", amount); //Rectangle2D.Float citation = new Rectangle2D.Float(110, 0, 40, page.getMediaBox().getHeight()); //stripper2.addRegion("citation", citation); //Rectangle2D.Float violation = new Rectangle2D.Float(150, 0, 120, page.getMediaBox().getHeight()); //stripper2.addRegion("violation", violation); //Rectangle2D.Float comment = new Rectangle2D.Float(270, 0, 120, page.getMediaBox().getHeight()); //stripper2.addRegion("comment", comment); //Rectangle2D.Float warning = new Rectangle2D.Float(390, 0, 40, page.getMediaBox().getHeight()); //stripper2.addRegion("warning", warning); //Rectangle2D.Float license = new Rectangle2D.Float(430, 0, 46, page.getMediaBox().getHeight()); //stripper2.addRegion("license", license); //Rectangle2D.Float lot = new Rectangle2D.Float(476, 0, 82, page.getMediaBox().getHeight()); //stripper2.addRegion("lot", lot); //Rectangle2D.Float make = new Rectangle2D.Float(558, 0, 65, page.getMediaBox().getHeight()); //stripper2.addRegion("make", make); //Rectangle2D.Float officer = new Rectangle2D.Float(623, 0, 66, page.getMediaBox().getHeight()); //stripper2.addRegion("officer", officer); //Rectangle2D.Float state = new Rectangle2D.Float(689, 0, 100, page.getMediaBox().getHeight()); 
//stripper2.addRegion("state", state); //stripper2.extractRegions(page); //String text = stripper2.getTextForRegion("license"); //Rectangle2D.Float row = new Rectangle2D.Float(0, 156, 80, 10); //stripper3.addRegion("row", row); //stripper3.extractRegions(page); //String text = stripper3.getTextForRegion("row"); //System.out.println(text); counter++; curHeight = 116; rowCount = 39; } //AddRow(sheet, text, counter); //counter++; pd.close(); System.out.println("Data extracted to Excel, parsing through Excel data..."); boolean multiline = true; while (multiline) { multiline = false; for (int row = 0; row < sheet.getRows(); row++) { Cell cell = sheet.getCell(0, row); if (cell.getContents().length() < 5) { multiline = true; WritableCell cell2 = sheet.getWritableCell(4, row - 1); WritableCell cell3 = sheet.getWritableCell(4, row); String content = cell2.getContents() + cell3.getContents(); content = content.replace("\n", "").replace("\r", ""); Label l = (Label) cell2; l.setString(content); sheet.removeRow(row); } } } System.out.println("Data extraction complete"); workbook.write(); workbook.close(); }
From source file:pdfedittest.PDFEditTest.java
/** * @param args the command line arguments *//*from www .j av a 2 s . c o m*/ public static void main(String[] args) { // TODO code application logic here PDDocument pd; BufferedWriter wr; try { String outputPath = "rayani.txt"; File input = new File("C:\\Users\\Administrator\\Desktop\\FA Feb 16.pdf"); // The PDF file from where you would like to extract File output = new File(outputPath); // The text file where you are going to store the extracted data pd = PDDocument.load(input); System.out.println(pd.getNumberOfPages()); System.out.println(pd.isEncrypted()); //pd.save("CopyOfInvoice.pdf"); // Creates a copy called "CopyOfInvoice.pdf" PDFTextStripper stripper = new PDFTextStripper(); //stripper.setStartPage(3); //Start extracting from page 3 //stripper.setEndPage(5); //Extract till page 5 //stripper.set wr = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(output))); stripper.writeText(pd, wr); if (pd != null) { pd.close(); } // I use close() to flush the stream. wr.close(); TextParser a = new TextParser(outputPath); a.getUserDetail(""); } catch (Exception e) { e.printStackTrace(); } }
From source file:pdfpositional.PdfPositional.java
/** * @param args the command line arguments *///from w w w . j a v a 2 s . co m public static void main(String[] args) { try { // check file param if (args.length == 0) { throw new ParameterException("No file parameter specified"); } String file = args[args.length - 1]; Pattern patternFile = Pattern.compile("(?i)^[\\w,\\s-()/]+\\.pdf$"); Matcher matcherFile = patternFile.matcher(file); // check file is valid format if (!matcherFile.find()) { throw new ParameterException("File parameter invalid: " + file); } // check if file exists File input = new File(file); if (!input.exists()) { throw new ParameterException("File does not exist: " + file); } // ensure it isnt a directory if (input.isDirectory()) { throw new ParameterException("File is a directory: " + file); } PdfPositional pdfPositional = new PdfPositional(input); pdfPositional.setConversion(new Float(1.388888888889)); pdfPositional.processFileArgument(args[args.length - 1]); Pattern patternArgument = Pattern.compile("^-{2}([^=]+)[=]([\\s\\S]+)$"); Matcher matcher; for (int i = 0; i < args.length - 1; i++) { matcher = patternArgument.matcher(args[i]); while (matcher.find()) { switch (matcher.group(1)) { case "page": pdfPositional.setPageNumber(Integer.parseInt(matcher.group(2))); break; case "output": pdfPositional.setOutputFile(matcher.group(2)); break; } } } PDDocument document; document = PDDocument.load(pdfPositional.getInputFile()); // check for encrypted document if (document.isEncrypted()) { try { document.decrypt(""); } catch (CryptographyException | IOException e) { document.close(); throw new EncryptedDocumentException(); } } List allPages = document.getDocumentCatalog().getAllPages(); if (pdfPositional.hasPageNumber()) { if (document.getNumberOfPages() < pdfPositional.getPageNumber()) { throw new ParameterException("illegal page number"); } PDPage page = (PDPage) allPages.get(pdfPositional.getPageNumber() - 1); PDStream contents = page.getContents(); if (contents != null) { 
pdfPositional.processStream(page, page.findResources(), page.getContents().getStream()); pdfPositional.addPageDataToPdfData(); pdfPositional.writeJSONToOutputStream(); } } else { for (int i = 0; i < allPages.size(); i++) { pdfPositional.setPageNumber(i + 1); PDPage page = (PDPage) allPages.get(i); PDStream contents = page.getContents(); if (contents != null) { pdfPositional.processStream(page, page.findResources(), page.getContents().getStream()); pdfPositional.addPageDataToPdfData(); pdfPositional.writeJSONToOutputStream(); } page.clear(); } } pdfPositional.destroyOutputStream(); document.close(); System.exit(0); } catch (ParameterException ex) { System.out.println("Parameter Error: " + ex.getMessage()); System.exit(1); } catch (EncryptedDocumentException ex) { System.out.println("Encrypted Document Error"); System.exit(1); } catch (IOException | NumberFormatException ex) { System.out.println("General Error"); System.exit(1); } }
From source file:pdfreader.ColorScheme.java
/**
 * Prints the stroking colour in effect after processing the first page of
 * {@code D://My.pdf}: first the colour space name, then each colour component
 * multiplied by 255.
 *
 * @throws IOException if the document cannot be loaded, processed or closed
 */
public void getColor() throws IOException {
    // Nothing to close if loading itself fails, so load before entering the try.
    PDDocument pdf = PDDocument.load("D://My.pdf");
    try {
        PDFStreamEngine streamEngine = new PDFStreamEngine(
                ResourceLoader.loadProperties("org//apache//pdfbox//resources//PageDrawer.properties", true));
        PDPage firstPage = (PDPage) pdf.getDocumentCatalog().getAllPages().get(0);
        streamEngine.processStream(firstPage, firstPage.findResources(), firstPage.getContents().getStream());

        PDGraphicsState stateAfterPage = streamEngine.getGraphicsState();
        System.out.println(stateAfterPage.getStrokingColor().getColorSpace().getName());

        float[] components = stateAfterPage.getStrokingColor().getColorSpaceValue();
        for (int i = 0; i < components.length; i++) {
            System.out.println(components[i] * 255);
        }
    } finally {
        pdf.close();
    }
}
From source file:pdfsplicer.SplicerModel.java
License:Open Source License
/**
 * Create the new PDF, and save it.
 *
 * <p>For every page entry, the pages named in the matching page range (1-based)
 * are cloned from the entry's source document into a new document, which is then
 * saved to {@code saveFile}. The process exits with status 1 if a source document
 * is encrypted.
 *
 * @param saveFile the file to save it as
 * @throws IOException if it cannot save the file
 */
public void makeFinalizedPDF(File saveFile) throws IOException {
    PDDocument newdoc = new PDDocument();
    try {
        for (int i = 0; i < pageEntryPDFList.size(); ++i) {
            PDDocument doc = pdfList.get(pageEntryPDFList.get(i));
            if (doc.isEncrypted()) {
                System.out.println("Error: Encrypted PDF");
                System.exit(1);
            }
            List<Integer> pRange = pageRangeList.get(i);
            PDFCloneUtility pdfCloner = new PDFCloneUtility(newdoc);
            for (int pNum : pRange) {
                // page ranges are 1-based, getPage is 0-based
                PDPage page = doc.getPage(pNum - 1);
                COSDictionary clonedDict = (COSDictionary) pdfCloner.cloneForNewDocument(page);
                newdoc.addPage(new PDPage(clonedDict));
            }
        }
        newdoc.save(saveFile);
    } finally {
        // Release the new document even when cloning or saving fails.
        newdoc.close();
    }
}
From source file:pdftotext.Pdfprac2.java
public String pdftoText(String fileName) { PDFParser parser;/* www. j a v a 2s .com*/ String parsedText = null; ; String location = fileName.replaceAll(".pdf", ".txt"); //System.out.println(location); PDFTextStripper pdfStripper = null; PDDocument pdDoc = null; COSDocument cosDoc = null; File file = new File(fileName); if (!file.isFile()) { System.err.println("File " + fileName + " does not exist."); return null; } try { parser = new PDFParser(new FileInputStream(file)); } catch (IOException e) { System.err.println("Unable to open PDF Parser. " + e.getMessage()); return null; } try { parser.parse(); cosDoc = parser.getDocument(); pdfStripper = new PDFTextStripper(); pdDoc = new PDDocument(cosDoc); //pdfStripper.setStartPage(1); //pdfStripper.setEndPage(5); parsedText = pdfStripper.getText(pdDoc); try (PrintWriter out = new PrintWriter(new BufferedWriter(new FileWriter(location, true)))) { out.println(parsedText); } } catch (IOException e) { System.err.println("An exception occured in parsing the PDF Document." + e.getMessage()); } finally { try { if (cosDoc != null) cosDoc.close(); if (pdDoc != null) pdDoc.close(); } catch (Exception e) { e.printStackTrace(); } } return location; }
From source file:pdfviewer.Pdfviewer.java
public static void generatePDFFile(String date, String pdfFileName, Boolean[][] statusArray, String cycle) throws IOException, COSVisitorException { PDDocument pdf = PDDocument.load(pdfFileName); //String[] names = pdfFileName.split("\\."); String targetFile = ""; if (pdfFileName.contains(".pdf")) { //doesn't contain surfix targetFile = pdfFileName.replace(".pdf", "_forAudit.pdf"); } else {//from w ww .ja va 2 s . c o m targetFile = pdfFileName + "_forAudit.pdf"; } // == prepare for void mark String imageName = "void.jpg"; BufferedImage buffered = ImageIO.read(new File(imageName)); PDJpeg voidMark = new PDJpeg(pdf, buffered); // == end of preparing for void mark List pages = pdf.getDocumentCatalog().getAllPages(); Iterator<PDPage> iter = pages.iterator(); int pageNum = 0; // 0 based int sequenceNum = 1; // start from 0001 while (iter.hasNext()) { PDPage page = iter.next(); PDPageContentStream stream = new PDPageContentStream(pdf, page, true, false); // == date stamp stream.beginText(); stream.setFont(PDType1Font.HELVETICA, 20); stream.moveTextPositionByAmount(200, 20); stream.drawString(date); //date stamp stream.endText(); // == end of date stamp // == void stamp if (statusArray[GlobalVar.VOID_BUTTON_INDEX][pageNum]) { stream.drawImage(voidMark, 100, 200); } // == end of void stamp // == seq stamp if (statusArray[GlobalVar.SELECT_BUTTON_INDEX][pageNum]) { stream.beginText(); stream.setFont(PDType1Font.HELVETICA, 24); stream.moveTextPositionByAmount(600, 400); stream.setTextRotation(3.14 / 2, 600, 400); // rotate text 90 degree at x = 600, y = 400 stream.drawString(cycle + "/" + globalCounterGenerator(sequenceNum)); sequenceNum++; stream.endText(); } // == end of seq stamp stream.close(); pageNum++; } pdf.save(targetFile); pdf.close(); }
From source file:pdfviewer.Pdfviewer.java
public static String dateStampPDFFile(String date, String pdfFileName) throws IOException, COSVisitorException { PDDocument pdf = PDDocument.load(pdfFileName); //String[] names = pdfFileName.split("\\."); String targetFile = ""; if (pdfFileName.contains(".pdf")) { //doesn't contain surfix targetFile = pdfFileName.replace(".pdf", "_DS.pdf"); } else {/*w w w . j a v a 2 s . com*/ targetFile = pdfFileName + "_DS.pdf"; } // String imageName = "void.jpg"; // String fileName = "res.pdf" List pages = pdf.getDocumentCatalog().getAllPages(); Iterator<PDPage> iter = pages.iterator(); int pageNum = 0; while (iter.hasNext()) { PDPage page = iter.next(); PDPageContentStream stream = new PDPageContentStream(pdf, page, true, false); // == date stamp stream.beginText(); stream.setFont(PDType1Font.HELVETICA, 24); stream.moveTextPositionByAmount(100, 300); stream.drawString(date); stream.endText(); // == end of date stamp stream.close(); } pdf.save(targetFile); pdf.close(); return targetFile; }