List of usage examples for the method org.apache.pdfbox.pdmodel.PDDocument.close()
@Override public void close() throws IOException
From source file:converter.PDFPac.java
/**
 * Returns the text extracted from a PDF file.
 *
 * @param url file-system path of the PDF (it is handed to {@link File}, despite the name)
 * @return the extracted text, or an empty string when the file cannot be loaded or parsed
 */
private String getTextFromPDF(String url) {
    String text = "";
    // try-with-resources closes the document even when text extraction throws;
    // previously a failure in getText() skipped close() and leaked the document.
    try (PDDocument pdDoc = PDDocument.load(new File(url))) {
        PDFTextStripper pdfStripper = new PDFTextStripper();
        text = pdfStripper.getText(pdDoc);
    } catch (IOException ex) {
        logger.warning("PDFPac soubor nebyl nalezen " + url + "chyba " + ex);
    }
    return text;
}
From source file:correccioncolorpdfs.CorrectorColorUI.java
private void transformarPDF() { try {//from ww w .j a va2s . com //@see http://stackoverflow.com/questions/18189314/convert-a-pdf-file-to-image String rutaPDFOriginal = rutaPDF + nombrePDF; // Pdf files are read from this folder //String destinationDir = "D:\\Desarrollo\\pruebas\\reportes_negros\\imagenes\\"; // converted images from pdf document are saved here File pdfOriginal = new File(rutaPDFOriginal); // File destinationFile = new File(destinationDir); // if (!destinationFile.exists()) { // destinationFile.mkdir(); // System.out.println("Folder Created -> " + destinationFile.getAbsolutePath()); // } if (pdfOriginal.exists()) { //System.out.println("Images copied to Folder: " + destinationFile.getName()); PDDocument document = PDDocument.load(rutaPDFOriginal); //Documento Fondo Blanco PDDocument documentoCool = new PDDocument(); List<PDPage> list = document.getDocumentCatalog().getAllPages(); System.out.println("Total files to be converted -> " + list.size()); String nombrePDFOriginal = pdfOriginal.getName().replace(".pdf", ""); int pageNumber = 1; for (PDPage page : list) { BufferedImage image = page.convertToImage(); //Inviertiendo colores //@see http://stackoverflow.com/questions/8662349/convert-negative-image-to-positive for (int x = 0; x < image.getWidth(); x++) { for (int y = 0; y < image.getHeight(); y++) { int rgba = image.getRGB(x, y); //Hexa a reemplazar e9e9e1 R=233|G=233|B=225 Color col = new Color(rgba, true); col = new Color(255 - col.getRed(), 255 - col.getGreen(), 255 - col.getBlue()); //Si color es igual al invertido - cambiarlo a blanco if (col.getRGB() == -1447455) { col = new Color(255, 255, 255); } //System.out.println("col.getR = " + col.getRGB()); image.setRGB(x, y, col.getRGB()); } } // File outputfile = new File(destinationDir + fileName + "_" + pageNumber + ".png"); // System.out.println("Image Created -> " + outputfile.getName()); // ImageIO.write(image, "png", outputfile); pageNumber++; //Crear pagina nueva para el PDF Convertido 
float width = image.getWidth(); float height = image.getHeight(); PDPage paginaSinFondo = new PDPage(new PDRectangle(width, height)); documentoCool.addPage(paginaSinFondo); PDXObjectImage img = new PDJpeg(documentoCool, image); PDPageContentStream contentStream = new PDPageContentStream(documentoCool, paginaSinFondo); contentStream.drawImage(img, 0, 0); contentStream.close(); } document.close(); rutaPDFImprimible = rutaPDF + nombrePDFOriginal + "_imprimible.pdf"; documentoCool.save(rutaPDFImprimible); documentoCool.close(); estadoConversion(true); } else { JOptionPane.showMessageDialog(this, "No se logr identificar la ruta del archivo, por favor verifique que el archivo si existe o no halla sido movido durante el proceso.", "Ruta de archivo no encontrada", JOptionPane.WARNING_MESSAGE); } } catch (IOException | COSVisitorException | HeadlessException e) { estadoConversion(false); JOptionPane.showMessageDialog(this, e.getMessage(), "Error durante el proceso de conversin", JOptionPane.ERROR_MESSAGE); } }
From source file:cr.ac.siua.tec.utils.PDFGenerator.java
License:Open Source License
/** * Encodes PDF object (PDDocument) to base4. */// w ww.ja v a 2 s.c o m public String encodePDF(PDDocument document) { ByteArrayOutputStream out = new ByteArrayOutputStream(); try { document.save(out); document.close(); } catch (COSVisitorException | IOException e) { e.printStackTrace(); } byte[] bytes = out.toByteArray(); byte[] encoded = Base64.encodeBase64(bytes); return new String(encoded); }
From source file:CTRL.ExportController.java
/** * Processes requests for both HTTP <code>GET</code> and <code>POST</code> * methods.//w w w.java 2 s . com * * @param request servlet request * @param response servlet response * @throws ServletException if a servlet-specific error occurs * @throws IOException if an I/O error occurs */ protected void processRequest(HttpServletRequest request, HttpServletResponse response) throws ServletException, IOException { response.setContentType("text/html;charset=UTF-8"); HttpSession session = request.getSession(); LijstLesmomentenViewModel vmLesmomentenFinal = (LijstLesmomentenViewModel) session .getAttribute("vmLesmomentenFinal"); // Create a document and add a page to it PDDocument document = new PDDocument(); PDPage page = new PDPage(); document.addPage(page); // Create a new font object selecting one of the PDF base fonts PDFont font = PDType1Font.HELVETICA_BOLD; // Start a new content stream which will "hold" the to be created content PDPageContentStream contentStream = new PDPageContentStream(document, page); // Define a text content stream using the selected font, moving the cursor and drawing the text "Hello World" contentStream.beginText(); contentStream.setFont(font, 16); contentStream.moveTextPositionByAmount(60, 700); // Titel contentStream.drawString("Lessenrooster"); contentStream.setFont(font, 10); contentStream.moveTextPositionByAmount(0, -20); int recordTeller = 0; for (Lesmoment l : vmLesmomentenFinal.getLesmomenten()) { Calendar c = Calendar.getInstance(); c.setTime(l.getDatum()); int dag = c.get(Calendar.DAY_OF_WEEK); String strDag = ""; switch (dag) { case 0: strDag = "Zondag"; break; case 1: strDag = "Maandag"; break; case 2: strDag = "Dinsdag"; break; case 3: strDag = "Woensdag"; break; case 4: strDag = "Donderdag"; break; case 5: strDag = "Vrijdag"; break; case 6: strDag = "Zaterdag"; break; } //Nieuwe page beginnen wanner 25 records werden weggeschreven if (recordTeller == 25) { recordTeller = 0; contentStream.endText(); contentStream.close(); 
page = new PDPage(); document.addPage(page); contentStream = new PDPageContentStream(document, page); contentStream.beginText(); contentStream.setFont(font, 10); contentStream.moveTextPositionByAmount(60, 700); } else { recordTeller++; } contentStream.drawString(l.getDatum().toString() + " " + strDag + " " + l.getBeginuur() + " " + l.getEinduur() + " " + l.getLokaal() + " " + l.getModule().getCode() + " " + l.getModule().getNaam()); contentStream.moveTextPositionByAmount(0, -20); } contentStream.endText(); // Make sure that the content stream is closed: contentStream.close(); try { // Save the results and ensure that the document is properly closed: document.save("C:\\Temp\\test.pdf"); } catch (COSVisitorException ex) { Logger.getLogger(ExportController.class.getName()).log(Level.SEVERE, null, ex); } document.close(); //Teruggaan naar resultaat pagina RequestDispatcher dispatcher = request.getRequestDispatcher("Resultaat.jsp"); dispatcher.forward(request, response); }
From source file:cz.fi.muni.xkremser.editor.server.fedora.KrameriusImageSupport.java
License:Open Source License
/** * Read image.//from w w w .ja v a 2s. c o m * * @param url * the url * @param type * the type * @param page * the page * @return the image * @throws IOException * Signals that an I/O exception has occurred. */ public static Image readImage(URL url, ImageMimeType type, int page) throws IOException { if (type.javaNativeSupport()) { return ImageIO.read(url.openStream()); } else if ((type.equals(ImageMimeType.DJVU)) || (type.equals(ImageMimeType.VNDDJVU)) || (type.equals(ImageMimeType.XDJVU))) { com.lizardtech.djvu.Document doc = new com.lizardtech.djvu.Document(url); doc.setAsync(false); DjVuPage[] p = new DjVuPage[1]; // read page from the document - index 0, priority 1, favorFast true int size = doc.size(); if ((page != 0) && (page >= size)) { page = 0; } p[0] = doc.getPage(page, 1, true); p[0].setAsync(false); DjVuImage djvuImage = new DjVuImage(p, true); Rectangle pageBounds = djvuImage.getPageBounds(0); Image[] images = djvuImage.getImage(new JPanel(), new Rectangle(pageBounds.width, pageBounds.height)); if (images.length == 1) { Image img = images[0]; return img; } else return null; } else if (type.equals(ImageMimeType.PDF)) { PDDocument document = null; try { document = PDDocument.load(url.openStream()); int resolution = 96; List<?> pages = document.getDocumentCatalog().getAllPages(); PDPage pdPage = (PDPage) pages.get(page); BufferedImage image = pdPage.convertToImage(BufferedImage.TYPE_INT_RGB, resolution); return image; } finally { if (document != null) { document.close(); } } } else throw new IllegalArgumentException("unsupported mimetype '" + type.getValue() + "'"); }
From source file:cz.muni.pdfjbim.PdfImageExtractor.java
License:Apache License
/** * @deprecated -- do not use doesn't work properly yet * This method extracts images by going through PDF tree structure * @param pdfFile name of input PDF file * @param prefix //from w w w. j a v a 2s . co m * @param password password for access to PDF if needed * @param pagesToProcess list of pages which should be processed if null given => processed all pages * -- not working yet // * @param silent -- if true error messages are not written to output otherwise they are * @param binarize -- enables processing of nonbitonal images as well (LZW is still not * processed because of output with inverted colors) * @throws PdfRecompressionException if problem to extract images from PDF */ public void extractImagesUsingPdfObjectAccess(String pdfFile, String prefix, String password, Set<Integer> pagesToProcess, Boolean binarize) throws PdfRecompressionException { if (binarize == null) { binarize = false; } // checking arguments and setting appropriate variables if (pdfFile == null) { throw new IllegalArgumentException("pdfFile must be defined"); } InputStream inputStream = null; if (password != null) { try { log.debug("PDF probably encrypted, trying to decrypt using given password {}", password); ByteArrayOutputStream decryptedOutputStream = new ByteArrayOutputStream(); PdfReader reader = new PdfReader(pdfFile, password.getBytes(StandardCharsets.UTF_8)); PdfStamper stamper = new PdfStamper(reader, decryptedOutputStream); stamper.close(); inputStream = new ByteArrayInputStream(decryptedOutputStream.toByteArray()); } catch (DocumentException ex) { throw new PdfRecompressionException(ex); } catch (IOException ex) { throw new PdfRecompressionException("Reading file caused exception", ex); } } else { try { inputStream = new FileInputStream(pdfFile); } catch (FileNotFoundException ex) { throw new PdfRecompressionException("File wasn't found", ex); } } // if prefix is not set then prefix set to name of pdf without .pdf // if pdfFile has unconsistent name (without suffix .pdf) 
and name longer than 4 chars then last for chars are removed // and this string set as prefix if ((prefix == null) && (pdfFile.length() > 4)) { prefix = pdfFile.substring(0, pdfFile.length() - 4); } PDFParser parser = null; PDDocument doc = null; try { parser = new PDFParser(inputStream); parser.parse(); doc = parser.getPDDocument(); AccessPermission accessPermissions = doc.getCurrentAccessPermission(); if (!accessPermissions.canExtractContent()) { throw new PdfRecompressionException("Error: You do not have permission to extract images."); } // going page by page List pages = doc.getDocumentCatalog().getAllPages(); for (int pageNumber = 0; pageNumber < pages.size(); pageNumber++) { if ((pagesToProcess != null) && (!pagesToProcess.contains(pageNumber + 1))) { continue; } PDPage page = (PDPage) pages.get(pageNumber); PDResources resources = page.getResources(); Map xobjs = resources.getXObjects(); if (xobjs != null) { Iterator xobjIter = xobjs.entrySet().iterator(); while (xobjIter.hasNext()) { Map.Entry entry = (Map.Entry) xobjIter.next(); String key = (String) entry.getKey(); PDXObject xobj = (PDXObject) entry.getValue(); Map images; if (xobj instanceof PDXObjectForm) { PDXObjectForm xform = (PDXObjectForm) xobj; images = xform.getResources().getImages(); } else { images = resources.getImages(); } // reading images from each page and saving them to file if (images != null) { Iterator imageIter = images.entrySet().iterator(); while (imageIter.hasNext()) { Map.Entry imEntry = (Map.Entry) imageIter.next(); String imKey = (String) imEntry.getKey(); PDXObjectImage image = (PDXObjectImage) imEntry.getValue(); PDStream pdStr = new PDStream(image.getCOSStream()); List<COSName> filters = pdStr.getFilters(); if (image.getBitsPerComponent() > 1 && !binarize) { log.info("It is not a bitonal image => skipping"); continue; } // at this moment for preventing bad output (bad coloring) from LZWDecode filter if (filters.contains(COSName.LZW_DECODE)) { log.info("This is LZWDecoded => 
skipping"); continue; } if (filters.contains(COSName.JBIG2_DECODE)) { if (skipJBig2Images) { log.warn("Allready compressed according to JBIG2 standard => skipping"); continue; } else { log.debug("JBIG2 image detected"); } } // detection of unsupported filters by pdfBox library if (filters.contains(COSName.JPX_DECODE)) { log.info("Unsupported filter JPXDecode => skipping"); continue; } COSObject cosObj = new COSObject(image.getCOSObject()); int objectNum = cosObj.getObjectNumber().intValue(); int genNum = cosObj.getGenerationNumber().intValue(); log.debug(objectNum + " " + genNum + " obj"); String name = getUniqueFileName(prefix + imKey, image.getSuffix()); log.debug("Writing image:" + name); image.write2file(name); PdfImageInformation pdfImageInfo = new PdfImageInformation(key, image.getWidth(), image.getHeight(), objectNum, genNum); originalImageInformations.add(pdfImageInfo); log.debug(pdfImageInfo.toString()); namesOfImages.add(name + "." + image.getSuffix()); } } } } } } catch (IOException ex) { Tools.deleteFilesFromList(namesOfImages); throw new PdfRecompressionException("Unable to parse PDF document", ex); } catch (RuntimeException ex) { Tools.deleteFilesFromList(namesOfImages); } finally { if (doc != null) { try { doc.close(); } catch (IOException ex) { throw new PdfRecompressionException(ex); } } } }
From source file:cz.muni.pdfjbim.PdfImageProcessor.java
License:Apache License
/** * @deprecated -- do not use doesn't work properly yet * This method extracts images by going through PDF tree structure * @param pdfFile name of input PDF file * @param password password for access to PDF if needed * @param pagesToProcess list of pages which should be processed if null given => processed all pages * -- not working yet//from w ww.ja v a2 s.co m * @param silent -- if true error messages are not written to output otherwise they are * @param binarize -- enables processing of nonbitonal images as well (LZW is still not * processed because of output with inverted colors) * @throws PdfRecompressionException if problem to extract images from PDF */ public void extractImagesUsingPdfObjectAccess(String pdfFile, String password, Set<Integer> pagesToProcess, Boolean silent, Boolean binarize) throws PdfRecompressionException { if (binarize == null) { binarize = false; } // checking arguments and setting appropriate variables if (pdfFile == null) { throw new IllegalArgumentException(pdfFile); } String prefix = null; InputStream inputStream = null; if (password != null) { try { ByteArrayOutputStream decryptedOutputStream = null; PdfReader reader = new PdfReader(pdfFile, password.getBytes()); PdfStamper stamper = new PdfStamper(reader, decryptedOutputStream); stamper.close(); inputStream = new ByteArrayInputStream(decryptedOutputStream.toByteArray()); } catch (DocumentException ex) { throw new PdfRecompressionException(ex); } catch (IOException ex) { throw new PdfRecompressionException("Reading file caused exception", ex); } } else { try { inputStream = new FileInputStream(pdfFile); } catch (FileNotFoundException ex) { throw new PdfRecompressionException("File wasn't found", ex); } } // if prefix is not set then prefix set to name of pdf without .pdf // if pdfFile has unconsistent name (without suffix .pdf) and name longer than 4 chars then last for chars are removed // and this string set as prefix if ((prefix == null) && (pdfFile.length() > 4)) { prefix = 
pdfFile.substring(0, pdfFile.length() - 4); } PDFParser parser = null; PDDocument doc = null; try { parser = new PDFParser(inputStream); parser.parse(); doc = parser.getPDDocument(); AccessPermission accessPermissions = doc.getCurrentAccessPermission(); if (!accessPermissions.canExtractContent()) { throw new PdfRecompressionException("Error: You do not have permission to extract images."); } // going page by page List pages = doc.getDocumentCatalog().getAllPages(); for (int pageNumber = 0; pageNumber < pages.size(); pageNumber++) { if ((pagesToProcess != null) && (!pagesToProcess.contains(pageNumber + 1))) { continue; } PDPage page = (PDPage) pages.get(pageNumber); PDResources resources = page.getResources(); Map xobjs = resources.getXObjects(); if (xobjs != null) { Iterator xobjIter = xobjs.keySet().iterator(); while (xobjIter.hasNext()) { String key = (String) xobjIter.next(); PDXObject xobj = (PDXObject) xobjs.get(key); Map images; if (xobj instanceof PDXObjectForm) { PDXObjectForm xform = (PDXObjectForm) xobj; images = xform.getResources().getImages(); } else { images = resources.getImages(); } // reading images from each page and saving them to file if (images != null) { Iterator imageIter = images.keySet().iterator(); while (imageIter.hasNext()) { String imKey = (String) imageIter.next(); PDXObjectImage image = (PDXObjectImage) images.get(imKey); PDStream pdStr = new PDStream(image.getCOSStream()); List filters = pdStr.getFilters(); if (image.getBitsPerComponent() > 1) { log.info("It is not a bitonal image => skipping"); continue; } // at this moment for preventing bad output (bad coloring) from LZWDecode filter if (filters.contains(COSName.LZW_DECODE.getName())) { log.info("This is LZWDecoded => skipping"); continue; } // detection of unsupported filters by pdfBox library if (filters.contains("JBIG2Decode")) { log.info("Allready compressed according to JBIG2 standard => skipping"); continue; } if (filters.contains("JPXDecode")) { log.info("Unsupported filter 
JPXDecode => skipping"); continue; } COSObject cosObj = new COSObject(image.getCOSObject()); int objectNum = cosObj.getObjectNumber().intValue(); int genNum = cosObj.getGenerationNumber().intValue(); log.debug(objectNum + " " + genNum + " obj"); String name = getUniqueFileName(prefix + imKey, image.getSuffix()); log.debug("Writing image:" + name); image.write2file(name); PdfImageInformation pdfImageInfo = new PdfImageInformation(key, image.getWidth(), image.getHeight(), objectNum, genNum); originalImageInformations.add(pdfImageInfo); log.debug(pdfImageInfo.toString()); namesOfImages.add(name + "." + image.getSuffix()); } } } } } } catch (IOException ex) { throw new PdfRecompressionException("Unable to parse PDF document", ex); } finally { if (doc != null) { try { doc.close(); } catch (IOException ex) { throw new PdfRecompressionException(ex); } } } }
From source file:cz.mzk.editor.server.handler.GetOcrFromPdfHandler.java
License:Open Source License
/**
 * Extracts the text of a PDF file.
 *
 * @param fileName path of the PDF file
 * @return the text produced by {@code PDFTextStripper} for the whole document
 * @throws ActionException if the path is not a regular file, or the parser cannot be
 *         created, or parsing / text extraction fails
 */
private String pdftoText(String fileName) throws ActionException {
    final File source = new File(fileName);
    if (!source.isFile()) {
        LOGGER.error("The file: " + fileName + " does not exist.");
        throw new ActionException("Unable to parse the pdf file.");
    }

    // Step 1: open the parser on the file.
    final PDFParser pdfParser;
    try {
        FileInputStream fileStream = new FileInputStream(source);
        pdfParser = new PDFParser(new RandomAccessBufferedFileInputStream(fileStream));
    } catch (Exception e) {
        LOGGER.error("Unable to open PDF Parser.: " + e);
        e.printStackTrace();
        throw new ActionException("Unable to parse the pdf file.");
    }

    // Step 2: parse and strip the text; both document handles are closed afterwards.
    COSDocument cosDocument = null;
    PDDocument pdDocument = null;
    try {
        pdfParser.parse();
        cosDocument = pdfParser.getDocument();
        PDFTextStripper stripper = new PDFTextStripper();
        pdDocument = new PDDocument(cosDocument);
        return stripper.getText(pdDocument);
    } catch (Exception e) {
        LOGGER.error("An exception occured in parsing the PDF Document.");
        e.printStackTrace();
        throw new ActionException("Unable to parse the pdf file. " + e);
    } finally {
        // Closing is best-effort: a failure here is printed and not propagated.
        try {
            if (cosDocument != null) {
                cosDocument.close();
            }
            if (pdDocument != null) {
                pdDocument.close();
            }
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
}
From source file:cz.mzk.editor.server.newObject.CreateObject.java
License:Open Source License
/** * Insert foxml.//from w w w .ja v a2 s . c om * * @param node the node * @param mods the mods * @param dc the dc * @param attempt the attempt * @return the string * @throws CreateObjectException the create object exception */ private String insertFOXML(NewDigitalObject node, Document mods, Document dc, int attempt) throws CreateObjectException { if (attempt == 0) { throw new CreateObjectException("max number of attempts has been reached"); } boolean isPdf = node.getModel().getTopLevelType() != null && (node.getChildren() == null || node.getChildren().size() == 0) && node.getPath() != null; if (isPdf && attempt == Constants.MAX_NUMBER_OF_INGEST_ATTEMPTS) { PDDocument document = null; String newPdfPath = null; try { newPdfPath = imageResolverDAO.getNewImageFilePath(node.getPath()); if (!newPdfPath.endsWith(Constants.PDF_EXTENSION)) { newPdfPath = newPdfPath.concat(Constants.PDF_EXTENSION); } document = PDDocument.load(new File(newPdfPath)); int numberOfPages = document.getNumberOfPages(); LOGGER.warn(newPdfPath + ": Count of pages is 0"); if (numberOfPages > 0 && node.getPageIndex() > numberOfPages) throw new CreateObjectException("The number of page: " + node.getPageIndex() + " to be used for thumbnail is bigger than count of pages in the file: " + numberOfPages); } catch (IOException e) { LOGGER.error(e.getMessage()); e.printStackTrace(); throw new CreateObjectException("Unable to read the pdf file: " + newPdfPath); } catch (DatabaseException e) { LOGGER.error(e.getMessage()); e.printStackTrace(); throw new CreateObjectException(e.getMessage(), e); } finally { if (document != null) try { document.close(); } catch (IOException e) { LOGGER.error(e.getMessage()); e.printStackTrace(); throw new CreateObjectException("Unable to close the pdf file: " + newPdfPath); } } } if (processedPages.containsKey(node.getPath())) { node.setExist(true); node.setUuid(processedPages.get(node.getPath())); } if (processedTracks.containsKey(node.getPath())) { node.setExist(true); 
node.setUuid(processedTracks.get(node.getPath())); } if (node.getExist()) { // do not create, but append only List<NewDigitalObject> childrenToAdd = node.getChildren(); if (childrenToAdd != null && !childrenToAdd.isEmpty()) { for (NewDigitalObject child : childrenToAdd) { if (!child.getExist()) { String uuid = insertFOXML(child, mods, dc); child.setUuid(Constants.FEDORA_UUID_PREFIX + uuid); append(node, child); } else { insertFOXML(child, mods, dc); } } } return node.getUuid(); } FoxmlBuilder builder = FOXMLBuilderMapping.getBuilder(node); if (builder == null) { throw new CreateObjectException("unknown type " + node.getModel()); } if (node.getUuid() == null || attempt != Constants.MAX_NUMBER_OF_INGEST_ATTEMPTS) { node.setUuid(FoxmlUtils.getRandomUuid()); if (topLevelUuid == null) { topLevelUuid = node.getUuid(); try { digitalObjectDAO.insertNewDigitalObject(node.getUuid(), node.getModel().getValue(), node.getName(), inputDirPath, node.getUuid(), false, userId); } catch (DatabaseException e) { LOGGER.error("DB ERROR!!!: " + e.getMessage() + ": " + e); e.printStackTrace(); } } } boolean isPage = node.getModel() == DigitalObjectModel.PAGE; boolean isTrack = node.getModel() == DigitalObjectModel.TRACK; boolean isSoundUnit = node.getModel() == DigitalObjectModel.SOUND_UNIT; builder.setSignature(node.getSignature()); builder.setBase(base); builder.setUuid(node.getUuid()); builder.setDcXmlContent(dc); builder.setModsXmlContent(mods); builder.setBundle(node.getBundle()); builder.setType(node.getType()); builder.setPolicy(node.getVisible() ? 
Policy.PUBLIC : Policy.PRIVATE); builder.setDateOrIntPartName(node.getDateOrIntPartName()); builder.setNoteOrIntSubtitle(node.getNoteOrIntSubtitle()); if (!isPage) { builder.setPartNumber(node.getPartNumberOrAlto()); builder.setAditionalInfo(node.getAditionalInfoOrOcr()); } if (node.getModel() == DigitalObjectModel.PAGE) { builder.setPageIndex(node.getPageIndex()); } List<NewDigitalObject> childrenToAdd = node.getChildren(); if (childrenToAdd != null && !childrenToAdd.isEmpty()) { List<RelsExtRelation> relations = builder.getChildren(); for (NewDigitalObject child : childrenToAdd) { if (!child.getExist()) { String uuid = insertFOXML(child, mods, dc); child.setUuid(uuid); } relations.add(new RelsExtRelation(child.getUuid(), NamedGraphModel.getRelationship(node.getModel(), child.getModel()), child.getName())); } } String imageUrl = null; String newFilePath = null; if (isPage || isSoundUnit) { String url = config.getImageServerUrl(); url = addSlash(url); if (!url.startsWith("http://")) { if (url.startsWith("https://")) { url = url.substring(8); } url = "http://" + url; } if (!isSysno(sysno)) { imageUrl = url + "meditor" + getPathFromNonSysno(sysno) + (node.getUuid()); newFilePath = addSlash(config.getImageServerUnknown()) + getPathFromNonSysno(sysno) + node.getUuid(); } else { String basePath = ""; if (base != null && !"".equals(base)) { basePath = base.toLowerCase() + "/"; } imageUrl = url + basePath + getSysnoPath(sysno) + (node.getUuid()); newFilePath = addSlash(config.getImageServerKnown()) + basePath + getSysnoPath(sysno) + node.getUuid(); } builder.setImageUrl(imageUrl); } else if (isTrack) { String url = config.getRecordingServerUrl(); url = addSlash(url); if (!url.startsWith("http://")) { if (url.startsWith("https://")) { url = url.substring(8); } url = "http://" + url; } String soundUrl; if (!isSysno(sysno)) { soundUrl = url + "meditor" + getPathFromNonSysno(sysno) + (node.getUuid()); newFilePath = addSlash(config.getRecordingServerUnknown()) + 
getPathFromNonSysno(sysno) + node.getUuid(); } else { String basePath = ""; if (base != null && !"".equals(base)) { basePath = base.toLowerCase() + "/"; } newFilePath = addSlash(config.getRecordingServerKnown()) + basePath + getSysnoPath(sysno) + node.getUuid(); soundUrl = url + basePath + getSysnoPath(sysno) + (node.getUuid()); } //No lossless audio on the input queue String soundPath = null; try { soundPath = imageResolverDAO.getNewImageFilePath(node.getPath()); } catch (DatabaseException e) { LOGGER.error(e.getMessage()); e.printStackTrace(); throw new CreateObjectException(e.getMessage(), e); } if (builder instanceof TrackBuilder) { if (new File(soundPath + Constants.AUDIO_MIMETYPES.WAV_MIMETYPE.getExtension()).exists()) { ((TrackBuilder) builder).wavProvided(true); } } builder.setImageUrl(soundUrl); } builder.createDocument(); String foxmlRepresentation = builder.getDocument(false); boolean success = IngestUtils.ingest(foxmlRepresentation, node.getName(), node.getUuid(), node.getModel().getValue(), topLevelUuid, inputDirPath); if (success) ingestedObjects.add(node.getUuid()); if ((isPage || isSoundUnit) && success) { // TODO: StringBuffer boolean copySuccess; String newImagePath = null; try { newImagePath = imageResolverDAO.getNewImageFilePath(node.getPath()); if (newImagePath == null) { throw new CreateObjectException("Unkown file path for " + node.getPath()); } else if (!newImagePath.endsWith(Constants.JPEG_2000_EXTENSION)) { newImagePath = newImagePath.concat(Constants.JPEG_2000_EXTENSION); } copySuccess = IOUtils.copyFile(newImagePath, newFilePath + Constants.JPEG_2000_EXTENSION); if (copySuccess && LOGGER.isInfoEnabled()) { LOGGER.info("image " + newImagePath + " was copied to " + newFilePath + Constants.JPEG_2000_EXTENSION); } } catch (IOException e) { LOGGER.error(e.getMessage()); e.printStackTrace(); throw new CreateObjectException(e.getMessage(), e); } catch (DatabaseException e) { LOGGER.error(e.getMessage()); e.printStackTrace(); throw new 
CreateObjectException(e.getMessage(), e); } } if (isPage) { String ocrPath = node.getAditionalInfoOrOcr(); if (ocrPath != null && !"".equals(ocrPath)) { insertManagedDatastream(DATASTREAM_ID.TEXT_OCR, node.getUuid(), ocrPath, true, "text/plain"); } String altoPath = node.getPartNumberOrAlto(); if (altoPath != null && !"".equals(altoPath)) { insertManagedDatastream(DATASTREAM_ID.ALTO, node.getUuid(), altoPath, true, "text/xml"); } } if (isTrack && success) { boolean copySuccessWav; boolean copySuccessMp3; boolean copySuccessOgg; String soundPath; try { soundPath = imageResolverDAO.getNewImageFilePath(node.getPath()); soundPath = soundPath.substring(0, soundPath.length() - 4); if (new File(soundPath + Constants.AUDIO_MIMETYPES.WAV_MIMETYPE.getExtension()).exists()) { copySuccessWav = IOUtils.copyFile( soundPath + Constants.AUDIO_MIMETYPES.WAV_MIMETYPE.getExtension(), newFilePath + Constants.AUDIO_MIMETYPES.WAV_MIMETYPE.getExtension()); } copySuccessMp3 = IOUtils.copyFile(soundPath + Constants.AUDIO_MIMETYPES.MP3_MIMETYPE.getExtension(), newFilePath + Constants.AUDIO_MIMETYPES.MP3_MIMETYPE.getExtension()); copySuccessOgg = IOUtils.copyFile(soundPath + Constants.AUDIO_MIMETYPES.OGG_MIMETYPE.getExtension(), newFilePath + Constants.AUDIO_MIMETYPES.OGG_MIMETYPE.getExtension()); } catch (DatabaseException e) { LOGGER.error(e.getMessage()); e.printStackTrace(); throw new CreateObjectException(e.getMessage(), e); } catch (IOException e) { LOGGER.error(e.getMessage()); e.printStackTrace(); throw new CreateObjectException(e.getMessage(), e); } } if (!success) { insertFOXML(node, mods, dc, attempt - 1); } else if (isPdf) { handlePdf(node); } if (node.getModel() == DigitalObjectModel.PAGE) processedPages.put(node.getPath(), node.getUuid()); if (node.getModel() == DigitalObjectModel.TRACK) processedTracks.put(node.getPath(), node.getUuid()); return node.getUuid(); }
From source file:de.berber.kindle.annotator.lib.PDFAnnotator.java
License:Apache License
@SuppressWarnings("unchecked") public boolean run() { // read all annotations final List<Annotation> annotations = new KindleAnnotationReader(cc, pdfFile).read(); if (annotations.size() == 0) { return true; }//from ww w . j a v a2s .com PDDocument document = null; // annotate pdf try { document = PDDocument.load(pdfFile); //inDocument.decrypt(pass); // get outline for bookmarks PDDocumentOutline documentOutline = document.getDocumentCatalog().getDocumentOutline(); if (documentOutline == null) { // if there is no document outline we have to create a new one. documentOutline = new PDDocumentOutline(); document.getDocumentCatalog().setDocumentOutline(documentOutline); } assert documentOutline != null; // convert annotations for each page int pageNumber = 0; for (PDPage page : (List<PDPage>) document.getDocumentCatalog().getAllPages()) { for (final Annotation dxAnn : annotations) { dxAnn.toPDAnnotation(pageNumber, documentOutline, page); } pageNumber++; } //inDocument.setAllSecurityToBeRemoved(true); document.save(outFile.toString()); } catch (FileNotFoundException e) { LOG.error("Could not find input file " + pdfFile); return false; } catch (IOException e) { LOG.error("IOError while writing result file " + outFile); return false; } catch (COSVisitorException e) { LOG.error("PDFBox error while storing result file " + outFile); return false; } finally { if (document != null) { try { document.close(); } catch (IOException e) { LOG.error("Error while closing PDF document " + pdfFile); } } } return true; }