Example usage for org.apache.pdfbox.pdmodel PDDocument close

Introduction

This page collects example usages of the close() method of org.apache.pdfbox.pdmodel.PDDocument.

Prototype

@Override
public void close() throws IOException

Source Link

Document

This will close the underlying COSDocument object.

Usage

From source file:name.marcelomorales.siqisiqi.pdfbox.CoordinatesGenerator.java

License:Apache License

/**
 * Stamps text onto the first page of an existing PDF and writes the result to {@code os}.
 *
 * <p>The template is processed line by line. Lines starting with {@code #} and lines with fewer
 * than four non-empty comma-separated fields are skipped. For the remaining lines:
 * <ul>
 *   <li>field 0 / field 1: if field 0 starts with a digit, a new text object is started and
 *       positioned at (field 0, field 1); otherwise the text position is advanced horizontally
 *       by the offset computed from the previously drawn string;</li>
 *   <li>field 2: {@code "b"} selects the bold font, anything else the regular one;</li>
 *   <li>field 3: the text source. Resolving it is still a TODO, so at the moment every line
 *       logs a warning and draws a single space.</li>
 * </ul>
 *
 * @param os          stream that receives the saved PDF; it is not closed here
 * @param template    the coordinate template, one instruction per line
 * @param m           model for property lookup; currently only referenced by a TODO
 * @param path        file-system path of the PDF to stamp
 * @param coordenates not read by this method
 * @param fontSize    font size handed to the content stream
 * @param ancho       factor applied to the font's string width to get the next horizontal offset
 * @throws IOException if the PDF cannot be loaded, drawn on, or saved
 */
public void generarPdf(OutputStream os, String template, Map<String, Object> m, String path, String coordenates,
        float fontSize, float ancho) throws IOException {
    long t = System.currentTimeMillis();
    PDDocument doc = null;
    try {
        doc = PDDocument.load(new File(path));

        List pages = doc.getDocumentCatalog().getAllPages();

        PDPage sourcePage = (PDPage) pages.get(0);

        boolean append = sourcePage.getContents() != null;
        PDPageContentStream contentStream = new PDPageContentStream(doc, sourcePage, append, true);
        try {
            StringReader fileReader = null;
            try {

                fileReader = new StringReader(template);
                List<String> list = CharStreams.readLines(fileReader);
                boolean textHasBegun = false;
                float currentOffset = 0f;
                for (String line : list) {

                    if (line == null) {
                        continue;
                    }

                    if (line.startsWith("#")) {
                        continue;
                    }

                    final Iterable<String> str = Splitter.on(',').omitEmptyStrings().trimResults().split(line);
                    final String[] split = Iterables.toArray(str, String.class);
                    if (split.length < 4) {
                        continue;
                    }

                    // Empty fields were omitted above, so split[0] has at least one character.
                    if (Character.isDigit(split[0].charAt(0))) {
                        if (textHasBegun) {
                            contentStream.endText();
                        }
                        contentStream.beginText();
                        textHasBegun = true;
                        contentStream.moveTextPositionByAmount(parseFloat(split[0]), parseFloat(split[1]));
                    } else {
                        contentStream.moveTextPositionByAmount(currentOffset, 0);
                    }

                    if (!textHasBegun) {
                        LOGGER.warn("Hay un posible mal uso de un .ree", new Throwable());
                        contentStream.beginText();
                        textHasBegun = true;
                    }

                    PDType1Font font;
                    if ("b".equals(split[2])) {
                        font = HELVETICA_BOLD;
                    } else {
                        font = HELVETICA;
                    }
                    contentStream.setFont(font, fontSize);

                    Object text = null;
                    if (split[3].startsWith("\"")) {
                        // TODO: text = substring(split[3], 1, -1);
                    } else {
                        // TODO: text = new PropertyModel(m, split[3]).getObject();
                    }

                    if (text == null) {
                        LOGGER.warn("Propiedad {} no se encuentra", split[3]);
                        contentStream.drawString(" ");
                    } else {
                        String string = text.toString();
                        currentOffset = font.getStringWidth(string) * ancho;
                        contentStream.drawString(string);
                    }
                }

                if (textHasBegun) {
                    contentStream.endText();
                }
            } finally {
                Closeables.closeQuietly(fileReader);
            }
        } finally {
            // Close the stream even when drawing fails, so it is not left open behind the exception.
            contentStream.close();
        }

        try {
            doc.save(os);
        } catch (COSVisitorException e) {
            throw new IOException("Ha ocurrido un error al escribir en el Os", e);
        }
    } finally {
        if (doc != null) {
            doc.close();
        }
        LOGGER.info("Me ha tomado {} milisegundos hacer el pdf", System.currentTimeMillis() - t);
    }
}

From source file:net.anthonypoon.billscrapper.JavaBillScrapper.java

/**
 * Loads a PDF bill, extracts its text and stores the parsed result in {@code billObj}.
 *
 * @param pdfFile the PDF file to read
 * @throws IOException if the PDF cannot be loaded or its text cannot be extracted
 */
public JavaBillScrapper(File pdfFile) throws IOException {
    PDDocument doc = PDDocument.load(pdfFile);
    try {
        PDFTextStripper stripper = new PDFTextStripper();
        String rawText = stripper.getText(doc);
        // A run of consecutive CR/LF characters counts as a single separator.
        String[] textArray = rawText.split("[\\r\\n]+");
        this.billObj = parsePdf(textArray);
    } finally {
        // Release the document even when extraction or parsing throws.
        doc.close();
    }
}

From source file:net.anthonypoon.billscrapper.JavaBillScrapper.java

/**
 * Command-line entry point: arguments starting with {@code -} are parsed as flags, all others
 * are treated as PDF file paths. Each file is loaded (in sorted order), its text is parsed into
 * a bill and, when {@code Flags.INSERT_INTO_DB} is set, written to the database.
 *
 * <p>Any exception aborts the run and is printed to standard output.
 *
 * @param args flags and PDF file paths
 */
public static void main(String[] args) {
    try {
        for (String arg : args) {
            if (!arg.startsWith("-")) {
                filePaths.add(arg);
            } else {
                try {
                    options.add(Flags.fromString(arg));
                } catch (IllegalArgumentException ex) {
                    System.err.println("Illegal options: " + arg);
                }
            }
        }
        Collections.sort(filePaths);
        for (String filePath : filePaths) {
            System.out.println("Loading: " + filePath);
            String rawText;
            PDDocument doc = PDDocument.load(new File(filePath));
            try {
                PDFTextStripper stripper = new PDFTextStripper();
                rawText = stripper.getText(doc);
            } finally {
                // Only the extracted text is needed from here on, so close the document now,
                // whether or not the database flag is set and even if extraction fails.
                doc.close();
            }
            String[] textArray = rawText.split("[\\r\\n]+");
            Bill bill = parsePdf(textArray);
            if (options.contains(Flags.INSERT_INTO_DB)) {
                DatabaseConnector db = new DatabaseConnector();
                DbWriter writer = new DbWriter(db.getConnection());
                boolean isInserted = writer.insertDetail(bill.getBillSummary(), bill.getPhoneSummaryData(),
                        bill.getPhoneDetail());
                writer.commit();
                if (!isInserted) {
                    System.out.println(filePath + " was not inserted into database.");
                }
            }
        }

    } catch (Exception ex) {
        ex.printStackTrace(System.out);
    }
}

From source file:net.awl.edoc.pdfa.PdfBoxIsartorValidate.java

License:Apache License

/**
 * Validates one file and updates the static counters.
 *
 * <p>{@code nbFile} is always incremented. The file is first loaded with PDFBox and then run
 * through {@code PDFParser.parse}; {@code nbOk} or {@code nbBad} is incremented according to
 * the parse result. An {@link IOException} is only reported on standard error (no counter
 * changes); any other throwable counts the file as bad.
 *
 * @param f the file to validate
 */
public static void coin(File f) {
    nbFile++;
    try {
        PDDocument document = PDDocument.load(f);
        try {
            FileInputStream in = new FileInputStream(f);
            try {
                if (PDFParser.parse(in)) {
                    nbOk++;
                } else {
                    nbBad++;
                }
            } finally {
                // The stream was previously never closed.
                in.close();
            }
        } finally {
            // Close the document even when parsing throws.
            document.close();
        }
    } catch (IOException e) {
        System.err.println("Failed for : " + f.getAbsolutePath());
    } catch (Throwable e) {
        nbBad++;
    }

}

From source file:net.bookinaction.ExtractAnnotations.java

License:Apache License

/**
 * Processes {@code <job>.pdf}: records image locations and annotations to
 * {@code <job>-dict.txt}, adds square annotations for images, outlines text-markup rectangles
 * on each page, and saves the result as {@code <job>-new.pdf}.
 *
 * @param job base name (without extension) of the input and output files
 * @param pA  eight adjustment values: {@code pA[0..3]} are passed to
 *            {@code Stamper.adjustedRect} for the text-extraction regions, {@code pA[4..7]}
 *            for the rectangles that are recorded and drawn
 * @throws IOException if the dictionary file cannot be created or the PDF cannot be read,
 *                     modified, or saved
 */
public void doJob(String job, Float[] pA) throws IOException {

    PDDocument document = null;

    Stamper s = new Stamper(); // utility class

    final String job_file = job + ".pdf";
    final String dic_file = job + "-dict.txt";
    final String new_job = job + "-new.pdf";

    PrintWriter writer = new PrintWriter(dic_file);

    try {
        ImageLocationListener imageLocationsListener = new ImageLocationListener();
        AnnotationMaker annotMaker = new AnnotationMaker();

        document = PDDocument.load(new File(job_file));

        int pageNum = 0;
        for (PDPage page : document.getPages()) {
            pageNum++;

            PDRectangle cropBox = page.getCropBox();

            List<PDAnnotation> annotations = page.getAnnotations();

            // extract image locations
            List<Rectangle2D> imageRects = new ArrayList<Rectangle2D>();
            imageLocationsListener.setImageRects(imageRects);
            imageLocationsListener.processPage(page);

            int im = 0;
            for (Rectangle2D pdImageRect : imageRects) {
                s.recordImage(writer, pageNum, "[im" + im + "]", (Rectangle2D.Float) pdImageRect);
                annotations.add(annotMaker.squareAnnotation(Color.YELLOW, (Rectangle2D.Float) pdImageRect,
                        "[im" + im + "]"));
                im++;
            }

            PDFTextStripperByArea stripper = new PDFTextStripperByArea();

            // Regions are named by a running index; both markups and links consume one.
            int j = 0;
            List<PDAnnotation> viableAnnots = new ArrayList<PDAnnotation>();

            for (PDAnnotation annot : annotations) {
                if (annot instanceof PDAnnotationTextMarkup || annot instanceof PDAnnotationLink) {

                    stripper.addRegion(Integer.toString(j++), s.getAwtRect(
                            s.adjustedRect(annot.getRectangle(), pA[0], pA[1], pA[2], pA[3]), cropBox));
                    viableAnnots.add(annot);

                } else if (annot instanceof PDAnnotationPopup || annot instanceof PDAnnotationText) {
                    viableAnnots.add(annot);

                }
            }

            stripper.extractRegions(page);

            List<PDRectangle> rects = new ArrayList<PDRectangle>();

            j = 0;
            for (PDAnnotation viableAnnot : viableAnnots) {

                if (viableAnnot instanceof PDAnnotationTextMarkup) {
                    String highlightText = stripper.getTextForRegion(Integer.toString(j++));
                    String withoutCR = highlightText.replace((char) 0x0A, '^');

                    String comment = viableAnnot.getContents();

                    String colorString = String.format("%06x", viableAnnot.getColor().toRGB());

                    PDRectangle aRect = s.adjustedRect(viableAnnot.getRectangle(), pA[4], pA[5], pA[6], pA[7]);
                    rects.add(aRect);

                    s.recordTextMarkup(writer, pageNum, comment, withoutCR, aRect, colorString);

                } else if (viableAnnot instanceof PDAnnotationLink) {
                    // A region was registered for this link above. Skip its index so that the
                    // following markups read their own region instead of the link's.
                    j++;

                } else if (viableAnnot instanceof PDAnnotationText) {
                    String comment = viableAnnot.getContents();
                    String colorString = String.format("%06x", viableAnnot.getColor().toRGB());

                    for (Rectangle2D pdImageRect : imageRects) {
                        if (pdImageRect.contains(viableAnnot.getRectangle().getLowerLeftX(),
                                viableAnnot.getRectangle().getLowerLeftY())) {
                            s.recordTextMarkup(writer, pageNum, comment, "", (Rectangle2D.Float) pdImageRect,
                                    colorString);
                            annotations.add(annotMaker.squareAnnotation(Color.GREEN,
                                    (Rectangle2D.Float) pdImageRect, comment));
                        }
                    }
                }
            }

            PDPageContentStream canvas = new PDPageContentStream(document, page, true, true, true);
            try {
                for (PDRectangle pdRect : rects) {
                    s.showBox(canvas, new Rectangle2D.Float(pdRect.getLowerLeftX(), pdRect.getUpperRightY(),
                            pdRect.getWidth(), pdRect.getHeight()), cropBox, Color.BLUE);
                }
            } finally {
                // Close the page stream even when drawing a box fails.
                canvas.close();
            }
        }
        document.save(new_job);

    } finally {
        try {
            if (document != null) {
                document.close();
            }
        } finally {
            // Always flush and release the dictionary file, including on failure paths.
            writer.close();
        }
    }

}

From source file:net.bookinaction.TextInfoExtractor.java

License:Apache License

/**
 * Runs {@code getTextPositionFromPage} over every page of a PDF, writing the collected output
 * to {@code coord_text}, and then saves the document next to the source with a {@code -new.pdf}
 * suffix.
 *
 * @param source        path of the PDF to process
 * @param coord_text    path of the text file that receives the per-page output
 * @param stripperParam parameters forwarded to {@code getTextPositionFromPage}
 * @throws IOException if the PDF cannot be loaded or saved, or the text file cannot be created
 */
public void doTextPosition(String source, String coord_text, StripperParam stripperParam) throws IOException {

    // NOTE(review): this keeps only the text before the FIRST dot, so "a.b.pdf" becomes
    // "a-new.pdf" and "./x.pdf" becomes "-new.pdf". Left unchanged to preserve output names.
    String new_file = source.split("\\.")[0] + "-new.pdf";

    PDDocument document = PDDocument.load(new File(source));
    try {
        PrintWriter writer = new PrintWriter(new File(coord_text));
        try {
            // Page numbers passed on are 1-based.
            for (int i = 0; i < document.getNumberOfPages(); i++) {
                getTextPositionFromPage(document, stripperParam, i + 1, writer, true);
            }
        } finally {
            writer.close();
        }

        document.save(new_file);
    } finally {
        // Release the document even when page processing or saving throws.
        document.close();
    }

}

From source file:net.homeip.donaldm.doxmentor4j.indexers.PDFIndexer.java

License:Open Source License

/**
 * Extracts text from the PDF at {@code uri} into a temporary file and returns a reader on it.
 *
 * <p>The PDF is loaded from the local file returned by {@code Utils.uri2File} when there is one,
 * otherwise from the URI's URL. The document title (or the URI path when the title is missing
 * or empty) is appended to {@code title} if that builder is non-null.
 *
 * @param uri   location of the PDF
 * @param page  page to extract when {@code >= 0}; a negative value extracts pages 1 through the
 *              document's page count
 * @param title optional builder that receives the title; may be {@code null}
 * @return a reader over the temporary file holding the extracted text
 * @throws FileNotFoundException declared by the signature
 * @throws MalformedURLException declared by the signature
 * @throws IOException           if loading the PDF or writing the text fails
 */
@Override
public Reader getText(URI uri, int page, StringBuilder title)
        throws FileNotFoundException, MalformedURLException, IOException {
    PDDocument document = null;
    PDFTextStripper textStripper = null;
    FileWriter out = null;
    java.io.File localPdf = null;
    try {
        localPdf = Utils.uri2File(uri);
        if (localPdf == null) {
            document = PDDocument.load(uri.toURL(), true);
        } else {
            document = PDDocument.load(localPdf.getAbsolutePath(), true);
        }

        String docTitle = document.getDocumentInformation().getTitle();
        if (docTitle == null || docTitle.length() == 0) {
            docTitle = uri.getPath();
        }
        if (title != null) {
            title.append(docTitle);
        }

        textStripper = new PDFTextStripper();
        final boolean singlePage = page >= 0;
        textStripper.setStartPage(singlePage ? page : 1);
        textStripper.setEndPage(singlePage ? page : document.getNumberOfPages());

        java.io.File textFile = java.io.File.createTempFile("pdf", ".tmp");
        out = new FileWriter(textFile);
        textStripper.writeText(document, out);
        try {
            out.close();
            // Only cleared on a successful close; otherwise the finally block retries.
            out = null;
        } catch (Exception ignored) {
        }
        textStripper.resetEngine();
        return new FileReader(textFile);
    } finally {
        // Best-effort cleanup: each step is attempted independently and failures are ignored.
        if (textStripper != null) {
            try {
                textStripper.resetEngine();
            } catch (Exception ignored) {
            }
        }
        if (document != null) {
            try {
                document.close();
            } catch (Exception ignored) {
            }
        }
        if (out != null) {
            try {
                out.close();
            } catch (Exception ignored) {
            }
        }
        // Delete the local PDF only when its lower-cased path contains the temp-dir property.
        if (localPdf != null) {
            String loweredPath = localPdf.getAbsolutePath().toLowerCase();
            if (loweredPath.contains(System.getProperty("java.io.tmpdir"))) {
                localPdf.delete();
            }
        }
    }
}

From source file:net.homeip.donaldm.doxmentor4j.indexers.PDFIndexer.java

License:Open Source License

/**
 * Indexes the text of the PDF at {@code uri}.
 *
 * <p>When the page count is known, one index document is added per page (fields
 * {@code path}, {@code title}, {@code contents}, {@code page}). Otherwise the whole text is
 * written to a temporary file and indexed as a single document with {@code page} set to
 * {@code "-1"}. {@code AjaxIndexer.incrementCount()} is called for every document that
 * {@code addDocument} accepts.
 *
 * @param href        value stored in the {@code path} field of every index document
 * @param uri         location of the PDF
 * @param followLinks not read by this method
 * @param extraParams not read by this method
 * @return {@code 1} on success; {@code -1} if the index writer is {@code null} or any
 *         exception is caught while indexing (the exception is logged, not rethrown)
 * @throws IOException declared by the signature
 */
@Override
public long index(String href, URI uri, boolean followLinks, Object... extraParams) throws IOException
//-----------------------------------------------------------------------------------------------------
{
    if (m_indexWriter == null) {
        logger.error("PDFIndexer: index writer is null");
        return -1;
    }
    PDDocument pdf = null;
    PDFTextStripper stripper = null;
    Reader reader = null;
    Writer writer = null;
    java.io.File tmpPdf = null;
    try {
        // Prefer a local file for the URI; fall back to loading straight from the URL.
        tmpPdf = Utils.uri2File(uri);
        if (tmpPdf != null)
            pdf = PDDocument.load(tmpPdf.getAbsolutePath(), true);
        else
            pdf = PDDocument.load(uri.toURL(), true);
        PDDocumentInformation pdfInfo = pdf.getDocumentInformation();
        // Use the URI path as the title when the PDF metadata has none.
        String title = pdfInfo.getTitle();
        if ((title == null) || (title.isEmpty()))
            title = uri.getPath();
        stripper = new PDFTextStripper();
        int noPages = pdf.getNumberOfPages();
        stripper.setSuppressDuplicateOverlappingText(false);
        if (noPages != PDDocument.UNKNOWN_NUMBER_OF_PAGES) {
            // Known page count: extract and index each page as its own document.
            for (int page = 1; page <= noPages; page++) {
                stripper.setStartPage(page);
                stripper.setEndPage(page);
                writer = new StringWriter();
                stripper.writeText(pdf, writer);
                reader = new StringReader(writer.toString());
                Document doc = new Document();
                doc.add(new Field("path", href, Field.Store.YES, Field.Index.NO));
                doc.add(new Field("title", title.toString(), Field.Store.YES, Field.Index.ANALYZED));
                doc.add(new Field("contents", reader, Field.TermVector.WITH_POSITIONS_OFFSETS));
                doc.add(new Field("page", Integer.toString(page), Field.Store.YES, Field.Index.NO));
                if (addDocument(doc))
                    AjaxIndexer.incrementCount();
                // Per-page cleanup; the variables are only nulled when close() succeeds, so the
                // finally block below retries anything that failed here.
                try {
                    writer.close();
                    writer = null;
                } catch (Exception _e) {
                }
                try {
                    reader.close();
                    reader = null;
                } catch (Exception _e) {
                }
                // Every 50 pages, request finalization and a garbage collection.
                if ((page % 50) == 0) {
                    try {
                        System.runFinalization();
                        System.gc();
                    } catch (Exception _e) {
                    }
                }
            }
        } else {
            // Unknown page count: dump all text to a temp file and index it as one document.
            // NOTE(review): the temp file f is not deleted anywhere in this method.
            java.io.File f = java.io.File.createTempFile("pdf", ".tmp");
            writer = new FileWriter(f);
            stripper.writeText(pdf, writer);
            try {
                writer.close();
                writer = null;
            } catch (Exception _e) {
            }
            reader = new FileReader(f);
            Document doc = new Document();
            doc.add(new Field("path", href, Field.Store.YES, Field.Index.NO));
            doc.add(new Field("title", title.toString(), Field.Store.YES, Field.Index.ANALYZED));
            doc.add(new Field("contents", reader, Field.TermVector.WITH_POSITIONS_OFFSETS));
            doc.add(new Field("page", "-1", Field.Store.YES, Field.Index.NO));
            if (addDocument(doc))
                AjaxIndexer.incrementCount();
            try {
                reader.close();
                reader = null;
            } catch (Exception _e) {
            }
            try {
                System.runFinalization();
                System.gc();
            } catch (Exception _e) {
            }
        }
        return 1;
    } catch (Exception e) {
        logger.error("Error indexing PDF text from " + uri.toString(), e);
        return -1;
    } finally {
        // Best-effort cleanup: every step is attempted and failures are ignored.
        if (stripper != null)
            try {
                stripper.resetEngine();
            } catch (Exception _e) {
            }
        if (pdf != null)
            try {
                pdf.close();
            } catch (Exception _e) {
            }
        if (writer != null)
            try {
                writer.close();
            } catch (Exception _e) {
            }
        if (reader != null)
            try {
                reader.close();
            } catch (Exception _e) {
            }
        // Delete the local PDF only when its path lies under the temp directory.
        // NOTE(review): the path is lower-cased but the java.io.tmpdir value is not, so a temp
        // directory containing upper-case characters never matches - confirm whether intended.
        if ((tmpPdf != null)
                && (tmpPdf.getAbsolutePath().toLowerCase().indexOf(System.getProperty("java.io.tmpdir")) >= 0))
            tmpPdf.delete();
    }
}

From source file:net.ontopia.topicmaps.classify.PDFFormatModule.java

License:Apache License

public void readContent(ClassifiableContentIF cc, TextHandlerIF handler) {
    try {/*from ww  w  .  j  av a2s .c o  m*/
        PDDocument pdoc = PDDocument.load(new BufferedInputStream(new ByteArrayInputStream(cc.getContent())));
        PDFTextStripper stripper = new PDFTextStripper();
        String s = stripper.getText(pdoc);
        pdoc.close();
        char[] c = s.toCharArray();
        handler.startRegion("document");
        handler.text(c, 0, c.length);
        handler.endRegion();
    } catch (Exception e) {
        throw new OntopiaRuntimeException(e);
    }
}

From source file:net.sf.jabref.imports.PdfContentImporter.java

License:Open Source License

/**
 * Builds BibTeX entries from the first page of a PDF.
 *
 * <p>The text of page 1 is extracted and searched for a DOI. If the DOI fetcher delivers at
 * least one entry, those entries are returned. Otherwise the text is parsed heuristically,
 * line by line (title, authors, abstract, keywords, then trailing blocks for DOI and
 * publisher details), into a single entry whose {@code review} field holds the raw text.
 *
 * <p>NOTE(review): {@code split}, {@code i}, {@code curString} and {@code year} are not declared
 * in this method - presumably instance fields shared with the helper methods called here
 * ({@code proceedToNextNonEmptyLine}, {@code fillCurStringWithNonEmptyLines},
 * {@code readLastBlock}, {@code extractYear}); confirm against the class.
 *
 * @param in     stream containing the PDF
 * @param status receives a user-visible message when the Bouncy Castle library is missing
 * @return the imported entries; empty if the document cannot be loaded or contains no
 *         non-empty line
 * @throws IOException declared by the signature; a failure to load the document is logged and
 *                     results in an empty list instead
 */
@Override
public List<BibtexEntry> importEntries(InputStream in, OutputPrinter status) throws IOException {
    final ArrayList<BibtexEntry> res = new ArrayList<BibtexEntry>(1);

    PDDocument document;
    try {
        document = PDDocument.load(in);
    } catch (IOException e) {
        PdfContentImporter.logger.log(Level.SEVERE, "Could not load document", e);
        return res;
    }

    // From here on the document is open; the finally block at the end closes it on every path,
    // including the early returns below.
    try {
        if (document.isEncrypted()) {
            PdfContentImporter.logger.log(Level.INFO, Globals.lang("Encrypted documents are not supported"));
            //return res;
        }

        // Extract only the first page, sorted by position, one paragraph per line.
        PDFTextStripper stripper = new PDFTextStripper();
        stripper.setStartPage(1);
        stripper.setEndPage(1);
        stripper.setSortByPosition(true);
        stripper.setParagraphEnd(System.getProperty("line.separator"));
        StringWriter writer = new StringWriter();
        stripper.writeText(document, writer);
        String textResult = writer.toString();

        String doi = DOIUtil.getDOI(textResult);
        if (doi.length() < textResult.length()) {
            // A DOI was found in the text
            // We do NO parsing of the text, but use the DOI fetcher

            ImportInspector i = new ImportInspector() {

                @Override
                public void toFront() {
                }

                @Override
                public void setProgress(int current, int max) {
                }

                @Override
                public void addEntry(BibtexEntry entry) {
                    // add the entry to the result object
                    res.add(entry);
                }
            };
            PdfContentImporter.doiToBibTeXFetcher.processQuery(doi, i, status);
            if (res.size() != 0) {
                // if something has been found, return the result
                return res;
            } else {
                // otherwise, we just parse the PDF
            }
        }

        String author;
        String editor = null;
        String institution = null;
        String abstractT = null;
        String keywords = null;
        String title;
        String conference = null;
        String DOI = null;
        String series = null;
        String volume = null;
        String number = null;
        String pages = null;
        // year is a class variable as the method extractYear() uses it;
        String publisher = null;
        BibtexEntryType type = BibtexEntryType.INPROCEEDINGS;

        final String lineBreak = System.getProperty("line.separator");

        split = textResult.split(lineBreak);

        // idea: split[] contains the different lines
        // blocks are separated by empty lines
        // treat each block
        //   or do special treatment at authors (which are not broken)
        //   therefore, we do a line-based and not a block-based splitting
        // i points to the current line
        // curString (mostly) contains the current block
        //   the different lines are joined into one and thereby separated by " "

        proceedToNextNonEmptyLine();
        if (i >= split.length) {
            // PDF could not be parsed or is empty
            // return empty list
            return res;
        }
        curString = split[i];
        i = i + 1;

        if (curString.length() > 4) {
            // special case: possibly conference as first line on the page
            extractYear();
            if (curString.contains("Conference")) {
                fillCurStringWithNonEmptyLines();
                conference = curString;
                curString = "";
            } else {
                // e.g. Copyright (c) 1998 by the Genetics Society of America
                // future work: get year using RegEx
                String lower = curString.toLowerCase();
                if (lower.contains("copyright")) {
                    fillCurStringWithNonEmptyLines();
                    publisher = curString;
                    curString = "";
                }
            }
        }

        // start: title
        fillCurStringWithNonEmptyLines();
        title = streamlineTitle(curString);
        curString = "";
        //i points to the next non-empty line

        // after title: authors
        author = null;
        while ((i < split.length) && (!split[i].equals(""))) {
            // author names are unlikely to be split among different lines
            // treat them line by line
            curString = streamlineNames(split[i]);
            if (author == null) {
                author = curString;
            } else {
                if (curString.equals("")) {
                    // if split[i] is "and" then "" is returned by streamlineNames -> do nothing
                } else {
                    author = author.concat(" and ").concat(curString);
                }
            }
            i++;
        }
        curString = "";
        i++;

        // then, abstract and keywords follow
        while (i < split.length) {
            curString = split[i];
            if ((curString.length() >= "Abstract".length())
                    && (curString.substring(0, "Abstract".length()).equalsIgnoreCase("Abstract"))) {
                if (curString.length() == "Abstract".length()) {
                    // only word "abstract" found -- skip line
                    curString = "";
                } else {
                    // the +1 skips the single character that follows the word "Abstract"
                    curString = curString.substring("Abstract".length() + 1).trim().concat(lineBreak);
                }
                i++;
                // fillCurStringWithNonEmptyLines() cannot be used as that uses " " as line separator
                // whereas we need linebreak as separator
                while ((i < split.length) && (!split[i].equals(""))) {
                    curString = curString.concat(split[i]).concat(lineBreak);
                    i++;
                }
                abstractT = curString;
                i++;
            } else if ((curString.length() >= "Keywords".length())
                    && (curString.substring(0, "Keywords".length()).equalsIgnoreCase("Keywords"))) {
                if (curString.length() == "Keywords".length()) {
                    // only word "Keywords" found -- skip line
                    curString = "";
                } else {
                    // the +1 skips the single character that follows the word "Keywords"
                    curString = curString.substring("Keywords".length() + 1).trim();
                }
                i++;
                fillCurStringWithNonEmptyLines();
                keywords = removeNonLettersAtEnd(curString);
            } else {
                String lower = curString.toLowerCase();

                // a line mentioning "technical" switches the entry type to a tech report and
                // takes the last space-separated token as the report number
                int pos = lower.indexOf("technical");
                if (pos >= 0) {
                    type = BibtexEntryType.TECHREPORT;
                    pos = curString.trim().lastIndexOf(' ');
                    if (pos >= 0) {
                        // assumption: last character of curString is NOT ' '
                        //   otherwise pos+1 leads to an out-of-bounds exception
                        number = curString.substring(pos + 1);
                    }
                }

                i++;
                proceedToNextNonEmptyLine();
            }
        }

        i = split.length - 1;

        // last block: DOI, detailed information
        // sometimes, this information is in the third last block etc...
        // therefore, read until the beginning of the file

        while (i >= 0) {
            readLastBlock();
            // i now points to the block before or is -1
            // curString contains the last block, separated by " "

            extractYear();

            int pos = curString.indexOf("(Eds.)");
            if ((pos >= 0) && (publisher == null)) {
                // looks like a Springer last line
                // e.g: A. Persson and J. Stirna (Eds.): PoEM 2009, LNBIP 39, pp. 161-175, 2009.
                publisher = "Springer";
                editor = streamlineNames(curString.substring(0, pos - 1));
                curString = curString.substring(pos + "(Eds.)".length() + 2); //+2 because of ":" after (Eds.) and the subsequent space
                String[] springerSplit = curString.split(", ");
                if (springerSplit.length >= 4) {
                    conference = springerSplit[0];

                    String seriesData = springerSplit[1];
                    int lastSpace = seriesData.lastIndexOf(' ');
                    series = seriesData.substring(0, lastSpace);
                    volume = seriesData.substring(lastSpace + 1);

                    // skips the first four characters (the "pp. " prefix in the example above)
                    pages = springerSplit[2].substring(4);

                    if (springerSplit[3].length() >= 4) {
                        year = springerSplit[3].substring(0, 4);
                    }
                }
            } else {
                if (DOI == null) {
                    pos = curString.indexOf("DOI");
                    if (pos < 0) {
                        pos = curString.indexOf("doi");
                    }
                    if (pos >= 0) {
                        // step over the three-letter marker and one optional ':' or ' ' delimiter
                        pos += 3;
                        char delimiter = curString.charAt(pos);
                        if ((delimiter == ':') || (delimiter == ' ')) {
                            pos++;
                        }
                        int nextSpace = curString.indexOf(' ', pos);
                        if (nextSpace > 0) {
                            DOI = curString.substring(pos, nextSpace);
                        } else {
                            DOI = curString.substring(pos);
                        }
                    }
                }

                if ((publisher == null) && (curString.contains("IEEE"))) {
                    // IEEE has the conference things at the end
                    publisher = "IEEE";

                    // year is extracted by extractYear
                    // otherwise, we could it determine as follows:
                    // String yearStr = curString.substring(curString.length()-4);
                    // if (isYear(yearStr)) {
                    //   year = yearStr;
                    // }

                    if (conference == null) {
                        pos = curString.indexOf('$');
                        if (pos > 0) {
                            // we found the price
                            // before the price, the ISSN is stated
                            // skip that
                            pos -= 2;
                            while ((pos >= 0) && (curString.charAt(pos) != ' ')) {
                                pos--;
                            }
                            if (pos > 0) {
                                conference = curString.substring(0, pos);
                            }
                        }
                    }
                }

                //               String lower = curString.toLowerCase();
                //               if (institution == null) {
                //
                //               }

            }
        }

        // Assemble the entry from whatever fields were recognised.
        BibtexEntry entry = new BibtexEntry();
        entry.setType(type);

        if (author != null) {
            entry.setField("author", author);
        }
        if (editor != null) {
            entry.setField("editor", editor);
        }
        if (institution != null) {
            entry.setField("institution", institution);
        }
        if (abstractT != null) {
            entry.setField("abstract", abstractT);
        }
        if (keywords != null) {
            entry.setField("keywords", keywords);
        }
        if (title != null) {
            entry.setField("title", title);
        }
        if (conference != null) {
            entry.setField("booktitle", conference);
        }
        if (DOI != null) {
            entry.setField("doi", DOI);
        }
        if (series != null) {
            entry.setField("series", series);
        }
        if (volume != null) {
            entry.setField("volume", volume);
        }
        if (number != null) {
            entry.setField("number", number);
        }
        if (pages != null) {
            entry.setField("pages", pages);
        }
        if (year != null) {
            entry.setField("year", year);
        }
        if (publisher != null) {
            entry.setField("publisher", publisher);
        }

        // the complete extracted page text is kept in the "review" field
        entry.setField("review", textResult);

        res.add(entry);
    } catch (NoClassDefFoundError e) {
        // A missing Bouncy Castle provider is reported to the user; anything else is logged.
        // NOTE(review): e.getMessage() is dereferenced without a null check.
        if (e.getMessage().equals("org/bouncycastle/jce/provider/BouncyCastleProvider")) {
            status.showMessage(Globals.lang(
                    "Java Bouncy Castle library not found. Please download and install it. For more information see http://www.bouncycastle.org/."));
        } else {
            PdfContentImporter.logger.log(Level.SEVERE, e.getLocalizedMessage(), e);
        }
    } finally {
        document.close();
    }

    return res;
}