List of usage examples for org.apache.pdfbox.pdfparser PDFParser getPDDocument
public PDDocument getPDDocument() throws IOException
From source file:com.aurel.track.lucene.index.associatedFields.textExctractor.PdfExtractor.java
License:Open Source License
/** * Gets the text from file content /*from ww w. ja v a2s. c o m*/ * @param file * @param fileExtension * @return */ @Override public String getText(File file, String fileExtension) { FileInputStream fis = null; PDDocument pdDoc = null; StringWriter stringWriter = null; try { fis = new FileInputStream(file); PDFParser parser = new PDFParser(fis); parser.parse(); pdDoc = parser.getPDDocument(); PDFTextStripper stripper = new PDFTextStripper(); stripper.setLineSeparator("\n"); stringWriter = new StringWriter(); stripper.writeText(pdDoc, stringWriter); return stringWriter.toString(); } catch (Exception e) { if (LOGGER.isDebugEnabled()) { LOGGER.debug( "Extracting text from the .pdf file " + file.getName() + " failed with " + e.getMessage()); LOGGER.debug(ExceptionUtils.getStackTrace(e)); } } finally { try { if (stringWriter != null) { stringWriter.close(); } } catch (Exception e) { } try { if (pdDoc != null) { pdDoc.close(); } } catch (Exception e) { LOGGER.info("Closing pdDoc for " + file + " failed with " + e.getMessage()); LOGGER.debug(ExceptionUtils.getStackTrace(e)); } try { if (fis != null) { fis.close(); } } catch (Exception e) { LOGGER.info("Closing the FileInputStream for " + file + " failed with " + e.getMessage()); } } return null; }
From source file:com.exlibris.dps.repository.plugin.riskExtractor.drmlint.PDFBoxWrapper.java
License:Apache License
/** * Check if a PDF file is valid or not//w w w.j a v a 2 s. c om * @param pFile file to check * @return whether the file is valid or not */ public static boolean isValid(File pFile) { boolean ret = false; try { PDFParser parser = new PDFParser(new FileInputStream(pFile)); parser.parse(); File temp = File.createTempFile("drmlint-temp-", ".pdf"); parser.getPDDocument().save(temp); parser.getDocument().close(); temp.delete(); ret = true; } catch (FileNotFoundException e) { // TODO Auto-generated catch block e.printStackTrace(); } catch (IOException e) { // TODO Auto-generated catch block e.printStackTrace(); } catch (COSVisitorException e) { // TODO Auto-generated catch block ret = false; } return ret; }
From source file:com.liferay.portal.util.LuceneFields.java
License:Open Source License
/**
 * Builds a Lucene field holding the textual content of a file.
 *
 * <p>For {@code .doc}, {@code .htm}/{@code .html}, {@code .pdf}, {@code .rtf}
 * and {@code .xls} files the text is extracted eagerly and stored in the
 * field. For any other extension, or when extraction fails, the field is
 * backed by a reader over the file.
 *
 * @param field the name of the field to create
 * @param file the file whose content is indexed
 * @param fileExt the file extension including the leading dot; compared
 *        case-insensitively
 * @return a stored, not-analyzed field with the extracted text, or a
 *         reader-backed field when no text was extracted
 * @throws IOException if the file cannot be opened
 */
public static Field getFile(String field, File file, String fileExt) throws IOException {
    fileExt = fileExt.toLowerCase();

    FileInputStream fis = new FileInputStream(file);
    Reader reader = new BufferedReader(new InputStreamReader(fis));

    String text = null;

    if (fileExt.equals(".doc")) {
        try {
            WordDocument wordDocument = new WordDocument(fis);
            StringWriter stringWriter = new StringWriter();
            wordDocument.writeAllText(stringWriter);
            text = stringWriter.toString();
            stringWriter.close();
        } catch (Exception e) {
            _log.error(e.getMessage());
        }
    } else if (fileExt.equals(".htm") || fileExt.equals(".html")) {
        try {
            DefaultStyledDocument dsd = new DefaultStyledDocument();
            HTMLEditorKit htmlEditorKit = new HTMLEditorKit();
            htmlEditorKit.read(reader, dsd, 0);
            text = dsd.getText(0, dsd.getLength());
        } catch (Exception e) {
            _log.error(e.getMessage());
        }
    } else if (fileExt.equals(".pdf")) {
        PDDocument pdDoc = null;
        try {
            PDFParser parser = new PDFParser(fis);
            parser.parse();
            pdDoc = parser.getPDDocument();
            StringWriter stringWriter = new StringWriter();
            PDFTextStripper stripper = new PDFTextStripper();
            stripper.setLineSeparator("\n");
            stripper.writeText(pdDoc, stringWriter);
            text = stringWriter.toString();
            stringWriter.close();
        } catch (Exception e) {
            _log.error(e.getMessage());
        } finally {
            // Close the document even when text stripping failed half way.
            if (pdDoc != null) {
                try {
                    pdDoc.close();
                } catch (Exception e) {
                    _log.error(e.getMessage());
                }
            }
        }
    } else if (fileExt.equals(".rtf")) {
        try {
            DefaultStyledDocument dsd = new DefaultStyledDocument();
            RTFEditorKit rtfEditorKit = new RTFEditorKit();
            rtfEditorKit.read(reader, dsd, 0);
            text = dsd.getText(0, dsd.getLength());
        } catch (Exception e) {
            _log.error(e.getMessage());
        }
    } else if (fileExt.equals(".xls")) {
        try {
            XLSTextStripper stripper = new XLSTextStripper(fis);
            text = stripper.getText();
        } catch (Exception e) {
            _log.error(e.getMessage());
        }
    }

    if (text != null) {
        // The text is already in memory, so the stream is no longer needed.
        // Closing the reader also closes the underlying FileInputStream.
        try {
            reader.close();
        } catch (IOException e) {
            _log.error(e.getMessage());
        }
        return new Field(field, text, Field.Store.YES, Field.Index.NOT_ANALYZED);
    } else {
        // The returned field reads from the stream later, so it must stay open here.
        return new Field(field, reader);
    }
}
From source file:com.openkm.extractor.PdfTextExtractor.java
License:Open Source License
/** * {@inheritDoc}/*w w w . j a v a2s . c o m*/ */ @SuppressWarnings("rawtypes") public String extractText(InputStream stream, String type, String encoding) throws IOException { try { PDFParser parser = new PDFParser(new BufferedInputStream(stream)); try { parser.parse(); PDDocument document = parser.getPDDocument(); if (document.isEncrypted()) { try { document.decrypt(""); document.setAllSecurityToBeRemoved(true); } catch (Exception e) { throw new IOException("Unable to extract text: document encrypted", e); } } CharArrayWriter writer = new CharArrayWriter(); PDFTextStripper stripper = new PDFTextStripper(); stripper.setLineSeparator("\n"); stripper.writeText(document, writer); String st = writer.toString().trim(); log.debug("TextStripped: '{}'", st); if (Config.SYSTEM_PDF_FORCE_OCR || st.length() <= 1) { log.warn("PDF does not contains text layer"); // Extract images from PDF StringBuilder sb = new StringBuilder(); if (!Config.SYSTEM_PDFIMAGES.isEmpty()) { File tmpPdf = FileUtils.createTempFile("pdf"); File tmpDir = new File(EnvironmentDetector.getTempDir()); String baseName = FileUtils.getFileName(tmpPdf.getName()); document.save(tmpPdf); int pgNum = 1; try { for (PDPage page : (List<PDPage>) document.getDocumentCatalog().getAllPages()) { HashMap<String, Object> hm = new HashMap<String, Object>(); hm.put("fileIn", tmpPdf.getPath()); hm.put("firstPage", pgNum); hm.put("lastPage", pgNum++); hm.put("imageRoot", tmpDir + File.separator + baseName); String cmd = TemplateUtils.replace("SYSTEM_PDFIMAGES", Config.SYSTEM_PDFIMAGES, hm); ExecutionUtils.runCmd(cmd); for (File tmp : tmpDir.listFiles()) { if (tmp.getName().startsWith(baseName + "-")) { if (page.findRotation() > 0) { ImageUtils.rotate(tmp, tmp, page.findRotation()); } try { String txt = doOcr(tmp); sb.append(txt).append(" "); log.debug("OCR Extracted: {}", txt); } finally { FileUtils.deleteQuietly(tmp); } } } } } finally { FileUtils.deleteQuietly(tmpPdf); } } else { for (PDPage page : (List<PDPage>) 
document.getDocumentCatalog().getAllPages()) { PDResources resources = page.getResources(); Map<String, PDXObject> images = resources.getXObjects(); if (images != null) { for (String key : images.keySet()) { PDXObjectImage image = (PDXObjectImage) images.get(key); String prefix = "img-" + key + "-"; File pdfImg = null; try { pdfImg = File.createTempFile(prefix, ".png"); log.debug("Writing image: {}", pdfImg.getPath()); // Won't work until PDFBox 1.8.9 ImageIO.write(image.getRGBImage(), "png", pdfImg); if (page.findRotation() > 0) { ImageUtils.rotate(pdfImg, pdfImg, page.findRotation()); } // Do OCR String txt = doOcr(pdfImg); sb.append(txt).append(" "); log.debug("OCR Extracted: {}", txt); } finally { FileUtils.deleteQuietly(pdfImg); } } } } } return sb.toString(); } else { return writer.toString(); } } finally { try { PDDocument doc = parser.getPDDocument(); if (doc != null) { doc.close(); } } catch (IOException e) { // ignore } } } catch (Exception e) { // it may happen that PDFParser throws a runtime // exception when parsing certain pdf documents log.warn("Failed to extract PDF text content", e); throw new IOException(e.getMessage(), e); } finally { stream.close(); } }
From source file:com.stimulus.archiva.extraction.PDFExtractor.java
License:Open Source License
/**
 * Extracts the text of a PDF into a temporary UTF-8 file and returns a
 * reader over that file.
 *
 * <p>The temporary file and the returned reader are both registered with
 * {@code indexInfo}. An encrypted document is decrypted with the empty
 * password before extraction.
 *
 * @param is the PDF content
 * @param charset not used by this extractor
 * @param indexInfo receives the temporary file and the returned reader
 * @return a reader over the extracted text
 * @throws ExtractionException if the PDF cannot be parsed, decrypted or
 *         stripped, or if the extracted text cannot be opened for reading
 */
public Reader getText(InputStream is, Charset charset, IndexInfo indexInfo) throws ExtractionException {
    logger.debug("extracting pdf file");
    File file = null;
    PDDocument document = null;
    Writer output = null;
    try {
        PDFParser parser = new PDFParser(is);
        parser.parse();
        document = parser.getPDDocument();
        if (document.isEncrypted()) {
            DocumentEncryption decryptor = new DocumentEncryption(document);
            if (logger.isDebugEnabled()) {
                logger.debug("pdf document appears to be encrypted (will attempt decryption)");
            }
            decryptor.decryptDocument("");
        }
        file = File.createTempFile("extract_pdf", ".tmp");
        indexInfo.addDeleteFile(file);
        output = new OutputStreamWriter(new FileOutputStream(file), "UTF-8");
        PDFTextStripper stripper = new PDFTextStripper();
        stripper.writeText(document, output);
    } catch (Throwable e) {
        throw new ExtractionException("failed to extract pdf (probable password protected document)", e, logger,
                ChainedException.Level.DEBUG);
    } finally {
        // Closed separately so a failing document.close() cannot skip the
        // writer, which must be flushed before the file is read back below.
        try {
            if (document != null) {
                document.close();
            }
        } catch (IOException io) {
            logger.debug("failed to close pdf document", io);
        }
        try {
            if (output != null) {
                output.close();
            }
        } catch (IOException io) {
            logger.debug("failed to close extracted pdf text file", io);
        }
    }
    try {
        logger.debug("returning extracted PDF data");
        // Read back with the charset the file was written in; FileReader would
        // use the platform default and garble non-ASCII text on other platforms.
        Reader outReader = new java.io.InputStreamReader(new java.io.FileInputStream(file), "UTF-8");
        indexInfo.addReader(outReader);
        return outReader;
    } catch (Exception ex) {
        throw new ExtractionException("failed to extract text from pdf document", ex, logger,
                ChainedException.Level.DEBUG);
    }
}
From source file:cz.muni.pdfjbim.PdfImageExtractor.java
License:Apache License
/** * @deprecated -- do not use doesn't work properly yet * This method extracts images by going through PDF tree structure * @param pdfFile name of input PDF file * @param prefix /* w ww . j a v a 2 s. c om*/ * @param password password for access to PDF if needed * @param pagesToProcess list of pages which should be processed if null given => processed all pages * -- not working yet // * @param silent -- if true error messages are not written to output otherwise they are * @param binarize -- enables processing of nonbitonal images as well (LZW is still not * processed because of output with inverted colors) * @throws PdfRecompressionException if problem to extract images from PDF */ public void extractImagesUsingPdfObjectAccess(String pdfFile, String prefix, String password, Set<Integer> pagesToProcess, Boolean binarize) throws PdfRecompressionException { if (binarize == null) { binarize = false; } // checking arguments and setting appropriate variables if (pdfFile == null) { throw new IllegalArgumentException("pdfFile must be defined"); } InputStream inputStream = null; if (password != null) { try { log.debug("PDF probably encrypted, trying to decrypt using given password {}", password); ByteArrayOutputStream decryptedOutputStream = new ByteArrayOutputStream(); PdfReader reader = new PdfReader(pdfFile, password.getBytes(StandardCharsets.UTF_8)); PdfStamper stamper = new PdfStamper(reader, decryptedOutputStream); stamper.close(); inputStream = new ByteArrayInputStream(decryptedOutputStream.toByteArray()); } catch (DocumentException ex) { throw new PdfRecompressionException(ex); } catch (IOException ex) { throw new PdfRecompressionException("Reading file caused exception", ex); } } else { try { inputStream = new FileInputStream(pdfFile); } catch (FileNotFoundException ex) { throw new PdfRecompressionException("File wasn't found", ex); } } // if prefix is not set then prefix set to name of pdf without .pdf // if pdfFile has unconsistent name (without suffix .pdf) 
and name longer than 4 chars then last for chars are removed // and this string set as prefix if ((prefix == null) && (pdfFile.length() > 4)) { prefix = pdfFile.substring(0, pdfFile.length() - 4); } PDFParser parser = null; PDDocument doc = null; try { parser = new PDFParser(inputStream); parser.parse(); doc = parser.getPDDocument(); AccessPermission accessPermissions = doc.getCurrentAccessPermission(); if (!accessPermissions.canExtractContent()) { throw new PdfRecompressionException("Error: You do not have permission to extract images."); } // going page by page List pages = doc.getDocumentCatalog().getAllPages(); for (int pageNumber = 0; pageNumber < pages.size(); pageNumber++) { if ((pagesToProcess != null) && (!pagesToProcess.contains(pageNumber + 1))) { continue; } PDPage page = (PDPage) pages.get(pageNumber); PDResources resources = page.getResources(); Map xobjs = resources.getXObjects(); if (xobjs != null) { Iterator xobjIter = xobjs.entrySet().iterator(); while (xobjIter.hasNext()) { Map.Entry entry = (Map.Entry) xobjIter.next(); String key = (String) entry.getKey(); PDXObject xobj = (PDXObject) entry.getValue(); Map images; if (xobj instanceof PDXObjectForm) { PDXObjectForm xform = (PDXObjectForm) xobj; images = xform.getResources().getImages(); } else { images = resources.getImages(); } // reading images from each page and saving them to file if (images != null) { Iterator imageIter = images.entrySet().iterator(); while (imageIter.hasNext()) { Map.Entry imEntry = (Map.Entry) imageIter.next(); String imKey = (String) imEntry.getKey(); PDXObjectImage image = (PDXObjectImage) imEntry.getValue(); PDStream pdStr = new PDStream(image.getCOSStream()); List<COSName> filters = pdStr.getFilters(); if (image.getBitsPerComponent() > 1 && !binarize) { log.info("It is not a bitonal image => skipping"); continue; } // at this moment for preventing bad output (bad coloring) from LZWDecode filter if (filters.contains(COSName.LZW_DECODE)) { log.info("This is LZWDecoded => 
skipping"); continue; } if (filters.contains(COSName.JBIG2_DECODE)) { if (skipJBig2Images) { log.warn("Allready compressed according to JBIG2 standard => skipping"); continue; } else { log.debug("JBIG2 image detected"); } } // detection of unsupported filters by pdfBox library if (filters.contains(COSName.JPX_DECODE)) { log.info("Unsupported filter JPXDecode => skipping"); continue; } COSObject cosObj = new COSObject(image.getCOSObject()); int objectNum = cosObj.getObjectNumber().intValue(); int genNum = cosObj.getGenerationNumber().intValue(); log.debug(objectNum + " " + genNum + " obj"); String name = getUniqueFileName(prefix + imKey, image.getSuffix()); log.debug("Writing image:" + name); image.write2file(name); PdfImageInformation pdfImageInfo = new PdfImageInformation(key, image.getWidth(), image.getHeight(), objectNum, genNum); originalImageInformations.add(pdfImageInfo); log.debug(pdfImageInfo.toString()); namesOfImages.add(name + "." + image.getSuffix()); } } } } } } catch (IOException ex) { Tools.deleteFilesFromList(namesOfImages); throw new PdfRecompressionException("Unable to parse PDF document", ex); } catch (RuntimeException ex) { Tools.deleteFilesFromList(namesOfImages); } finally { if (doc != null) { try { doc.close(); } catch (IOException ex) { throw new PdfRecompressionException(ex); } } } }
From source file:cz.muni.pdfjbim.PdfImageProcessor.java
License:Apache License
/** * @deprecated -- do not use doesn't work properly yet * This method extracts images by going through PDF tree structure * @param pdfFile name of input PDF file * @param password password for access to PDF if needed * @param pagesToProcess list of pages which should be processed if null given => processed all pages * -- not working yet//from w ww . j a v a 2s . co m * @param silent -- if true error messages are not written to output otherwise they are * @param binarize -- enables processing of nonbitonal images as well (LZW is still not * processed because of output with inverted colors) * @throws PdfRecompressionException if problem to extract images from PDF */ public void extractImagesUsingPdfObjectAccess(String pdfFile, String password, Set<Integer> pagesToProcess, Boolean silent, Boolean binarize) throws PdfRecompressionException { if (binarize == null) { binarize = false; } // checking arguments and setting appropriate variables if (pdfFile == null) { throw new IllegalArgumentException(pdfFile); } String prefix = null; InputStream inputStream = null; if (password != null) { try { ByteArrayOutputStream decryptedOutputStream = null; PdfReader reader = new PdfReader(pdfFile, password.getBytes()); PdfStamper stamper = new PdfStamper(reader, decryptedOutputStream); stamper.close(); inputStream = new ByteArrayInputStream(decryptedOutputStream.toByteArray()); } catch (DocumentException ex) { throw new PdfRecompressionException(ex); } catch (IOException ex) { throw new PdfRecompressionException("Reading file caused exception", ex); } } else { try { inputStream = new FileInputStream(pdfFile); } catch (FileNotFoundException ex) { throw new PdfRecompressionException("File wasn't found", ex); } } // if prefix is not set then prefix set to name of pdf without .pdf // if pdfFile has unconsistent name (without suffix .pdf) and name longer than 4 chars then last for chars are removed // and this string set as prefix if ((prefix == null) && (pdfFile.length() > 4)) { prefix 
= pdfFile.substring(0, pdfFile.length() - 4); } PDFParser parser = null; PDDocument doc = null; try { parser = new PDFParser(inputStream); parser.parse(); doc = parser.getPDDocument(); AccessPermission accessPermissions = doc.getCurrentAccessPermission(); if (!accessPermissions.canExtractContent()) { throw new PdfRecompressionException("Error: You do not have permission to extract images."); } // going page by page List pages = doc.getDocumentCatalog().getAllPages(); for (int pageNumber = 0; pageNumber < pages.size(); pageNumber++) { if ((pagesToProcess != null) && (!pagesToProcess.contains(pageNumber + 1))) { continue; } PDPage page = (PDPage) pages.get(pageNumber); PDResources resources = page.getResources(); Map xobjs = resources.getXObjects(); if (xobjs != null) { Iterator xobjIter = xobjs.keySet().iterator(); while (xobjIter.hasNext()) { String key = (String) xobjIter.next(); PDXObject xobj = (PDXObject) xobjs.get(key); Map images; if (xobj instanceof PDXObjectForm) { PDXObjectForm xform = (PDXObjectForm) xobj; images = xform.getResources().getImages(); } else { images = resources.getImages(); } // reading images from each page and saving them to file if (images != null) { Iterator imageIter = images.keySet().iterator(); while (imageIter.hasNext()) { String imKey = (String) imageIter.next(); PDXObjectImage image = (PDXObjectImage) images.get(imKey); PDStream pdStr = new PDStream(image.getCOSStream()); List filters = pdStr.getFilters(); if (image.getBitsPerComponent() > 1) { log.info("It is not a bitonal image => skipping"); continue; } // at this moment for preventing bad output (bad coloring) from LZWDecode filter if (filters.contains(COSName.LZW_DECODE.getName())) { log.info("This is LZWDecoded => skipping"); continue; } // detection of unsupported filters by pdfBox library if (filters.contains("JBIG2Decode")) { log.info("Allready compressed according to JBIG2 standard => skipping"); continue; } if (filters.contains("JPXDecode")) { log.info("Unsupported 
filter JPXDecode => skipping"); continue; } COSObject cosObj = new COSObject(image.getCOSObject()); int objectNum = cosObj.getObjectNumber().intValue(); int genNum = cosObj.getGenerationNumber().intValue(); log.debug(objectNum + " " + genNum + " obj"); String name = getUniqueFileName(prefix + imKey, image.getSuffix()); log.debug("Writing image:" + name); image.write2file(name); PdfImageInformation pdfImageInfo = new PdfImageInformation(key, image.getWidth(), image.getHeight(), objectNum, genNum); originalImageInformations.add(pdfImageInfo); log.debug(pdfImageInfo.toString()); namesOfImages.add(name + "." + image.getSuffix()); } } } } } } catch (IOException ex) { throw new PdfRecompressionException("Unable to parse PDF document", ex); } finally { if (doc != null) { try { doc.close(); } catch (IOException ex) { throw new PdfRecompressionException(ex); } } } }
From source file:de.csw.linkgenerator.plugin.lucene.textextraction.PDFTextExtractor.java
License:Apache License
public String getText(byte[] data) throws Exception { PDDocument pdfDocument = null;//from ww w .j a v a2 s .c o m try { PDFParser parser = new PDFParser(new ByteArrayInputStream(data)); parser.parse(); pdfDocument = parser.getPDDocument(); Writer writer = new CharArrayWriter(); PDFTextStripper stripper = new PDFTextStripper(); stripper.writeText(pdfDocument, writer); return writer.toString(); } finally { if (pdfDocument != null) pdfDocument.close(); } }
From source file:de.kp.ames.nlp.PdfEngine.java
License:Open Source License
/**
 * Extracts the set of distinct non-stopword terms from a PDF.
 *
 * <p>The text is split into lines on "\n" and each trimmed line into words
 * on single spaces. The given stream is not closed by this method.
 *
 * @param stream the PDF content
 * @return the distinct words that are not stopwords, or {@code null} when
 *         the document could not be parsed or stripped
 */
public Set<String> pdfToText(InputStream stream) {
    // Fully qualified so that this method needs no additional import.
    org.apache.pdfbox.pdmodel.PDDocument document = null;

    try {
        ENStopwords stopwords = new ENStopwords();

        PDFParser parser = new PDFParser(stream);
        parser.parse();
        document = parser.getPDDocument();

        /*
         * Build pdf stripper and extract text content
         */
        PDFTextStripper stripper = new PDFTextStripper();
        String text = stripper.getText(document);

        Set<String> terms = new HashSet<String>();

        String[] phrases = text.split("\n");
        for (String phrase : phrases) {
            phrase = phrase.trim();
            if (phrase.length() == 0) {
                continue;
            }

            String[] words = phrase.split(" ");
            for (String word : words) {
                // Consecutive spaces produce empty tokens, which are not terms.
                if (word.length() == 0) {
                    continue;
                }

                /*
                 * Filter stopwords
                 */
                if (stopwords.isStopword(word)) {
                    continue;
                }

                terms.add(word);
            }
        }

        return terms;
    } catch (Exception e) {
        e.printStackTrace();
    } finally {
        // The parsed document was never closed before, leaking its resources.
        if (document != null) {
            try {
                document.close();
            } catch (Exception e) {
                e.printStackTrace();
            }
        }
    }

    return null;
}
From source file:IO.search.SearchWordFile.java
/**
 * Adds the path of the given file to {@code nameList} when its content
 * matches the search expression.
 *
 * <p>The expression is interpreted as follows: if it contains a space, the
 * file matches when it contains ANY of the space-separated terms; otherwise,
 * if it contains a hyphen, the file matches when it contains ALL of the
 * hyphen-separated terms; otherwise it must contain the expression itself.
 * Matching ignores case. Supported files are .doc, .docx, .pdf and .txt
 * (by file name suffix, case-insensitive); other files are ignored. Text
 * files are matched line by line and the search stops at the first matching
 * line, so the path is added at most once.
 *
 * @param scrFile the file to inspect
 * @param word the search expression
 */
private void search(File scrFile, String word) {
    String[] arrStr = null;
    String[] arrStrA = null;
    if (word.contains(" ")) {
        arrStr = word.split(" ");
    } else if (word.contains("-")) {
        arrStrA = word.split("-");
        System.out.println("reach");
    }

    String fileName = scrFile.getName();
    if (fileName.matches("^.+\\.(?i)(doc)$")) {
        InputStream is = null;
        try {
            is = new FileInputStream(scrFile);
            WordExtractor wordExtractor = new WordExtractor(is);
            if (matchesQuery(wordExtractor.getText(), word, arrStr, arrStrA)) {
                nameList.add(scrFile.getPath());
            }
        } catch (Exception ex) {
            Logger.getLogger(SearchWordFile.class.getName()).log(Level.SEVERE, null, ex);
        } finally {
            closeLoggingFailure(is);
        }
    } else if (fileName.matches("^.+\\.(?i)(docx)$")) {
        try {
            // NOTE(review): the OPCPackage is never released here; confirm which close
            // call is safe for the POI version in use before adding one.
            OPCPackage opcPackage = POIXMLDocument.openPackage(scrFile.getPath());
            POIXMLTextExtractor extractor = new XWPFWordExtractor(opcPackage);
            if (matchesQuery(extractor.getText(), word, arrStr, arrStrA)) {
                nameList.add(scrFile.getPath());
            }
        } catch (Exception ex) {
            Logger.getLogger(SearchWordFile.class.getName()).log(Level.SEVERE, null, ex);
        }
    } else if (fileName.matches("^.+\\.(?i)(pdf)$")) {
        FileInputStream input = null;
        PDDocument pdfDocument = null;
        try {
            input = new FileInputStream(scrFile);
            PDFParser pdfParser = new PDFParser(input);
            pdfParser.parse();
            pdfDocument = pdfParser.getPDDocument();
            PDFTextStripper stripper = new PDFTextStripper();
            if (matchesQuery(stripper.getText(pdfDocument), word, arrStr, arrStrA)) {
                nameList.add(scrFile.getPath());
            }
        } catch (Exception ex) {
            Logger.getLogger(SearchWordFile.class.getName()).log(Level.SEVERE, null, ex);
        } finally {
            // Both may still be null when opening or parsing failed; the unguarded
            // close calls used to turn such a failure into a NullPointerException.
            closeLoggingFailure(input);
            if (pdfDocument != null) {
                try {
                    pdfDocument.close();
                } catch (IOException ex) {
                    Logger.getLogger(SearchWordFile.class.getName()).log(Level.SEVERE, null, ex);
                }
            }
        }
    } else if (fileName.matches("^.+\\.(?i)(txt)$")) {
        BufferedReader in = null;
        try {
            in = new BufferedReader(new InputStreamReader(new FileInputStream(scrFile),
                    getCharset(scrFile.getAbsolutePath())));
            String line = null;
            while ((line = in.readLine()) != null) {
                System.out.println(line);
                if (matchesQuery(line, word, arrStr, arrStrA)) {
                    nameList.add(scrFile.getPath());
                    return;
                }
            }
        } catch (Exception ex) {
            Logger.getLogger(SearchWordFile.class.getName()).log(Level.SEVERE, null, ex);
        } finally {
            closeLoggingFailure(in);
        }
    }
}

/** Tells whether the text satisfies the any-of terms, else the all-of terms, else contains the word. */
private static boolean matchesQuery(String text, String word, String[] anyTerms, String[] allTerms) {
    String lowered = text.toLowerCase();
    if (anyTerms != null && anyTerms.length > 0) {
        for (String term : anyTerms) {
            if (lowered.contains(term.toLowerCase())) {
                return true;
            }
        }
        return false;
    }
    if (allTerms != null && allTerms.length > 0) {
        for (String term : allTerms) {
            if (!lowered.contains(term.toLowerCase())) {
                return false;
            }
        }
        return true;
    }
    if (lowered.contains(word.toLowerCase())) {
        System.out.println("true");
        return true;
    }
    return false;
}

/** Closes the resource if it was opened, logging instead of propagating a close failure. */
private static void closeLoggingFailure(java.io.Closeable resource) {
    if (resource == null) {
        return;
    }
    try {
        resource.close();
    } catch (IOException ex) {
        Logger.getLogger(SearchWordFile.class.getName()).log(Level.SEVERE, null, ex);
    }
}