List of usage examples for org.apache.poi.xwpf.extractor.XWPFWordExtractor#close()
@Override public void close() throws IOException
From source file:com.aurel.track.lucene.index.associatedFields.textExctractor.DocxExtractor.java
License:Open Source License
/** * Gets the text from file content /*from ww w .j av a2 s . c o m*/ * @param file * @param fileExtension * @return */ @Override public String getText(File file, String fileExtension) { FileInputStream fis = null; XWPFWordExtractor ex = null; try { fis = new FileInputStream(file); XWPFDocument doc = new XWPFDocument(fis); if (doc != null) { ex = new XWPFWordExtractor(doc); return ex.getText(); } } catch (FileNotFoundException e) { LOGGER.info("File " + file.getName() + " not found. " + e.getMessage()); LOGGER.debug(ExceptionUtils.getStackTrace(e)); } catch (Exception e) { LOGGER.debug("Extracting text from the .doc file " + file.getName() + " failed with " + e.getMessage()); LOGGER.error(ExceptionUtils.getStackTrace(e)); } finally { try { if (fis != null) { fis.close(); } } catch (IOException e) { LOGGER.debug("Closing the FileInputStream for file " + file.getName() + " failed with " + e.getMessage()); LOGGER.error(ExceptionUtils.getStackTrace(e)); } if (ex != null) { try { ex.close(); } catch (IOException e) { LOGGER.debug("Closing the text extractor from the .docx file " + file.getName() + " failed with " + e.getMessage()); LOGGER.error(ExceptionUtils.getStackTrace(e)); } } } return null; }
From source file:de.iisys.schub.processMining.similarity.parsing.DocxParser.java
License:Apache License
/**
 * Parses a .docx Word file and returns its text.
 *
 * <p>Only use this method if you never need the chapters. Otherwise use the
 * 'parseDocxAndChapters' and 'getFullText' methods.
 *
 * <p>If no document is loaded, the stored full text is returned unchanged.
 *
 * @return the full text (incl. tables) as string
 */
public String parseDocxSimple() {
    if (theDoc == null) {
        return this.fullText;
    }

    XWPFWordExtractor wordExtractor = new XWPFWordExtractor(theDoc);
    this.fullText = wordExtractor.getText();
    try {
        wordExtractor.close();
    } catch (IOException closeFailure) {
        closeFailure.printStackTrace();
    }
    return this.fullText;
}
From source file:eu.modelwriter.ide.ui.command.ExtractTextHandler.java
License:Open Source License
/**
 * Extracts text from the given .docx {@link IFile}.
 *
 * <p>The text is written to a workspace file with the same path as the input but
 * with the extension replaced by "txt"; an existing file at that path is deleted
 * first. Failures are reported to the plugin log.
 *
 * @param file
 *            the .docx {@link IFile}
 */
private void exctractDocx(final IFile file) {
    // try-with-resources closes extractor, document and stream (in that order) even when
    // creating the text file fails, so no handle is leaked on the error paths.
    try (FileInputStream fis = new FileInputStream(file.getLocation().toFile());
            XWPFDocument docx = new XWPFDocument(fis);
            XWPFWordExtractor we = new XWPFWordExtractor(docx)) {
        final IPath textPath = file.getFullPath().removeFileExtension().addFileExtension("txt");
        final IFile textFile = ResourcesPlugin.getWorkspace().getRoot().getFile(textPath);
        if (textFile.exists()) {
            textFile.delete(true, new NullProgressMonitor());
        }
        // NOTE(review): getBytes() encodes with the platform default charset — confirm this
        // matches the charset the workspace expects for the created file.
        textFile.create(new ByteArrayInputStream(we.getText().getBytes()), true,
                new NullProgressMonitor());
    } catch (IOException | CoreException e) {
        Activator.getDefault().getLog().log(new Status(IStatus.ERROR, Activator.PLUGIN_ID,
                UNABLE_TO_EXTRACT_TEXT_FROM + file.getFullPath(), e));
    }
}
From source file:mc.program.Importer.java
public void importDOCX() { try {//w w w . j av a 2s . c o m // Set up objects for getting from .docx file FileInputStream fis = new FileInputStream(sourceFile.getAbsolutePath()); XWPFDocument document = new XWPFDocument(fis); XWPFWordExtractor extractor = new XWPFWordExtractor(document); // Extract text String fileData = extractor.getText(); // Put text into array list Scanner scanner = new Scanner(fileData); while (scanner.hasNext()) { sourceText.add(scanner.next()); } fis.close(); extractor.close(); } catch (Exception ex) { System.out.print(ex); } }
From source file:steffen.haertlein.file.FileObject.java
License:Apache License
/**
 * Reads the Word (.docx) file {@code f} and appends its text to {@code lines},
 * one entry per newline-separated line.
 *
 * <p>Each failure type is reported to the user in an error dialog and its stack
 * trace is printed; no exception is propagated.
 */
private void readWordDocument() {
    // The outer resource guarantees the stream is closed on every path, including failures.
    try (FileInputStream fs = new FileInputStream(f)) {
        String text;
        // The extractor is closed as soon as the text has been read, before lines are added.
        try (XWPFWordExtractor docxReader =
                new XWPFWordExtractor(new XWPFDocument(OPCPackage.open(fs)))) {
            text = docxReader.getText();
        }
        String[] docxLines = text.split("\n");
        for (String line : docxLines) {
            lines.add(line);
        }
    } catch (InvalidFormatException e) {
        JOptionPane.showMessageDialog(null, "InvalidFormatException in readWordDocument", "Fehler",
                JOptionPane.ERROR_MESSAGE);
        e.printStackTrace();
    } catch (FileNotFoundException e) {
        JOptionPane.showMessageDialog(null, "FileNotFoundException in readWordDocument", "Fehler",
                JOptionPane.ERROR_MESSAGE);
        e.printStackTrace();
    } catch (IOException e) {
        JOptionPane.showMessageDialog(null, "IOException in readWordDocument", "Fehler",
                JOptionPane.ERROR_MESSAGE);
        e.printStackTrace();
    }
}