List of usage examples for org.apache.poi.xwpf.extractor.XWPFWordExtractor#getText()
public String getText()
From source file:authorslilhelper.FXMLDocumentController.java
License:Open Source License
public void loadWordDocument() { try {/*from w w w. j a v a 2s . c o m*/ JFileChooser chooser = new JFileChooser(); int value = chooser.showOpenDialog(null); if (value == JFileChooser.APPROVE_OPTION) { XWPFDocument docToAppend = new XWPFDocument(new FileInputStream(chooser.getSelectedFile())); XWPFWordExtractor extract = new XWPFWordExtractor(docToAppend); String fullText = extract.getText(); /* for(int q = 0; q < fullText.length(); q++) { if(q < fullText.length() - 3) { if(fullText.charAt(q) == '\n' && fullText.charAt(q+1) == '\t') { fullText = fullText.substring(0, q) + "\n\n" + fullText.substring(q + 1, fullText.length() - 1); } } }*/ //newly bracketed out //Primary.appendText(extract.getText()); String[] buttons = { "Append to end", "Insert at cursor location" }; int result = JOptionPane.showOptionDialog(null, "How would you like to insert the text? ", "Insert Text", JOptionPane.WARNING_MESSAGE, 0, null, buttons, buttons[1]); if (result == 0) { try { //FileReader reader = new FileReader(fileLocation); //InputStream in = IOUtils.toInputStream(fullText, "UTF-8"); StringReader reader = new StringReader(fullText); BufferedReader br = new BufferedReader(reader); String s; int index = 0; //int charCounter = 0; boolean endOfPara = false; while ((s = br.readLine()) != null) { index = 0; while (index < s.length()) //change this { if ((index == characterCapacity) && (index < s.length() - 1)) { while (s.charAt(index) != ' ') { index--; } String n = s.substring(0, index); s = s.substring(index); Primary.appendText(n + "\n"); index = 0; } else if ((index == s.length() - 1) && (index <= characterCapacity) && (s.length() > 0)) { Primary.appendText(s + "\n"); index = s.length(); //OR use break; endOfPara = true; } else { index++; } } if (endOfPara == true) { Primary.appendText("\n"); } endOfPara = false; } } catch (Exception e) { } //Primary.appendText(fullText); //resetOnPaste(); onClickOrKeyPress(); /*save(); Primary.selectAll(); Primary.clear(); reinitializePrimary();*/ 
String pathToSave = chooser.getSelectedFile().getAbsolutePath(); //adds the loaded file's name/path to the filetracker File filesLoadedLog = new File(installationPath + "/FileTracker" + currentUser); FileWriter writ = new FileWriter(filesLoadedLog, true); BufferedWriter bw = new BufferedWriter(writ); //writ.append("Hello World"); // if(filesLoadedLog.exists() == true) // { writ.append(pathToSave + "\n"); // title.setText(fileLocation); // } // else // { // writ.write(fileLocation); // title.setText(fileLocation); // } bw.close(); writ.close(); } if (result == 1) { Primary.insertText(Primary.getCaretPosition(), "\n"); //Primary.nextWord(); //Primary.forward(); int desiredCaretPosition = Primary.getCaretPosition(); Primary.end(); int start = Primary.getCaretPosition(); try { StringReader reader = new StringReader(fullText); BufferedReader br = new BufferedReader(reader); //FileReader reader = new FileReader(fileLocation); //BufferedReader br = new BufferedReader(reader); String s; int index = 0; //int charCounter = 0; boolean endOfPara = false; while ((s = br.readLine()) != null) { index = 0; while (index < s.length()) //change this { if ((index == characterCapacity) && (index < s.length() - 1)) { while (s.charAt(index) != ' ') { index--; } String n = s.substring(0, index); s = s.substring(index); Primary.appendText(n + "\n"); index = 0; } else if ((index == s.length() - 1) && (index <= characterCapacity) && (s.length() > 0)) { Primary.appendText(s + "\n"); index = s.length(); //OR use break; endOfPara = true; } else { index++; } } if (endOfPara == true) { Primary.appendText("\n"); } endOfPara = false; } } catch (Exception e) { } Primary.end(); int end = Primary.getCaretPosition(); Primary.selectRange(start, end); String appendedText = Primary.getSelectedText(); Primary.deleteText(start, end); Primary.insertText(desiredCaretPosition, appendedText); // int caretPosition = Primary.getCaretPosition(); //NEWLY BRACKETED OUT // Primary.insertText(caretPosition, fullText); 
//NEWLY BRACKETED OUT //resetOnPaste(); onClickOrKeyPress(); /*save(); Primary.selectAll(); Primary.clear(); reinitializePrimary();*/ String pathToSave = chooser.getSelectedFile().getAbsolutePath(); //adds the loaded file's name/path to the filetracker File filesLoadedLog = new File(installationPath + "/FileTracker" + currentUser); FileWriter writ = new FileWriter(filesLoadedLog, true); BufferedWriter bw = new BufferedWriter(writ); //writ.append("Hello World"); // if(filesLoadedLog.exists() == true) // { writ.append(pathToSave + "\n"); // title.setText(fileLocation); // } // else // { // writ.write(fileLocation); // title.setText(fileLocation); // } bw.close(); writ.close(); } } else { } } catch (Exception e) { JOptionPane.showMessageDialog(null, "Images are not supported. If your document contains images, it will not be loaded. Please remove the images or paste the text into another document and try again."); } }
From source file:avoking.com.documentos.scheduler.core.Core.java
private String leerDocx(InputStream docx) throws IOException { //Se crea un documento que la POI entiende pasandole el stream //instanciamos el obj para extraer contenido pasando el documento XWPFWordExtractor xwpf_we = new XWPFWordExtractor(new XWPFDocument(docx)); return xwpf_we.getText(); }
From source file:br.gov.lexml.parser.documentoarticulado.LexMLParserFromTextTest.java
License:Open Source License
/**
 * Loads a .docx classpath resource and returns its text.
 *
 * <p>The resource stream and the extractor are both closed when extraction ends.
 * Failures are printed to standard error and reported to the caller as {@code null}.
 *
 * @param resourceName resource name resolved relative to {@code TestUtil}
 * @return the extracted text, or {@code null} if loading or extraction failed
 */
private String sampleDocx(String resourceName) {
    String content = null;
    try (InputStream input = new BOMInputStream(TestUtil.class.getResourceAsStream(resourceName));
            XWPFWordExtractor wordExtractor = new XWPFWordExtractor(new XWPFDocument(OPCPackage.open(input)))) {
        content = wordExtractor.getText();
    } catch (Exception exep) {
        exep.printStackTrace();
    }
    return content;
}
From source file:com.artech.prototype2.bardakov.utils.impl.MultiParserImpl.java
/** * doc/docx/*from w w w .j a va 2 s .c om*/ * @param FilePath - * @return ?? ? */ private ArrayList<String> getListOfWordsFromDoc(String FilePath) { FileInputStream fis; List<String> result = new ArrayList<String>(); if (FilePath.substring(FilePath.length() - 1).equals("x")) { //is a docx try { fis = new FileInputStream(new File(FilePath)); XWPFDocument doc = new XWPFDocument(fis); XWPFWordExtractor extract = new XWPFWordExtractor(doc); // System.out.println(extract.getText()); StringBuilder builder = new StringBuilder(); builder.append(extract.getText()); String[] words = builder.toString().split(" "); for (String s : words) { result.add(s); } } catch (IOException e) { e.printStackTrace(); } } else { //is not a docx try { fis = new FileInputStream(new File(FilePath)); HWPFDocument doc = new HWPFDocument(fis); WordExtractor extractor = new WordExtractor(doc); StringBuilder builder = new StringBuilder(); builder.append(extractor.getText()); String[] words = builder.toString().split(" "); for (String s : words) { result.add(s); } } catch (IOException e) { e.printStackTrace(); } } return (ArrayList<String>) result; }
From source file:com.aurel.track.lucene.index.associatedFields.textExctractor.DocxExtractor.java
License:Open Source License
/** * Gets the text from file content /* w w w.ja v a 2 s . co m*/ * @param file * @param fileExtension * @return */ @Override public String getText(File file, String fileExtension) { FileInputStream fis = null; XWPFWordExtractor ex = null; try { fis = new FileInputStream(file); XWPFDocument doc = new XWPFDocument(fis); if (doc != null) { ex = new XWPFWordExtractor(doc); return ex.getText(); } } catch (FileNotFoundException e) { LOGGER.info("File " + file.getName() + " not found. " + e.getMessage()); LOGGER.debug(ExceptionUtils.getStackTrace(e)); } catch (Exception e) { LOGGER.debug("Extracting text from the .doc file " + file.getName() + " failed with " + e.getMessage()); LOGGER.error(ExceptionUtils.getStackTrace(e)); } finally { try { if (fis != null) { fis.close(); } } catch (IOException e) { LOGGER.debug("Closing the FileInputStream for file " + file.getName() + " failed with " + e.getMessage()); LOGGER.error(ExceptionUtils.getStackTrace(e)); } if (ex != null) { try { ex.close(); } catch (IOException e) { LOGGER.debug("Closing the text extractor from the .docx file " + file.getName() + " failed with " + e.getMessage()); LOGGER.error(ExceptionUtils.getStackTrace(e)); } } } return null; }
From source file:com.bluetech.reader.WordReader.java
License:Apache License
public static String readWordDoc(String filePath) throws FileNotFoundException, IOException { File file = new File(filePath); FileInputStream fis = new FileInputStream(file.getAbsolutePath()); XWPFDocument document = new XWPFDocument(fis); XWPFWordExtractor extractor = new XWPFWordExtractor(document); // String[] fileData = extractor.getText().split("##\\d{4}[_]\\d{4}[a-z]*"); return extractor.getText(); }
From source file:com.docdoku.server.esindexer.ESTools.java
License:Open Source License
/**
 * Returns the text of a Word document supplied as a stream.
 *
 * <p>The stream is buffered and then probed with
 * {@code POIFSFileSystem.hasPOIFSHeader}: when the header is present the data is
 * read with {@code WordExtractor}, otherwise with {@code XWPFWordExtractor}. The
 * buffered stream is closed on exit.
 *
 * @param inputStream stream supplying the Word document
 * @return the extracted text
 * @throws IOException if the stream cannot be read
 */
private static String microsoftWordDocumentToString(InputStream inputStream) throws IOException {
    try (InputStream buffered = new BufferedInputStream(inputStream)) {
        if (!POIFSFileSystem.hasPOIFSHeader(buffered)) {
            XWPFDocument docx = new XWPFDocument(buffered);
            return new XWPFWordExtractor(docx).getText();
        }
        return new WordExtractor(buffered).getText();
    }
}
From source file:com.jaeksoft.searchlib.parser.DocxParser.java
License:Open Source License
/**
 * Parses a .docx stream into a new parser result item.
 *
 * <p>When the document exposes core properties, title, creator, subject,
 * description and keywords are copied into the result. The body text is stored in
 * the content field after {@code StringUtils.replaceConsecutiveSpaces}, and
 * language detection is then run on that field. The extractor is passed to
 * {@code IOUtils.close} in all cases.
 *
 * @param streamLimiter source of the document input stream
 * @param lang not used by this implementation
 * @throws IOException if the document cannot be read
 */
@Override
protected void parseContent(StreamLimiter streamLimiter, LanguageEnum lang) throws IOException {
    ParserResultItem item = getNewParserResultItem();
    XWPFDocument docx = new XWPFDocument(streamLimiter.getNewInputStream());
    XWPFWordExtractor extractor = null;
    try {
        extractor = new XWPFWordExtractor(docx);
        copyCoreProperties(extractor.getCoreProperties(), item);
        String body = StringUtils.replaceConsecutiveSpaces(extractor.getText(), " ");
        item.addField(ParserFieldEnum.content, body);
        item.langDetection(10000, ParserFieldEnum.content);
    } finally {
        IOUtils.close(extractor);
    }
}

/** Copies the document's core metadata into the result item; does nothing when there is none. */
private void copyCoreProperties(CoreProperties props, ParserResultItem item) {
    if (props == null) {
        return;
    }
    item.addField(ParserFieldEnum.title, props.getTitle());
    item.addField(ParserFieldEnum.creator, props.getCreator());
    item.addField(ParserFieldEnum.subject, props.getSubject());
    item.addField(ParserFieldEnum.description, props.getDescription());
    item.addField(ParserFieldEnum.keywords, props.getKeywords());
}
From source file:com.jgaap.generics.DocumentHelper.java
License:Open Source License
/** * Extracts text from a Word document and stores it in the document. * /* w ww . j a v a 2 s . c o m*/ * @param inputStream * An input stream pointing to the Word document to be read. * @throws IOException */ static private char[] loadMSWordDocx(InputStream inputStream) throws IOException { XWPFDocument docx = new XWPFDocument(inputStream); XWPFWordExtractor extractor = new XWPFWordExtractor(docx); return extractor.getText().toCharArray(); }
From source file:com.min.word.core.ReadWordFileTest.java
License:Apache License
/**
 * Prints the text of {@code test.docx} (resolved against the working directory)
 * between a start and an end banner. The file stream is closed once the text has
 * been printed.
 *
 * @param args not used
 * @throws Exception if the file cannot be opened or read
 */
public static void main(String[] args) throws Exception {
    System.out.println("---------------- Read File Start ------------------");
    try (FileInputStream in = new FileInputStream("test.docx")) {
        XWPFWordExtractor we = new XWPFWordExtractor(new XWPFDocument(in));
        System.out.println(we.getText());
    }
    System.out.println("---------------- Read File End ------------------");
}