List of usage examples for the POIFSReader constructor of org.apache.poi.poifs.eventfilesystem.POIFSReader
POIFSReader
From source file:com.armorize.hackalert.extractor.msword.MSExtractor.java
License:Apache License
/** * Extracts properties and text from an MS Document input stream *//*from w w w . j a va2 s.c o m*/ protected void extract(InputStream input) throws Exception { // First, extract properties this.reader = new POIFSReader(); this.properties = new PropertiesBroker(); this.reader.registerListener(new PropertiesReaderListener(this.properties), SummaryInformation.DEFAULT_STREAM_NAME); input.reset(); if (input.available() > 0) { reader.read(input); } // Then, extract text input.reset(); this.text = extractText(input); }
From source file:com.bluecubs.xinco.index.filetypes.XincoIndexMicrosoftPowerpoint.java
License:Apache License
public String getFileContentString(File f) { String text = null;//from w ww . j ava 2 s.c o m try { POIFSReader r = new POIFSReader(); XincoIndexMicrosoftPowerpointPOIFSReaderListener ximpprl = new XincoIndexMicrosoftPowerpointPOIFSReaderListener(); r.registerListener(ximpprl); r.read(new FileInputStream(f)); text = ximpprl.getEventText(); } catch (Exception e) { text = null; } return text; }
From source file:com.duroty.lucene.parser.MSPowerPointParser.java
License:Open Source License
/**
 * Reads the {@code in} stream through a {@link POIFSReader} with this object
 * registered as listener, and returns whatever ended up in {@code writer}.
 *
 * <p>{@code writer} is replaced with a fresh {@link ByteArrayOutputStream}
 * on every call.
 *
 * @return the string form of {@code writer} after the read completes
 *
 * @throws ParserException wrapping any exception raised while reading
 */
private String getContents() throws ParserException {
    try {
        writer = new ByteArrayOutputStream();

        POIFSReader poifsReader = new POIFSReader();
        poifsReader.registerListener(this);
        poifsReader.read(in);

        return writer.toString();
    } catch (Exception cause) {
        throw new ParserException(cause);
    }
}
From source file:com.flexive.extractor.ExcelExtractor.java
License:Open Source License
/** * Extracts the text informations from the excel file. * * @param in the input stream to read from * @return the extraxted informations, or null if no text extraction was possible *///from w w w . ja v a 2s. c om public ExtractedData extract(final InputStream in) { BufferedInputStream bis = null; try { writer = new ByteArrayOutputStream(); // We need to read the stream 2 times, so we use a buffered input stream and mark the // beginning bis = new BufferedInputStream(in); bis.mark(Integer.MAX_VALUE); // Retrieve summary information POIFSReader r = new POIFSReader(); r.registerListener(this, "\005SummaryInformation"); r.read(bis); bis.reset(); // Retrieve text by processing all sheets HSSFWorkbook wb = new HSSFWorkbook(bis); for (int i = 0; i < wb.getNumberOfSheets(); i++) { HSSFSheet sheet = wb.getSheetAt(i); processSheet(sheet); } // Append summary info to text if (fxsi != null) { writer.write(FxSharedUtils.getBytes(fxsi.getFTIndexInformations())); } writer.flush(); return new ExtractedData(fxsi, writer.toString()); } catch (Exception exc) { exc.printStackTrace(); return null; } finally { try { if (writer != null) writer.close(); } catch (Exception exc) { /*ignore*/} try { if (bis != null) bis.close(); } catch (Exception exc) { /*ignore*/} } }
From source file:com.flexive.extractor.FxSummaryInformation.java
License:Open Source License
/**
 * Reads the summary information from a document.
 *
 * <p>A listener is registered for the {@code SummaryInformation} stream; when
 * that stream is delivered, it is converted into an
 * {@link FxSummaryInformation}. Failures inside the listener are ignored and
 * leave the result unset.
 *
 * @param input the input stream to read from, will not be closed at the end
 * @return the summary information, or {@code null} if none was captured or
 *         reading the stream threw an exception
 */
public static FxSummaryInformation getSummaryInformation(InputStream input) {
    // One-slot holder so the anonymous listener can hand its result back.
    final FxSummaryInformation[] captured = new FxSummaryInformation[1];

    POIFSReaderListener summaryListener = new POIFSReaderListener() {
        /**
         * Processes the Summary section.
         *
         * @param event the summary section event.
         */
        @Override
        public void processPOIFSReaderEvent(POIFSReaderEvent event) {
            try {
                SummaryInformation summary = (SummaryInformation) PropertySetFactory.create(event.getStream());
                captured[0] = new FxSummaryInformation(summary);
            } catch (Exception ignored) {
                /* ignore */
            }
        }
    };

    try {
        POIFSReader poifsReader = new POIFSReader();
        poifsReader.registerListener(summaryListener, "\005SummaryInformation");
        poifsReader.read(input);
        return captured[0];
    } catch (Exception ignored) {
        return null;
    }
}
From source file:com.flexive.extractor.PowerpointExtractor.java
License:Open Source License
/** * Extracts the text informations from the powerpoint file. * * @param in the input stream to read from * @return the extraxted informations, or null if no text extraction was possible *///from w w w .j a v a2 s.c o m public ExtractedData extract(final InputStream in) { try { writer = new ByteArrayOutputStream(); POIFSReader reader = new POIFSReader(); reader.registerListener(this); //FxSummaryInformation.getSummaryInformation(fileName); reader.read(in); if (fxsi != null) { writer.write(FxSharedUtils.getBytes(fxsi.getFTIndexInformations())); } writer.flush(); return new ExtractedData(fxsi, writer.toString()); } catch (Exception ex) { return null; } finally { try { writer.close(); } catch (Exception exc) { /*ignore*/} } }
From source file:com.flexive.extractor.WordExtractor.java
License:Open Source License
/** * Extracts the text informations from the word file. * * @param in the input stream to read from * @return the extraxted informations, or null if no text extraction was possible *///from ww w.ja v a2s .co m public ExtractedData extract(final InputStream in) { ByteArrayOutputStream baos = null; PrintWriter writer = null; BufferedInputStream bis = null; try { baos = new ByteArrayOutputStream(); writer = new PrintWriter(baos); // We need to read the stream 2 times, so we use a buffered input stream and mark the // beginning bis = new BufferedInputStream(in); bis.mark(Integer.MAX_VALUE); // Retrieve summary information POIFSReader r = new POIFSReader(); r.registerListener(this, "\005SummaryInformation"); r.read(bis); bis.reset(); // Retrieve text WordDocument wd = new WordDocument(bis); wd.writeAllText(writer); if (fxsi != null) { writer.write(fxsi.getFTIndexInformations()); } writer.flush(); return new ExtractedData(fxsi, baos.toString()); } catch (Exception exc) { return null; } finally { try { if (writer != null) writer.close(); } catch (Exception exc) { /*ignore*/} try { if (baos != null) baos.close(); } catch (Exception exc) { /*ignore*/} try { if (bis != null) bis.close(); } catch (Exception exc) { /*ignore*/} } }
From source file:com.knowgate.ole.OLEDocument.java
License:Open Source License
public OLEDocument() { oReader = new POIFSReader(); oListener = new OLEListener(); oReader.registerListener(oListener, "\005SummaryInformation"); }
From source file:com.krawler.esp.fileparser.word.ExtractWordFile.java
License:Open Source License
public Properties extractProperties(InputStream in) throws IOException { PropertiesBroker propertiesBroker = new PropertiesBroker(); POIFSReader reader = new POIFSReader(); reader.registerListener(new PropertiesReaderListener(propertiesBroker), "\005SummaryInformation"); reader.read(in);//from w w w . j ava 2 s. c o m return propertiesBroker.getProperties(); }
From source file:com.villemos.ispace.aperture.enricher.MicrosoftPropertyReader.java
License:Open Source License
@Handler public void addMSProperties(@Body InformationObject io, @Headers Map<String, Object> headers) { File file = new File(io.hasUri); if (file.exists() && io.hasUri.endsWith(".doc")) { POIFSReader r = new POIFSReader(); r.registerListener(this); try {//from w w w. ja va 2 s . c o m FileInputStream inStream = new FileInputStream(file); r.read(inStream); Iterator<Entry<String, String>> it = msProperties.entrySet().iterator(); while (it.hasNext()) { Entry<String, String> entry = it.next(); io.metadata.put(entry.getKey(), entry.getValue()); } inStream.close(); } catch (Exception e) { e.printStackTrace(); LOG.error("Failed to get properties for .doc file '" + file.getName() + "'."); } } }