List of usage examples for the read method of org.apache.poi.poifs.eventfilesystem.POIFSReader
public void read(final POIFSFileSystem poifs) throws IOException
From source file:com.bluecubs.xinco.index.filetypes.XincoIndexMicrosoftPowerpoint.java
License:Apache License
/**
 * Reads the given file through a POIFS event reader and returns the text
 * gathered by the registered listener.
 *
 * <p>The file stream is opened in a try-with-resources statement so that it is
 * always closed, whether the read succeeds or fails.
 *
 * @param f the file to read
 * @return the text reported by the listener's {@code getEventText()}, or
 *         {@code null} if any exception occurs while opening, reading or
 *         closing the file
 */
public String getFileContentString(File f) {
    String text = null;
    try (FileInputStream in = new FileInputStream(f)) {
        POIFSReader r = new POIFSReader();
        XincoIndexMicrosoftPowerpointPOIFSReaderListener ximpprl =
                new XincoIndexMicrosoftPowerpointPOIFSReaderListener();
        r.registerListener(ximpprl);
        r.read(in);
        text = ximpprl.getEventText();
    } catch (Exception e) {
        // Best-effort extraction: any failure is reported to the caller as null.
        text = null;
    }
    return text;
}
From source file:com.duroty.lucene.parser.MSPowerPointParser.java
License:Open Source License
/**
 * Runs a POIFS event read over this parser's input stream and returns the
 * contents of the {@code writer} buffer afterwards.
 *
 * <p>This object is registered as the reader's listener; presumably its
 * listener callback is what fills {@code writer} (not visible here).
 *
 * @return the string form of the {@code writer} buffer after the read
 *
 * @throws ParserException wrapping any exception raised while reading
 */
private String getContents() throws ParserException {
    try {
        final POIFSReader poifsReader = new POIFSReader();
        // Start from a fresh buffer on every call.
        writer = new ByteArrayOutputStream();
        poifsReader.registerListener(this);
        poifsReader.read(in);
        return writer.toString();
    } catch (Exception ex) {
        throw new ParserException(ex);
    }
}
From source file:com.flexive.extractor.ExcelExtractor.java
License:Open Source License
/** * Extracts the text informations from the excel file. * * @param in the input stream to read from * @return the extraxted informations, or null if no text extraction was possible *///from w ww . j a v a 2 s . co m public ExtractedData extract(final InputStream in) { BufferedInputStream bis = null; try { writer = new ByteArrayOutputStream(); // We need to read the stream 2 times, so we use a buffered input stream and mark the // beginning bis = new BufferedInputStream(in); bis.mark(Integer.MAX_VALUE); // Retrieve summary information POIFSReader r = new POIFSReader(); r.registerListener(this, "\005SummaryInformation"); r.read(bis); bis.reset(); // Retrieve text by processing all sheets HSSFWorkbook wb = new HSSFWorkbook(bis); for (int i = 0; i < wb.getNumberOfSheets(); i++) { HSSFSheet sheet = wb.getSheetAt(i); processSheet(sheet); } // Append summary info to text if (fxsi != null) { writer.write(FxSharedUtils.getBytes(fxsi.getFTIndexInformations())); } writer.flush(); return new ExtractedData(fxsi, writer.toString()); } catch (Exception exc) { exc.printStackTrace(); return null; } finally { try { if (writer != null) writer.close(); } catch (Exception exc) { /*ignore*/} try { if (bis != null) bis.close(); } catch (Exception exc) { /*ignore*/} } }
From source file:com.flexive.extractor.FxSummaryInformation.java
License:Open Source License
/** * Reads the summary information from a document. * * @param input the input stream to read from, will not be closed at the end * @return the summary information/* ww w. ja v a2 s. co m*/ */ public static FxSummaryInformation getSummaryInformation(InputStream input) { class SummaryStore implements POIFSReaderListener { private FxSummaryInformation fxsi = null; /** * Processes the Summary section. * * @param event the summary section event. */ @Override public void processPOIFSReaderEvent(POIFSReaderEvent event) { try { SummaryInformation si = (SummaryInformation) PropertySetFactory.create(event.getStream()); fxsi = new FxSummaryInformation(si); } catch (Exception ex) { /* ignore */ } } protected FxSummaryInformation getFxSummaryInformation() { return fxsi; } } try { POIFSReader reader = new POIFSReader(); SummaryStore st = new SummaryStore(); reader.registerListener(st, "\005SummaryInformation"); reader.read(input); return st.getFxSummaryInformation(); } catch (Exception ex) { return null; } }
From source file:com.flexive.extractor.PowerpointExtractor.java
License:Open Source License
/** * Extracts the text informations from the powerpoint file. * * @param in the input stream to read from * @return the extraxted informations, or null if no text extraction was possible *//* w w w . ja v a2 s.co m*/ public ExtractedData extract(final InputStream in) { try { writer = new ByteArrayOutputStream(); POIFSReader reader = new POIFSReader(); reader.registerListener(this); //FxSummaryInformation.getSummaryInformation(fileName); reader.read(in); if (fxsi != null) { writer.write(FxSharedUtils.getBytes(fxsi.getFTIndexInformations())); } writer.flush(); return new ExtractedData(fxsi, writer.toString()); } catch (Exception ex) { return null; } finally { try { writer.close(); } catch (Exception exc) { /*ignore*/} } }
From source file:com.flexive.extractor.WordExtractor.java
License:Open Source License
/** * Extracts the text informations from the word file. * * @param in the input stream to read from * @return the extraxted informations, or null if no text extraction was possible */// w ww. j a v a 2 s .c om public ExtractedData extract(final InputStream in) { ByteArrayOutputStream baos = null; PrintWriter writer = null; BufferedInputStream bis = null; try { baos = new ByteArrayOutputStream(); writer = new PrintWriter(baos); // We need to read the stream 2 times, so we use a buffered input stream and mark the // beginning bis = new BufferedInputStream(in); bis.mark(Integer.MAX_VALUE); // Retrieve summary information POIFSReader r = new POIFSReader(); r.registerListener(this, "\005SummaryInformation"); r.read(bis); bis.reset(); // Retrieve text WordDocument wd = new WordDocument(bis); wd.writeAllText(writer); if (fxsi != null) { writer.write(fxsi.getFTIndexInformations()); } writer.flush(); return new ExtractedData(fxsi, baos.toString()); } catch (Exception exc) { return null; } finally { try { if (writer != null) writer.close(); } catch (Exception exc) { /*ignore*/} try { if (baos != null) baos.close(); } catch (Exception exc) { /*ignore*/} try { if (bis != null) bis.close(); } catch (Exception exc) { /*ignore*/} } }
From source file:com.krawler.esp.fileparser.word.ExtractWordFile.java
License:Open Source License
/**
 * Reads the {@code "\005SummaryInformation"} entry of the given stream through
 * a POIFS event reader and returns the properties held by the broker that the
 * listener was constructed with.
 *
 * @param in the stream to read; it is not closed by this method
 * @return the result of {@code getProperties()} on the broker after the read
 * @throws IOException if reading the stream fails
 */
public Properties extractProperties(InputStream in) throws IOException {
    final PropertiesBroker broker = new PropertiesBroker();
    final POIFSReader poifsReader = new POIFSReader();
    final PropertiesReaderListener summaryListener = new PropertiesReaderListener(broker);

    poifsReader.registerListener(summaryListener, "\005SummaryInformation");
    poifsReader.read(in);

    return broker.getProperties();
}
From source file:com.villemos.ispace.aperture.enricher.MicrosoftPropertyReader.java
License:Open Source License
/**
 * Copies the entries of {@code msProperties} into the metadata of the given
 * information object after reading its {@code .doc} file through a POIFS event
 * reader with this object registered as listener.
 *
 * <p>Nothing happens unless {@code io.hasUri} names an existing file and ends
 * with {@code ".doc"}. The file stream is opened in a try-with-resources
 * statement so that it is closed even when reading or copying fails. Failures
 * are logged and not propagated.
 *
 * @param io      the information object whose {@code hasUri} names the file
 *                and whose {@code metadata} receives the properties
 * @param headers the message headers (not used by this method)
 */
@Handler
public void addMSProperties(@Body InformationObject io, @Headers Map<String, Object> headers) {
    File file = new File(io.hasUri);
    if (file.exists() && io.hasUri.endsWith(".doc")) {
        POIFSReader r = new POIFSReader();
        r.registerListener(this);

        try (FileInputStream inStream = new FileInputStream(file)) {
            r.read(inStream);

            // Copy the entries of msProperties into the object's metadata.
            Iterator<Entry<String, String>> it = msProperties.entrySet().iterator();
            while (it.hasNext()) {
                Entry<String, String> entry = it.next();
                io.metadata.put(entry.getKey(), entry.getValue());
            }
        } catch (Exception e) {
            e.printStackTrace();
            LOG.error("Failed to get properties for .doc file '" + file.getName() + "'.");
        }
    }
}
From source file:lius.index.powerpoint.PPTIndexer.java
License:Apache License
/**
 * Runs a POIFS event read over the stream returned by
 * {@code getStreamToIndex()} with this object registered as listener, and
 * returns the contents of the {@code writer} buffer afterwards.
 *
 * @return the string form of the {@code writer} buffer, or an empty string if
 *         an exception occurs (its message is logged as an error)
 */
public String getContent() {
    try {
        final POIFSReader poifsReader = new POIFSReader();
        // Start from a fresh buffer on every call.
        writer = new ByteArrayOutputStream();
        poifsReader.registerListener(this);
        poifsReader.read(getStreamToIndex());
        return writer.toString();
    } catch (Exception ex) {
        logger.error(ex.getMessage());
        return "";
    }
}
From source file:net.sourceforge.docfetcher.model.parse.MSOfficeParser.java
License:Open Source License
protected final ParseResult parse(File file, ParseContext context) throws ParseException { String contents = renderText(file, context.getFilename()); ParseResult parseResult = new ParseResult(contents); POIFSReader reader = new POIFSReader(); MyReaderListener listener = new MyReaderListener(); reader.registerListener(listener, "\005SummaryInformation"); //$NON-NLS-1$ InputStream in = null;/*from w w w. j a va 2s . com*/ try { in = new FileInputStream(file); try { reader.read(in); } catch (IllegalArgumentException e) { // Bug #3537738: "IllegalArgumentException: name cannot be empty" throw new ParseException(e); } parseResult.setTitle(listener.title).addAuthor(listener.author).addMiscMetadata(listener.subject) .addMiscMetadata(listener.keywords).addMiscMetadata(listener.comments); } catch (IOException e) { // Ignore, we can live without meta data Util.printErr(e); } finally { Closeables.closeQuietly(in); } return parseResult; }