Example usage for org.apache.poi.hdgf.extractor VisioTextExtractor getSummaryInformation

List of usage examples for org.apache.poi.hdgf.extractor VisioTextExtractor getSummaryInformation

Introduction

On this page you can find example usages of the getSummaryInformation() method of org.apache.poi.hdgf.extractor.VisioTextExtractor.

Prototype

public SummaryInformation getSummaryInformation() 

Source Link

Document

Returns the summary information metadata for the document.

Usage

From source file:com.jaeksoft.searchlib.parser.VisioParser.java

License:Open Source License

/**
 * Extracts summary metadata and text from a Visio document and stores them
 * in a newly obtained {@link ParserResultItem}.
 *
 * <p>When summary information is available, its title, author and subject
 * are added as fields. Each extracted text chunk is added to the content
 * field after its consecutive spaces have been replaced with a single
 * space, and language detection is then run on the content field. If the
 * extractor returns no text array, only the metadata is recorded.
 *
 * @param streamLimiter supplies the input stream of the document to parse
 * @param lang          not used by this implementation
 * @throws IOException if reading the document fails
 */
@Override
protected void parseContent(StreamLimiter streamLimiter, LanguageEnum lang) throws IOException {
    VisioTextExtractor visio = null;
    try {
        visio = new VisioTextExtractor(streamLimiter.getNewInputStream());
        final SummaryInformation summary = visio.getSummaryInformation();
        final ParserResultItem item = getNewParserResultItem();

        if (summary != null) {
            item.addField(ParserFieldEnum.title, summary.getTitle());
            item.addField(ParserFieldEnum.author, summary.getAuthor());
            item.addField(ParserFieldEnum.subject, summary.getSubject());
        }

        final String[] chunks = visio.getAllText();
        if (chunks != null) {
            for (int i = 0; i < chunks.length; i++) {
                final String normalized = StringUtils.replaceConsecutiveSpaces(chunks[i], " ");
                item.addField(ParserFieldEnum.content, normalized);
            }
            item.langDetection(10000, ParserFieldEnum.content);
        }
    } finally {
        // The extractor may still be null here if its construction failed.
        IOUtils.close(visio);
    }
}

From source file:com.opensearchserver.extractor.parser.Visio.java

License:Apache License

/**
 * Extracts summary metadata and text from a Visio document.
 *
 * <p>When summary information is available, its title, author, subject,
 * creation date, last-save date, keywords and comments are added to
 * {@code metas}; note that the keywords are stored under {@code CONTENT}.
 * If the extractor returns a text array, a new parser document is obtained,
 * every text chunk is added to it under {@code CONTENT}, and the result of
 * language detection is added under {@code LANG_DETECTION}.
 *
 * @param inputStream the Visio document to read
 * @param extension   not used by this implementation
 * @param mimeType    not used by this implementation
 * @throws Exception if the document cannot be read or processed
 */
@Override
protected void parseContent(InputStream inputStream, String extension, String mimeType) throws Exception {
    VisioTextExtractor visio = null;
    try {
        visio = new VisioTextExtractor(inputStream);
        final SummaryInformation summary = visio.getSummaryInformation();

        if (summary != null) {
            metas.add(TITLE, summary.getTitle());
            metas.add(AUTHOR, summary.getAuthor());
            metas.add(SUBJECT, summary.getSubject());
            metas.add(CREATION_DATE, summary.getCreateDateTime());
            metas.add(MODIFICATION_DATE, summary.getLastSaveDateTime());
            metas.add(CONTENT, summary.getKeywords());
            metas.add(COMMENTS, summary.getComments());
        }

        final String[] chunks = visio.getAllText();
        if (chunks != null) {
            // The parser document is only requested once text is known to exist.
            final ParserDocument document = getNewParserDocument();
            for (int i = 0; i < chunks.length; i++) {
                document.add(CONTENT, chunks[i]);
            }
            document.add(LANG_DETECTION, languageDetection(CONTENT, 10000));
        }
    } finally {
        // Nothing to close when the extractor could not be constructed.
        if (visio != null) {
            IOUtils.closeQuietly(visio);
        }
    }
}

From source file:com.opensearchserver.textextractor.parser.Visio.java

License:Open Source License

/**
 * Extracts summary metadata and text from a Visio document.
 *
 * <p>When summary information is available, its title, author, subject,
 * creation date, last-save date, keywords and comments are added to
 * {@code metas}; note that the keywords are stored under {@code CONTENT}.
 * If the extractor returns a text array, a new parser document is obtained,
 * every text chunk is added to it under {@code CONTENT}, and the result of
 * language detection is added under {@code LANG_DETECTION}.
 *
 * @param inputStream the Visio document to read
 * @throws Exception if the document cannot be read or processed
 */
@Override
protected void parseContent(InputStream inputStream) throws Exception {
    VisioTextExtractor visio = null;
    try {
        visio = new VisioTextExtractor(inputStream);
        final SummaryInformation summary = visio.getSummaryInformation();

        if (summary != null) {
            metas.add(TITLE, summary.getTitle());
            metas.add(AUTHOR, summary.getAuthor());
            metas.add(SUBJECT, summary.getSubject());
            metas.add(CREATION_DATE, summary.getCreateDateTime());
            metas.add(MODIFICATION_DATE, summary.getLastSaveDateTime());
            metas.add(CONTENT, summary.getKeywords());
            metas.add(COMMENTS, summary.getComments());
        }

        final String[] chunks = visio.getAllText();
        if (chunks != null) {
            // The parser document is only requested once text is known to exist.
            final ParserDocument document = getNewParserDocument();
            for (int i = 0; i < chunks.length; i++) {
                document.add(CONTENT, chunks[i]);
            }
            document.add(LANG_DETECTION, languageDetection(CONTENT, 10000));
        }
    } finally {
        // Nothing to close when the extractor could not be constructed.
        if (visio != null) {
            IOUtils.closeQuietly(visio);
        }
    }
}

From source file:net.yacy.document.parser.vsdParser.java

License:Open Source License

/**
 * Parses a Visio (vsd) stream into a single-element {@link Document} array.
 *
 * <p>Text and summary information are extracted on a best-effort basis: if
 * extraction fails, a warning is logged and a document with empty text and
 * no summary metadata is still produced. Author, keywords (split on space,
 * comma and semicolon) and title are taken from the summary information
 * when present; the title falls back to the normalized form of
 * {@code location}. The abstract is the first 80 characters of the text
 * (or the trimmed text if shorter) with line breaks and tabs replaced by
 * spaces.
 *
 * <p>Fix: the previous version declared a {@code theDoc} local that was
 * never assigned and threw {@code Parser.Failure} from a {@code finally}
 * block whenever it was {@code null}. Because a throw in {@code finally}
 * discards a pending {@code return} or exception, every call failed with
 * "possibly out of memory", even after a successful parse, and an
 * {@link InterruptedException} was masked as well. That block has been
 * removed; {@link Error}s such as {@link OutOfMemoryError} now propagate
 * unchanged.
 *
 * @param location       URL of the source document
 * @param mimeType       MIME type recorded in the resulting document
 * @param charset        not used; the resulting document is declared as UTF-8
 * @param scraper        not used by this implementation
 * @param timezoneOffset not used by this implementation
 * @param source         stream holding the Visio document
 * @return an array containing exactly one parsed document
 * @throws Parser.Failure       if building the document fails unexpectedly
 * @throws InterruptedException if an interruption is raised while parsing
 */
@Override
public Document[] parse(final AnchorURL location, final String mimeType, final String charset,
        final VocabularyScraper scraper, final int timezoneOffset, final InputStream source)
        throws Parser.Failure, InterruptedException {

    try {
        String contents = "";
        SummaryInformation summary = null;
        try {
            final VisioTextExtractor extractor = new VisioTextExtractor(source);
            contents = extractor.getText();
            summary = extractor.getSummaryInformation();
        } catch (final Exception e) {
            // Deliberate best effort: continue with whatever was extracted so far.
            ConcurrentLog.warn("vsdParser", e.getMessage());
        }

        String author = null;
        String[] keywords = null;
        String title = null;
        if (summary != null) {
            author = summary.getAuthor();
            if (summary.getKeywords() != null) {
                keywords = summary.getKeywords().split("[ ,;]");
            }
            title = summary.getTitle();
        }

        final List<String> abstrct = new ArrayList<String>();
        if (contents.length() > 0) {
            final String head = (contents.length() > 80) ? contents.substring(0, 80) : contents.trim();
            // Literal replacements; "\r\n" is handled first so it collapses to a single space.
            abstrct.add(head.replace("\r\n", " ").replace('\n', ' ').replace('\r', ' ').replace('\t', ' '));
        }

        if (title == null) {
            title = location.toNormalform(true);
        }

        // As the result of parsing this function must return a plasmaParserDocument object
        return new Document[] { new Document(location, // url of the source document
                mimeType, // the documents mime type
                "UTF-8", // charset of the document text
                this, null, // language
                keywords, singleList(title), author, "", null, // an array of section headlines
                abstrct, // an abstract
                0.0f, 0.0f, contents, // the parsed document text
                null, // a map of extracted anchors
                null, null, // a treeset of image URLs
                false, new Date()) };
    } catch (final Exception e) {
        if (e instanceof InterruptedException) {
            throw (InterruptedException) e;
        }

        // if an unexpected error occurs just log the error and raise a new Parser.Failure
        final String errorMsg = "Unable to parse the vsd document '" + location + "':" + e.getMessage();
        AbstractParser.log.severe(errorMsg);
        throw new Parser.Failure(errorMsg, location);
    }
}