Example usage for edu.stanford.nlp.util ArrayCoreMap get

List of usage examples for edu.stanford.nlp.util ArrayCoreMap get

Introduction

This page collects example usage of the method edu.stanford.nlp.util.ArrayCoreMap.get.

Prototype

@Override
@SuppressWarnings("unchecked")
public <VALUE> VALUE get(Class<? extends Key<VALUE>> key) 

Source Link

Usage

From source file: eu.fbk.dkm.sectionextractor.pantheon.WikipediaGoodTextExtractor.java

License:Apache License

/**
 * Processes one Wikipedia content page and writes its cleaned text to disk.
 *
 * <p>The page is skipped when {@code pagesToConsider} is set and does not contain
 * {@code title}, or when {@code idCategory} is set and has no entry for {@code wikiID}.
 * Otherwise the text returned by {@code WikipediaText.parse} is filtered line by line
 * (lines starting with {@code categoryPrefix} or {@code "new:"} are dropped), optionally
 * tokenized with the Stanford pipeline (one token per line, each sentence terminated by
 * {@code eosTag}), and saved either as a NAF document or as a plain text file.
 *
 * <p>Output location: {@code baseFolder/[category/]<wikiID / MAX_FILES_PER_FOLDER>/<wikiID>.(naf|txt)},
 * where the category component is present only when {@code idCategory} maps
 * {@code wikiID} to a non-null value.
 *
 * <p>Failures are logged (with their cause) and never propagated to the caller.
 *
 * @param text   raw page text handed to {@code WikipediaText.parse}
 * @param title  page title; underscores are replaced by spaces for the output header
 * @param wikiID numeric page id, used for the file name and folder bucketing
 */
@Override
public void contentPage(String text, String title, int wikiID) {

    if (pagesToConsider != null && !pagesToConsider.contains(title)) {
        return;
    }
    if (idCategory != null && !idCategory.keySet().contains(wikiID)) {
        return;
    }

    try {

        String okTitle = title.trim().replace('_', ' ');

        String folderNumberName = Integer.toString(wikiID / MAX_FILES_PER_FOLDER);
        String fileName = Integer.toString(wikiID);

        String folderName = baseFolder.getAbsolutePath() + File.separator + folderNumberName;
        // idCategory may legitimately be null (no category filter configured, see the guard
        // above), so it must be null-checked before it is dereferenced here.
        if (idCategory != null && idCategory.get(wikiID) != null) {
            folderName = baseFolder.getAbsolutePath() + File.separator + idCategory.get(wikiID) + File.separator
                    + folderNumberName;
        }
        File folder = new File(folderName);
        if (!folder.exists()) {
            folder.mkdirs();
        }

        if (NAFformat) {
            fileName += ".naf";
        } else {
            fileName += ".txt";
        }
        String defFileName = folder.getAbsolutePath() + File.separator + fileName;

        File file = new File(defFileName);

        WikipediaText wikipediaText = new WikipediaText();
        // Appending to a builder (rather than calling toString() on the result directly)
        // keeps the original behavior of rendering a null parse result as "null".
        String rawText = new StringBuilder().append(wikipediaText.parse(text, null)).toString();

        // Rebuild the text: title header, blank line, then every line that is not a
        // category line or a "new:" line. Empty lines are kept on purpose.
        StringBuilder buffer = new StringBuilder();
        buffer.append(okTitle).append("\n").append("\n");
        List<String> strings = Splitter.on('\n').trimResults().splitToList(rawText);
        for (String line : strings) {
            if (line.startsWith(categoryPrefix)) {
                continue;
            }
            if (line.startsWith("new:")) {
                continue;
            }
            buffer.append(line).append("\n");
        }

        rawText = buffer.toString();

        if (useStanford) {
            Annotation myDoc = new Annotation(rawText);
            pipeline.annotate(myDoc);

            StringBuilder tokenizedString = new StringBuilder();

            List<CoreMap> sents = myDoc.get(CoreAnnotations.SentencesAnnotation.class);
            for (CoreMap thisSent : sents) {
                // CoreMap already exposes get(); no downcast to ArrayCoreMap is needed,
                // which also avoids a ClassCastException for other CoreMap implementations.
                List<CoreLabel> tokens = thisSent.get(CoreAnnotations.TokensAnnotation.class);
                for (CoreLabel token : tokens) {
                    tokenizedString.append(codeToParenthesis(token.toString())).append("\n");
                }
                tokenizedString.append(eosTag).append("\n");
            }

            rawText = tokenizedString.toString();
        }

        if (NAFformat) {

            final KAFDocument document = new KAFDocument("en", "v3");

            document.setRawText(rawText);

            document.createPublic();
            document.getPublic().uri = String.format("https://%s.wikipedia.org/wiki/%s",
                    getLocale().getLanguage(), title);

            document.createFileDesc();
            document.getFileDesc().author = "MediaWiki";
            document.getFileDesc().filename = fileName;
            document.getFileDesc().filetype = "Wikipedia article";
            document.getFileDesc().title = okTitle;

            document.save(file.getAbsolutePath());
        } else {
            logger.debug("Writing file " + file.getAbsolutePath());
            // try-with-resources guarantees the writer is closed even if write() fails.
            // NOTE(review): FileWriter uses the platform default charset — confirm that
            // this is intended for Wikipedia text, which is largely non-ASCII.
            try (BufferedWriter writer = new BufferedWriter(new FileWriter(file))) {
                writer.write(rawText);
            } catch (Exception e) {
                logger.error(e.getMessage(), e);
            }
        }

    } catch (Exception e) {
        // Pass the exception along so the cause of the failure is not lost.
        logger.error("Error processing page " + title + " (" + wikiID + ")", e);
    }
}