Example usage for org.apache.commons.lang StringUtils normalizeSpace

List of usage examples for org.apache.commons.lang StringUtils normalizeSpace

Introduction

On this page you can find usage examples for org.apache.commons.lang StringUtils normalizeSpace.

Prototype

public static String normalizeSpace(String str) 

Source Link

Document

<p> Similar to <a href="http://www.w3.org/TR/xpath/#function-normalize-space">http://www.w3.org/TR/xpath/#function-normalize-space</a> </p> <p> The function returns the argument string with whitespace normalized by using <code>#trim(String)</code> to remove leading and trailing whitespace and then replacing sequences of whitespace characters by a single space. </p>

Usage

From source file:de.tudarmstadt.ukp.csniper.resbuild.EvaluationItemFixer2.java

/**
 * Re-parses the text behind every {@code cachedparse} row whose {@code pennTree} is
 * {@code 'ERROR'} or empty and stores a freshly computed Penn tree for it.
 *
 * <p>Each affected row is identified by its collectionId, documentId, beginOffset and endOffset.
 * The covered text of the row is run through the {@code DummySentenceSplitter},
 * {@code StanfordSegmenter} and {@code StanfordParser} engines; the first {@code PennTree}
 * annotation found is whitespace-normalized and written back to that row.</p>
 *
 * <p>A {@link SQLException} or {@code UIMAException} aborts the whole run; it is logged together
 * with its stack trace and the statements and the connection are still closed.</p>
 *
 * @param args command-line arguments; not used
 */
public static void main(String[] args) {
    connect(HOST, DATABASE, USER, PASSWORD);

    // Only read by the log statement below and by the disabled code further down.
    Map<Integer, String> items = new HashMap<Integer, String>();
    Map<Integer, String> failed = new HashMap<Integer, String>();

    // fetch coveredTexts of dubious items and clean it
    PreparedStatement select = null;
    PreparedStatement update = null;
    try {
        String selectQuery = "SELECT * FROM cachedparse WHERE pennTree = 'ERROR' OR pennTree = ''";

        select = connection.prepareStatement(selectQuery);
        log.info("Running query [" + selectQuery + "].");
        // The ResultSet is not closed explicitly: per the JDBC contract it is closed together
        // with the statement that produced it (see the finally block).
        ResultSet rs = select.executeQuery();

        //         CSVWriter writer;
        JCas jcas = JCasFactory.createJCas();
        String updateQuery = "UPDATE CachedParse SET pennTree = ? WHERE collectionId = ? AND documentId = ? AND beginOffset = ? AND endOffset = ?";
        update = connection.prepareStatement(updateQuery);
        //         File base = new File("");

        AnalysisEngine sentences = createEngine(DummySentenceSplitter.class);
        AnalysisEngine tokenizer = createEngine(StanfordSegmenter.class,
                StanfordSegmenter.PARAM_CREATE_SENTENCES, false, StanfordSegmenter.PARAM_CREATE_TOKENS, true);
        AnalysisEngine parser = createEngine(StanfordParser.class, StanfordParser.PARAM_WRITE_CONSTITUENT, true,
                //               StanfordParser.PARAM_CREATE_DEPENDENCY_TAGS, true,
                StanfordParser.PARAM_WRITE_PENN_TREE, true, StanfordParser.PARAM_LANGUAGE, "en",
                StanfordParser.PARAM_VARIANT, "factored");

        while (rs.next()) {
            String collectionId = rs.getString("collectionId");
            String documentId = rs.getString("documentId");
            int beginOffset = rs.getInt("beginOffset");
            int endOffset = rs.getInt("endOffset");
            String text = retrieveCoveredText(collectionId, documentId, beginOffset, endOffset);

            jcas.setDocumentText(text);
            jcas.setDocumentLanguage("en");
            sentences.process(jcas);
            tokenizer.process(jcas);
            parser.process(jcas);

            //            writer = new CSVWriter(new FileWriter(new File(base, documentId + ".csv"));

            System.out.println("Updating " + text);
            for (PennTree p : JCasUtil.select(jcas, PennTree.class)) {
                String tree = StringUtils.normalizeSpace(p.getPennTree());
                update.setString(1, tree);
                update.setString(2, collectionId);
                update.setString(3, documentId);
                update.setInt(4, beginOffset);
                update.setInt(5, endOffset);
                update.executeUpdate();
                System.out.println("with tree " + tree);
                // Only the first PennTree of the re-parsed text is stored for the row.
                break;
            }
            // The same JCas is reused for every row, so it must be emptied before the next one.
            jcas.reset();
        }
    } catch (SQLException e) {
        // Pass the exception itself so the stack trace and cause are not lost.
        log.error("Exception while selecting: " + e.getMessage(), e);
    } catch (UIMAException e) {
        // Report through the logger like every other failure instead of printing to stderr.
        log.error("Exception while parsing: " + e.getMessage(), e);
    } finally {
        closeQuietly(select);
        closeQuietly(update);
    }

    // write logs
    //      BufferedWriter bwf = null;
    //      BufferedWriter bws = null;
    //      try {
    //         bwf = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(new File(
    //               LOG_FAILED)), "UTF-8"));
    //         for (Entry<Integer, String> e : failed.entrySet()) {
    //            bwf.write(e.getKey() + " - " + e.getValue() + "\n");
    //         }
    //
    //         bws = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(new File(
    //               LOG_SUCCESSFUL)), "UTF-8"));
    //         for (Entry<Integer, String> e : items.entrySet()) {
    //            bws.write(e.getKey() + " - " + e.getValue() + "\n");
    //         }
    //      }
    //      catch (IOException e) {
    //         log.error("Got an IOException while writing the log files.");
    //      }
    //      finally {
    //         IOUtils.closeQuietly(bwf);
    //         IOUtils.closeQuietly(bws);
    //      }

    log.info("Texts for [" + items.size() + "] items need to be cleaned up.");

    // update the dubious items with the cleaned coveredText
    //      PreparedStatement update = null;
    //      try {
    //         String updateQuery = "UPDATE EvaluationItem SET coveredText = ? WHERE id = ?";
    //
    //         update = connection.prepareStatement(updateQuery);
    //         int i = 0;
    //         for (Entry<Integer, String> e : items.entrySet()) {
    //            int id = e.getKey();
    //            String coveredText = e.getValue();
    //
    //            // update item in database
    //            update.setString(1, coveredText);
    //            update.setInt(2, id);
    //            update.executeUpdate();
    //            log.debug("Updating " + id + " with [" + coveredText + "]");
    //
    //            // show percentage of updated items
    //            i++;
    //            int part = (int) Math.ceil((double) items.size() / 100);
    //            if (i % part == 0) {
    //               log.info(i / part + "% finished (" + i + "/" + items.size() + ").");
    //            }
    //         }
    //      }
    //      catch (SQLException e) {
    //         log.error("Exception while updating: " + e.getMessage());
    //      }
    //      finally {
    //         closeQuietly(update);
    //      }

    closeQuietly(connection);
}

From source file:com.haulmont.cuba.web.gui.components.WebLinkButton.java

/**
 * Returns the style names reported by the superclass with every occurrence of
 * {@code BaseTheme.BUTTON_LINK} removed and the remaining whitespace normalized.
 *
 * @return the style name string without the link-button style
 */
@Override
public String getStyleName() {
    String inheritedStyles = super.getStyleName();
    String withoutLinkStyle = inheritedStyles.replace(BaseTheme.BUTTON_LINK, "");
    // Removing a name can leave leading, trailing or doubled spaces behind.
    return StringUtils.normalizeSpace(withoutLinkStyle);
}

From source file:com.haulmont.cuba.web.gui.components.WebFlowBoxLayout.java

/**
 * Returns the style names reported by the superclass with every occurrence of
 * {@code FLOWLAYOUT_STYLENAME} removed and the remaining whitespace normalized.
 *
 * @return the style name string without the flow-layout style
 */
@Override
public String getStyleName() {
    String inheritedStyles = super.getStyleName();
    String withoutLayoutStyle = inheritedStyles.replace(FLOWLAYOUT_STYLENAME, "");
    // Removing a name can leave leading, trailing or doubled spaces behind.
    return StringUtils.normalizeSpace(withoutLayoutStyle);
}

From source file:com.haulmont.cuba.web.gui.components.mainwindow.WebFoldersPane.java

/**
 * Returns the style names reported by the superclass with every occurrence of
 * {@code C_FOLDERS_PANE} removed and the remaining whitespace normalized.
 *
 * @return the style name string without the folders-pane style
 */
@Override
public String getStyleName() {
    String inheritedStyles = super.getStyleName();
    String withoutPaneStyle = inheritedStyles.replace(C_FOLDERS_PANE, "");
    // Removing a name can leave leading, trailing or doubled spaces behind.
    return StringUtils.normalizeSpace(withoutPaneStyle);
}

From source file:com.haulmont.cuba.web.gui.components.WebButtonsPanel.java

/**
 * Returns the style names reported by the superclass with every occurrence of
 * {@code BUTTONS_PANNEL_STYLENAME} removed and the remaining whitespace normalized.
 *
 * @return the style name string without the buttons-panel style
 */
@Override
public String getStyleName() {
    String inheritedStyles = super.getStyleName();
    String withoutPanelStyle = inheritedStyles.replace(BUTTONS_PANNEL_STYLENAME, "");
    // Removing a name can leave leading, trailing or doubled spaces behind.
    return StringUtils.normalizeSpace(withoutPanelStyle);
}

From source file:com.haulmont.cuba.web.gui.components.mainwindow.WebLogoutButton.java

/**
 * Returns the style names reported by the superclass with every occurrence of
 * {@code LOGOUT_BUTTON_STYLENAME} removed and the remaining whitespace normalized.
 *
 * @return the style name string without the logout-button style
 */
@Override
public String getStyleName() {
    String inheritedStyles = super.getStyleName();
    String withoutLogoutStyle = inheritedStyles.replace(LOGOUT_BUTTON_STYLENAME, "");
    // Removing a name can leave leading, trailing or doubled spaces behind.
    return StringUtils.normalizeSpace(withoutLogoutStyle);
}

From source file:de.tudarmstadt.ukp.csniper.ml.NeAugmentationAnnotator.java

/**
 * Augments the Penn tree of every {@code PennTree} annotation in the given CAS.
 *
 * <p>Each tree string is whitespace-normalized and parsed. If parsing yields a node and
 * {@code augment} reports a positive count for it and the {@code NamedEntity} annotations
 * covered by the tree annotation, the re-serialized tree replaces the stored one and both
 * versions are logged.</p>
 *
 * @param aJCas the CAS to process; also stored in the {@code jcas} field
 * @throws AnalysisEngineProcessException declared by the overridden method
 */
@Override
public void process(JCas aJCas) throws AnalysisEngineProcessException {
    jcas = aJCas;
    for (PennTree annotation : JCasUtil.select(aJCas, PennTree.class)) {
        String originalTree = StringUtils.normalizeSpace(annotation.getPennTree());
        PennTreeNode root = PennTreeUtils.parsePennTree(originalTree);
        if (root == null) {
            // Nothing to augment when the tree string could not be turned into a node.
            continue;
        }

        int augmented = augment(root, JCasUtil.selectCovered(NamedEntity.class, annotation));
        if (augmented <= 0) {
            // Leave the annotation untouched when no augmentation took place.
            continue;
        }

        getLogger().info("ORIGINAL PENNTREE:  [" + originalTree + "]");
        String augmentedTree = PennTreeUtils.toPennTree(root);
        annotation.setPennTree(augmentedTree);
        getLogger().info("AUGMENTED PENNTREE: [" + augmentedTree + "]");
    }
}

From source file:de.tudarmstadt.ukp.csniper.ml.TKSVMlightFeatureExtractor.java

/**
 * Trains one instance per {@code PennTree} annotation found in the given CAS.
 *
 * <p>Every instance carries a single {@code "TK_tree"} feature holding the
 * whitespace-normalized Penn tree, and takes its outcome from the expected label of the single
 * {@code BooleanClassification} annotation in the CAS. A warning is logged when the CAS
 * contains no tree or more than one tree.</p>
 *
 * @param aJCas the CAS to extract training instances from
 * @throws AnalysisEngineProcessException declared by the overridden method
 */
@Override
public void process(JCas aJCas) throws AnalysisEngineProcessException {
    Collection<PennTree> pennTrees = JCasUtil.select(aJCas, PennTree.class);

    for (PennTree pennTree : pennTrees) {
        Instance<Boolean> instance = new Instance<Boolean>();
        String normalizedTree = StringUtils.normalizeSpace(pennTree.getPennTree());
        instance.add(new Feature("TK_tree", normalizedTree));

        // Looked up per tree on purpose: with no trees in the CAS the lookup never runs.
        BooleanClassification classification = JCasUtil.selectSingle(aJCas, BooleanClassification.class);
        instance.setOutcome(classification.getExpectedLabel());

        train(instance);
    }

    int treeCount = pennTrees.size();
    if (treeCount == 0) {
        getLogger().warn("No PennTree found: " + aJCas.getDocumentText());
    } else if (treeCount > 1) {
        getLogger().warn("Too many [" + treeCount + "] PennTrees found: " + aJCas.getDocumentText());
    }
}

From source file:com.haulmont.cuba.web.gui.components.mainwindow.WebTimeZoneIndicator.java

/**
 * Returns the style names reported by the superclass with every occurrence of
 * {@code USER_TIMEZONE_LABEL_STYLENAME} removed and the remaining whitespace normalized.
 *
 * @return the style name string without the time-zone label style
 */
@Override
public String getStyleName() {
    String inheritedStyles = super.getStyleName();
    String withoutLabelStyle = inheritedStyles.replace(USER_TIMEZONE_LABEL_STYLENAME, "");
    // Removing a name can leave leading, trailing or doubled spaces behind.
    return StringUtils.normalizeSpace(withoutLabelStyle);
}

From source file:com.haulmont.cuba.web.gui.components.WebListEditor.java

/**
 * Returns the style names reported by the superclass with every occurrence of
 * {@code LISTEDITOR_STYLENAME} removed and the remaining whitespace normalized.
 *
 * @return the style name string without the list-editor style
 */
@Override
public String getStyleName() {
    String inheritedStyles = super.getStyleName();
    String withoutEditorStyle = inheritedStyles.replace(LISTEDITOR_STYLENAME, "");
    // Removing a name can leave leading, trailing or doubled spaces behind.
    return StringUtils.normalizeSpace(withoutEditorStyle);
}