Example usage for opennlp.tools.util Span Span

List of usage examples for opennlp.tools.util Span Span

Introduction

On this page you can find example usages of the constructors of opennlp.tools.util.Span.

Prototype

public Span(Span span, double prob) 

Source Link

Document

Creates a new immutable span based on an existing span, for the case where the existing span did not include a probability (prob).

Usage

From source file:com.civis.utils.opennlp.models.address.AddressSpanBuilder.java

/**
 * Splits the address covered by {@code originalSpan} into its parts and stores them in the
 * {@code street}, {@code streetNumber}, {@code zip}, {@code city} and {@code country} fields.
 *
 * <p>The street is whatever {@link #createStreetSpan} selects; the street number is the single
 * token that follows it, and the zip is the single token after that. When the zip is known to
 * {@code findAddressDataByZip}, the city comes from that record and the country is set to
 * "Deutschland"; otherwise city and country are derived from the tokens after the zip.
 *
 * @param tokens the tokenized text that {@code originalSpan} indexes into
 */
private void parse(String[] tokens) {
    final Span streetRange = createStreetSpan(originalSpan.getStart(), originalSpan.getEnd(), tokens);
    street = buildString(streetRange, tokens);

    // Exactly one token directly after the street is treated as the street number.
    final int numberStart = streetRange.getEnd();
    final Span numberRange = new Span(numberStart, numberStart + 1);
    streetNumber = buildString(numberRange, tokens);

    // Exactly one token directly after the street number is treated as the zip.
    final int zipStart = numberRange.getEnd();
    Span zipRange = new Span(zipStart, zipStart + 1);
    zip = buildString(zipRange, tokens);
    zip = zip.replaceAll("[+.^:,]", "");
    if (StringUtils.isBlank(zip)) {
        // The token consisted only of special characters such as ',' or '.',
        // so shift one token to the right and try again.
        // Use case: "Lindenstr. 19 , 12207 Berlin"
        zipRange = new Span(zipRange.getStart() + 1, zipRange.getEnd() + 1);
        zip = buildString(zipRange, tokens);
    }

    final CSVAddressData knownAddress = findAddressDataByZip(zip);
    if (knownAddress != null) {
        city = knownAddress.getCity();
        country = "Deutschland";
        return;
    }

    // Unknown zip: everything after the zip is the city, optionally followed by a country.
    final String remainder = buildString(zipRange.getEnd(), originalSpan.getEnd(), tokens);
    country = tryToFindCountry(remainder);
    // No country found means the remaining string is the city on its own.
    city = (country == null) ? remainder : remainder.replace(country, "").trim();
}

From source file:com.civis.utils.opennlp.models.address.AddressSpanBuilder.java

/**
 * Builds the span of tokens that form the street name.
 *
 * <p>Scans {@code tokens} from {@code start} (inclusive) to {@code end} (exclusive) and ends the
 * street span just before the first token that fully matches
 * {@code StreetNumberFeature.STREET_NUMBER_PATTERN}. If no token matches, the whole range is used.
 *
 * @param start index of the first token to inspect
 * @param end   index one past the last token to inspect
 * @param tokens the tokens to scan
 * @return a span starting at {@code start} and ending at the first matching token, or at
 *         {@code end} when there is none
 */
private Span createStreetSpan(int start, int end, String[] tokens) {
    int streetEnd = end;
    int idx = start;
    while (idx < end) {
        if (StreetNumberFeature.STREET_NUMBER_PATTERN.matcher(tokens[idx]).matches()) {
            streetEnd = idx;
            break;
        }
        idx++;
    }
    return new Span(start, streetEnd);
}

From source file:com.civprod.writerstoolbox.testarea.UnsupervisedDiscourseSegmentation.java

/**
 * Splits a document's tokens into segments at positions where the lexical similarity between
 * neighbouring windows of text drops noticeably.
 *
 * <p>Steps, as implemented below:
 * <ol>
 *   <li>Collect all tokens of the document, then stem/filter them.</li>
 *   <li>Cut the stemmed/filtered tokens into fixed-length lists of 20 and count terms per list.</li>
 *   <li>For each position, compare the joined counts of a leading window with those of a
 *       trailing window using cosine similarity.</li>
 *   <li>Compute a "valley" score for each triple of consecutive similarities:
 *       {@code (s[i] - s[i+1]) + (s[i+2] - s[i+1])}.</li>
 *   <li>Every score below {@code mean - standardDeviation} of all scores closes the current
 *       segment and opens a new one.</li>
 * </ol>
 *
 * <p>Documents too short to yield any valley score produce a single segment containing all tokens.
 *
 * @param inDocument         the document to segment
 * @param inSentenceDetector sentence detector passed on to token collection
 * @param inStringTokenizer  tokenizer passed on to token collection
 * @return the segments, each a sub-list view of the document's concatenated tokens
 */
public static List<List<String>> segment(Document<?> inDocument, SentenceDetector inSentenceDetector,
        StringTokenizer inStringTokenizer) {
    List<String> concatenateTokens = concatenateTokens(inDocument, inSentenceDetector, inStringTokenizer);
    List<String> stemmAndFilterList = TokenUtil.stemmAndFilterList(concatenateTokens);
    List<List<String>> splitIntoFixLengthLists = splitIntoFixLengthLists(stemmAndFilterList, 20);
    List<Counter<String>> counters = splitIntoFixLengthLists.parallelStream()
            .map((List<String> curSentence) -> CounterUtils.count(curSentence)).collect(Collectors.toList());

    // Clamp to zero: with fewer than 20 counters the difference is negative, and
    // ArrayList rejects a negative initial capacity with an IllegalArgumentException.
    int similarityCount = Math.max(0, counters.size() - 20);
    List<Double> cosineSimilarity = new ArrayList<>(similarityCount);
    for (int i = 0; i < similarityCount; i++) {
        // NOTE(review): the second window starts at i + 11, so the counter at index i + 10 is in
        // neither window — confirm whether that gap is intended.
        cosineSimilarity.add(cosineSimilarityStemmedAndFiltered(Counter.join(counters.subList(i, i + 10)),
                Counter.join(counters.subList(i + 11, i + 20))));
    }

    // The loop must be bounded by the number of valleys to compute, not by valleys.size():
    // the list is still empty here (the constructor argument is only a capacity), so bounding
    // by its size would skip the loop entirely and no segment boundary would ever be found.
    int valleyCount = Math.max(0, cosineSimilarity.size() - 2);
    List<Double> valleys = new ArrayList<>(valleyCount);
    for (int i = 0; i < valleyCount; i++) {
        double ya1 = cosineSimilarity.get(i);
        double ya2 = cosineSimilarity.get(i + 1);
        double ya3 = cosineSimilarity.get(i + 2);
        valleys.add((ya1 - ya2) + (ya3 - ya2));
    }

    SummaryStatistics valleyStatistics = valleys.parallelStream().collect(SummaryStatisticCollector.instance);
    double cutoffThreshold = valleyStatistics.getMean() - valleyStatistics.getStandardDeviation();
    int lastLocation = 0;
    List<Span> spans = new ArrayList<>(1);
    for (int i = 0; i < valleys.size(); i++) {
        double curValley = valleys.get(i);
        if (curValley < cutoffThreshold) {
            // NOTE(review): this offset is derived from positions in the stemmed/filtered list but
            // is applied to the unfiltered token list below — confirm the two line up as intended.
            int curLocation = (i + 11) * 20;
            spans.add(new Span(lastLocation, curLocation));
            lastLocation = curLocation;
        }
    }
    // The final segment always runs to the end of the token list.
    spans.add(new Span(lastLocation, concatenateTokens.size()));
    return spans.parallelStream()
            .map((Span curSpan) -> concatenateTokens.subList(curSpan.getStart(), curSpan.getEnd()))
            .collect(Collectors.toList());
}

From source file:de.tudarmstadt.ukp.dkpro.core.opennlp.OpenNlpParser.java

/**
 * Parses every sentence of the given CAS and adds the resulting constituent annotations.
 *
 * <p>For each {@code Sentence}, an incomplete parse node spanning the sentence is created and one
 * token node per covered {@code Token} is inserted into it. The model's parse result is then
 * handed to {@code createConstituentAnnotationFromTree}. When {@code createPennTreeString} is set,
 * the parse is also rendered as text and stored in a {@code PennTree} annotation over the sentence.
 *
 * @param aJCas the CAS to process
 * @throws AnalysisEngineProcessException declared by the overridden method
 */
@Override
public void process(JCas aJCas) throws AnalysisEngineProcessException {
    final CAS cas = aJCas.getCas();

    modelProvider.configure(cas);
    mappingProvider.configure(cas);

    for (Sentence sentence : select(aJCas, Sentence.class)) {
        final List<Token> tokens = selectCovered(aJCas, Token.class, sentence);

        // Root node covering the whole sentence; token nodes are attached below.
        final Parse parseInput = new Parse(cas.getDocumentText(),
                new Span(sentence.getBegin(), sentence.getEnd()), AbstractBottomUpParser.INC_NODE, 0, 0);

        for (int tokenIndex = 0; tokenIndex < tokens.size(); tokenIndex++) {
            final Token token = tokens.get(tokenIndex);
            final Parse tokenNode = new Parse(cas.getDocumentText(),
                    new Span(token.getBegin(), token.getEnd()), AbstractBottomUpParser.TOK_NODE, 0,
                    tokenIndex);
            parseInput.insert(tokenNode);
        }

        final Parse parseOutput = modelProvider.getResource().parse(parseInput);

        createConstituentAnnotationFromTree(aJCas, parseOutput, null, tokens);

        if (!createPennTreeString) {
            continue;
        }

        final StringBuffer treeText = new StringBuffer();
        parseOutput.setType("ROOT"); // in DKPro the root is ROOT, not TOP
        parseOutput.show(treeText);

        final PennTree pennTree = new PennTree(aJCas, sentence.getBegin(), sentence.getEnd());
        pennTree.setPennTree(treeText.toString());
        pennTree.addToIndexes();
    }
}

From source file:opennlp.tools.util.Span.java

/**
     * Checks that {@link Span#getStart()} returns the start index the span was created with.
     */
    public void testGetStart() {
        Span span = new Span(5, 6);

        Assert.assertEquals(5, span.getStart());
    }

From source file:opennlp.tools.util.Span.java

/**
     * Checks that {@link Span#getEnd()} returns the end index the span was created with.
     */
    public void testGetEnd() {
        Span span = new Span(5, 6);

        Assert.assertEquals(6, span.getEnd());
    }

From source file:opennlp.tools.util.Span.java

/**
     * Checks that {@link Span#length()} of a span from 10 to 21 is 11.
     */
    public void testLength() {
        Span span = new Span(10, 21);

        Assert.assertEquals(11, span.length());
    }

From source file:opennlp.tools.util.Span.java

/**
     * Checks that {@link Span#contains(Span)} is true for a span lying strictly inside another.
     */
    public void testContains() {
        Span outer = new Span(500, 900);
        Span inner = new Span(520, 600);

        boolean contained = outer.contains(inner);

        Assert.assertEquals(true, contained);
    }

From source file:opennlp.tools.util.Span.java

/**
     * Checks that {@link Span#contains(Span)} is true when a span is compared with itself.
     */
    public void testContainsWithEqual() {
        Span span = new Span(500, 900);

        boolean contained = span.contains(span);

        Assert.assertEquals(true, contained);
    }

From source file:opennlp.tools.util.Span.java

/**
     * Checks that {@link Span#contains(Span)} is false when the other span starts before
     * and ends after this span.
     */
    public void testContainsWithLowerIntersect() {
        Span span = new Span(500, 900);
        Span wider = new Span(450, 1000);

        boolean contained = span.contains(wider);

        Assert.assertEquals(false, contained);
    }