Java URL Load readStopwordsURL(URL url, boolean lowercase)

Here you can find the source of readStopwordsURL(URL url, boolean lowercase)

Description

Reads a set of unique stopwords (one per line) from the resource at the given URL, optionally lowercasing them.

License

Apache License

Declaration

public static Set<String> readStopwordsURL(URL url, boolean lowercase) throws IOException 

Method Source Code


//package com.java2s;
/*
 * Copyright 2016
 * Ubiquitous Knowledge Processing (UKP) Lab
 * Technische Universität Darmstadt
 * <p>
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 * <p>
 * http://www.apache.org/licenses/LICENSE-2.0
 * <p>
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import java.io.*;
import java.net.URL;
import java.nio.charset.StandardCharsets;

import java.util.Locale;
import java.util.Set;
import java.util.stream.Collectors;
import java.util.stream.Stream;

public class Main {
    /**
     * Read stopwords (one per line) from the resource a {@link URL} points to.
     * <p>
     * Empty lines and lines starting with ("#") are filtered out. The stream
     * opened from the URL is always closed before this method returns.
     *
     * @param url       location of the stopword list
     * @param lowercase if true, lowercase everything
     * @return a collection of unique stopwords
     * @throws IOException if the URL cannot be opened or reading from it fails
     */
    public static Set<String> readStopwordsURL(URL url, boolean lowercase) throws IOException {
        // The stream is opened here, so it must be closed here as well.
        try (InputStream inputStream = url.openStream()) {
            return readStopwordsInputStream(inputStream, lowercase);
        } catch (UncheckedIOException e) {
            // BufferedReader.lines() wraps read failures; report them as the declared IOException.
            throw e.getCause();
        }
    }

    /**
     * Read an {@link InputStream} containing stopwords (one per line).
     * <p>
     * Empty lines and lines starting with ("#") are filtered out. The content
     * is decoded as UTF-8 regardless of the platform default charset. The
     * stream is fully consumed but not closed; closing it is left to the caller.
     *
     * @param inputStream input stream
     * @param lowercase   if true, lowercase everything
     * @return a collection of unique stopwords
     * @throws UncheckedIOException if reading from the stream fails
     */
    public static Set<String> readStopwordsInputStream(InputStream inputStream, boolean lowercase) {
        BufferedReader reader = new BufferedReader(new InputStreamReader(inputStream, StandardCharsets.UTF_8));
        return readStream(reader.lines(), lowercase);
    }

    /**
     * Trim every line, drop empty lines and "#" comment lines, optionally
     * lowercase the remainder, and collect the result into a set.
     *
     * @param s         stream of raw lines
     * @param lowercase if true, lowercase everything
     * @return a collection of unique stopwords
     */
    private static Set<String> readStream(Stream<String> s, boolean lowercase) {
        return s.map(String::trim)
                .filter(l -> !l.isEmpty())
                .filter(l -> !l.startsWith("#"))
                // Locale.ROOT keeps lowercasing independent of the default locale
                // (e.g. the Turkish locale would map "I" to a dotless "ı").
                .map(l -> lowercase ? l.toLowerCase(Locale.ROOT) : l)
                .collect(Collectors.toSet());
    }
}

Related

  1. readPropertyFile(URL url)
  2. readResource(final URL filename)
  3. readResource(URL resource)
  4. readServicesFromUrl(Collection list, URL url)
  5. readSqlStatements(URL url)
  6. readStringFromUrl(URL url)
  7. readStringFromURL(URL url)
  8. readText(final URL url)
  9. readText(final URL url)