org.kmworks.util.cp.CodepointSetUtil.java Source code

Java tutorial

Introduction

Here is the source code for org.kmworks.util.cp.CodepointSetUtil.java

Source

/*
 * Copyright (C) 2005-2016 Christian P. Lerch <christian.p.lerch[at]gmail.com>
 *  
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 * 
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 * 
 * You should have received a copy of the GNU General Public License
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */
package org.kmworks.util.cp;

import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableSortedSet;
import com.google.common.collect.PeekingIterator;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.ObjectInputStream;
import java.io.ObjectOutputStream;
import java.io.OutputStream;
import java.io.Reader;
import java.io.Writer;
import java.util.List;

/**
 * Static helpers for code point sets: decoding character sequences into code
 * points, reading and writing a line-oriented text format, and reading and
 * writing Java-serialized {@link CodepointBitSet} instances.
 *
 * @author Christian P. Lerch <christian.p.lerch@gmail.com>
 */
public class CodepointSetUtil {

    /**
     * Decodes a character sequence into the list of its Unicode code points.
     * <p>
     * A valid surrogate pair yields a single supplementary code point; an unpaired
     * surrogate is returned as its own value, following
     * {@link Character#codePointAt(CharSequence, int)}.
     *
     * @param s the character sequence to decode, must not be {@code null}
     * @return an immutable list holding the code points of {@code s} in order of occurrence
     */
    public static List<Integer> codepointsFrom(CharSequence s) {
        final ImmutableList.Builder<Integer> builder = new ImmutableList.Builder<>();
        int i = 0;
        while (i < s.length()) {
            final int cp = Character.codePointAt(s, i);
            builder.add(cp);
            // advance by the number of chars this code point occupies (1 or 2)
            i += Character.charCount(cp);
        }
        return builder.build();
    }

    /**
     * Internalize a code point set from an external textual representation.
     * <p>
     * The input is read line by line. Each line is either blank, a comment, a single
     * code point or an inclusive code point range, optionally followed by a comment:
     * <pre>
     * start     = line* EOF
     * line      = ws ( codepoint ( ws '-' ws codepoint )? )? ws comment? nl
     * codepoint = digit+
     * comment   = '#' (!nl _)*
     * </pre>
     * Numbers are parsed with {@link Integer#parseInt(String, int)} using the given
     * {@code radix}; no radix prefix such as {@code 0x} is interpreted.
     * <p>
     * The reader is wrapped in a {@link BufferedReader} but is not closed; closing it
     * remains the caller's responsibility.
     *
     * @param r a Reader for reading text input
     * @param radix the radix in which all numbers of the input are written
     * @return the set of all code points listed in the input
     * @throws IOException if reading from {@code r} fails
     * @throws NumberFormatException if a line holds a malformed number or an incomplete range
     */
    public static CodepointSet fromText(Reader r, int radix) throws IOException {
        final ImmutableSortedSet.Builder<Integer> builder = new ImmutableSortedSet.Builder<>(
                CodepointSet.INTEGER_COMPARATOR);
        final BufferedReader br = new BufferedReader(r);
        String line;
        while ((line = br.readLine()) != null) {
            // Remove the comment BEFORE looking for '-', so that a dash inside a
            // comment (e.g. "41 # A-Z") is not mistaken for a range separator.
            final String content = stripTrailingComment(line);
            if (content.isEmpty()) { // skip empty & comment lines
                continue;
            }
            if (content.indexOf('-') >= 0) { // line contains a codepoint range
                final String[] range = content.split("\\s*-\\s*");
                if (range.length < 2) {
                    throw new NumberFormatException("Incomplete codepoint range: \"" + content + "\"");
                }
                final int lo = Integer.parseInt(range[0], radix);
                final int hi = Integer.parseInt(range[1], radix);
                for (int i = lo; i <= hi; i++) {
                    builder.add(i);
                }
            } else { // line contains a single codepoint
                builder.add(Integer.parseInt(content, radix));
            }
        }
        return CodepointBitSet.of(builder.build());
    }

    /**
     * Externalize a code point set as text, one entry per line.
     * <p>
     * Runs of consecutive code points delivered by the set's iterator are collapsed
     * into a single {@code first-last} line; any other code point is written on a
     * line of its own. Numbers are written with {@link Integer#toString(int, int)} in
     * the given {@code radix}, and every line is terminated by {@code "\n"}.
     * The writer is neither flushed nor closed.
     *
     * @param set the set to write
     * @param w the Writer receiving the text
     * @param radix the radix in which the numbers are written
     * @throws IOException if writing to {@code w} fails
     */
    public static void toText(CodepointSet set, Writer w, int radix) throws IOException {
        // NOTE(review): collapsing runs presumes the iterator yields ascending code points - confirm
        final PeekingIterator<Integer> iter = set.iterator();

        while (iter.hasNext()) {
            final int first = iter.next();
            int last = first;
            // extend the run for as long as the next element is the direct successor
            while (iter.hasNext() && iter.peek().intValue() == last + 1) {
                last = iter.next();
            }
            writeCodepoint(w, first, radix);
            if (last != first) {
                w.write('-');
                writeCodepoint(w, last, radix);
            }
            writeln(w);
        }
    }

    /** Writes a single code point as a number in the given radix. */
    private static void writeCodepoint(Writer w, int cp, int radix) throws IOException {
        w.write(Integer.toString(cp, radix));
    }

    /** Terminates the current output line with a single line feed. */
    private static void writeln(Writer w) throws IOException {
        w.write("\n");
    }

    /**
     * Writes the given set to the stream using Java object serialization.
     * The stream is flushed but not closed.
     *
     * @param set the set to serialize
     * @param out the stream receiving the serialized form
     * @throws IOException if writing to {@code out} fails
     */
    public static void externalize(CodepointBitSet set, OutputStream out) throws IOException {
        final ObjectOutputStream oos = new ObjectOutputStream(out);
        oos.writeObject(set);
        oos.flush();
    }

    /**
     * Reads a set previously written by
     * {@link #externalize(CodepointBitSet, OutputStream)}. The stream is not closed.
     * <p>
     * SECURITY: this uses Java-native deserialization, which must not be applied to
     * data from untrusted sources.
     *
     * @param is the stream holding the serialized set
     * @return the deserialized set
     * @throws IOException if reading from {@code is} fails
     * @throws ClassNotFoundException if the class of the serialized object cannot be found
     * @throws ClassCastException if the stream holds an object that is not a {@code CodepointBitSet}
     */
    public static CodepointSet internalize(InputStream is) throws IOException, ClassNotFoundException {
        final ObjectInputStream ois = new ObjectInputStream(is);
        return (CodepointBitSet) ois.readObject();
    }

    /*
        Private helpers
    */

    /** Returns the line without its trailing {@code #} comment, trimmed of surrounding whitespace. */
    private static String stripTrailingComment(String line) {
        final int hash = line.indexOf('#');
        final String content = (hash >= 0) ? line.substring(0, hash) : line;
        return content.trim();
    }

}