// CompiledSymbolTable.java :  » Natural-Language-Processing » LingPipe » com » aliasi » symbol » Java Open Source
//
// Java Open Source » Natural Language Processing » LingPipe
// LingPipe » com » aliasi » symbol » CompiledSymbolTable.java
/*
 * LingPipe v. 3.9
 * Copyright (C) 2003-2010 Alias-i
 *
 * This program is licensed under the Alias-i Royalty Free License
 * Version 1 WITHOUT ANY WARRANTY, without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the Alias-i
 * Royalty Free License Version 1 for more details.
 *
 * You should have received a copy of the Alias-i Royalty Free License
 * Version 1 along with this program; if not, visit
 * http://alias-i.com/lingpipe/licenses/lingpipe-license-1.txt or contact
 * Alias-i, Inc. at 181 North 11th Street, Suite 401, Brooklyn, NY 11211,
 * +1 (718) 290-9170.
 */

package com.aliasi.symbol;

import com.aliasi.util.AbstractExternalizable;

import java.io.DataOutputStream;
import java.io.IOException;
import java.io.ObjectInput;
import java.io.ObjectOutput;
import java.io.Serializable;

import java.util.Arrays;

/**
 * A symbol table that is initialized with an array of symbols, either
 * directly, or through a data input stream.
 * Methods are provided to write a table to a data output stream and
 * to read a table from a data input stream.

 * <p>The binary in-file format of a symbol table is as follows.
 * <br/><br/>
 * <table cellpadding="5" border="1">
 * <tr><td width="20%"><b>Number</b></td>
 *     <td width="20%"><b>Variable</b></td>
 *     <td width="20%"><b>Type</b></td>
 *     <td width="60%"><b>Description</b></td></tr>
 * <tr><td><code>1</code></td>
 *     <td><code>numSymbols</code></td>
 *     <td><code>int</code></td>
 *     <td>Number of symbols</td></tr>
 * <tr><td rowspan=2><code>numSymbols</code></td>
 *     <td><code>numChars</code></td>
 *     <td><code>short</code></td>
 *     <td>Number of characters in next symbol</td></tr>
 * <tr><td><code>numChars * short</code></td>
 *     <td><code>char</code></td>
 *     <td>Characters for next symbol, encoded separately.</td></tr>
 * </table>
 * <br/>
 * As dictated by the use of <code>short</code> values to
 * encode lengths, the longest symbol allowable in a binary
 * symbol table will be {@link java.lang.Short#MAX_VALUE}.
 * </p>
 *
 * @author  Bob Carpenter
 * @version 3.8.1
 * @since   LingPipe1.0
 */
class CompiledSymbolTable implements SymbolTable, Serializable {

    static final long serialVersionUID = -8025428413920807070L;

    /**
     * Sorted array of symbols in the symbol table.  The position of a
     * symbol in this array is its identifier.
     */
    private final String[] mSymbols;

    /**
     * Construct a compiled symbol table from the specified symbols.
     * Makes a local copy of the symbol array for safety.  The symbols
     * are sorted in order to produce identifiers.
     *
     * @param symbols Array of symbols.
     */
    public CompiledSymbolTable(String[] symbols) {
        mSymbols = symbols.clone();
        Arrays.sort(mSymbols);
    }

    /**
     * Construct a compiled symbol table that uses the specified array
     * as is, without copying or sorting it.  Used when deserializing,
     * where the symbols were written out in the order in which they
     * were stored.
     *
     * @param symbols Array of symbols, stored directly.
     * @param ignore Unused; only distinguishes this constructor's
     * signature from the public one.
     */
    private CompiledSymbolTable(String[] symbols, boolean ignore) {
        mSymbols = symbols;
    }

    /**
     * Returns the serialization proxy that is written in place of
     * this table.
     *
     * @return A {@link Serializer} wrapping this symbol table.
     */
    Object writeReplace() {
        return new Serializer(this);
    }

    /**
     * Returns a string representation of the symbols in
     * order, beginning with symbol 0.
     *
     * @return String representation of this symbol table.
     */
    @Override
    public String toString() {
        return Arrays.asList(mSymbols).toString();
    }

    /**
     * Write the symbol table to a data output stream.  This method is
     * designed to write a symbol table as part of an output stream,
     * so the stream is not closed after the symbol table is written.
     * The format is the binary format described in the class
     * documentation: the number of symbols as an <code>int</code>,
     * followed for each symbol by its length as a <code>short</code>
     * and then its characters.
     *
     * @param out Data output stream to which the symbol table is written.
     * @throws IOException If there is an exception writing to the
     * underlying stream, or if a symbol is longer than
     * {@link java.lang.Short#MAX_VALUE} characters and so cannot have
     * its length encoded.
     */
    public void write(DataOutputStream out) throws IOException {
        writeSymbols(out);
    }

    /**
     * Write the symbol table to an object output, using the same
     * binary format as {@link #write(DataOutputStream)}.
     *
     * @param out Object output to which the symbol table is written.
     * @throws IOException If there is an exception writing to the
     * underlying output, or if a symbol is longer than
     * {@link java.lang.Short#MAX_VALUE} characters.
     */
    void writeObj(ObjectOutput out) throws IOException {
        writeSymbols(out);
    }

    /**
     * Shared implementation of the binary encoding used by both
     * {@link #write(DataOutputStream)} and {@link #writeObj(ObjectOutput)}.
     *
     * @param out Output to which the symbol table is written.
     * @throws IOException If the underlying output fails or a symbol
     * is too long for its length to fit in a <code>short</code>.
     */
    private void writeSymbols(java.io.DataOutput out) throws IOException {
        out.writeInt(mSymbols.length);
        for (String symbol : mSymbols) {
            int length = symbol.length();
            // writeShort keeps only the low 16 bits; an oversized length
            // would be silently truncated and corrupt the stream.
            if (length > Short.MAX_VALUE) {
                throw new IOException("Symbol too long to encode."
                                      + " Length=" + length
                                      + " Maximum length=" + Short.MAX_VALUE);
            }
            out.writeShort(length);
            out.writeChars(symbol);
        }
    }

    /**
     * Return the identifier corresponding to the specified symbol,
     * or <code>-1</code> if the symbol does not exist.
     *
     * @param symbol Symbol whose identifier is returned.
     * @return Identifier corresponding to specified symbol or
     * <code>-1</code> if the symbol does not exist.
     */
    public int symbolToID(String symbol) {
        // binarySearch returns a negative insertion point when absent;
        // collapse every such value to -1.
        int result = Arrays.binarySearch(mSymbols,symbol);
        return result < 0 ? -1 : result;
    }

    /**
     * Return the symbol corresponding to the specified identifier.
     * Symbols exist for identifiers between <code>0</code> and the
     * number of symbols in the table minus one, inclusive.  Raises an
     * index out of bounds exception for identifiers out of range.
     *
     * @param id Identifier whose symbol is returned.
     * @return Symbol corresponding to the specified identifier.
     * @throws IndexOutOfBoundsException If there is no symbol for the
     * specified identifier.
     */
    public String idToSymbol(int id) {
        return mSymbols[id];
    }

    /**
     * Returns the number of symbols in this symbol table.
     *
     * @return Number of symbols in this table.
     */
    public int numSymbols() {
        return mSymbols.length;
    }

    /**
     * Throws an unsupported operation exception.
     *
     * @throws UnsupportedOperationException Always.
     */
    public void clear() {
        throw new UnsupportedOperationException();
    }

    /**
     * Throws an unsupported operation exception.
     *
     * @param symbol Symbol is ignored.
     * @return Always throws an exception before returning a value.
     * @throws UnsupportedOperationException Always.
     */
    public int getOrAddSymbol(String symbol) {
        throw new UnsupportedOperationException();
    }

    /**
     * Throws an unsupported operation exception.
     *
     * @param symbol Symbol is ignored.
     * @return Always throws an exception before returning a value.
     * @throws UnsupportedOperationException Always.
     */
    public int removeSymbol(String symbol) {
        throw new UnsupportedOperationException();
    }

    /**
     * Serialization proxy for a compiled symbol table.  Writes the
     * number of symbols followed by each symbol as a UTF string, and
     * reads them back into a new table in the same order.
     */
    static class Serializer extends AbstractExternalizable {
        static final long serialVersionUID = 2115083345444042460L;

        /** Table to write; <code>null</code> when built by the no-arg constructor. */
        private final CompiledSymbolTable mSymbolTable;

        /**
         * Construct a serializer for the specified symbol table.
         *
         * @param symbolTable Symbol table to be written.
         */
        public Serializer(CompiledSymbolTable symbolTable) {
            mSymbolTable = symbolTable;
        }

        /**
         * Construct a serializer with no symbol table.
         */
        public Serializer() {
            this(null);
        }

        /**
         * Write the symbol count and then each symbol, in identifier
         * order, to the specified output.
         *
         * @param out Object output to which the symbols are written.
         * @throws IOException If there is an exception writing to the
         * underlying output.
         */
        public void writeExternal(ObjectOutput out)
            throws IOException {
            out.writeInt(mSymbolTable.mSymbols.length);
            for (String symbol : mSymbolTable.mSymbols)
                out.writeUTF(symbol);
        }

        /**
         * Read a symbol count and that many symbols from the specified
         * input and return a symbol table holding them in the order read.
         *
         * @param in Object input from which the symbols are read.
         * @return The reconstructed compiled symbol table.
         * @throws ClassNotFoundException Declared by the signature; not
         * thrown by this implementation.
         * @throws IOException If there is an exception reading from the
         * underlying input.
         */
        public Object read(ObjectInput in)
            throws ClassNotFoundException, IOException {
            int numSymbols = in.readInt();
            String[] symbols = new String[numSymbols];
            for (int i = 0; i < numSymbols; ++i)
                symbols[i] = in.readUTF();
            // Symbols are read in the order written, so the array is
            // stored directly rather than copied and re-sorted.
            return new CompiledSymbolTable(symbols,IGNORE);
        }

        /** Placeholder argument selecting the non-copying constructor. */
        static final boolean IGNORE = true;
    }

}

// java2s.com  | Contact Us | Privacy Policy
// Copyright 2009 - 12 Demo Source and Support. All rights reserved.
// All other trademarks are property of their respective owners.