edu.stanford.nlp.parser.lexparser.Options.java Source code

Introduction

Here is the source code for edu.stanford.nlp.parser.lexparser.Options.java
Source

package edu.stanford.nlp.parser.lexparser;

import edu.stanford.nlp.util.logging.Redwood;

import edu.stanford.nlp.trees.CompositeTreeTransformer;
import edu.stanford.nlp.trees.TreebankLanguagePack;
import edu.stanford.nlp.trees.TreeTransformer;
import java.util.function.Function;
import edu.stanford.nlp.util.Generics;
import edu.stanford.nlp.util.ReflectionLoading;
import edu.stanford.nlp.util.StringUtils;

import java.io.*;
import java.util.*;

/**
 * This class contains options to the parser which MUST be the SAME at
 * both training and testing (parsing) time in order for the parser to
 * work properly.  It also contains an object which stores the options
 * used by the parser at training time and an object which contains
 * default options for test use.
 *
 * @author Dan Klein
 * @author Christopher Manning
 * @author John Bauer
 */
public class Options implements Serializable {

    /** Logging channel for this class; final because it is assigned exactly once here. */
    private static final Redwood.RedwoodChannels log = Redwood.channels(Options.class);

    public Options() {
        // No language given: delegate to the one-argument constructor with
        // a fresh EnglishTreebankParserParams.
        this(new EnglishTreebankParserParams());
    }

    public Options(TreebankLangParserParams tlpParams) {
        // Keep the language-specific parameters: setOption and setOptionOrWarn
        // hand flags to them when this class's own setOptionFlag does not
        // advance the index.
        this.tlpParams = tlpParams;
    }

    /**
     * Set options based on a String array in the style of
     * commandline flags. This method goes through the array until it ends,
     * processing options, as for {@link #setOption}.
     *
     * @param flags Array of options (or as a varargs list of arguments).
     *      The options passed in should
     *      be specified like command-line arguments, including with an initial
     *      minus sign &ndash; for example,
     *          {"-outputFormat", "typedDependencies", "-maxLength", "70"}
     * @throws IllegalArgumentException If an unknown flag is passed in
     */
    public void setOptions(String... flags) {
        // Cover the whole array: start at the first element, stop at its length.
        final int endIndexPlusOne = flags.length;
        setOptions(flags, 0, endIndexPlusOne);
    }

    /**
     * Set options based on a String array in the style of
     * commandline flags. This method goes through the array until it ends,
     * processing options, as for {@link #setOption}.
     *
     * @param flags Array of options.  The options passed in should
     *      be specified like command-line arguments, including with an initial
     *      minus sign &ndash; for example,
     *          {"-outputFormat", "typedDependencies", "-maxLength", "70"}
     * @param startIndex The index in the array to begin processing options at
     * @param endIndexPlusOne A number one greater than the last array index at
     *      which options should be processed
     * @throws IllegalArgumentException If an unknown flag is passed in
     */
    public void setOptions(final String[] flags, final int startIndex, final int endIndexPlusOne) {
        // Each call to setOption consumes one option (flag plus any arguments)
        // and reports where the next one begins, so the cursor is advanced by
        // the callee rather than by this loop.
        int position = startIndex;
        while (position < endIndexPlusOne) {
            position = setOption(flags, position);
        }
    }

    /**
     * Set options based on a String array in the style of
     * commandline flags. This method goes through the array until it ends,
     * processing options, as for {@link #setOptionOrWarn}. Unlike
     * {@link #setOptions(String...)}, an unknown flag is not rejected: it is
     * logged as a warning and skipped.
     *
     * @param flags Array of options (or as a varargs list of arguments).
     *      The options passed in should
     *      be specified like command-line arguments, including with an initial
     *      minus sign &ndash; for example,
     *          {"-outputFormat", "typedDependencies", "-maxLength", "70"}
     * @throws IllegalArgumentException If a recognized flag is given an invalid
     *      value (unknown flags do not cause an exception)
     */
    public void setOptionsOrWarn(String... flags) {
        // Cover the whole array: start at the first element, stop at its length.
        final int endIndexPlusOne = flags.length;
        setOptionsOrWarn(flags, 0, endIndexPlusOne);
    }

    /**
     * Set options based on a String array in the style of
     * commandline flags. This method goes through the array until it ends,
     * processing options, as for {@link #setOptionOrWarn}. Unlike
     * {@link #setOptions(String[], int, int)}, an unknown flag is not rejected:
     * it is logged as a warning and skipped.
     *
     * @param flags Array of options.  The options passed in should
     *      be specified like command-line arguments, including with an initial
     *      minus sign &ndash; for example,
     *          {"-outputFormat", "typedDependencies", "-maxLength", "70"}
     * @param startIndex The index in the array to begin processing options at
     * @param endIndexPlusOne A number one greater than the last array index at
     *      which options should be processed
     * @throws IllegalArgumentException If a recognized flag is given an invalid
     *      value (unknown flags do not cause an exception)
     */
    public void setOptionsOrWarn(final String[] flags, final int startIndex, final int endIndexPlusOne) {
        // setOptionOrWarn always returns an index past the one it was given
        // (it steps over unrecognized entries itself), so the cursor is
        // advanced by the callee rather than by this loop.
        int position = startIndex;
        while (position < endIndexPlusOne) {
            position = setOptionOrWarn(flags, position);
        }
    }

    /**
     * Set an option based on a String array in the style of
     * commandline flags. The option may
     * be either one known by the Options object, or one recognized by the
     * TreebankLangParserParams which has already been set up inside the Options
     * object, and then the option is set in the language-particular
     * TreebankLangParserParams.
     * Note that despite this method being an instance method, many flags
     * are actually set as static class variables in the Train and Test
     * classes (this should be fixed some day).
     * Some options (there are many others; see the source code):
     * <ul>
     * <li> <code>-maxLength n</code> set the maximum length sentence to parse (inclusively)
     * <li> <code>-printTT</code> print the training trees in raw, annotated, and annotated+binarized form.  Useful for debugging and other miscellany.
     * <li> <code>-printAnnotated filename</code> use only in conjunction with -printTT.  Redirects printing of annotated training trees to <code>filename</code>.
     * <li> <code>-forceTags</code> when the parser is tested against a set of gold standard trees, use the tagged yield, instead of just the yield, as input.
     * </ul>
     *
     * @param flags An array of options arguments, command-line style.  E.g. {"-maxLength", "50"}.
     * @param i The index in flags to start at when processing an option
     * @return The index in flags of the position after the last element used in
     *      processing this option. If the current array position cannot be processed as a valid
     *      option, then a warning message is printed to stderr and the return value is <code>i+1</code>
     */
    public int setOptionOrWarn(String[] flags, int i) {
        // Let this Options object try the flag first.
        final int afterOwnFlag = setOptionFlag(flags, i);
        if (afterOwnFlag != i) {
            return afterOwnFlag;
        }

        // The index did not move, so offer the same position to the
        // language-specific parameters.
        final int afterLanguageFlag = tlpParams.setOptionFlag(flags, i);
        if (afterLanguageFlag != i) {
            return afterLanguageFlag;
        }

        // Neither handler advanced: report the entry and step over just this one element.
        log.info("WARNING! lexparser.Options: Unknown option ignored: " + flags[i]);
        return i + 1;
    }

    /**
     * Set an option based on a String array in the style of
     * commandline flags. The option may
     * be either one known by the Options object, or one recognized by the
     * TreebankLangParserParams which has already been set up inside the Options
     * object, and then the option is set in the language-particular
     * TreebankLangParserParams.
     * Note that despite this method being an instance method, many flags
     * are actually set as static class variables in the Train and Test
     * classes (this should be fixed some day).
     * Some options (there are many others; see the source code):
     * <ul>
     * <li> <code>-maxLength n</code> set the maximum length sentence to parse (inclusively)
     * <li> <code>-printTT</code> print the training trees in raw, annotated, and annotated+binarized form.  Useful for debugging and other miscellany.
     * <li> <code>-printAnnotated filename</code> use only in conjunction with -printTT.  Redirects printing of annotated training trees to <code>filename</code>.
     * <li> <code>-forceTags</code> when the parser is tested against a set of gold standard trees, use the tagged yield, instead of just the yield, as input.
     * </ul>
     *
     * @param flags An array of options arguments, command-line style.  E.g. {"-maxLength", "50"}.
     * @param i The index in flags to start at when processing an option
     * @return The index in flags of the position after the last element used in
     *      processing this option.
     * @throws IllegalArgumentException If the current array position cannot be
     *      processed as a valid option
     */
    public int setOption(String[] flags, int i) {
        // Let this Options object try the flag first.
        final int afterOwnFlag = setOptionFlag(flags, i);
        if (afterOwnFlag != i) {
            return afterOwnFlag;
        }

        // The index did not move, so offer the same position to the
        // language-specific parameters.
        final int afterLanguageFlag = tlpParams.setOptionFlag(flags, i);
        if (afterLanguageFlag != i) {
            return afterLanguageFlag;
        }

        // Neither handler advanced past this entry, so it is not a valid option.
        throw new IllegalArgumentException("Unknown option: " + flags[i]);
    }

    /**
     * Set an option in this object, based on a String array in the style of
     * commandline flags.  The option is only processed with respect to
     * options directly known by the Options object.
     * Some options (there are many others; see the source code):
     * <ul>
     * <li> <code>-maxLength n</code> set the maximum length sentence to parse (inclusively)
     * <li> <code>-printTT</code> print the training trees in raw, annotated, and annotated+binarized form.  Useful for debugging and other miscellany.
     * <li> <code>-printAnnotated filename</code> use only in conjunction with -printTT.  Redirects printing of annotated training trees to <code>filename</code>.
     * <li> <code>-forceTags</code> when the parser is tested against a set of gold standard trees, use the tagged yield, instead of just the yield, as input.
     * </ul>
     *
     * @param args An array of options arguments, command-line style.  E.g. {"-maxLength", "50"}.
     * @param i The index in args to start at when processing an option
     * @return The index in args of the position after the last element used in
     *      processing this option, or the value i unchanged if a valid option couldn't
     *      be processed starting at position i.
     */
    protected int setOptionFlag(String[] args, int i) {
        if (args[i].equalsIgnoreCase("-PCFG")) {
            doDep = false;
            doPCFG = true;
            i++;
        } else if (args[i].equalsIgnoreCase("-dep")) {
            doDep = true;
            doPCFG = false;
            i++;
        } else if (args[i].equalsIgnoreCase("-factored")) {
            doDep = true;
            doPCFG = true;
            testOptions.useFastFactored = false;
            i++;
        } else if (args[i].equalsIgnoreCase("-fastFactored")) {
            doDep = true;
            doPCFG = true;
            testOptions.useFastFactored = true;
            i++;
        } else if (args[i].equalsIgnoreCase("-noRecoveryTagging")) {
            testOptions.noRecoveryTagging = true;
            i++;
        } else if (args[i].equalsIgnoreCase("-useLexiconToScoreDependencyPwGt")) {
            testOptions.useLexiconToScoreDependencyPwGt = true;
            i++;
        } else if (args[i].equalsIgnoreCase("-useSmoothTagProjection")) {
            useSmoothTagProjection = true;
            i++;
        } else if (args[i].equalsIgnoreCase("-useUnigramWordSmoothing")) {
            useUnigramWordSmoothing = true;
            i++;
        } else if (args[i].equalsIgnoreCase("-useNonProjectiveDependencyParser")) {
            testOptions.useNonProjectiveDependencyParser = true;
            i++;
        } else if (args[i].equalsIgnoreCase("-maxLength") && (i + 1 < args.length)) {
            testOptions.maxLength = Integer.parseInt(args[i + 1]);
            i += 2;
        } else if (args[i].equalsIgnoreCase("-MAX_ITEMS") && (i + 1 < args.length)) {
            testOptions.MAX_ITEMS = Integer.parseInt(args[i + 1]);
            i += 2;
        } else if (args[i].equalsIgnoreCase("-trainLength") && (i + 1 < args.length)) {
            // train on only short sentences
            trainOptions.trainLengthLimit = Integer.parseInt(args[i + 1]);
            i += 2;
        } else if (args[i].equalsIgnoreCase("-lengthNormalization")) {
            testOptions.lengthNormalization = true;
            i++;
        } else if (args[i].equalsIgnoreCase("-iterativeCKY")) {
            testOptions.iterativeCKY = true;
            i++;
        } else if (args[i].equalsIgnoreCase("-vMarkov") && (i + 1 < args.length)) {
            int order = Integer.parseInt(args[i + 1]);
            if (order <= 1) {
                trainOptions.PA = false;
                trainOptions.gPA = false;
            } else if (order == 2) {
                trainOptions.PA = true;
                trainOptions.gPA = false;
            } else if (order >= 3) {
                trainOptions.PA = true;
                trainOptions.gPA = true;
            }
            i += 2;
        } else if (args[i].equalsIgnoreCase("-vSelSplitCutOff") && (i + 1 < args.length)) {
            trainOptions.selectiveSplitCutOff = Double.parseDouble(args[i + 1]);
            trainOptions.selectiveSplit = trainOptions.selectiveSplitCutOff > 0.0;
            i += 2;
        } else if (args[i].equalsIgnoreCase("-vSelPostSplitCutOff") && (i + 1 < args.length)) {
            trainOptions.selectivePostSplitCutOff = Double.parseDouble(args[i + 1]);
            trainOptions.selectivePostSplit = trainOptions.selectivePostSplitCutOff > 0.0;
            i += 2;
        } else if (args[i].equalsIgnoreCase("-deleteSplitters") && (i + 1 < args.length)) {
            String[] toDel = args[i + 1].split(" *, *");
            trainOptions.deleteSplitters = Generics.newHashSet(Arrays.asList(toDel));
            i += 2;
        } else if (args[i].equalsIgnoreCase("-postSplitWithBaseCategory")) {
            trainOptions.postSplitWithBaseCategory = true;
            i += 1;
        } else if (args[i].equalsIgnoreCase("-vPostMarkov") && (i + 1 < args.length)) {
            int order = Integer.parseInt(args[i + 1]);
            if (order <= 1) {
                trainOptions.postPA = false;
                trainOptions.postGPA = false;
            } else if (order == 2) {
                trainOptions.postPA = true;
                trainOptions.postGPA = false;
            } else if (order >= 3) {
                trainOptions.postPA = true;
                trainOptions.postGPA = true;
            }
            i += 2;
        } else if (args[i].equalsIgnoreCase("-hMarkov") && (i + 1 < args.length)) {
            int order = Integer.parseInt(args[i + 1]);
            if (order >= 0) {
                trainOptions.markovOrder = order;
                trainOptions.markovFactor = true;
            } else {
                trainOptions.markovFactor = false;
            }
            i += 2;
        } else if (args[i].equalsIgnoreCase("-distanceBins") && (i + 1 < args.length)) {
            int numBins = Integer.parseInt(args[i + 1]);
            if (numBins <= 1) {
                distance = false;
            } else if (numBins == 4) {
                distance = true;
                coarseDistance = true;
            } else if (numBins == 5) {
                distance = true;
                coarseDistance = false;
            } else {
                throw new IllegalArgumentException("Invalid value for -distanceBin: " + args[i + 1]);
            }
            i += 2;
        } else if (args[i].equalsIgnoreCase("-noStop")) {
            genStop = false;
            i++;
        } else if (args[i].equalsIgnoreCase("-nonDirectional")) {
            directional = false;
            i++;
        } else if (args[i].equalsIgnoreCase("-depWeight") && (i + 1 < args.length)) {
            testOptions.depWeight = Double.parseDouble(args[i + 1]);
            i += 2;
        } else if (args[i].equalsIgnoreCase("-printPCFGkBest") && (i + 1 < args.length)) {
            testOptions.printPCFGkBest = Integer.parseInt(args[i + 1]);
            i += 2;
        } else if (args[i].equalsIgnoreCase("-evalPCFGkBest") && (i + 1 < args.length)) {
            testOptions.evalPCFGkBest = Integer.parseInt(args[i + 1]);
            i += 2;
        } else if (args[i].equalsIgnoreCase("-printFactoredKGood") && (i + 1 < args.length)) {
            testOptions.printFactoredKGood = Integer.parseInt(args[i + 1]);
            i += 2;
        } else if (args[i].equalsIgnoreCase("-smoothTagsThresh") && (i + 1 < args.length)) {
            lexOptions.smoothInUnknownsThreshold = Integer.parseInt(args[i + 1]);
            i += 2;
        } else if (args[i].equalsIgnoreCase("-unseenSmooth") && (i + 1 < args.length)) {
            testOptions.unseenSmooth = Double.parseDouble(args[i + 1]);
            i += 2;
        } else if (args[i].equalsIgnoreCase("-fractionBeforeUnseenCounting") && (i + 1 < args.length)) {
            trainOptions.fractionBeforeUnseenCounting = Double.parseDouble(args[i + 1]);
            i += 2;
        } else if (args[i].equalsIgnoreCase("-hSelSplitThresh") && (i + 1 < args.length)) {
            trainOptions.HSEL_CUT = Integer.parseInt(args[i + 1]);
            trainOptions.hSelSplit = trainOptions.HSEL_CUT > 0;
            i += 2;
        } else if (args[i].equalsIgnoreCase("-nohSelSplit")) {
            trainOptions.hSelSplit = false;
            i += 1;
        } else if (args[i].equalsIgnoreCase("-tagPA")) {
            trainOptions.tagPA = true;
            i += 1;
        } else if (args[i].equalsIgnoreCase("-noTagPA")) {
            trainOptions.tagPA = false;
            i += 1;
        } else if (args[i].equalsIgnoreCase("-tagSelSplitCutOff") && (i + 1 < args.length)) {
            trainOptions.tagSelectiveSplitCutOff = Double.parseDouble(args[i + 1]);
            trainOptions.tagSelectiveSplit = trainOptions.tagSelectiveSplitCutOff > 0.0;
            i += 2;
        } else if (args[i].equalsIgnoreCase("-tagSelPostSplitCutOff") && (i + 1 < args.length)) {
            trainOptions.tagSelectivePostSplitCutOff = Double.parseDouble(args[i + 1]);
            trainOptions.tagSelectivePostSplit = trainOptions.tagSelectivePostSplitCutOff > 0.0;
            i += 2;
        } else if (args[i].equalsIgnoreCase("-noTagSplit")) {
            trainOptions.noTagSplit = true;
            i += 1;
        } else if (args[i].equalsIgnoreCase("-uwm") && (i + 1 < args.length)) {
            lexOptions.useUnknownWordSignatures = Integer.parseInt(args[i + 1]);
            i += 2;
        } else if (args[i].equalsIgnoreCase("-unknownSuffixSize") && (i + 1 < args.length)) {
            lexOptions.unknownSuffixSize = Integer.parseInt(args[i + 1]);
            i += 2;
        } else if (args[i].equalsIgnoreCase("-unknownPrefixSize") && (i + 1 < args.length)) {
            lexOptions.unknownPrefixSize = Integer.parseInt(args[i + 1]);
            i += 2;
        } else if (args[i].equalsIgnoreCase("-uwModelTrainer") && (i + 1 < args.length)) {
            lexOptions.uwModelTrainer = args[i + 1];
            i += 2;
        } else if (args[i].equalsIgnoreCase("-openClassThreshold") && (i + 1 < args.length)) {
            trainOptions.openClassTypesThreshold = Integer.parseInt(args[i + 1]);
            i += 2;
        } else if (args[i].equalsIgnoreCase("-unary") && i + 1 < args.length) {
            trainOptions.markUnary = Integer.parseInt(args[i + 1]);
            i += 2;
        } else if (args[i].equalsIgnoreCase("-unaryTags")) {
            trainOptions.markUnaryTags = true;
            i += 1;
        } else if (args[i].equalsIgnoreCase("-mutate")) {
            lexOptions.smartMutation = true;
            i += 1;
        } else if (args[i].equalsIgnoreCase("-useUnicodeType")) {
            lexOptions.useUnicodeType = true;
            i += 1;
        } else if (args[i].equalsIgnoreCase("-rightRec")) {
            trainOptions.rightRec = true;
            i += 1;
        } else if (args[i].equalsIgnoreCase("-noRightRec")) {
            trainOptions.rightRec = false;
            i += 1;
        } else if (args[i].equalsIgnoreCase("-preTag")) {
            testOptions.preTag = true;
            i += 1;
        } else if (args[i].equalsIgnoreCase("-forceTags")) {
            testOptions.forceTags = true;
            i += 1;
        } else if (args[i].equalsIgnoreCase("-taggerSerializedFile")) {
            testOptions.taggerSerializedFile = args[i + 1];
            i += 2;
        } else if (args[i].equalsIgnoreCase("-forceTagBeginnings")) {
            testOptions.forceTagBeginnings = true;
            i += 1;
        } else if (args[i].equalsIgnoreCase("-noFunctionalForcing")) {
            testOptions.noFunctionalForcing = true;
            i += 1;
        } else if (args[i].equalsIgnoreCase("-scTags")) {
            dcTags = false;
            i += 1;
        } else if (args[i].equalsIgnoreCase("-dcTags")) {
            dcTags = true;
            i += 1;
        } else if (args[i].equalsIgnoreCase("-basicCategoryTagsInDependencyGrammar")) {
            trainOptions.basicCategoryTagsInDependencyGrammar = true;
            i += 1;
        } else if (args[i].equalsIgnoreCase("-evalb")) {
            testOptions.evalb = true;
            i += 1;
        } else if (args[i].equalsIgnoreCase("-v") || args[i].equalsIgnoreCase("-verbose")) {
            testOptions.verbose = true;
            i += 1;
        } else if (args[i].equalsIgnoreCase("-outputFilesDirectory") && i + 1 < args.length) {
            testOptions.outputFilesDirectory = args[i + 1];
            i += 2;
        } else if (args[i].equalsIgnoreCase("-outputFilesExtension") && i + 1 < args.length) {
            testOptions.outputFilesExtension = args[i + 1];
            i += 2;
        } else if (args[i].equalsIgnoreCase("-outputFilesPrefix") && i + 1 < args.length) {
            testOptions.outputFilesPrefix = args[i + 1];
            i += 2;
        } else if (args[i].equalsIgnoreCase("-outputkBestEquivocation") && i + 1 < args.length) {
            testOptions.outputkBestEquivocation = args[i + 1];
            i += 2;
        } else if (args[i].equalsIgnoreCase("-writeOutputFiles")) {
            testOptions.writeOutputFiles = true;
            i += 1;
        } else if (args[i].equalsIgnoreCase("-printAllBestParses")) {
            testOptions.printAllBestParses = true;
            i += 1;
        } else if (args[i].equalsIgnoreCase("-outputTreeFormat") || args[i].equalsIgnoreCase("-outputFormat")) {
            testOptions.outputFormat = args[i + 1];
            i += 2;
        } else if (args[i].equalsIgnoreCase("-outputTreeFormatOptions")
                || args[i].equalsIgnoreCase("-outputFormatOptions")) {
            testOptions.outputFormatOptions = args[i + 1];
            i += 2;
        } else if (args[i].equalsIgnoreCase("-addMissingFinalPunctuation")) {
            testOptions.addMissingFinalPunctuation = true;
            i += 1;
        } else if (args[i].equalsIgnoreCase("-flexiTag")) {
            lexOptions.flexiTag = true;
            i += 1;
        } else if (args[i].equalsIgnoreCase("-lexiTag")) {
            lexOptions.flexiTag = false;
            i += 1;
        } else if (args[i].equalsIgnoreCase("-useSignatureForKnownSmoothing")) {
            lexOptions.useSignatureForKnownSmoothing = true;
            i += 1;
        } else if (args[i].equalsIgnoreCase("-wordClassesFile")) {
            lexOptions.wordClassesFile = args[i + 1];
            i += 2;
        } else if (args[i].equalsIgnoreCase("-compactGrammar")) {
            trainOptions.compactGrammar = Integer.parseInt(args[i + 1]);
            i += 2;
        } else if (args[i].equalsIgnoreCase("-markFinalStates")) {
            trainOptions.markFinalStates = args[i + 1].equalsIgnoreCase("true");
            i += 2;
        } else if (args[i].equalsIgnoreCase("-leftToRight")) {
            trainOptions.leftToRight = args[i + 1].equals("true");
            i += 2;
        } else if (args[i].equalsIgnoreCase("-cnf")) {
            forceCNF = true;
            i += 1;
        } else if (args[i].equalsIgnoreCase("-smoothRules")) {
            trainOptions.ruleSmoothing = true;
            trainOptions.ruleSmoothingAlpha = Double.valueOf(args[i + 1]);
            i += 2;
        } else if (args[i].equalsIgnoreCase("-nodePrune") && i + 1 < args.length) {
            nodePrune = args[i + 1].equalsIgnoreCase("true");
            i += 2;
        } else if (args[i].equalsIgnoreCase("-noDoRecovery")) {
            testOptions.doRecovery = false;
            i += 1;
        } else if (args[i].equalsIgnoreCase("-acl03chinese")) {
            trainOptions.markovOrder = 1;
            trainOptions.markovFactor = true;
            // no increment
        } else if (args[i].equalsIgnoreCase("-wordFunction")) {
            wordFunction = ReflectionLoading.loadByReflection(args[i + 1]);
            i += 2;
        } else if (args[i].equalsIgnoreCase("-acl03pcfg")) {
            doDep = false;
            doPCFG = true;
            // lexOptions.smoothInUnknownsThreshold = 30;
            trainOptions.markUnary = 1;
            trainOptions.PA = true;
            trainOptions.gPA = false;
            trainOptions.tagPA = true;
            trainOptions.tagSelectiveSplit = false;
            trainOptions.rightRec = true;
            trainOptions.selectiveSplit = true;
            trainOptions.selectiveSplitCutOff = 400.0;
            trainOptions.markovFactor = true;
            trainOptions.markovOrder = 2;
            trainOptions.hSelSplit = true;
            lexOptions.useUnknownWordSignatures = 2;
            lexOptions.flexiTag = true;
            // DAN: Tag double-counting is BAD for PCFG-only parsing
            dcTags = false;
            // don't increment i so it gets language specific stuff as well
        } else if (args[i].equalsIgnoreCase("-jenny")) {
            doDep = false;
            doPCFG = true;
            // lexOptions.smoothInUnknownsThreshold = 30;
            trainOptions.markUnary = 1;
            trainOptions.PA = false;
            trainOptions.gPA = false;
            trainOptions.tagPA = false;
            trainOptions.tagSelectiveSplit = false;
            trainOptions.rightRec = true;
            trainOptions.selectiveSplit = false;
            //      trainOptions.selectiveSplitCutOff = 400.0;
            trainOptions.markovFactor = false;
            //      trainOptions.markovOrder = 2;
            trainOptions.hSelSplit = false;
            lexOptions.useUnknownWordSignatures = 2;
            lexOptions.flexiTag = true;
            // DAN: Tag double-counting is BAD for PCFG-only parsing
            dcTags = false;
            // don't increment i so it gets language specific stuff as well
        } else if (args[i].equalsIgnoreCase("-goodPCFG")) {
            doDep = false;
            doPCFG = true;
            // op.lexOptions.smoothInUnknownsThreshold = 30;
            trainOptions.markUnary = 1;
            trainOptions.PA = true;
            trainOptions.gPA = false;
            trainOptions.tagPA = true;
            trainOptions.tagSelectiveSplit = false;
            trainOptions.rightRec = true;
            trainOptions.selectiveSplit = true;
            trainOptions.selectiveSplitCutOff = 400.0;
            trainOptions.markovFactor = true;
            trainOptions.markovOrder = 2;
            trainOptions.hSelSplit = true;
            lexOptions.useUnknownWordSignatures = 2;
            lexOptions.flexiTag = true;
            // DAN: Tag double-counting is BAD for PCFG-only parsing
            dcTags = false;
            String[] delSplit = { "-deleteSplitters", "VP^NP,VP^VP,VP^SINV,VP^SQ" };
            if (this.setOptionFlag(delSplit, 0) != 2) {
                log.info("Error processing deleteSplitters");
            }
            // don't increment i so it gets language specific stuff as well
        } else if (args[i].equalsIgnoreCase("-linguisticPCFG")) {
            doDep = false;
            doPCFG = true;
            // op.lexOptions.smoothInUnknownsThreshold = 30;
            trainOptions.markUnary = 1;
            trainOptions.PA = true;
            trainOptions.gPA = false;
            trainOptions.tagPA = true; // on at the moment, but iffy
            trainOptions.tagSelectiveSplit = false;
            trainOptions.rightRec = false; // not for linguistic
            trainOptions.selectiveSplit = true;
            trainOptions.selectiveSplitCutOff = 400.0;
            trainOptions.markovFactor = true;
            trainOptions.markovOrder = 2;
            trainOptions.hSelSplit = true;
            lexOptions.useUnknownWordSignatures = 5; // different from acl03pcfg
            lexOptions.flexiTag = false; // different from acl03pcfg
            // DAN: Tag double-counting is BAD for PCFG-only parsing
            dcTags = false;
            // don't increment i so it gets language specific stuff as well
        } else if (args[i].equalsIgnoreCase("-ijcai03")) {
            doDep = true;
            doPCFG = true;
            trainOptions.markUnary = 0;
            trainOptions.PA = true;
            trainOptions.gPA = false;
            trainOptions.tagPA = false;
            trainOptions.tagSelectiveSplit = false;
            trainOptions.rightRec = false;
            trainOptions.selectiveSplit = true;
            trainOptions.selectiveSplitCutOff = 300.0;
            trainOptions.markovFactor = true;
            trainOptions.markovOrder = 2;
            trainOptions.hSelSplit = true;
            trainOptions.compactGrammar = 0; /// cdm: May 2005 compacting bad for factored?
            lexOptions.useUnknownWordSignatures = 2;
            lexOptions.flexiTag = false;
            dcTags = true;
            // op.nodePrune = true;  // cdm: May 2005: this doesn't help
            // don't increment i so it gets language specific stuff as well
        } else if (args[i].equalsIgnoreCase("-goodFactored")) {
            doDep = true;
            doPCFG = true;
            trainOptions.markUnary = 0;
            trainOptions.PA = true;
            trainOptions.gPA = false;
            trainOptions.tagPA = false;
            trainOptions.tagSelectiveSplit = false;
            trainOptions.rightRec = false;
            trainOptions.selectiveSplit = true;
            trainOptions.selectiveSplitCutOff = 300.0;
            trainOptions.markovFactor = true;
            trainOptions.markovOrder = 2;
            trainOptions.hSelSplit = true;
            trainOptions.compactGrammar = 0; /// cdm: May 2005 compacting bad for factored?
            lexOptions.useUnknownWordSignatures = 5; // different from ijcai03
            lexOptions.flexiTag = false;
            dcTags = true;
            // op.nodePrune = true;  // cdm: May 2005: this doesn't help
            // don't increment i so it gets language specific stuff as well
        } else if (args[i].equalsIgnoreCase("-chineseFactored")) {
            // Single counting tag->word rewrite is also much better for Chinese
            // Factored.  Bracketing F1 goes up about 0.7%.
            dcTags = false;
            lexOptions.useUnicodeType = true;
            trainOptions.markovOrder = 2;
            trainOptions.hSelSplit = true;
            trainOptions.markovFactor = true;
            trainOptions.HSEL_CUT = 50;
            // trainOptions.openClassTypesThreshold=1;  // so can get unseen punctuation
            // trainOptions.fractionBeforeUnseenCounting=0.0;  // so can get unseen punctuation
            // don't increment i so it gets language specific stuff as well
        } else if (args[i].equalsIgnoreCase("-arabicFactored")) {
            doDep = true;
            doPCFG = true;
            dcTags = false; // "false" seems to help Arabic about 0.1% F1
            trainOptions.markovFactor = true;
            trainOptions.markovOrder = 2;
            trainOptions.hSelSplit = true;
            trainOptions.HSEL_CUT = 75; // 75 bit better than 50, 100 a bit worse
            trainOptions.PA = true;
            trainOptions.gPA = false;
            trainOptions.selectiveSplit = true;
            trainOptions.selectiveSplitCutOff = 300.0;
            trainOptions.markUnary = 1; // Helps PCFG and marginally factLB
            // trainOptions.compactGrammar = 0;  // Doesn't seem to help or only 0.05% F1
            lexOptions.useUnknownWordSignatures = 9;
            lexOptions.unknownPrefixSize = 1;
            lexOptions.unknownSuffixSize = 1;
            testOptions.MAX_ITEMS = 500000; // Arabic sentences are long enough that this helps a fraction
            // don't increment i so it gets language specific stuff as well
        } else if (args[i].equalsIgnoreCase("-frenchFactored")) {
            doDep = true;
            doPCFG = true;
            dcTags = false; //wsg2011: Setting to false improves F1 by 0.5%
            trainOptions.markovFactor = true;
            trainOptions.markovOrder = 2;
            trainOptions.hSelSplit = true;
            trainOptions.HSEL_CUT = 75;
            trainOptions.PA = true;
            trainOptions.gPA = false;
            trainOptions.selectiveSplit = true;
            trainOptions.selectiveSplitCutOff = 300.0;
            trainOptions.markUnary = 0; //Unary rule marking bad for french..setting to 0 gives +0.3 F1
            lexOptions.useUnknownWordSignatures = 1;
            lexOptions.unknownPrefixSize = 1;
            lexOptions.unknownSuffixSize = 2;

        } else if (args[i].equalsIgnoreCase("-chinesePCFG")) {
            trainOptions.markovOrder = 2;
            trainOptions.markovFactor = true;
            trainOptions.HSEL_CUT = 5;
            trainOptions.PA = true;
            trainOptions.gPA = true;
            trainOptions.selectiveSplit = false;
            doDep = false;
            doPCFG = true;
            // Single counting tag->word rewrite is also much better for Chinese PCFG
            // Bracketing F1 is up about 2% and tag accuracy about 1% (exact by 6%)
            dcTags = false;
            // no increment
        } else if (args[i].equalsIgnoreCase("-printTT") && (i + 1 < args.length)) {
            trainOptions.printTreeTransformations = Integer.parseInt(args[i + 1]);
            i += 2;
        } else if (args[i].equalsIgnoreCase("-printAnnotatedRuleCounts")) {
            trainOptions.printAnnotatedRuleCounts = true;
            i++;
        } else if (args[i].equalsIgnoreCase("-printAnnotatedStateCounts")) {
            trainOptions.printAnnotatedStateCounts = true;
            i++;
        } else if (args[i].equalsIgnoreCase("-printAnnotated") && (i + 1 < args.length)) {
            try {
                trainOptions.printAnnotatedPW = tlpParams.pw(new FileOutputStream(args[i + 1]));
            } catch (IOException ioe) {
                trainOptions.printAnnotatedPW = null;
            }
            i += 2;
        } else if (args[i].equalsIgnoreCase("-printBinarized") && (i + 1 < args.length)) {
            try {
                trainOptions.printBinarizedPW = tlpParams.pw(new FileOutputStream(args[i + 1]));
            } catch (IOException ioe) {
                trainOptions.printBinarizedPW = null;
            }
            i += 2;
        } else if (args[i].equalsIgnoreCase("-printStates")) {
            trainOptions.printStates = true;
            i++;
        } else if (args[i].equalsIgnoreCase("-preTransformer") && (i + 1 < args.length)) {
            String[] classes = args[i + 1].split(",");
            i += 2;
            if (classes.length == 1) {
                trainOptions.preTransformer = ReflectionLoading.loadByReflection(classes[0], this);
            } else if (classes.length > 1) {
                CompositeTreeTransformer composite = new CompositeTreeTransformer();
                trainOptions.preTransformer = composite;
                for (String clazz : classes) {
                    TreeTransformer transformer = ReflectionLoading.loadByReflection(clazz, this);
                    composite.addTransformer(transformer);
                }
            }
        } else if (args[i].equalsIgnoreCase("-taggedFiles") && (i + 1 < args.length)) {
            trainOptions.taggedFiles = args[i + 1];
            i += 2;
        } else if (args[i].equalsIgnoreCase("-predictSplits")) {
            // This is an experimental (and still in development)
            // reimplementation of Berkeley's state splitting grammar.
            trainOptions.predictSplits = true;
            trainOptions.compactGrammar = 0;
            i++;
        } else if (args[i].equalsIgnoreCase("-splitCount")) {
            trainOptions.splitCount = Integer.parseInt(args[i + 1]);
            i += 2;
        } else if (args[i].equalsIgnoreCase("-splitRecombineRate")) {
            trainOptions.splitRecombineRate = Double.parseDouble(args[i + 1]);
            i += 2;
        } else if (args[i].equalsIgnoreCase("-trainingThreads") || args[i].equalsIgnoreCase("-nThreads")) {
            trainOptions.trainingThreads = Integer.parseInt(args[i + 1]);
            testOptions.testingThreads = Integer.parseInt(args[i + 1]);
            i += 2;
        } else if (args[i].equalsIgnoreCase("-testingThreads")) {
            testOptions.testingThreads = Integer.parseInt(args[i + 1]);
            i += 2;
        } else if (args[i].equalsIgnoreCase("-evals")) {
            testOptions.evals = StringUtils.stringToProperties(args[i + 1], testOptions.evals);
            i += 2;
        } else if (args[i].equalsIgnoreCase("-fastFactoredCandidateMultiplier")) {
            testOptions.fastFactoredCandidateMultiplier = Integer.parseInt(args[i + 1]);
            i += 2;
        } else if (args[i].equalsIgnoreCase("-fastFactoredCandidateAddend")) {
            testOptions.fastFactoredCandidateAddend = Integer.parseInt(args[i + 1]);
            i += 2;
        } else if (args[i].equalsIgnoreCase("-quietEvaluation")) {
            testOptions.quietEvaluation = true;
            i += 1;
        } else if (args[i].equalsIgnoreCase("-noquietEvaluation")) {
            testOptions.quietEvaluation = false;
            i += 1;
        } else if (args[i].equalsIgnoreCase("-simpleBinarizedLabels")) {
            trainOptions.simpleBinarizedLabels = true;
            i += 1;
        } else if (args[i].equalsIgnoreCase("-noRebinarization")) {
            trainOptions.noRebinarization = true;
            i += 1;
        } else if (args[i].equalsIgnoreCase("-dvKBest")) {
            trainOptions.dvKBest = Integer.parseInt(args[i + 1]);
            rerankerKBest = trainOptions.dvKBest;
            i += 2;
        } else if (args[i].equalsIgnoreCase("-regCost")) {
            trainOptions.regCost = Double.parseDouble(args[i + 1]);
            i += 2;
        } else if (args[i].equalsIgnoreCase("-dvIterations") || args[i].equalsIgnoreCase("-trainingIterations")) {
            trainOptions.trainingIterations = Integer.parseInt(args[i + 1]);
            i += 2;
        } else if (args[i].equalsIgnoreCase("-stalledIterationLimit")) {
            trainOptions.stalledIterationLimit = Integer.parseInt(args[i + 1]);
            i += 2;
        } else if (args[i].equalsIgnoreCase("-dvBatchSize") || args[i].equalsIgnoreCase("-batchSize")) {
            trainOptions.batchSize = Integer.parseInt(args[i + 1]);
            i += 2;
        } else if (args[i].equalsIgnoreCase("-qnIterationsPerBatch")) {
            trainOptions.qnIterationsPerBatch = Integer.parseInt(args[i + 1]);
            i += 2;
        } else if (args[i].equalsIgnoreCase("-qnEstimates")) {
            trainOptions.qnEstimates = Integer.parseInt(args[i + 1]);
            i += 2;
        } else if (args[i].equalsIgnoreCase("-qnTolerance")) {
            trainOptions.qnTolerance = Double.parseDouble(args[i + 1]);
            i += 2;
        } else if (args[i].equalsIgnoreCase("-debugOutputFrequency")) {
            trainOptions.debugOutputFrequency = Integer.parseInt(args[i + 1]);
            i += 2;
        } else if (args[i].equalsIgnoreCase("-maxTrainTimeSeconds")) {
            trainOptions.maxTrainTimeSeconds = Integer.parseInt(args[i + 1]);
            i += 2;
        } else if (args[i].equalsIgnoreCase("-dvSeed") || args[i].equalsIgnoreCase("-randomSeed")) {
            trainOptions.randomSeed = Long.parseLong(args[i + 1]);
            i += 2;
        } else if (args[i].equalsIgnoreCase("-wordVectorFile")) {
            lexOptions.wordVectorFile = args[i + 1];
            i += 2;
        } else if (args[i].equalsIgnoreCase("-numHid")) {
            lexOptions.numHid = Integer.parseInt(args[i + 1]);
            i += 2;
        } else if (args[i].equalsIgnoreCase("-learningRate")) {
            trainOptions.learningRate = Double.parseDouble(args[i + 1]);
            i += 2;
        } else if (args[i].equalsIgnoreCase("-deltaMargin")) {
            trainOptions.deltaMargin = Double.parseDouble(args[i + 1]);
            i += 2;
        } else if (args[i].equalsIgnoreCase("-unknownNumberVector")) {
            trainOptions.unknownNumberVector = true;
            i += 1;
        } else if (args[i].equalsIgnoreCase("-noUnknownNumberVector")) {
            trainOptions.unknownNumberVector = false;
            i += 1;
        } else if (args[i].equalsIgnoreCase("-unknownDashedWordVectors")) {
            trainOptions.unknownDashedWordVectors = true;
            i += 1;
        } else if (args[i].equalsIgnoreCase("-noUnknownDashedWordVectors")) {
            trainOptions.unknownDashedWordVectors = false;
            i += 1;
        } else if (args[i].equalsIgnoreCase("-unknownCapsVector")) {
            trainOptions.unknownCapsVector = true;
            i += 1;
        } else if (args[i].equalsIgnoreCase("-noUnknownCapsVector")) {
            trainOptions.unknownCapsVector = false;
            i += 1;
        } else if (args[i].equalsIgnoreCase("-unknownChineseYearVector")) {
            trainOptions.unknownChineseYearVector = true;
            i += 1;
        } else if (args[i].equalsIgnoreCase("-noUnknownChineseYearVector")) {
            trainOptions.unknownChineseYearVector = false;
            i += 1;
        } else if (args[i].equalsIgnoreCase("-unknownChineseNumberVector")) {
            trainOptions.unknownChineseNumberVector = true;
            i += 1;
        } else if (args[i].equalsIgnoreCase("-noUnknownChineseNumberVector")) {
            trainOptions.unknownChineseNumberVector = false;
            i += 1;
        } else if (args[i].equalsIgnoreCase("-unknownChinesePercentVector")) {
            trainOptions.unknownChinesePercentVector = true;
            i += 1;
        } else if (args[i].equalsIgnoreCase("-noUnknownChinesePercentVector")) {
            trainOptions.unknownChinesePercentVector = false;
            i += 1;
        } else if (args[i].equalsIgnoreCase("-dvSimplifiedModel")) {
            trainOptions.dvSimplifiedModel = true;
            i += 1;
        } else if (args[i].equalsIgnoreCase("-scalingForInit")) {
            trainOptions.scalingForInit = Double.parseDouble(args[i + 1]);
            i += 2;
        } else if (args[i].equalsIgnoreCase("-rerankerKBest")) {
            rerankerKBest = Integer.parseInt(args[i + 1]);
            i += 2;
        } else if (args[i].equalsIgnoreCase("-baseParserWeight")) {
            baseParserWeight = Double.parseDouble(args[i + 1]);
            i += 2;
        } else if (args[i].equalsIgnoreCase("-unkWord")) {
            trainOptions.unkWord = args[i + 1];
            i += 2;
        } else if (args[i].equalsIgnoreCase("-lowercaseWordVectors")) {
            trainOptions.lowercaseWordVectors = true;
            i += 1;
        } else if (args[i].equalsIgnoreCase("-noLowercaseWordVectors")) {
            trainOptions.lowercaseWordVectors = false;
            i += 1;
        } else if (args[i].equalsIgnoreCase("-transformMatrixType")) {
            trainOptions.transformMatrixType = TrainOptions.TransformMatrixType.valueOf(args[i + 1]);
            i += 2;
        } else if (args[i].equalsIgnoreCase("-useContextWords")) {
            trainOptions.useContextWords = true;
            i += 1;
        } else if (args[i].equalsIgnoreCase("-noUseContextWords")) {
            trainOptions.useContextWords = false;
            i += 1;
        } else if (args[i].equalsIgnoreCase("-trainWordVectors")) {
            trainOptions.trainWordVectors = true;
            i += 1;
        } else if (args[i].equalsIgnoreCase("-noTrainWordVectors")) {
            trainOptions.trainWordVectors = false;
            i += 1;
        } else if (args[i].equalsIgnoreCase("-markStrahler")) {
            trainOptions.markStrahler = true;
            i += 1;
        }
        return i;
    }

    /**
     * Options that control the lexicon: how unknown words are modelled and
     * how rare words are smoothed.  The fields named in {@code params} are
     * written, one {@code "name value"} line each, by {@link #toString} and
     * read back in the same order by {@link #readData}.
     */
    public static class LexOptions implements Serializable {

        /**
         * Whether to use suffix and capitalization information for unknowns.
         * Within the BaseLexicon model options have the following meaning:
         * 0 means a single unknown token.  1 uses suffix, and capitalization.
         * 2 uses a variant (richer) form of signature.  Good.
         * Use this one.  Using the richer signatures in versions 3 or 4 seems
         * to have very marginal or no positive value.
         * 3 uses a richer form of signature that mimics the NER word type
         * patterns.  4 is a variant of 2.  5 is another with more English
         * specific morphology (good for English unknowns!).
         * 6-9 are options for Arabic.  9 codes some patterns for numbers and
         * derivational morphology, but also supports unknownPrefixSize and
         * unknownSuffixSize.
         * For German, 0 means a single unknown token, and non-zero means to use
         * capitalization of first letter and a suffix of length
         * unknownSuffixSize.
         */
        public int useUnknownWordSignatures = 0;

        /**
         * RS: file for Turian's word vectors
         * The default value is an example of size 25 word vectors on the nlp machines
         */
        public static final String DEFAULT_WORD_VECTOR_FILE = "/u/scr/nlp/deeplearning/datasets/turian/embeddings-scaled.EMBEDDING_SIZE=25.txt";

        /**
         * Path of the word vector file to use.  Not part of the text format
         * produced by {@link #toString}.
         */
        public String wordVectorFile = DEFAULT_WORD_VECTOR_FILE;

        /**
         * Number of hidden units in the word vectors.  A setting of 0
         * will make it try to extract the size from the data file.
         */
        public int numHid = 0;

        /**
         * Words more common than this are tagged with MLE P(t|w). Default 100. The
         * smoothing is sufficiently slight that changing this has little effect.
         * But set this to 0 to be able to use the parser as a vanilla PCFG with
         * no smoothing (not as a practical parser but for exposition or debugging).
         */
        public int smoothInUnknownsThreshold = 100;

        /**
         * Smarter smoothing for rare words.
         */
        public boolean smartMutation = false;

        /**
         * Make use of unicode code point types in smoothing.
         */
        public boolean useUnicodeType = false;

        /** For certain Lexicons, a certain number of word-final letters are
         *  used to subclassify the unknown token. This gives the number of
         *  letters.
         */
        public int unknownSuffixSize = 1;

        /** For certain Lexicons, a certain number of word-initial letters are
         *  used to subclassify the unknown token. This gives the number of
         *  letters.
         */
        public int unknownPrefixSize = 1;

        /**
         * Model for unknown words that the lexicon should use.  This is the
         * name of a class.
         */
        public String uwModelTrainer; // = null;

        /**
         * If this option is false, then all words that were seen in the training
         * data (even once) are constrained to only have seen tags.  That is,
         * mle is used for the lexicon.
         * If this option is true, then if a word has been seen more than
         * smoothInUnknownsThreshold, then it will still only get tags with which
         * it has been seen, but rarer words will get all tags for which the
         * unknown word model (or smart mutation) does not give a score of -Inf.
         * This will normally be all open class tags.
         * If floodTags is invoked by the parser, all other tags will also be
         * given a minimal non-zero, non-infinite probability.
         */
        public boolean flexiTag = false;

        /** Whether to use signature rather than just being unknown as prior in
         *  known word smoothing.  Currently only works if turned on for English.
         */
        public boolean useSignatureForKnownSmoothing;

        /** A file of word class data which may be used for smoothing,
         *  normally instead of hand-specified signatures.
         */
        public String wordClassesFile;

        private static final long serialVersionUID = 2805351374506855632L;

        /**
         * Names of the persisted options, in the exact order in which
         * {@link #toString} writes them and {@link #readData} expects them.
         */
        private static final String[] params = { "useUnknownWordSignatures", "smoothInUnknownsThreshold",
                "smartMutation", "useUnicodeType", "unknownSuffixSize", "unknownPrefixSize", "flexiTag",
                "useSignatureForKnownSmoothing", "wordClassesFile" };

        /**
         * Renders the persisted options as one {@code "name value"} line per
         * entry of {@code params}, each terminated by {@code '\n'}.  An unset
         * {@link #wordClassesFile} is rendered as the text {@code null}.
         *
         * @return The textual form understood by {@link #readData}
         */
        @Override
        public String toString() {
            StringBuilder sb = new StringBuilder();
            sb.append(params[0]).append(' ').append(useUnknownWordSignatures).append('\n');
            sb.append(params[1]).append(' ').append(smoothInUnknownsThreshold).append('\n');
            sb.append(params[2]).append(' ').append(smartMutation).append('\n');
            sb.append(params[3]).append(' ').append(useUnicodeType).append('\n');
            sb.append(params[4]).append(' ').append(unknownSuffixSize).append('\n');
            sb.append(params[5]).append(' ').append(unknownPrefixSize).append('\n');
            sb.append(params[6]).append(' ').append(flexiTag).append('\n');
            sb.append(params[7]).append(' ').append(useSignatureForKnownSmoothing).append('\n');
            sb.append(params[8]).append(' ').append(wordClassesFile).append('\n');
            return sb.toString();
        }

        /**
         * Reads the persisted options back from the form written by
         * {@link #toString}: exactly one line per entry of {@code params}, in
         * that order.  A key that does not match the expected name is logged
         * but the value is still assigned by position.
         *
         * @param in The reader positioned at the first lexicon option line
         * @throws IOException If the underlying read fails, if the input ends
         *     before all options have been read, or if a line has no space
         *     separating the name from the value
         * @throws NumberFormatException If an integer-valued option cannot be parsed
         */
        public void readData(BufferedReader in) throws IOException {
            for (int i = 0; i < params.length; i++) {
                String line = in.readLine();
                if (line == null) {
                    // readLine signals end of stream with null; report it instead of an NPE
                    throw new IOException("Unexpected end of input; expected lexicon option " + params[i]);
                }
                int idx = line.indexOf(' ');
                if (idx < 0) {
                    throw new IOException("Malformed lexicon option line; expected \"" + params[i]
                            + " <value>\" but found: " + line);
                }
                String key = line.substring(0, idx);
                String value = line.substring(idx + 1);
                if (!key.equalsIgnoreCase(params[i])) {
                    log.info("Yikes!!! Expected " + params[i] + " got " + key);
                }
                switch (i) {
                case 0:
                    useUnknownWordSignatures = Integer.parseInt(value);
                    break;
                case 1:
                    smoothInUnknownsThreshold = Integer.parseInt(value);
                    break;
                case 2:
                    smartMutation = Boolean.parseBoolean(value);
                    break;
                case 3:
                    useUnicodeType = Boolean.parseBoolean(value);
                    break;
                case 4:
                    unknownSuffixSize = Integer.parseInt(value);
                    break;
                case 5:
                    unknownPrefixSize = Integer.parseInt(value);
                    break;
                case 6:
                    flexiTag = Boolean.parseBoolean(value);
                    break;
                case 7:
                    useSignatureForKnownSmoothing = Boolean.parseBoolean(value);
                    break;
                case 8:
                    // toString() writes an unset file as the text "null"; map it
                    // back so a write/read round trip leaves the field unset.
                    wordClassesFile = "null".equals(value) ? null : value;
                    break;
                }
            }
        }

    } // end class LexOptions

    /**
     * The lexicon options.  Their textual form is the first thing written
     * by {@link #writeData(Writer)} and the first thing read by
     * {@link #readData(BufferedReader)}.
     */
    public LexOptions lexOptions = new LexOptions();

    /**
     * The treebank-specific parser parameters to use.  Assigned by the
     * constructor and replaced by {@link #readData(BufferedReader)}, which
     * instantiates the class named on the {@code parserParams} line.
     */
    public TreebankLangParserParams tlpParams;

    /**
     * Convenience accessor for the language pack of the current
     * {@link #tlpParams}.
     *
     * @return The treebank language pack for the treebank the parser
     * is trained on.
     */
    public TreebankLanguagePack langpack() {
        final TreebankLangParserParams params = tlpParams;
        return params.treebankLanguagePack();
    }

    /**
     * Forces parsing with strictly CNF grammar -- unary chains are converted
     * to XP&amp;YP symbols and back
     */
    public boolean forceCNF = false;

    /**
     * Do a PCFG parse of the sentence.  If both this and {@link #doDep}
     * are on, also do a combined parse of the sentence.
     */
    public boolean doPCFG = true;

    /**
     * Do a dependency parse of the sentence.
     */
    public boolean doDep = true;

    /**
     * If true, any child can be the head (seems rather bad!)
     */
    public boolean freeDependencies = false;

    /**
     * Whether dependency grammar considers left/right direction. Good.
     */
    public boolean directional = true;

    /**
     * NOTE(review): undocumented in the original; presumably whether the
     * dependency grammar generates stop events -- confirm against the
     * dependency grammar code.  Saved and restored by
     * {@link #writeData(Writer)} / {@link #readData(BufferedReader)}.
     */
    public boolean genStop = true;

    // NOTE(review): the two smoothing switches below are undocumented in the
    // original and their consumers are not visible in this file -- confirm
    // their meaning.  Neither is written by writeData(Writer), so the text
    // format does not preserve them.
    public boolean useSmoothTagProjection = false;
    public boolean useUnigramWordSmoothing = false;

    /**
     * Use distance bins in the dependency calculations
     */
    public boolean distance = true;
    /**
     * Use coarser distance (4 bins) in dependency calculations
     */
    public boolean coarseDistance = false;

    /**
     * "double count" tags rewrites as word in PCFG and Dep parser.  Good for
     * combined parsing only (it used to not kick in for PCFG parsing).  This
     * option is only used at Test time, but it is now in Options, so the
     * correct choice for a grammar is recorded by a serialized parser.
     * You should turn this off for a vanilla PCFG parser.
     */
    public boolean dcTags = true;

    /**
     * If true, inside the factored parser, remove any node from the final
     * chosen tree which improves the PCFG score. This was added as the
     * dependency factor tends to encourage 'deep' trees.
     * Written to the text format under the name {@code nPrune}.
     */
    public boolean nodePrune = false;

    /**
     * The training-time options.  Created through
     * {@link #newTrainOptions()} so that a subclass can supply its own
     * TrainOptions.
     */
    public TrainOptions trainOptions = newTrainOptions();

    /**
     * Factory for the {@link #trainOptions} field, kept as its own method
     * so that subclasses of Options can override it.  It runs from a field
     * initializer, i.e. while the Options object is still being constructed.
     *
     * @return A new TrainOptions with its default settings
     */
    public TrainOptions newTrainOptions() {
        final TrainOptions fresh = new TrainOptions();
        return fresh;
    }

    /**
     * Note that the TestOptions is transient.  This means that whatever
     * options get set at creation time are forgotten when the parser is
     * serialized.  If you want an option to be remembered when the
     * parser is reloaded, put it in either TrainOptions or in this
     * class itself.
     * On deserialization, {@code readObject} replaces this field with a
     * fresh instance obtained from {@link #newTestOptions()}.
     */
    public transient TestOptions testOptions = newTestOptions();

    /**
     * Factory for the {@link #testOptions} field, kept as its own method
     * so that subclasses of Options can override it.  It is used both by
     * the field initializer and when an Options is deserialized.
     *
     * @return A new TestOptions with its default settings
     */
    public TestOptions newTestOptions() {
        final TestOptions fresh = new TestOptions();
        return fresh;
    }

    /**
     * A function that maps words used in training and testing to new
     * words.  For example, it could be a function to lowercase text,
     * such as edu.stanford.nlp.util.LowercaseFunction (which makes the
     * parser case insensitive).  This function is applied in
     * LexicalizedParserQuery.parse and in the training methods which
     * build a new parser.  The default is {@code null}.
     */
    public Function<String, String> wordFunction = null;

    /**
     * If the parser has a reranker, it looks at this many trees when
     * building the reranked list.  Set by the {@code -rerankerKBest}
     * flag; the {@code -dvKBest} flag also overwrites it.
     */
    public int rerankerKBest = 100;

    /**
     * If reranking sentences, we can use the score from the original
     * parser as well.  This tells us how much weight to give that score.
     * Set by the {@code -baseParserWeight} flag.
     */
    public double baseParserWeight = 0.0;

    /**
     * Custom deserialization hook.  Because {@link #testOptions} is
     * transient, default deserialization leaves it unset (its field
     * initializer is not run), so a fresh TestOptions is built here once
     * the other fields have been restored.
     *
     * @param in The stream the Options is being read from
     * @throws IOException If reading from the stream fails
     * @throws ClassNotFoundException If a class of a serialized field cannot be found
     */
    private void readObject(ObjectInputStream in) throws IOException, ClassNotFoundException {
        // Restore every non-transient field first ...
        in.defaultReadObject();
        // ... then rebuild the transient test-time options.
        final TestOptions rebuilt = newTestOptions();
        this.testOptions = rebuilt;
    }

    /**
     * Logs a header line and then writes the option values, in the format
     * of {@link #writeData(Writer)}, to standard error.
     */
    public void display() {
        log.info("Options parameters:");
        final PrintWriter stderr = new PrintWriter(System.err);
        writeData(stderr);
    }

    /**
     * Writes the options as text: first the lexicon options, then one
     * {@code "name value"} line each for the parser parameter class and
     * the boolean options, in the order {@link #readData(BufferedReader)}
     * expects.  The writer is flushed but not closed.
     *
     * @param w The Writer to send the text to
     */
    public void writeData(Writer w) {
        PrintWriter out = new PrintWriter(w);
        // Assemble the complete text before printing, so that it goes out in a single write.
        final String text = lexOptions.toString()
                + "parserParams " + tlpParams.getClass().getName() + "\n"
                + "forceCNF " + forceCNF + "\n"
                + "doPCFG " + doPCFG + "\n"
                + "doDep " + doDep + "\n"
                + "freeDependencies " + freeDependencies + "\n"
                + "directional " + directional + "\n"
                + "genStop " + genStop + "\n"
                + "distance " + distance + "\n"
                + "coarseDistance " + coarseDistance + "\n"
                + "dcTags " + dcTags + "\n"
                + "nPrune " + nodePrune + "\n";
        out.print(text);
        out.flush();
    }

    /**
     * Populates data in this Options from the character stream.  The
     * expected layout is the lexicon options, a {@code parserParams} line
     * naming the TreebankLangParserParams class to instantiate, an optional
     * {@code forceCNF} line (absent in older files), the boolean options
     * {@code doPCFG}, {@code doDep}, {@code freeDependencies},
     * {@code directional}, {@code genStop}, {@code distance},
     * {@code coarseDistance}, {@code dcTags} and {@code nPrune}, and
     * finally a blank line.  Except for {@code forceCNF} and
     * {@code nPrune}, option names are not checked; values are taken by
     * position.
     *
     * @param in The Reader
     * @throws IOException If there is a problem reading data, if the input
     *     ends before all options have been read, or if the parser
     *     parameter class cannot be instantiated
     * @throws RuntimeException If the {@code nPrune} line or the trailing
     *     blank line is not where it is expected
     */
    public void readData(BufferedReader in) throws IOException {
        lexOptions.readData(in);

        String line = nextOptionLine(in, "parserParams");
        String className = optionValue(line);
        try {
            tlpParams = (TreebankLangParserParams) Class.forName(className).getDeclaredConstructor().newInstance();
        } catch (ReflectiveOperationException | RuntimeException e) {
            throw new IOException("Problem instantiating parserParams: " + line, e);
        }

        line = nextOptionLine(in, "forceCNF or doPCFG");
        // ensure backwards compatibility: older files have no forceCNF line
        if (line.startsWith("forceCNF")) {
            forceCNF = Boolean.parseBoolean(optionValue(line));
            line = nextOptionLine(in, "doPCFG");
        }
        doPCFG = Boolean.parseBoolean(optionValue(line));
        doDep = readBooleanOption(in, "doDep");
        freeDependencies = readBooleanOption(in, "freeDependencies");
        directional = readBooleanOption(in, "directional");
        genStop = readBooleanOption(in, "genStop");
        distance = readBooleanOption(in, "distance");
        coarseDistance = readBooleanOption(in, "coarseDistance");
        dcTags = readBooleanOption(in, "dcTags");

        line = nextOptionLine(in, "nPrune");
        if (!line.startsWith("nPrune")) {
            throw new RuntimeException("Expected nPrune, found: " + line);
        }
        nodePrune = Boolean.parseBoolean(optionValue(line));

        line = nextOptionLine(in, "a blank line"); // get rid of last line
        if (!line.isEmpty()) {
            throw new RuntimeException("Expected blank line, found: " + line);
        }
    }

    /** Reads the next line, turning end of input into an IOException that names what was expected. */
    private static String nextOptionLine(BufferedReader in, String expected) throws IOException {
        String line = in.readLine();
        if (line == null) {
            throw new IOException("Unexpected end of input; expected " + expected);
        }
        return line;
    }

    /** Returns the text after the first space of the line, or the whole line if it has no space. */
    private static String optionValue(String line) {
        return line.substring(line.indexOf(' ') + 1);
    }

    /** Reads the next line and parses its value as a boolean; the option name on the line is not checked. */
    private static boolean readBooleanOption(BufferedReader in, String expected) throws IOException {
        return Boolean.parseBoolean(optionValue(nextOptionLine(in, expected)));
    }

    private static final long serialVersionUID = 4L;

} // end class Options