Java tutorial
/* This file is part of the Joshua Machine Translation System. * * Joshua is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as * published by the Free Software Foundation; either version 2.1 * of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library; if not, write to the Free * Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, * MA 02111-1307 USA */ package joshua.subsample; import java.io.BufferedWriter; import java.io.OutputStreamWriter; import java.io.FileOutputStream; import java.io.IOException; import org.apache.commons.cli.Option; import org.apache.commons.cli.OptionBuilder; import org.apache.commons.cli.Options; /** * A subsampler which takes in word-alignments as well as the F and * E files. To remove redundant code, this class uses callback * techniques in order to "override" the superclass methods. * * @see joshua.subsample.Subsampler * @author wren ng thornton <wren@users.sourceforge.net> * @version $LastChangedDate: 2009-05-27 20:14:28 -0500 (Wed, 27 May 2009) $ */ public class AlignedSubsampler extends Subsampler { public AlignedSubsampler(String[] testFiles, int maxN, int targetCount) throws IOException { super(testFiles, maxN, targetCount); } /** * @param filelist list of source files to subsample from * @param targetFtoERatio goal for ratio of output F length * to output E length * @param extf extension of F files * @param exte extension of E files * @param exta extension of alignment files * @param fpath path to source F files * @param epath path to source E files * @param apath path to source alignment files * @param output basename for output files (will append * extensions) */ public void subsample(String filelist, float targetFtoERatio, String extf, String exte, String exta, String fpath, String epath, String apath, String output) throws IOException { this.subsample(filelist, targetFtoERatio, new PhraseWriter( new BufferedWriter(new OutputStreamWriter(new FileOutputStream(output + "." + extf), "UTF8")), new BufferedWriter(new OutputStreamWriter(new FileOutputStream(output + "." + exte), "UTF8")), new BufferedWriter(new OutputStreamWriter(new FileOutputStream(output + "." + exta), "UTF8"))), new BiCorpusFactory(fpath, epath, apath, extf, exte, exta, this.vf, this.ve) { /* Local class definition */ public BiCorpus fromFiles(String f) throws IOException { return this.alignedFromFiles(f); } }); } @SuppressWarnings("static-access") public static void main(String[] args) { new SubsamplerCLI() { /* Local class definition */ //TODO hasArg is a static method. It should be accessed as OptionBuilder.hasArg() protected final Option oa = OptionBuilder.withArgName("lang").hasArg() .withDescription("Word alignment extension").isRequired().create("a"); //TODO hasArg is a static method. It should be accessed as OptionBuilder.hasArg() protected final Option oapath = OptionBuilder.withArgName("path").hasArg() .withDescription("Directory containing word alignment files").create("apath"); public Options getCliOptions() { return super.getCliOptions().addOption(oa).addOption(oapath); } public String getClassName() { return AlignedSubsampler.class.getName(); } public void runSubsampler(String[] testFiles, int maxN, int targetCount, float ratio) throws IOException { new AlignedSubsampler(testFiles, maxN, targetCount).subsample(ot.getValue(), ratio, of.getValue(), oe.getValue(), oa.getValue(), ofpath.getValue(), oepath.getValue(), oapath.getValue(), ooutput.getValue()); } }.runMain(args); } }