Java tutorial
/* * Copyright 2014 The Apache Software Foundation. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.apache.mahout.utils.vectors.lucene; import com.google.common.base.Charsets; import com.google.common.io.Closeables; import com.google.common.io.Files; import java.io.BufferedWriter; import java.io.File; import java.io.OutputStreamWriter; import java.io.Writer; import java.util.logging.Level; import java.util.logging.Logger; import org.apache.commons.cli2.CommandLine; import org.apache.commons.cli2.Group; import org.apache.commons.cli2.Option; import org.apache.commons.cli2.OptionException; import org.apache.commons.cli2.builder.ArgumentBuilder; import org.apache.commons.cli2.builder.DefaultOptionBuilder; import org.apache.commons.cli2.builder.GroupBuilder; import org.apache.commons.cli2.commandline.Parser; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.hadoop.io.IOUtils; import org.apache.hadoop.io.SequenceFile; import org.apache.hadoop.io.Writable; import org.apache.hadoop.util.ReflectionUtils; import org.apache.mahout.common.AbstractJob; import org.apache.mahout.utils.vectors.io.DelimitedTermInfoWriter; /** * * @author zhaoyufei */ public class SeqFilePrint extends AbstractJob { public String inputSeqFile = null; public String outFile = null; public String getInputSeqFile() { return inputSeqFile; } public void setInputSeqFile(String inputSeqFile) { this.inputSeqFile = inputSeqFile; } public String getOutFile() { return outFile; } public void setOutFile(String outFile) { this.outFile = outFile; } public static void main(String[] args) throws OptionException { DefaultOptionBuilder obuilder = new DefaultOptionBuilder(); ArgumentBuilder abuilder = new ArgumentBuilder(); GroupBuilder gbuilder = new GroupBuilder(); Option inputOpt = obuilder.withLongName("inputFile").withRequired(true) .withArgument(abuilder.withName("inputFile").withMinimum(1).withMaximum(1).create()) .withDescription("The output of the dictionary as sequence file").withShortName("inputFile") .create(); Option outFileOpt = obuilder.withLongName("outFile").withRequired(true) .withArgument(abuilder.withName("outfolder").withMinimum(1).withMaximum(1).create()) .withDescription("The output of the dictionary as sequence file").withShortName("outFile").create(); Group group = gbuilder.withName("Options").withOption(inputOpt).withOption(outFileOpt).create(); SeqFilePrint seqFilePrint = new SeqFilePrint(); Parser parser = new Parser(); parser.setGroup(group); CommandLine cmdLine = parser.parse(args); if (cmdLine.hasOption(inputOpt)) { seqFilePrint.setInputSeqFile(cmdLine.getValue(inputOpt).toString()); } if (cmdLine.hasOption(outFileOpt)) { seqFilePrint.setOutFile(cmdLine.getValue(outFileOpt).toString()); } try { seqFilePrint.run(args); } catch (Exception ex) { Logger.getLogger(SeqFilePrint.class.getName()).log(Level.SEVERE, null, ex); } } @Override public int run(String[] strings) throws Exception { Configuration conf = getConf(); FileSystem fs = FileSystem.get(conf); Path inputPath = new Path(this.inputSeqFile); BufferedWriter br = null; File textOutFile = new File(this.outFile); Writer writer = Files.newWriter(textOutFile, Charsets.UTF_8); SequenceFile.Reader reader = null; try { reader = new SequenceFile.Reader(fs, inputPath, conf); Writable key = (Writable) ReflectionUtils.newInstance(reader.getKeyClass(), conf); Writable value = (Writable) ReflectionUtils.newInstance(reader.getValueClass(), conf); while (reader.next(key, value)) { writer.write(key.toString()); writer.write(value.toString()); writer.write('\n'); } } catch (Exception e) { e.printStackTrace(); } finally { IOUtils.closeStream(reader); Closeables.close(writer, false); } return 0; } }