di.uniba.it.wsd.tool.wn.BuildOccSense.java Source code

Java tutorial

Introduction

Here is the source code for di.uniba.it.wsd.tool.wn.BuildOccSense.java

Source

/**
 * Copyright (c) 2014, the LESK-WSD-DSM AUTHORS.
 *
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 * Redistributions of source code must retain the above copyright notice, this
 * list of conditions and the following disclaimer.
 *
 * Redistributions in binary form must reproduce the above copyright notice,
 * this list of conditions and the following disclaimer in the documentation
 * and/or other materials provided with the distribution.
 *
 * Neither the name of the University of Bari nor the names of its contributors
 * may be used to endorse or promote products derived from this software without
 * specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 *
 * GNU GENERAL PUBLIC LICENSE - Version 3, 29 June 2007
 *
 */
package di.uniba.it.wsd.tool.wn;

import com.google.common.collect.HashMultiset;
import com.google.common.collect.Multiset;
import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileReader;
import java.io.FileWriter;
import java.io.IOException;
import java.util.Iterator;
import java.util.logging.Level;
import java.util.logging.Logger;

/**
 * Builds synset occurrence information from the WordNet {@code index.sense} file.
 *
 * <p>Each input line is split on whitespace and read as: field 0 = sense key
 * ({@code lemma%ss_type:...}), field 1 = synset offset, field 3 = occurrence
 * count. The {@code ss_type} digit of the sense key is mapped to a one-letter
 * part-of-speech suffix which is appended to the offset to form the synset
 * identifier. The output file contains one {@code identifier<TAB>total} line
 * per synset, where the total is the sum of the counts of all its senses.
 *
 * @author pierpaolo
 */
public class BuildOccSense {

    private static final Logger LOG = Logger.getLogger(BuildOccSense.class.getName());

    /**
     * Reads the sense index given as first argument and writes the aggregated
     * synset occurrences to the file given as second argument.
     *
     * <p>Blank lines are ignored. Lines with fewer than four fields, with a
     * sense key lacking the {@code %} separator, or with an unknown
     * part-of-speech code are logged and skipped. A non-numeric occurrence
     * count or an I/O failure aborts the run and is logged; in that case no
     * output file is written.
     *
     * @param args the command line arguments: {@code args[0]} is the path of
     *             the {@code index.sense} file, {@code args[1]} the output path
     */
    public static void main(String[] args) {
        if (args == null || args.length < 2) {
            LOG.log(Level.SEVERE, "Usage: BuildOccSense <index.sense file> <output file>");
            return;
        }
        try {
            Multiset<String> synset = HashMultiset.create();

            // try-with-resources guarantees the reader is closed even when parsing fails.
            try (BufferedReader in = new BufferedReader(new FileReader(new File(args[0])))) {
                String line;
                int lineNumber = 0;
                // readLine() returning null is the only reliable end-of-stream signal.
                while ((line = in.readLine()) != null) {
                    lineNumber++;
                    String trimmed = line.trim();
                    if (trimmed.isEmpty()) {
                        continue;
                    }
                    String[] values = trimmed.split("\\s+");
                    if (values.length < 4) {
                        LOG.log(Level.WARNING, "Skipping line {0}: expected at least 4 fields", lineNumber);
                        continue;
                    }
                    String[] keys = values[0].split("%");
                    if (keys.length < 2) {
                        LOG.log(Level.WARNING, "Skipping line {0}: sense key without ''%''", lineNumber);
                        continue;
                    }
                    // split(":") drops trailing empty strings, so the array may be empty.
                    String[] poss = keys[1].split(":");
                    String suffix = poss.length == 0 ? null : posSuffix(poss[0]);
                    if (suffix == null) {
                        // Previously such lines were counted under a null element.
                        LOG.log(Level.WARNING, "Skipping line {0}: unknown part-of-speech code", lineNumber);
                        continue;
                    }
                    int occ = Integer.parseInt(values[3]);
                    // Multiset.add(element, n) rejects negative n; non-positive counts add nothing.
                    if (occ > 0) {
                        synset.add(values[1] + suffix, occ);
                    }
                }
            }

            try (BufferedWriter out = new BufferedWriter(new FileWriter(new File(args[1])))) {
                for (Multiset.Entry<String> entry : synset.entrySet()) {
                    out.append(entry.getElement()).append("\t").append(String.valueOf(entry.getCount()));
                    out.newLine();
                }
            }
        } catch (IOException | NumberFormatException ex) {
            LOG.log(Level.SEVERE, "Error building synset occurrences", ex);
        }
    }

    /**
     * Maps the numeric part-of-speech code of a sense key to its one-letter suffix.
     *
     * @param ssType the code found between {@code %} and the first {@code :}
     * @return {@code "n"} for 1, {@code "v"} for 2, {@code "a"} for 3 and 5,
     *         {@code "r"} for 4, or {@code null} when the code is not recognised
     */
    private static String posSuffix(String ssType) {
        switch (ssType) {
            case "1":
                return "n";
            case "2":
                return "v";
            case "3":
            case "5":
                return "a";
            case "4":
                return "r";
            default:
                return null;
        }
    }

}