eu.project.ttc.test.func.ControlFiles.java Source code

Java tutorial

Introduction

Here is the source code for eu.project.ttc.test.func.ControlFiles.java

Source

/*******************************************************************************
 * Copyright 2015-2016 - CNRS (Centre National de Recherche Scientifique)
 *
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 *
 *******************************************************************************/

package eu.project.ttc.test.func;

import static org.assertj.core.api.Assertions.tuple;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.nio.charset.StandardCharsets;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.Collection;
import java.util.List;

import org.assertj.core.groups.Tuple;
import org.assertj.core.util.Lists;

import com.google.common.base.Preconditions;
import com.google.common.base.Splitter;

import eu.project.ttc.engines.desc.Lang;
import eu.project.ttc.models.CompoundType;
import eu.project.ttc.models.VariationType;
import eu.project.ttc.tools.utils.ControlFilesGenerator;

/**
 * Static helpers that locate the control files of a test corpus and parse
 * them into AssertJ {@link Tuple}s.
 *
 * Control files are line-oriented text files: blank lines and lines starting
 * with {@code #} are ignored, every other line is a tab-separated row.
 */
public class ControlFiles {

    /** Separator between the columns of a control file row. */
    private static final String COLUMN_SEPARATOR = "\t";

    /** Root directory of the test corpora, relative to the working directory. */
    private static final Path CONTROL_DIRECTORY_PATH = Paths.get("src", "test", "resources", "eu", "project", "ttc",
            "test", "corpus");

    /**
     * Resolves the control directory of a corpus for a language, i.e.
     * {@code <corpus root>/<corpus>/<lang name>/control}.
     *
     * @param lang
     *          the language
     * @param corpus
     *          the test corpus
     * @return
     *          the path to the existing control directory
     * @throws IllegalStateException
     *          if the resolved path does not exist or is not a directory
     */
    public static Path getControlDirectory(Lang lang, String corpus) {
        Path controlDirectory = CONTROL_DIRECTORY_PATH.resolve(corpus).resolve(lang.getName()).resolve("control");
        File asFile = controlDirectory.toFile();
        Preconditions.checkState(asFile.exists(), "Control directory does not exist: %s", controlDirectory);
        Preconditions.checkState(asFile.isDirectory(), "Control path is not a directory: %s", controlDirectory);
        return controlDirectory;
    }

    /**
     * 
     * tuple("source gKey", "target gKey")
     * 
     * @param lang
     *          the language
     * @param corpus
     *          the test corpus
     * @return
     *          the list of tuples parsed from control file
     */
    public static Tuple[] prefixVariationTuples(Lang lang, String corpus) {
        Path path = getControlDirectory(lang, corpus).resolve(ControlFilesGenerator.getPrefixFileName());
        return array(getPrefixVariationTuples(path.toFile()));
    }

    /**
     * 
     * tuple("Deriv pattern", "source gKey", "target gKey")
     * 
     * e.g. tuple("N A", "n: &eacute;olienne", "a: &eacute;olien")
     * 
     * @param lang
     *          the language
     * @param corpus
     *          the test corpus
     * @return
     *          the list of tuples parsed from control file
     */
    public static Tuple[] derivateVariationTuples(Lang lang, String corpus) {
        Path path = getControlDirectory(lang, corpus).resolve(ControlFilesGenerator.getDerivatesFileName());
        return array(getDerivateVariationTuples(path.toFile()));
    }

    /**
     * 
     * 
     * tuple("source gKey", "target gKey", VariationType)
     * 
     * 
     * @param lang
     *          the language
     * @param corpus
     *          the test corpus
     * @param ruleName
     *          the name of the syntactic rule to control
     * @return
     *          the list of tuples parsed from control file
     */
    public static Iterable<Tuple> syntacticVariationTuples(Lang lang, String corpus, String ruleName) {
        Path path = syntacticVariationControlFilePath(lang, corpus, ruleName);
        return getSyntacticVariationTuples(path.toFile(), ruleName);
    }

    /**
     * Returns a list of tuples having the following format :
     * 
     * tuple("Compound type short name", "term gKey", "compound string")
     * 
     * e.g. tuple("nat", "n: interconnexion", "inter:inter|connexion:connexion")
     * 
     * @see CompoundType#getShortName()
     * @see ControlFilesGenerator#toCompoundString(eu.project.ttc.models.Term)
     * @param lang
     *          the language
     * @param corpus
     *          the test corpus
     * @return
     *          the list of tuples parsed from control file
     */
    public static Tuple[] compoundTuples(Lang lang, String corpus) {
        Path path = getControlDirectory(lang, corpus).resolve(ControlFilesGenerator.getCompoundsFileName());
        return array(getCompoundTuples(path.toFile()));
    }

    /**
     * Reads the significant lines of a control file.
     *
     * Every line is trimmed; empty lines and lines starting with {@code #}
     * are dropped.
     *
     * @param file
     *          the control file to read
     * @return
     *          the trimmed, non-empty, non-comment lines, in file order
     * @throws IllegalArgumentException
     *          if the file does not exist
     * @throws IllegalStateException
     *          if the file cannot be read
     */
    public static List<String> getLines(File file) {
        Preconditions.checkArgument(file.exists(), "Control file does not exist: %s", file);
        List<String> lines = Lists.newArrayList();
        // Decode explicitly as UTF-8: FileReader would use the platform default
        // charset, making the parsed keys depend on the machine running the tests.
        // NOTE(review): assumes control files are stored as UTF-8 - confirm
        // against ControlFilesGenerator.
        try (BufferedReader reader = new BufferedReader(
                new InputStreamReader(new FileInputStream(file), StandardCharsets.UTF_8))) {
            String line;
            while ((line = reader.readLine()) != null) {
                String trimmed = line.trim();
                if (!trimmed.isEmpty() && !trimmed.startsWith("#")) {
                    lines.add(trimmed);
                }
            }
        } catch (IOException e) {
            throw new IllegalStateException("Could not read control file " + file, e);
        }
        return lines;
    }

    /**
     * Parses a control file into rows of exactly {@code nbColumns} values.
     *
     * Lines are obtained from {@link #getLines(File)}, hence already trimmed
     * before being split: a separator at the very start or end of a line that
     * is itself whitespace (e.g. a tab) is lost.
     *
     * @param file
     *          the control file to read
     * @param nbColumns
     *          the expected number of columns of every row
     * @param sep
     *          the literal column separator
     * @return
     *          one array of {@code nbColumns} values per significant line
     * @throws IllegalArgumentException
     *          if the file does not exist or a line does not have exactly
     *          {@code nbColumns} columns
     */
    public static List<String[]> getRows(File file, int nbColumns, String sep) {
        List<String[]> rows = Lists.newArrayList();
        Splitter splitter = Splitter.on(sep);
        for (String line : getLines(file)) {
            List<String> values = splitter.splitToList(line);
            Preconditions.checkArgument(values.size() == nbColumns,
                    "Bad row format for line: \"%s\". Expected %s columns, got %s", line, nbColumns,
                    values.size());
            rows.add(values.toArray(new String[0]));
        }
        return rows;
    }

    /**
     * Parses a syntactic variation control file. Rows have four columns:
     * source gKey, target gKey, variation type name, rule name.
     *
     * @param file
     *          the control file of the rule
     * @param ruleName
     *          the rule name every row is expected to carry
     * @return
     *          tuple("source gKey", "target gKey", VariationType) for each row
     * @throws IllegalStateException
     *          if a row carries a rule name different from {@code ruleName}
     */
    public static List<Tuple> getSyntacticVariationTuples(File file, String ruleName) {
        List<Tuple> tuples = Lists.newArrayList();
        for (String[] row : getRows(file, 4, COLUMN_SEPARATOR)) {
            Preconditions.checkState(row[3].equals(ruleName),
                    "Unexpected rule name \"%s\" in control file %s. Expected \"%s\"", row[3], file, ruleName);
            tuples.add(tuple(row[0], row[1], VariationType.valueOf(row[2])));
        }
        return tuples;
    }

    /**
     * Parses a prefix variation control file (three columns: source gKey,
     * target gKey, variation type) into tuple("source gKey", "target gKey").
     */
    private static List<Tuple> getPrefixVariationTuples(File file) {
        String expectedType = VariationType.IS_PREFIX_OF.toString();
        List<Tuple> tuples = Lists.newArrayList();
        for (String[] row : getRows(file, 3, COLUMN_SEPARATOR)) {
            Preconditions.checkState(row[2].equals(expectedType),
                    "Unexpected variation type \"%s\" in control file %s. Expected \"%s\"", row[2], file,
                    expectedType);
            tuples.add(tuple(row[0], row[1]));
        }
        return tuples;
    }

    /** Copies the tuples into a new array, preserving iteration order. */
    private static Tuple[] array(Collection<Tuple> tuples) {
        return tuples.toArray(new Tuple[0]);
    }

    /**
     * Parses a derivation control file (four columns: source gKey, target gKey,
     * variation type, derivation pattern) into
     * tuple("Deriv pattern", "source gKey", "target gKey").
     */
    private static List<Tuple> getDerivateVariationTuples(File file) {
        String expectedType = VariationType.DERIVES_INTO.toString();
        List<Tuple> tuples = Lists.newArrayList();
        for (String[] row : getRows(file, 4, COLUMN_SEPARATOR)) {
            Preconditions.checkState(row[2].equals(expectedType),
                    "Unexpected variation type \"%s\" in control file %s. Expected \"%s\"", row[2], file,
                    expectedType);
            tuples.add(tuple(row[3], row[0], row[1]));
        }
        return tuples;
    }

    /**
     * Parses a compound control file (three columns) into tuples where the
     * first two columns are swapped: tuple(column 2, column 1, column 3).
     */
    private static List<Tuple> getCompoundTuples(File file) {
        List<Tuple> tuples = Lists.newArrayList();
        for (String[] row : getRows(file, 3, COLUMN_SEPARATOR)) {
            tuples.add(tuple(row[1], row[0], row[2]));
        }
        return tuples;
    }

    /**
     * Resolves the control file of a syntactic rule inside the control
     * directory of the given corpus and language.
     *
     * @param lang
     *          the language
     * @param corpus
     *          the test corpus
     * @param ruleName
     *          the name of the syntactic rule
     * @return
     *          the path of the rule's control file (not checked for existence)
     * @throws IllegalStateException
     *          if the control directory does not exist
     */
    public static Path syntacticVariationControlFilePath(Lang lang, String corpus, String ruleName) {
        return getControlDirectory(lang, corpus).resolve(ControlFilesGenerator.getSyntacticRuleFileName(ruleName));
    }

}