Example usage for org.apache.hadoop.fs FileSystem makeQualified

Introduction

This page shows example usages of org.apache.hadoop.fs FileSystem makeQualified, collected from open-source projects.

Prototype

public Path makeQualified(Path path) 

Document

Qualify a path to one which uses this FileSystem's scheme and authority and, if relative, is made absolute.
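
Example

Before the project examples below, here is a minimal, self-contained sketch of the method's behavior (not taken from any of the projects listed; the printed scheme and working directory depend on your Configuration, and with a default Configuration the path is qualified against the local filesystem):

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class MakeQualifiedExample {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(conf);

        // A relative path: no scheme, no authority, no leading slash.
        Path relative = new Path("data/input");

        // makeQualified resolves the path against the FileSystem's working
        // directory and prefixes the FileSystem's scheme and authority,
        // e.g. "file:/home/user/data/input" on the local filesystem.
        Path qualified = fs.makeQualified(relative);

        System.out.println(qualified);
    }
}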

Usage

From source file: org.apache.lens.driver.hive.TestHiveDriver.java

License: Apache License

/**
 * Validate persistent result.
 *
 * @param resultSet   the result set
 * @param dataFile    the data file
 * @param outputDir   the output dir
 * @param formatNulls the format nulls
 * @throws Exception the exception
 */
private void validatePersistentResult(LensResultSet resultSet, String dataFile, Path outputDir,
        boolean formatNulls) throws Exception {
    assertTrue(resultSet instanceof HivePersistentResultSet,
            "resultset class: " + resultSet.getClass().getName());
    HivePersistentResultSet persistentResultSet = (HivePersistentResultSet) resultSet;
    String path = persistentResultSet.getOutputPath();

    Path actualPath = new Path(path);
    FileSystem fs = actualPath.getFileSystem(driverConf);
    assertEquals(actualPath, fs.makeQualified(outputDir));
    List<String> actualRows = new ArrayList<String>();
    for (FileStatus stat : fs.listStatus(actualPath, new PathFilter() {
        @Override
        public boolean accept(Path path) {
            return !new File(path.toUri()).isDirectory();
        }
    })) {
        FSDataInputStream in = fs.open(stat.getPath());
        BufferedReader br = null;
        try {
            br = new BufferedReader(new InputStreamReader(in));
            String line = "";

            while ((line = br.readLine()) != null) {
                System.out.println("Actual:" + line);
                actualRows.add(line.trim());
            }
        } finally {
            if (br != null) {
                br.close();
            }
        }
    }

    BufferedReader br = null;
    List<String> expectedRows = new ArrayList<String>();

    try {
        br = new BufferedReader(new FileReader(new File(dataFile)));
        String line = "";
        while ((line = br.readLine()) != null) {
            String row = line.trim();
            if (formatNulls) {
                row += ",-NA-,";
                row += line.trim();
            }
            expectedRows.add(row);
        }
    } finally {
        if (br != null) {
            br.close();
        }
    }
    assertEquals(actualRows, expectedRows);
}

From source file: org.apache.mahout.cf.taste.example.email.EmailUtility.java

License: Apache License

public static void loadDictionaries(Configuration conf, String fromPrefix,
        OpenObjectIntHashMap<String> fromDictionary, String msgIdPrefix,
        OpenObjectIntHashMap<String> msgIdDictionary) throws IOException {

    Path[] localFiles = HadoopUtil.getCachedFiles(conf);
    FileSystem fs = FileSystem.getLocal(conf);
    for (Path dictionaryFile : localFiles) {

        // key is word value is id

        OpenObjectIntHashMap<String> dictionary = null;
        if (dictionaryFile.getName().startsWith(fromPrefix)) {
            dictionary = fromDictionary;
        } else if (dictionaryFile.getName().startsWith(msgIdPrefix)) {
            dictionary = msgIdDictionary;
        }
        if (dictionary != null) {
            dictionaryFile = fs.makeQualified(dictionaryFile);
            for (Pair<Writable, IntWritable> record : new SequenceFileIterable<Writable, IntWritable>(
                    dictionaryFile, true, conf)) {
                dictionary.put(record.getFirst().toString(), record.getSecond().get());
            }
        }
    }

}

From source file: org.apache.mahout.classifier.sequencelearning.baumwelchmapreduce.BaumWelchUtils.java

License: Apache License

public static HmmModel CreateHmmModel(int nrOfHiddenStates, int nrOfOutputStates, Path modelPath,
        Configuration conf) throws IOException {

    log.info("Entering Create Hmm Model. Model Path = {}", modelPath.toUri());
    Vector initialProbabilities = new DenseVector(nrOfHiddenStates);
    Matrix transitionMatrix = new DenseMatrix(nrOfHiddenStates, nrOfHiddenStates);
    Matrix emissionMatrix = new DenseMatrix(nrOfHiddenStates, nrOfOutputStates);

    // Get the path location where the seq files encoding model are stored
    Path modelFilesPath = new Path(modelPath, "*");
    log.info("Create Hmm Model. ModelFiles Path = {}", modelFilesPath.toUri());
    Collection<Path> result = new ArrayList<Path>();

    // get all filtered file names in result list
    FileSystem fs = modelFilesPath.getFileSystem(conf);
    log.info("Create Hmm Model. File System = {}", fs);
    FileStatus[] matches = fs.listStatus(
            FileUtil.stat2Paths(fs.globStatus(modelFilesPath, PathFilters.partFilter())),
            PathFilters.partFilter());

    for (FileStatus match : matches) {
        log.info("CreateHmmmModel Adding File Match {}", match.getPath().toString());
        result.add(fs.makeQualified(match.getPath()));
    }

    // iterate through the result path list
    for (Path path : result) {
        for (Pair<Writable, MapWritable> pair : new SequenceFileIterable<Writable, MapWritable>(path, true,
                conf)) {
            Text key = (Text) pair.getFirst();
            log.info("CreateHmmModel Matching Seq File Key = {}", key);
            MapWritable valueMap = pair.getSecond();
            if (key.charAt(0) == 'I') {
                // initial distribution stripe
                for (MapWritable.Entry<Writable, Writable> entry : valueMap.entrySet()) {
                    log.info("CreateHmmModel Initial Prob Adding  Key, Value  = ({} {})",
                            ((IntWritable) entry.getKey()).get(), ((DoubleWritable) entry.getValue()).get());
                    initialProbabilities.set(((IntWritable) entry.getKey()).get(),
                            ((DoubleWritable) entry.getValue()).get());
                }
            } else if (key.charAt(0) == 'T') {
                // transition distribution stripe
                // key is of the form TRANSIT_0, TRANSIT_1 etc
                // the number after _ is the state ID at char index 8
                int stateID = Character.getNumericValue(key.charAt(8));
                log.info("CreateHmmModel stateID = key.charAt(8) = {}", stateID);
                for (MapWritable.Entry<Writable, Writable> entry : valueMap.entrySet()) {
                    log.info("CreateHmmModel Transition Matrix ({}, {}) = {}", new Object[] { stateID,
                            ((IntWritable) entry.getKey()).get(), ((DoubleWritable) entry.getValue()).get() });
                    transitionMatrix.set(stateID, ((IntWritable) entry.getKey()).get(),
                            ((DoubleWritable) entry.getValue()).get());
                }
            } else if (key.charAt(0) == 'E') {
                // emission distribution stripe
                // key is of the form EMIT_0, EMIT_1 etc
                // the number after _ is the state ID at char number 5
                int stateID = Character.getNumericValue(key.charAt(5));
                for (MapWritable.Entry<Writable, Writable> entry : valueMap.entrySet()) {
                    log.info("CreateHmmModel Emission Matrix ({}, {}) = {}", new Object[] { stateID,
                            ((IntWritable) entry.getKey()).get(), ((DoubleWritable) entry.getValue()).get() });
                    emissionMatrix.set(stateID, ((IntWritable) entry.getKey()).get(),
                            ((DoubleWritable) entry.getValue()).get());
                }
            } else {
                throw new IllegalStateException("Error creating HmmModel from Sequence File Path");
            }
        }
    }
    HmmModel model = new HmmModel(transitionMatrix, emissionMatrix, initialProbabilities);
    HmmUtils.validate(model);
    return model;
}

From source file: org.apache.mahout.classifier.sequencelearning.baumwelchmapreduce.MapWritableCache.java

License: Apache License

/**
 * @param key SequenceFile key
 * @param map Map to save
 */
public static void save(Writable key, MapWritable map, Path output, Configuration conf, boolean overwritePath,
        boolean deleteOnExit) throws IOException {

    FileSystem fs = FileSystem.get(conf);
    output = fs.makeQualified(output);
    if (overwritePath) {
        HadoopUtil.delete(conf, output);
    }

    // set the cache
    DistributedCache.setCacheFiles(new URI[] { output.toUri() }, conf);

    // set up the writer
    SequenceFile.Writer writer = new SequenceFile.Writer(fs, conf, output, IntWritable.class,
            MapWritable.class);
    try {
        writer.append(key, new MapWritable(map));
    } finally {
        Closeables.closeQuietly(writer);
    }

    if (deleteOnExit) {
        fs.deleteOnExit(output);
    }
}

From source file: org.apache.mahout.classifier.sequencelearning.hmm.hadoop.BaumWelchUtils.java

License: Apache License

/**
 * Converts the sequence files present in a directory to a {@link HmmModel} model.
 *
 * @param nrOfHiddenStates Number of hidden states
 * @param nrOfOutputStates Number of output states
 * @param modelPath        Location of the sequence files containing the model's distributions
 * @param conf             Configuration object
 * @return HmmModel the encoded model
 * @throws IOException
 */
public static HmmModel createHmmModel(int nrOfHiddenStates, int nrOfOutputStates, Path modelPath,
        Configuration conf) throws IOException {

    log.info("Entering Create Hmm Model. Model Path = {}", modelPath.toUri());
    Vector initialProbabilities = new DenseVector(nrOfHiddenStates);
    Matrix transitionMatrix = new DenseMatrix(nrOfHiddenStates, nrOfHiddenStates);
    Matrix emissionMatrix = new DenseMatrix(nrOfHiddenStates, nrOfOutputStates);

    // Get the path location where the seq files encoding model are stored
    Path modelFilesPath = new Path(modelPath, "*");

    Collection<Path> result = new ArrayList<Path>();

    // get all filtered file names in result list
    FileSystem fs = modelFilesPath.getFileSystem(conf);
    FileStatus[] matches = fs.listStatus(
            FileUtil.stat2Paths(fs.globStatus(modelFilesPath, PathFilters.partFilter())),
            PathFilters.partFilter());

    for (FileStatus match : matches) {
        result.add(fs.makeQualified(match.getPath()));
    }

    // iterate through the result path list
    for (Path path : result) {
        for (Pair<Writable, MapWritable> pair : new SequenceFileIterable<Writable, MapWritable>(path, true,
                conf)) {
            Text key = (Text) pair.getFirst();
            MapWritable valueMap = pair.getSecond();
            if (key.charAt(0) == (int) 'I') {
                // initial distribution stripe
                for (MapWritable.Entry<Writable, Writable> entry : valueMap.entrySet()) {
                    initialProbabilities.set(((IntWritable) entry.getKey()).get(),
                            ((DoubleWritable) entry.getValue()).get());
                }
            } else if (key.charAt(0) == (int) 'T') {
                // transition distribution stripe
                // key is of the form TRANSIT_0, TRANSIT_1 etc
                int stateID = Integer.parseInt(key.toString().split("_")[1]);
                for (MapWritable.Entry<Writable, Writable> entry : valueMap.entrySet()) {
                    transitionMatrix.set(stateID, ((IntWritable) entry.getKey()).get(),
                            ((DoubleWritable) entry.getValue()).get());
                }
            } else if (key.charAt(0) == (int) 'E') {
                // emission distribution stripe
                // key is of the form EMIT_0, EMIT_1 etc
                int stateID = Integer.parseInt(key.toString().split("_")[1]);
                for (MapWritable.Entry<Writable, Writable> entry : valueMap.entrySet()) {
                    emissionMatrix.set(stateID, ((IntWritable) entry.getKey()).get(),
                            ((DoubleWritable) entry.getValue()).get());
                }
            } else {
                throw new IllegalStateException("Error creating HmmModel from Sequence File Path");
            }
        }
    }

    return new HmmModel(transitionMatrix, emissionMatrix, initialProbabilities);
}

From source file: org.apache.mahout.clustering.spectral.common.TestVectorCache.java

License: Apache License

@Test
public void testLoad() throws Exception {
    // save a vector manually
    Configuration conf = new Configuration();
    Writable key = new IntWritable(0);
    Vector value = new DenseVector(VECTOR);
    Path path = getTestTempDirPath("output");

    FileSystem fs = FileSystem.get(path.toUri(), conf);
    // write the vector
    path = fs.makeQualified(path);
    fs.deleteOnExit(path);
    HadoopUtil.delete(conf, path);
    DistributedCache.setCacheFiles(new URI[] { path.toUri() }, conf);
    SequenceFile.Writer writer = new SequenceFile.Writer(fs, conf, path, IntWritable.class,
            VectorWritable.class);
    try {
        writer.append(key, new VectorWritable(value));
    } finally {
        Closeables.closeQuietly(writer);
    }

    // load it
    Vector result = VectorCache.load(conf);

    // are they the same?
    assertNotNull("Vector is not null", result);
    assertEquals("Loaded vector is identical to original", result, value);
}

From source file: org.apache.mahout.clustering.spectral.common.VectorCache.java

License: Apache License

/**
 * @param key SequenceFile key
 * @param vector Vector to save, to be wrapped as VectorWritable
 */
public static void save(Writable key, Vector vector, Path output, Configuration conf, boolean overwritePath,
        boolean deleteOnExit) throws IOException {

    FileSystem fs = FileSystem.get(output.toUri(), conf);
    output = fs.makeQualified(output);
    if (overwritePath) {
        HadoopUtil.delete(conf, output);
    }

    // set the cache
    DistributedCache.setCacheFiles(new URI[] { output.toUri() }, conf);

    // set up the writer
    SequenceFile.Writer writer = new SequenceFile.Writer(fs, conf, output, IntWritable.class,
            VectorWritable.class);
    try {
        writer.append(key, new VectorWritable(vector));
    } finally {
        Closeables.closeQuietly(writer);
    }

    if (deleteOnExit) {
        fs.deleteOnExit(output);
    }
}

From source file: org.apache.mahout.clustering.spectral.common.VectorMatrixMultiplicationJob.java

License: Apache License

public static DistributedRowMatrix runJob(Path markovPath, Vector diag, Path outputPath, Path tmpPath)
        throws IOException, ClassNotFoundException, InterruptedException {

    // set up the serialization of the diagonal vector
    Configuration conf = new Configuration();
    FileSystem fs = FileSystem.get(markovPath.toUri(), conf);
    markovPath = fs.makeQualified(markovPath);
    outputPath = fs.makeQualified(outputPath);
    Path vectorOutputPath = new Path(outputPath.getParent(), "vector");
    VectorCache.save(new IntWritable(EigencutsKeys.DIAGONAL_CACHE_INDEX), diag, vectorOutputPath, conf);

    // set up the job itself
    Job job = new Job(conf, "VectorMatrixMultiplication");
    job.setInputFormatClass(SequenceFileInputFormat.class);
    job.setOutputKeyClass(IntWritable.class);
    job.setOutputValueClass(VectorWritable.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);
    job.setMapperClass(VectorMatrixMultiplicationMapper.class);
    job.setNumReduceTasks(0);

    FileInputFormat.addInputPath(job, markovPath);
    FileOutputFormat.setOutputPath(job, outputPath);

    job.setJarByClass(VectorMatrixMultiplicationJob.class);

    boolean succeeded = job.waitForCompletion(true);
    if (!succeeded) {
        throw new IllegalStateException("Job failed!");
    }

    // build the resulting DRM from the results
    return new DistributedRowMatrix(outputPath, tmpPath, diag.size(), diag.size());
}

From source file: org.apache.mahout.clustering.spectral.TestVectorCache.java

License: Apache License

@Test
public void testLoad() throws Exception {
    // save a vector manually
    Configuration conf = getConfiguration();
    Writable key = new IntWritable(0);
    Vector value = new DenseVector(VECTOR);
    Path path = getTestTempDirPath("output");

    FileSystem fs = FileSystem.get(path.toUri(), conf);
    // write the vector
    path = fs.makeQualified(path);
    fs.deleteOnExit(path);
    HadoopUtil.delete(conf, path);
    SequenceFile.Writer writer = new SequenceFile.Writer(fs, conf, path, IntWritable.class,
            VectorWritable.class);
    try {
        writer.append(key, new VectorWritable(value));
    } finally {
        Closeables.close(writer, false);
    }
    DistributedCache.setCacheFiles(new URI[] { path.toUri() }, conf);

    // load it
    Vector result = VectorCache.load(conf);

    // are they the same?
    assertNotNull("Vector is null", result);
    assertEquals("Loaded vector is not identical to original", result, value);
}

From source file: org.apache.mahout.clustering.spectral.VectorCache.java

License: Apache License

/**
 * @param key    SequenceFile key
 * @param vector Vector to save, to be wrapped as VectorWritable
 */
public static void save(Writable key, Vector vector, Path output, Configuration conf, boolean overwritePath,
        boolean deleteOnExit) throws IOException {

    FileSystem fs = FileSystem.get(output.toUri(), conf);
    output = fs.makeQualified(output);
    if (overwritePath) {
        HadoopUtil.delete(conf, output);
    }

    // set the cache
    DistributedCache.setCacheFiles(new URI[] { output.toUri() }, conf);

    // set up the writer
    SequenceFile.Writer writer = new SequenceFile.Writer(fs, conf, output, IntWritable.class,
            VectorWritable.class);
    try {
        writer.append(key, new VectorWritable(vector));
    } finally {
        Closeables.close(writer, false);
    }

    if (deleteOnExit) {
        fs.deleteOnExit(output);
    }
}