List of usage examples for org.apache.hadoop.fs.Path.toUri()
public URI toUri()
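Path.toUri() converts a Hadoop Path into a java.net.URI, preserving the scheme and authority (e.g. hdfs://host:port) when the path is fully qualified. Before the examples below, here is a minimal sketch of the round trip; the host name, port, and paths are placeholders:

import java.net.URI;
import org.apache.hadoop.fs.Path;

public class PathToUriExample {
    public static void main(String[] args) {
        // A fully qualified path keeps its scheme and authority in the URI
        Path qualified = new Path("hdfs://namenode:8020/user/data/input.txt");
        URI uri = qualified.toUri();
        System.out.println(uri.getScheme());    // hdfs
        System.out.println(uri.getAuthority()); // namenode:8020
        System.out.println(uri.getPath());      // /user/data/input.txt

        // A scheme-less path yields a URI with no scheme or authority
        Path bare = new Path("/user/data/input.txt");
        System.out.println(bare.toUri().getScheme()); // null
    }
}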
From source file:com.conversantmedia.mapreduce.tool.DistributedResourceManager.java
License:Apache License
private static Object getResourceValue(Field field, String valueString, String originalTypeClassname,
        Path[] distFiles) throws IOException, ClassNotFoundException {
    // First, determine our approach:
    Object value = null;
    if (field.getType().isAssignableFrom(String.class)) {
        value = valueString;
    } else if (ClassUtils.isPrimitiveOrWrapper(field.getType())) {
        value = ConvertUtils.convert(valueString, field.getType());
    } else {
        Path path = distributedFilePath(valueString, distFiles);
        // This is something on the distributed cache (or illegal)
        if (field.getType() == Path.class) {
            value = path;
        } else if (field.getType() == File.class) {
            value = new File(path.toUri());
        }
        // Deserialize .ser file
        else if (field.getType().isAssignableFrom(Class.forName(originalTypeClassname))) {
            ObjectInputStream in = null;
            try {
                File beanSerFile = new File(path.toUri().getPath());
                FileInputStream fileIn = new FileInputStream(beanSerFile);
                in = new ObjectInputStream(fileIn);
                value = in.readObject();
            } finally {
                IOUtils.closeQuietly(in);
            }
        } else {
            throw new IllegalArgumentException("Cannot locate resource for field [" + field.getName() + "]");
        }
    }
    return value;
}
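Note the two conversions above: new File(path.toUri()) only works when the URI is absolute with a "file" scheme, while path.toUri().getPath() extracts the raw path string and sidesteps the scheme check. A minimal sketch of the difference, using placeholder paths:

import java.io.File;
import org.apache.hadoop.fs.Path;

public class FileFromPathExample {
    public static void main(String[] args) {
        // new File(URI) requires an absolute, hierarchical URI with scheme "file"
        Path withScheme = new Path("file:///tmp/resource.ser");
        File ok = new File(withScheme.toUri());

        // A scheme-less Path would make new File(URI) throw
        // IllegalArgumentException, so the raw path string is used instead
        Path noScheme = new Path("/tmp/resource.ser");
        File alsoOk = new File(noScheme.toUri().getPath());

        System.out.println(ok + " " + alsoOk);
    }
}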
From source file:com.dasasian.chok.command.ListIndicesCommand.java
License:Apache License
private long calculateIndexDiskUsage(String index) {
    Path indexPath = new Path(index);
    URI indexUri = indexPath.toUri();
    try {
        FileSystem fileSystem = FileSystem.get(indexUri, new Configuration());
        if (!fileSystem.exists(indexPath)) {
            return -1;
        }
        return fileSystem.getContentSummary(indexPath).getLength();
    } catch (Exception e) {
        return -1;
    }
}
From source file:com.dasasian.chok.util.HadoopUtil.java
License:Apache License
public static FileSystem getFileSystem(Path path) throws IOException {
    synchronized (FileSystem.class) { // had once a ConcurrentModificationException
        return FileSystem.get(path.toUri(), new Configuration());
    }
}
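The synchronization here reflects a design choice rather than an API requirement: FileSystem.get serves instances from a JVM-wide cache keyed by the URI's scheme and authority (plus the calling user), and the inline comment records a ConcurrentModificationException once observed in that cache under concurrent access. Callers sharing the returned FileSystem should also avoid closing it, since other threads may hold the same cached instance.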
From source file:com.datasalt.pangool.examples.avro.TestAvroTopicalWordCount.java
License:Apache License
public static int assertOutput(String output, Configuration conf)
        throws NumberFormatException, IOException, InterruptedException {
    int validatedOutputLines = 0;
    Path outPath = new Path(output);
    TupleFile.Reader reader = new TupleFile.Reader(FileSystem.get(outPath.toUri(), conf), conf, outPath);
    Tuple tuple = new Tuple(reader.getSchema());

    while (reader.next(tuple)) {
        Record record = (Record) tuple.get("my_avro");
        int topicId = (Integer) record.get("topic");
        String word = (record.get("word")).toString();
        int count = (Integer) record.get("count");
        if (topicId == 1) {
            if (word.equals("bar") || word.equals("foo")) {
                assertEquals(2, count);
                validatedOutputLines++;
            } else if (word.equals("blah") || word.equals("bloh")) {
                assertEquals(1, count);
                validatedOutputLines++;
            }
        } else if (topicId == 2) {
            if (word.equals("bar")) {
                assertEquals(2, count);
                validatedOutputLines++;
            } else if (word.equals("bor")) {
                assertEquals(1, count);
                validatedOutputLines++;
            }
        }
    }
    reader.close(); // release the underlying stream (missing in the original source)
    return validatedOutputLines;
}
From source file:com.datasalt.pangool.examples.topicalwordcount.TestTopicalWordCount.java
License:Apache License
public static int assertOutput(String output, Configuration conf)
        throws NumberFormatException, IOException, InterruptedException {
    int validatedOutputLines = 0;
    Path outPath = new Path(output);
    TupleFile.Reader reader = new TupleFile.Reader(FileSystem.get(outPath.toUri(), conf), conf, outPath);
    Tuple tuple = new Tuple(reader.getSchema());

    while (reader.next(tuple)) {
        int topicId = (Integer) tuple.get("topic");
        String word = ((Utf8) tuple.get("word")).toString();
        int count = (Integer) tuple.get("count");
        if (topicId == 1) {
            if (word.equals("bar") || word.equals("foo")) {
                assertEquals(2, count);
                validatedOutputLines++;
            } else if (word.equals("blah") || word.equals("bloh")) {
                assertEquals(1, count);
                validatedOutputLines++;
            }
        } else if (topicId == 2) {
            if (word.equals("bar")) {
                assertEquals(2, count);
                validatedOutputLines++;
            } else if (word.equals("bor")) {
                assertEquals(1, count);
                validatedOutputLines++;
            }
        }
    }
    reader.close();
    return validatedOutputLines;
}
From source file:com.datasalt.pangool.examples.topicalwordcount.TestTopicFingerprint.java
License:Apache License
@Test
public void test() throws Exception {
    trash(OUTPUT);

    Configuration conf = new Configuration();
    createInput(INPUT, conf);
    ToolRunner.run(getConf(), new TopicFingerprint(), new String[] { INPUT, OUTPUT, 2 + "" });

    Path outPath = new Path(OUTPUT + "/part-r-00000");
    FileSystem fs = FileSystem.get(outPath.toUri(), conf);
    TupleFile.Reader reader = new TupleFile.Reader(fs, conf, outPath);
    Tuple tuple = new Tuple(reader.getSchema());

    // The order in the output file is deterministic (we have sorted by topic, count)
    reader.next(tuple);
    assertEquals(1, tuple.get("topic"));
    assertEquals("a", tuple.get("word").toString());

    reader.next(tuple);
    assertEquals(1, tuple.get("topic"));
    assertEquals("c", tuple.get("word").toString());

    reader.next(tuple);
    assertEquals(2, tuple.get("topic"));
    assertEquals("a", tuple.get("word").toString());

    reader.next(tuple);
    assertEquals(2, tuple.get("topic"));
    assertEquals("b", tuple.get("word").toString());

    reader.close();

    // Check the named output
    outPath = new Path(OUTPUT + "/" + TopicFingerprint.OUTPUT_TOTALCOUNT + "/" + "part-r-00000");
    reader = new TupleFile.Reader(fs, conf, outPath);
    tuple = new Tuple(reader.getSchema());

    reader.next(tuple);
    assertEquals(1, tuple.get("topic"));
    assertEquals(15, tuple.get("totalcount"));

    reader.next(tuple);
    assertEquals(2, tuple.get("topic"));
    assertEquals(19, tuple.get("totalcount"));

    reader.close();
    trash(INPUT, OUTPUT);
}
From source file:com.datasalt.pangool.examples.topicalwordcount.TestTopicFingerprint.java
License:Apache License
public void createInput(String input, Configuration conf) throws IOException, InterruptedException {
    Path inPath = new Path(input);
    FileSystem fs = FileSystem.get(inPath.toUri(), conf);
    TupleFile.Writer writer = new TupleFile.Writer(fs, conf, inPath, TopicalWordCount.getSchema());

    // Topic 1, words: { a, 10 } { b, 1 } { c, 5 }
    // Top 2 words = a(10), c(5)
    ITuple tuple = new Tuple(TopicalWordCount.getSchema());
    tuple.set("word", "a");
    tuple.set("topic", 1);
    tuple.set("count", 10);
    writer.append(tuple);

    tuple.set("word", "b");
    tuple.set("topic", 1);
    tuple.set("count", 1);
    writer.append(tuple);

    tuple.set("word", "c");
    tuple.set("topic", 1);
    tuple.set("count", 5);
    writer.append(tuple);

    // Topic 2, words: { a, 10 } { b, 9 } { c, 5 }
    // Top 2 words = a(10), b(9)
    tuple.set("word", "a");
    tuple.set("topic", 2);
    tuple.set("count", 10);
    writer.append(tuple);

    tuple.set("word", "b");
    tuple.set("topic", 2);
    tuple.set("count", 9);
    writer.append(tuple);

    tuple.set("word", "c");
    tuple.set("topic", 2);
    tuple.set("count", 5);
    writer.append(tuple);

    writer.close();
}
From source file:com.datasalt.pangool.solr.SolrRecordWriter.java
License:Apache License
/**
 * Write a file to a zip output stream, removing leading path name components from the actual file name when
 * creating the zip file entry.
 *
 * The entry placed in the zip file is <code>baseName</code>/<code>relativePath</code>, where
 * <code>relativePath</code> is constructed by removing a leading <code>root</code> from the path for
 * <code>itemToZip</code>.
 *
 * If <code>itemToZip</code> is an empty directory, it is ignored. If <code>itemToZip</code> is a directory,
 * the contents of the directory are added recursively.
 *
 * @param zos
 *          The zip output stream
 * @param baseName
 *          The base name to use for the file name entry in the zip file
 * @param root
 *          The path to remove from <code>itemToZip</code> to make a relative path name
 * @param itemToZip
 *          The path to the file to be added to the zip file
 * @return the number of entries added
 * @throws IOException
 */
static public int zipDirectory(final Configuration conf, final ZipOutputStream zos, final String baseName,
        final String root, final Path itemToZip) throws IOException {
    LOG.info(String.format("zipDirectory: %s %s %s", baseName, root, itemToZip));
    LocalFileSystem localFs = FileSystem.getLocal(conf);
    int count = 0;

    final FileStatus itemStatus = localFs.getFileStatus(itemToZip);
    if (itemStatus.isDir()) {
        final FileStatus[] statai = localFs.listStatus(itemToZip);

        // Add a directory entry to the zip file
        final String zipDirName = relativePathForZipEntry(itemToZip.toUri().getPath(), baseName, root);
        final ZipEntry dirZipEntry = new ZipEntry(zipDirName + Path.SEPARATOR_CHAR);
        LOG.info(String.format("Adding directory %s to zip", zipDirName));
        zos.putNextEntry(dirZipEntry);
        zos.closeEntry();
        count++;

        if (statai == null || statai.length == 0) {
            LOG.info(String.format("Skipping empty directory %s", itemToZip));
            return count;
        }
        for (FileStatus status : statai) {
            count += zipDirectory(conf, zos, baseName, root, status.getPath());
        }
        LOG.info(String.format("Wrote %d entries for directory %s", count, itemToZip));
        return count;
    }

    final String inZipPath = relativePathForZipEntry(itemToZip.toUri().getPath(), baseName, root);
    if (inZipPath.length() == 0) {
        LOG.warn(String.format("Skipping empty zip file path for %s (%s %s)", itemToZip, root, baseName));
        return 0;
    }

    // Take empty files in case the place holder is needed
    FSDataInputStream in = null;
    try {
        in = localFs.open(itemToZip);
        final ZipEntry ze = new ZipEntry(inZipPath);
        ze.setTime(itemStatus.getModificationTime());
        // Comments confuse looking at the zip file
        // ze.setComment(itemToZip.toString());
        zos.putNextEntry(ze);
        IOUtils.copyBytes(in, zos, conf, false);
        zos.closeEntry();
        LOG.info(String.format("Wrote %d entries for file %s", count, itemToZip));
        return 1;
    } finally {
        if (in != null) { // guard against an NPE when open() itself failed
            in.close();
        }
    }
}
From source file:com.datasalt.pangool.utils.DCUtils.java
License:Apache License
/**
 * Utility method for serializing an object and saving it in the Distributed Cache.
 * <p>
 * The file where it has been serialized will be saved into a Hadoop Configuration property so that you can
 * call {@link DCUtils#loadSerializedObjectInDC(Configuration, Class, String, boolean)} to re-instantiate the
 * serialized instance.
 *
 * @param obj The obj instance to serialize using Java serialization.
 * @param serializeToLocalFile The local file where the instance will be serialized. It will be copied to the HDFS and removed.
 * @param conf The Hadoop Configuration.
 * @throws FileNotFoundException
 * @throws IOException
 * @throws URISyntaxException
 */
public static void serializeToDC(Object obj, String serializeToLocalFile, Configuration conf)
        throws FileNotFoundException, IOException, URISyntaxException {
    File hadoopTmpDir = new File(conf.get("hadoop.tmp.dir"));
    if (!hadoopTmpDir.exists()) {
        hadoopTmpDir.mkdir();
    }
    File file = new File(hadoopTmpDir, serializeToLocalFile);
    FileSystem fS = FileSystem.get(conf);

    ObjectOutput out = new ObjectOutputStream(new FileOutputStream(file));
    out.writeObject(obj);
    out.close();

    if (fS.equals(FileSystem.getLocal(conf))) {
        return;
    }

    String tmpHdfsFolder = conf.get(HDFS_TMP_FOLDER_CONF);
    if (tmpHdfsFolder == null) {
        // Set the temporary folder for Pangool instances to the temporary folder of the user that is
        // running the Job. This folder will be used across the cluster for locating the instances.
        // This way, tasktrackers that are being run as a different user will still be able to locate it.
        tmpHdfsFolder = conf.get("hadoop.tmp.dir");
        conf.set(HDFS_TMP_FOLDER_CONF, tmpHdfsFolder);
    }

    Path toHdfs = new Path(tmpHdfsFolder, serializeToLocalFile);
    if (fS.exists(toHdfs)) { // delete any previous copy so the fresh instance replaces it
        fS.delete(toHdfs, false);
    }
    FileUtil.copy(FileSystem.getLocal(conf), new Path(file + ""), FileSystem.get(conf), toHdfs, true, conf);
    DistributedCache.addCacheFile(toHdfs.toUri(), conf);
}
From source file:com.datasalt.pangool.utils.InstancesDistributor.java
License:Apache License
/**
 * Utility method for serializing an object and saving it in a way that later can be recovered anywhere in
 * the cluster.
 * <p>
 * The file where it has been serialized will be saved into a Hadoop Configuration property so that you can
 * call {@link InstancesDistributor#loadInstance(Configuration, Class, String, boolean)} to re-instantiate
 * the serialized instance.
 *
 * @param obj The obj instance to serialize using Java serialization.
 * @param fileName The file name where the instance will be serialized.
 * @param conf The Hadoop Configuration.
 * @throws FileNotFoundException
 * @throws IOException
 * @throws URISyntaxException
 */
public static void distribute(Object obj, String fileName, Configuration conf)
        throws FileNotFoundException, IOException, URISyntaxException {
    FileSystem fS = FileSystem.get(conf);

    // Set the temporary folder for Pangool instances to the temporary folder of the user that is running
    // the Job. This folder will be used across the cluster for locating the instances.
    // The default value can be changed by a user-provided one.
    String tmpHdfsFolder = conf.get(HDFS_TMP_FOLDER_CONF, DEFAULT_HDFS_TMP_FOLDER_CONF_VALUE);

    Path toHdfs = new Path(tmpHdfsFolder, fileName);
    if (fS.exists(toHdfs)) { // delete any previous copy so the fresh instance replaces it
        fS.delete(toHdfs, false);
    }
    ObjectOutput out = new ObjectOutputStream(fS.create(toHdfs));
    out.writeObject(obj);
    out.close();

    DistributedCache.addCacheFile(toHdfs.toUri(), conf);
}