Example usage for org.apache.hadoop.fs FileSystem getConf

List of usage examples for org.apache.hadoop.fs FileSystem getConf

Introduction

On this page you can find example usages of org.apache.hadoop.fs.FileSystem#getConf().

Prototype

@Override
public Configuration getConf()
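
Before the project examples below, here is a minimal, self-contained sketch of the call. The class name, property key, and default value are illustrative and not taken from the examples.

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;

public class GetConfExample {
    public static void main(String[] args) throws IOException {
        // Build a Configuration and obtain the default FileSystem for it.
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(conf);

        // getConf() returns the Configuration the file system was initialized with,
        // so settings can be read back without passing the Configuration around.
        int replication = fs.getConf().getInt("dfs.replication", 3);
        System.out.println("Configured replication: " + replication);

        fs.close();
    }
}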

Usage

From source file:org.pentaho.hadoop.shim.common.DistributedCacheUtilImpl.java

License:Apache License

/**
 * Stages the source file or folder to a Hadoop file system and sets their permission and replication value
 * appropriately to be used with the Distributed Cache. WARNING: This will delete the contents of dest before staging
 * the archive.
 *
 * @param source    File or folder to copy to the file system. If it is a folder all contents will be copied into
 *                  dest.
 * @param fs        Hadoop file system to store the contents of the archive in
 * @param dest      Destination to copy source into. If source is a file, the new file name will be exactly dest. If
 *                  source is a folder its contents will be copied into dest. For more info see {@link
 *                  FileSystem#copyFromLocalFile(org.apache.hadoop.fs.Path, org.apache.hadoop.fs.Path)}.
 * @param overwrite Should an existing file or folder be overwritten? If not an exception will be thrown.
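 * @param isPublic  Should the staged files be readable by all users? If so the public cached-file
 *                  permission is applied; otherwise the default cached-file permission is used.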
 * @throws IOException         Destination exists but is not a directory.
 * @throws KettleFileException Source does not exist or destination exists and overwrite is false.
 */
public void stageForCache(FileObject source, FileSystem fs, Path dest, boolean overwrite, boolean isPublic)
        throws IOException, KettleFileException {
    if (!source.exists()) {
        throw new KettleFileException(BaseMessages.getString(DistributedCacheUtilImpl.class,
                "DistributedCacheUtil.SourceDoesNotExist", source));
    }

    if (fs.exists(dest)) {
        if (overwrite) {
            // It is a directory, clear it out
            fs.delete(dest, true);
        } else {
            throw new KettleFileException(BaseMessages.getString(DistributedCacheUtilImpl.class,
                    "DistributedCacheUtil.DestinationExists", dest.toUri().getPath()));
        }
    }

    // Use the same replication we'd use for submitting jobs
    short replication = (short) fs.getConf().getInt("mapred.submit.replication", 10);

    if (source.getURL().toString().endsWith(CONFIG_PROPERTIES)) {
        copyConfigProperties(source, fs, dest);
    } else {
        Path local = new Path(source.getURL().getPath());
        fs.copyFromLocalFile(local, dest);
    }

    if (isPublic) {
        fs.setPermission(dest, PUBLIC_CACHED_FILE_PERMISSION);
    } else {
        fs.setPermission(dest, CACHED_FILE_PERMISSION);
    }
    fs.setReplication(dest, replication);
}
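
A hedged sketch of how the method above might be called. Imports are omitted, and the variables util (a DistributedCacheUtilImpl) and source (an Apache VFS FileObject for the local payload) as well as the destination path are placeholders, not taken from the project:

// Sketch only: util and source are assumed to already exist.
FileSystem fs = FileSystem.get(new Configuration());
Path dest = new Path("/opt/pentaho/cache/my-plugin"); // placeholder destination

// Overwrite any earlier staging and keep the staged files private to the submitting user.
util.stageForCache(source, fs, dest, true, false);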

From source file:org.pentaho.hadoop.shim.hsp101.HadoopShim.java

License:Apache License

@Override
public void onLoad(HadoopConfiguration config, HadoopConfigurationFileSystemManager fsm) throws Exception {
    fsm.addProvider(config, "hdfs", config.getIdentifier(), new HDFSFileProvider());
    setDistributedCacheUtil(new DistributedCacheUtilImpl(config) {
        /**
         * Default permission for cached files
         * <p/>
         * Not using FsPermission.createImmutable due to EOFExceptions when using it with Hadoop 0.20.2
         */
        private final FsPermission CACHED_FILE_PERMISSION = new FsPermission((short) 0755);

        public void addFileToClassPath(Path file, Configuration conf) throws IOException {
            String classpath = conf.get("mapred.job.classpath.files");
            conf.set("mapred.job.classpath.files", classpath == null ? file.toString()
                    : classpath + getClusterPathSeparator() + file.toString());
            FileSystem fs = FileSystem.get(conf);
            URI uri = fs.makeQualified(file).toUri();

            DistributedCache.addCacheFile(uri, conf);
        }

        /**
         * Stages the source file or folder to a Hadoop file system and sets their permission and replication
         * value appropriately to be used with the Distributed Cache. WARNING: This will delete the contents of
         * dest before staging the archive.
         *
         * @param source    File or folder to copy to the file system. If it is a folder all contents will be
         *                  copied into dest.
         * @param fs        Hadoop file system to store the contents of the archive in
         * @param dest      Destination to copy source into. If source is a file, the new file name will be
         *                  exactly dest. If source is a folder its contents will be copied into dest. For more
         *                  info see {@link FileSystem#copyFromLocalFile(org.apache.hadoop.fs.Path,
         *                  org.apache.hadoop.fs.Path)}.
         * @param overwrite Should an existing file or folder be overwritten? If not an exception will be
         *                  thrown.
         * @throws IOException         Destination exists but is not a directory.
         * @throws KettleFileException Source does not exist or destination exists and overwrite is false.
         */
        public void stageForCache(FileObject source, FileSystem fs, Path dest, boolean overwrite)
                throws IOException, KettleFileException {
            if (!source.exists()) {
                throw new KettleFileException(BaseMessages.getString(DistributedCacheUtilImpl.class,
                        "DistributedCacheUtil.SourceDoesNotExist", source));
            }

            if (fs.exists(dest)) {
                if (overwrite) {
                    // It is a directory, clear it out
                    fs.delete(dest, true);
                } else {
                    throw new KettleFileException(BaseMessages.getString(DistributedCacheUtilImpl.class,
                            "DistributedCacheUtil.DestinationExists", dest.toUri().getPath()));
                }
            }

            // Use the same replication we'd use for submitting jobs
            short replication = (short) fs.getConf().getInt("mapred.submit.replication", 10);

            copyFile(source, fs, dest, overwrite);
            fs.setReplication(dest, replication);
        }

        private void copyFile(FileObject source, FileSystem fs, Path dest, boolean overwrite)
                throws IOException {
            if (source.getType() == FileType.FOLDER) {
                fs.mkdirs(dest);
                fs.setPermission(dest, CACHED_FILE_PERMISSION);
                for (FileObject fileObject : source.getChildren()) {
                    copyFile(fileObject, fs, new Path(dest, fileObject.getName().getBaseName()), overwrite);
                }
            } else {
                try (FSDataOutputStream fsDataOutputStream = fs.create(dest, overwrite)) {
                    IOUtils.copy(source.getContent().getInputStream(), fsDataOutputStream);
                    fs.setPermission(dest, CACHED_FILE_PERMISSION);
                }
            }
        }

        public String getClusterPathSeparator() {
            return System.getProperty("hadoop.cluster.path.separator", ",");
        }
    });
}

From source file:org.springframework.data.hadoop.fs.FsShell.java

License:Apache License

private void copyToLocal(final FileSystem srcFS, final Path src, final File dst, final boolean copyCrc)
        throws IOException {

    final String COPYTOLOCAL_PREFIX = "_copyToLocal_";

    /* Keep the structure similar to ChecksumFileSystem.copyToLocal(). 
    * Ideal these two should just invoke FileUtil.copy() and not repeat
    * recursion here. Of course, copy() should support two more options :
    * copyCrc and useTmpFile (may be useTmpFile need not be an option).
    */
    if (!srcFS.getFileStatus(src).isDir()) {
        if (dst.exists()) {
            // match the error message in FileUtil.checkDest():
            throw new IOException("Target " + dst + " already exists");
        }

        // use absolute name so that tmp file is always created under dest dir
        File tmp = FileUtil.createLocalTempFile(dst.getAbsoluteFile(), COPYTOLOCAL_PREFIX, true);
        if (!FileUtil.copy(srcFS, src, tmp, false, srcFS.getConf())) {
            throw new IOException("Failed to copy " + src + " to " + dst);
        }

        if (!tmp.renameTo(dst)) {
            throw new IOException(
                    "Failed to rename tmp file " + tmp + " to local destination \"" + dst + "\".");
        }

        if (copyCrc) {
            if (!(srcFS instanceof ChecksumFileSystem)) {
                throw new IOException("Source file system does not have crc files");
            }

            ChecksumFileSystem csfs = (ChecksumFileSystem) srcFS;
            File dstcs = FileSystem.getLocal(srcFS.getConf())
                    .pathToFile(csfs.getChecksumFile(new Path(dst.getCanonicalPath())));
            copyToLocal(csfs.getRawFileSystem(), csfs.getChecksumFile(src), dstcs, false);
        }
    } else {
        // once FileUtil.copy() supports tmp file, we don't need to mkdirs().
        dst.mkdirs();
        for (FileStatus path : srcFS.listStatus(src)) {
            copyToLocal(srcFS, path.getPath(), new File(dst, path.getPath().getName()), copyCrc);
        }
    }
}

From source file:org.springframework.data.hadoop.fs.HdfsResourceLoader.java

License:Apache License

/**
 * Constructs a new <code>HdfsResourceLoader</code> instance.
 *
 * @param fs Hadoop file system to use.
 */
public HdfsResourceLoader(FileSystem fs) {
    Assert.notNull(fs, "a non-null file-system required");
    this.fs = fs;
    internalFS = false;
    codecsFactory = new CompressionCodecFactory(fs.getConf());
}
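
The same pattern shown standalone as a rough sketch: the FileSystem's own Configuration is reused to build a CompressionCodecFactory. The class name and the input path are illustrative only.

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.compress.CompressionCodec;
import org.apache.hadoop.io.compress.CompressionCodecFactory;

public class CodecFromFsConf {
    public static void main(String[] args) throws IOException {
        FileSystem fs = FileSystem.get(new Configuration());

        // Reuse the FileSystem's Configuration so codec lookups see the same settings.
        CompressionCodecFactory codecs = new CompressionCodecFactory(fs.getConf());

        // getCodec returns null when no registered codec matches the file extension.
        CompressionCodec codec = codecs.getCodec(new Path("/data/input.gz"));
        System.out.println(codec == null ? "no codec" : codec.getClass().getName());

        fs.close();
    }
}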

From source file:org.talend.components.simplefileio.runtime.sinks.ParquetHdfsFileSink.java

License:Open Source License

@Override
protected boolean mergeOutput(FileSystem fs, String sourceFolder, String targetFile) {
    try {
        FileStatus[] sourceStatuses = FileSystemUtil.listSubFiles(fs, sourceFolder);
        List<Path> sourceFiles = new ArrayList<>();
        for (FileStatus sourceStatus : sourceStatuses) {
            sourceFiles.add(sourceStatus.getPath());
        }
        FileMetaData mergedMeta = ParquetFileWriter.mergeMetadataFiles(sourceFiles, fs.getConf())
                .getFileMetaData();
        ParquetFileWriter writer = new ParquetFileWriter(fs.getConf(), mergedMeta.getSchema(),
                new Path(targetFile), ParquetFileWriter.Mode.CREATE);
        writer.start();
        for (Path input : sourceFiles) {
            writer.appendFile(fs.getConf(), input);
        }
        writer.end(mergedMeta.getKeyValueMetaData());
    } catch (Exception e) {
        LOG.error("Error when merging files in {}.\n{}", sourceFolder, e.getMessage());
        return false;
    }
    return true;
}

From source file:org.talend.components.simplefileio.runtime.sinks.UgiFileSinkBase.java

License:Open Source License

protected boolean mergeOutput(FileSystem fs, String sourceFolder, String targetFile) {
    // implement how to merge files, different between format
    try {
        return FileUtil.copyMerge(fs, new Path(sourceFolder), fs, new Path(targetFile), false, fs.getConf(),
                "");
    } catch (Exception e) {
        LOG.error("Error when merging files in {}.\n{}", sourceFolder, e.getMessage());
        return false;
    }
}

From source file:org.talend.components.test.MiniDfsResource.java

License:Open Source License

/**
 * Tests that a file on the HDFS cluster contains the given Parquet records.
 *
 * @param path the name of the file on the HDFS cluster
 * @param expected the expected Avro records in the file.
 */
public static void assertReadParquetFile(FileSystem fs, String path, Set<IndexedRecord> expected, boolean part)
        throws IOException {
    Path p = new Path(path);
    if (fs.isFile(p)) {
        try (AvroParquetReader<GenericRecord> reader = new AvroParquetReader<GenericRecord>(fs.getConf(),
                new Path(path))) {
            IndexedRecord record = null;
            while (null != (record = reader.read())) {
                IndexedRecord eqRecord = null;
                for (IndexedRecord indexedRecord : expected) {
                    if (indexedRecord.equals(record)) {
                        eqRecord = indexedRecord;
                        break;
                    }
                }
                expected.remove(eqRecord);
            }
        }
        // Check before asserting for the message.
        if (!part && expected.size() != 0)
            assertThat("Not all avro records found: " + expected.iterator().next(), expected, hasSize(0));
    } else if (fs.isDirectory(p)) {
        for (FileStatus fstatus : FileSystemUtil.listSubFiles(fs, p)) {
            assertReadParquetFile(fs, fstatus.getPath().toString(), expected, true);
        }
        // Check before asserting for the message.
        if (expected.size() != 0)
            assertThat("Not all avro records found: " + expected.iterator().next(), expected, hasSize(0));
    } else {
        fail("No such path: " + path);
    }
}

From source file:sa.edu.kaust.fwindex.IntDocVectorsForwardIndex.java

License:Apache License

/**
 * Creates an <code>IntDocVectorsForwardIndex</code> object.
 *
 * @param origIndexPath
 *            location of the original index
 * @param fwindexPath
 *            location of the forward index file
 * @param fs
 *            handle to the FileSystem
 * @throws IOException
 */
public IntDocVectorsForwardIndex(String origIndexPath, String fwindexPath, FileSystem fs) throws IOException {
    mFs = fs;
    mConf = fs.getConf();

    mOrigIndexPath = origIndexPath;
    sLogger.debug("mPath: " + mOrigIndexPath);

    String forwardIndexPath = fwindexPath;
    sLogger.debug("forwardIndexPath: " + forwardIndexPath);
    FSDataInputStream posInput = fs.open(new Path(forwardIndexPath));

    mCount = 0;

    mPositions = new Hashtable<String, Long>();
    while (true) {
        //Terms are retrieved until the end of file is reached.
        try {
            //The two values (term and position) are written in a single string (see 
            //BuildIntDocVectorsForwardIndex.MyReducer.reduce). Here they are retrieved. 
            String[] inp = posInput.readUTF().split("\t");
            String k = inp[0];
            long l = Long.parseLong(inp[1]);
            mPositions.put(k, l);
        } catch (Exception e) {
            break;
        }
        mCount++;
    }
    sLogger.info("mCount: " + mCount);
}

From source file:sa.edu.kaust.twitter.index.PostingsForwardIndex.java

License:Apache License

/**
 * Creates a <code>PostingsForwardIndex</code> object.
 *
 * @param origIndexPath
 *            location of the original index
 * @param fwindexPath
 *            location of the forward index file
 * @param fs
 *            handle to the FileSystem
 * @throws IOException
 */
public PostingsForwardIndex(String origIndexPath, String fwindexPath, FileSystem fs) throws IOException {
    mFs = fs;
    mConf = fs.getConf();

    mOrigIndexPath = origIndexPath;
    sLogger.debug("mPath: " + mOrigIndexPath);

    String forwardIndexPath = fwindexPath;
    sLogger.debug("forwardIndexPath: " + forwardIndexPath);
    FSDataInputStream posInput = fs.open(new Path(forwardIndexPath));

    //mCount = posInput.readInt();

    //mPositions = new long[mCount];
    map = new HMapKL<String>();
    String term;
    long pos;
    int i = 0;
    System.out.println("Loading postings forward index ...");
    while (true) {
        try {
            term = posInput.readUTF();
            pos = posInput.readLong();
            map.put(term, pos);
        } catch (IOException e) {
            break;
        }
        i++;
        if (i % 1000000 == 0)
            System.out.println("loaded " + i + " entries ...");
    }
    System.out.println("done.");
}

From source file:sa.edu.kaust.twitter.index.TweetsForwardIndex.java

License:Apache License

/**
 * Creates a <code>TweetsForwardIndex</code> object.
 *
 * @param origIndexPath
 *            location of the original index
 * @param fwindexPath
 *            location of the forward index file
 * @param fs
 *            handle to the FileSystem
 * @throws IOException
 */
public TweetsForwardIndex(String origIndexPath, String fwindexPath, FileSystem fs) throws IOException {
    mFs = fs;
    mConf = fs.getConf();

    mOrigIndexPath = origIndexPath;
    sLogger.debug("mPath: " + mOrigIndexPath);

    String forwardIndexPath = fwindexPath;
    sLogger.debug("forwardIndexPath: " + forwardIndexPath);
    FSDataInputStream posInput = fs.open(new Path(forwardIndexPath));

    //mCount = posInput.readInt();

    //mPositions = new long[mCount];
    map = new HMapKL<Long>();
    long tweetID;
    long pos;
    int i = 0;
    System.out.println("Loading tweets forward index ...");
    while (true) {
        try {
            tweetID = posInput.readLong();
            pos = posInput.readLong();
            map.put(tweetID, pos);
        } catch (IOException e) {
            break;
        }
        i++;
        if (i % 1000000 == 0) {
            //if(i % 10 == 0){
            //System.out.println(tweetID+"\t"+pos);
            System.out.println("loaded " + i + " entries ...");
        }
    }
    System.out.println("done (" + i + " entries).");
}