List of usage examples for org.apache.hadoop.fs Path toString
@Override
public String toString()
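Path.toString() returns the string form of the path, keeping the scheme and authority when the path was created with them; the examples below mostly use it to build sibling file names (index or temporary files) or to pass a path to an API that expects a String. A minimal sketch of that behaviour (not taken from the projects below; the host name and file names are made up for illustration):

import org.apache.hadoop.fs.Path;

public class PathToStringExample {
    public static void main(String[] args) {
        // A fully qualified path keeps its scheme and authority in toString().
        Path qualified = new Path("hdfs://namenode:8020/user/data/input.lzo");
        System.out.println(qualified.toString()); // hdfs://namenode:8020/user/data/input.lzo

        // A relative, scheme-less path prints exactly what was passed in.
        Path relative = new Path("data/input.lzo");
        System.out.println(relative.toString());  // data/input.lzo

        // toString() is commonly used to derive a sibling file name, e.g. an index file.
        Path index = new Path(qualified.toString() + ".index");
        System.out.println(index);                // hdfs://namenode:8020/user/data/input.lzo.index
    }
}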
From source file:com.hadoop.mapreduce.LzoTextInputFormat.java
License:Open Source License
@Override
protected List<FileStatus> listStatus(JobContext job) throws IOException {
    List<FileStatus> files = super.listStatus(job);

    FileSystem fs = FileSystem.get(job.getConfiguration());
    String fileExtension = new LzopCodec().getDefaultExtension();

    for (Iterator<FileStatus> iterator = files.iterator(); iterator.hasNext();) {
        FileStatus fileStatus = iterator.next();
        Path file = fileStatus.getPath();

        if (!file.toString().endsWith(fileExtension)) {
            // get rid of non-lzo files
            iterator.remove();
        } else {
            // read the index file
            LzoIndex index = readIndex(file, fs);
            indexes.put(file, index);
        }
    }

    return files;
}
From source file:com.hadoop.mapreduce.LzoTextInputFormat.java
License:Open Source License
/**
 * Read the index of the lzo file.
 *
 * @param file
 *          read the index of this file
 * @param fs
 *          the file system that holds the index file
 * @throws IOException
 */
private LzoIndex readIndex(Path file, FileSystem fs) throws IOException {
    FSDataInputStream indexIn = null;
    try {
        Path indexFile = new Path(file.toString() + LZO_INDEX_SUFFIX);
        if (!fs.exists(indexFile)) {
            // return an empty index and fall back to the unsplittable mode
            return new LzoIndex();
        }

        long indexLen = fs.getFileStatus(indexFile).getLen();
        int blocks = (int) (indexLen / 8);
        LzoIndex index = new LzoIndex(blocks);
        indexIn = fs.open(indexFile);
        for (int i = 0; i < blocks; i++) {
            index.set(i, indexIn.readLong());
        }
        return index;
    } finally {
        if (indexIn != null) {
            indexIn.close();
        }
    }
}
From source file:com.hadoop.mapreduce.LzoTextInputFormat.java
License:Open Source License
/**
 * Index an lzo file so that the input format can split it into separate map
 * jobs.
 *
 * @param fs
 *          file system that contains the file
 * @param lzoFile
 *          the lzo file to index
 * @throws IOException
 */
public static void createIndex(FileSystem fs, Path lzoFile) throws IOException {
    Configuration conf = fs.getConf();
    CompressionCodecFactory factory = new CompressionCodecFactory(fs.getConf());
    CompressionCodec codec = factory.getCodec(lzoFile);
    ((Configurable) codec).setConf(conf);

    InputStream lzoIs = null;
    FSDataOutputStream os = null;
    Path outputFile = new Path(lzoFile.toString() + LzoTextInputFormat.LZO_INDEX_SUFFIX);
    Path tmpOutputFile = outputFile.suffix(".tmp");

    try {
        FSDataInputStream is = fs.open(lzoFile);
        os = fs.create(tmpOutputFile);
        LzopDecompressor decompressor = (LzopDecompressor) codec.createDecompressor();
        // for reading the header
        lzoIs = codec.createInputStream(is, decompressor);

        int numChecksums = decompressor.getChecksumsCount();
        while (true) {
            // read and ignore, we just want to get to the next int
            int uncompressedBlockSize = is.readInt();
            if (uncompressedBlockSize == 0) {
                break;
            } else if (uncompressedBlockSize < 0) {
                throw new EOFException();
            }

            int compressedBlockSize = is.readInt();
            if (compressedBlockSize <= 0) {
                throw new IOException("Could not read compressed block size");
            }

            long pos = is.getPos();
            // write the pos of the block start
            os.writeLong(pos - 8);
            // seek to the start of the next block, skip any checksums
            is.seek(pos + compressedBlockSize + (4 * numChecksums));
        }
    } finally {
        if (lzoIs != null) {
            lzoIs.close();
        }
        if (os != null) {
            os.close();
        }
    }

    fs.rename(tmpOutputFile, outputFile);
}
From source file:com.hazelcast.jet.connector.hadoop.ReadHdfsPTest.java
License:Open Source License
@Test
public void testReadFile() throws Exception {
    Path path = writeToFile("key-1 value-1\n", "key-2 value-2\n", "key-3 value-3\n", "key-4 value-4\n");

    JetInstance instance = createJetMember();
    createJetMember();

    DAG dag = new DAG();
    Vertex source = dag.newVertex("source", readHdfs(path.toString())).localParallelism(4);
    Vertex sink = dag.newVertex("sink", writeList("sink")).localParallelism(1);
    dag.edge(between(source, sink));

    Future<Void> future = instance.newJob(dag).execute();
    assertCompletesEventually(future);

    IList<Map.Entry> list = instance.getList("sink");
    assertEquals(4, list.size());
    assertTrue(list.get(0).getValue().toString().contains("value"));
}
From source file:com.hazelcast.jet.connector.hadoop.ReadHdfsPTest.java
License:Open Source License
@Test
public void testReadFile_withMapping() throws Exception {
    Path path = writeToFile("key-1 value-1\n", "key-2 value-2\n", "key-3 value-3\n", "key-4 value-4\n");

    JetInstance instance = createJetMember();
    createJetMember();

    DAG dag = new DAG();
    Vertex source = dag.newVertex("source", readHdfs(path.toString(), (k, v) -> v.toString()))
            .localParallelism(4);
    Vertex sink = dag.newVertex("sink", writeList("sink")).localParallelism(1);
    dag.edge(between(source, sink));

    Future<Void> future = instance.newJob(dag).execute();
    assertCompletesEventually(future);

    IList<String> list = instance.getList("sink");
    assertEquals(4, list.size());
    assertTrue(list.get(0).contains("key"));
}
From source file:com.hazelcast.jet.connector.hadoop.WriteHdfsPTest.java
License:Open Source License
@Test
public void testWriteFile() throws Exception {
    int messageCount = 20;
    String mapName = randomMapName();

    JetInstance instance = createJetMember();
    createJetMember();

    Map<Integer, Integer> map = IntStream.range(0, messageCount).boxed()
            .collect(Collectors.toMap(m -> m, m -> m));
    instance.getMap(mapName).putAll(map);

    DAG dag = new DAG();
    Vertex producer = dag.newVertex("producer", readMap(mapName)).localParallelism(1);
    Path path = getPath();
    Vertex consumer = dag.newVertex("consumer", writeHdfs(path.toString())).localParallelism(4);
    dag.edge(between(producer, consumer));

    Future<Void> future = instance.newJob(dag).execute();
    assertCompletesEventually(future);

    dag = new DAG();
    producer = dag.newVertex("producer", readHdfs(path.toString())).localParallelism(8);
    consumer = dag.newVertex("consumer", writeList("results")).localParallelism(1);
    dag.edge(between(producer, consumer));

    future = instance.newJob(dag).execute();
    assertCompletesEventually(future);

    IList<Object> results = instance.getList("results");
    assertEquals(messageCount, results.size());
}
From source file:com.hdfs.concat.crush.Crush.java
License:Apache License
private void standAlone() throws IOException {
    String absSrcDir = fs.makeQualified(srcDir).toUri().getPath();
    String absOutDir = fs.makeQualified(outDir).toUri().getPath();

    Text bucket = new Text(absSrcDir + "-0");

    List<Text> files = new ArrayList<Text>();

    FileStatus[] contents = fs.listStatus(new Path(absSrcDir));

    for (FileStatus content : contents) {
        if (!content.isDir()) {
            if (ignoredFiles != null) {
                // Check for files to skip
                ignoredFiles.reset(content.getPath().toUri().getPath());
                if (ignoredFiles.matches()) {
                    LOG.trace("Ignoring " + content.getPath().toString());
                    continue;
                }
            }
            files.add(new Text(content.getPath().toUri().getPath()));
        }
    }

    /*
     * Is the directory empty?
     */
    if (files.isEmpty()) {
        return;
    }

    /*
     * We trick the reducer into doing some work for us by setting these configuration properties.
     */
    job.set("mapred.tip.id", "task_000000000000_00000_r_000000");
    job.set("mapred.task.id", "attempt_000000000000_0000_r_000000_0");
    job.set("mapred.output.dir", absOutDir);

    /*
     * File output committer needs this.
     */
    fs.mkdirs(new Path(absOutDir, "_temporary"));

    CrushReducer reducer = new CrushReducer();

    reducer.configure(job);
    reducer.reduce(bucket, files.iterator(), new NullOutputCollector<Text, Text>(), Reporter.NULL);
    reducer.close();

    /*
     * Use a glob here because the temporary and task attempt work dirs have funny names.
     * Include a * at the end to cover wildcards for compressed files.
     */
    Path crushOutput = new Path(absOutDir + "/*/*/crush" + absSrcDir + "/" + dest.getName() + "*");

    FileStatus[] statuses = fs.globStatus(crushOutput);

    if (statuses == null || 1 != statuses.length) {
        throw new AssertionError("Did not find the expected output in " + crushOutput.toString());
    }

    rename(statuses[0].getPath(), dest.getParent(), dest.getName());
}
From source file:com.hdfs.concat.crush.Crush.java
License:Apache License
/**
 * Moves all crush input files to {@link #dest} and then moves the crush output file to {@link #srcDir}.
 */
private void swap(List<Path> crushInput, String crushFileName) throws IOException {
    if (crushInput.isEmpty()) {
        return;
    }

    print(Verbosity.INFO, format("\n\nSwapping %s", crushFileName));

    List<Path> movedSrc = new ArrayList<Path>(crushInput.size());
    List<Path> movedDest = new ArrayList<Path>(crushInput.size());

    Path crushedDir = crushInput.get(0).getParent();

    boolean crushFileNotInstalled = true;

    try {
        /*
         * Move each source file into the clone directory, replacing the root with the path of the clone dir.
         */
        for (Iterator<Path> iter = crushInput.iterator(); iter.hasNext();) {
            Path source = iter.next();

            /*
             * Remove the leading slash from the input file to create a path relative to the clone dir.
             */
            Path destPath = new Path(dest, source.toString().substring(1));

            rename(source, destPath.getParent(), null);

            iter.remove();

            movedSrc.add(source);
            movedDest.add(destPath);
        }

        /*
         * Install the crush output file now that all the source files have been moved to the clone dir.
         * Sometimes the compression codec messes with the names so watch out.
         */
        Path crushFile = new Path(crushFileName);

        rename(crushFile, crushedDir, null);

        crushFileNotInstalled = false;
    } finally {
        if (!crushInput.isEmpty()) {
            /*
             * We failed while moving the source files to the clone directory.
             */
            LOG.error(format(
                    "Failed while moving files into the clone directory and before installing the crush output file (%d moved and %d remaining)",
                    movedSrc.size(), crushInput.size()));

            StringBuilder sb = new StringBuilder("hadoop fs -mv ");

            for (int i = 0; i < movedSrc.size(); i++) {
                sb.append(" ");
                sb.append(movedDest.get(i));
            }

            sb.append(" ");
            sb.append(crushedDir);

            LOG.error("Execute the following to restore the file system to a good state: " + sb.toString());
        } else if (crushFileNotInstalled) {
            /*
             * We failed moving the crush output file to the source directory.
             */
            LOG.error(format("Failed while moving crush output file (%s) to the source directory (%s)",
                    crushFileName, crushedDir));
        }
    }
}
From source file:com.hdfs.concat.crush.Crush.java
License:Apache License
/**
 * Strips out the scheme and authority.
 */
private String getPathPart(Path path) {
    pathMatcher.reset(path.toString());
    pathMatcher.matches();
    return pathMatcher.group(5);
}
From source file:com.hdfstoftp.main.HdfsToFtp.java
/**
 * Copies files from HDFS to an FTP server according to the given configuration.
 *
 * @param config
 *            holds the source and destination directories, filter options,
 *            overwrite/rename flags, retry count, thread count and the FTP
 *            connection settings
 * @return true when the transfer has finished
 * @throws Exception
 */
private static boolean copyFromHDFSToFTP(Config config) throws Exception {
    // get the HDFS file system
    Configuration conf = new Configuration();
    FileSystem srcFS = FileSystem.get(conf);
    long start = System.currentTimeMillis();
    boolean isRename = config.isRenameUploaded();
    int retryTimes = config.getRetryTimes();

    // source and destination paths
    String dstPath = config.getDestDir();
    Path src = new Path(config.getSouceDir());
    FileStatus fileStatus = srcFS.getFileStatus(src);
    String subDir = null;

    if (fileStatus.isDirectory()) {
        // the source is a directory
        if (isRename) {
            // uploaded files are moved into a "rename" sub directory
            subDir = Config.RENAME_DIR;
            srcFS.mkdirs(new Path(fileStatus.getPath(), subDir));
        }
        int threadNum = config.getThreadNum();
        // thread pool for the upload tasks
        ExecutorService threadPool = Executors.newFixedThreadPool(threadNum);

        // pool of FTP connections
        FTPClientPool ftpPool = new FTPClientPool(threadNum, new FtpClientFactory(config.getFTPClientConfig()));
        FTPClient ftpClient = ftpPool.borrowObject();
        // create the destination directory on the FTP server
        ftpClient.makeDirectory(dstPath);
        ftpPool.returnObject(ftpClient);

        // list the files in the source directory
        FileStatus contents[] = srcFS.listStatus(src);
        long beginFilter = 0;
        long endFileter = 0;
        if (config.getCommandLine().hasOption("d") || config.getCommandLine().hasOption("h")
                || config.getCommandLine().hasOption("t")) {
            // filter by the requested time range
            beginFilter = System.currentTimeMillis();
            Long[] timeRange = parseTimeRange(config.getCommandLine());
            contents = getNewContents(timeRange, contents);
            endFileter = System.currentTimeMillis();
        }
        // filter by regular expression
        if (config.getCommandLine().hasOption("r")) {
            beginFilter = System.currentTimeMillis();
            contents = getFilterContents(config.getCommandLine().getOptionValue("r").trim(), contents);
            endFileter = System.currentTimeMillis();
        }
        logger.info("total file count:" + contents.length);

        Map<String, String> fileNameMap = null;
        long beginSkip = 0;
        long endSkip = 0;
        boolean overwrite = true;
        if (config.getCommandLine().hasOption("o")) {
            overwrite = "true".equals(config.getCommandLine().getOptionValue("o").trim());
        }
        if (!overwrite) {
            // collect the file names already present on the FTP server so they can be skipped
            beginSkip = System.currentTimeMillis();
            fileNameMap = getFileNameMap(dstPath, ftpPool);
            endSkip = System.currentTimeMillis();
        }

        int skiped = 0;
        List<Future<?>> futureList = new ArrayList<Future<?>>();
        for (int i = 0; i < contents.length; i++) {
            if (!overwrite && fileNameMap.containsKey(contents[i].getPath().getName())) {
                // already uploaded, skip it
                skiped++;
                Log.info("skiped filename:" + contents[i].getPath().getName());
                continue;
            }
            if (contents[i].isDirectory()) {
                continue;
            }
            // submit an upload task for this file
            Future<?> future = threadPool.submit(new UploadFileTask(srcFS, contents[i].getPath(),
                    new Path(dstPath, contents[i].getPath().getName()), ftpPool, false, isRename, subDir,
                    retryTimes));
            futureList.add(future);
        }

        int transfered = 0;
        int failed = 0;
        for (Future<?> future : futureList) {
            Boolean computeResult = (Boolean) future.get();
            if (computeResult) {
                transfered++;
                if (transfered % 50 == 0 || transfered == contents.length) {
                    logger.info("have transfered:" + transfered + " files");
                }
            } else {
                failed++;
                logger.error("failed transter:" + failed + " files");
            }
        }

        // shut down the thread pool
        threadPool.shutdown();
        // close the FTP client pool
        ftpPool.close();

        // summary
        logger.info("filter time:" + (endFileter - beginFilter) + " ms");
        if (!overwrite) {
            logger.info("skip time:" + (endSkip - beginSkip) + " ms");
        }
        logger.info("total file count:" + contents.length);
        logger.info("total transtered: " + transfered + ",total failed:" + failed + ",total skiped:" + skiped);
    } else {
        // the source is a single list file: each line is split on "&" and the second field
        // is taken as the HDFS path of a file to upload
        BufferedReader reader = null;
        FtpClientFactory facotry = new FtpClientFactory(config.getFTPClientConfig());
        FTPClient ftpClient = null;
        InputStream in = null;
        try {
            Path path = fileStatus.getPath();
            if (!path.getName().contains("log")) {
            }
            reader = new BufferedReader(new FileReader(new File(path.toUri().getPath())));
            String str = null;
            ftpClient = facotry.makeObject();
            while ((str = reader.readLine()) != null) {
                String[] feilds = str.split("&");
                Path filePath = null;
                if (feilds.length == 2 && feilds[1] != "") {
                    filePath = new Path(feilds[1]);
                    in = srcFS.open(filePath);
                    boolean result = ftpClient.storeFile(dstPath, in);
                    System.out.println(ftpClient.getReplyCode());
                    if (result) {
                        logger.info(filePath.toString());
                    } else {
                        logger_failed.info(filePath.toString());
                    }
                } else {
                    continue;
                }
            }
        } catch (Exception e) {
            e.printStackTrace();
        } finally {
            // guard against streams that were never opened
            if (in != null) {
                in.close();
            }
            if (reader != null) {
                reader.close();
            }
            facotry.destroyObject(ftpClient);
        }
    }
    long end = System.currentTimeMillis();
    logger.info("finished transfer,total time:" + (end - start) / 1000 + "s");
    return true;
}