List of usage examples for org.apache.hadoop.fs FileStatus getPath
public Path getPath()
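Before the project examples below, here is a minimal, self-contained sketch (not taken from any of those projects) of the typical getPath() pattern: list a directory with FileSystem.listStatus() and call FileStatus.getPath() on each entry to recurse into subdirectories or act on individual files. The directory /tmp/data and the class name GetPathExample are placeholders.

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class GetPathExample {

    public static void main(String[] args) throws IOException {
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(conf);

        // Placeholder directory; replace with a path that exists on your cluster.
        listRecursively(fs, new Path("/tmp/data"));
    }

    // Walks a directory tree, printing the fully qualified path of every file.
    private static void listRecursively(FileSystem fs, Path dir) throws IOException {
        for (FileStatus status : fs.listStatus(dir)) {
            // getPath() returns the Path that this FileStatus describes.
            Path p = status.getPath();
            if (status.isDirectory()) {
                listRecursively(fs, p);
            } else {
                System.out.println(p + " (" + status.getLen() + " bytes)");
            }
        }
    }
}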
From source file:com.hdfs.concat.clean.Clean.java
License:Apache License
public int cleanup(Path p) {
    try {
        if (fs.isFile(p)) {
            if (conf.get(TARGET_EXPR) != null) {
                if (p.getName().matches(conf.get(TARGET_EXPR))) {
                    warnOrDelete(p);
                }
            }
            if (conf.get(CUTTOFF_MILLIS) != null) {
                if (fs.getFileStatus(p).getModificationTime() < cutoff) {
                    warnOrDelete(p);
                }
            }
        }
        if (fs.isDirectory(p)) {
            for (FileStatus stat : fs.listStatus(p)) {
                cleanup(stat.getPath());
            }
            if (fs.listStatus(p).length == 0) {
                if (conf.get(TARGET_EXPR) != null) {
                    if (p.getName().matches(conf.get(TARGET_EXPR))) {
                        warnOrDelete(p);
                    }
                }
                if (conf.get(CUTTOFF_MILLIS) != null) {
                    if (fs.getFileStatus(p).getModificationTime() < cutoff) {
                        warnOrDelete(p);
                    }
                }
            }
        }
    } catch (IOException e) {
        System.out.println("exception " + e);
        return 7;
    }
    return 0;
}
From source file:com.hdfs.concat.crush.Crush.java
License:Apache License
private void standAlone() throws IOException {
    String absSrcDir = fs.makeQualified(srcDir).toUri().getPath();
    String absOutDir = fs.makeQualified(outDir).toUri().getPath();

    Text bucket = new Text(absSrcDir + "-0");
    List<Text> files = new ArrayList<Text>();

    FileStatus[] contents = fs.listStatus(new Path(absSrcDir));

    for (FileStatus content : contents) {
        if (!content.isDir()) {
            if (ignoredFiles != null) {
                // Check for files to skip
                ignoredFiles.reset(content.getPath().toUri().getPath());
                if (ignoredFiles.matches()) {
                    LOG.trace("Ignoring " + content.getPath().toString());
                    continue;
                }
            }
            files.add(new Text(content.getPath().toUri().getPath()));
        }
    }

    /*
     * Is the directory empty?
     */
    if (files.isEmpty()) {
        return;
    }

    /*
     * We trick the reducer into doing some work for us by setting these configuration properties.
     */
    job.set("mapred.tip.id", "task_000000000000_00000_r_000000");
    job.set("mapred.task.id", "attempt_000000000000_0000_r_000000_0");
    job.set("mapred.output.dir", absOutDir);

    /*
     * File output committer needs this.
     */
    fs.mkdirs(new Path(absOutDir, "_temporary"));

    CrushReducer reducer = new CrushReducer();
    reducer.configure(job);
    reducer.reduce(bucket, files.iterator(), new NullOutputCollector<Text, Text>(), Reporter.NULL);
    reducer.close();

    /*
     * Use a glob here because the temporary and task attempt work dirs have funny names.
     * Include a * at the end to cover wildcards for compressed files.
     */
    Path crushOutput = new Path(absOutDir + "/*/*/crush" + absSrcDir + "/" + dest.getName() + "*");

    FileStatus[] statuses = fs.globStatus(crushOutput);

    if (statuses == null || 1 != statuses.length) {
        throw new AssertionError("Did not find the expected output in " + crushOutput.toString());
    }

    rename(statuses[0].getPath(), dest.getParent(), dest.getName());
}
From source file:com.hdfs.concat.crush.Crush.java
License:Apache License
private void cloneOutput() throws IOException {

    List<FileStatus> listStatus = getOutputMappings();

    /*
     * Initialize to empty list, in which case swap() will be a no-op. The reference is then replaced with a real list,
     * which is used in the subsequent iterations.
     */
    List<Path> crushInput = emptyList();

    Text srcFile = new Text();
    Text crushOut = new Text();
    Text prevCrushOut = new Text();

    for (FileStatus partFile : listStatus) {
        Path path = partFile.getPath();

        Reader reader = new Reader(fs, path, fs.getConf());

        try {
            while (reader.next(srcFile, crushOut)) {
                if (!crushOut.equals(prevCrushOut)) {
                    swap(crushInput, prevCrushOut.toString());

                    prevCrushOut.set(crushOut);
                    crushInput = new LinkedList<Path>();
                }

                crushInput.add(new Path(srcFile.toString()));
            }
        } finally {
            try {
                reader.close();
            } catch (IOException e) {
                LOG.warn("Trapped exception when closing " + path, e);
            }
        }

        swap(crushInput, prevCrushOut.toString());
    }
}
From source file:com.hdfs.concat.crush.Crush.java
License:Apache License
/**
 * Moves the skipped files to the output directory. Called when operating in normal (non-clone) mode.
 */
private void moveOutput() throws IOException {

    List<FileStatus> listStatus = getOutputMappings();

    Text srcFile = new Text();
    Text crushOut = new Text();

    Set<String> crushOutputFiles = new HashSet<String>(nBuckets);

    for (FileStatus partFile : listStatus) {
        Path path = partFile.getPath();

        Reader reader = new Reader(fs, path, fs.getConf());

        try {
            while (reader.next(srcFile, crushOut)) {
                crushOutputFiles.add(new Path(crushOut.toString()).toUri().getPath());
            }
        } finally {
            try {
                reader.close();
            } catch (IOException e) {
                LOG.warn("Trapped exception when closing " + path, e);
            }
        }
    }

    assert crushOutputFiles.size() == nBuckets;

    /*
     * The crush output files will appear in a subdirectory of the output directory. The subdirectory will be the full
     * path of the input directory that was crushed. E.g.
     *
     * Crush input:
     * /user/me/input/dir1/file1
     * /user/me/input/dir1/file2
     * /user/me/input/dir2/file3
     * /user/me/input/dir2/file4
     * /user/me/input/dir3/dir4/file5
     * /user/me/input/dir3/dir4/file6
     *
     * Crush output:
     * /user/me/output/user/me/input/dir1/crushed_file ...
     * /user/me/output/user/me/input/dir2/crushed_file ...
     * /user/me/output/user/me/input/dir2/dir3/dir4/crushed_file ...
     *
     * We need to collapse this down to:
     * /user/me/output/dir1/crushed_file ...
     * /user/me/output/dir2/crushed_file ...
     * /user/me/output/dir2/dir3/dir4/crushed_file ...
     */
    String srcDirName = fs.makeQualified(srcDir).toUri().getPath();

    String destName = fs.makeQualified(dest).toUri().getPath();
    String partToReplace = fs.makeQualified(outDir).toUri().getPath() + "/crush" + srcDirName;

    print(Verbosity.INFO, "\n\nCopying crush files to " + destName);

    for (String crushOutputFile : crushOutputFiles) {
        Path srcPath = new Path(crushOutputFile);
        Path destPath = new Path(destName + crushOutputFile.substring(partToReplace.length())).getParent();

        rename(srcPath, destPath, null);
    }

    print(Verbosity.INFO, "\n\nMoving skipped files to " + destName);

    /*
     * Don't forget to move the files that were not crushed to the output dir so that the output dir has all the data
     * that was in the input dir, the difference being there are fewer files in the output dir.
     */
    for (String name : skippedFiles) {
        Path srcPath = new Path(name);
        Path destPath = new Path(destName + name.substring(srcDirName.length())).getParent();

        rename(srcPath, destPath, null);
    }
}
From source file:com.hdfs.concat.crush.Crush.java
License:Apache License
void writeDirs() throws IOException {

    print(Verbosity.INFO, "\n\nUsing temporary directory " + tmpDir.toUri().getPath());

    FileStatus status = fs.getFileStatus(srcDir);

    Path tmpIn = new Path(tmpDir, "in");

    bucketFiles = new Path(tmpIn, "dirs");
    partitionMap = new Path(tmpIn, "partition-map");
    counters = new Path(tmpIn, "counters");

    skippedFiles = new HashSet<String>();

    /*
     * Prefer the path returned by the status because it is always fully qualified.
     */
    List<Path> dirs = asList(status.getPath());

    Text key = new Text();
    Text value = new Text();

    Writer writer = SequenceFile.createWriter(fs, job, bucketFiles, Text.class, Text.class, CompressionType.BLOCK);

    int numPartitions = Integer.parseInt(job.get("mapred.reduce.tasks"));

    Bucketer partitionBucketer = new Bucketer(numPartitions, 0, false);
    partitionBucketer.reset("partition-map");

    jobCounters = new Counters();

    try {
        while (!dirs.isEmpty()) {
            List<Path> nextLevel = new LinkedList<Path>();

            for (Path dir : dirs) {
                jobCounters.incrCounter(MapperCounter.DIRS_FOUND, 1);

                print(Verbosity.INFO, "\n\n" + dir.toUri().getPath());

                FileStatus[] contents = fs.listStatus(dir, new PathFilter() {
                    @Override
                    public boolean accept(Path testPath) {
                        if (ignoredFiles == null)
                            return true;
                        ignoredFiles.reset(testPath.toUri().getPath());
                        return !ignoredFiles.matches();
                    }
                });

                if (contents == null || contents.length == 0) {
                    print(Verbosity.INFO, " is empty");

                    jobCounters.incrCounter(MapperCounter.DIRS_SKIPPED, 1);
                } else {
                    List<FileStatus> crushables = new ArrayList<FileStatus>(contents.length);
                    Set<String> uncrushedFiles = new HashSet<String>(contents.length);

                    long crushableBytes = 0;

                    /*
                     * Queue sub directories for subsequent inspection and examine the files in this directory.
                     */
                    for (FileStatus content : contents) {
                        Path path = content.getPath();

                        if (content.isDir()) {
                            nextLevel.add(path);
                        } else {
                            boolean changed = uncrushedFiles.add(path.toUri().getPath());
                            assert changed : path.toUri().getPath();

                            long fileLength = content.getLen();

                            if (fileLength <= maxEligibleSize) {
                                crushables.add(content);
                                crushableBytes += fileLength;
                            }
                        }
                    }

                    /*
                     * We found a directory with data in it. Make sure we know how to name the crush output file and then
                     * increment the number of files we found.
                     */
                    if (!uncrushedFiles.isEmpty()) {
                        if (-1 == findMatcher(dir)) {
                            throw new IllegalArgumentException("Could not find matching regex for directory: " + dir);
                        }

                        jobCounters.incrCounter(MapperCounter.FILES_FOUND, uncrushedFiles.size());
                    }

                    if (0 == crushableBytes) {
                        print(Verbosity.INFO, " has no crushable files");

                        jobCounters.incrCounter(MapperCounter.DIRS_SKIPPED, 1);
                    } else {
                        /*
                         * We found files to consider for crushing.
                         */
                        long nBlocks = crushableBytes / dfsBlockSize;

                        if (nBlocks * dfsBlockSize != crushableBytes) {
                            nBlocks++;
                        }

                        /*
                         * maxFileBlocks will be huge in v1 mode, which will lead to one bucket per directory.
                         */
                        long dirBuckets = nBlocks / maxFileBlocks;

                        if (dirBuckets * maxFileBlocks != nBlocks) {
                            dirBuckets++;
                        }

                        if (dirBuckets > Integer.MAX_VALUE) {
                            throw new AssertionError("Too many buckets: " + dirBuckets);
                        }

                        Bucketer directoryBucketer = new Bucketer((int) dirBuckets, excludeSingleFileDirs);

                        directoryBucketer.reset(getPathPart(dir));

                        for (FileStatus file : crushables) {
                            directoryBucketer.add(new FileStatusHasSize(file));
                        }

                        List<Bucket> crushFiles = directoryBucketer.createBuckets();

                        if (crushFiles.isEmpty()) {
                            jobCounters.incrCounter(MapperCounter.DIRS_SKIPPED, 1);
                        } else {
                            nBuckets += crushFiles.size();

                            jobCounters.incrCounter(MapperCounter.DIRS_ELIGIBLE, 1);

                            print(Verbosity.INFO, " => " + crushFiles.size() + " output files");

                            /*
                             * Write out the mapping between a bucket and a file.
                             */
                            for (Bucket crushFile : crushFiles) {
                                String bucketId = crushFile.name();

                                List<String> bucketFiles = crushFile.contents();

                                print(Verbosity.INFO,
                                        format("\n Output %s will include %,d input bytes from %,d files",
                                                bucketId, crushFile.size(), bucketFiles.size()));

                                key.set(bucketId);

                                for (String f : bucketFiles) {
                                    boolean changed = uncrushedFiles.remove(f);
                                    assert changed : f;

                                    pathMatcher.reset(f);
                                    pathMatcher.matches();

                                    value.set(pathMatcher.group(5));

                                    writer.append(key, value);

                                    /*
                                     * Print the input file with four leading spaces.
                                     */
                                    print(Verbosity.VERBOSE, "\n    " + f);
                                }

                                jobCounters.incrCounter(MapperCounter.FILES_ELIGIBLE, bucketFiles.size());

                                partitionBucketer.add(crushFile);
                            }
                        }
                    }

                    if (!uncrushedFiles.isEmpty()) {
                        print(Verbosity.INFO, "\n\n Skipped " + uncrushedFiles.size() + " files");

                        for (String uncrushed : uncrushedFiles) {
                            print(Verbosity.VERBOSE, "\n " + uncrushed);
                        }

                        jobCounters.incrCounter(MapperCounter.FILES_SKIPPED, uncrushedFiles.size());
                    }

                    skippedFiles.addAll(uncrushedFiles);
                }
            }

            dirs = nextLevel;
        }
    } finally {
        try {
            writer.close();
        } catch (Exception e) {
            LOG.error("Trapped exception during close: " + bucketFiles, e);
        }
    }

    /*
     * Now that we have processed all the directories, write the partition map.
     */
    List<Bucket> partitions = partitionBucketer.createBuckets();
    assert partitions.size() <= numPartitions;

    writer = SequenceFile.createWriter(fs, job, partitionMap, Text.class, IntWritable.class);

    IntWritable partNum = new IntWritable();

    try {
        for (Bucket partition : partitions) {
            String partitionName = partition.name();

            partNum.set(Integer.parseInt(partitionName.substring(partitionName.lastIndexOf('-') + 1)));

            for (String bucketId : partition.contents()) {
                key.set(bucketId);

                writer.append(key, partNum);
            }
        }
    } finally {
        try {
            writer.close();
        } catch (Exception e) {
            LOG.error("Trapped exception during close: " + partitionMap, e);
        }
    }

    DataOutputStream countersStream = fs.create(this.counters);

    try {
        jobCounters.write(countersStream);
    } finally {
        try {
            countersStream.close();
        } catch (Exception e) {
            LOG.error("Trapped exception during close: " + partitionMap, e);
        }
    }
}
From source file:com.hdfstoftp.main.HdfsToFtp.java
/**
 * Copies files from HDFS to an FTP server according to the supplied configuration
 * (source directory, destination directory, optional time/regex filters, rename and
 * overwrite behaviour). The original (non-English) comments in this example were
 * unreadable and have been replaced with English equivalents inferred from the code.
 *
 * @param config the transfer configuration
 * @return boolean
 * @throws Exception
 */
private static boolean copyFromHDFSToFTP(Config config) throws Exception {
    // Get the source HDFS file system
    Configuration conf = new Configuration();
    FileSystem srcFS = FileSystem.get(conf);
    long start = System.currentTimeMillis();
    boolean isRename = config.isRenameUploaded();
    int retryTimes = config.getRetryTimes();
    // Source and destination paths
    String dstPath = config.getDestDir();
    Path src = new Path(config.getSouceDir());
    FileStatus fileStatus = srcFS.getFileStatus(src);
    String subDir = null;
    if (fileStatus.isDirectory()) {
        // The source is a directory
        if (isRename) {
            // Uploaded files will be moved into a "rename" subdirectory
            subDir = Config.RENAME_DIR;
            srcFS.mkdirs(new Path(fileStatus.getPath(), subDir));
        }
        int threadNum = config.getThreadNum();
        // Thread pool for uploads
        ExecutorService threadPool = Executors.newFixedThreadPool(threadNum);
        // FTP client pool
        FTPClientPool ftpPool = new FTPClientPool(threadNum, new FtpClientFactory(config.getFTPClientConfig()));
        FTPClient ftpClient = ftpPool.borrowObject();
        // Create the destination directory on the FTP server
        ftpClient.makeDirectory(dstPath);
        ftpPool.returnObject(ftpClient);
        // List the source files
        FileStatus contents[] = srcFS.listStatus(src);
        long beginFilter = 0;
        long endFileter = 0;
        if (config.getCommandLine().hasOption("d") || config.getCommandLine().hasOption("h")
                || config.getCommandLine().hasOption("t")) {
            // Filter files by the given time range
            beginFilter = System.currentTimeMillis();
            Long[] timeRange = parseTimeRange(config.getCommandLine());
            contents = getNewContents(timeRange, contents);
            endFileter = System.currentTimeMillis();
        }
        // Filter files by regular expression
        if (config.getCommandLine().hasOption("r")) {
            beginFilter = System.currentTimeMillis();
            contents = getFilterContents(config.getCommandLine().getOptionValue("r").trim(), contents);
            endFileter = System.currentTimeMillis();
        }
        logger.info("total file count:" + contents.length);
        Map<String, String> fileNameMap = null;
        long beginSkip = 0;
        long endSkip = 0;
        boolean overwrite = true;
        if (config.getCommandLine().hasOption("o")) {
            overwrite = "true".equals(config.getCommandLine().getOptionValue("o").trim());
        }
        if (!overwrite) {
            // Collect the files already present on the FTP server so they can be skipped
            beginSkip = System.currentTimeMillis();
            fileNameMap = getFileNameMap(dstPath, ftpPool);
            endSkip = System.currentTimeMillis();
        }
        int skiped = 0;
        List<Future<?>> futureList = new ArrayList<Future<?>>();
        for (int i = 0; i < contents.length; i++) {
            if (!overwrite && fileNameMap.containsKey(contents[i].getPath().getName())) {
                // Already present on the FTP server, skip it
                skiped++;
                Log.info("skiped filename:" + contents[i].getPath().getName());
                continue;
            }
            if (contents[i].isDirectory()) {
                continue;
            }
            // Submit the upload task
            Future<?> future = threadPool.submit(new UploadFileTask(srcFS, contents[i].getPath(),
                    new Path(dstPath, contents[i].getPath().getName()), ftpPool, false, isRename, subDir, retryTimes));
            futureList.add(future);
        }
        int transfered = 0;
        int failed = 0;
        for (Future<?> future : futureList) {
            Boolean computeResult = (Boolean) future.get();
            if (computeResult) {
                transfered++;
                if (transfered % 50 == 0 || transfered == contents.length) {
                    logger.info("have transfered:" + transfered + " files");
                }
            } else {
                failed++;
                logger.error("failed transter:" + failed + " files");
            }
        }
        // Shut down the thread pool
        threadPool.shutdown();
        // Close the FTPClient pool
        ftpPool.close();
        // Print statistics
        logger.info("filter time:" + (endFileter - beginFilter) + " ms");
        if (!overwrite) {
            logger.info("skip time:" + (endSkip - beginSkip) + " ms");
        }
        logger.info("total file count:" + contents.length);
        logger.info("total transtered: " + transfered + ",total failed:" + failed + ",total skiped:" + skiped);
    } else {
        // The source is a single file listing the paths to upload
        BufferedReader reader = null;
        FtpClientFactory facotry = new FtpClientFactory(config.getFTPClientConfig());
        FTPClient ftpClient = null;
        InputStream in = null;
        try {
            Path path = fileStatus.getPath();
            if (!path.getName().contains("log")) {
            }
            reader = new BufferedReader(new FileReader(new File(path.toUri().getPath())));
            String str = null;
            ftpClient = facotry.makeObject();
            while ((str = reader.readLine()) != null) {
                String[] feilds = str.split("&");
                Path filePath = null;
                if (feilds.length == 2 && feilds[1] != "") {
                    filePath = new Path(feilds[1]);
                    in = srcFS.open(filePath);
                    boolean result = ftpClient.storeFile(dstPath, in);
                    System.out.println(ftpClient.getReplyCode());
                    if (result) {
                        logger.info(filePath.toString());
                    } else {
                        logger_failed.info(filePath.toString());
                    }
                } else {
                    continue;
                }
            }
        } catch (Exception e) {
            e.printStackTrace();
        } finally {
            in.close();
            reader.close();
            facotry.destroyObject(ftpClient);
        }
    }
    long end = System.currentTimeMillis();
    logger.info("finished transfer,total time:" + (end - start) / 1000 + "s");
    return true;
}
From source file:com.hdfstoftp.main.HdfsToFtp.java
/**
 * Filters the given file statuses, keeping only the files whose names match the regular expression.
 *
 * @param reg regular expression applied to file names
 * @param contents the statuses to filter
 * @return FileStatus[]
 */
public static FileStatus[] getFilterContents(String reg, FileStatus[] contents) {
    Pattern pattern = Pattern.compile(reg);
    List<FileStatus> statusList = new ArrayList<FileStatus>();
    for (FileStatus status : contents) {
        if (!status.isDirectory()) {
            String fileName = status.getPath().getName();
            Matcher matcher = pattern.matcher(fileName);
            if (matcher.matches()) {
                statusList.add(status);
            }
        }
    }
    return statusList.toArray(new FileStatus[statusList.size()]);
}
From source file:com.hortonworks.historian.nifi.reporter.HistorianDeanReporter.java
License:Apache License
@Override
public void onTrigger(ReportingContext reportingContext) {
    // create the Atlas client if we don't have one
    /*
    Properties props = System.getProperties();
    props.setProperty("atlas.conf", "/usr/hdp/current/atlas-client/conf");
    getLogger().info("***************** atlas.conf has been set to: " + props.getProperty("atlas.conf"));
    */
    inputs = new ArrayList<Referenceable>();
    outputs = new ArrayList<Referenceable>();
    //EventAccess eventAccess = reportingContext.getEventAccess();
    //int pageSize = reportingContext.getProperty(ACTION_PAGE_SIZE).asInteger();
    lateDataRoot = reportingContext.getProperty(LATE_DATA_ROOT).getValue();
    lateDataTasksPath = lateDataRoot + "/tasks";
    atlasUrl = reportingContext.getProperty(ATLAS_URL).getValue();
    nifiUrl = reportingContext.getProperty(NIFI_URL).getValue();
    nameNodeUrl = reportingContext.getProperty(NAME_NODE_URL).getValue();
    druidBrokerUrl = reportingContext.getProperty(DRUID_BROKER_HTTP_ENDPOINT).getValue();
    druidOverlordUrl = reportingContext.getProperty(DRUID_OVERLORD_HTTP_ENDPOINT).getValue();
    hiveServerUri = reportingContext.getProperty(HIVE_SERVER_CONNECTION_STRING).getValue();
    TAG_DIMENSION_NAME = reportingContext.getProperty(HISTORIAN_TAG_DIMENSION).getValue();
    //druidMetaUri = reportingContext.getProperty(DRUID_METASTORE_CONNECTION_STRING).getValue();
    String[] atlasURL = { atlasUrl };

    if (atlasClient == null) {
        getLogger().info("Creating new Atlas client for {}", new Object[] { atlasUrl });
        atlasClient = new AtlasClient(atlasURL, basicAuth);
    }
    if (atlasVersion == 0.0) {
        atlasVersion = Double.valueOf(getAtlasVersion(atlasUrl + "/api/atlas/admin/version", basicAuth));
        getLogger().info("********** Atlas Version is: " + atlasVersion);
    }
    getLogger().info("********** Number of Reports Sent: " + timesTriggered);

    if (timesTriggered == 0) {
        String hiveUsername = "hive";
        String hivePassword = "hive";
        try {
            getLogger().info("********** Establishing Connection to HDFS...");
            String hdfsPath = nameNodeUrl + "/";
            Configuration conf = new Configuration();
            conf.set("fs.hdfs.impl", org.apache.hadoop.hdfs.DistributedFileSystem.class.getName());
            conf.set("fs.file.impl", org.apache.hadoop.fs.LocalFileSystem.class.getName());
            fs = FileSystem.get(new URI(hdfsPath), conf);

            //createHDFSDirectory(lateDataRoot);
            createHDFSDirectory(lateDataTasksPath);

            getLogger().info("********** Checking for Unresolved Indexing Tasks...");
            FileStatus[] fileStatus = fs.listStatus(new Path(lateDataTasksPath));
            for (FileStatus status : fileStatus) {
                if (status.isDirectory()) {
                    String[] address = status.getPath().toString().split("/");
                    String currentPath = status.getPath().toString();
                    String currentDirName = address[address.length - 1];
                    String currentTaskId = currentDirName.replace("|", ":");
                    String ingestSpec = readHDFSFile(currentPath + "/ingestSpec");
                    List<String> sourceData = Arrays.asList(readHDFSFile(currentPath + "/sourceData").split(","));

                    getLogger().info("********** Loading Unresolved Indexing Task:" + currentTaskId);
                    Map<String, Object> currentTaskMetaData = new HashMap<String, Object>();
                    currentTaskMetaData.put("ingestSpec", ingestSpec);
                    currentTaskMetaData.put("sourceData", sourceData);
                    deltaIndexTasks.put(currentTaskId, currentTaskMetaData);
                }
            }

            getLogger().info("********** Establishing Connection to Hive Server...");
            Class.forName("org.apache.hive.jdbc.HiveDriver");
            hiveConnection = DriverManager.getConnection(hiveServerUri, hiveUsername, hivePassword);

            getLogger().info("********** Create Business Taxonomy Terms...");
            String termPath = "/Catalog/terms/Unassigned";
            String termDefinition = "{\"name\":\"Unassigned\",\"description\":\"\"}";
            createBusinessTerm(termPath, termDefinition);

            getLogger().info("********** Checking if data model has been created...");
            /*
            try {
                atlasClient.getType(HistorianDataTypes.TAG_DIMENSION.getName());
                getLogger().info("********************* Trait: " + HistorianDataTypes.TAG_DIMENSION.getName()
                        + " is already present");
            } catch (AtlasServiceException e) {
                getLogger().info("***************** Creating " + HistorianDataTypes.TAG_DIMENSION.getName()
                        + " Trait...");
                atlasClient.createTraitType(HistorianDataTypes.TAG_DIMENSION.getName());
            }*/
            String historianDataModelJSON = generateHistorianDataModel();
            getLogger().info("********** Historian Data Model as JSON = " + historianDataModelJSON);
            //atlasClient.createType(historianDataModelJSON);
            getLogger().info("********** Created Types: " + atlasClient.createType(historianDataModelJSON));
            updateHiveColumnClassAttributes();
        } catch (AtlasServiceException e) {
            e.printStackTrace();
        } catch (AtlasException e) {
            e.printStackTrace();
        } catch (ClassNotFoundException e) {
            e.printStackTrace();
        } catch (SQLException e) {
            e.printStackTrace();
        } catch (IOException e) {
            e.printStackTrace();
        } catch (URISyntaxException e) {
            e.printStackTrace();
        }
    }
    timesTriggered++;

    getLogger().info(
            "********** Looking for Druid Datasources to expose as Hive Tables or update with new information...");
    Iterator<String> resultIterator = getDruidDataSourceList().iterator();
    while (resultIterator.hasNext()) {
        String dataSource = resultIterator.next();
        dataSourceDetails.put(dataSource, getDruidDataSourceDetails(dataSource));
        getLogger().info("********** Exposing Druid Data Source: " + dataSource);
        exposeDruidDataSourceAsHiveTable(dataSource);
        getLogger().info("********** Update Atlas Hive Tables and Column for Druid Data Source: " + dataSource);
        updateDataSourceHiveColumnAttributes(dataSource);
    }

    getLogger().info("********** Checking for Late Arriving Data...");
    List<String> dataSourceExclusions = new ArrayList<String>();
    List<String> deletedTasks = new ArrayList<String>();
    Map<String, Object> newTasks = new HashMap<String, Object>();
    for (String taskId : deltaIndexTasks.keySet()) {
        String status = getIndexTaskStatus(taskId);
        if (status.equalsIgnoreCase("SUCCESS")) {
            getLogger().info("********** Indexing Task " + taskId
                    + " completed successfully, removing source data and task meta data...");
            List<String> sourceDataList = (List<String>) ((Map) deltaIndexTasks.get(taskId)).get("sourceData");
            Iterator<String> currentSourceObjectIterator = sourceDataList.iterator();
            while (currentSourceObjectIterator.hasNext()) {
                String currentSourceObject = currentSourceObjectIterator.next();
                deleteHDFSObject(currentSourceObject);
            }
            deleteHDFSObject(lateDataTasksPath + "/" + taskId.replace(":", "__"));
            deletedTasks.add(taskId);
        } else if (status.equalsIgnoreCase("PENDING") || status.equalsIgnoreCase("RUNNING")) {
            getLogger().info("********** Indexing Task " + taskId + " is currently " + status
                    + ", excluding source data from eligibility for new indexing tasks");
            List<String> sourceDataList = (List<String>) ((Map) deltaIndexTasks.get(taskId)).get("sourceData");
            dataSourceExclusions.addAll(sourceDataList);
        } else {
            getLogger().info("********** Indexing Task " + taskId + " is in " + status
                    + " state, excluding source data from eligibility for new indexing tasks");
            getLogger().info(
                    "********** Obtain task logs from Druid Overlord Console, address the problem, and then restart the task manually...");
            List<String> sourceDataList = (List<String>) ((Map) deltaIndexTasks.get(taskId)).get("sourceData");
            dataSourceExclusions.addAll(sourceDataList);
            String ingestSpec = (String) ((Map) deltaIndexTasks.get(taskId)).get("ingestSpec");
            String newTaskId = createDruidIndexingTask(ingestSpec);
            renameHDFSObject(lateDataTasksPath + "/" + taskId.replace(":", "__"),
                    lateDataTasksPath + "/" + newTaskId.replace(":", "__"));
            newTasks.put(newTaskId, ((Map) deltaIndexTasks.get(taskId)));
            deletedTasks.add(taskId);
        }
    }
    deltaIndexTasks.putAll(newTasks);
    deltaIndexTasks.keySet().removeAll(deletedTasks);

    indexLateData(dataSourceExclusions);
    getLogger().info("********** Done...");
}
From source file:com.hortonworks.historian.nifi.reporter.HistorianDeanReporter.java
License:Apache License
private void indexLateData(List<String> dataSourceExclusions) {
    String nifiControllersUrl = nifiUrl + "/nifi-api/flow/process-groups/root/controller-services";
    try {
        JSONArray controllers = getJSONFromUrl(nifiControllersUrl, basicAuth).getJSONArray("controllerServices");
        getLogger().info("********** Getting List of Druid Tranquility Controllers...");
        for (int i = 0; i < controllers.length(); i++) {
            JSONObject currentController = controllers.getJSONObject(i).getJSONObject("component");
            String currentControllerType = currentController.getString("type");
            if (currentControllerType
                    .equalsIgnoreCase("com.hortonworks.nifi.controller.DruidTranquilityController")) {
                String lateDataPath = lateDataRoot + "/" + currentController.getJSONObject("properties")
                        .getString("query_granularity").toLowerCase() + "/";
                getLogger().info("********** Checking for Late Arriving Data at HDFS Path: " + lateDataPath);
                if (fs.exists(new Path(lateDataPath))) {
                    FileStatus[] fileStatus = fs.listStatus(new Path(lateDataPath));
                    List<Date> dates = new ArrayList<Date>();
                    List<String> sourceData = new ArrayList<String>();
                    for (FileStatus status : fileStatus) {
                        String[] address = status.getPath().toString().split("/");
                        String currentBin = address[address.length - 1];
                        Date binDate = new SimpleDateFormat("yyyy-MM-dd-HH-mm").parse(currentBin);
                        sourceData.add(lateDataPath + currentBin);
                        dates.add(binDate);
                    }
                    ((Collection<?>) sourceData).removeAll(dataSourceExclusions);
                    getLogger().info("********** Detected " + sourceData.size()
                            + " bins of relevant late data, initiating Delta Indexing task...");
                    if (fileStatus.length > 0 && sourceData.size() > 0) {
                        String intervalStart = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss.SSS'Z'")
                                .format(Collections.min(dates));
                        String intervalEnd = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss.SSS'Z'")
                                .format(Collections.max(dates));
                        String bins = String.join(",", sourceData);
                        JSONArray dimensionsList = new JSONArray(Arrays.asList(currentController
                                .getJSONObject("properties").getString("dimensions_list").split(",")));
                        String ingestSpec = "{"
                                + " \"type\" : \"index_hadoop\","
                                + " \"spec\" : {"
                                + " \"dataSchema\" : {"
                                + " \"dataSource\": \"" + currentController.getJSONObject("properties").getString("data_source") + "\","
                                + " \"parser\" : {"
                                + " \"type\" : \"hadoopyString\","
                                + " \"parseSpec\" : {"
                                + " \"format\" : \"json\","
                                + " \"timestampSpec\" : {"
                                + " \"column\" : \"" + currentController.getJSONObject("properties").getString("timestamp_field") + "\","
                                + " \"format\" : \"auto\""
                                + " },"
                                + " \"dimensionsSpec\" : {"
                                + " \"dimensions\": " + dimensionsList
                                + " }"
                                + " }"
                                + " },"
                                + " \"metricsSpec\" : " + currentController.getJSONObject("properties").getString("aggregators_descriptor") + ","
                                + " \"granularitySpec\" : {"
                                + " \"type\" : \"uniform\","
                                + " \"segmentGranularity\" : \"" + currentController.getJSONObject("properties").getString("segment_granularity") + "\","
                                + " \"queryGranularity\" : \"" + currentController.getJSONObject("properties").getString("query_granularity") + "\","
                                + " \"intervals\": [\"" + intervalStart + "/" + intervalEnd + "\"]"
                                + " }"
                                + " },"
                                + " \"ioConfig\" : {"
                                + " \"type\" : \"hadoop\","
                                + " \"inputSpec\" : {"
                                + " \"type\" : \"multi\","
                                + " \"children\": ["
                                + " {"
                                + " \"type\" : \"dataSource\","
                                + " \"ingestionSpec\" : {"
                                + " \"dataSource\": \"" + currentController.getJSONObject("properties").getString("data_source") + "\","
                                + " \"intervals\": [\"" + intervalStart + "/" + intervalEnd + "\"]"
                                + " }"
                                + " },"
                                + " {"
                                + " \"type\" : \"static\","
                                + " \"paths\": \"" + bins + "\""
                                + " }"
                                + " ]"
                                + " }"
                                + " },"
                                + " \"tuningConfig\" : {"
                                + " \"type\": \"hadoop\""
                                + " }"
                                + " }"
                                + "}";
                        getLogger().info("********** Delta Ingestion Spec: " + ingestSpec);
                        String indexTaskId = createDruidIndexingTask(ingestSpec);
                        getLogger().info("********** Created Indexing Task " + indexTaskId);
                        Map<String, Object> currentTaskMetaData = new HashMap<String, Object>();
                        currentTaskMetaData.put("ingestSpec", ingestSpec);
                        currentTaskMetaData.put("sourceData", sourceData);
                        deltaIndexTasks.put(indexTaskId, currentTaskMetaData);
                        String currentTaskDirPath = lateDataTasksPath + "/" + indexTaskId.replace(":", "__");
                        getLogger().info("********** Persisting Record of Task: " + currentTaskDirPath);
                        currentTaskDirPath = createHDFSDirectory(currentTaskDirPath);
                        writeHDFSFile(currentTaskDirPath + "/ingestSpec", ingestSpec);
                        writeHDFSFile(currentTaskDirPath + "/sourceData", bins);
                    } else {
                        getLogger().info("********** " + lateDataPath + " does not contain any data...");
                    }
                } else {
                    getLogger().info("********** There is a Druid Controller mapped to " + lateDataPath
                            + ", however, the path does not yet exist...");
                }
            }
        }
    } catch (IOException e) {
        e.printStackTrace();
    } catch (ParseException e) {
        e.printStackTrace();
    } catch (JSONException e) {
        e.printStackTrace();
    }
}
From source file:com.ibm.bi.dml.parser.DataExpression.java
License:Open Source License
/**
 * @param filename
 * @return
 * @throws LanguageException
 */
public JSONObject readMetadataFile(String filename, boolean conditional) throws LanguageException {
    JSONObject retVal = null;
    boolean exists = false;
    FileSystem fs = null;

    try {
        fs = FileSystem.get(ConfigurationManager.getCachedJobConf());
    } catch (Exception e) {
        raiseValidateError("could not read the configuration file: " + e.getMessage(), false);
    }

    Path pt = new Path(filename);
    try {
        if (fs.exists(pt)) {
            exists = true;
        }
    } catch (Exception e) {
        exists = false;
    }

    boolean isDirBoolean = false;
    try {
        if (exists && fs.getFileStatus(pt).isDirectory())
            isDirBoolean = true;
        else
            isDirBoolean = false;
    } catch (Exception e) {
        raiseValidateError("error validing whether path " + pt.toString() + " is directory or not: "
                + e.getMessage(), conditional);
    }

    // CASE: filename is a directory -- process as a directory
    if (exists && isDirBoolean) {
        // read directory contents
        retVal = new JSONObject();

        FileStatus[] stats = null;
        try {
            stats = fs.listStatus(pt);
        } catch (Exception e) {
            raiseValidateError("for MTD file in directory, error reading directory with MTD file "
                    + pt.toString() + ": " + e.getMessage(), conditional);
        }

        for (FileStatus stat : stats) {
            Path childPath = stat.getPath(); // gives directory name
            if (childPath.getName().startsWith("part")) {
                BufferedReader br = null;
                try {
                    br = new BufferedReader(new InputStreamReader(fs.open(childPath)));
                } catch (Exception e) {
                    raiseValidateError("for MTD file in directory, error reading part of MTD file with path "
                            + childPath.toString() + ": " + e.getMessage(), conditional);
                }

                JSONObject childObj = null;
                try {
                    childObj = JSONHelper.parse(br);
                } catch (Exception e) {
                    raiseValidateError("for MTD file in directory, error parsing part of MTD file with path "
                            + childPath.toString() + ": " + e.getMessage(), conditional);
                }

                for (Object obj : childObj.entrySet()) {
                    @SuppressWarnings("unchecked")
                    Entry<Object, Object> e = (Entry<Object, Object>) obj;
                    Object key = e.getKey();
                    Object val = e.getValue();
                    retVal.put(key, val);
                }
            }
        } // end for
    }
    // CASE: filename points to a file
    else if (exists) {
        BufferedReader br = null;

        // try reading MTD file
        try {
            br = new BufferedReader(new InputStreamReader(fs.open(pt)));
        } catch (Exception e) {
            raiseValidateError("error reading MTD file with path " + pt.toString() + ": " + e.getMessage(),
                    conditional);
        }

        // try parsing MTD file
        try {
            retVal = JSONHelper.parse(br);
        } catch (Exception e) {
            raiseValidateError("error parsing MTD file with path " + pt.toString() + ": " + e.getMessage(),
                    conditional);
        }
    }

    return retVal;
}