Example usage for java.nio.file Path resolve

List of usage examples for java.nio.file Path resolve

Introduction

On this page you can find example usages of java.nio.file.Path.resolve.

Prototype

default Path resolve(String other) 

Source Link

Document

Converts a given path string to a Path and resolves it against this Path in exactly the manner specified by the resolve(Path) method.

Usage

From source file:edu.jhu.hlt.concrete.ingesters.simple.DoubleLineBreakFileIngester.java

/**
 * See usage string./*from w w  w.j a va  2  s. c om*/
 *
 * @param args
 */
public static void main(String[] args) {
    if (args.length != 4) {
        System.err.println("This program converts a character-based file to a .concrete file.");
        System.err.println("The text file must contain UTF-8 encoded characters.");
        System.err.println(
                "If the file contains any double-newlines, the file will be split into sections where those double-newlines occur.");
        System.err.println(
                "The .concrete file will share the same name as the input file, including the extension.");
        System.err.println("This program takes 4 arguments.");
        System.err.println("Argument 1: path/to/a/character/based/file");
        System.err.println("Argument 2: type of Communication to generate [e.g., tweet]");
        System.err.println("Argument 3: type of Sections to generate [e.g., passage]");
        System.err.println("Argument 4: path/to/out/concrete/file");
        System.err.println("Example usage: " + CompleteFileIngester.class.getName()
                + " /my/text/file story passage /my/output/folder");
        System.exit(1);
    }

    String inPathStr = args[0];
    Path inPath = Paths.get(inPathStr);
    try {
        ExistingNonDirectoryFile ef = new ExistingNonDirectoryFile(inPath);
        Optional<String> commType = Optional.ofNullable(args[1]);
        Optional<String> sectionType = Optional.ofNullable(args[2]);
        Optional<String> outPathStr = Optional.ofNullable(args[3]);

        Path ep = ef.getPath();
        String fn = ef.getName();
        Path outPath = Paths.get(outPathStr.get());
        Path outFile = outPath.resolve(fn + ".concrete");

        // Output directory exists, or it doesn't.
        // Try to create if it does not.
        if (!Files.exists(outPath)) {
            try {
                Files.createDirectories(outPath);
            } catch (IOException e) {
                logger.error("Caught exception when making output directories.", e);
            }

            // if it does, check to make sure it's a directory.
        } else {
            if (!Files.isDirectory(outPath)) {
                logger.error("Output path exists but is not a directory.");
                System.exit(1);
            } else {
                // check to make sure the output file won't be overwritten.
                if (Files.exists(outFile)) {
                    logger.warn("Output file {} exists; not overwriting.", outFile.toString());
                    System.exit(1);
                }
            }
        }

        try {
            UTF8FileIngester ing = new DoubleLineBreakFileIngester(commType.get(), sectionType.get());
            Communication comm = ing.fromCharacterBasedFile(ep);
            new WritableCommunication(comm).writeToFile(outFile, false);
        } catch (IngestException e) {
            logger.error("Caught exception during ingest.", e);
            System.exit(1);
        } catch (ConcreteException e) {
            logger.error("Caught exception writing output.", e);
        }

    } catch (NoSuchFileException e) {
        logger.error("Path {} does not exist.", inPathStr);
        System.exit(1);
    } catch (NotFileException e) {
        logger.error("Path {} is a directory.", inPathStr);
        System.exit(1);
    }
}

From source file:cc.kave.commons.pointsto.evaluation.ProjectTrainValidateEvaluation.java

/**
 * Runs the train/validate evaluation over the usage stores below the
 * hard-coded base directory and exports one row per evaluation result to
 * {@code EvaluationResults/TrainValidate.txt}.
 *
 * <p>The injector-provided {@link ExecutorService} is shut down even when the
 * evaluation or the export fails, so that it is not left running after an error.
 *
 * @param args unused
 * @throws IOException declared by the original entry point; propagated from the evaluation/export
 */
public static void main(String[] args) throws IOException {
    Locale.setDefault(Locale.US);

    Path baseDir = Paths.get("E:\\Coding\\MT");
    Path usageStoresDir = baseDir.resolve("Usages");
    Path resultFile = baseDir.resolve("EvaluationResults").resolve("TrainValidate.txt");

    try {
        ProjectTrainValidateEvaluation evaluator = INJECTOR.getInstance(ProjectTrainValidateEvaluation.class);
        evaluator.run(usageStoresDir);

        // Flatten the (type -> results) map into (type, result) pairs, one exported row each.
        INJECTOR.getInstance(ResultExporter.class).export(resultFile, evaluator.getResults().entrySet().stream()
                .flatMap(e -> e.getValue().stream().map(er -> ImmutablePair.of(e.getKey(), er)))
                .map(p -> new String[] { CoReNames.vm2srcQualifiedType(p.left), p.right.training,
                        p.right.validation, String.format(Locale.US, "%.3f", p.right.score),
                        Integer.toString(p.right.numTrainingUsages),
                        Integer.toString(p.right.numValidationUsages) }));
    } finally {
        // Previously only reached on success; a failure skipped the shutdown entirely.
        INJECTOR.getInstance(ExecutorService.class).shutdown();
    }
}

From source file:hdfs.MiniHDFS.java

/**
 * Starts a {@code MiniDFSCluster} below the given base directory, optionally
 * configured for Kerberos, prepares the {@code /user/elasticsearch} directory
 * and finally writes a PID file and a port file into the base directory.
 *
 * @param args either {@code <baseDirectory>} or
 *             {@code <baseDirectory> <kerberosPrincipal> <kerberosKeytab>}
 * @throws Exception if the cluster cannot be started or any file operation fails
 */
public static void main(String[] args) throws Exception {
    // Three arguments select the secure (Kerberos) setup; one argument the plain setup.
    final boolean secure = args.length == 3;
    if (!secure && args.length != 1) {
        throw new IllegalArgumentException(
                "Expected: MiniHDFS <baseDirectory> [<kerberosPrincipal> <kerberosKeytab>], got: "
                        + Arrays.toString(args));
    }

    // Local directory layout.
    final Path baseDir = Paths.get(args[0]);
    if (System.getenv("HADOOP_HOME") == null) {
        // Provide a hadoop-home/ directory so that the logs do not complain.
        final Path hadoopHome = baseDir.resolve("hadoop-home");
        Files.createDirectories(hadoopHome);
        System.setProperty("hadoop.home.dir", hadoopHome.toAbsolutePath().toString());
    }
    // hdfs-data/ receives all cluster data.
    final Path hdfsData = baseDir.resolve("hdfs-data");

    // Cluster configuration.
    final Configuration cfg = new Configuration();
    cfg.set(MiniDFSCluster.HDFS_MINIDFS_BASEDIR, hdfsData.toAbsolutePath().toString());
    // lower default permission: TODO: needed?
    cfg.set(DFSConfigKeys.DFS_DATANODE_DATA_DIR_PERMISSION_KEY, "766");

    // Security settings, only for the three-argument form.
    if (secure) {
        final String principal = args[1];
        final String keytab = args[2];

        cfg.set(CommonConfigurationKeysPublic.HADOOP_SECURITY_AUTHENTICATION, "kerberos");
        cfg.set(CommonConfigurationKeysPublic.HADOOP_SECURITY_AUTHORIZATION, "true");
        for (String principalKey : new String[] { DFSConfigKeys.DFS_NAMENODE_KERBEROS_PRINCIPAL_KEY,
                DFSConfigKeys.DFS_DATANODE_KERBEROS_PRINCIPAL_KEY,
                DFSConfigKeys.DFS_WEB_AUTHENTICATION_KERBEROS_PRINCIPAL_KEY }) {
            cfg.set(principalKey, principal);
        }
        for (String keytabKey : new String[] { DFSConfigKeys.DFS_NAMENODE_KEYTAB_FILE_KEY,
                DFSConfigKeys.DFS_DATANODE_KEYTAB_FILE_KEY }) {
            cfg.set(keytabKey, keytab);
        }
        cfg.set(DFSConfigKeys.DFS_NAMENODE_ACLS_ENABLED_KEY, "true");
        cfg.set(DFSConfigKeys.DFS_BLOCK_ACCESS_TOKEN_ENABLE_KEY, "true");
        cfg.set(DFSConfigKeys.IGNORE_SECURE_PORTS_FOR_TESTING_KEY, "true");
    }

    UserGroupInformation.setConfiguration(cfg);

    // TODO: remove hardcoded port!
    final MiniDFSCluster.Builder clusterBuilder = new MiniDFSCluster.Builder(cfg);
    clusterBuilder.nameNodePort(secure ? 9998 : 9999);
    final MiniDFSCluster dfs = clusterBuilder.build();

    // Populate the file system.
    final org.apache.hadoop.fs.Path esUserPath = new org.apache.hadoop.fs.Path("/user/elasticsearch");
    try (FileSystem fs = dfs.getFileSystem()) {

        // Create the elasticsearch user directory, with a full-access ACL when security is on.
        fs.mkdirs(esUserPath);
        if (UserGroupInformation.isSecurityEnabled()) {
            final AclEntry esUserAcl = new AclEntry.Builder().setType(AclEntryType.USER)
                    .setName("elasticsearch").setPermission(FsAction.ALL).build();
            final List<AclEntry> acls = new ArrayList<>();
            acls.add(esUserAcl);
            fs.modifyAclEntries(esUserPath, acls);
        }

        // If a bundled repository archive is on the classpath, unpack it and copy it into HDFS.
        final String directoryName = "readonly-repository";
        final String archiveName = directoryName + ".tar.gz";
        final URL archiveUrl = MiniHDFS.class.getClassLoader().getResource(archiveName);
        if (archiveUrl != null) {
            final Path scratchDir = Files.createTempDirectory(MiniHDFS.class.getName());
            final File localArchive = scratchDir.resolve(archiveName).toFile();
            FileUtils.copyURLToFile(archiveUrl, localArchive);
            FileUtil.unTar(localArchive, scratchDir.toFile());

            final org.apache.hadoop.fs.Path unpackedRepository = new org.apache.hadoop.fs.Path(
                    scratchDir.resolve(directoryName).toAbsolutePath().toUri());
            fs.copyFromLocalFile(true, true, unpackedRepository,
                    esUserPath.suffix("/existing/" + directoryName));

            FileUtils.deleteDirectory(scratchDir.toFile());
        }
    }

    // PID file: written to a temp file first, then moved into place atomically.
    final Path pidTmp = Files.createTempFile(baseDir, null, null);
    final String pid = ManagementFactory.getRuntimeMXBean().getName().split("@")[0];
    Files.write(pidTmp, pid.getBytes(StandardCharsets.UTF_8));
    Files.move(pidTmp, baseDir.resolve(PID_FILE_NAME), StandardCopyOption.ATOMIC_MOVE);

    // Port file: same temp-file-then-atomic-move approach.
    final Path portTmp = Files.createTempFile(baseDir, null, null);
    Files.write(portTmp, Integer.toString(dfs.getNameNodePort()).getBytes(StandardCharsets.UTF_8));
    Files.move(portTmp, baseDir.resolve(PORT_FILE_NAME), StandardCopyOption.ATOMIC_MOVE);
}

From source file:edu.jhu.hlt.concrete.ingesters.alnc.ALNCIngesterRunner.java

/**
 * @param args/* w ww  . j  av  a2  s  . co m*/
 */
public static void main(String... args) {
    Thread.setDefaultUncaughtExceptionHandler(new LoggedUncaughtExceptionHandler());
    ALNCIngesterRunner run = new ALNCIngesterRunner();
    JCommander jc = new JCommander(run, args);
    jc.setProgramName(ALNCIngesterRunner.class.getSimpleName());
    if (run.delegate.help) {
        jc.usage();
    }

    try {
        Path outpath = Paths.get(run.delegate.outputPath);
        IngesterParameterDelegate.prepare(outpath);

        for (String pstr : run.delegate.paths) {
            LOGGER.debug("Running on file: {}", pstr);
            Path p = Paths.get(pstr);
            new ExistingNonDirectoryFile(p);
            Path outWithExt = outpath.resolve(p.getFileName() + ".tar.gz");

            if (Files.exists(outWithExt)) {
                if (!run.delegate.overwrite) {
                    LOGGER.info("File: {} exists and overwrite disabled. Not running.", outWithExt.toString());
                    continue;
                } else {
                    Files.delete(outWithExt);
                }
            }

            try (ALNCIngester ing = new ALNCIngester(p);
                    OutputStream os = Files.newOutputStream(outWithExt);
                    GzipCompressorOutputStream gout = new GzipCompressorOutputStream(os);
                    TarArchiver arch = new TarArchiver(gout)) {
                Iterator<Communication> iter = ing.iterator();
                while (iter.hasNext()) {
                    Communication c = iter.next();
                    LOGGER.debug("Got comm: {}", c.getId());
                    arch.addEntry(new ArchivableCommunication(c));
                }
            } catch (IngestException e) {
                LOGGER.error("Caught exception processing path: " + pstr, e);
            }
        }
    } catch (NotFileException | IOException e) {
        LOGGER.error("Caught exception processing.", e);
    }
}

From source file:com.github.horrorho.inflatabledonkey.Main.java

/**
 * @param args the command line arguments
 * @throws IOException/*  w ww . j av a  2  s .  c o  m*/
 */
public static void main(String[] args) throws IOException {
    try {
        if (!PropertyLoader.instance().test(args)) {
            return;
        }
    } catch (IllegalArgumentException ex) {
        System.out.println("Argument error: " + ex.getMessage());
        System.out.println("Try '" + Property.APP_NAME.value() + " --help' for more information.");
        System.exit(-1);
    }

    // SystemDefault HttpClient.
    // TODO concurrent
    CloseableHttpClient httpClient = HttpClients.custom().setUserAgent("CloudKit/479 (13A404)")
            .useSystemProperties().build();

    // Auth
    // TODO rework when we have UncheckedIOException for Authenticator
    Auth auth = Property.AUTHENTICATION_TOKEN.value().map(Auth::new).orElse(null);

    if (auth == null) {
        auth = Authenticator.authenticate(httpClient, Property.AUTHENTICATION_APPLEID.value().get(),
                Property.AUTHENTICATION_PASSWORD.value().get());
    }
    logger.debug("-- main() - auth: {}", auth);
    logger.info("-- main() - dsPrsID:mmeAuthToken: {}:{}", auth.dsPrsID(), auth.mmeAuthToken());

    if (Property.ARGS_TOKEN.booleanValue().orElse(false)) {
        System.out.println("DsPrsID:mmeAuthToken " + auth.dsPrsID() + ":" + auth.mmeAuthToken());
        return;
    }

    logger.info("-- main() - Apple ID: {}", Property.AUTHENTICATION_APPLEID.value());
    logger.info("-- main() - password: {}", Property.AUTHENTICATION_PASSWORD.value());
    logger.info("-- main() - token: {}", Property.AUTHENTICATION_TOKEN.value());

    // Account
    Account account = Accounts.account(httpClient, auth);

    // Backup
    Backup backup = Backup.create(httpClient, account);

    // BackupAccount
    BackupAccount backupAccount = backup.backupAccount(httpClient);
    logger.debug("-- main() - backup account: {}", backupAccount);

    // Devices
    List<Device> devices = backup.devices(httpClient, backupAccount.devices());
    logger.debug("-- main() - device count: {}", devices.size());

    // Snapshots
    List<SnapshotID> snapshotIDs = devices.stream().map(Device::snapshots).flatMap(Collection::stream)
            .collect(Collectors.toList());
    logger.info("-- main() - total snapshot count: {}", snapshotIDs.size());

    Map<String, Snapshot> snapshots = backup.snapshot(httpClient, snapshotIDs).stream().collect(
            Collectors.toMap(s -> s.record().getRecordIdentifier().getValue().getName(), Function.identity()));

    boolean repeat = false;
    do {

        for (int i = 0; i < devices.size(); i++) {
            Device device = devices.get(i);
            List<SnapshotID> deviceSnapshotIDs = device.snapshots();

            System.out.println(i + " " + device.info());

            for (int j = 0; j < deviceSnapshotIDs.size(); j++) {
                SnapshotID sid = deviceSnapshotIDs.get(j);
                System.out.println("\t" + j + snapshots.get(sid.id()).info() + "   " + sid.timestamp());
            }
        }
        if (Property.PRINT_SNAPSHOTS.booleanValue().orElse(false)) {
            return;
        }
        // Selection
        Scanner input = new Scanner(System.in);

        int deviceIndex;
        int snapshotIndex = Property.SELECT_SNAPSHOT_INDEX.intValue().get();

        if (devices.size() > 1) {
            System.out.printf("Select a device [0 - %d]: ", devices.size() - 1);
            deviceIndex = input.nextInt();
        } else
            deviceIndex = Property.SELECT_DEVICE_INDEX.intValue().get();

        if (deviceIndex >= devices.size() || deviceIndex < 0) {
            System.out.println("No such device: " + deviceIndex);
            System.exit(-1);
        }

        Device device = devices.get(deviceIndex);
        System.out.println("Selected device: " + deviceIndex + ", " + device.info());

        if (device.snapshots().size() > 1) {
            System.out.printf("Select a snapshot [0 - %d]: ", device.snapshots().size() - 1);
            snapshotIndex = input.nextInt();
        } else
            snapshotIndex = Property.SELECT_SNAPSHOT_INDEX.intValue().get();

        if (snapshotIndex >= devices.get(deviceIndex).snapshots().size() || snapshotIndex < 0) {
            System.out.println("No such snapshot for selected device: " + snapshotIndex);
            System.exit(-1);
        }

        logger.info("-- main() - arg device index: {}", deviceIndex);
        logger.info("-- main() - arg snapshot index: {}", snapshotIndex);

        String selected = devices.get(deviceIndex).snapshots().get(snapshotIndex).id();
        Snapshot snapshot = snapshots.get(selected);
        System.out.println("Selected snapshot: " + snapshotIndex + ", " + snapshot.info());

        // Asset list.
        List<Assets> assetsList = backup.assetsList(httpClient, snapshot);
        logger.info("-- main() - assets count: {}", assetsList.size());

        // Domains filter --domain option
        String chosenDomain = Property.FILTER_DOMAIN.value().orElse("").toLowerCase(Locale.US);
        logger.info("-- main() - arg domain substring filter: {}", Property.FILTER_DOMAIN.value());
        // Output domains --domains option
        if (Property.PRINT_DOMAIN_LIST.booleanValue().orElse(false)) {
            System.out.println("Domains / file count:");
            assetsList.stream().filter(a -> a.domain().isPresent())
                    .map(a -> a.domain().get() + " / " + a.files().size()).sorted()
                    .forEach(System.out::println);

            System.out.print("Type a domain ('null' to exit): ");
            chosenDomain = input.next().toLowerCase(Locale.US);
            if (chosenDomain.equals("null"))
                return;
            // TODO check Assets without domain information.
        }

        String domainSubstring = chosenDomain;

        Predicate<Optional<String>> domainFilter = domain -> domain.map(d -> d.toLowerCase(Locale.US))
                .map(d -> d.contains(domainSubstring)).orElse(false);

        List<String> files = Assets.files(assetsList, domainFilter);
        logger.info("-- main() - domain filtered file count: {}", files.size());

        // Output folders.
        Path outputFolder = Paths.get(Property.OUTPUT_FOLDER.value().orElse("output"));
        Path assetOutputFolder = outputFolder.resolve("assets"); // TODO assets value injection
        Path chunkOutputFolder = outputFolder.resolve("chunks"); // TODO chunks value injection
        logger.info("-- main() - output folder chunks: {}", chunkOutputFolder);
        logger.info("-- main() - output folder assets: {}", assetOutputFolder);

        // Download tools.
        AuthorizeAssets authorizeAssets = AuthorizeAssets.backupd();
        DiskChunkStore chunkStore = new DiskChunkStore(chunkOutputFolder);
        StandardChunkEngine chunkEngine = new StandardChunkEngine(chunkStore);
        AssetDownloader assetDownloader = new AssetDownloader(chunkEngine);
        KeyBagManager keyBagManager = backup.newKeyBagManager();

        // Mystery Moo. 
        Moo moo = new Moo(authorizeAssets, assetDownloader, keyBagManager);

        // Filename extension filter.
        String filenameExtension = Property.FILTER_EXTENSION.value().orElse("").toLowerCase(Locale.US);
        logger.info("-- main() - arg filename extension filter: {}", Property.FILTER_EXTENSION.value());

        Predicate<Asset> assetFilter = asset -> asset.relativePath().map(d -> d.toLowerCase(Locale.US))
                .map(d -> d.endsWith(filenameExtension)).orElse(false);

        // Batch process files in groups of 100.
        // TODO group files into batches based on file size.
        List<List<String>> batches = ListUtils.partition(files, 100);

        for (List<String> batch : batches) {
            List<Asset> assets = backup.assets(httpClient, batch).stream().filter(assetFilter::test)
                    .collect(Collectors.toList());
            logger.info("-- main() - filtered asset count: {}", assets.size());
            moo.download(httpClient, assets, assetOutputFolder);
        }
        System.out.print("Download other snapshot (Y/N)? ");
        repeat = input.next().toLowerCase(Locale.US).charAt(0) == 'y';
    } while (repeat == true);
}

From source file:edu.jhu.hlt.concrete.ingesters.annotatednyt.AnnotatedNYTIngesterRunner.java

/**
 * Converts each input tar.gz archive of annotated NYT documents into a
 * tar.gz archive of Communications below the output path.
 *
 * <p>The output file name is the second-to-last element of the input path
 * (held in a variable named {@code year}; presumably a year directory, to be
 * confirmed against the corpus layout) immediately followed by the input file
 * name. When help is requested, only the usage text is printed and nothing is
 * ingested.
 *
 * @param args command line arguments, parsed by JCommander into this runner
 */
public static void main(String... args) {
    Thread.setDefaultUncaughtExceptionHandler(new LoggedUncaughtExceptionHandler());
    AnnotatedNYTIngesterRunner run = new AnnotatedNYTIngesterRunner();
    JCommander jc = new JCommander(run, args);
    jc.setProgramName(AnnotatedNYTIngesterRunner.class.getSimpleName());
    if (run.delegate.help) {
        jc.usage();
        // Asking for help must not also kick off an ingest run.
        return;
    }

    try {
        Path outpath = Paths.get(run.delegate.outputPath);
        IngesterParameterDelegate.prepare(outpath);

        NYTCorpusDocumentParser parser = new NYTCorpusDocumentParser();
        for (String pstr : run.delegate.paths) {
            LOGGER.debug("Running on file: {}", pstr);
            Path p = Paths.get(pstr);
            // Constructed only for its validation of the input path.
            new ExistingNonDirectoryFile(p);
            int nPaths = p.getNameCount();
            if (nPaths < 2) {
                // getName(nPaths - 2) would throw IllegalArgumentException for a bare file name.
                LOGGER.error("Path {} has no parent directory element to build the output name from. Skipping.",
                        pstr);
                continue;
            }
            Path year = p.getName(nPaths - 2);
            Path outWithExt = outpath.resolve(year.toString() + p.getFileName());

            if (Files.exists(outWithExt)) {
                if (!run.delegate.overwrite) {
                    LOGGER.info("File: {} exists and overwrite disabled. Not running.", outWithExt.toString());
                    continue;
                } else {
                    Files.delete(outWithExt);
                }
            }

            try (InputStream is = Files.newInputStream(p);
                    BufferedInputStream bin = new BufferedInputStream(is);
                    TarGzArchiveEntryByteIterator iter = new TarGzArchiveEntryByteIterator(bin);

                    OutputStream os = Files.newOutputStream(outWithExt);
                    GzipCompressorOutputStream gout = new GzipCompressorOutputStream(os);
                    TarArchiver arch = new TarArchiver(gout)) {
                // Adapt the single-use iterator to an Iterable so it can be streamed once.
                Iterable<byte[]> able = () -> iter;
                StreamSupport.stream(able.spliterator(), false).map(ba -> parser.fromByteArray(ba, false))
                        .map(doc -> new AnnotatedNYTDocument(doc))
                        .map(and -> new CommunicationizableAnnotatedNYTDocument(and).toCommunication())
                        .forEach(comm -> {
                            try {
                                arch.addEntry(new ArchivableCommunication(comm));
                            } catch (IOException e) {
                                // A failed entry is logged; the remaining entries are still written.
                                LOGGER.error("Caught exception processing file: " + pstr, e);
                            }
                        });
            }
        }
    } catch (NotFileException | IOException e) {
        LOGGER.error("Caught exception processing.", e);
    }
}

From source file:edu.usc.goffish.gofs.tools.GoFSFormat.java

/**
 * Formats a GoFS deployment: parses the optional arguments and the config
 * file, validates the serializer, the name node and every data node, builds a
 * deploy directory in a temporary working directory, and for each data node
 * writes its config, copies the deploy directory into place via SCP and
 * registers the node with the name node.
 *
 * <p>Recognized optional arguments are {@code -config <path>} and
 * {@code -copyBinaries}. The temporary working directory is always removed.
 *
 * @param args command line arguments
 * @throws IOException if the config cannot be read, a node is unavailable or deployment fails
 */
public static void main(String[] args) throws IOException {
    if (args.length < REQUIRED_ARGS) {
        PrintUsageAndQuit(null);
    }

    if (args.length == 1 && args[0].equals("-help")) {
        PrintUsageAndQuit(null);
    }

    // The default config is looked up next to the location this class was loaded from.
    Path executableDirectory;
    try {
        executableDirectory = Paths
                .get(GoFSFormat.class.getProtectionDomain().getCodeSource().getLocation().toURI()).getParent();
    } catch (URISyntaxException e) {
        throw new RuntimeException("Unexpected error retrieving executable location", e);
    }
    Path configPath = executableDirectory.resolve(DEFAULT_CONFIG).normalize();

    boolean copyBinaries = false;

    // parse optional arguments
    int i = 0;
    OptArgLoop: for (i = 0; i < args.length - REQUIRED_ARGS; i++) {
        switch (args[i]) {
        case "-config":
            i++;

            // Guard against "-config" being the last argument (possible when no
            // trailing required arguments exist), which would index past the array.
            if (i >= args.length) {
                PrintUsageAndQuit("Config file - missing path after -config");
            }

            try {
                configPath = Paths.get(args[i]);
            } catch (InvalidPathException e) {
                PrintUsageAndQuit("Config file - " + e.getMessage());
            }

            break;
        case "-copyBinaries":
            copyBinaries = true;
            break;
        default:
            break OptArgLoop;
        }
    }

    if (args.length - i < REQUIRED_ARGS) {
        PrintUsageAndQuit(null);
    }

    // finished parsing args
    if (i < args.length) {
        PrintUsageAndQuit("Unrecognized argument \"" + args[i] + "\"");
    }

    // parse config

    System.out.println("Parsing config...");

    PropertiesConfiguration config = new PropertiesConfiguration();
    config.setDelimiterParsingDisabled(true);
    // Close the config stream once loaded instead of leaking the file handle.
    try (java.io.InputStream configStream = Files.newInputStream(configPath)) {
        config.load(configStream);
    } catch (ConfigurationException e) {
        throw new IOException(e);
    }

    // retrieve data nodes
    ArrayList<URI> dataNodes;
    {
        String[] dataNodesArray = config.getStringArray(GOFS_DATANODES_KEY);
        if (dataNodesArray.length == 0) {
            throw new ConversionException("Config must contain key " + GOFS_DATANODES_KEY);
        }

        dataNodes = new ArrayList<>(dataNodesArray.length);

        try {
            for (String node : dataNodesArray) {
                URI dataNodeURI = new URI(node);

                if (!"file".equalsIgnoreCase(dataNodeURI.getScheme())) {
                    throw new ConversionException("config key " + GOFS_DATANODES_KEY + " value \"" + dataNodeURI
                            + "\" has invalid format - data node urls must have 'file' scheme");
                } else if (dataNodeURI.getPath() == null || dataNodeURI.getPath().isEmpty()) {
                    throw new ConversionException("config key " + GOFS_DATANODES_KEY + " value \"" + dataNodeURI
                            + "\" has invalid format - data node urls must have an absolute path specified");
                }

                // ensure uri ends with a slash, so we know it is a directory
                if (!dataNodeURI.getPath().endsWith("/")) {
                    dataNodeURI = dataNodeURI.resolve(dataNodeURI.getPath() + "/");
                }

                dataNodes.add(dataNodeURI);
            }
        } catch (URISyntaxException e) {
            throw new ConversionException(
                    "Config key " + GOFS_DATANODES_KEY + " has invalid format - " + e.getMessage());
        }
    }

    // validate serializer type
    Class<? extends ISliceSerializer> serializerType;
    {
        String serializerTypeName = config.getString(GOFS_SERIALIZER_KEY);
        if (serializerTypeName == null) {
            throw new ConversionException("Config must contain key " + GOFS_SERIALIZER_KEY);
        }

        try {
            serializerType = SliceSerializerProvider.loadSliceSerializerType(serializerTypeName);
        } catch (ReflectiveOperationException e) {
            throw new ConversionException(
                    "Config key " + GOFS_SERIALIZER_KEY + " has invalid format - " + e.getMessage());
        }
    }

    // retrieve name node
    IInternalNameNode nameNode;
    try {
        nameNode = NameNodeProvider.loadNameNodeFromConfig(config, GOFS_NAMENODE_TYPE_KEY,
                GOFS_NAMENODE_LOCATION_KEY);
    } catch (ReflectiveOperationException e) {
        throw new RuntimeException("Unable to load name node", e);
    }

    System.out.println("Contacting name node...");

    // validate name node
    if (!nameNode.isAvailable()) {
        throw new IOException("Name node at " + nameNode.getURI() + " is not available");
    }

    System.out.println("Contacting data nodes...");

    // validate data nodes
    for (URI dataNode : dataNodes) {
        // only attempt ssh if host exists
        if (dataNode.getHost() != null) {
            try {
                SSHHelper.SSH(dataNode, "true");
            } catch (IOException e) {
                throw new IOException("Data node at " + dataNode + " is not available", e);
            }
        }
    }

    // create temporary directory
    Path workingDir = Files.createTempDirectory("gofs_format");
    try {
        // create deploy directory
        Path deployDirectory = Files.createDirectory(workingDir.resolve(DATANODE_DIR_NAME));

        // create empty slice directory
        Files.createDirectory(deployDirectory.resolve(DataNode.DATANODE_SLICE_DIR));

        // copy binaries
        if (copyBinaries) {
            System.out.println("Copying binaries...");
            FileUtils.copyDirectory(executableDirectory.toFile(),
                    deployDirectory.resolve(executableDirectory.getFileName()).toFile());
        }

        // write config file
        Path dataNodeConfigFile = deployDirectory.resolve(DataNode.DATANODE_CONFIG);
        try {
            // create config for every data node and scp deploy folder into place
            for (URI dataNodeParent : dataNodes) {
                URI dataNode = dataNodeParent.resolve(DATANODE_DIR_NAME);

                PropertiesConfiguration datanode_config = new PropertiesConfiguration();
                datanode_config.setDelimiterParsingDisabled(true);
                datanode_config.setProperty(DataNode.DATANODE_INSTALLED_KEY, true);
                datanode_config.setProperty(DataNode.DATANODE_NAMENODE_TYPE_KEY,
                        config.getString(GOFS_NAMENODE_TYPE_KEY));
                datanode_config.setProperty(DataNode.DATANODE_NAMENODE_LOCATION_KEY,
                        config.getString(GOFS_NAMENODE_LOCATION_KEY));
                datanode_config.setProperty(DataNode.DATANODE_LOCALHOSTURI_KEY, dataNode.toString());

                // The stream must be closed before the SCP below so the config file is
                // complete on disk and the handle is released.
                try (java.io.OutputStream configOut = Files.newOutputStream(dataNodeConfigFile)) {
                    datanode_config.save(configOut);
                } catch (ConfigurationException e) {
                    throw new IOException(e);
                }

                System.out.println("Formatting data node " + dataNode.toString() + "...");

                // scp everything into place on the data node
                SCPHelper.SCP(deployDirectory, dataNodeParent);

                // update name node
                nameNode.addDataNode(dataNode);
            }

            // update name node
            nameNode.setSerializer(serializerType);
        } catch (Exception e) {
            System.out.println(
                    "ERROR: data node formatting interrupted - name node and data nodes are in an inconsistent state and require clean up");
            throw e;
        }

        System.out.println("GoFS format complete");

    } finally {
        FileUtils.deleteQuietly(workingDir.toFile());
    }
}

From source file:edu.jhu.hlt.concrete.ingesters.webposts.WebPostIngester.java

/**
 * Ingests one or more web post XML files and writes each resulting
 * Communication to {@code <output folder>/<communication id>.comm}.
 *
 * <p>The output folder (including any missing parents) is created when it
 * does not exist yet. The process exits with status 1 on a usage error or when
 * the output path exists but is not a directory. A failure on one input file
 * is logged and the remaining files are still processed.
 *
 * @param args the output folder followed by at least one XML file path
 */
public static void main(String... args) {
    Thread.setDefaultUncaughtExceptionHandler(new LoggedUncaughtExceptionHandler());
    if (args.length < 2) {
        LOGGER.info("Usage: {} {} {} {}", WebPostIngester.class.getName(), "/path/to/output/folder",
                "/path/to/web/.xml/file", "<additional/xml/file/paths>");
        System.exit(1);
    }

    Path outPath = Paths.get(args[0]);
    // Create the output folder itself, not just its parent; otherwise a fresh
    // output path always fails the directory check below.
    if (!Files.exists(outPath)) {
        try {
            Files.createDirectories(outPath);
        } catch (IOException e) {
            throw new UncheckedIOException(e);
        }
    }

    if (!Files.isDirectory(outPath)) {
        LOGGER.error("Output path must be a directory.");
        System.exit(1);
    }

    WebPostIngester ing = new WebPostIngester();
    for (int i = 1; i < args.length; i++) {
        Path lp = Paths.get(args[i]);
        LOGGER.info("On path: {}", lp.toString());
        try {
            Communication c = ing.fromCharacterBasedFile(lp);
            new WritableCommunication(c).writeToFile(outPath.resolve(c.getId() + ".comm"), true);
        } catch (IngestException | ConcreteException e) {
            LOGGER.error("Caught exception during ingest on file: " + args[i], e);
        }
    }
}

From source file:edu.jhu.hlt.concrete.ingesters.bolt.BoltForumPostIngester.java

/**
 * Ingests each given XML file and writes the resulting Communication into
 * the output folder as {@code <communication id>.comm}.
 *
 * @param args {@code args[0]} is the output folder (created if it does not
 *             exist); {@code args[1..]} are the paths of the files to ingest
 */
public static void main(String... args) {
    Thread.setDefaultUncaughtExceptionHandler(new LoggedUncaughtExceptionHandler());
    if (args.length < 2) {
        LOGGER.info("Usage: {} {} {} {}", BoltForumPostIngester.class.getName(), "/path/to/output/folder",
                "/path/to/bolt/.xml/file", "<additional/xml/file/paths>");
        System.exit(1);
    }

    Path outPath = Paths.get(args[0]);
    // Create the output folder itself (createDirectories also creates any
    // missing ancestors). Creating only the parent would leave the
    // directory check below failing for every not-yet-existing folder.
    if (!Files.exists(outPath)) {
        try {
            Files.createDirectories(outPath);
        } catch (IOException e) {
            throw new UncheckedIOException(e);
        }
    }

    // Still reject a path that exists but is not a directory.
    if (!Files.isDirectory(outPath)) {
        LOGGER.error("Output path must be a directory.");
        System.exit(1);
    }

    BoltForumPostIngester ing = new BoltForumPostIngester();
    // args[0] is the output folder; every later argument is an input file.
    for (int i = 1; i < args.length; i++) {
        Path lp = Paths.get(args[i]);
        LOGGER.info("On path: {}", lp.toString());
        try {
            Communication c = ing.fromCharacterBasedFile(lp);
            new WritableCommunication(c).writeToFile(outPath.resolve(c.getId() + ".comm"), true);
        } catch (IngestException | ConcreteException e) {
            // A failure on one file is logged; the remaining files are still processed.
            LOGGER.error("Caught exception during ingest on file: " + args[i], e);
        }
    }
}

From source file:edu.jhu.hlt.concrete.gigaword.expt.ConvertGigawordDocuments.java

/**
 * @param args/*from w ww  .j av  a2 s  . c  o m*/
 */
public static void main(String... args) {
    Thread.setDefaultUncaughtExceptionHandler(new UncaughtExceptionHandler() {

        @Override
        public void uncaughtException(Thread t, Throwable e) {
            logger.error("Thread {} caught unhandled exception.", t.getName());
            logger.error("Unhandled exception.", e);
        }
    });

    if (args.length != 2) {
        logger.info("Usage: {} {} {}", GigawordConcreteConverter.class.getName(), "path/to/expt/file",
                "path/to/out/folder");
        System.exit(1);
    }

    String exptPathStr = args[0];
    String outPathStr = args[1];

    // Verify path points to something.
    Path exptPath = Paths.get(exptPathStr);
    if (!Files.exists(exptPath)) {
        logger.error("File: {} does not exist. Re-run with the correct path to "
                + " the experiment 2 column file. See README.md.");
        System.exit(1);
    }

    logger.info("Experiment map located at: {}", exptPathStr);

    // Create output dir if not yet created.
    Path outPath = Paths.get(outPathStr);
    if (!Files.exists(outPath)) {
        logger.info("Creating directory: {}", outPath.toString());
        try {
            Files.createDirectories(outPath);
        } catch (IOException e) {
            logger.error("Caught an IOException when creating output dir.", e);
            System.exit(1);
        }
    }

    logger.info("Output directory located at: {}", outPathStr);

    // Read in expt map. See README.md.
    Map<String, Set<String>> exptMap = null;
    try (Reader r = ExperimentUtils.createReader(exptPath); BufferedReader br = new BufferedReader(r)) {
        exptMap = ExperimentUtils.createFilenameToIdMap(br);
    } catch (IOException e) {
        logger.error("Caught an IOException when creating expt map.", e);
        System.exit(1);
    }

    // Start a timer.
    logger.info("Gigaword -> Concrete beginning.");
    StopWatch sw = new StopWatch();
    sw.start();
    // Iterate over expt map.
    exptMap.entrySet()
            // .parallelStream()
            .forEach(p -> {
                final String pathStr = p.getKey();
                final Set<String> ids = p.getValue();
                final Path lp = Paths.get(pathStr);
                logger.info("Converting path: {}", pathStr);

                // Get the file name and immediate folder it is under.
                int nElements = lp.getNameCount();
                Path fileName = lp.getName(nElements - 1);
                Path subFolder = lp.getName(nElements - 2);
                String newFnStr = fileName.toString().split("\\.")[0] + ".tar";

                // Mirror folders in output dir.
                Path localOutFolder = outPath.resolve(subFolder);
                Path localOutPath = localOutFolder.resolve(newFnStr);

                // Create output subfolders.
                if (!Files.exists(localOutFolder) && !Files.isDirectory(localOutFolder)) {
                    logger.info("Creating out file: {}", localOutFolder.toString());
                    try {
                        Files.createDirectories(localOutFolder);
                    } catch (IOException e) {
                        throw new RuntimeException("Caught an IOException when creating output dir.", e);
                    }
                }

                // Iterate over communications.
                Iterator<Communication> citer;
                try (OutputStream os = Files.newOutputStream(localOutPath);
                        BufferedOutputStream bos = new BufferedOutputStream(os);
                        Archiver archiver = new TarArchiver(bos);) {
                    citer = new ConcreteGigawordDocumentFactory().iterator(lp);
                    while (citer.hasNext()) {
                        Communication c = citer.next();
                        String cId = c.getId();

                        // Document ID must be in the set. Remove.
                        boolean wasInSet = ids.remove(cId);
                        if (!wasInSet) {
                            // Some IDs are duplicated in Gigaword.
                            // See ERRATA.
                            logger.debug(
                                    "ID: {} was parsed from path: {}, but was not in the experiment map. Attempting to remove dupe.",
                                    cId, pathStr);

                            // Attempt to create a duplicate id (append .duplicate to the id).
                            // Then, try to remove again.
                            String newId = RepairDuplicateIDs.repairDuplicate(cId);
                            boolean dupeRemoved = ids.remove(newId);
                            // There are not nested duplicates, so this should never fire.
                            if (!dupeRemoved) {
                                logger.info("Failed to remove dupe.");
                                return;
                            } else
                                // Modify the communication ID to the unique version.
                                c.setId(newId);
                        }

                        archiver.addEntry(new ArchivableCommunication(c));
                    }

                    logger.info("Finished path: {}", pathStr);
                } catch (ConcreteException ex) {
                    logger.error("Caught ConcreteException during Concrete mapping.", ex);
                    logger.error("Path: {}", pathStr);
                } catch (IOException e) {
                    logger.error("Error archiving communications.", e);
                    logger.error("Path: {}", localOutPath.toString());
                }
            });

    sw.stop();
    logger.info("Finished.");
    Minutes m = new Duration(sw.getTime()).toStandardMinutes();
    logger.info("Runtime: Approximately {} minutes.", m.getMinutes());
}