Example usage for org.apache.hadoop.fs FileSystem getLocal

List of usage examples for org.apache.hadoop.fs FileSystem getLocal

Introduction

On this page you can find example usages of org.apache.hadoop.fs.FileSystem.getLocal.

Prototype

public static LocalFileSystem getLocal(Configuration conf) throws IOException 

Source Link

Document

Get the local FileSystem.

Usage

From source file:it.crs4.pydoop.mapreduce.pipes.CommandLineParser.java

License:Apache License

/**
 * Parses the command line, configures a {@code Job} from the recognised
 * options and runs it to completion.
 *
 * <p>Each option that is present is applied to the job or to its
 * configuration. If a job jar is set, it is additionally exposed to the
 * configuration through a dedicated class loader.
 *
 * @param args the command-line arguments; when empty, usage is printed
 * @return 0 if the job completed successfully; 1 if no arguments were given,
 *         the arguments could not be parsed, or the job did not succeed
 * @throws Exception if configuring or running the job fails
 */
public int run(String[] args) throws Exception {
    CommandLineParser cli = new CommandLineParser();
    if (args.length == 0) {
        cli.printUsage();
        return 1;
    }
    try {
        Job job = new Job(new Configuration());
        job.setJobName(getClass().getName());
        Configuration conf = job.getConfiguration();
        CommandLine results = cli.parse(conf, args);
        if (results.hasOption("input")) {
            Path path = new Path(results.getOptionValue("input"));
            FileInputFormat.setInputPaths(job, path);
        }
        if (results.hasOption("output")) {
            Path path = new Path(results.getOptionValue("output"));
            FileOutputFormat.setOutputPath(job, path);
        }
        if (results.hasOption("jar")) {
            job.setJar(results.getOptionValue("jar"));
        }
        if (results.hasOption("inputformat")) {
            explicitInputFormat = true;
            setIsJavaRecordReader(conf, true);
            job.setInputFormatClass(getClass(results, "inputformat", conf, InputFormat.class));
        }
        if (results.hasOption("javareader")) {
            setIsJavaRecordReader(conf, true);
        }
        if (results.hasOption("map")) {
            setIsJavaMapper(conf, true);
            job.setMapperClass(getClass(results, "map", conf, Mapper.class));
        }
        if (results.hasOption("partitioner")) {
            job.setPartitionerClass(getClass(results, "partitioner", conf, Partitioner.class));
        }
        if (results.hasOption("reduce")) {
            setIsJavaReducer(conf, true);
            job.setReducerClass(getClass(results, "reduce", conf, Reducer.class));
        }
        if (results.hasOption("reduces")) {
            job.setNumReduceTasks(Integer.parseInt(results.getOptionValue("reduces")));
        }
        if (results.hasOption("writer")) {
            explicitOutputFormat = true;
            setIsJavaRecordWriter(conf, true);
            job.setOutputFormatClass(getClass(results, "writer", conf, OutputFormat.class));
        }
        if (results.hasOption("lazyOutput")) {
            if (Boolean.parseBoolean(results.getOptionValue("lazyOutput"))) {
                LazyOutputFormat.setOutputFormatClass(job, job.getOutputFormatClass());
            }
        }
        if (results.hasOption("avroInput")) {
            avroInput = AvroIO.valueOf(results.getOptionValue("avroInput").toUpperCase());
        }
        if (results.hasOption("avroOutput")) {
            avroOutput = AvroIO.valueOf(results.getOptionValue("avroOutput").toUpperCase());
        }

        if (results.hasOption("program")) {
            setExecutable(conf, results.getOptionValue("program"));
        }
        // if they gave us a jar file, include it into the class path
        String jarFile = job.getJar();
        if (jarFile != null) {
            // Go through toURI() first: File.toURL() is deprecated because it
            // does not escape characters that are illegal in URLs (e.g. spaces).
            final URL[] urls = new URL[] {
                    FileSystem.getLocal(conf).pathToFile(new Path(jarFile)).toURI().toURL() };
            // FindBugs complains that creating a URLClassLoader should be
            // in a doPrivileged() block.
            ClassLoader loader = AccessController.doPrivileged(new PrivilegedAction<ClassLoader>() {
                public ClassLoader run() {
                    return new URLClassLoader(urls);
                }
            });
            conf.setClassLoader(loader);
        }
        setupPipesJob(job);
        return job.waitForCompletion(true) ? 0 : 1;
    } catch (ParseException pe) {
        // A malformed command line is reported and answered with the usage text.
        LOG.info("Error : " + pe);
        cli.printUsage();
        return 1;
    }
}

From source file:it.crs4.pydoop.mapreduce.pipes.TaskLog.java

License:Apache License

/**
 * Returns the owner of the log directory of the job that the given task
 * attempt belongs to, as reported by the raw local file system.
 *
 * @param taskid the task attempt whose job log directory is inspected
 * @return the owner recorded in the directory's file status
 * @throws IOException if the local file system or the file status cannot be obtained
 */
static String obtainLogDirOwner(TaskAttemptID taskid) throws IOException {
    FileSystem rawLocalFs = FileSystem.getLocal(new Configuration()).getRaw();
    String jobDirLocation = getJobDir(taskid.getJobID()).getAbsolutePath();
    return rawLocalFs.getFileStatus(new Path(jobDirLocation)).getOwner();
}

From source file:it.crs4.pydoop.mapreduce.pipes.TaskLog.java

License:Apache License

/**
 * Writes the log index for the current task to a temporary file and then
 * renames that file onto the real index file.
 *
 * <p>Layout of the index file:
 * <pre>
 * LOG_DIR: &lt;the dir where the task logs are really stored&gt;
 * STDOUT: &lt;start-offset in the stdout file&gt; &lt;length&gt;
 * STDERR: &lt;start-offset in the stderr file&gt; &lt;length&gt;
 * SYSLOG: &lt;start-offset in the syslog file&gt; &lt;length&gt;
 * </pre>
 *
 * @param logLocation directory holding the stdout, stderr and syslog files
 * @param isCleanup   passed through when resolving the index file names
 * @throws IOException if the temporary index cannot be written or the rename fails with an exception
 */
private static synchronized void writeToIndexFile(String logLocation, boolean isCleanup) throws IOException {
    // Write to a temporary index first; the rename at the end publishes it.
    File tmpIndexFile = getTmpIndexFile(currentTaskid, isCleanup);

    DataOutputStream indexOut = new DataOutputStream(
            new BufferedOutputStream(SecureIOUtils.createForWrite(tmpIndexFile, 0644)));
    try {
        indexOut.writeBytes(LogFileDetail.LOCATION + logLocation + "\n" + LogName.STDOUT.toString() + ":");
        indexOut.writeBytes(Long.toString(prevOutLength) + " ");
        // Each length is the amount the log file has grown past its previous offset.
        long stdoutGrowth = new File(logLocation, LogName.STDOUT.toString()).length() - prevOutLength;
        indexOut.writeBytes(Long.toString(stdoutGrowth) + "\n" + LogName.STDERR + ":");

        indexOut.writeBytes(Long.toString(prevErrLength) + " ");
        long stderrGrowth = new File(logLocation, LogName.STDERR.toString()).length() - prevErrLength;
        indexOut.writeBytes(Long.toString(stderrGrowth) + "\n" + LogName.SYSLOG.toString() + ":");

        indexOut.writeBytes(Long.toString(prevLogLength) + " ");
        long syslogGrowth = new File(logLocation, LogName.SYSLOG.toString()).length() - prevLogLength;
        indexOut.writeBytes(Long.toString(syslogGrowth) + "\n");

        indexOut.close();
        // Already closed cleanly; hand null to the cleanup below.
        indexOut = null;
    } finally {
        IOUtils.cleanup(LOG, indexOut);
    }

    Path finalIndexPath = new Path(getIndexFile(currentTaskid, isCleanup).getAbsolutePath());
    Path tmpIndexPath = new Path(tmpIndexFile.getAbsolutePath());

    // The local file system handle is created on first use and then reused.
    if (localFS == null) {
        localFS = FileSystem.getLocal(new Configuration());
    }
    localFS.rename(tmpIndexPath, finalIndexPath);
}

From source file:it.crs4.pydoop.pipes.Application.java

License:Apache License

/**
 * Writes the given password bytes to a file on the local file system,
 * creating the file with permission "400".
 *
 * @param localPasswordFile path of the file to create
 * @param password          bytes to write
 * @param conf              configuration used to obtain the local file system
 * @throws IOException if the file cannot be created, written or closed
 */
private void writePasswordToLocalFile(String localPasswordFile, byte[] password, JobConf conf)
        throws IOException {
    FileSystem localFs = FileSystem.getLocal(conf);
    Path localPath = new Path(localPasswordFile);
    // try-with-resources closes the stream even when write() throws,
    // so a failed write no longer leaks the open file handle.
    try (FSDataOutputStream out = FileSystem.create(localFs, localPath, new FsPermission("400"))) {
        out.write(password);
    }
}

From source file:it.crs4.pydoop.pipes.Submitter.java

License:Apache License

/**
 * Parses the pipes command line, builds a {@code JobConf} from the
 * recognised options and submits the job via {@code runJob}.
 *
 * <p>Generic Hadoop options are consumed first by
 * {@code GenericOptionsParser}; the remaining arguments are matched against
 * the options registered below.
 *
 * @param args the command-line arguments; when empty, usage is printed
 * @return 0 once the job has run; 1 if no arguments were given or the
 *         arguments (including a malformed {@code -jobconf} entry) could not be parsed
 * @throws Exception if configuring or running the job fails
 */
@Override
public int run(String[] args) throws Exception {
    CommandLineParser cli = new CommandLineParser();
    if (args.length == 0) {
        cli.printUsage();
        return 1;
    }
    cli.addOption("input", false, "input path to the maps", "path");
    cli.addOption("output", false, "output path from the reduces", "path");

    cli.addOption("jar", false, "job jar file", "path");
    cli.addOption("inputformat", false, "java classname of InputFormat", "class");
    //cli.addArgument("javareader", false, "is the RecordReader in Java");
    cli.addOption("map", false, "java classname of Mapper", "class");
    cli.addOption("partitioner", false, "java classname of Partitioner", "class");
    cli.addOption("reduce", false, "java classname of Reducer", "class");
    cli.addOption("writer", false, "java classname of OutputFormat", "class");
    cli.addOption("program", false, "URI to application executable", "class");
    cli.addOption("reduces", false, "number of reduces", "num");
    cli.addOption("jobconf", false,
            "\"n1=v1,n2=v2,..\" (Deprecated) Optional. Add or override a JobConf property.", "key=val");
    cli.addOption("lazyOutput", false, "Optional. Create output lazily", "boolean");
    Parser parser = cli.createParser();
    try {

        GenericOptionsParser genericParser = new GenericOptionsParser(getConf(), args);
        CommandLine results = parser.parse(cli.options, genericParser.getRemainingArgs());

        JobConf job = new JobConf(getConf());

        if (results.hasOption("input")) {
            FileInputFormat.setInputPaths(job, results.getOptionValue("input"));
        }
        if (results.hasOption("output")) {
            FileOutputFormat.setOutputPath(job, new Path(results.getOptionValue("output")));
        }
        if (results.hasOption("jar")) {
            job.setJar(results.getOptionValue("jar"));
        }
        if (results.hasOption("inputformat")) {
            setIsJavaRecordReader(job, true);
            job.setInputFormat(getClass(results, "inputformat", job, InputFormat.class));
        }
        if (results.hasOption("javareader")) {
            setIsJavaRecordReader(job, true);
        }
        if (results.hasOption("map")) {
            setIsJavaMapper(job, true);
            job.setMapperClass(getClass(results, "map", job, Mapper.class));
        }
        if (results.hasOption("partitioner")) {
            job.setPartitionerClass(getClass(results, "partitioner", job, Partitioner.class));
        }
        if (results.hasOption("reduce")) {
            setIsJavaReducer(job, true);
            job.setReducerClass(getClass(results, "reduce", job, Reducer.class));
        }
        if (results.hasOption("reduces")) {
            job.setNumReduceTasks(Integer.parseInt(results.getOptionValue("reduces")));
        }
        if (results.hasOption("writer")) {
            setIsJavaRecordWriter(job, true);
            job.setOutputFormat(getClass(results, "writer", job, OutputFormat.class));
        }

        if (results.hasOption("lazyOutput")) {
            if (Boolean.parseBoolean(results.getOptionValue("lazyOutput"))) {
                LazyOutputFormat.setOutputFormatClass(job, job.getOutputFormat().getClass());
            }
        }

        if (results.hasOption("program")) {
            setExecutable(job, results.getOptionValue("program"));
        }
        if (results.hasOption("jobconf")) {
            LOG.warn("-jobconf option is deprecated, please use -D instead.");
            String options = results.getOptionValue("jobconf");
            StringTokenizer tokenizer = new StringTokenizer(options, ",");
            while (tokenizer.hasMoreTokens()) {
                String keyVal = tokenizer.nextToken().trim();
                // Split on the first '=' only, so a value may itself contain '='
                // and an empty value ("key=") is kept instead of being dropped.
                String[] keyValSplit = keyVal.split("=", 2);
                if (keyValSplit.length != 2) {
                    // Report a missing '=' as a parse error (usage + exit code 1)
                    // rather than failing with ArrayIndexOutOfBoundsException.
                    throw new ParseException("Malformed -jobconf entry (expected key=val): " + keyVal);
                }
                job.set(keyValSplit[0], keyValSplit[1]);
            }
        }
        // if they gave us a jar file, include it into the class path
        String jarFile = job.getJar();
        if (jarFile != null) {
            // Go through toURI() first: File.toURL() is deprecated because it
            // does not escape characters that are illegal in URLs (e.g. spaces).
            final URL[] urls = new URL[] {
                    FileSystem.getLocal(job).pathToFile(new Path(jarFile)).toURI().toURL() };
            //FindBugs complains that creating a URLClassLoader should be
            //in a doPrivileged() block.
            ClassLoader loader = AccessController.doPrivileged(new PrivilegedAction<ClassLoader>() {
                public ClassLoader run() {
                    return new URLClassLoader(urls);
                }
            });
            job.setClassLoader(loader);
        }

        runJob(job);
        return 0;
    } catch (ParseException pe) {
        // A malformed command line is reported and answered with the usage text.
        LOG.info("Error : " + pe);
        cli.printUsage();
        return 1;
    }

}

From source file:it.crs4.seal.demux.DemuxReducer.java

License:Open Source License

/**
 * Loads the sample sheet and initialises the barcode lookup and the
 * read-handling flags from the configuration.
 *
 * @param localSampleSheetPath path of the sample sheet; it is qualified
 *                             against the local file system before loading
 * @param conf                 configuration supplying the maximum number of
 *                             mismatches and the index-read / separate-reads flags
 * @throws IOException      if the local file system or the sample sheet cannot be accessed
 * @throws RuntimeException if the sample sheet has an invalid format; the
 *                          original {@code SampleSheet.FormatException} is attached as the cause
 */
public void setup(String localSampleSheetPath, Configuration conf) throws IOException {
    // load the sample sheet
    Path path = new Path(localSampleSheetPath).makeQualified(FileSystem.getLocal(conf));
    SampleSheet sampleSheet;
    try {
        sampleSheet = DemuxUtils.loadSampleSheet(path, conf);
    } catch (SampleSheet.FormatException e) {
        // Keep the original exception as the cause so its stack trace is not lost.
        throw new RuntimeException("Error loading sample sheet.  Message: " + e.getMessage(), e);
    }
    barcodeLookup = new BarcodeLookup(sampleSheet,
            conf.getInt(Demux.CONF_MAX_MISMATCHES, Demux.DEFAULT_MAX_MISMATCHES));

    // Index reads are expected unless the configuration explicitly disables them.
    expectIndexRead = !conf.getBoolean(Demux.CONF_NO_INDEX_READS, false);
    separatesReads = conf.getBoolean(Demux.CONF_SEPARATE_READS, false);
}

From source file:it.crs4.seal.read_sort.ReadSort.java

License:Open Source License

/**
 * Resolves the reference annotation path named by the
 * {@code REF_ANN_PROP_NAME} property into a fully qualified path.
 *
 * @param conf configuration holding the annotation property
 * @return the annotation path, qualified against the local file system when
 *         {@code mapred.cache.archives} is set, otherwise against the path's own file system
 * @throws IOException      if a file system cannot be obtained
 * @throws RuntimeException if the annotation property is not set
 */
public static Path getAnnotationPath(Configuration conf) throws IOException {
    final String annotationName = conf.get(ReadSort.REF_ANN_PROP_NAME);
    if (annotationName == null) {
        throw new RuntimeException("missing property " + REF_ANN_PROP_NAME);
    }

    LOG.info("reading reference annotation from " + annotationName);

    final Path annotationPath = new Path(annotationName);

    // When the reference is shipped through the distributed cache it lives
    // on the local file system; otherwise use the path's own file system.
    final boolean referenceInDistributedCache = conf.get("mapred.cache.archives") != null;
    final FileSystem sourceFs = referenceInDistributedCache
            ? FileSystem.getLocal(conf)
            : annotationPath.getFileSystem(conf);

    return annotationPath.makeQualified(sourceFs);
}

From source file:it.polito.dbdmg.searum.ARM.java

License:Apache License

/**
 * Generates the header table from the serialized string representation
 * /*from ww  w . j  av a2  s .c om*/
 * @return Deserialized header table
 */
public static List<Pair<String, Long>> readFList(Configuration conf) throws IOException {
    List<Pair<String, Long>> list = new ArrayList<Pair<String, Long>>();
    Path[] files = DistributedCache.getLocalCacheFiles(conf);
    if (files == null) {
        throw new IOException("Cannot read Frequency list from Distributed Cache");
    }
    if (files.length != 1) {
        throw new IOException("Cannot read Frequency list from Distributed Cache (" + files.length + ')');
    }
    FileSystem fs = FileSystem.getLocal(conf);
    Path fListLocalPath = fs.makeQualified(files[0]);
    // Fallback if we are running locally.
    if (!fs.exists(fListLocalPath)) {
        URI[] filesURIs = DistributedCache.getCacheFiles(conf);
        if (filesURIs == null) {
            throw new IOException("Cannot read header table from Distributed Cache");
        }
        if (filesURIs.length != 1) {
            throw new IOException("Cannot read header table from Distributed Cache (" + files.length + ')');
        }
        fListLocalPath = new Path(filesURIs[0].getPath());
    }
    for (Pair<Text, LongWritable> record : new SequenceFileIterable<Text, LongWritable>(fListLocalPath, true,
            conf)) {
        list.add(new Pair<String, Long>(record.getFirst().toString(), record.getSecond().get()));
    }
    return list;
}

From source file:ivory.cascade.retrieval.RunQueryLocalCascade.java

License:Apache License

/**
 * Creates the query runner on top of the local file system.
 *
 * @param args arguments handed unchanged to the {@code CascadeBatchQueryRunner}
 */
public RunQueryLocalCascade(String[] args)
        throws SAXException, IOException, ParserConfigurationException, Exception, NotBoundException {
    LOG.info("Initializing QueryRunner...");

    // The runner reads its data through the local file system.
    FileSystem localFs = FileSystem.getLocal(new Configuration());
    runner = new CascadeBatchQueryRunner(args, localFs);
}

From source file:ivory.core.data.dictionary.DefaultFrequencySortedDictionaryTest.java

License:Apache License

/**
 * Loads the dictionary from the three data files under {@code etc/} and
 * checks its size, term/id lookups in both directions, and that iteration
 * yields the terms in id order and visits exactly {@code size()} entries.
 */
@Test
public void test1() throws IOException {
    FileSystem fs = FileSystem.getLocal(new Configuration());
    Path termsFilePath = new Path("etc/trec-index-terms.dat");
    Path termIDsFilePath = new Path("etc/trec-index-termids.dat");
    Path idToTermFilePath = new Path("etc/trec-index-termid-mapping.dat");

    DefaultFrequencySortedDictionary dictionary = new DefaultFrequencySortedDictionary(termsFilePath,
            termIDsFilePath, idToTermFilePath, fs);

    // Terms expected for ids 1..4, in id order.
    final String[] leadingTerms = { "page", "time", "will", "year" };

    assertEquals(312232, dictionary.size());

    // id -> term
    for (int id = 1; id <= leadingTerms.length; id++) {
        assertEquals(leadingTerms[id - 1], dictionary.getTerm(id));
    }
    assertEquals("nikaan", dictionary.getTerm(100000));

    // term -> id
    for (int id = 1; id <= leadingTerms.length; id++) {
        assertEquals(id, dictionary.getId(leadingTerms[id - 1]));
    }
    assertEquals(100000, dictionary.getId("nikaan"));

    // One past the last id yields no term.
    assertEquals(null, dictionary.getTerm(312233));

    // The iterator starts with the same leading terms and has more after them.
    Iterator<String> iter = dictionary.iterator();
    for (String expectedTerm : leadingTerms) {
        assertTrue(iter.hasNext());
        assertEquals(expectedTerm, iter.next());
    }
    assertTrue(iter.hasNext());

    // A for-each pass visits size() entries.
    int seenByForEach = 0;
    for (@SuppressWarnings("unused")
    String term : dictionary) {
        seenByForEach++;
    }
    assertEquals(dictionary.size(), seenByForEach);

    // An explicit iterator pass visits size() entries as well.
    int seenByIterator = 0;
    for (Iterator<String> it = dictionary.iterator(); it.hasNext(); it.next()) {
        seenByIterator++;
    }
    assertEquals(dictionary.size(), seenByIterator);
}