Usage examples for org.apache.hadoop.fs.FileSystem.startLocalOutput
public Path startLocalOutput(Path fsOutputFile, Path tmpLocalFile) throws IOException
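startLocalOutput() returns a path on local disk that the caller can write to with ordinary local file I/O. For a remote FileSystem such as HDFS this is the tmpLocalFile argument; once writing is finished, the matching completeLocalOutput(fsOutputFile, tmpLocalFile) call copies the local file to its final destination. For a FileSystem that is already local, startLocalOutput() hands back the output path itself and completeLocalOutput() is a no-op. A minimal, self-contained sketch of this round trip, before the real-world examples below; the class name, paths, and payload here are hypothetical, not taken from any of the projects listed:

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class StartLocalOutputSketch {
  public static void main(String[] args) throws IOException {
    Configuration conf = new Configuration();
    // The target FileSystem, e.g. HDFS if fs.defaultFS points at a cluster.
    FileSystem fs = FileSystem.get(conf);

    // Hypothetical paths: the eventual destination in fs, and a local scratch file.
    Path fsOutputFile = new Path("/data/output/part-00000");
    Path tmpLocalFile = new Path("/tmp/part-00000");

    // startLocalOutput() returns a local path to write to (tmpLocalFile for a
    // remote FileSystem, the destination itself for a local one).
    Path local = fs.startLocalOutput(fsOutputFile, tmpLocalFile);

    // Write the output using plain local I/O.
    FileSystem localFs = FileSystem.getLocal(conf);
    FSDataOutputStream out = localFs.create(local);
    try {
      out.writeBytes("example payload\n");
    } finally {
      out.close();
    }

    // completeLocalOutput() moves the finished local file to fsOutputFile
    // (a no-op when fs is already the local FileSystem).
    fs.completeLocalOutput(fsOutputFile, tmpLocalFile);
  }
}

Every example below follows this same write-locally-then-promote pattern, usually to build a file format (SQLite partition files, Lucene indexes) that cannot be written through a streaming HDFS OutputStream.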
From source file: com.splout.db.hadoop.engine.SploutSQLProxyOutputFormat.java
License: Apache License
@Override
public RecordWriter<ITuple, NullWritable> getRecordWriter(TaskAttemptContext context)
    throws IOException, InterruptedException {

  long waitTimeHeartBeater = context.getConfiguration().getLong(HeartBeater.WAIT_TIME_CONF, 5000);
  heartBeater = new HeartBeater(context, waitTimeHeartBeater);
  heartBeater.needHeartBeat();
  conf = context.getConfiguration();
  this.context = context;

  outputFormat.setConf(context.getConfiguration());

  return new RecordWriter<ITuple, NullWritable>() {

    // Temporary and permanent Paths for properly writing Hadoop output files
    private Map<Integer, Path> permPool = new HashMap<Integer, Path>();
    private Map<Integer, Path> tempPool = new HashMap<Integer, Path>();

    private void initSql(int partition) throws IOException, InterruptedException {
      // HDFS final location of the generated partition file. It will be
      // loaded into a temporary folder in HDFS and then finally committed
      // by the OutputCommitter to the proper location.
      FileOutputCommitter committer = (FileOutputCommitter) getOutputCommitter(
          SploutSQLProxyOutputFormat.this.context);
      Path perm = new Path(committer.getWorkPath(), partition + ".db");
      FileSystem fs = perm.getFileSystem(conf);
      // Make a task-unique name that contains the actual index output name to
      // make debugging simpler.
      // Note: if using JVM reuse, the sequence number will not be reset for a
      // new task using the JVM.
      Path temp = conf.getLocalPath("mapred.local.dir",
          "splout_task_" + SploutSQLProxyOutputFormat.this.context.getTaskAttemptID() + '.'
              + FILE_SEQUENCE.incrementAndGet());

      FileSystem localFileSystem = FileSystem.getLocal(conf);
      if (localFileSystem.exists(temp)) {
        localFileSystem.delete(temp, true);
      }
      localFileSystem.mkdirs(temp);

      Path local = fs.startLocalOutput(perm, new Path(temp, partition + ".db"));

      permPool.put(partition, perm);
      tempPool.put(partition, new Path(temp, partition + ".db"));

      outputFormat.initPartition(partition, local);
    }

    @Override
    public void close(TaskAttemptContext ctx) throws IOException, InterruptedException {
      FileSystem fs = FileSystem.get(ctx.getConfiguration());
      try {
        if (ctx != null) {
          heartBeater.setProgress(ctx);
        }
        outputFormat.close();
        for (Map.Entry<Integer, Path> entry : permPool.entrySet()) {
          // Hadoop - completeLocalOutput()
          fs.completeLocalOutput(entry.getValue(), tempPool.get(entry.getKey()));
        }
      } finally {
        // in any case, destroy the HeartBeater
        heartBeater.cancelHeartBeat();
      }
    }

    @Override
    public void write(ITuple tuple, NullWritable ignore) throws IOException, InterruptedException {
      int partition = (Integer) tuple.get(SploutSQLOutputFormat.PARTITION_TUPLE_FIELD);
      if (tempPool.get(partition) == null) {
        initSql(partition);
      }
      outputFormat.write(tuple);
    }
  };
}
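Here each partition's SQLite file is written through the local path returned by startLocalOutput(), and the perm/temp path pairs are pooled so that close() can call completeLocalOutput() once per partition to promote the finished files into the committer's work path on HDFS.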
From source file: org.apache.nutch.indexer.IndexMerger.java
License: Apache License
/**
 * Merge all input indexes into a single output index
 */
public void merge(Path[] indexes, Path outputIndex, Path localWorkingDir) throws IOException {
  if (LOG.isInfoEnabled()) {
    LOG.info("merging indexes to: " + outputIndex);
  }

  FileSystem localFs = FileSystem.getLocal(getConf());
  if (localWorkingDir == null) {
    localWorkingDir = new Path("indexmerger-" + System.currentTimeMillis());
  }
  if (localFs.exists(localWorkingDir)) {
    localFs.delete(localWorkingDir);
  }
  localFs.mkdirs(localWorkingDir);

  // Get local output target
  FileSystem fs = FileSystem.get(getConf());
  Path tmpLocalOutput = new Path(localWorkingDir, "merge-output");
  Path localOutput = fs.startLocalOutput(outputIndex, tmpLocalOutput);

  Directory[] dirs = new Directory[indexes.length];
  for (int i = 0; i < indexes.length; i++) {
    if (LOG.isInfoEnabled()) {
      LOG.info("Adding " + indexes[i]);
    }
    dirs[i] = new FsDirectory(fs, indexes[i], false, this.conf);
  }

  //
  // Merge indices
  //
  IndexWriter writer = new IndexWriter(localOutput.toString(), null, true);
  writer.setMergeFactor(conf.getInt("indexer.mergeFactor", IndexWriter.DEFAULT_MERGE_FACTOR));
  writer.setMaxBufferedDocs(conf.getInt("indexer.minMergeDocs", IndexWriter.DEFAULT_MAX_BUFFERED_DOCS));
  writer.setMaxMergeDocs(conf.getInt("indexer.maxMergeDocs", IndexWriter.DEFAULT_MAX_MERGE_DOCS));
  writer.setTermIndexInterval(
      conf.getInt("indexer.termIndexInterval", IndexWriter.DEFAULT_TERM_INDEX_INTERVAL));
  writer.setInfoStream(LogUtil.getDebugStream(LOG));
  writer.setUseCompoundFile(false);
  writer.setSimilarity(new NutchSimilarity());
  writer.addIndexes(dirs);
  writer.close();

  //
  // Put target back
  //
  fs.completeLocalOutput(outputIndex, tmpLocalOutput);
  FileSystem.getLocal(conf).delete(localWorkingDir);
  if (LOG.isInfoEnabled()) {
    LOG.info("done merging");
  }
}
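The merged Lucene index is built entirely on local disk at the path returned by startLocalOutput(), and only copied up to outputIndex by completeLocalOutput() after the IndexWriter has been closed; the local working directory is then deleted.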
From source file: org.archive.jbs.lucene.LuceneOutputFormat.java
License: Apache License
public RecordWriter<Text, Text> getRecordWriter(final FileSystem fs, final JobConf job, final String name,
    final Progressable progress) throws IOException {
  // Open Lucene index in ${temp}
  this.fs = FileSystem.get(job);
  this.job = job;
  this.perm = new Path(FileOutputFormat.getOutputPath(job), name);
  this.temp = job.getLocalPath("index/_" + (new Random().nextInt()));

  this.fs.delete(perm, true); // delete old, if any

  indexer = new IndexWriter(new NIOFSDirectory(new File(fs.startLocalOutput(perm, temp).toString())),
      new KeywordAnalyzer(), IndexWriter.MaxFieldLength.UNLIMITED);

  indexer.setMergeFactor(job.getInt("jbs.lucene.mergeFactor", 100));
  indexer.setMaxMergeDocs(job.getInt("jbs.lucene.maxMergeDocs", Integer.MAX_VALUE));
  indexer.setRAMBufferSizeMB(job.getInt("jbs.lucene.maxRAMBufferSize", 512));
  indexer.setTermIndexInterval(
      job.getInt("jbs.lucene.termIndexInterval", IndexWriterConfig.DEFAULT_TERM_INDEX_INTERVAL));
  indexer.setMaxFieldLength(job.getInt("jbs.lucene.max.tokens", Integer.MAX_VALUE));
  indexer.setUseCompoundFile(false);
  indexer.setSimilarity(new WebSimilarity());

  LuceneDocumentWriter docWriter = buildDocumentWriter(job, indexer);

  return new LuceneRecordWriter(docWriter);
}
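The IndexWriter is opened directly over the local directory returned by startLocalOutput(perm, temp); the matching completeLocalOutput(perm, temp) call belongs in the writer's close path, which is not shown in this excerpt.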
From source file: org.archive.nutchwax.IndexMerger.java
License: Apache License
/**
 * Merge all input indexes into a single output index
 */
public void merge(IndexReader[] readers, Path outputIndex, Path localWorkingDir, boolean parallel)
    throws IOException {
  LOG.info("merging indexes to: " + outputIndex);

  FileSystem localFs = FileSystem.getLocal(getConf());
  if (localFs.exists(localWorkingDir)) {
    localFs.delete(localWorkingDir, true);
  }
  localFs.mkdirs(localWorkingDir);

  // Get local output target
  FileSystem fs = FileSystem.get(getConf());
  if (fs.exists(outputIndex)) {
    throw new FileAlreadyExistsException("Output directory " + outputIndex + " already exists!");
  }

  Path tmpLocalOutput = new Path(localWorkingDir, "merge-output");
  Path localOutput = fs.startLocalOutput(outputIndex, tmpLocalOutput);

  //
  // Merge indices
  //
  IndexWriter writer = new IndexWriter(localOutput.toString(), null, true);
  writer.setMergeFactor(getConf().getInt("indexer.mergeFactor", IndexWriter.DEFAULT_MERGE_FACTOR));
  writer.setMaxBufferedDocs(getConf().getInt("indexer.minMergeDocs", IndexWriter.DEFAULT_MAX_BUFFERED_DOCS));
  writer.setMaxMergeDocs(getConf().getInt("indexer.maxMergeDocs", IndexWriter.DEFAULT_MAX_MERGE_DOCS));
  writer.setTermIndexInterval(
      getConf().getInt("indexer.termIndexInterval", IndexWriter.DEFAULT_TERM_INDEX_INTERVAL));
  writer.setInfoStream(LogUtil.getDebugStream(LOG));
  writer.setUseCompoundFile(false);
  writer.setSimilarity(new NutchSimilarity());
  writer.addIndexes(readers);
  writer.close();

  //
  // Put target back
  //
  fs.completeLocalOutput(outputIndex, tmpLocalOutput);
  LOG.info("done merging");
}
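As in the Nutch version above, the index is merged locally and promoted to outputIndex with completeLocalOutput() after the writer is closed; this variant merges from IndexReaders rather than directories and additionally refuses to overwrite an existing output directory.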
From source file: org.hbasene.index.create.mapred.IndexOutputFormat.java
License: Apache License
@Override
public RecordWriter<ImmutableBytesWritable, LuceneDocumentWrapper> getRecordWriter(final FileSystem fs,
    JobConf job, String name, final Progressable progress) throws IOException {

  final Path perm = new Path(FileOutputFormat.getOutputPath(job), name);
  final Path temp = job.getLocalPath("index/_" + Integer.toString(random.nextInt()));

  LOG.info("To index into " + perm);

  // delete old, if any
  fs.delete(perm, true);

  final IndexConfiguration indexConf = new IndexConfiguration();
  String content = job.get("hbase.index.conf");
  if (content != null) {
    indexConf.addFromXML(content);
  }

  String analyzerName = indexConf.getAnalyzerName();
  Analyzer analyzer;
  try {
    Class<? extends Analyzer> analyzerClass = Class.forName(analyzerName).asSubclass(Analyzer.class);
    Constructor<? extends Analyzer> analyzerCtor = analyzerClass.getConstructor(Version.class);
    analyzer = analyzerCtor.newInstance(Version.LUCENE_30);
  } catch (Exception e) {
    throw new IOException("Error in creating an analyzer object " + analyzerName);
  }

  // build locally first
  final IndexWriter writer = new IndexWriter(
      FSDirectory.open(new File(fs.startLocalOutput(perm, temp).toString())), analyzer, true,
      IndexWriter.MaxFieldLength.LIMITED);

  // no delete, so no need for maxBufferedDeleteTerms
  writer.setMaxBufferedDocs(indexConf.getMaxBufferedDocs());
  writer.setMaxFieldLength(indexConf.getMaxFieldLength());
  writer.setMaxMergeDocs(indexConf.getMaxMergeDocs());
  writer.setMergeFactor(indexConf.getMergeFactor());
  String similarityName = indexConf.getSimilarityName();
  if (similarityName != null) {
    try {
      Class<? extends Similarity> similarityClass = Class.forName(similarityName)
          .asSubclass(Similarity.class);
      Constructor<? extends Similarity> ctor = similarityClass.getConstructor(Version.class);
      Similarity similarity = ctor.newInstance(Version.LUCENE_30);
      writer.setSimilarity(similarity);
    } catch (Exception e) {
      throw new IOException("Error in creating a similarity object " + similarityName);
    }
  }
  writer.setUseCompoundFile(indexConf.isUseCompoundFile());

  return new RecordWriter<ImmutableBytesWritable, LuceneDocumentWrapper>() {
    AtomicBoolean closed = new AtomicBoolean(false);
    private long docCount = 0;

    public void write(ImmutableBytesWritable key, LuceneDocumentWrapper value) throws IOException {
      // unwrap and index doc
      Document doc = value.get();
      writer.addDocument(doc);
      docCount++;
      progress.progress();
    }

    public void close(final Reporter reporter) throws IOException {
      // spawn a thread to give progress heartbeats
      Thread prog = new Thread() {
        @Override
        public void run() {
          while (!closed.get()) {
            try {
              reporter.setStatus("closing");
              Thread.sleep(1000);
            } catch (InterruptedException e) {
              continue;
            } catch (Throwable e) {
              return;
            }
          }
        }
      };

      try {
        prog.start();

        // optimize index
        if (indexConf.doOptimize()) {
          if (LOG.isInfoEnabled()) {
            LOG.info("Optimizing index.");
          }
          writer.optimize();
        }

        // close index
        writer.close();
        if (LOG.isInfoEnabled()) {
          LOG.info("Done indexing " + docCount + " docs.");
        }

        // copy to perm destination in dfs
        fs.completeLocalOutput(perm, temp);
        if (LOG.isInfoEnabled()) {
          LOG.info("Copy done.");
        }
      } finally {
        closed.set(true);
      }
    }
  };
}
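The returned RecordWriter indexes documents into the local Lucene directory opened over startLocalOutput(perm, temp) and, in close(), optionally optimizes the index, closes the writer, and finally calls completeLocalOutput(perm, temp) to copy the finished index to its permanent location in the DFS, keeping a heartbeat thread alive so the task is not killed during the potentially long copy.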