Example usage for org.apache.lucene.store SimpleFSDirectory SimpleFSDirectory

List of usage examples for org.apache.lucene.store SimpleFSDirectory SimpleFSDirectory

Introduction

In this page you can find the example usage for org.apache.lucene.store SimpleFSDirectory SimpleFSDirectory.

Prototype

public SimpleFSDirectory(Path path, LockFactory lockFactory) throws IOException 

Source Link

Document

Create a new SimpleFSDirectory for the named location.

Usage

From source file:com.b2international.index.lucene.Directories.java

License:Apache License

/**
 * Opens an {@link FSDirectory} for {@code path}, like {@link #openFile(File)} but with a
 * caller-supplied {@link LockFactory}.
 *
 * @param path        index location on disk
 * @param lockFactory lock factory to install on the returned directory
 * @return an {@link FSDirectory} implementation suited to the current platform/JRE
 * @throws IOException if the directory cannot be opened
 */
public static FSDirectory openFile(final Path path, final LockFactory lockFactory) throws IOException {
    // Prefer memory-mapped I/O on known OSes when running a 64-bit JRE that supports unmapping.
    final boolean knownOs = Constants.WINDOWS || Constants.SUN_OS || Constants.LINUX || Constants.MAC_OS_X;
    if (knownOs && Constants.JRE_IS_64BIT && MMapDirectory.UNMAP_SUPPORTED) {
        return new MMapDirectory(path, lockFactory);
    }
    // 32-bit (or unmap-less) Windows falls back to the simple RandomAccessFile implementation.
    if (Constants.WINDOWS) {
        return new SimpleFSDirectory(path, lockFactory);
    }
    // Everything else gets positional NIO reads.
    return new NIOFSDirectory(path, lockFactory);
}

From source file:com.luc.indexer.LuceneCommonIndexer.java

License:Apache License

/**
 * Creates an indexer that reads documents from {@code sourcePath} and writes the Lucene
 * index to {@code destinationPath}.
 *
 * @param sourcePath      directory containing the documents to index
 * @param destinationPath directory that will hold the Lucene index
 * @throws IOException if the index directory or writer cannot be opened
 */
public LuceneCommonIndexer(String sourcePath, String destinationPath) throws IOException {
    source = new File(sourcePath);
    destination = new File(destinationPath);
    // null LockFactory lets SimpleFSDirectory pick its default locking (Lucene 3.x API).
    final Directory indexDir = new SimpleFSDirectory(destination, null);
    // 'true' means (re)create the index; field length is unbounded.
    indexWriter = new IndexWriter(indexDir, new StandardAnalyzer(Version.LUCENE_35), true,
            MaxFieldLength.UNLIMITED);
}

From source file:com.luc.searcher.LuceneFileSearcher.java

License:Apache License

/**
 * Searches the index under {@code indexDir} for {@code query} and returns the stored
 * "filename" field of each of the top 10 hits.
 * <p>
 * The query is run both as a parsed free-text query and as a sloppy phrase query over the
 * whitespace-split terms, combined with SHOULD clauses for better ranking.
 *
 * @param indexDir directory containing the Lucene index
 * @param query    raw user query string
 * @return file names of the matching documents (at most 10)
 * @throws IOException    if the index cannot be read
 * @throws ParseException if the query cannot be parsed
 */
public List<String> search(File indexDir, String query) throws IOException, ParseException {

    list = new ArrayList<String>();

    dir = new SimpleFSDirectory(indexDir, null);

    // Build a phrase query from the individual terms; slop 5 tolerates loose ordering.
    pQuery = new PhraseQuery();
    for (String term : query.split(" ")) {
        pQuery.add(new Term("contents", term));
    }
    pQuery.setSlop(5);

    indexSearcher = new IndexSearcher(dir);

    // Also parse the raw query so analyzer-normalized terms contribute to the match.
    qp = new QueryParser(Version.LUCENE_35, "contents", new StandardAnalyzer(Version.LUCENE_35));
    q = qp.parse(query);

    // Combine the parsed query and the phrase query for better ranking.
    bQuery = new BooleanQuery();
    bQuery.add(q, BooleanClause.Occur.SHOULD);
    bQuery.add(pQuery, BooleanClause.Occur.SHOULD);

    // Retrieve the documents matching the combined query.
    TopDocs topDocs = indexSearcher.search(bQuery, 10);

    // BUGFIX: iterate the returned hits, not totalHits. totalHits counts ALL matches,
    // while scoreDocs holds at most 10 entries — queries with more than 10 matches
    // previously threw ArrayIndexOutOfBoundsException.
    for (ScoreDoc match : topDocs.scoreDocs) {
        Document doc = indexSearcher.doc(match.doc);
        list.add(doc.get("filename"));
    }

    return list;
}

From source file:com.zimbra.cs.index.LuceneDirectory.java

License:Open Source License

/**
 * Creates a new {@link LuceneDirectory} with {@code SingleInstanceLockFactory}.
 * <p>
 * The underlying {@link FSDirectory} implementation is selected by
 * {@link LC#zimbra_index_lucene_io_impl}:
 * <ul>
 *  <li>{@code null} - let Lucene pick the best implementation for the current platform
 *      via {@link FSDirectory#open}.
 *  <li>{@code simple} - {@link SimpleFSDirectory}: straightforward java.io.RandomAccessFile
 *      based implementation; poor concurrent performance (reads on the same file are
 *      synchronized).
 *  <li>{@code nio} - {@link NIOFSDirectory}: positional FileChannel reads, avoiding
 *      synchronization; the preferred choice on non-Windows platforms (a Windows-only Sun
 *      JRE bug makes it slow there).
 *  <li>{@code mmap} - {@link MMapDirectory}: memory-mapped reads; good when virtual memory
 *      is plentiful relative to index size (e.g. 64-bit JREs). Note Java cannot unmap files
 *      from user code, so OS file handles are only released when GC collects the byte
 *      buffers, which can delay deletes/overwrites (an exception on Windows, lingering disk
 *      usage elsewhere).
 * </ul>
 *
 * @param path directory path
 */
public static LuceneDirectory open(File path) throws IOException {
    String impl = LC.zimbra_index_lucene_io_impl.value();
    // Only one branch runs, so a single shared lock-factory instance is equivalent.
    SingleInstanceLockFactory lockFactory = new SingleInstanceLockFactory();
    FSDirectory dir;
    if ("simple".equals(impl)) {
        dir = new SimpleFSDirectory(path, lockFactory);
    } else if ("nio".equals(impl)) {
        dir = new NIOFSDirectory(path, lockFactory);
    } else if ("mmap".equals(impl)) {
        dir = new MMapDirectory(path, lockFactory);
    } else {
        // Unset/unknown value: let Lucene choose the best implementation.
        dir = FSDirectory.open(path, lockFactory);
    }
    ZimbraLog.index.info("OpenLuceneIndex impl=%s,dir=%s", dir.getClass().getSimpleName(), path);
    return new LuceneDirectory(dir);
}

From source file:de.walware.statet.r.internal.core.rhelp.index.REnvIndexReader.java

License:Open Source License

/**
 * Opens a read-only searcher over the R help index of the given R environment.
 *
 * @param rEnvConfig configuration identifying the R environment whose index to open
 * @throws Exception if the index directory or reader cannot be opened
 */
public REnvIndexReader(final IREnvConfiguration rEnvConfig) throws Exception {
    // SimpleFSDirectory is used deliberately instead of FSDirectory.open(...):
    // NIOFSDirectory does not tolerate the Thread#interrupt() calls issued by the
    // information hover manager.
    final FSDirectory dir = new SimpleFSDirectory(REnvIndexWriter.getIndexDirectory(rEnvConfig), null);
    this.indexReader = DirectoryReader.open(dir);
    this.indexSearcher = new IndexSearcher(this.indexReader);
    this.indexSearcher.setSimilarity(SIMILARITY);
}

From source file:it.unipd.dei.ims.falcon.indexing.Indexing.java

License:Apache License

/**
 * Indexes all the songs in the specified path.
 * The index is created in {@code index}; if an index already exists there, the songs are
 * added to the existing index.
 * Each song is processed by {@link it.unipd.dei.ims.falcon.indexing.Indexing#indexSong},
 * which maps the song into a set of segments, each of which is mapped to a Lucene
 * {@link org.apache.lucene.document.Document}.
 * Segments have a fixed length of {@code hashPerSegment} hashes and may overlap by
 * {@code hashInOverlap} hashes. The overlap must be smaller than the segment length,
 * otherwise an {@link it.unipd.dei.ims.falcon.indexing.IndexingException} is thrown.
 * <p>
 * Once the index has been created or updated, a map file is written associating each hash
 * with features based on its occurrence statistics in the entire collection. On an index
 * update the map is re-built and the map file is over-written.
 *
 * @param data input file; if it is a directory, all files inside it are indexed
 * @param index Falcon index directory
 * @param hashPerSegment number of hashes per segment
 * @param hashInOverlap number of overlapping hashes per segment
 * @throws IndexingException if the overlap is not smaller than the segment length
 * @throws IOException if the input cannot be read or the index cannot be written
 */
public static void index(File data, File index, final int hashPerSegment, final int hashInOverlap,
        final int subsampling, final int nranks, final double minkurtosis,
        final TranspositionEstimator transpEst, boolean verbose) throws IndexingException, IOException {

    long start_time = System.currentTimeMillis();

    if (hashPerSegment <= hashInOverlap)
        // BUGFIX: the message previously said "cannot be equal", but this check also
        // rejects overlaps LARGER than the segment length.
        throw new IndexingException(
                "Number of hashes in the overlap must be smaller than the number of hashes per segment");

    if (!data.canRead())
        throw new IOException("cannot read input path");
    if (data.isDirectory()) {
        for (File f : data.listFiles())
            if (!f.canRead())
                throw new IOException("cannot read one or more input files");
    }

    if (!index.exists()) // if index is being created rather than updated
        index.mkdir();
    if (!index.canWrite())
        throw new IOException("cannot write to index directory");

    SimpleFSDirectory indexDir = new SimpleFSDirectory(index, null);

    // initialize Lucene Analyzer and IndexWriter; create only when no index exists yet
    Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_30);
    final IndexWriter writer = new IndexWriter(indexDir, analyzer, !IndexReader.indexExists(indexDir),
            IndexWriter.MaxFieldLength.UNLIMITED);
    writer.setSimilarity(new HashSimilarity());

    // transform chroma data into hashes and write them into the index
    File[] inputfiles = data.isDirectory() ? data.listFiles() : new File[] { data };
    int fileNo = 0;
    for (final File file : inputfiles) {
        // skip missing and hidden files
        if (file.exists() && !file.getName().startsWith(".")) {
            if (verbose)
                System.out.println(String.format("%10.3f%% - indexing %s", fileNo * 100. / inputfiles.length,
                        file.getAbsolutePath()));
            // conversion runs on a separate thread, streaming hashes through a pipe
            // into indexSong on this thread
            final List<OutputStream> fout = new LinkedList<OutputStream>();
            fout.add(new PipedOutputStream());
            final PipedInputStream fin = new PipedInputStream((PipedOutputStream) fout.get(0));
            Thread t = new Thread(new Runnable() {
                public void run() {
                    try {
                        ChromaMatrixUtils.convertChromaStreamIntoHashesStream(new FileReader(file), fout,
                                nranks, transpEst, minkurtosis, subsampling);
                    } catch (IOException ex) {
                        // TODO do something better for this exception ... (might hang all ...)
                        Logger.getLogger(Indexing.class.getName()).log(Level.SEVERE, null, ex);
                    }
                }
            });
            t.start();
            indexSong(writer, fin, hashPerSegment, hashInOverlap, file.getAbsolutePath(),
                    file.getAbsolutePath());
            fileNo++;
        }
    }
    writer.optimize();
    writer.close();

    // additional falcon features: write the per-hash pruning statistics map
    PrintWriter pw = new PrintWriter(index.getAbsolutePath() + "/qpruning_features.map");
    IndexReader reader = IndexReader.open(new SimpleFSDirectory(index));
    try {
        int numSegments = reader.numDocs();
        // BUGFIX: widen BEFORE multiplying — numSegments * hashPerSegment was an int
        // multiplication that could overflow before assignment to the long.
        long total_hcf = (long) numSegments * hashPerSegment; // total number of hashes in the collection
        TermEnum hashes = reader.terms(); // distinct hashes in the collection

        while (hashes.next()) {
            if (!hashes.term().field().equals("CONTENT")) {
                continue;
            }
            Term curHash = hashes.term();
            pw.print(curHash.text() + "\t");
            pw.print((double) reader.docFreq(curHash) / numSegments + "\t"); // normalized document frequency
            TermDocs curHash_pl = reader.termDocs(curHash); // posting list for the current hash
            // frequency of the current hash in the entire collection
            long hcf = 0;
            // normalized maximum frequency across segments
            double nmf = 0;
            // normalized frequency of the current posting, for max computation
            double cur_nf = 0;
            // process posting list entries
            while (curHash_pl.next()) {
                cur_nf = (double) curHash_pl.freq() / hashPerSegment;
                if (cur_nf > nmf) {
                    nmf = cur_nf;
                }
                hcf += curHash_pl.freq();
            }
            // normalized total collection frequency and normalized maximum frequency
            pw.print((double) hcf / total_hcf + "\t" + nmf + "\n");
        }
        pw.flush();
    } finally {
        // BUGFIX: the IndexReader was never closed (file-handle leak), and pw leaked
        // when the statistics pass threw; close both unconditionally.
        pw.close();
        reader.close();
    }

    long end_time = System.currentTimeMillis();
    if (verbose)
        System.out.println(String.format("[INDEXING] - elapsed time: %10.3f", (end_time - start_time) / 1000.));

}

From source file:it.unipd.dei.ims.falcon.indexing.Indexing.java

License:Apache License

/**
 * Prints information on the songs stored in the index in the specified path.
 * Available options are:
 * <ul>
 *  <li> "show_doc_ids": prints the internal index identifier of all the segments in the
 *       index together with the title of the song which the segment belongs to;
 *  <li> "show_seg_ids": prints the internal index identifier of all the segments in the
 *       index together with the segment identifier;
 *  <li> "show_full_index": prints all the distinct hashes in the index and the posting
 *       list associated to each hash
 * </ul>
 * Unknown options print nothing.
 *
 * @param indexPath full path to the folder where the index is stored
 * @param option    option specifying the requested information
 * @throws IndexingException if the index is corrupt or cannot be read
 */
public static void indexUtils(String indexPath, String option) throws IndexingException {
    IndexReader reader = null;
    try {
        reader = IndexReader.open(new SimpleFSDirectory(new File(indexPath), null));

        if (option.equals("show_doc_ids")) {
            //  prints all the internal segment identifiers together with
            //  the title of the song of the considered segment.
            //  For instance, "[6] song2" denotes that the segment with
            //  internal identifier "6" belongs to the song with title "song2"
            for (int d = 0; d < reader.numDocs(); d++) {
                System.out.println("[" + d + "] " + reader.document(d).getField("TITLE").stringValue());
            }
        } else if (option.equals("show_seg_ids")) {
            //  prints all the internal segment identifiers together with
            //  the identifier of the segment.
            //  For instance, "[8] song2_3" denotes that the third segment
            //  of "song2" has internal identifier "8"
            for (int d = 0; d < reader.numDocs(); d++) {
                System.out.println("[" + d + "] " + reader.document(d).getField("ID").stringValue());
            }

        } else if (option.equals("show_full_index")) {
            // print the full index: each hash with its associated posting list
            TermEnum terms = reader.terms();
            while (terms.next()) {
                System.out.print(terms.term() + " [SF: " + terms.docFreq() + "] <");

                TermPositions poss = reader.termPositions(terms.term());
                while (poss.next()) {
                    System.out.print(" " + reader.document(poss.doc()).getField("ID").stringValue() + " ("
                            + poss.freq() + "), ");
                }
                System.out.println(">");
            }
        }
    } catch (CorruptIndexException ex) {
        // NOTE(review): the cause is dropped here — consider adding it if
        // IndexingException has a (String, Throwable) constructor.
        throw new IndexingException("CorruptIndexException when accessing index for printing information");
    } catch (IOException ex) {
        throw new IndexingException("IOException when accessing index for printing information");
    } finally {
        // BUGFIX: the reader was never closed, leaking a file handle on every call.
        if (reader != null) {
            try {
                reader.close();
            } catch (IOException ignored) {
                // best-effort close; the primary result/exception takes precedence
            }
        }
    }
}

From source file:net.paissad.waqtsalat.utils.geoip.WorldCitiesLucene.java

License:Open Source License

/**
 * Connects to the Lucene index./*  ww w . j a va 2s .  c om*/
 * 
 * @throws IOException
 */
private static void connectToIndex() throws IOException {
    if (getIndexDir() == null) {
        File destDir = new File(LUCENE_INDEX_PATH);
        if (Platform.isWindows()) {
            setIndexDir(new SimpleFSDirectory(destDir, null));
        } else {
            setIndexDir(new NIOFSDirectory(destDir, null));
        }
    } else {
        logger.trace("Already connected to the lucene index directory ({})", indexDir.toString());
    }
}

From source file:org.apache.solr.core.backup.repository.LocalFileSystemRepository.java

License:Apache License

/**
 * Opens {@code fileName} inside the directory identified by {@code dirPath}.
 * The temporary {@link FSDirectory} handle is closed before returning; the returned
 * {@link IndexInput} remains open and must be closed by the caller.
 */
@Override
public IndexInput openInput(URI dirPath, String fileName, IOContext ctx) throws IOException {
    final Path location = Paths.get(dirPath);
    try (FSDirectory directory = new SimpleFSDirectory(location, NoLockFactory.INSTANCE)) {
        return directory.openInput(fileName, ctx);
    }
}

From source file:org.apache.solr.core.backup.repository.LocalFileSystemRepository.java

License:Apache License

/**
 * Lists all file names in the directory identified by {@code dirPath}.
 * The temporary {@link FSDirectory} handle is closed before returning.
 */
@Override
public String[] listAll(URI dirPath) throws IOException {
    final Path location = Paths.get(dirPath);
    try (FSDirectory directory = new SimpleFSDirectory(location, NoLockFactory.INSTANCE)) {
        return directory.listAll();
    }
}