List of usage examples for the org.apache.lucene.store.SimpleFSDirectory constructor
public SimpleFSDirectory(Path path, LockFactory lockFactory) throws IOException
From source file:com.b2international.index.lucene.Directories.java
License:Apache License
/** * Just like {@link #openFile(File)}, but allows you to also specify a custom {@link LockFactory}. *///from w ww. ja va2s.co m public static FSDirectory openFile(final Path path, final LockFactory lockFactory) throws IOException { if ((Constants.WINDOWS || Constants.SUN_OS || Constants.LINUX || Constants.MAC_OS_X) && Constants.JRE_IS_64BIT && MMapDirectory.UNMAP_SUPPORTED) { return new MMapDirectory(path, lockFactory); } else if (Constants.WINDOWS) { return new SimpleFSDirectory(path, lockFactory); } else { return new NIOFSDirectory(path, lockFactory); } }
From source file:com.luc.indexer.LuceneCommonIndexer.java
License:Apache License
/**
 * Creates an indexer that reads documents from {@code sourcePath} and writes a
 * brand-new Lucene index under {@code destinationPath}.
 *
 * @param sourcePath directory containing the documents to index
 * @param destinationPath directory where the index is written
 * @throws IOException if the index directory or writer cannot be opened
 */
public LuceneCommonIndexer(String sourcePath, String destinationPath) throws IOException {
    source = new File(sourcePath);
    destination = new File(destinationPath);
    // null lock factory => SimpleFSDirectory uses its default lock factory
    Directory indexDirectory = new SimpleFSDirectory(destination, null);
    // 'true' recreates the index from scratch rather than appending to an existing one
    indexWriter = new IndexWriter(indexDirectory, new StandardAnalyzer(Version.LUCENE_35), true,
            MaxFieldLength.UNLIMITED);
}
From source file:com.luc.searcher.LuceneFileSearcher.java
License:Apache License
/**
 * Searches the index in {@code indexDir} for files matching {@code query}.
 * <p>
 * The query text is run both through the standard query parser and as a phrase
 * query (slop 5); the two are OR-combined so phrase-near matches rank higher
 * without excluding looser term matches. Returns the {@code filename} field of
 * up to 10 matching documents.
 *
 * @param indexDir directory containing the Lucene index
 * @param query raw whitespace-separated query text
 * @return file names of the top matching documents (at most 10)
 * @throws IOException if the index cannot be read
 * @throws ParseException if the query text cannot be parsed
 */
public List<String> search(File indexDir, String query) throws IOException, ParseException {
    list = new ArrayList<String>();
    dir = new SimpleFSDirectory(indexDir, null);

    // Build a phrase query from the individual terms; slop 5 lets them
    // appear up to 5 positions apart.
    pQuery = new PhraseQuery();
    String[] splittedQuery = query.split(" ");
    for (int i = 0; i < splittedQuery.length; i++) {
        pQuery.add(new Term("contents", splittedQuery[i]));
    }
    pQuery.setSlop(5);

    indexSearcher = new IndexSearcher(dir);

    // Parse the raw query text as well, for recall beyond the exact phrase.
    qp = new QueryParser(Version.LUCENE_35, "contents", new StandardAnalyzer(Version.LUCENE_35));
    q = qp.parse(query);

    // OR-combine parser output and phrase query for better ranking.
    bQuery = new BooleanQuery();
    bQuery.add(q, BooleanClause.Occur.SHOULD);
    bQuery.add(pQuery, BooleanClause.Occur.SHOULD);

    TopDocs topDocs = indexSearcher.search(bQuery, 10);
    // BUGFIX: iterate over scoreDocs, not totalHits. totalHits counts ALL
    // matches in the index, while scoreDocs holds at most the requested 10 --
    // the original loop threw ArrayIndexOutOfBoundsException when more than
    // 10 documents matched.
    for (ScoreDoc match : topDocs.scoreDocs) {
        Document doc = indexSearcher.doc(match.doc);
        list.add(doc.get("filename"));
    }
    return list;
}
From source file:com.zimbra.cs.index.LuceneDirectory.java
License:Open Source License
/**
 * Creates a new {@link LuceneDirectory} using a {@code SingleInstanceLockFactory}.
 * <p>
 * The underlying {@link FSDirectory} implementation is selected by
 * {@link LC#zimbra_index_lucene_io_impl}:
 * <ul>
 * <li>{@code null} (default) - let Lucene pick the best implementation for the current
 *     environment via {@code FSDirectory.open}. Currently this returns {@link MMapDirectory}
 *     for most Solaris and Windows 64-bit JREs, {@link NIOFSDirectory} for other non-Windows
 *     JREs, and {@link SimpleFSDirectory} for other JREs on Windows.
 * <li>{@code simple} - straightforward implementation based on java.io.RandomAccessFile.
 *     Poor concurrent performance (multiple threads bottleneck) because reads on the same
 *     file are synchronized.
 * <li>{@code nio} - uses java.nio FileChannel positional reads to avoid that
 *     synchronization. Due to a Windows-only Sun JRE bug this is a poor choice on Windows,
 *     but the preferred choice on all other platforms.
 * <li>{@code mmap} - memory-mapped reads. A good choice with plenty of virtual memory
 *     relative to index size (e.g. 64-bit JREs, or small indexes on 32-bit). Java cannot
 *     unmap mapped files from user code, so the OS file handle is released only when GC
 *     collects the underlying buffers; until then deleted/overwritten files consume
 *     transient disk space, and on Windows delete/overwrite attempts fail outright.
 *     MMapDirectory supplies a (possibly dangerous) workaround that may fail on
 *     non-Sun JVMs.
 * </ul>
 *
 * @param path directory path
 */
public static LuceneDirectory open(File path) throws IOException {
    String impl = LC.zimbra_index_lucene_io_impl.value();
    FSDirectory dir;
    if ("nio".equals(impl)) {
        dir = new NIOFSDirectory(path, new SingleInstanceLockFactory());
    } else if ("mmap".equals(impl)) {
        dir = new MMapDirectory(path, new SingleInstanceLockFactory());
    } else if ("simple".equals(impl)) {
        dir = new SimpleFSDirectory(path, new SingleInstanceLockFactory());
    } else {
        // unrecognized or unset: defer to Lucene's own platform heuristic
        dir = FSDirectory.open(path, new SingleInstanceLockFactory());
    }
    ZimbraLog.index.info("OpenLuceneIndex impl=%s,dir=%s", dir.getClass().getSimpleName(), path);
    return new LuceneDirectory(dir);
}
From source file:de.walware.statet.r.internal.core.rhelp.index.REnvIndexReader.java
License:Open Source License
/**
 * Opens a read-only searcher over the R help index of the given R environment.
 *
 * @param rEnvConfig the R environment whose index directory is opened
 * @throws Exception if the index cannot be opened
 */
public REnvIndexReader(final IREnvConfiguration rEnvConfig) throws Exception {
    // SimpleFSDirectory is chosen deliberately: NIOFSDirectory doesn't like
    // Thread#interrupt(), which the information hover manager uses routinely.
    // final FSDirectory directory= FSDirectory.open(SaveUtil.getIndexDirectory(rEnvConfig));
    final FSDirectory directory = new SimpleFSDirectory(REnvIndexWriter.getIndexDirectory(rEnvConfig), null);
    this.indexReader = DirectoryReader.open(directory);
    this.indexSearcher = new IndexSearcher(this.indexReader);
    this.indexSearcher.setSimilarity(SIMILARITY);
}
From source file:it.unipd.dei.ims.falcon.indexing.Indexing.java
License:Apache License
/** * Indexes all the songs in the specified path. * The index is created in the specified directory "indexPath". If an index * already exists in that path, adds the songs to the existing index. * Each song is processed by the method//from ww w . j a v a 2s . c o m * {@link it.unipd.dei.ims.falcon.indexing.Indexing#indexSong} * which maps the song into a set of segments, each of one is mapped in a * Lucene {@link org.apache.lucene.document.Document}. * The segments have fixed length, specifically are constituted by * "hashPerSegment" hashes. There can be an overlap of "hashInOverlap" * hashes between two segments. The number of hash in the overlap must be * smaller than the number of hash per segments, otherwise an * {@link it.unipd.dei.ims.falcon.indexing.IndexingException} is thrown. * <p> * Once the index has been created or updated, writes a map into a file. * The map associates a set of features to each hash. Those features are * based on occurrence statistics of the hash in the entire collection. * In the event of an index update the map is re-built and the map file * is over-written. * @param data Input file. If it is a directory, index all files inside it. * @param index Falcon index. * @param hashPerSegment Number of hashes per segment. * @param hashInOverlap Number of overlapping hashes per segment. 
* @throws IndexingException */ public static void index(File data, File index, final int hashPerSegment, final int hashInOverlap, final int subsampling, final int nranks, final double minkurtosis, final TranspositionEstimator transpEst, boolean verbose) throws IndexingException, IOException { long start_time = System.currentTimeMillis(); if (hashPerSegment <= hashInOverlap) throw new IndexingException( "Number of hashes in the overlap cannot be equal to the number of hash per segment"); if (!data.canRead()) throw new IOException("cannot read input path"); if (data.isDirectory()) { for (File f : data.listFiles()) if (!f.canRead()) throw new IOException("cannot read one or more input files"); } if (!index.exists()) // if index is being created rather than updated index.mkdir(); if (!index.canWrite()) throw new IOException("cannot write to index directory"); SimpleFSDirectory indexDir = new SimpleFSDirectory(index, null); // initialize Lucene Analyzer and IndexWriter Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_30); final IndexWriter writer = new IndexWriter(indexDir, analyzer, !IndexReader.indexExists(indexDir), IndexWriter.MaxFieldLength.UNLIMITED); writer.setSimilarity(new HashSimilarity()); // transform chroma data into hashes and write into index File[] inputfiles = data.isDirectory() ? data.listFiles() : new File[] { data }; int fileNo = 0; for (final File file : inputfiles) { // if the current considered files exists and is not hidden if (file.exists() && !file.getName().startsWith(".")) { if (verbose) System.out.println(String.format("%10.3f%% - indexing %s", fileNo * 100. 
/ inputfiles.length, file.getAbsolutePath())); final List<OutputStream> fout = new LinkedList<OutputStream>(); fout.add(new PipedOutputStream()); final PipedInputStream fin = new PipedInputStream((PipedOutputStream) fout.get(0)); Thread t = new Thread(new Runnable() { public void run() { try { ChromaMatrixUtils.convertChromaStreamIntoHashesStream(new FileReader(file), fout, nranks, transpEst, minkurtosis, subsampling); } catch (IOException ex) { // TODO do something better for this exception ... (might hang all ...) Logger.getLogger(Indexing.class.getName()).log(Level.SEVERE, null, ex); } } }); t.start(); indexSong(writer, fin, hashPerSegment, hashInOverlap, file.getAbsolutePath(), file.getAbsolutePath()); fileNo++; } } writer.optimize(); writer.close(); // additional falcon features PrintWriter pw = new PrintWriter(index.getAbsolutePath() + "/qpruning_features.map"); IndexReader reader = IndexReader.open(new SimpleFSDirectory(index)); int numSegments = reader.numDocs(); long total_hcf = numSegments * hashPerSegment; // total number of hashes in the collection TermEnum hashes = reader.terms(); // distinct hashes in the collection while (hashes.next()) { if (!hashes.term().field().equals("CONTENT")) { continue; } Term curHash = hashes.term(); pw.print(curHash.text() + "\t"); pw.print((double) reader.docFreq(curHash) / numSegments + "\t"); // normalized document frequency TermDocs curHash_pl = reader.termDocs(curHash); // posting list for the current hash // computation of the frequency of the current hash in the // entire collection -- value initialization long hcf = 0; // initializes the normalized maximum frequency value double nmf = 0; // initializes the normalized frequency for max computation double cur_nf = 0; // processes posting list entries while (curHash_pl.next()) { // computation of the normalized frequency for // the current hash cur_nf = (double) curHash_pl.freq() / hashPerSegment; // update max if necessary if (cur_nf > nmf) { nmf = cur_nf; } hcf += 
curHash_pl.freq(); } // prints normalized total collection frequency and // normalized maximum frequency for the current hash pw.print((double) hcf / total_hcf + "\t" + nmf + "\n"); } pw.flush(); pw.close(); long end_time = System.currentTimeMillis(); if (verbose) System.out.println(String.format("[INDEXING] - elapsed time: %10.3f", (end_time - start_time) / 1000.)); }
From source file:it.unipd.dei.ims.falcon.indexing.Indexing.java
License:Apache License
/** * Prints information on the songs stored in the index in the specified path. * The specific information printed is that specified by the "option". * Available options are://from w ww. j a v a 2 s . c o m * <ul> * <li> "show_doc_ids": prints the internal index identifier of all the * segments in the index together with the title of the song which * the segment belongs to; * <li> "show_seg_ids": prints the internal index identifier of all the * segments in the index together with the segment identifier; * <li> "show_full_index": print all the distinct hashes in the index * and the posting list associated to each hash * * </ul> * * @param indexPath * full path to the folder where the index is stored * @param option * option which specified the requested information * * @throws IndexingException */ public static void indexUtils(String indexPath, String option) throws IndexingException { IndexReader reader; try { reader = IndexReader.open(new SimpleFSDirectory(new File(indexPath), null)); if (option.equals("show_doc_ids")) { // prints all the internal segment identifiers together with // the title of the song of the considered segment. // For instance, "[6] song2" denotes that the segment with // internal identifier "6" belongs to the song with title "song2" for (int d = 0; d < reader.numDocs(); d++) { System.out.println("[" + d + "] " + reader.document(d).getField("TITLE").stringValue()); } } else if (option.equals("show_seg_ids")) { // prints all the internal segment identifiers together with // the identifier of the segment. 
// For instance, "[8] song2_3" denotes that the third segment // of "song2" has internal identifier "8 for (int d = 0; d < reader.numDocs(); d++) { System.out.println("[" + d + "] " + reader.document(d).getField("ID").stringValue()); } } else if (option.equals("show_full_index")) { // print the full index, that is each hash with the associated // posting list TermEnum terms = reader.terms(); while (terms.next()) { System.out.print(terms.term() + " [SF: " + terms.docFreq() + "] <"); TermPositions poss = reader.termPositions(terms.term()); while (poss.next()) { System.out.print(" " + reader.document(poss.doc()).getField("ID").stringValue() + " (" + poss.freq() + "), "); } System.out.println(">"); } } } catch (CorruptIndexException ex) { throw new IndexingException("CorruptIndexException when accessing index for printing information"); } catch (IOException ex) { throw new IndexingException("IOException when accessing index for printing information"); } }
From source file:net.paissad.waqtsalat.utils.geoip.WorldCitiesLucene.java
License:Open Source License
/** * Connects to the Lucene index./* ww w . j a va 2s . c om*/ * * @throws IOException */ private static void connectToIndex() throws IOException { if (getIndexDir() == null) { File destDir = new File(LUCENE_INDEX_PATH); if (Platform.isWindows()) { setIndexDir(new SimpleFSDirectory(destDir, null)); } else { setIndexDir(new NIOFSDirectory(destDir, null)); } } else { logger.trace("Already connected to the lucene index directory ({})", indexDir.toString()); } }
From source file:org.apache.solr.core.backup.repository.LocalFileSystemRepository.java
License:Apache License
/**
 * Opens an {@link IndexInput} for {@code fileName} under the local directory
 * {@code dirPath}, with no locking.
 */
// NOTE(review): try-with-resources closes the directory BEFORE the returned
// IndexInput is consumed by the caller. This relies on FSDirectory#close() not
// invalidating inputs that were already opened from it -- confirm against the
// Lucene version in use.
@Override
public IndexInput openInput(URI dirPath, String fileName, IOContext ctx) throws IOException {
    try (FSDirectory dir = new SimpleFSDirectory(Paths.get(dirPath), NoLockFactory.INSTANCE)) {
        return dir.openInput(fileName, ctx);
    }
}
From source file:org.apache.solr.core.backup.repository.LocalFileSystemRepository.java
License:Apache License
/**
 * Lists the names of all files in the local backup directory at {@code dirPath}.
 *
 * @param dirPath URI of the backup directory
 * @return names of all files in the directory
 * @throws IOException if the directory cannot be read
 */
@Override
public String[] listAll(URI dirPath) throws IOException {
    final java.nio.file.Path localPath = Paths.get(dirPath);
    // directory handle is only needed for the listing itself, so close it immediately
    try (FSDirectory backupDir = new SimpleFSDirectory(localPath, NoLockFactory.INSTANCE)) {
        return backupDir.listAll();
    }
}