List of usage examples for the org.apache.lucene.store.RAMDirectory no-argument constructor:
public RAMDirectory()
From source file:com.github.le11.nls.lucene.UIMABaseAnalyzerTest.java
License:Apache License
@Before
public void setUp() throws Exception {
    // Keep the whole index in memory so the test is fast and self-contained.
    dir = new RAMDirectory();
    // UIMA-backed analyzer driven by the whitespace-tokenizer descriptor on the classpath.
    analyzer = new UIMABaseAnalyzer("/WhitespaceTokenizer.xml", "org.apache.uima.TokenAnnotation");
    writer = new IndexWriter(dir, new IndexWriterConfig(Version.LUCENE_32, analyzer));
}
From source file:com.github.le11.nls.lucene.UIMAPayloadsAnalyzerTest.java
License:Apache License
@Before
public void setUp() throws Exception {
    // In-memory directory: nothing touches disk during the test.
    dir = new RAMDirectory();
    // Payload-producing analyzer configured from the HMM tagger aggregate descriptor.
    analyzer = new UIMAPayloadsAnalyzer("/HmmTaggerAggregate.xml");
    writer = new IndexWriter(dir, new IndexWriterConfig(Version.LUCENE_33, analyzer));
}
From source file:com.github.le11.nls.lucene.UIMATypeBasedSimilarityTest.java
License:Apache License
@Before public void setUp() throws Exception { dir = new RAMDirectory(); analyzer = new UIMAPayloadsAnalyzer("/HmmTaggerAggregate.xml"); writer = new IndexWriter(dir, new IndexWriterConfig(Version.LUCENE_33, analyzer)); Document doc = new Document(); doc.add(new Field("title", "this is a dummy title containing an entity for London", Field.Store.YES, Field.Index.ANALYZED)); doc.add(new Field("contents", "there is some content written here about the british city", Field.Store.YES, Field.Index.ANALYZED)); writer.addDocument(doc, analyzer);// ww w.j a v a2 s. co m writer.commit(); // try the search over the first doc IndexSearcher indexSearcher = new IndexSearcher(writer.getReader()); TopDocs result = indexSearcher.search(new MatchAllDocsQuery("contents"), 10); assertTrue(result.totalHits > 0); Document d = indexSearcher.doc(result.scoreDocs[0].doc); assertNotNull(d); assertNotNull(d.getFieldable("title")); assertNotNull(d.getFieldable("contents")); // add a second doc doc = new Document(); doc.add(new Field("title", "some title regarding some article written in English", Field.Store.YES, Field.Index.ANALYZED)); doc.add(new Field("contents", "this is the content of the article about", Field.Store.YES, Field.Index.ANALYZED)); writer.addDocument(doc, analyzer); writer.commit(); }
From source file:com.github.lucene.store.jdbc.JdbcDirectoryBenchmarkITest.java
License:Apache License
@Before
public void setUp() throws Exception {
    // Three directories to benchmark against each other: JDBC-, RAM-, and FS-backed.
    jdbcDirectory = new JdbcDirectory(dataSource, createDialect(), "TEST");
    ramDirectory = new RAMDirectory();
    fsDirectory = FSDirectory.open(FileSystems.getDefault().getPath("target/index"));

    // Create the backing table for the JDBC directory. The connection is released in a
    // finally block so it is not leaked if create() or the commit throws (the original
    // code released it only on the success path).
    final Connection con = DataSourceUtils.getConnection(dataSource);
    try {
        ((JdbcDirectory) jdbcDirectory).create();
        DataSourceUtils.commitConnectionIfPossible(con);
    } finally {
        DataSourceUtils.releaseConnection(con);
    }
}
From source file:com.github.msarhan.lucene.ArabicRootExtractorAnalyzerTests.java
License:Open Source License
@Test public void testArabicRootIndex() throws IOException, ParseException, URISyntaxException { Directory index = new RAMDirectory(); ArabicRootExtractorAnalyzer analyzer = new ArabicRootExtractorAnalyzer(); IndexWriterConfig config = new IndexWriterConfig(analyzer); final AtomicInteger id = new AtomicInteger(0); IndexWriter w = new IndexWriter(index, config); URL url = ArabicRootExtractorStemmer.class.getClassLoader() .getResource("com/github/msarhan/lucene/fateha.txt"); if (url == null) { fail("Not able to load data file!"); }/*from ww w . j a v a2s . c o m*/ Files.lines(new File(url.toURI()).toPath()) .forEach(line -> addDoc(w, line, String.valueOf(id.incrementAndGet()))); w.close(); String querystr = ""; Query q = new QueryParser("title", analyzer).parse(querystr); int hitsPerPage = 10; IndexReader reader = DirectoryReader.open(index); IndexSearcher searcher = new IndexSearcher(reader); TopDocs docs = searcher.search(q, hitsPerPage); //print(searcher, docs); assertEquals(2, docs.scoreDocs.length); }
From source file:com.github.msarhan.lucene.ArabicRootExtractorAnalyzerTests.java
License:Open Source License
@Test public void testInlineStemmer() throws IOException, ParseException { //Initialize the index Directory index = new RAMDirectory(); Analyzer analyzer = new ArabicRootExtractorAnalyzer(); IndexWriterConfig config = new IndexWriterConfig(analyzer); IndexWriter writer = new IndexWriter(index, config); Document doc = new Document(); doc.add(new StringField("number", "1", Field.Store.YES)); doc.add(new TextField("title", "?? ? ? ??", Field.Store.YES));/*from w ww.j a v a2 s .c om*/ writer.addDocument(doc); doc = new Document(); doc.add(new StringField("number", "2", Field.Store.YES)); doc.add(new TextField("title", "? ?? ? ?", Field.Store.YES)); writer.addDocument(doc); doc = new Document(); doc.add(new StringField("number", "3", Field.Store.YES)); doc.add(new TextField("title", "? ??", Field.Store.YES)); writer.addDocument(doc); writer.close(); //~ //Query the index String queryStr = ""; Query query = new QueryParser("title", analyzer).parse(queryStr); int hitsPerPage = 5; IndexReader reader = DirectoryReader.open(index); IndexSearcher searcher = new IndexSearcher(reader); TopDocs docs = searcher.search(query, hitsPerPage, Sort.INDEXORDER); ScoreDoc[] hits = docs.scoreDocs; //~ //Print results /* System.out.println("Found " + hits.length + " hits:"); for (ScoreDoc hit : hits) { int docId = hit.doc; Document d = searcher.doc(docId); System.out.printf("\t(%s): %s\n", d.get("number"), d.get("title")); } */ //~ }
From source file:com.github.tteofili.looseen.MinHashClassifier.java
License:Apache License
/**
 * Builds an in-memory MinHash index over the given source reader: every document's
 * {@code textField} is re-analyzed with a MinHash analyzer and stored alongside its
 * {@code categoryField} label for later classification.
 *
 * @throws RuntimeException wrapping any {@link IOException} raised while indexing
 */
public MinHashClassifier(IndexReader reader, String textField, String categoryField, int min, int hashCount,
        int hashSize) {
    this.min = min;
    this.hashCount = hashCount;
    this.hashSize = hashSize;
    try {
        Analyzer analyzer = createMinHashAnalyzer(min, hashCount, hashSize);
        IndexWriterConfig config = new IndexWriterConfig(analyzer);
        directory = new RAMDirectory();
        // try-with-resources: the original leaked the writer when addDocument/commit
        // threw, because close() was only reached on the success path.
        try (IndexWriter writer = new IndexWriter(directory, config)) {
            for (int i = 0; i < reader.maxDoc(); i++) {
                Document d = reader.document(i);
                String textValue = d.getField(textField).stringValue();
                String categoryValue = d.getField(categoryField).stringValue();
                Document document = new Document();
                document.add(new TextField(TEXT_FIELD, textValue, Field.Store.NO));
                document.add(new StringField(CLASS_FIELD, categoryValue, Field.Store.YES));
                writer.addDocument(document);
            }
            writer.commit();
        }
    } catch (IOException e) {
        throw new RuntimeException(e);
    }
    // MinHash queries can contain very many clauses; lift the global clause limit.
    BooleanQuery.setMaxClauseCount(Integer.MAX_VALUE);
}
From source file:com.globalsight.ling.lucene.Index.java
License:Apache License
/**
 * Re-creates the index by batch-loading entries into it.
 * The index must be closed before calling this method.
 * Caller must use
 * <PRE>
 * close();
 * try
 * {
 *     batchOpen();
 *     ...
 *     batchAddDocument();
 *     ...
 * }
 * finally
 * {
 *     batchDone();
 * }
 * </PRE>
 *
 * @throws IOException if the index is not in the closed state
 */
public void batchOpen() throws IOException {
    // Transition CLOSED -> CREATING, rejecting the call if the index is open.
    // NOTE(review): this synchronizes on the m_state field and then reassigns it inside
    // the block, so concurrent callers may end up locking different objects — consider a
    // dedicated final lock object; confirm how m_state is declared elsewhere in the file.
    synchronized (m_state) {
        if (m_state != STATE_CLOSED) {
            throw new IOException("index is open");
        }
        m_state = STATE_CREATING;
    }

    // setup RAMDirectory and writer: batch additions go to an in-memory directory first.
    m_ramdir = new RAMDirectory();
    IndexWriterConfig config = new IndexWriterConfig(LuceneUtil.VERSION, m_analyzer);
    // CREATE_OR_APPEND: reuse the RAM directory's contents if any already exist.
    config.setOpenMode(OpenMode.CREATE_OR_APPEND);
    //config.setSimilarity(m_similarity);
    m_ramIndexWriter = new IndexWriter(m_ramdir, config);
    //m_ramIndexWriter.mergeFactor = 10000;
}
From source file:com.google.gerrit.lucene.LuceneAccountIndex.java
License:Apache License
/**
 * Chooses the backing store for the account index: an in-memory directory when the
 * configuration marks this as an in-memory test, otherwise an FSDirectory rooted at the
 * schema-versioned accounts path under the site.
 */
private static Directory dir(Schema<AccountState> schema, Config cfg, SitePaths sitePaths) throws IOException {
    if (LuceneIndexModule.isInMemoryTest(cfg)) {
        return new RAMDirectory();
    }
    return FSDirectory.open(LuceneVersionManager.getDir(sitePaths, ACCOUNTS + "_", schema));
}
From source file:com.google.gerrit.lucene.LuceneChangeIndex.java
License:Apache License
/**
 * Wires up the two Lucene sub-indexes (open and closed changes), either in-memory for
 * tests or on disk under the site path / an explicit base directory.
 */
@AssistedInject
LuceneChangeIndex(@GerritServerConfig Config cfg, SitePaths sitePaths,
        @IndexExecutor(INTERACTIVE) ListeningExecutorService executor, Provider<ReviewDb> db,
        ChangeData.Factory changeDataFactory, FillArgs fillArgs, @Assisted Schema<ChangeData> schema,
        @Assisted @Nullable String base) throws IOException {
    this.sitePaths = sitePaths;
    this.fillArgs = fillArgs;
    this.executor = executor;
    this.db = db;
    this.changeDataFactory = changeDataFactory;
    this.schema = schema;
    // DocValues-based sorting is only used from schema version 15 onward.
    this.useDocValuesForSorting = schema.getVersion() >= 15;
    this.idSortField = sortFieldName(LegacyChangeIdPredicate.idField(schema));

    // Standard analyzer with custom character mapping applied on top.
    CustomMappingAnalyzer analyzer = new CustomMappingAnalyzer(new StandardAnalyzer(CharArraySet.EMPTY_SET),
            CUSTOM_CHAR_MAPPING);
    queryBuilder = new QueryBuilder(analyzer);

    // Global (JVM-wide) boolean-clause limit, overridable via index.defaultMaxClauseCount.
    BooleanQuery
            .setMaxClauseCount(cfg.getInt("index", "defaultMaxClauseCount", BooleanQuery.getMaxClauseCount()));

    GerritIndexWriterConfig openConfig = new GerritIndexWriterConfig(cfg, "changes_open");
    GerritIndexWriterConfig closedConfig = new GerritIndexWriterConfig(cfg, "changes_closed");

    SearcherFactory searcherFactory = newSearcherFactory();
    if (cfg.getBoolean("index", "lucene", "testInmemory", false)) {
        // Test mode: both sub-indexes live in RAM.
        openIndex = new SubIndex(new RAMDirectory(), "ramOpen", openConfig, searcherFactory);
        closedIndex = new SubIndex(new RAMDirectory(), "ramClosed", closedConfig, searcherFactory);
    } else {
        // Production: an explicit base path wins over the schema-versioned site directory.
        Path dir = base != null ? Paths.get(base) : LuceneVersionManager.getDir(sitePaths, schema);
        openIndex = new SubIndex(dir.resolve(CHANGES_OPEN), openConfig, searcherFactory);
        closedIndex = new SubIndex(dir.resolve(CHANGES_CLOSED), closedConfig, searcherFactory);
    }
}