List of usage examples for org.apache.lucene.index IndexWriter addDocument
public long addDocument(Iterable<? extends IndexableField> doc) throws IOException
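For orientation before the scraped examples below, here is a minimal, self-contained sketch of the call against a recent Lucene release. It assumes Lucene 8+ (for ByteBuffersDirectory; on older 6.x/7.x releases an in-memory RAMDirectory would be the equivalent), and the field names and values are illustrative, not taken from the examples on this page:

import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.StringField;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.store.ByteBuffersDirectory;
import org.apache.lucene.store.Directory;

public class AddDocumentSketch {
    public static void main(String[] args) throws Exception {
        // In-memory directory for the sketch; use FSDirectory.open(path) for an on-disk index.
        try (Directory dir = new ByteBuffersDirectory();
                IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig(new StandardAnalyzer()))) {
            Document doc = new Document();
            doc.add(new StringField("id", "doc-1", Field.Store.YES));   // indexed as a single token
            doc.add(new TextField("body", "hello lucene", Field.Store.NO)); // analyzed full text
            long seqNo = writer.addDocument(doc); // since Lucene 6.0, returns the operation's sequence number
            writer.commit();
            System.out.println("Indexed with sequence number " + seqNo);
        }
    }
}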
From source file:com.czw.search.lucene.example.xmlparser.FormBasedXmlQueryDemo.java
License:Apache License
private void openExampleIndex() throws IOException {
    // Create a RAM-based index from our test data file
    RAMDirectory rd = new RAMDirectory();
    IndexWriterConfig iwConfig = new IndexWriterConfig(analyzer);
    IndexWriter writer = new IndexWriter(rd, iwConfig);
    InputStream dataIn = getServletContext().getResourceAsStream("/WEB-INF/data.tsv");
    BufferedReader br = new BufferedReader(new InputStreamReader(dataIn, StandardCharsets.UTF_8));
    String line = br.readLine();
    final FieldType textNoNorms = new FieldType(TextField.TYPE_STORED);
    textNoNorms.setOmitNorms(true);
    while (line != null) {
        line = line.trim();
        if (line.length() > 0) {
            // parse row and create a document
            StringTokenizer st = new StringTokenizer(line, "\t");
            Document doc = new Document();
            doc.add(new Field("location", st.nextToken(), textNoNorms));
            doc.add(new Field("salary", st.nextToken(), textNoNorms));
            doc.add(new Field("type", st.nextToken(), textNoNorms));
            doc.add(new Field("description", st.nextToken(), textNoNorms));
            writer.addDocument(doc);
        }
        line = br.readLine();
    }
    writer.close();
    // open searcher; note that this example never closes its reader
    IndexReader reader = DirectoryReader.open(rd);
    searcher = new IndexSearcher(reader);
}
From source file:com.dasasian.chok.lucene.integration.LuceneClientTest.java
License:Apache License
@Test
public void testGetBinaryDetails() throws Exception {
    File index = temporaryFolder.newFolder("indexWithBinaryData");
    File indexShard = new File(index, "binaryShard");
    if (!indexShard.mkdirs()) {
        throw new RuntimeException("Unable to create directory " + indexShard.getAbsolutePath());
    }
    String textFieldName = "textField";
    String binaryFieldName = "binaryField";
    String textFieldContent = "sample text";
    byte[] bytesFieldContent = new byte[] { 1, 2, 3 };
    IndexWriter indexWriter = new IndexWriter(FSDirectory.open(indexShard),
            new StandardAnalyzer(Version.LUCENE_30), true, MaxFieldLength.UNLIMITED);
    Document document = new Document();
    document.add(new Field(binaryFieldName, bytesFieldContent, Store.YES));
    document.add(new Field(textFieldName, textFieldContent, Store.NO, Index.ANALYZED));
    indexWriter.addDocument(document);
    indexWriter.close(true);

    DeployClient deployClient = new DeployClient(miniCluster.getProtocol());
    IndexState indexState = deployClient.addIndex(index.getName(), index.getAbsolutePath(), 1).joinDeployment();
    assertEquals(IndexState.DEPLOYED, indexState);

    LuceneClient client = new LuceneClient(miniCluster.createInteractionProtocol());
    final Query query = new QueryParser(Version.LUCENE_30, "", new KeywordAnalyzer())
            .parse(textFieldName + ": " + textFieldContent);
    final Hits hits = client.search(query, new String[] { index.getName() }, 10);
    assertNotNull(hits);
    assertEquals(1, hits.getHits().size());
    final Hit hit = hits.getHits().get(0);
    final MapWritable details = client.getDetails(hit);
    final Set<Writable> keySet = details.keySet();
    assertEquals(1, keySet.size());
    final Writable writable = details.get(new Text(binaryFieldName));
    assertNotNull(writable);
    assertThat(writable, instanceOf(BytesWritable.class));
    BytesWritable bytesWritable = (BytesWritable) writable;
    bytesWritable.setCapacity(bytesWritable.getLength()); // getBytes() returns the full array
    assertArrayEquals(bytesFieldContent, bytesWritable.getBytes());
    client.close();
}
From source file:com.dasasian.chok.lucene.integration.LuceneClientTest.java
License:Apache License
@Test
public void testFieldSortWithNoResultShard() throws Exception {
    String indexName = "sortIndex";
    File sortIndex = temporaryFolder.newFolder(indexName);
    File sortShard1 = new File(sortIndex, "sortIndex1");
    File sortShard2 = new File(sortIndex, "sortIndex2");
    IndexWriter indexWriter1 = new IndexWriter(FSDirectory.open(sortShard1),
            new StandardAnalyzer(Version.LUCENE_30), true, MaxFieldLength.UNLIMITED);
    IndexWriter indexWriter2 = new IndexWriter(FSDirectory.open(sortShard2),
            new StandardAnalyzer(Version.LUCENE_30), true, MaxFieldLength.UNLIMITED);

    Document document = new Document();
    document.add(new Field("text", "abc", Field.Store.YES, Index.NOT_ANALYZED));
    document.add(new NumericField("timesort", Field.Store.YES, false).setLongValue(1234567890123L));
    indexWriter1.addDocument(document);
    indexWriter1.close();

    document = new Document();
    document.add(new Field("text", "abc2", Field.Store.YES, Index.NOT_ANALYZED));
    document.add(new NumericField("timesort", Field.Store.YES, false).setLongValue(1234567890123L));
    indexWriter2.addDocument(document);
    indexWriter2.close();

    miniCluster.deployIndex(indexName, sortIndex, 1);

    // query and compare results
    LuceneClient client = new LuceneClient(miniCluster.createInteractionProtocol());
    Sort sort = new Sort(new SortField[] { new SortField("timesort", SortField.LONG) });

    // query both documents
    Query query = new QueryParser(Version.LUCENE_30, "", new KeywordAnalyzer()).parse("text:ab*");
    Hits hits = client.search(query, null, 20, sort);
    assertEquals(2, hits.size());

    // query only one document
    query = new QueryParser(Version.LUCENE_30, "", new KeywordAnalyzer()).parse("text:abc2");
    hits = client.search(query, null, 20, sort);
    assertEquals(1, hits.size());

    // query only one document on one node
    miniCluster.shutdownNode(0);
    TestUtil.waitUntilIndexBalanced(miniCluster.getProtocol(), indexName);
    query = new QueryParser(Version.LUCENE_30, "", new KeywordAnalyzer()).parse("text:abc2");
    hits = client.search(query, null, 20, sort);
    assertEquals(1, hits.size());
    client.close();
}
From source file:com.dasasian.chok.lucene.integration.LuceneClientTest.java
License:Apache License
@SuppressWarnings("unchecked") @Test/*from w ww . ja va2 s.c o m*/ public void testSortedSearch() throws Exception { // write and deploy test index String queryTerm = "2"; String textFieldName = "textField"; File sortIndex = temporaryFolder.newFolder("sortIndex2"); File sortShard = new File(sortIndex, "sortShard"); String sortFieldName = "sortField"; IndexWriter indexWriter = new IndexWriter(FSDirectory.open(sortShard), new StandardAnalyzer(Version.LUCENE_30), true, MaxFieldLength.UNLIMITED); for (int i = 0; i < 20; i++) { Document document = new Document(); document.add(new Field(sortFieldName, "" + i, Store.NO, Index.NOT_ANALYZED)); String textField = "sample text"; if (i % 2 == 0) {// produce some different scores for (int j = 0; j < i; j++) { textField += " " + queryTerm; } } document.add(new Field(textFieldName, textField, Store.NO, Index.ANALYZED)); indexWriter.addDocument(document); } indexWriter.close(true); DeployClient deployClient = new DeployClient(miniCluster.getProtocol()); IndexState indexState = deployClient.addIndex(sortIndex.getName(), sortIndex.getAbsolutePath(), 1) .joinDeployment(); assertEquals(IndexState.DEPLOYED, indexState); // query and compare results LuceneClient client = new LuceneClient(miniCluster.createInteractionProtocol()); final Query query = new QueryParser(Version.LUCENE_30, "", new KeywordAnalyzer()) .parse(textFieldName + ": " + queryTerm); Sort sort = new Sort(new SortField[] { new SortField(sortFieldName, SortField.INT) }); final Hits hits = client.search(query, new String[] { sortIndex.getName() }, 20, sort); assertNotNull(hits); List<Hit> hitsList = hits.getHits(); for (final Hit hit : hitsList) { writeToLog(hit); } assertEquals(9, hits.size()); assertEquals(9, hitsList.size()); assertEquals(1, hitsList.get(0).getSortFields().length); for (int i = 0; i < hitsList.size() - 1; i++) { int compareTo = hitsList.get(i).getSortFields()[0].compareTo(hitsList.get(i + 1).getSortFields()[0]); assertTrue("results not after field", compareTo == 0 || compareTo == -1); } client.close(); }
From source file:com.dasasian.chok.lucene.integration.LuceneClientTest.java
License:Apache License
@Test
public void testFilteredSearch() throws Exception {
    // write and deploy test index
    File filterIndex = temporaryFolder.newFolder("filterIndex");
    File filterShard = new File(filterIndex, "filterShard");
    String textFieldName = "textField";
    String filterFieldName = "filterField";
    IndexWriter indexWriter = new IndexWriter(FSDirectory.open(filterShard),
            new StandardAnalyzer(Version.LUCENE_30), true, MaxFieldLength.UNLIMITED);
    for (int i = 0; i < 100; i++) {
        Document document = new Document();
        document.add(new Field(textFieldName, "sample " + i, Store.YES, Index.NOT_ANALYZED));
        document.add(new Field(filterFieldName, "" + (i % 10), Store.YES, Index.NOT_ANALYZED));
        indexWriter.addDocument(document);
    }
    indexWriter.close(true);

    DeployClient deployClient = new DeployClient(miniCluster.createInteractionProtocol());
    IndexState indexState = deployClient.addIndex(filterIndex.getName(), filterIndex.getAbsolutePath(), 1)
            .joinDeployment();
    assertEquals(IndexState.DEPLOYED, indexState);

    // build filter for terms in set {i | (i % 10) == 3}
    LuceneClient client = new LuceneClient(miniCluster.getZkConfiguration());
    TermQuery filterQuery = new TermQuery(new Term(filterFieldName, "3"));
    QueryWrapperFilter filter = new QueryWrapperFilter(filterQuery);
    final Query query = new QueryParser(Version.LUCENE_30, "", new KeywordAnalyzer())
            .parse(textFieldName + ":" + "sample*3");
    final Hits hits = client.search(query, new String[] { filterIndex.getName() }, 100, null, filter);
    assertNotNull(hits);
    List<Hit> hitsList = hits.getHits();
    for (final Hit hit : hitsList) {
        writeToLog(hit);
    }
    assertEquals(10, hits.size());
    assertEquals(10, hitsList.size());

    // check that returned results conform to the filter
    for (final Hit hit : hitsList) {
        MapWritable mw = client.getDetails(hit);
        Text text = (Text) mw.get(new Text("textField"));
        assertNotNull(text);
        String[] parts = text.toString().split(" ");
        assertTrue(parts.length == 2);
        int num = Integer.valueOf(parts[1]);
        assertTrue((num % 10) == 3);
    }
    client.close();
}
From source file:com.dasasian.chok.lucene.integration.LuceneComplianceTest.java
License:Apache License
private static void writeIndex(File file, List<Document> documents) throws IOException {
    file.mkdirs();
    assertTrue(file.exists());
    IndexWriter indexWriter = new IndexWriter(FSDirectory.open(file),
            new StandardAnalyzer(Version.LUCENE_30), true, MaxFieldLength.UNLIMITED);
    for (Document document : documents) {
        indexWriter.addDocument(document);
    }
    indexWriter.close(true);
}
From source file:com.dasasian.chok.lucene.testutil.LuceneIndexGenerator.java
License:Apache License
public void createIndex(TestIndex testIndex, String[] wordList, int wordsPerDoc, int docsPerShard) {
    long startTime = System.currentTimeMillis();
    try {
        for (File index : testIndex.getShardFiles()) {
            int count = wordList.length;
            Random random = new Random(System.currentTimeMillis());
            IndexWriter indexWriter = new IndexWriter(FSDirectory.open(index),
                    new StandardAnalyzer(Version.LUCENE_30), true, IndexWriter.MaxFieldLength.UNLIMITED);
            for (int i = 0; i < docsPerShard; i++) {
                // generate text first
                StringBuilder text = new StringBuilder();
                for (int j = 0; j < wordsPerDoc; j++) {
                    text.append(wordList[random.nextInt(count)]);
                    text.append(" ");
                }
                Document document = new Document();
                document.add(new Field("key", "key_" + i, Field.Store.NO, Field.Index.NOT_ANALYZED));
                document.add(new Field("text", text.toString(), Field.Store.NO, Field.Index.ANALYZED));
                indexWriter.addDocument(document);
            }
            indexWriter.optimize();
            indexWriter.close();
            System.out.println("Index created with " + docsPerShard + " documents in "
                    + (System.currentTimeMillis() - startTime) + " ms");
            // Once the index is ready, it is moved to its final destination and a
            // "done" flag file is written, which shell scripts can use to detect
            // that the move is complete.
            new File(index, "done").createNewFile();
        }
    } catch (Exception e) {
        throw new RuntimeException("Unable to write index", e);
    }
}
From source file:com.devb.search.IndicIndexer.java
License:Apache License
@Override
public void makeIndex() {
    String indexPath = servletContext.getRealPath("/") + "/hindex/";
    String docsPath = servletContext.getRealPath("/") + "/hdocs/";
    boolean create = true;
    final File docDir = new File(docsPath);
    if (!docDir.exists() || !docDir.canRead()) {
        System.out.println("Document directory '" + docDir.getAbsolutePath()
                + "' does not exist or is not readable, please check the path\n");
        return;
    }
    Date start = new Date();
    try {
        System.out.println("Indexing to directory '" + indexPath + "'...\n");
        org.apache.lucene.store.Directory dir = FSDirectory.open(new File(indexPath));
        Analyzer analyzer = new HindiAnalyzer();
        IndexWriterConfig iwc = new IndexWriterConfig(null, analyzer);
        if (create) {
            iwc.setOpenMode(OpenMode.CREATE);
        } else {
            iwc.setOpenMode(OpenMode.CREATE_OR_APPEND);
        }
        IndexWriter writer = new IndexWriter(dir, iwc);
        if (docDir.canRead() && docDir.isDirectory()) {
            String[] files = docDir.list();
            if (files != null) {
                for (int i = 0; i < files.length; i++) {
                    File file = new File(docDir, files[i]);
                    FileInputStream fileInputStream = new FileInputStream(file);
                    BufferedReader reader = new BufferedReader(
                            new InputStreamReader(fileInputStream, "UTF-8"));
                    Tokenizer tokenizer = new StandardTokenizer(reader);
                    CharTermAttribute termAtt = tokenizer.addAttribute(CharTermAttribute.class);
                    tokenizer.reset();
                    int lineNumber = 0;
                    try {
                        // index one document per token
                        while (tokenizer.incrementToken()) {
                            Document doc = new Document();
                            Field pathField = new StringField("path", file.getName(), Field.Store.YES);
                            doc.add(pathField);
                            TextField nField = new TextField("linenumber",
                                    Integer.toString(++lineNumber), Store.YES);
                            doc.add(nField);
                            TextField field = new TextField("contents", termAtt.toString(), Store.YES);
                            doc.add(field);
                            writer.addDocument(doc);
                        }
                        System.out.println("Adding " + file + "\n");
                    } catch (Exception e) {
                        e.printStackTrace();
                    } finally {
                        tokenizer.close();
                        reader.close();
                        fileInputStream.close();
                    }
                }
            }
        }
        writer.close();
        Date end = new Date();
        System.out.println((end.getTime() - start.getTime()) + " total milliseconds\n");
    } catch (IOException e) {
        System.out.println("Caught a " + e.getClass() + "\n with message: " + e.getMessage());
    }
}
From source file:com.devb.search.StandardIndexer.java
License:Apache License
@Override
public void makeIndex() {
    String indexPath = servletContext.getRealPath("/") + "/index/";
    String docsPath = servletContext.getRealPath("/") + "/docs/";
    boolean create = true;
    final File docDir = new File(docsPath);
    if (!docDir.exists() || !docDir.canRead()) {
        System.out.println("Document directory '" + docDir.getAbsolutePath()
                + "' does not exist or is not readable, please check the path\n");
        return;
    }
    Date start = new Date();
    try {
        System.out.println("Indexing to directory '" + indexPath + "'...\n");
        org.apache.lucene.store.Directory dir = FSDirectory.open(new File(indexPath));
        Analyzer analyzer = new StandardAnalyzer();
        IndexWriterConfig iwc = new IndexWriterConfig(null, analyzer);
        if (create) {
            iwc.setOpenMode(OpenMode.CREATE);
        } else {
            iwc.setOpenMode(OpenMode.CREATE_OR_APPEND);
        }
        IndexWriter writer = new IndexWriter(dir, iwc);
        if (docDir.canRead() && docDir.isDirectory()) {
            String[] files = docDir.list();
            if (files != null) {
                for (int i = 0; i < files.length; i++) {
                    File file = new File(docDir, files[i]);
                    FileReader fr = new FileReader(file);
                    BufferedReader br = new BufferedReader(fr);
                    String line;
                    int lineNumber = 0;
                    try {
                        // index one document per line of the file
                        while ((line = br.readLine()) != null) {
                            Document doc = new Document();
                            Field pathField = new StringField("path", file.getName(), Field.Store.YES);
                            doc.add(pathField);
                            TextField nField = new TextField("linenumber",
                                    Integer.toString(++lineNumber), Store.YES);
                            doc.add(nField);
                            TextField field = new TextField("contents", line, Store.YES);
                            doc.add(field);
                            writer.addDocument(doc);
                        }
                        System.out.println("Adding " + file + "\n");
                    } catch (Exception e) {
                        e.printStackTrace();
                    } finally {
                        br.close();
                        fr.close();
                    }
                }
            }
        }
        writer.close();
        Date end = new Date();
        System.out.println((end.getTime() - start.getTime()) + " total milliseconds\n");
    } catch (IOException e) {
        System.out.println("Caught a " + e.getClass() + "\n with message: " + e.getMessage());
    }
}
From source file:com.docdoku.server.IndexerBean.java
License:Open Source License
/**
 * Indexes a single document: the full name as an un-analyzed stored field,
 * and the content streamed from the given reader (tokenized, not stored).
 */
private void addDoc(IndexWriter pIndexWriter, Reader pContentReader, String pFullName)
        throws FileNotFoundException, CorruptIndexException, IOException {
    Document doc = new Document();
    doc.add(new Field("fullName", pFullName, Field.Store.YES, Field.Index.NOT_ANALYZED));
    doc.add(new Field("content", pContentReader));
    pIndexWriter.addDocument(doc);
}
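For context, a hypothetical caller of this helper might look like the sketch below. The writer construction follows the Lucene 3.x-style API used elsewhere on this page, and indexDir, extractedTextFile, and the full-name string are illustrative assumptions, not taken from the source file:

IndexWriter indexWriter = new IndexWriter(FSDirectory.open(indexDir), // indexDir: hypothetical index folder
        new StandardAnalyzer(Version.LUCENE_30), true, IndexWriter.MaxFieldLength.UNLIMITED);
addDoc(indexWriter, new FileReader(extractedTextFile), "workspace/sample-document"); // hypothetical inputs
indexWriter.close();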