List of usage examples for org.apache.lucene.index.IndexWriter.addDocument
public long addDocument(Iterable<? extends IndexableField> doc) throws IOException
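Before the collected examples below, here is a minimal, self-contained sketch of a typical addDocument call against the modern IndexWriterConfig API. The index path ("index") and the field names ("id", "body") are illustrative placeholders, not part of any example on this page.

import java.io.IOException;
import java.nio.file.Paths;

import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.StringField;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;

public class AddDocumentSketch {
    public static void main(String[] args) throws IOException {
        // "index", "id" and "body" are placeholder names for this sketch.
        try (Directory dir = FSDirectory.open(Paths.get("index"));
             IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig(new StandardAnalyzer()))) {
            Document doc = new Document();
            doc.add(new StringField("id", "1", Field.Store.YES));            // exact-match key, not analyzed
            doc.add(new TextField("body", "hello lucene", Field.Store.YES)); // analyzed full text
            long seqNo = writer.addDocument(doc); // returns the operation's sequence number
            writer.commit();
        }
    }
}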
From source file:com.redhat.satellite.search.index.ngram.tests.NGramTestSetup.java
License:Open Source License
/** Creates an index in RAM. */
public void setUp() throws Exception {
    super.setUp();
    initItems();

    this.stanDir = new RAMDirectory();
    IndexWriter stanWriter = new IndexWriter(this.stanDir, new StandardAnalyzer(), true);

    this.ngramDir = new RAMDirectory();
    IndexWriter ngramWriter = new IndexWriter(this.ngramDir, new NGramAnalyzer(min_ngram, max_ngram), true);

    for (Map<String, String> item : items) {
        String name = item.get("name");
        String descp = item.get("description");
        Document doc = new Document();
        doc.add(new Field("name", name, Field.Store.YES, Field.Index.TOKENIZED));
        doc.add(new Field("description", descp, Field.Store.YES, Field.Index.TOKENIZED));
        stanWriter.addDocument(doc);
        ngramWriter.addDocument(doc);
    }
    stanWriter.close();
    ngramWriter.close();
}
From source file:com.redsqirl.SimpleFileIndexer.java
License:Open Source License
public void indexFileWithIndexWriter(IndexWriter indexWriter, File f, String suffix) throws IOException {
    if (f.isHidden() || f.isDirectory() || !f.canRead() || !f.exists()) {
        return;
    }
    if (suffix != null && !f.getName().endsWith(suffix)) {
        return;
    }
    logger.info("Indexing file " + f.getCanonicalPath());

    Document doc = new Document();
    doc.add(new Field("contents", new FileReader(f)));
    doc.add(new Field("filename", f.getCanonicalPath(), Field.Store.YES, Field.Index.ANALYZED));
    indexWriter.addDocument(doc);
}
From source file:com.revorg.goat.IndexManager.java
License:Open Source License
/**
 * Creates documents inside of the Lucene index.
 *
 * @param writer           IndexWriter
 * @param rs               ResultSet for the row of data
 * @param columnNamesArray the column names to be added to the document
 * @param indexTypeArray   the column types to be added to the document
 * @param tempHTMLDir      the temporary HTML directory for HTML publishing
 * @return ActionResult
 */
private static String createDocument(IndexWriter writer, ResultSet rs, String columnNamesArray[],
        String indexTypeArray[], String tempHTMLDir) {
    try {
        final Document doc = new Document();
        int columns = columnNamesArray.length;
        /*
         * Field(String name, String value, Field.Store store, Field.Index index)
         *
         * Store:
         *   COMPRESS - Store the original field value in the index in compressed form.
         *              Useful for long documents and for binary-valued fields.
         *   YES      - Store the original field value in the index. Useful for short texts
         *              like a document's title that should be displayed with the results.
         *              The value is stored in its original form, i.e. no analyzer is used
         *              before it is stored.
         *   NO       - Do not store the field value in the index.
         *
         * Index:
         *   ANALYZED     - Index the tokens produced by running the field's value through
         *                  an Analyzer. Useful for common text.
         *   NOT_ANALYZED - Index the field's value without using an Analyzer, so it can be
         *                  searched. As no analyzer is used, the value is stored as a single
         *                  term. Useful for unique IDs like product numbers.
         *   NO           - Do not index the field value. The field cannot be searched, but
         *                  its contents can still be accessed if it is stored.
         */
        for (int i = 0; i < columns; i++) {
            String columnName = columnNamesArray[i].trim().toLowerCase();
            String columnIndexType = indexTypeArray[i];    // map column type to array
            String columnValue = rs.getString(columnName); // result sets are 1-based, not 0-based
            if (columnValue == null) { // Lucene does not accept nulls
                columnValue = "";
            }
            // Triggers cannot be added
            if (columnIndexType.equalsIgnoreCase("TriggerUpdate") == false
                    || columnIndexType.equalsIgnoreCase("TriggerDelete") == false) {
                if (columnIndexType.equalsIgnoreCase("PrimaryKey") || columnIndexType.equalsIgnoreCase("Keyword")
                        || columnIndexType.equalsIgnoreCase("Date")) {
                    // Format dates so they sort correctly
                    if (columnIndexType.equalsIgnoreCase("Date")) {
                        columnValue = columnValue.replace("/", "");
                    }
                    doc.add(new Field(columnName, columnValue, Field.Store.YES, Field.Index.NOT_ANALYZED));
                }
                // UnIndexed
                else if (columnIndexType.equalsIgnoreCase("UnIndexed")) {
                    doc.add(new Field(columnName, columnValue, Field.Store.YES, Field.Index.NO));
                } else if (columnIndexType.equalsIgnoreCase("Text")) {
                    doc.add(new Field(columnName, columnValue, Field.Store.YES, Field.Index.ANALYZED));
                } else if (columnIndexType.equalsIgnoreCase("UnStored") || columnIndexType.equalsIgnoreCase("HTML")) {
                    if (columnIndexType.equalsIgnoreCase("HTML") && columnValue.length() != 0) {
                        String htmlString = tempHTMLDir + Utilities.CreateUUID() + ".html";
                        File htmlFile = new File(htmlString);
                        BufferedWriter out = new BufferedWriter(new FileWriter(htmlString));
                        out.write(columnValue);
                        out.close();
                        // Parse the document
                        FileInputStream fis = new FileInputStream(htmlFile);
                        HTMLParser parser = new HTMLParser(fis);
                        // Add the tag-stripped contents as a Reader-valued Text field so it will
                        // get tokenized and indexed.
                        doc.add(new Field(columnName, parser.getReader())); // parse HTML
                    }
                    // UnStored field
                    else {
                        doc.add(new Field(columnName, columnValue, Field.Store.NO, Field.Index.ANALYZED));
                    }
                } else if (columnIndexType.equalsIgnoreCase("Binary")) {
                    doc.add(new Field(columnName, columnValue, Field.Store.COMPRESS, Field.Index.NO));
                }
            }
        }
        // Add the document here
        writer.addDocument(doc);
        ActionResult = "Success";
        return ActionResult;
    } catch (Exception e) {
        ActionResultError = " caught a " + e.getClass() + " with message: " + e.getMessage();
    }
    ActionResult = "Failure";
    return ActionResult + ActionResultError;
}
From source file:com.ricky.codelab.lucene.LuceneIndexAndSearchDemo.java
License:Apache License
/**
 * Indexes a single document with IKAnalyzer and runs a sample search.
 * (The original Chinese comments and sample text were garbled by encoding; the
 * comments below describe what the code does.)
 */
public static void main(String[] args) {
    // Field name and sample text for the Lucene Document
    String fieldName = "text";
    String text = "IK Analyzer???????"; // original Chinese sample text lost to encoding

    // IKAnalyzer in smart-segmentation mode
    Analyzer analyzer = new IKAnalyzer(true);

    Directory directory = null;
    IndexWriter iwriter = null;
    IndexReader ireader = null;
    IndexSearcher isearcher = null;
    try {
        // Build the index in an in-memory directory
        directory = new RAMDirectory();
        IndexWriterConfig iwConfig = new IndexWriterConfig(analyzer);
        iwConfig.setOpenMode(OpenMode.CREATE_OR_APPEND);
        iwriter = new IndexWriter(directory, iwConfig);

        // Add one document
        Document doc = new Document();
        doc.add(new StringField("ID", "10000", Field.Store.YES));
        doc.add(new TextField(fieldName, text, Field.Store.YES));
        iwriter.addDocument(doc);
        iwriter.close();

        // Search the index
        ireader = DirectoryReader.open(directory);
        isearcher = new IndexSearcher(ireader);

        String keyword = "?"; // original search keyword lost to encoding

        // Parse the keyword into a Query
        QueryParser qp = new QueryParser(fieldName, analyzer);
        qp.setDefaultOperator(QueryParser.AND_OPERATOR);
        Query query = qp.parse(keyword);
        System.out.println("Query = " + query);

        // Fetch the top 5 hits
        TopDocs topDocs = isearcher.search(query, 5);
        System.out.println("Total hits: " + topDocs.totalHits);

        // Print the matching documents
        ScoreDoc[] scoreDocs = topDocs.scoreDocs;
        for (int i = 0; i < topDocs.totalHits; i++) {
            Document targetDoc = isearcher.doc(scoreDocs[i].doc);
            System.out.println("Hit: " + targetDoc.toString());
        }
    } catch (CorruptIndexException e) {
        e.printStackTrace();
    } catch (LockObtainFailedException e) {
        e.printStackTrace();
    } catch (IOException e) {
        e.printStackTrace();
    } catch (ParseException e) {
        e.printStackTrace();
    } finally {
        if (ireader != null) {
            try {
                ireader.close();
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
        if (directory != null) {
            try {
                directory.close();
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
    }
}
From source file:com.rocana.lucene.codec.v1.TestBlockPostingsFormat.java
License:Apache License
/** Make sure the final sub-block(s) are not skipped. */
public void testFinalBlock() throws Exception {
    Directory d = newDirectory();
    IndexWriter w = new IndexWriter(d, new IndexWriterConfig(new MockAnalyzer(random())));
    for (int i = 0; i < 25; i++) {
        Document doc = new Document();
        doc.add(newStringField("field", Character.toString((char) (97 + i)), Field.Store.NO));
        doc.add(newStringField("field", "z" + Character.toString((char) (97 + i)), Field.Store.NO));
        w.addDocument(doc);
    }
    w.forceMerge(1);

    DirectoryReader r = DirectoryReader.open(w);
    assertEquals(1, r.leaves().size());
    RocanaFieldReader field = (RocanaFieldReader) r.leaves().get(0).reader().fields().terms("field");
    // We should see exactly two blocks: one root block (prefix empty string) and one block for z* terms (prefix z):
    RocanaStats stats = field.getStats();
    assertEquals(0, stats.floorBlockCount);
    assertEquals(2, stats.nonFloorBlockCount);
    r.close();
    w.close();
    d.close();
}
From source file:com.rocana.lucene.codec.v1.TestRocanaPerFieldPostingsFormat2.java
License:Apache License
private void addDocs(IndexWriter writer, int numDocs) throws IOException {
    for (int i = 0; i < numDocs; i++) {
        Document doc = new Document();
        doc.add(newTextField("content", "aaa", Field.Store.NO));
        writer.addDocument(doc);
    }
}
From source file:com.rocana.lucene.codec.v1.TestRocanaPerFieldPostingsFormat2.java
License:Apache License
private void addDocs2(IndexWriter writer, int numDocs) throws IOException {
    for (int i = 0; i < numDocs; i++) {
        Document doc = new Document();
        doc.add(newTextField("content", "bbb", Field.Store.NO));
        writer.addDocument(doc);
    }
}
From source file:com.rocana.lucene.codec.v1.TestRocanaPerFieldPostingsFormat2.java
License:Apache License
private void addDocs3(IndexWriter writer, int numDocs) throws IOException {
    for (int i = 0; i < numDocs; i++) {
        Document doc = new Document();
        doc.add(newTextField("content", "ccc", Field.Store.NO));
        doc.add(newStringField("id", "" + i, Field.Store.YES));
        writer.addDocument(doc);
    }
}
From source file:com.rocana.lucene.codec.v1.TestRocanaPerFieldPostingsFormat2.java
License:Apache License
@Test
public void testStressPerFieldCodec() throws IOException {
    Directory dir = newDirectory(random());
    final int docsPerRound = 97;
    int numRounds = atLeast(1);
    for (int i = 0; i < numRounds; i++) {
        int num = TestUtil.nextInt(random(), 30, 60);
        IndexWriterConfig config = newIndexWriterConfig(random(), new MockAnalyzer(random()));
        config.setOpenMode(OpenMode.CREATE_OR_APPEND);
        IndexWriter writer = newWriter(dir, config);
        for (int j = 0; j < docsPerRound; j++) {
            final Document doc = new Document();
            for (int k = 0; k < num; k++) {
                FieldType customType = new FieldType(TextField.TYPE_NOT_STORED);
                customType.setTokenized(random().nextBoolean());
                customType.setOmitNorms(random().nextBoolean());
                Field field = newField("" + k, TestUtil.randomRealisticUnicodeString(random(), 128), customType);
                doc.add(field);
            }
            writer.addDocument(doc);
        }
        if (random().nextBoolean()) {
            writer.forceMerge(1);
        }
        writer.commit();
        assertEquals((i + 1) * docsPerRound, writer.maxDoc());
        writer.close();
    }
    dir.close();
}
From source file:com.search.lucene.demo.facet.DistanceFacetsExample.java
License:Apache License
/** Build the example index. */
public void index() throws IOException {
    IndexWriter writer = new IndexWriter(indexDir,
            new IndexWriterConfig(new WhitespaceAnalyzer()).setOpenMode(OpenMode.CREATE));

    // TODO: we could index in radians instead ... saves all the conversions in getBoundingBoxFilter

    // Add documents with latitude/longitude location:
    // we index these both as DoubleFields (for bounding box/ranges) and as NumericDocValuesFields (for scoring)
    Document doc = new Document();
    doc.add(new DoubleField("latitude", 40.759011, Field.Store.NO));
    doc.add(new NumericDocValuesField("latitude", Double.doubleToRawLongBits(40.759011)));
    doc.add(new DoubleField("longitude", -73.9844722, Field.Store.NO));
    doc.add(new NumericDocValuesField("longitude", Double.doubleToRawLongBits(-73.9844722)));
    writer.addDocument(doc);

    doc = new Document();
    doc.add(new DoubleField("latitude", 40.718266, Field.Store.NO));
    doc.add(new NumericDocValuesField("latitude", Double.doubleToRawLongBits(40.718266)));
    doc.add(new DoubleField("longitude", -74.007819, Field.Store.NO));
    doc.add(new NumericDocValuesField("longitude", Double.doubleToRawLongBits(-74.007819)));
    writer.addDocument(doc);

    doc = new Document();
    doc.add(new DoubleField("latitude", 40.7051157, Field.Store.NO));
    doc.add(new NumericDocValuesField("latitude", Double.doubleToRawLongBits(40.7051157)));
    doc.add(new DoubleField("longitude", -74.0088305, Field.Store.NO));
    doc.add(new NumericDocValuesField("longitude", Double.doubleToRawLongBits(-74.0088305)));
    writer.addDocument(doc);

    // Open near-real-time searcher
    searcher = new IndexSearcher(DirectoryReader.open(writer, true));
    writer.close();
}