Usage examples for org.apache.lucene.index.IndexWriter#addDocument
public long addDocument(Iterable<? extends IndexableField> doc) throws IOException
From source file:action.indexing.Fragments.java
License:Apache License
public void docBoostMethod() throws IOException { Directory dir = new RAMDirectory(); IndexWriter writer = new IndexWriter(dir, new StandardAnalyzer(Version.LUCENE_30), IndexWriter.MaxFieldLength.UNLIMITED); // START/*from www . java2 s. com*/ Document doc = new Document(); String senderEmail = getSenderEmail(); String senderName = getSenderName(); String subject = getSubject(); String body = getBody(); doc.add(new Field("senderEmail", senderEmail, Field.Store.YES, Field.Index.NOT_ANALYZED)); doc.add(new Field("senderName", senderName, Field.Store.YES, Field.Index.ANALYZED)); doc.add(new Field("subject", subject, Field.Store.YES, Field.Index.ANALYZED)); doc.add(new Field("body", body, Field.Store.NO, Field.Index.ANALYZED)); String lowerDomain = getSenderDomain().toLowerCase(); if (isImportant(lowerDomain)) { doc.setBoost(1.5F); //1 } else if (isUnimportant(lowerDomain)) { doc.setBoost(0.1F); //2 } writer.addDocument(doc); // END writer.close(); /* #1 Good domain boost factor: 1.5 #2 Bad domain boost factor: 0.1 */ }
From source file:action.indexing.IndexingTest.java
License:Apache License
protected void setUp() throws Exception { //1 directory = new RAMDirectory(); IndexWriter writer = getWriter(); //2 for (int i = 0; i < ids.length; i++) { //3 Document doc = new Document(); doc.add(new Field("id", ids[i], Field.Store.YES, Field.Index.NOT_ANALYZED)); doc.add(new Field("country", unindexed[i], Field.Store.YES, Field.Index.NO)); doc.add(new Field("contents", unstored[i], Field.Store.NO, Field.Index.ANALYZED)); doc.add(new Field("city", text[i], Field.Store.YES, Field.Index.ANALYZED)); writer.addDocument(doc); }/*from w ww . ja v a 2 s .c o m*/ writer.close(); }
From source file:action.indexing.IndexingTest.java
License:Apache License
public void testMaxFieldLength() throws IOException { assertEquals(1, getHitCount("contents", "bridges")); //1 IndexWriter writer = new IndexWriter(directory, new WhitespaceAnalyzer(), //2 new IndexWriter.MaxFieldLength(1)); //2 Document doc = new Document(); // 3 doc.add(new Field("contents", "these bridges can't be found", // 3 Field.Store.NO, Field.Index.ANALYZED)); // 3 writer.addDocument(doc); // 3 writer.close(); // 3 assertEquals(1, getHitCount("contents", "bridges")); //4 }
From source file:action.indexing.VerboseIndexing.java
License:Apache License
/**
 * Indexes 100 identical single-field documents into a RAMDirectory with
 * IndexWriter's diagnostic info stream wired to System.out, then optimizes,
 * so the writer's internal activity (flushes, merges) is printed verbosely.
 *
 * @throws IOException if indexing fails
 */
private void index() throws IOException {
    Directory dir = new RAMDirectory();
    IndexWriter writer = new IndexWriter(dir, new WhitespaceAnalyzer(),
            IndexWriter.MaxFieldLength.UNLIMITED);
    try {
        writer.setInfoStream(System.out);
        for (int i = 0; i < 100; i++) {
            Document doc = new Document();
            doc.add(new Field("keyword", "goober", Field.Store.YES, Field.Index.NOT_ANALYZED));
            writer.addDocument(doc);
        }
        writer.optimize();
    } finally {
        // Fix: close in finally so the write lock is released even if an
        // addDocument or optimize call throws mid-run.
        writer.close();
    }
}
From source file:analysis.SynonymAnalyzerTest.java
License:Apache License
public void setUp() throws Exception { RAMDirectory directory = new RAMDirectory(); IndexWriter writer = new IndexWriter(directory, synonymAnalyzer, //#1 IndexWriter.MaxFieldLength.UNLIMITED); Document doc = new Document(); doc.add(new Field("content", "The quick brown fox jumps over the lazy dog", Field.Store.YES, Field.Index.ANALYZED)); //#2 writer.addDocument(doc); writer.close();/*from w ww . j ava2 s. c o m*/ searcher = new IndexSearcher(directory, true); }
From source file:antnlp.opie.indexsearch.IndexFiles.java
License:Apache License
/** Indexes a single document */ static void indexDoc(IndexWriter writer, Path file, long lastModified) throws IOException { InputStreamReader iReader = new InputStreamReader(Files.newInputStream(file), StandardCharsets.UTF_8); BufferedReader bufReader = new BufferedReader(iReader); String docLine = null;//w w w . jav a 2 s . c om while ((docLine = bufReader.readLine()) != null) { docLine = docLine.trim(); if (docLine.length() == 0) continue; String[] column = docLine.split("\\t"); System.out.println(column[0]); System.out.println(column[1]); // make a new, empty document Document doc = new Document(); // Add the id of the file as a field named "id". Use a // field that is indexed (i.e. searchable), but don't tokenize // the field into separate words and don't index term frequency // or positional information: Field docidField = new StringField("docid", column[0], Field.Store.YES); doc.add(docidField); // Add the last modified date of the file a field named "modified". // Use a LongPoint that is indexed (i.e. efficiently filterable with // PointRangeQuery). This indexes to milli-second resolution, which // is often too fine. You could instead create a number based on // year/month/day/hour/minutes/seconds, down the resolution you require. // For example the long value 2011021714 would mean // February 17, 2011, 2-3 PM. doc.add(new LongPoint("modified", lastModified)); // Add the contents of the file to a field named "contents". Specify a Reader, // so that the text of the file is tokenized and indexed, but not stored. // Note that FileReader expects the file to be in UTF-8 encoding. // If that's not the case searching for special characters will fail. 
doc.add(new TextField("contents", column[1], Field.Store.YES)); if (writer.getConfig().getOpenMode() == OpenMode.CREATE) { // New index, so we just add the document (no old document can be there): System.out.println("adding " + column[0]); writer.addDocument(doc); } else { // Existing index (an old copy of this document may have been indexed) so // we use updateDocument instead to replace the old one matching the exact // path, if present: System.out.println("updating " + column[0]); writer.updateDocument(new Term("docid", column[0]), doc); } } iReader.close(); bufReader.close(); }
From source file:aos.lucene.analysis.codec.MetaphoneAnalyzerTest.java
License:Apache License
/**
 * Verifies phonetic matching: "cool cat" is indexed through the
 * MetaphoneReplacementAnalyzer, and the same-sounding query "kool kat"
 * (parsed with the same analyzer) must hit it, with the original text
 * still retrievable from the stored field.
 *
 * @throws Exception if indexing, parsing or searching fails
 */
public void testKoolKat() throws Exception {
    RAMDirectory directory = new RAMDirectory();
    Analyzer analyzer = new MetaphoneReplacementAnalyzer();
    IndexWriter writer = new IndexWriter(directory, analyzer, true, IndexWriter.MaxFieldLength.UNLIMITED);
    try {
        Document doc = new Document();
        doc.add(new Field("contents", "cool cat", Field.Store.YES, Field.Index.ANALYZED));
        writer.addDocument(doc);
    } finally {
        // Fix: close in finally so a failed addDocument cannot leak the
        // write lock.
        writer.close();
    }
    IndexSearcher searcher = new IndexSearcher(directory);
    try {
        Query query = new QueryParser(Version.LUCENE_46, "contents", analyzer).parse("kool kat");
        TopDocs hits = searcher.search(query, 1);
        assertEquals(1, hits.totalHits);
        int docID = hits.scoreDocs[0].doc;
        Document storedDoc = searcher.doc(docID);
        assertEquals("cool cat", storedDoc.get("contents"));
    } finally {
        // Fix: close in finally so a failed assertion cannot leak the
        // searcher.
        searcher.close();
    }
}
From source file:aos.lucene.analysis.Fragments.java
License:Apache License
/**
 * Illustrative fragment: shows how fields are added to a Document and the
 * Document handed to IndexWriter.addDocument.
 *
 * NOTE(review): writer is left null here — the IndexWriter construction is
 * elided for brevity — so writer.addDocument(doc) would throw a
 * NullPointerException if this fragment were actually executed. Initialize
 * writer before reusing this code.
 *
 * @throws Exception declared for symmetry with the surrounding fragments
 */
public void frag2() throws Exception {
    IndexWriter writer = null; // placeholder: real code must create a writer
    Document doc = new Document();
    // "title": tokenized and stored, so it is both searchable and retrievable.
    doc.add(new Field("title", "This is the title", Field.Store.YES, Field.Index.ANALYZED));
    // "contents": tokenized but not stored — searchable only.
    doc.add(new Field("contents", "...document contents...", Field.Store.NO, Field.Index.ANALYZED));
    writer.addDocument(doc); // NPE here as written — see note above
}
From source file:aos.lucene.analysis.keyword.KeywordAnalyzerTest.java
License:Apache License
public void setUp() throws Exception { Directory directory = new RAMDirectory(); IndexWriter writer = new IndexWriter(directory, new SimpleAnalyzer(), IndexWriter.MaxFieldLength.UNLIMITED); Document doc = new Document(); doc.add(new Field("partnum", "Q36", Field.Store.NO, Field.Index.NOT_ANALYZED_NO_NORMS)); //A doc.add(new Field("description", "Illidium Space Modulator", Field.Store.YES, Field.Index.ANALYZED)); writer.addDocument(doc); writer.close();//w ww. ja va 2 s . c o m searcher = new IndexSearcher(directory); }
From source file:aos.lucene.analysis.positional.PositionalPorterStopAnalyzerTest.java
License:Apache License
public void setUp() throws Exception { RAMDirectory directory = new RAMDirectory(); IndexWriter writer = new IndexWriter(directory, porterAnalyzer, IndexWriter.MaxFieldLength.UNLIMITED); Document doc = new Document(); doc.add(new Field("contents", "The quick brown fox jumps over the lazy dog", Field.Store.YES, Field.Index.ANALYZED)); writer.addDocument(doc); writer.close();// w w w. ja va 2s.c o m searcher = new IndexSearcher(directory, true); parser = new QueryParser(Version.LUCENE_46, "contents", porterAnalyzer); }