List of usage examples for org.apache.lucene.index.IndexWriter#addDocument
public long addDocument(Iterable<? extends IndexableField> doc) throws IOException
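Before the individual examples, here is a minimal, self-contained sketch of the pattern they all follow: open a Directory, create an IndexWriter, build a Document from Fields, and call addDocument. It assumes a recent Lucene release (single-argument IndexWriterConfig constructor, StringField/TextField); the class name, index path, and field names are illustrative assumptions and are not taken from the examples below.

import java.io.IOException;
import java.nio.file.Paths;

import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.StringField;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;

public class AddDocumentSketch {
    public static void main(String[] args) throws IOException {
        // Illustrative index location; any writable path works.
        try (Directory dir = FSDirectory.open(Paths.get("/tmp/example-index"));
             IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig(new StandardAnalyzer()))) {
            Document doc = new Document();
            // StringField: indexed as a single token, useful as an exact-match key.
            doc.add(new StringField("id", "doc-1", Field.Store.YES));
            // TextField: tokenized full-text content.
            doc.add(new TextField("body", "hello lucene", Field.Store.NO));
            writer.addDocument(doc);
            writer.commit();
        }
    }
}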
From source file:com.vnet.demo.service.lucene.LuceneService.java
License:Apache License
public void addDoc(DocumentData documentData) {
    IndexWriterConfig config = new IndexWriterConfig(version, analyzer);
    IndexWriter write = null;
    try {
        write = new IndexWriter(index, config);
        Document doc = new Document();
        doc.add(new LongField("id", documentData.getId(), Field.Store.YES));
        doc.add(new TextField("title", documentData.getTitle(), Field.Store.YES));
        doc.add(new TextField("summary", documentData.getSummary(), Field.Store.YES));
        doc.add(new TextField("context", documentData.getContext(), Field.Store.YES));
        doc.add(new LongField("createDate", documentData.getCreateDate(), Field.Store.YES));
        write.addDocument(doc);
    } catch (IOException e) {
        e.printStackTrace();
    } finally {
        IOUtils.closeQuietly(write);
    }
}
From source file:com.weasel.lucene.ik.sample.IKAnalyzerDemo.java
License:Apache License
public static void main(String[] args) {
    // Field name and sample text for the Lucene Document
    String fieldName = "text";
    String text = "IK Analyzer???????";

    // Build the IK Analyzer
    Analyzer analyzer = new IKAnalyzer();

    Directory directory = null;
    IndexWriter iwriter = null;
    IndexReader ireader = null;
    IndexSearcher isearcher = null;
    try {
        // Index in memory
        directory = new RAMDirectory();

        // Configure the IndexWriter
        IndexWriterConfig iwConfig = new IndexWriterConfig(Version.LUCENE_34, analyzer);
        iwConfig.setOpenMode(OpenMode.CREATE_OR_APPEND);
        iwriter = new IndexWriter(directory, iwConfig);

        // Build and add a single document
        Document doc = new Document();
        doc.add(new Field("ID", "10000", Field.Store.YES, Field.Index.NOT_ANALYZED));
        doc.add(new Field(fieldName, text, Field.Store.YES, Field.Index.ANALYZED));
        iwriter.addDocument(doc);
        iwriter.close();

        // Search phase
        ireader = IndexReader.open(directory);
        isearcher = new IndexSearcher(ireader);

        String keyword = "?";

        // Parse the keyword into a Query with QueryParser
        QueryParser qp = new QueryParser(Version.LUCENE_34, fieldName, analyzer);
        qp.setDefaultOperator(QueryParser.AND_OPERATOR);
        Query query = qp.parse(keyword);

        // Retrieve at most the top 5 hits
        TopDocs topDocs = isearcher.search(query, 5);
        System.out.println("Total hits: " + topDocs.totalHits);

        ScoreDoc[] scoreDocs = topDocs.scoreDocs;
        for (int i = 0; i < scoreDocs.length; i++) {
            Document targetDoc = isearcher.doc(scoreDocs[i].doc);
            System.out.println(targetDoc.toString());
        }
    } catch (CorruptIndexException e) {
        e.printStackTrace();
    } catch (LockObtainFailedException e) {
        e.printStackTrace();
    } catch (IOException e) {
        e.printStackTrace();
    } catch (ParseException e) {
        e.printStackTrace();
    } finally {
        if (ireader != null) {
            try {
                ireader.close();
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
        if (directory != null) {
            try {
                directory.close();
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
    }
}
From source file:com.work.IndexFiles.java
License:Apache License
/** Indexes a single document */
static void indexDoc(IndexWriter writer, Path file, long lastModified) throws IOException {
    try (InputStream stream = Files.newInputStream(file)) {
        // make a new, empty document
        Document doc = new Document();

        // Add the path of the file as a field named "path". Use a
        // field that is indexed (i.e. searchable), but don't tokenize
        // the field into separate words and don't index term frequency
        // or positional information:
        Field pathField = new StringField("path", file.toString(), Field.Store.YES);
        doc.add(pathField);

        // Add the last modified date of the file as a field named "modified".
        // A LongPoint would be indexed (i.e. efficiently filterable with
        // PointRangeQuery) at millisecond resolution, which is often too fine.
        // You could instead create a number based on
        // year/month/day/hour/minutes/seconds, down to the resolution you require.
        // For example the long value 2011021714 would mean
        // February 17, 2011, 2-3 PM.
        // doc.add(new LongPoint("modified", lastModified));
        doc.add(new StringField("modified", lastModified + "", Field.Store.YES));

        // Add the contents of the file to a field named "contents". Specify a Reader,
        // so that the text of the file is tokenized and indexed, but not stored.
        // Note that the reader expects the file to be in UTF-8 encoding.
        // If that's not the case searching for special characters will fail.
        doc.add(new TextField("contents",
                new BufferedReader(new InputStreamReader(stream, StandardCharsets.UTF_8))));

        if (writer.getConfig().getOpenMode() == OpenMode.CREATE) {
            // New index, so we just add the document (no old document can be there):
            System.out.println("adding " + file);
            writer.addDocument(doc);
        } else {
            // Existing index (an old copy of this document may have been indexed) so
            // we use updateDocument instead to replace the old one matching the exact
            // path, if present:
            System.out.println("updating " + file);
            writer.updateDocument(new Term("path", file.toString()), doc);
        }
    }
}
From source file:com.wrmsr.search.dsl.SearchServiceImpl.java
License:Apache License
@Override
public synchronized void addDoc(Doc doc) throws IOException {
    if (!this.indexWriter.isPresent()) {
        IndexWriterConfig indexWriterConfig = new IndexWriterConfig(Version.LUCENE_4_9, analyzer);
        IndexWriter indexWriter = new IndexWriter(directory, indexWriterConfig);
        this.indexWriter = Optional.of(indexWriter);
    }
    IndexWriter indexWriter = this.indexWriter.get();
    Document document = new Document();
    document.add(new Field("title", doc.getTitle(), FIELD_TYPE));
    document.add(new Field("isbn", doc.getIsbn(), FIELD_TYPE));
    indexWriter.addDocument(document);
}
From source file:com.yangxu.searchengine.index.IndexFiles.java
License:Apache License
/**
 * Indexes the given file using the given writer, or if a directory is
 * given, recurses over files and directories found under the given
 * directory.
 *
 * NOTE: This method indexes one document per input file. This is slow. For
 * good throughput, put multiple documents into your input file(s). An
 * example of this is in the benchmark module, which can create "line doc"
 * files, one document per line, using the <a href=
 * "../../../../../contrib-benchmark/org/apache/lucene/benchmark/byTask/tasks/WriteLineDocTask.html"
 * >WriteLineDocTask</a>.
 *
 * @param writer
 *            Writer to the index where the given file/dir info will be stored
 * @param file
 *            The file to index, or the directory to recurse into to find files to index
 * @throws IOException
 */
private void indexDocs(IndexWriter writer, File file) throws IOException {
    // do not try to index files that cannot be read
    String titleValue = null;      // title
    String contentValue = null;    // content
    String urlValue = null;
    String indextimeValue = null;
    String uploadtimeValue = null;
    if (file.canRead()) {
        if (file.isDirectory()) {
            String[] files = file.list();
            // an IO error could occur
            if (files != null) {
                for (int i = 0; i < files.length; i++) {
                    indexDocs(writer, new File(file, files[i]));
                }
            }
        } else {
            FileInputStream fis;
            try {
                fis = new FileInputStream(file);
                LineNumberReader reader = new LineNumberReader(new InputStreamReader(fis, "UTF-8"));
                String line = null;
                StringBuilder sb = new StringBuilder();
                // Input file layout: line 1 = url, line 2 = upload time,
                // line 3 = "xxx:title", line 4 = skipped, rest = content.
                while ((line = reader.readLine()) != null) {
                    switch (reader.getLineNumber()) {
                    case 1:
                        urlValue = line;
                        break;
                    case 2:
                        uploadtimeValue = line;
                        break;
                    case 3:
                        titleValue = line.split(":")[1];
                        break;
                    case 4:
                        break;
                    default:
                        sb.append(line);
                        break;
                    }
                }
                contentValue = sb.toString();
                reader.close();
            } catch (FileNotFoundException fnfe) {
                // at least on windows, some temporary files raise this
                // exception with an "access denied" message;
                // checking if the file can be read doesn't help
                return;
            }
            try {
                // make a new, empty document
                Document doc = new Document();

                // Add the URL as a field named "url". Use a field that is
                // indexed (i.e. searchable), but don't tokenize the field
                // into separate words and don't index term frequency or
                // positional information:
                Field urlField = new Field("url", urlValue, Field.Store.YES, Field.Index.NOT_ANALYZED);
                urlField.setIndexOptions(IndexOptions.DOCS_ONLY);
                doc.add(urlField);

                Field titleField = new Field("title", titleValue, Field.Store.YES, Field.Index.ANALYZED);
                titleField.setIndexOptions(IndexOptions.DOCS_ONLY);
                doc.add(titleField);

                Field contentField = new Field("content", contentValue, Field.Store.YES, Field.Index.ANALYZED);
                contentField.setIndexOptions(IndexOptions.DOCS_ONLY);
                doc.add(contentField);

                // Record the index time as "indextime". A NumericField indexed at
                // millisecond resolution is often too fine; here the date is stored
                // as a formatted string instead.
                SimpleDateFormat formatter = new SimpleDateFormat("yyyyMMdd HH:mm:ss");
                Date now = new Date();
                indextimeValue = formatter.format(now);
                Field indextimeField = new Field("indextime", indextimeValue, Field.Store.YES,
                        Field.Index.NOT_ANALYZED);
                indextimeField.setIndexOptions(IndexOptions.DOCS_ONLY);
                doc.add(indextimeField);

                Field uploadtimeField = new Field("uploadtime", uploadtimeValue, Field.Store.YES,
                        Field.Index.NOT_ANALYZED);
                uploadtimeField.setIndexOptions(IndexOptions.DOCS_ONLY);
                doc.add(uploadtimeField);

                if (writer.getConfig().getOpenMode() == OpenMode.CREATE) {
                    // New index, so we just add the document (no old document can be there):
                    System.out.println("adding " + file);
                    writer.addDocument(doc);
                } else {
                    // Existing index (an old copy of this document may have been
                    // indexed), so we use updateDocument instead to replace any old
                    // document matching the same URL, if present:
                    System.out.println("updating " + file);
                    writer.updateDocument(new Term("url", urlValue), doc);
                }
            } finally {
                fis.close();
            }
        }
    }
}
From source file:com.Yasna.forum.database.DbSearchIndexer.java
License:Open Source License
/**
 * Indexes an individual message. The connection is assumed to be open when
 * passed in and will remain open after the method is done executing.
 */
protected final void addMessageToIndex(IndexWriter writer, int messageID, String subject, String body,
        int userID, int threadID, int forumID, java.util.Date creationDate) throws IOException {
    if (writer == null) {
        return;
    }
    // Ignore messages with a null subject or body.
    if (subject == null || body == null) {
        return;
    }
    Document doc = new Document();
    doc.add(Field.Keyword("messageID", Integer.toString(messageID)));
    doc.add(new Field("userID", Integer.toString(userID), false, true, false));
    doc.add(new Field("threadID", Integer.toString(threadID), false, true, false));
    doc.add(new Field("forumID", Integer.toString(forumID), false, true, false));
    doc.add(Field.UnStored("subject", subject));
    doc.add(Field.UnStored("body", body));
    doc.add(new Field("creationDate", DateField.dateToString(creationDate), false, true, false));
    writer.addDocument(doc);
}
From source file:com.yida.framework.lucene5.facet.DistanceFacetsExample.java
License:Creative Commons License
/** Build the example index. */
public void index() throws IOException {
    IndexWriter writer = new IndexWriter(indexDir, new IndexWriterConfig(new WhitespaceAnalyzer()));

    // Add documents with latitude/longitude locations as doc values
    // (no FacetField is needed for distance faceting)
    Document doc = new Document();
    doc.add(new DoubleDocValuesField("latitude", 40.759011));
    doc.add(new DoubleDocValuesField("longitude", -73.9844722));
    writer.addDocument(doc);

    doc = new Document();
    doc.add(new DoubleDocValuesField("latitude", 40.718266));
    doc.add(new DoubleDocValuesField("longitude", -74.007819));
    writer.addDocument(doc);

    doc = new Document();
    doc.add(new DoubleDocValuesField("latitude", 40.7051157));
    doc.add(new DoubleDocValuesField("longitude", -74.0088305));
    writer.addDocument(doc);

    /*
    doc.add(new DoubleField("latitude", 40.759011, Field.Store.YES));
    doc.add(new DoubleField("longitude", -73.9844722, Field.Store.YES));
    writer.addDocument(doc);
    doc = new Document();
    doc.add(new DoubleField("latitude", 40.718266, Field.Store.YES));
    doc.add(new DoubleField("longitude", -74.007819, Field.Store.YES));
    writer.addDocument(doc);
    doc = new Document();
    doc.add(new DoubleField("latitude", 40.7051157, Field.Store.YES));
    doc.add(new DoubleField("longitude", -74.0088305, Field.Store.YES));
    writer.addDocument(doc);
    */

    // Open a near-real-time searcher over the writer, then commit and close
    searcher = new IndexSearcher(DirectoryReader.open(writer, true));
    writer.commit();
    writer.close();
}
From source file:com.zghw.lucene.demo.AssociationsFacetsExample.java
License:Apache License
/** Build the example index. */
private void index() throws IOException {
    IndexWriterConfig iwc = new IndexWriterConfig(FacetExamples.EXAMPLES_VER, new WhitespaceAnalyzer());
    IndexWriter indexWriter = new IndexWriter(indexDir, iwc);

    // Writes facet ords to a separate directory from the main index
    DirectoryTaxonomyWriter taxoWriter = new DirectoryTaxonomyWriter(taxoDir);

    Document doc = new Document();
    // 3 occurrences for tag 'lucene'
    doc.add(new IntAssociationFacetField(3, "tags", "lucene"));
    // 87% confidence level of genre 'computing'
    doc.add(new FloatAssociationFacetField(0.87f, "genre", "computing"));
    indexWriter.addDocument(config.build(taxoWriter, doc));

    doc = new Document();
    // 1 occurrence for tag 'lucene'
    doc.add(new IntAssociationFacetField(1, "tags", "lucene"));
    // 2 occurrences for tag 'solr'
    doc.add(new IntAssociationFacetField(2, "tags", "solr"));
    // 75% confidence level of genre 'computing'
    doc.add(new FloatAssociationFacetField(0.75f, "genre", "computing"));
    // 34% confidence level of genre 'software'
    doc.add(new FloatAssociationFacetField(0.34f, "genre", "software"));
    indexWriter.addDocument(config.build(taxoWriter, doc));

    indexWriter.close();
    taxoWriter.close();
}
From source file:com.zghw.lucene.demo.DistanceFacetsExample.java
License:Apache License
/** Build the example index. */
public void index() throws IOException {
    IndexWriter writer = new IndexWriter(indexDir,
            new IndexWriterConfig(FacetExamples.EXAMPLES_VER, new WhitespaceAnalyzer()));

    // TODO: we could index in radians instead ... saves all the conversions in getBoundingBoxFilter

    // Add documents with latitude/longitude location:
    Document doc = new Document();
    doc.add(new DoubleField("latitude", 40.759011, Field.Store.NO));
    doc.add(new DoubleField("longitude", -73.9844722, Field.Store.NO));
    writer.addDocument(doc);

    doc = new Document();
    doc.add(new DoubleField("latitude", 40.718266, Field.Store.NO));
    doc.add(new DoubleField("longitude", -74.007819, Field.Store.NO));
    writer.addDocument(doc);

    doc = new Document();
    doc.add(new DoubleField("latitude", 40.7051157, Field.Store.NO));
    doc.add(new DoubleField("longitude", -74.0088305, Field.Store.NO));
    writer.addDocument(doc);

    // Open near-real-time searcher
    searcher = new IndexSearcher(DirectoryReader.open(writer, true));

    writer.close();
}
From source file:com.zghw.lucene.demo.ExpressionAggregationFacetsExample.java
License:Apache License
/** Build the example index. */
private void index() throws IOException {
    IndexWriter indexWriter = new IndexWriter(indexDir,
            new IndexWriterConfig(FacetExamples.EXAMPLES_VER, new WhitespaceAnalyzer()));

    // Writes facet ords to a separate directory from the main index
    DirectoryTaxonomyWriter taxoWriter = new DirectoryTaxonomyWriter(taxoDir);

    Document doc = new Document();
    doc.add(new TextField("c", "foo bar", Store.NO));
    doc.add(new NumericDocValuesField("popularity", 5L));
    doc.add(new FacetField("A", "B"));
    indexWriter.addDocument(config.build(taxoWriter, doc));

    doc = new Document();
    doc.add(new TextField("c", "foo foo bar", Store.NO));
    doc.add(new NumericDocValuesField("popularity", 3L));
    doc.add(new FacetField("A", "C"));
    indexWriter.addDocument(config.build(taxoWriter, doc));

    indexWriter.close();
    taxoWriter.close();
}