List of usage examples for the `org.apache.lucene.document.Document` constructor:
public Document()
From source file:com.aliasi.lingmed.medline.IndexMedline.java
License:Lingpipe license
private void recordFile(IndexWriter indexWriter, String fileName) throws IOException { // if (mLogger.isDebugEnabled()) // mLogger.debug("record file: " + fileName); Document doc = new Document(); Field tagField = new Field(Fields.MEDLINE_DIST_FIELD, Fields.MEDLINE_DIST_VALUE, Field.Store.YES, Field.Index.NOT_ANALYZED_NO_NORMS); doc.add(tagField);// www . jav a 2s . co m Field nameField = new Field(Fields.MEDLINE_FILE_FIELD, fileName, Field.Store.YES, Field.Index.NOT_ANALYZED_NO_NORMS); doc.add(nameField); indexWriter.addDocument(doc); // if (mLogger.isDebugEnabled()) // mLogger.debug("added doc: " + doc.toString()); }
From source file:com.aliasi.lingmed.medline.MedlineCodec.java
License:Lingpipe license
public Document toDocument(MedlineCitation citation) { Document doc = new Document(); // index pubmed id (as keyword) Field idField = new Field(Fields.ID_FIELD, citation.pmid(), Field.Store.YES, Field.Index.NOT_ANALYZED_NO_NORMS); doc.add(idField);//from w w w . j a va2s . c o m // store raw XML Field xmlField = new Field(Fields.XML_FIELD, CompressionTools.compressString(citation.xmlString()), Field.Store.YES); doc.add(xmlField); return doc; }
From source file:com.aliasi.lingmed.medline.SearchableMedlineCodec.java
License:Lingpipe license
/**
 * Smoke test for the MEDLINE codec's analyzer: indexes a document with
 * two MeSH minor field values, runs a query against the field, then
 * dumps the tokens the analyzer produces for a sample string.
 *
 * Fixes over the original: the searcher, reader, and token stream were
 * never closed, and the token stream was consumed without the
 * reset()/end()/close() sequence required by the Lucene 3.6
 * TokenStream consumer contract.
 */
public static void main(String[] args) throws Exception {
    org.apache.lucene.store.RAMDirectory directory = new org.apache.lucene.store.RAMDirectory();
    MedlineCodec codec = new MedlineCodec();
    Analyzer analyzer = codec.getAnalyzer();

    // Index one document carrying two values for the MeSH minor field.
    org.apache.lucene.index.IndexWriterConfig iwConf = new org.apache.lucene.index.IndexWriterConfig(
            org.apache.lucene.util.Version.LUCENE_36, analyzer);
    iwConf.setOpenMode(org.apache.lucene.index.IndexWriterConfig.OpenMode.CREATE_OR_APPEND);
    org.apache.lucene.index.IndexWriter indexWriter = new org.apache.lucene.index.IndexWriter(directory,
            iwConf);
    Document doc = new Document();
    doc.add(new Field(Fields.MESH_MINOR_FIELD, "abc", Field.Store.NO, Field.Index.ANALYZED));
    doc.add(new Field(Fields.MESH_MINOR_FIELD, " xyz efg", Field.Store.NO, Field.Index.ANALYZED));
    indexWriter.addDocument(doc);
    indexWriter.close();

    // Query the field and report the hit count; close reader/searcher
    // even if search throws, so the directory is not left locked.
    org.apache.lucene.index.IndexReader reader = org.apache.lucene.index.IndexReader.open(directory);
    org.apache.lucene.search.IndexSearcher searcher = new org.apache.lucene.search.IndexSearcher(reader);
    try {
        org.apache.lucene.queryParser.QueryParser qp = new org.apache.lucene.queryParser.QueryParser(
                org.apache.lucene.util.Version.LUCENE_36, "foo", analyzer);
        org.apache.lucene.search.Query query = qp.parse(Fields.MESH_MINOR_FIELD + ":efg");
        org.apache.lucene.search.TopDocs hits = searcher.search(query, 1000);
        System.out.println("hits.length()=" + hits.scoreDocs.length);
    } finally {
        searcher.close();
        reader.close();
    }

    // Dump the analyzer's tokenization of a sample string.
    org.apache.lucene.analysis.TokenStream ts = analyzer.tokenStream(Fields.MESH_MINOR_FIELD,
            new java.io.StringReader("abc xyz efg"));
    org.apache.lucene.analysis.tokenattributes.CharTermAttribute terms = ts
            .addAttribute(org.apache.lucene.analysis.tokenattributes.CharTermAttribute.class);
    org.apache.lucene.analysis.tokenattributes.OffsetAttribute offsets = ts
            .addAttribute(org.apache.lucene.analysis.tokenattributes.OffsetAttribute.class);
    org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute positions = ts
            .addAttribute(org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute.class);
    try {
        // The 3.x TokenStream workflow is reset() -> incrementToken()* -> end().
        ts.reset();
        while (ts.incrementToken()) {
            int increment = positions.getPositionIncrement();
            int start = offsets.startOffset();
            int end = offsets.endOffset();
            String term = terms.toString();
            System.out.println("token=|" + term + "|" + " startOffset=" + start + " endOffset=" + end
                    + " positionIncr=" + increment);
        }
        ts.end();
    } finally {
        ts.close();
    }
}
From source file:com.aliasi.lingmed.omim.OmimCodec.java
License:Lingpipe license
public Document toDocument(OmimRecord rec) { Document doc = new Document(); // index Omim id (as keyword) Field idField = new Field(Fields.ID_FIELD, String.valueOf(rec.getMimId()), Field.Store.YES, Field.Index.TOKENIZED); doc.add(idField);/*from ww w . j ava2s .c o m*/ Field rawTextField = new Field(Fields.RAW_TEXT_FIELD, rec.getRawText(), Field.Store.COMPRESS, Field.Index.NO); doc.add(rawTextField); return doc; }
From source file:com.andreig.jetty.WriteServlet.java
License:GNU General Public License
@Override protected void doPost(HttpServletRequest req, HttpServletResponse res) throws ServletException, IOException { log.fine("doPost()"); if (!can_write(req)) { res.sendError(SC_UNAUTHORIZED);/*w ww .j ava 2 s .co m*/ return; } InputStream is = req.getInputStream(); String db_name = req.getParameter("dbname"); String col_name = req.getParameter("colname"); if (db_name == null || col_name == null) { String names[] = req2mongonames(req); if (names != null) { db_name = names[0]; col_name = names[1]; } if (db_name == null || col_name == null) { error(res, SC_BAD_REQUEST, Status.get("param name missing")); return; } } boolean upsert = Boolean.parseBoolean(req.getParameter("upsert")); boolean multi = Boolean.parseBoolean(req.getParameter("multi")); DB db = mongo.getDB(db_name); // mongo auth String user = req.getParameter("user"); String passwd = req.getParameter("passwd"); if (user != null && passwd != null && (!db.isAuthenticated())) { boolean auth = db.authenticate(user, passwd.toCharArray()); if (!auth) { res.sendError(SC_UNAUTHORIZED); return; } } DBCollection col = db.getCollection(col_name); BufferedReader r = null; DBObject q = null, o = null; try { r = new BufferedReader(new InputStreamReader(is)); String q_s = r.readLine(); if (q_s == null) { error(res, SC_BAD_REQUEST, Status.get("no data")); return; } String o_s = r.readLine(); if (o_s == null) { error(res, SC_BAD_REQUEST, Status.get("obj to update missing")); return; } try { q = (DBObject) JSON.parse(q_s); o = (DBObject) JSON.parse(o_s); } catch (JSONParseException e) { error(res, SC_BAD_REQUEST, Status.get("can not parse data")); return; } } finally { if (r != null) r.close(); } // // search if (do_search) { String fn = col.getFullName(); DBCursor c = col.find(q); int cnt = c.count(); if (!multi) c.limit(1); long l = multi ? 
cnt : 1; String toupdate[] = new String[(int) l]; int n = 0; boolean insert = false; if (upsert && !multi && cnt == 0) insert = true; while (c.hasNext()) { DBObject _o = c.next(); ObjectId oid = (ObjectId) _o.get("_id"); String id = oid.toStringMongod(); toupdate[n++] = id; } c.close(); List<String> flds = Config.search_index_fields.get(fn); boolean commit = false; Document doc = null; Search _writer = search.get_writer(); if (flds != null && flds.size() > 0) { doc = new Document(); try { for (String fld : flds) { String val = (String) o.get(fld); if (val == null) continue; Search.add_searchable_s(doc, fld, val); commit = true; } if (commit) _writer.commit(doc); } catch (ClassCastException e) { error(res, SC_BAD_REQUEST, Status.get("searchable fields must be type String")); return; } catch (CorruptIndexException e) { error(res, SC_BAD_REQUEST, Status.get("Search corrupt index" + e)); return; } } if (commit && insert) log.warning("upsert with search not implemented yet"); else _writer.update(toupdate, doc); } WriteResult wr = col.update(q, o, upsert, multi, write_concern); // return operation status if (do_return) { out_str(req, wr.toString()); if (wr.getError() == null) { res.setStatus(SC_BAD_REQUEST); return; } } res.setStatus(SC_CREATED); }
From source file:com.andreig.jetty.WriteServlet.java
License:GNU General Public License
@Override protected void doPut(HttpServletRequest req, HttpServletResponse res) throws ServletException, IOException { log.fine("doPut()"); if (!can_write(req)) { res.sendError(SC_UNAUTHORIZED);/*ww w. ja va2 s.c o m*/ return; } InputStream is = req.getInputStream(); String db_name = req.getParameter("dbname"); String col_name = req.getParameter("colname"); if (db_name == null || col_name == null) { String names[] = req2mongonames(req); if (names != null) { db_name = names[0]; col_name = names[1]; } if (db_name == null || col_name == null) { error(res, SC_BAD_REQUEST, Status.get("param name missing")); return; } } DB db = mongo.getDB(db_name); // mongo auth String user = req.getParameter("user"); String passwd = req.getParameter("passwd"); if (user != null && passwd != null && (!db.isAuthenticated())) { boolean auth = db.authenticate(user, passwd.toCharArray()); if (!auth) { res.sendError(SC_UNAUTHORIZED); return; } } DBCollection col = db.getCollection(col_name); BufferedReader r = null; ArrayList<DBObject> ar = new ArrayList<DBObject>(); try { r = new BufferedReader(new InputStreamReader(is)); String data; while ((data = r.readLine()) != null) { if (data != null) { DBObject o; try { o = (DBObject) JSON.parse(data); ar.add(o); } catch (JSONParseException e) { error(res, SC_BAD_REQUEST, Status.get("can not parse data")); return; } } } } finally { if (r != null) r.close(); } if (ar.size() == 0) { error(res, SC_BAD_REQUEST, Status.get("can not parse data")); return; } WriteResult wr = col.insert(ar, write_concern); // search if (do_search) { String fn = col.getFullName(); List<String> flds = Config.search_index_fields.get(fn); if (flds != null && flds.size() > 0) { Search _writer = search.get_writer(); try { for (DBObject o : ar) { boolean commit = false; Document doc = new Document(); for (String fld : flds) { String val = (String) o.get(fld); if (val == null) continue; Search.add_searchable_s(doc, fld, val); commit = true; } if (commit) { ObjectId id = (ObjectId) 
o.get("_id"); String sid = id.toStringMongod(); Search.add_storable(doc, "_id", sid); Search.add_searchable_s(doc, "_dbid_", fn); _writer.commit(doc); } } } catch (ClassCastException e) { error(res, SC_BAD_REQUEST, Status.get("searchable fields must be type String")); return; } catch (CorruptIndexException e) { error(res, SC_BAD_REQUEST, Status.get("Search corrupt index" + e)); return; } } } // return operation status if (do_return) { out_str(req, wr.toString()); if (wr.getError() == null) { res.setStatus(SC_BAD_REQUEST); return; } } res.setStatus(SC_CREATED); }
From source file:com.aperigeek.dropvault.web.service.IndexService.java
License:Open Source License
public void index(String username, String password, String id, Map<String, String> metadata) throws IndexException { try {// w ww . j av a2 s . c o m Document document = new Document(); document.add(new Field("id", id, Field.Store.YES, Field.Index.NOT_ANALYZED)); for (Map.Entry<String, String> e : metadata.entrySet()) { if (e.getValue() != null) { document.add(new Field(e.getKey(), e.getValue(), Field.Store.NO, Field.Index.ANALYZED)); } } IndexWriter index = getIndexWriter(username, password); index.addDocument(document); index.close(); } catch (IOException ex) { throw new IndexException(ex); } }
From source file:com.appeligo.amazon.AmazonIndexer.java
License:Apache License
public void addAmazonItem(AmazonItem item, String programId) { Document doc = new Document(); doc.add(new Field("asin", item.getId(), Store.YES, Index.UN_TOKENIZED)); if (item.getTitle() != null) { doc.add(new Field("title", item.getTitle(), Store.YES, Index.TOKENIZED)); }//from www . j a va 2 s . c o m if (item.getDetailsUrl() != null) { doc.add(new Field("detailsUrl", item.getDetailsUrl(), Store.YES, Index.NO)); } if (item.getSmallImageUrl() != null) { doc.add(new Field("smallImageUrl", item.getSmallImageUrl(), Store.YES, Index.NO)); doc.add(new Field("smallImageWidth", Integer.toString(item.getSmallImageWidth()), Store.YES, Index.NO)); doc.add(new Field("smallImageHeight", Integer.toString(item.getSmallImageHeight()), Store.YES, Index.NO)); } doc.add(new Field("programId", programId, Store.YES, Index.UN_TOKENIZED)); doc.add(new Field("storeTime", DateTools.dateToString(new Date(), Resolution.MINUTE), Store.YES, Index.UN_TOKENIZED)); queue.addDocument(doc); }
From source file:com.appeligo.amazon.ProgramIndexer.java
License:Apache License
/** * Adds a marker program so it won't query for this programId until the time expires. * @param programId the programId to add a marker for * @throws IOException/*from ww w. j a v a 2s. c o m*/ */ private void addMarkerProgram(String programId) throws IOException { Document doc = new Document(); doc.add(new Field("type", "marker", Store.NO, Index.UN_TOKENIZED)); doc.add(new Field("programId", programId, Store.YES, Index.UN_TOKENIZED)); doc.add(new Field("storeTime", DateTools.dateToString(new Date(), Resolution.DAY), Store.YES, Index.UN_TOKENIZED)); writer.addDocument(doc); }
From source file:com.appeligo.amazon.ProgramIndexer.java
License:Apache License
/**
 * Builds the Lucene document for an Amazon product tied to a program:
 * a "product" type tag, the ASIN and program id as exact-match keys,
 * the title as searchable text, display URLs/dimensions as stored-only
 * fields, and a day-resolution store timestamp.
 *
 * @param item Amazon product to encode
 * @param programId program the product is associated with
 * @return document ready for indexing
 */
protected Document createProductDocument(AmazonItem item, String programId) {
    Document productDoc = new Document();
    productDoc.add(new Field("type", "product", Store.NO, Index.UN_TOKENIZED));
    productDoc.add(new Field("asin", item.getId(), Store.YES, Index.UN_TOKENIZED));
    String title = item.getTitle();
    if (title != null) {
        productDoc.add(new Field("title", title, Store.YES, Index.TOKENIZED));
    }
    String detailsUrl = item.getDetailsUrl();
    if (detailsUrl != null) {
        productDoc.add(new Field("detailsUrl", detailsUrl, Store.YES, Index.NO));
    }
    String smallImageUrl = item.getSmallImageUrl();
    if (smallImageUrl != null) {
        // Image dimensions accompany the URL only when an image exists.
        productDoc.add(new Field("smallImageUrl", smallImageUrl, Store.YES, Index.NO));
        productDoc.add(new Field("smallImageWidth",
                Integer.toString(item.getSmallImageWidth()), Store.YES, Index.NO));
        productDoc.add(new Field("smallImageHeight",
                Integer.toString(item.getSmallImageHeight()), Store.YES, Index.NO));
    }
    productDoc.add(new Field("programId", programId, Store.YES, Index.UN_TOKENIZED));
    productDoc.add(new Field("storeTime",
            DateTools.dateToString(new Date(), Resolution.DAY), Store.YES, Index.UN_TOKENIZED));
    return productDoc;
}