List of usage examples for org.apache.lucene.document.Document#getField
public final IndexableField getField(String name)
From source file:application.SentimentLexiconGenerator.java
License:Open Source License
/**
 * Builds a rating histogram from the indexed occurrences of a synset.
 *
 * <p>For every match returned by {@code getIndexTermInstances}, the histogram's
 * unique-document counter is incremented once, the rating field of the matching
 * document is parsed as an integer, and one observation at that rating is recorded
 * for each payload accepted by {@code filter}.
 *
 * @param synset synset whose string form is looked up in the index
 * @param stats  review statistics passed to the new histogram
 * @return the populated histogram
 * @throws IOException if the index cannot be read
 */
public RatingHistogram makeRatingHistogramForSynset(Synset synset, ReviewStats stats) throws IOException {
    final RatingHistogram result = new RatingHistogram(stats);
    final TermSpans spans = getIndexTermInstances(synset.toString());

    while (spans.next()) {
        result.incrementUniqueDocuments();

        final Document hit = searcher.doc(spans.doc());
        final String ratingText = hit.getField(Globals.IndexFieldNames.rating).stringValue();
        final int rating = Integer.parseInt(ratingText);

        for (Iterator<byte[]> it = spans.getPayload().iterator(); it.hasNext();) {
            final ReviewTermPayload payload = new ReviewTermPayload();
            payload.decode(it.next());
            if (filter.filterPayload(payload)) {
                result.addObeservation(rating, 1);
            }
        }
    }
    return result;
}
From source file:application.SentimentLexiconGenerator.java
License:Open Source License
public HashMap<SynsetCategory, RatingHistogram> makeRatingHistogramsForTerm(String term, ReviewStats stats) throws IOException { HashMap<SynsetCategory, RatingHistogram> histograms = new HashMap<SynsetCategory, RatingHistogram>(); for (SynsetCategory synsetcat : Synset.getSynsetCategories()) { histograms.put(synsetcat, new RatingHistogram(stats)); }/* w ww .j av a 2 s . co m*/ TermSpans ts = getIndexTermInstances(term); while (ts.next()) { Document doc = searcher.doc(ts.doc()); int doc_rating = Integer.parseInt(doc.getField(Globals.IndexFieldNames.rating).stringValue()); Iterator<byte[]> p_i = ts.getPayload().iterator(); while (p_i.hasNext()) { ReviewTermPayload p = new ReviewTermPayload(); p.decode(p_i.next()); if (filter.filterPayload(p)) { RatingHistogram histogram = histograms.get(Synset.convertPosCategory(p.getPosCat())); if (histogram != null) { histogram.incrementUniqueDocuments(); histogram.addObeservation(doc_rating, 1); } else { // TODO Remove message?? AppLogger.error.log(Level.SEVERE, "Histogram not found"); } } } } return histograms; }
From source file:au.org.ala.names.search.ALANameSearcher.java
License:Open Source License
/** * Dumps a list of the species LSID's that are contained in the index. *///w ww . ja va 2 s . c om public void dumpSpecies() { try { OutputStreamWriter fileOut = new OutputStreamWriter(new FileOutputStream("/data/species.txt"), "UTF-8"); Term term = new Term("rank", "species"); TopDocs hits = cbSearcher.search(new TermQuery(term), 2000000); for (ScoreDoc sdoc : hits.scoreDocs) { Document doc = cbReader.document(sdoc.doc); if (doc.getField("synonym") == null) { String lsid = StringUtils.trimToNull(doc.getField("lsid").stringValue()); if (lsid == null) lsid = doc.getField("id").stringValue(); fileOut.write(lsid + "\n"); } } fileOut.flush(); } catch (Exception e) { e.printStackTrace(); } }
From source file:axiom.objectmodel.dom.convert.LuceneVersion9Convertor.java
License:Open Source License
public Document convertDocument(Document doc) { Field protoField = doc.getField(LuceneManager.PROTOTYPE); if (protoField != null && ("CMSTask".equalsIgnoreCase(protoField.stringValue()) || "CMSTaskContainer".equalsIgnoreCase(protoField.stringValue()))) { return null; }//from w w w .j a va 2 s .c o m Document ndoc = new Document(); Enumeration e = doc.fields(); String id = null, layer = null; Field idField = doc.getField(LuceneManager.ID); Field layerField = doc.getField(LuceneManager.LAYER_OF_SAVE); if (idField != null && layerField != null) { id = idField.stringValue(); layer = layerField.stringValue(); } while (e.hasMoreElements()) { Field f = (Field) e.nextElement(); Field.Store currstore = Field.Store.YES; if (!f.isStored()) { currstore = Field.Store.NO; } else if (f.isCompressed()) { currstore = Field.Store.COMPRESS; } Field.Index curridx = Field.Index.UN_TOKENIZED; if (!f.isIndexed()) { curridx = Field.Index.NO; } else if (f.isTokenized()) { curridx = Field.Index.TOKENIZED; } String name = f.name(); String value = f.stringValue(); if (!("84".equals(id) && "1".equals(layer) && "_task".equals(name)) && !("71".equals(id) && "1".equals(layer) && "_task".equals(name))) { ndoc.add(new Field(name, value, currstore, curridx)); } } return ndoc; }
From source file:axiom.objectmodel.dom.LuceneManager.java
License:Open Source License
private static String getFieldValue(String fieldname, Document doc, HashSet unchecked) { String value = null;// ww w. j a va2 s . c o m Field field = doc.getField(fieldname); if (field != null) { value = field.stringValue(); unchecked.remove(field); } return value; }
From source file:axiom.objectmodel.dom.LuceneManager.java
License:Open Source License
/**
 * Collects the children of {@code parent} into its subnode list.
 *
 * <p>For every layer from {@code LIVE_MODE} up to and including {@code mode}, the
 * index is searched for documents flagged as children ({@code ISCHILD} = "true")
 * whose {@code PARENTID} is the parent's ID and whose {@code LAYER_OF_SAVE} is that
 * layer. The ID and prototype of each hit are gathered (one entry per ID), turned
 * into node handles and added to the list obtained from
 * {@code parent.createSubnodeList()}.
 *
 * <p>If {@code relChildren} is non-null it is split on {@code NULL_DELIM} into
 * alternating id/prototype tokens, which are added the same way. The string is
 * ignored when it yields zero tokens or an odd number of tokens.
 *
 * @param parent         node whose children are loaded
 * @param relChildren    optional {@code NULL_DELIM}-separated id/prototype pairs; may be {@code null}
 * @param nmgr           node manager passed to {@code makeNodeHandle}
 * @param mode           highest layer to search; also passed to {@code makeNodeHandle}
 * @param layerInStorage not used by this method
 * @throws Exception if the index search fails (the underlying {@code IOException} is
 *                   attached as the cause) or if any called method throws
 */
private void getTheChildren(Node parent, String relChildren, NodeManager nmgr, int mode, int layerInStorage)
        throws Exception {
    IndexSearcher searcher = null;
    BooleanQuery bq = null;
    try {
        searcher = this.getIndexSearcher();
        String pid = parent.getID();
        HashMap<String, String> ids = new HashMap<String, String>();

        Query query1 = new TermQuery(new Term(ISCHILD, "true"));
        Query query2 = new TermQuery(new Term(PARENTID, pid));
        for (int i = LIVE_MODE; i <= mode; i++) {
            bq = new BooleanQuery();
            bq.add(query1, BooleanClause.Occur.MUST);
            bq.add(query2, BooleanClause.Occur.MUST);
            bq.add(new TermQuery(new Term(LAYER_OF_SAVE, i + "")), BooleanClause.Occur.MUST);

            Hits hits = searcher.search(bq);
            int length = hits.length();
            for (int j = 0; j < length; j++) {
                Document doc = hits.doc(j);
                ids.put(doc.getField(ID).stringValue(), doc.getField(PROTOTYPE).stringValue());
            }
        }

        Collection<NodeHandle> subnodes = null;
        if (ids.size() > 0) {
            subnodes = parent.createSubnodeList();
            for (String id : ids.keySet()) {
                addSubnodeHandle(subnodes, makeNodeHandle(nmgr, id, ids.get(id), mode));
            }
        }

        if (relChildren != null) {
            String[] charr = relChildren.split(NULL_DELIM);
            // tokens come in id/prototype pairs; an odd count cannot be paired up
            if (charr.length > 0 && charr.length % 2 == 0) {
                if (subnodes == null) {
                    subnodes = parent.createSubnodeList();
                }
                for (int i = 0; i < charr.length; i += 2) {
                    addSubnodeHandle(subnodes, makeNodeHandle(nmgr, charr[i], charr[i + 1], mode));
                }
            }
        }
    } catch (IOException ioe) {
        // keep the IOException as the cause so the root failure stays in the stack trace
        throw new Exception("Searcher failed when attempting to retrieve children of " + "id '" + parent.getID()
                + "', query = " + bq, ioe);
    } finally {
        this.releaseIndexSearcher(searcher);
    }
}

/** Adds a handle to the subnode collection, using sorted insertion when it is a SubnodeList. */
private void addSubnodeHandle(Collection<NodeHandle> subnodes, NodeHandle handle) {
    if (subnodes instanceof SubnodeList) {
        ((SubnodeList) subnodes).addSorted(handle);
    } else {
        subnodes.add(handle);
    }
}
From source file:axiom.objectmodel.dom.LuceneManager.java
License:Open Source License
/**
 * Creates the index document for a node and appends it, together with its key, to
 * the given lists.
 *
 * <p>The key is the document's ID value, {@code DeletedInfos.KEY_SEPERATOR} and the
 * document's layer-of-save value, concatenated in that order. Nothing is appended
 * when {@code createDocument} returns {@code null}.
 *
 * @param ids         list receiving the document key
 * @param docs        list receiving the document
 * @param node        node to create the document from
 * @param analyzerMap analyzer map passed through to {@code createDocument}
 * @throws Exception if {@code createDocument} throws
 */
private void createDocumentFromNode(ArrayList ids, ArrayList docs, INode node, HashMap analyzerMap)
        throws Exception {
    final Document built = this.createDocument(node, analyzerMap);
    if (built == null) {
        return;
    }

    final String nodeId = built.getField(ID).stringValue();
    final String layer = built.getField(LAYER_OF_SAVE).stringValue();
    ids.add(nodeId + DeletedInfos.KEY_SEPERATOR + layer);
    docs.add(built);
}
From source file:axiom.objectmodel.dom.LuceneManager.java
License:Open Source License
/**
 * Adds one property of a node to a Lucene document as a field named {@code key}.
 *
 * <p>Properties whose value is {@code null} are ignored. The field is stored and
 * indexed untokenized when {@code key} is the access property (compared ignoring
 * case if the application is configured that way) or when {@code key} starts with
 * an underscore and is not {@code FileObject.CONTENT}. Otherwise the store and index
 * modes are looked up from {@code rprops}. The property type is taken from
 * {@code rprops}, falling back to the property's own type when that lookup is
 * negative. The value is serialized according to that type, and a boost is applied
 * when the configured boost is greater than -1.
 *
 * <p>Additional effects by type: string properties register their analyzer (if any)
 * in {@code analyzerMap} under the document's id/layer key; reference properties
 * append "serialized reference + {@code NULL_DELIM} + key" to {@code ref_list};
 * multi-value and XHTML properties delegate reference collection to
 * {@code addMultiValueToDoc} and {@code addXhtmlRefs}. Each type is handled by its
 * own case only; XML properties do not run the XHTML handling.
 *
 * @param doc         document receiving the field
 * @param key         field name
 * @param prop        property supplying the value
 * @param rprops      resource properties consulted for store/index/type/analyzer/boost
 * @param ref_list    list receiving reference entries
 * @param analyzerMap map from document key to its per-field analyzer wrapper
 * @param accessprop  name of the access property; may be {@code null}
 * @throws Exception if any called serializer or helper throws
 */
private void addToDoc(Document doc, String key, IProperty prop, ResourceProperties rprops, ArrayList ref_list,
        HashMap analyzerMap, String accessprop) throws Exception {
    if (prop.getValue() == null) {
        return;
    }

    final boolean isAccessProp = accessprop != null && (key.equals(accessprop)
            || (this.app.isPropertyFilesIgnoreCase() && key.equalsIgnoreCase(accessprop)));
    final boolean isInternalKey = key.startsWith("_") && !key.equals(FileObject.CONTENT);

    final Field.Store store;
    final Field.Index index;
    if (isAccessProp || isInternalKey) {
        store = Field.Store.YES;
        index = Field.Index.UN_TOKENIZED;
    } else {
        store = getStore(rprops, key);
        index = getIndex(rprops, key);
    }

    int type = getType(rprops, key);
    if (type < 0) {
        type = prop.getType();
    }

    final Analyzer analyzer = (type == IProperty.STRING) ? getAnalyzer(rprops, key) : null;
    final float boost = getBoost(rprops, key);

    switch (type) {
    case IProperty.BOOLEAN:
        addBoostedField(doc, key, serializeBoolean(prop.getBooleanValue()), store, index, boost);
        break;

    case IProperty.DATE:
        addBoostedField(doc, key, serializeDate(prop.getDateValue()), store, index, boost);
        break;

    case IProperty.TIME:
        addBoostedField(doc, key, serializeTime(prop.getDateValue()), store, index, boost);
        break;

    case IProperty.TIMESTAMP:
        addBoostedField(doc, key, serializeTimestamp(prop.getDateValue()), store, index, boost);
        break;

    case IProperty.FLOAT:
        addBoostedField(doc, key, serializeFloat(prop.getFloatValue()), store, index, boost);
        break;

    case IProperty.SMALLFLOAT:
        addBoostedField(doc, key, serializeSmallFloat(prop.getFloatValue()), store, index, boost);
        break;

    case IProperty.INTEGER:
        addBoostedField(doc, key, serializeInt(prop.getIntegerValue()), store, index, boost);
        break;

    case IProperty.SMALLINT:
        addBoostedField(doc, key, serializeSmallInt(prop.getIntegerValue()), store, index, boost);
        break;

    case IProperty.STRING:
        addBoostedField(doc, key, prop.getStringValue(), store, index, boost);
        if (analyzer != null) {
            String docid = doc.getField(ID).stringValue() + DeletedInfos.KEY_SEPERATOR
                    + doc.getField(LAYER_OF_SAVE).stringValue();
            PerFieldAnalyzerWrapper ret = (PerFieldAnalyzerWrapper) analyzerMap.get(docid);
            if (ret == null) {
                ret = buildAnalyzer();
                analyzerMap.put(docid, ret);
            }
            ret.addAnalyzer(key, analyzer);
        }
        break;

    case IProperty.NODE:
        INode propNode = prop.getNodeValue();
        if (propNode != null) {
            String value = serializeNodeProp(propNode.getID(), propNode.getPrototype());
            addBoostedField(doc, key, value, store, index, boost);
        }
        break;

    case IProperty.REFERENCE:
        if (prop instanceof axiom.objectmodel.db.Property) {
            Reference relobj = ((axiom.objectmodel.db.Property) prop).getReferenceValue();
            if (relobj != null) {
                String serialized_ref = serializeReference(relobj);
                addBoostedField(doc, key, serialized_ref, store, index, boost);
                ref_list.add(serialized_ref + NULL_DELIM + key);
            }
        }
        break;

    case IProperty.MULTI_VALUE:
        if (prop instanceof axiom.objectmodel.db.Property) {
            MultiValue mvobj = ((axiom.objectmodel.db.Property) prop).getMultiValue();
            if (mvobj != null) {
                addMultiValueToDoc(doc, key, mvobj, store, index, ref_list);
            }
        }
        break;

    case IProperty.XML:
        if (prop instanceof axiom.objectmodel.db.Property) {
            Object xml = ((axiom.objectmodel.db.Property) prop).getXMLValue();
            if (xml != null) {
                addBoostedField(doc, key, XmlUtils.objectToXMLString(xml), store, index, boost);
            }
        }
        // stop here so an XML property does not also run the XHTML handling below
        break;

    case IProperty.XHTML:
        if (prop instanceof axiom.objectmodel.db.Property) {
            Object xhtml = ((axiom.objectmodel.db.Property) prop).getXHTMLValue();
            if (xhtml != null) {
                addBoostedField(doc, key, XmlUtils.objectToXMLString(xhtml), store, index, boost);
                addXhtmlRefs(key, xhtml, ref_list);
            }
        }
        break;

    default:
        break;
    }
}

/** Creates a field, applies the boost when it is greater than -1, and adds the field to the document. */
private void addBoostedField(Document doc, String key, String value, Field.Store store, Field.Index index,
        float boost) {
    Field f = new Field(key, value, store, index);
    if (boost > -1f) {
        f.setBoost(boost);
    }
    doc.add(f);
}
From source file:axiom.objectmodel.dom.LuceneManager.java
License:Open Source License
/**
 * Finds the keys of the nodes whose reference list contains the given id.
 *
 * <p>The query requires {@code REF_LIST_FIELD} to contain {@code id}, requires one
 * of the given prototypes when {@code protos} is non-empty, and additionally
 * requires {@code append} when it is non-null and has clauses. A hit is skipped when
 * {@code isIdInDocumentRefs} rejects it or when its layer-of-save value parses to a
 * number greater than {@code mode}. An unparsable layer value does not exclude the
 * hit. A key is produced only for hits that carry both an ID and a prototype field.
 *
 * @param id     id that must appear in the reference list
 * @param mode   layer used for filtering and for the created keys
 * @param protos prototype names to restrict the search to; may be empty
 * @param append extra query that must also match; may be {@code null}
 * @param sort   sort order passed to the search
 * @return the keys of the matching nodes
 * @throws Exception any failure, after it has been logged through {@code app.logError}
 */
public Key[] getSourceNodeIds(final String id, final int mode, ArrayList protos, BooleanQuery append, Sort sort)
        throws Exception {
    IndexSearcher searcher = null;
    BooleanQuery query = null;
    Key[] keys = null;

    try {
        searcher = this.getIndexSearcher();
        query = new BooleanQuery();

        final int protoCount = protos.size();
        if (protoCount > 0) {
            final BooleanQuery anyPrototype = new BooleanQuery();
            for (int p = 0; p < protoCount; p++) {
                final Term protoTerm = new Term(PROTOTYPE, (String) protos.get(p));
                anyPrototype.add(new TermQuery(protoTerm), BooleanClause.Occur.SHOULD);
            }
            query.add(anyPrototype, BooleanClause.Occur.MUST);
        }

        query.add(new TermQuery(new Term(REF_LIST_FIELD, id)), BooleanClause.Occur.MUST);

        if (append != null && append.getClauses().length > 0) {
            query.add(append, BooleanClause.Occur.MUST);
        }

        final Hits hits = searcher.search(query, sort);
        final int hitCount = hits.length();
        final ArrayList<Key> matches = new ArrayList<Key>();

        for (int h = 0; h < hitCount; h++) {
            final Document doc = hits.doc(h);
            if (!isIdInDocumentRefs(doc, id)) {
                continue;
            }

            final Field idField = doc.getField(ID);
            final Field protoField = doc.getField(PROTOTYPE);
            final Field layerField = doc.getField(LAYER_OF_SAVE);

            if (layerField != null) {
                boolean savedAboveMode = false;
                try {
                    savedAboveMode = mode < Integer.parseInt(layerField.stringValue());
                } catch (Exception nfe) {
                    // a layer value that cannot be parsed does not exclude the hit
                }
                if (savedAboveMode) {
                    continue;
                }
            }

            if (idField != null && protoField != null) {
                matches.add(new DbKey(this.app.getDbMapping(protoField.stringValue()), idField.stringValue(),
                        mode));
            }
        }

        keys = matches.toArray(new Key[matches.size()]);
    } catch (Exception ex) {
        app.logError(
                ErrorReporter.errorMsg(this.getClass(), "getSourceNodeIds") + "Could not retrieve document " + id
                        + " from Lucene index with query = " + (query != null ? query : "null"),
                ex);
        throw ex;
    } finally {
        this.releaseIndexSearcher(searcher);
    }

    return keys;
}
From source file:axiom.objectmodel.dom.LuceneManager.java
License:Open Source License
public ArrayList getChildrenIds(INode node) throws Exception { ArrayList childrenIds = new ArrayList(); IndexSearcher searcher = null;//from w ww. j av a2s. co m BooleanQuery bq = new BooleanQuery(); try { searcher = this.getIndexSearcher(); String id = node.getID(); final Query query1 = new TermQuery(new Term(PARENTID, isSpecialNode(id) ? id : node.getID())); final Query query2 = new TermQuery(new Term(ISCHILD, "true")); bq.add(query1, BooleanClause.Occur.MUST); bq.add(query2, BooleanClause.Occur.MUST); final Hits hits = searcher.search(bq); /*if (app.debug()) app.logEvent("LuceneManager.getChildrenIds() executed query [" + bq + " which resulted in " + hits.length() + " hits");*/ final int length = hits.length(); for (int i = 0; i < length; i++) { Document doc = hits.doc(i); childrenIds.add(doc.getField(ID).stringValue()); } } catch (IOException ioe) { throw new Exception("Searcher failed when attempting to retrieve children of " + "id '" + node.getID() + "', query = " + bq); } finally { this.releaseIndexSearcher(searcher); } return childrenIds; }