// Java tutorial
/* * To change this license header, choose License Headers in Project Properties. * To change this template file, choose Tools | Templates * and open the template in the editor. */ package net.skyatlas.icd.util; import java.io.BufferedReader; import java.io.FileNotFoundException; import java.io.IOException; import java.io.StringReader; import java.sql.Connection; import java.sql.ResultSet; import java.sql.SQLException; import java.sql.Statement; import java.util.ArrayList; import java.util.HashMap; import java.util.HashSet; import java.util.Iterator; import java.util.List; import java.util.ResourceBundle; import java.util.Stack; import java.util.UUID; import java.util.regex.Matcher; import java.util.regex.Pattern; import love.cq.util.IOUtil; import net.skyatlas.icd.dao.IcdDiseaseDao; import net.skyatlas.icd.dao.IcdDiseaseRelationDao; import net.skyatlas.icd.domain.IcdDisease; import net.skyatlas.icd.domain.IcdDiseaseIdxRelation; import net.skyatlas.icd.domain.IcdDiseaseIndex; import net.skyatlas.icd.domain.IcdDiseaseRelation; import net.skyatlas.icd.util.parseTree.ParseTreeOperatorEnum; import net.skyatlas.icd.util.parseTree.ParseTreeOperatorNode; import net.skyatlas.icd.util.parseTree.ParseTreeResultNode; import oracle.jdbc.pool.OracleDataSource; import org.ansj.lucene4.AnsjAnalysis; import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; import org.apache.lucene.index.CorruptIndexException; import org.apache.lucene.index.DirectoryReader; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.IndexWriter; import org.apache.lucene.index.IndexWriterConfig; import org.apache.lucene.index.Term; import org.apache.lucene.queryparser.classic.ParseException; import org.apache.lucene.queryparser.classic.QueryParser; import org.apache.lucene.search.BooleanClause; 
import org.apache.lucene.search.BooleanQuery; import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.Query; import org.apache.lucene.search.Sort; import org.apache.lucene.search.SortField; import org.apache.lucene.search.TermQuery; import org.apache.lucene.search.TopDocs; import org.apache.lucene.search.highlight.Highlighter; import org.apache.lucene.search.highlight.InvalidTokenOffsetsException; import org.apache.lucene.search.highlight.QueryScorer; import org.apache.lucene.search.highlight.SimpleHTMLFormatter; import org.apache.lucene.store.Directory; import org.apache.lucene.store.RAMDirectory; import org.apache.lucene.util.Version; /** * * @author changzhenghe */ public class MemoryGrid { private boolean dealAlias = false; // ???? private IcdDiseaseDao dao; private IcdDiseaseRelationDao icdDisRelationDao; private List<IcdDiseaseIndex> allIndexItems; // Vol3 ? private List<IcdDiseaseIdxRelation> allIndexRelations; //Vol3 ? private List<IcdDiseaseIndex> allRootIndexItems; // ? ? private List<IcdDisease> allDiseases; // Vol1 ? private List<IcdDiseaseRelation> allDisRelations; //Vol1 ? // index /* ?3 ID */ private HashMap<Integer, IcdDiseaseIndex> vol3IDIndexedItems = new HashMap(); /* ?1 ID */ private HashMap<Integer, IcdDisease> vol1IDIndexedItems = new HashMap(); /** * ?1 Relation ID */ private HashMap<Integer, IcdDiseaseRelation> vol1IDRelationIndexedItems = new HashMap(); /** * ?1 Relation mainID */ private HashMap<Integer, List<IcdDiseaseRelation>> vol1mainIDRelationIndexedItems = new HashMap(); /* ?3 ?? */ private HashMap<String, List<IcdDiseaseIndex>> vol3NameIndexedItems = new HashMap(); // nameCh /* ?3 */ private HashMap<String, List<IcdDiseaseIndex>> vol3PinYinIndexedItems = new HashMap(); // py /* ?1 ?? 
*/ private HashMap<String, List<IcdDisease>> vol1NameIndexedItems = new HashMap(); // codeNameCh /* ?1 */ private HashMap<String, List<IcdDisease>> vol1PinYinIndexedItems = new HashMap(); // py /* ?1 ICDCODE */ private HashMap<String, List<IcdDisease>> vol1IcdIndexedItems = new HashMap(); // icdCode /* ?3 ? */ private List<SearchPath> allSearchPaths = new ArrayList(); /** * ?? * <??> * ? */ private HashMap<IcdDiseaseIndex, IcdDiseaseIndex> icdDiseaseIndexIndexedRootItems = new HashMap(); private HashMap<IcdDiseaseIndex, List<SearchPath>> rootIndexedSearchPaths = new HashMap(); // ?? //private HashMap<IcdDiseaseIndex,SearchPath> rootIndexedSearchPaths = new HashMap(); private HashMap<String, List<SearchPath>> icdIndexedSearchPaths = new HashMap(); // ICD Code? private HashMap<String, SearchPath> guidIndexedSearchPaths = new HashMap(); // ? GUID public HashMap<String, SearchPath> getGuidIndexedSearchPaths() { return guidIndexedSearchPaths; } public void setGuidIndexedSearchPaths(HashMap<String, SearchPath> guidIndexedSearchPaths) { this.guidIndexedSearchPaths = guidIndexedSearchPaths; } public List<IcdDiseaseIdxRelation> getAllIndexRelations() { return allIndexRelations; } public void setAllIndexRelations(List<IcdDiseaseIdxRelation> allIndexRelations) { this.allIndexRelations = allIndexRelations; } public List<IcdDiseaseRelation> getAllDisRelations() { return allDisRelations; } public void setAllDisRelations(List<IcdDiseaseRelation> allDisRelations) { this.allDisRelations = allDisRelations; } public IcdDiseaseRelationDao getIcdDisRelationDao() { return icdDisRelationDao; } public void setIcdDisRelationDao(IcdDiseaseRelationDao icdDisRelationDao) { this.icdDisRelationDao = icdDisRelationDao; } public HashMap<Integer, IcdDiseaseRelation> getVol1IDRelationIndexedItems() { return vol1IDRelationIndexedItems; } public HashMap<IcdDiseaseIndex, List<SearchPath>> getRootIndexedSearchPaths() { return rootIndexedSearchPaths; } public void 
setRootIndexedSearchPaths(HashMap<IcdDiseaseIndex, List<SearchPath>> rootIndexedSearchPaths) { this.rootIndexedSearchPaths = rootIndexedSearchPaths; } public void setVol1IDRelationIndexedItems(HashMap<Integer, IcdDiseaseRelation> vol1IDRelationIndexedItems) { this.vol1IDRelationIndexedItems = vol1IDRelationIndexedItems; } public HashMap<Integer, List<IcdDiseaseRelation>> getVol1mainIDRelationIndexedItems() { return vol1mainIDRelationIndexedItems; } public void setVol1mainIDRelationIndexedItems( HashMap<Integer, List<IcdDiseaseRelation>> vol1mainIDRelationIndexedItems) { this.vol1mainIDRelationIndexedItems = vol1mainIDRelationIndexedItems; } public List<SearchPath> getAllSearchPaths() { return allSearchPaths; } public void setAllSearchPaths(List<SearchPath> allSearchPaths) { this.allSearchPaths = allSearchPaths; } /* public HashMap<IcdDiseaseIndex, List<SearchPath>> getRootIndexedSearchPaths() { return rootIndexedSearchPaths; } public void setRootIndexedSearchPaths(HashMap<IcdDiseaseIndex, List<SearchPath>> rootIndexedSearchPaths) { this.rootIndexedSearchPaths = rootIndexedSearchPaths; } */ public HashMap<String, List<SearchPath>> getIcdIndexedSearchPaths() { return icdIndexedSearchPaths; } public void setIcdIndexedSearchPaths(HashMap<String, List<SearchPath>> icdIndexedSearchPaths) { this.icdIndexedSearchPaths = icdIndexedSearchPaths; } public boolean isDealAlias() { return dealAlias; } public void setDealAlias(boolean dealAlias) { this.dealAlias = dealAlias; } public IcdDiseaseDao getDao() { return dao; } public void setDao(IcdDiseaseDao dao) { this.dao = dao; } public List<IcdDiseaseIndex> getAllIndexItems() { return allIndexItems; } public void setAllIndexItems(List<IcdDiseaseIndex> allIndexItems) { this.allIndexItems = allIndexItems; } public List<IcdDiseaseIndex> getAllRootIndexItems() { return allRootIndexItems; } public void setAllRootIndexItems(List<IcdDiseaseIndex> allRootIndexItems) { this.allRootIndexItems = allRootIndexItems; } public List<IcdDisease> 
getAllDiseases() { return allDiseases; } public void setAllDiseases(List<IcdDisease> allDiseases) { this.allDiseases = allDiseases; } public HashMap<Integer, IcdDiseaseIndex> getVol3IDIndexedItems() { return vol3IDIndexedItems; } public void setVol3IDIndexedItems(HashMap<Integer, IcdDiseaseIndex> vol3IDIndexedItems) { this.vol3IDIndexedItems = vol3IDIndexedItems; } public HashMap<Integer, IcdDisease> getVol1IDIndexedItems() { return vol1IDIndexedItems; } public void setVol1IDIndexedItems(HashMap<Integer, IcdDisease> vol1IDIndexedItems) { this.vol1IDIndexedItems = vol1IDIndexedItems; } public HashMap<String, List<IcdDiseaseIndex>> getVol3NameIndexedItems() { return vol3NameIndexedItems; } public void setVol3NameIndexedItems(HashMap<String, List<IcdDiseaseIndex>> vol3NameIndexedItems) { this.vol3NameIndexedItems = vol3NameIndexedItems; } public HashMap<String, List<IcdDiseaseIndex>> getVol3PinYinIndexedItems() { return vol3PinYinIndexedItems; } public void setVol3PinYinIndexedItems(HashMap<String, List<IcdDiseaseIndex>> vol3PinYinIndexedItems) { this.vol3PinYinIndexedItems = vol3PinYinIndexedItems; } public HashMap<String, List<IcdDisease>> getVol1NameIndexedItems() { return vol1NameIndexedItems; } public void setVol1NameIndexedItems(HashMap<String, List<IcdDisease>> vol1NameIndexedItems) { this.vol1NameIndexedItems = vol1NameIndexedItems; } public HashMap<String, List<IcdDisease>> getVol1PinYinIndexedItems() { return vol1PinYinIndexedItems; } public void setVol1PinYinIndexedItems(HashMap<String, List<IcdDisease>> vol1PinYinIndexedItems) { this.vol1PinYinIndexedItems = vol1PinYinIndexedItems; } public HashMap<String, List<IcdDisease>> getVol1IcdIndexedItems() { return vol1IcdIndexedItems; } public void setVol1IcdIndexedItems(HashMap<String, List<IcdDisease>> vol1IcdIndexedItems) { this.vol1IcdIndexedItems = vol1IcdIndexedItems; } // private RAMDirectory directory; private Analyzer analyzer; /* 1. [] 2. [] () 3. 
[] () */
    // Regex fragments for bracket / parenthesis forms in index names.
    // No live code in the visible part of this file references them.
    private final String pstr1 = "\\([^\\)]+?(\\[.+?\\])"; // "(" ... then a captured "[...]" group
    private final String pstr2 = "\\[[^\\]]+?\\("; // "[" ... then "("
    private final String pstr3 = "\\[[^\\]]+?"; // "[" followed by at least one non-"]" character
    private final String pstr4 = "(\\([^\\)]+?.*?\\))"; // a captured "(...)" group

    /**
     * Loads all ICD data through the DAOs, builds every lookup map of this grid, derives the
     * Vol3 search paths and indexes them into a fresh in-memory Lucene index.
     *
     * <p>Steps, in order: load the four DAO lists; index Vol1 relations; index Vol1 diseases
     * (by id, ICD code, name, pinyin) and attach their grouped relations; link Vol1 parents and
     * children; index Vol3 entries (optionally loading aliases); link Vol3 parents and children;
     * build one search path per Vol3 entry that has an ICD code; write the search paths to the
     * Lucene index.
     *
     * @throws IOException if the stop-word resource or the Lucene index cannot be read/written
     * @throws SQLException declared for the DAO calls
     */
    public void init() throws IOException, CorruptIndexException, InvalidTokenOffsetsException, ParseException,
            SQLException {
        System.err.println("--------------\t\t Memory Grid Initializing -------------");
        this.allDiseases = this.dao.getAllDisease();
        this.allIndexItems = this.dao.getAllDiseaseIndexes();
        this.allIndexRelations = this.dao.getAllIcdDiseaseIdxRelations();
        this.allDisRelations = this.icdDisRelationDao.getAllDiseaseRelation();
        if (this.allRootIndexItems == null) {
            this.allRootIndexItems = new ArrayList<IcdDiseaseIndex>();
        }
        System.err.println("All Index Items size:" + allIndexItems.size());
        System.err.println("All Disease Items size:" + allDiseases.size());

        initIndexVol1Relations();
        initIndexVol1Diseases();
        initLinkVol1Parents();
        initIndexVol3Items();
        initLinkVol3Parents();
        initBuildSearchPaths();

        System.err.println("Search Path #:" + this.getAllSearchPaths().size());
        System.err.println("++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++");
        System.err.println("++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++");
        System.err.println("******************* Begin create Idnex *********************");

        // Read the stop-word list; the reader is closed even when reading fails.
        HashSet<String> stopWords = new HashSet<String>();
        BufferedReader stopWordReader = IOUtil.getReader(ResourceBundle.getBundle("library").getString("stopLibrary"),
                "UTF-8");
        try {
            String word;
            while ((word = stopWordReader.readLine()) != null) {
                stopWords.add(word);
            }
        } finally {
            stopWordReader.close();
        }

        directory = new RAMDirectory();
        analyzer = new AnsjAnalysis(stopWords, false);
        IndexWriterConfig iwc = new IndexWriterConfig(Version.LUCENE_48, analyzer);
        iwc.setOpenMode(IndexWriterConfig.OpenMode.CREATE);
        IndexWriter writer = new IndexWriter(directory, iwc);
        try {
            for (SearchPath path : this.getAllSearchPaths()) {
                addContent(writer, path.getIndexString(), path.getPathHashCode(), path.getNodes(), "vol3");
            }
            writer.commit();
        } finally {
            writer.close();
        }
        System.err.println("***************** index created *******************");
        System.out.println("");
    }

    /** Adds {@code value} to the list stored under {@code key}, creating the list on first use. */
    private static <K, V> void initAppend(HashMap<K, List<V>> index, K key, V value) {
        List<V> bucket = index.get(key);
        if (bucket == null) {
            bucket = new ArrayList<V>();
            index.put(key, bucket);
        }
        bucket.add(value);
    }

    /** Indexes every Vol1 relation by its own id and groups the relations by main id. */
    private void initIndexVol1Relations() {
        for (IcdDiseaseRelation r : this.allDisRelations) {
            this.vol1IDRelationIndexedItems.put(r.getId(), r);
            initAppend(this.vol1mainIDRelationIndexedItems, r.getMainID(), r);
        }
    }

    /**
     * Splits the relations of one disease into lists keyed by relation type; only non-empty
     * lists are stored.
     *
     * NOTE(review): the relation-type literals below are damaged in this copy of the file (the
     * third and fourth are both the empty string, so the fourth branch can never be taken as
     * written). They are kept exactly as found - restore them from the original source.
     */
    private HashMap<String, List<IcdDiseaseRelation>> initGroupRelationsByType(List<IcdDiseaseRelation> relations) {
        HashMap<String, List<IcdDiseaseRelation>> grouped = new HashMap<String, List<IcdDiseaseRelation>>();
        if (relations == null || relations.isEmpty()) {
            return grouped;
        }
        List<IcdDiseaseRelation> noIncludeList = new ArrayList<IcdDiseaseRelation>();
        List<IcdDiseaseRelation> otherCodeList = new ArrayList<IcdDiseaseRelation>();
        List<IcdDiseaseRelation> swordList = new ArrayList<IcdDiseaseRelation>();
        List<IcdDiseaseRelation> includeList = new ArrayList<IcdDiseaseRelation>();
        for (IcdDiseaseRelation idr : relations) {
            String type = idr.getRelationType(); // constant-first equals: a null type matches no bucket
            if ("?".equals(type)) {
                noIncludeList.add(idr);
            } else if ("??".equals(type)) {
                otherCodeList.add(idr);
            } else if ("".equals(type)) {
                swordList.add(idr);
            } else if ("".equals(type)) {
                includeList.add(idr);
            }
        }
        if (noIncludeList.size() > 0) {
            grouped.put("?", noIncludeList);
        }
        if (otherCodeList.size() > 0) {
            grouped.put("??", otherCodeList);
        }
        if (swordList.size() > 0) {
            grouped.put("", swordList);
        }
        if (includeList.size() > 0) {
            grouped.put("", includeList);
        }
        return grouped;
    }

    /** Attaches grouped relations to every Vol1 disease and indexes it by id, ICD code, name and pinyin. */
    private void initIndexVol1Diseases() {
        for (IcdDisease d : this.allDiseases) {
            d.setRefRelations(initGroupRelationsByType(this.vol1mainIDRelationIndexedItems.get(d.getId())));
            this.vol1IDIndexedItems.put(d.getId(), d);

            String code = d.getIcdCode();
            if (code != null && code.length() > 0) {
                // The same normalised key is now used for lookup and for storage (the original
                // looked up under the raw upper-cased code but stored under the normalised one).
                // NOTE(review): as written, the regex "$" only matches the end anchor, so this
                // replaceAll removes nothing; a character was probably lost from the literal.
                initAppend(this.vol1IcdIndexedItems, code.replaceAll("$", "").toUpperCase(), d);
            }
            initAppend(this.vol1NameIndexedItems, d.getCodeNameCh(), d);
            initAppend(this.vol1PinYinIndexedItems, d.getPy(), d);
        }
    }

    /** Sets the parent of every Vol1 disease that has a non-zero parent id and registers it as a child. */
    private void initLinkVol1Parents() {
        for (IcdDisease d : this.allDiseases) {
            if (d.getParentID() == null || d.getParentID() == 0) {
                continue;
            }
            IcdDisease parent = this.vol1IDIndexedItems.get(d.getParentID());
            d.setParentDisease(parent);
            if (parent == null) {
                System.err.println(" parent is null :" + d.getId() + "\t" + d.getParentID());
                continue;
            }
            if (parent.getChildren() == null) {
                // Raw list kept on purpose: the element type of setChildren is declared outside this file.
                parent.setChildren(new ArrayList());
            }
            parent.getChildren().add(d);
        }
    }

    /**
     * Indexes every Vol3 entry by id, name and pinyin, loads its aliases when alias handling is
     * enabled, and collects the depth-0 entries as root items.
     */
    private void initIndexVol3Items() throws IOException, CorruptIndexException, InvalidTokenOffsetsException,
            ParseException, SQLException {
        for (IcdDiseaseIndex e : this.allIndexItems) {
            this.vol3IDIndexedItems.put(e.getId(), e);
            initAppend(this.vol3NameIndexedItems, e.getNameCh(), e);
            initAppend(this.vol3PinYinIndexedItems, e.getPy(), e);

            if (this.dealAlias) {
                List<String> aliasList = dao.getAliasesByIndexid(e.getId());
                // A null result is treated like "no aliases" instead of failing the whole init.
                if (aliasList != null && !aliasList.isEmpty()) {
                    e.setAliases(aliasList.toArray(new String[aliasList.size()]));
                }
            }
            if (e.getDepth() == 0) {
                this.allRootIndexItems.add(e);
            }
        }
    }

    /** Sets the parent of every Vol3 entry that has a non-zero parent id and registers it as a child. */
    private void initLinkVol3Parents() {
        for (IcdDiseaseIndex idi : this.allIndexItems) {
            if (idi.getParentID() == null || idi.getParentID() == 0) {
                continue;
            }
            IcdDiseaseIndex parent = this.vol3IDIndexedItems.get(idi.getParentID());
            idi.setParentDiseaseIndex(parent);
            if (parent == null) {
                System.err.println(" parent is null :" + idi.getId() + "\t" + idi.getParentID());
                continue;
            }
            if (parent.getChildren() == null) {
                // Raw list kept on purpose: the element type of setChildren is declared outside this file.
                parent.setChildren(new ArrayList());
            }
            parent.getChildren().add(idi);
        }
    }

    /**
     * Builds one search path (root entry ... coded entry) for every Vol3 entry that carries an
     * ICD code, resolves its terminal Vol1 disease and registers the path in all path indexes.
     * Entries whose code cannot be resolved in Vol1 are logged and skipped.
     */
    private void initBuildSearchPaths() {
        for (IcdDiseaseIndex idi : this.allIndexItems) {
            String code = idi.getIcdCode();
            if (code == null || code.length() == 0) {
                continue;
            }

            Stack<IcdDiseaseIndex> chain = new Stack<IcdDiseaseIndex>();
            createIdexSearchPath(chain, idi);
            SearchPath path = new SearchPath();
            for (IcdDiseaseIndex node : chain) {
                path.getNodeList().add(node);
            }
            path.setNodes(path.getNodeList().size());
            path.setStartPoint(path.getNodeList().get(0));

            // First try the code as given, then a relaxed form without a trailing "." and without ".-".
            List<IcdDisease> ds = this.vol1IcdIndexedItems.get(code.replaceAll("$", "").toUpperCase());
            if (ds == null) {
                String relaxedKey = code.replaceAll("$", "").replaceAll("\\.$", "").replace(".-", "").toUpperCase();
                ds = this.vol1IcdIndexedItems.get(relaxedKey);
                if (ds == null) {
                    System.err.println("ICD CODE:" + code + " Indexed ?"
                            + this.vol1IcdIndexedItems.containsKey(code.toUpperCase()) + "--" + relaxedKey);
                    continue;
                }
            }
            // The last candidate whose code type is not excluded becomes the terminal.
            // NOTE(review): both excluded code-type literals are damaged (empty) in this copy.
            for (IcdDisease dis : ds) {
                if (dis.getCodeType().equals("") || dis.getCodeType().equals("")) {
                    continue;
                }
                path.setTerminal(dis);
            }
            IcdDisease terminal = path.getTerminal();
            if (terminal == null) {
                // Every candidate was excluded; the original code failed with a NullPointerException here.
                System.err.println("ICD CODE:" + code + " has no usable Vol1 terminal, search path skipped");
                continue;
            }

            setDimension(path, terminal);
            this.allSearchPaths.add(path);
            this.guidIndexedSearchPaths.put(path.getPathHashCode(), path);
            initAppend(this.icdIndexedSearchPaths, terminal.getIcdCode().toUpperCase(), path);
            initAppend(this.rootIndexedSearchPaths, path.getStartPoint(), path);
        }
    }

    /**
     * Debug helper: runs {@code queryStr} against {@code directory} and prints up to six hits
     * (guid, ICD code of the path's terminal, highlighted text) to {@code System.err}.
     * The {@code analyzer} parameter is not used; parsing uses {@code ansjHeightAnalyzer}.
     * A query that cannot be parsed is reported via {@code printStackTrace()} and otherwise ignored.
     */
    private void search(Analyzer analyzer, Directory directory, String queryStr)
            throws CorruptIndexException, IOException, ParseException, InvalidTokenOffsetsException {
        IndexReader reader = DirectoryReader.open(directory);
        try {
            IndexSearcher searcher = new IndexSearcher(reader);
            // Same match version as the one the index is written with in init().
            QueryParser tq = new QueryParser(Version.LUCENE_48, "text", ansjHeightAnalyzer);
            try {
                Query query = tq.parse(queryStr);
                System.err.println(query);
                TopDocs hits = searcher.search(query, 6);
                System.err.println(queryStr + ":" + hits.totalHits + "?!");
                for (int i = 0; i < hits.scoreDocs.length; i++) {
                    Document document = searcher.doc(hits.scoreDocs[i].doc);
                    String guid = document.get("guid");
                    SearchPath hitPath = this.getGuidIndexedSearchPaths().get(guid);
                    String icdCode = null;
                    if (hitPath != null && hitPath.getTerminal() != null) {
                        icdCode = hitPath.getTerminal().getIcdCode();
                    }
                    System.err.println("GUID : " + guid + " ICD_CODE:" + icdCode + "\t\t"
                            + toHighlighter(ansjHeightAnalyzer, query, document));
                }
            } catch (ParseException pe) {
                pe.printStackTrace();
            }
        } finally {
            reader.close(); // the reader is private to this call and was previously never released
        }
    }

    /** Searcher over {@link #directory}; created on first use by {@link #getSearcherInstance()}. */
    private IndexSearcher isearcher;

    /**
     * Returns the cached searcher, opening a reader on {@code directory} the first time.
     * No synchronisation is performed here.
     */
    private IndexSearcher getSearcherInstance() throws IOException {
        if (this.isearcher == null) {
            IndexReader reader = DirectoryReader.open(this.directory);
            this.isearcher = new IndexSearcher(reader);
        }
        return this.isearcher;
    }

    /**
     * Searches the "text" field of the in-memory index for {@code diagName} and returns up to
     * six matching search paths wrapped as {@code SearchPathShowObj}.
     *
     * @param diagName free-text diagnosis name; {@code null} yields an empty result
     * @return the hits in Lucene score order; empty when nothing matches or the query cannot be parsed
     * @throws RuntimeException if {@code init()} has not created the directory and analyzer yet
     */
    public List<SearchPathShowObj> searchSvc(String diagName) throws IOException, InvalidTokenOffsetsException {
        List<SearchPathShowObj> result = new ArrayList<SearchPathShowObj>();
        if (this.directory == null || this.analyzer == null) {
            throw new RuntimeException("Lucene ????");
        }
        if (diagName == null) {
            return result;
        }
        long beginTime = System.currentTimeMillis();
        // Drops parenthesised text and the wildcard characters.
        // NOTE(review): the first two replaceAll patterns are empty in this copy (their characters
        // were lost), so as written they insert parentheses between all characters. Restore them.
        diagName = diagName.replaceAll("", "(").replaceAll("", ")").replaceAll("\\(.*?\\)", "")
                .replace("*", "").replace("?", "");
        IndexSearcher searcher = getSearcherInstance();
        QueryParser tq = new QueryParser(Version.LUCENE_48, "text", ansjHeightAnalyzer);
        // NOTE(review): the collapsed source does not show whether the following call was live code
        // or commented out; it is left disabled here - confirm against the original file.
        // tq.setDefaultOperator(QueryParser.Operator.AND);
        try {
            // Escape query-syntax characters so free text cannot be read as operators or field names.
            Query query = tq.parse(QueryParser.escape(diagName));
            System.err.println(query);
            TopDocs hits = searcher.search(query, 6);
            System.err.println(diagName + ":" + hits.totalHits + "?!");
            for (int i = 0; i < hits.scoreDocs.length; i++) {
                Document document = searcher.doc(hits.scoreDocs[i].doc);
                SearchPath hitPath = this.getGuidIndexedSearchPaths().get(document.get("guid"));
                if (hitPath == null) {
                    continue; // index and path map out of step; nothing to show for this hit
                }
                result.add(new SearchPathShowObj(hitPath));
            }
        } catch (ParseException pe) {
            pe.printStackTrace();
        }
        long responseTime = System.currentTimeMillis() - beginTime;
        System.err.println(" " + diagName + " ?" + responseTime + "ms.");
        return result;
    }

    /**
     * Searches the index for {@code name} restricted by document {@code type} and returns the
     * matching search paths, keeping only the first hit per start-point ICD code.
     *
     * @param name free-text name; {@code null} yields an empty result
     * @param type value matched against the "type" field of the documents
     * @return de-duplicated hits in Lucene score order (at most six documents are inspected)
     * @throws RuntimeException if {@code init()} has not created the directory and analyzer yet
     */
    public List<SearchPath> searchSvc(String name, String type) throws IOException {
        List<SearchPath> result = new ArrayList<SearchPath>();
        if (this.directory == null || this.analyzer == null) {
            throw new RuntimeException("Lucene ????");
        }
        if (name == null) {
            return result;
        }
        long beginTime = System.currentTimeMillis();
        // NOTE(review): the first two replaceAll patterns are damaged (empty) in this copy; see searchSvc(String).
        name = name.replaceAll("", "(").replaceAll("", ")").replaceAll("\\(.*?\\)", "").replace("*", "")
                .replace("?", "");
        IndexSearcher searcher = getSearcherInstance();
        QueryParser tq = new QueryParser(Version.LUCENE_48, "text", ansjHeightAnalyzer);
        // Both values are escaped before being spliced into the query syntax.
        String queryStr = "(text:'" + QueryParser.escape(name) + "') (type:'"
                + QueryParser.escape(String.valueOf(type)) + "')";
        try {
            Query query = tq.parse(queryStr);
            System.err.println(query);
            TopDocs hits = searcher.search(query, 6);
            System.err.println(name + ":" + hits.totalHits + "?!");
            HashMap<String, SearchPath> seenByStartCode = new HashMap<String, SearchPath>();
            for (int i = 0; i < hits.scoreDocs.length; i++) {
                Document document = searcher.doc(hits.scoreDocs[i].doc);
                String text = document.get("text");
                String guid = document.get("guid");
                String nodes = document.get("nodes");
                String stype = document.get("type");
                SearchPath sp = this.getGuidIndexedSearchPaths().get(guid);
                if (sp == null) {
                    continue; // index and path map out of step; skip instead of failing
                }
                String startCode = sp.getStartPoint().getIcdCode();
                System.err.println("!!!! text:" + text + "||guid:" + guid + "||nodes:" + nodes + "||type:" + stype
                        + "||icdCode:" + startCode);
                if (seenByStartCode.get(startCode) == null) {
                    seenByStartCode.put(startCode, sp);
                    System.err.println(" --> text:" + text + "||guid:" + guid + "||nodes:" + nodes + "||type:"
                            + stype + "||icdCode:" + startCode);
                    result.add(sp);
                }
            }
        } catch (ParseException pe) {
            pe.printStackTrace();
        }
        long responseTime = System.currentTimeMillis() - beginTime;
        System.err.println(" " + name + " ?" + responseTime + "ms.");
        return result;
    }

    /**
     * Walks from {@code d} up through its Vol1 ancestors and copies each upper-cased ICD code
     * into the chapter / section / class / subclass slot of {@code path} that matches the
     * disease's code type. A {@code null} disease or code type is ignored.
     *
     * NOTE(review): the four code-type literals are damaged in this copy (all are the empty
     * string, so only the first branch can match as written). They are kept as found.
     */
    private void setDimension(SearchPath path, IcdDisease d) {
        IcdDisease current = d;
        while (current != null) {
            String codeType = current.getCodeType();
            if ("".equals(codeType)) {
                path.setChapter(current.getIcdCode().toUpperCase());
            } else if ("".equals(codeType)) {
                path.setSection(current.getIcdCode().toUpperCase());
            } else if ("".equals(codeType)) {
                path.setClazz(current.getIcdCode().toUpperCase());
            } else if ("".equals(codeType)) {
                path.setSubclazz(current.getIcdCode().toUpperCase());
            }
            // Stop at the first disease without a parent id or whose parent is not loaded.
            current = current.getParentID() == null
                    ? null
                    : this.getVol1IDIndexedItems().get(current.getParentID());
        }
    }
private void createIdexSearchPath(Stack v, IcdDiseaseIndex idi) { if (idi.getParentID() != null) { IcdDiseaseIndex parent = this.getVol3IDIndexedItems().get(idi.getParentID()); if (parent != null) { createIdexSearchPath(v, parent); } } v.push(idi); // ?? } /* Vol3 ??? ParseTree ???? */ public void parseRootIndex(IcdDiseaseIndex item) { String source = item.getNameCh(); // ? string ParseTreeResultNode rootNode = new ParseTreeResultNode(); rootNode.setContent(null); rootNode.setDepth(0); // root node ParseTreeOperatorNode orOper = new ParseTreeOperatorNode(); orOper.setDepth(1); orOper.setOperator(ParseTreeOperatorEnum.LOGIC_OR); orOper.setSubResults(new ArrayList()); if (this.dealAlias) { // ???? for (String alias : item.getAliases()) { // alias OR ? if (alias == null || alias.length() == 0) { continue; } ParseTreeResultNode n = new ParseTreeResultNode(); n.setContent(alias); n.setDepth(2); orOper.getSubResults().add(n); } } // ??? OR ? orOper.getSubResults().add(parseIndexContent(source, 2)); rootNode.setOperNode(orOper); item.setParseTreeRoot(rootNode); } private static final HashMap<String, String[]> reverseWords = new HashMap(); static { // .* reverseWords.put("", new String[] { "?" }); reverseWords.put("", new String[] { "?" }); reverseWords.put("", new String[] { "" }); reverseWords.put("", new String[] { "", "" }); // reverseWords.put("?", new String[] { "" }); reverseWords.put("", new String[] { "", "" }); //reverse reverseWords.put("?", new String[] { "" }); reverseWords.put("?", new String[] { "" }); reverseWords.put("", new String[] { "" }); reverseWords.put("", new String[] { "?" }); reverseWords.put("", new String[] { "", "" }); } /* 61003 ocr 59809 61229 58607 ***?? -- *** && ?? */ // ?? ? AND? [\\-(\\(.+?\\)+)] /* ?? ? AND ? ResultNode ? OROR???AND */ private ParseTreeResultNode andRecursiveFunc(String item, int beginDepth) { // AND // [(-?)??] --> && ?? // [-???-???] --> && ??? && ??? 
// System.err.println("\t\tandRecursiveFunc :"+item); String[] subitems = null; ParseTreeResultNode orNode = new ParseTreeResultNode(); subitems = item.split("[\\-(\\(.+?\\)+)]"); // ? () if (subitems.length > 1) { // System.err.println("\t\tandRecursiveFunc ? AND ?"); orNode.setDepth(beginDepth + 1); ParseTreeOperatorNode oper = new ParseTreeOperatorNode(); oper.setDepth(beginDepth + 2); oper.setOperator(ParseTreeOperatorEnum.LOGIC_AND); if (oper.getSubResults() == null) { oper.setSubResults(new ArrayList()); } for (String subitem : subitems) { // System.err.println("\t\t\t? AND ? :"+subitem); if (subitem == null || subitem.length() == 0) { continue; } ParseTreeResultNode n = new ParseTreeResultNode(); n.setContent(subitem); n.setContentOper(ParseTreeOperatorEnum.MATCH); n.setDepth(beginDepth + 3); oper.getSubResults().add(n); } orNode.setOperNode(oper); } else { // System.err.println("\t\tandRecursiveFunc AND ?"); if (item != null && item.length() > 0) { orNode.setContent(item); orNode.setContentOper(ParseTreeOperatorEnum.MATCH); orNode.setDepth(beginDepth + 1); } } return orNode; } private ParseTreeResultNode orRecursiveFunc(String item, int beginDepth) { String[] subitems = null; ParseTreeResultNode resultnode = new ParseTreeResultNode(); // OR subitems = item.split("[?]"); if (subitems.length >= 1) { resultnode.setDepth(beginDepth + 1); ParseTreeOperatorNode oper = new ParseTreeOperatorNode(); oper.setDepth(beginDepth + 2); oper.setOperator(ParseTreeOperatorEnum.LOGIC_OR); if (oper.getSubResults() == null) { oper.setSubResults(new ArrayList()); } for (String subitem : subitems) { // System.err.println("\t DEBUG( ?[],OR ?, content):"+subitem); if (subitem == null || subitem.length() == 0) { continue; } ParseTreeResultNode n = andRecursiveFunc(subitem, beginDepth); oper.getSubResults().add(n); } resultnode.setOperNode(oper); } //?,??[????] //?????????? // ?? ? return resultnode; } /* ?? 
*/

/**
 * Rewrites {@code item} so that compressed enumerations are spelled out before
 * the text is split into alternatives.
 *
 * <p>Steps, in order: two regex clean-ups, a rewrite of the first bracketed
 * group, a suffix duplication, and then one pass over {@code patterns}. For
 * each pattern that matches, the first match is replaced by the concatenation
 * of {@code group(1) + group(t)} for every further group {@code t} that took
 * part in the match.
 *
 * <p>Differences from the previous implementation, all bug fixes:
 * <ul>
 *   <li>A pattern is applied at most once. The old {@code while (find())} loop
 *       called {@code replaceFirst}, which resets the matcher, so an input with
 *       two matches never left the loop.</li>
 *   <li>Replacement text is passed through {@code Matcher.quoteReplacement}, so
 *       a {@code $} or backslash in the input is no longer read as a group
 *       reference.</li>
 *   <li>Optional groups that did not match are skipped instead of being
 *       concatenated as the text "null".</li>
 * </ul>
 *
 * <p>NOTE(review): the regex and separator literals in this method appear to
 * have lost their original (non-ASCII) text in this copy of the file; several
 * of them (for example {@code (??)}) are not even valid expressions as shown.
 * They are kept byte-for-byte so the real literals can be restored in place.
 *
 * @param item text to normalise; must not be {@code null}
 * @return the rewritten text
 */
private String prepareSemanticSplit(String item) {
    String[] patterns = {
        "(.+?)()?(?)(??)$",
        "(.+?)()?()(??)$",
        "(.+?)()?(??)(?)$",
        "(.+?)()(??)$",
        "(.+?)(??)()$",
        "(.+?)()(??)$",
        "(.+?)()?()(??)",
        "(.+?)()(??)$",
        "(.+?)()?(?)?()()$",
        "(.+?)()?()(??)$",
        "(.+?)(?)?(?)()$",
        "(.+?)()?(??)()$",
        "(.+?)()()$",
        "(.+?)()()$",
        "(.+?)()(??)$",
        "(.+?)()()$",
        "(.+?)()()$",
        "(.+?)()(??)$",
        "(.+?)()()$",
        "(.+?)()(?)$",
        "(.+?)()()$",
        "(.+?)()(??)$",
        "(.+?)(?)()$",
        "(.+?)()()$",
        "(.+?)()(?)$",
        "(.+?)()(?)$",
        "(.+?)()()$",
        "(.+?)(??)(??)$",
        "(.+?)()(??)$",
        "(.+?)(??)()$",
        "(.+?)()()$",
        "(.+?)(?)(??)$",
        "(.+?)()(??)$",
        "(.+?)()()$",
        "(.+?)()()$",
        "(.+?)()()$",
        "(.+?)()()$",
        "(.+?)()()$",
        "(.+?)(?)(??)$",
        "(.+?)()(??)$",
        "(.+?)()()$",
        "(.+?)()()$",
        "(.+?)()()$",
        "(.+?)()(??)$",
        "(.+?)(??)()$",
        "(.+?)()()$",
        "(.+?)(??)?(?)$",
    };

    item = item.replaceAll("(.)[]??", "");

    // Rewrite bracketed groups using the captured text of the first one found.
    Matcher bracketMatcher = Pattern.compile("\\[???(.+?)\\]").matcher(item);
    if (bracketMatcher.find()) {
        String rewritten = "[" + bracketMatcher.group(1) + "]";
        item = item.replaceAll("\\[???(.+?)\\]", Matcher.quoteReplacement(rewritten));
    }

    item = item.replaceAll(".????", "");

    Matcher suffixMatcher = Pattern.compile("(.+?)??$").matcher(item);
    if (suffixMatcher.find()) {
        item = suffixMatcher.group(1) + "" + suffixMatcher.group(1) + "??";
    }

    for (String expression : patterns) {
        Matcher matcher = Pattern.compile(expression).matcher(item);
        if (!matcher.find()) {
            continue;
        }
        String leadStr = matcher.group(1);
        StringBuilder expanded = new StringBuilder();
        for (int t = 2; t <= matcher.groupCount(); t++) {
            String part = matcher.group(t);
            if (part == null) {
                // Optional group that did not take part in the match.
                continue;
            }
            expanded.append(leadStr).append(part).append("");
        }
        String replacer = expanded.toString().replaceAll("$", "");
        item = matcher.replaceFirst(Matcher.quoteReplacement(replacer));
    }
    return item;
}

/**
 * Normalises brackets and parenthesised remarks in an index term before it is
 * parsed.
 *
 * <p>Steps, in order:
 * <ol>
 *   <li>a literal-for-literal replacement (see the note below);</li>
 *   <li>for every match of {@code pstr1}: remove group 1 from the text and,
 *       unless the group contains the marker literal, append it again at the
 *       end with {@code [} and {@code ]} turned into {@code (} and {@code )};</li>
 *   <li>remove every match of {@code pstr4};</li>
 *   <li>replace every {@code (-...)} group by a single {@code -};</li>
 *   <li>remove empty {@code ()} pairs, then apply one final regex clean-up.</li>
 * </ol>
 *
 * <p>NOTE(review): the first {@code replaceAll} has an empty pattern in this
 * copy of the file (which matches between every character) and
 * {@code contains("")} is always true; both literals look like they lost their
 * original text. {@code pstr1} and {@code pstr4} are defined outside this
 * excerpt.
 *
 * @param content raw index text; must not be {@code null}
 * @return the normalised text
 */
private String miscStringDeal(String content) {
    content = content.replaceAll("", "]");

    // The matchers below keep scanning the text they were created on; only the
    // local variable is rebound to the edited copy.
    Matcher bracketMatcher = Pattern.compile(pstr1).matcher(content);
    while (bracketMatcher.find()) {
        String captured = bracketMatcher.group(1);
        boolean dropOnly = captured.contains("");
        content = content.replace(captured, "");
        if (!dropOnly) {
            content += captured.replace("[", "(").replace("]", ")");
        }
    }

    Matcher remarkMatcher = Pattern.compile(pstr4).matcher(content);
    while (remarkMatcher.find()) {
        content = content.replace(remarkMatcher.group(), "");
    }

    Matcher dashMatcher = Pattern.compile("\\(-(.+?)\\)").matcher(content);
    while (dashMatcher.find()) {
        content = content.replace(dashMatcher.group(), "-");
    }

    content = content.replaceAll("\\(\\)", "");
    content = content.replaceAll("\\[$]", "");
    return content;
}

/*
 * Tree produced for one index entry:
 *
 * rootNode
 *  `-- OR                      (built in parseRootIndex)
 *       |-- alias nodes
 *       `-- result node        (built in parseIndexContent)
 *            `-- AND (andOper)
 *                 |-- NOTMATCH nodes derived from qualifiers
 *                 `-- node holding an OR of all alternatives
 */

/**
 * Parses the text of an index entry into a result node whose operator is a
 * LOGIC_AND (depth {@code beginDepth + 1}).
 *
 * <p>The text is first normalised with {@link #miscStringDeal(String)}. Each
 * {@code [...]} group is run through {@link #prepareSemanticSplit(String)} and
 * {@link #orRecursiveFunc(String, int)} and collected as an alternative; the
 * bracket groups and all {@code (...)} groups are then removed and the
 * remaining text is added as the last alternative. All alternatives are placed
 * under one LOGIC_OR operator, which is attached to the AND operator through an
 * intermediate result node.
 *
 * <p>The sub-result lists of both operators are created when missing, matching
 * what {@code andRecursiveFunc} and {@code orRecursiveFunc} do.
 *
 * @param content    index text; must not be {@code null}
 * @param beginDepth depth assigned to the returned node
 * @return the result node described above
 */
private ParseTreeResultNode parseIndexContent(String content, int beginDepth) {
    ParseTreeResultNode resultNode = new ParseTreeResultNode();
    resultNode.setDepth(beginDepth);
    ParseTreeOperatorNode andOper = new ParseTreeOperatorNode();
    andOper.setDepth(beginDepth + 1);
    andOper.setOperator(ParseTreeOperatorEnum.LOGIC_AND);
    if (andOper.getSubResults() == null) {
        andOper.setSubResults(new ArrayList());
    }
    resultNode.setOperNode(andOper);

    content = miscStringDeal(content);

    // One alternative per [...] group.
    List<ParseTreeResultNode> orItemResultNodes = new ArrayList<ParseTreeResultNode>();
    Matcher bracketMatcher = Pattern.compile("\\[(.+?)\\]").matcher(content);
    while (bracketMatcher.find()) {
        String item = prepareSemanticSplit(bracketMatcher.group(1));
        orItemResultNodes.add(orRecursiveFunc(item, beginDepth));
    }
    content = content.replaceAll("\\[.+?\\]", "");

    // The loop that used to fill this list from the "\\((.+?)\\)" groups of the
    // text is commented out in the original, so the list is always empty and the
    // qualifier handling below currently contributes no nodes.
    List<String> qualifiers = new ArrayList<String>();
    andOper.getSubResults().addAll(collectQualifierExclusions(qualifiers, beginDepth));

    content = content.replaceAll("\\(.+?\\)", "");
    content = prepareSemanticSplit(content);
    orItemResultNodes.add(orRecursiveFunc(content, beginDepth));

    ParseTreeOperatorNode orNode = new ParseTreeOperatorNode();
    orNode.setDepth(andOper.getDepth() + 2);
    orNode.setOperator(ParseTreeOperatorEnum.LOGIC_OR);
    if (orNode.getSubResults() == null) {
        orNode.setSubResults(new ArrayList());
    }
    for (ParseTreeResultNode alternative : orItemResultNodes) {
        alternative.setDepth(orNode.getDepth() + 1);
    }
    orNode.getSubResults().addAll(orItemResultNodes);

    ParseTreeResultNode betweenAndor = new ParseTreeResultNode();
    betweenAndor.setDepth(andOper.getDepth() + 1);
    betweenAndor.setOperNode(orNode);
    andOper.getSubResults().add(betweenAndor);
    return resultNode;
}

/**
 * Derives NOTMATCH nodes (depth {@code beginDepth + 1}) from a list of
 * qualifiers, using {@code reverseWords} for qualifiers that carry the suffix
 * literal.
 *
 * <p>NOTE(review): the suffix and prefix literals are "" and "?" in this copy
 * of the file and look like they lost their original text; they are kept
 * byte-for-byte.
 */
private List<ParseTreeResultNode> collectQualifierExclusions(List<String> qualifiers, int beginDepth) {
    List<ParseTreeResultNode> exclusions = new ArrayList<ParseTreeResultNode>();
    for (String q : qualifiers) {
        if (q.endsWith("")) {
            String key = q.substring(0, q.length() - 1);
            if (reverseWords.containsKey(key)) {
                String[] rwords = reverseWords.get(key);
                // Skip the key when one of its reverse words is itself a qualifier.
                boolean needDeal = true;
                for (String r : rwords) {
                    if (qualifiers.contains(r)) {
                        needDeal = false;
                        break;
                    }
                }
                if (needDeal) {
                    for (String rword : rwords) {
                        exclusions.add(newNotMatchNode(rword, beginDepth + 1));
                    }
                }
            } else if (q.startsWith("?")) {
                exclusions.add(newNotMatchNode(q.replace("?", "[^?]"), beginDepth + 1));
            } else {
                exclusions.add(newNotMatchNode("?" + q, beginDepth + 1));
            }
        } else if (q.startsWith("?")) {
            exclusions.add(newNotMatchNode(q.replace("?", "[^?]"), beginDepth + 1));
        }
    }
    return exclusions;
}

/** Creates a result node with the given content, operator NOTMATCH and depth. */
private ParseTreeResultNode newNotMatchNode(String content, int depth) {
    ParseTreeResultNode n = new ParseTreeResultNode();
    n.setContent(content);
    n.setContentOper(ParseTreeOperatorEnum.NOTMATCH);
    n.setDepth(depth);
    return n;
}

private Analyzer ansjHeightAnalyzer = new AnsjAnalysis();

/**
 * Returns the stored "text" field of {@code doc} with the best fragment for
 * {@code query} wrapped in red {@code <font>} tags.
 *
 * @param analyzer analyzer used to tokenise the stored text
 * @param query    query whose terms are highlighted
 * @param doc      document to read the "text" field from
 * @return the highlighted fragment; the unmodified text when no fragment was
 *         produced; {@code null} when the field is absent or highlighting
 *         failed (the failure is printed to standard error)
 * @throws InvalidTokenOffsetsException declared for callers; the method itself
 *         catches it
 */
private String toHighlighter(Analyzer analyzer, Query query, Document doc) throws InvalidTokenOffsetsException {
    String field = "text";
    String stored = doc.get(field);
    if (stored == null) {
        // Nothing to highlight; previously this surfaced as a NullPointerException.
        return null;
    }
    try {
        SimpleHTMLFormatter simpleHtmlFormatter = new SimpleHTMLFormatter("<font color=\"red\">", "</font>");
        Highlighter highlighter = new Highlighter(simpleHtmlFormatter, new QueryScorer(query));
        TokenStream tokenStream1 = analyzer.tokenStream("text", new StringReader(stored));
        String highlighterStr = highlighter.getBestFragment(tokenStream1, stored);
        return highlighterStr == null ? stored : highlighterStr;
    } catch (IOException e) {
        e.printStackTrace();
    } catch (InvalidTokenOffsetsException e) {
        e.printStackTrace();
    }
    return null;
}

/**
 * Adds one document with the stored fields "text" (analyzed), "guid", "nodes"
 * and "type" (all three not analyzed) to the index.
 *
 * @param iwriter writer the document is added to
 * @param text    value of the "text" field
 * @param guid    value of the "guid" field
 * @param nodes   number stored, as decimal text, in the "nodes" field
 * @param type    value of the "type" field
 * @throws CorruptIndexException propagated from the writer
 * @throws IOException           propagated from the writer
 */
private void addContent(IndexWriter iwriter, String text, String guid, int nodes, String type)
        throws CorruptIndexException, IOException {
    Document doc = new Document();
    doc.add(new Field("text", text, Field.Store.YES, Field.Index.ANALYZED));
    doc.add(new Field("guid", guid, Field.Store.YES, Field.Index.NOT_ANALYZED));
    doc.add(new Field("nodes", "" + nodes, Field.Store.YES, Field.Index.NOT_ANALYZED));
    doc.add(new Field("type", type, Field.Store.YES, Field.Index.NOT_ANALYZED));
    iwriter.addDocument(doc);
}

/**
 * Runs a term query for {@code diagName} on the "pathDesc" field and prints the
 * "guid" and "pathDesc" of up to 100 hits to standard output.
 *
 * <p>The reader is now closed even when the search throws.
 * NOTE(review): as before, the shared {@code directory} is closed after a
 * successful search — confirm that no later search needs it.
 *
 * @param diagName exact term to look up
 * @throws IOException when the index cannot be opened or read
 */
public void searchDiagName(String diagName) throws IOException {
    IndexReader reader = DirectoryReader.open(this.directory);
    try {
        IndexSearcher searcher = new IndexSearcher(reader);
        Query query = new TermQuery(new Term("pathDesc", diagName));
        TopDocs ts = searcher.search(query, null, 100);
        for (int i = 0; i < ts.scoreDocs.length; i++) {
            Document hit = searcher.doc(ts.scoreDocs[i].doc);
            System.out.println("guid:" + hit.getField("guid").stringValue());
            System.out.println("pathDesc:" + hit.getField("pathDesc").stringValue());
        }
    } finally {
        reader.close();
    }
    directory.close();
    System.out.println("*****************?**********************");
}

/**
 * Evaluates the parse tree of every entry in {@code allRootIndexItems} against
 * {@code diagName} and reports each hit on standard error.
 *
 * <p>All entries are evaluated (no early exit) so that every hit is printed. A
 * {@code null} result from {@code compute} counts as "no hit" instead of
 * failing on unboxing.
 *
 * @param diagName diagnosis text to test
 * @return {@code true} when at least one entry matched
 */
public boolean RootContentHit(String diagName) {
    boolean ret = false;
    for (IcdDiseaseIndex i : this.allRootIndexItems) {
        if (Boolean.TRUE.equals(i.getParseTreeRoot().compute(diagName))) {
            System.err.println("\t\t ???" + i.getNameCh() + " ~ " + diagName + " @ ICD CODE:" + i.getIcdCode());
            ret = true;
        }
    }
    return ret;
}
// Modify for git.
}