Changeset 5389
- Timestamp:
- 08/28/08 17:18:30 (3 months ago)
- Files:
Legend:
- Unmodified
- Added
- Removed
- Modified
- Copied
- Moved
openmrs-modules/patientmatching/src/org/regenstrief/linkage/analysis/ScaleWeightModifier.java
r5199 r5389 193 193 if(inScalingSet(cur_demographic, val)){ 194 194 // Calculate scaling factor obtained from two data sources 195 DataColumn dc1 = lds1_inc_cols.get(cur_demographic);196 DataColumn dc2 = lds2_inc_cols.get(cur_demographic);197 int unique_union = unionUniqueTokens(dc1, dc2, lds1_id, lds2_id);198 SWAdjustScore adjustment = SWAdjustScore.sumTwoScores( adjust1.get(cur_demographic), adjust2.get(cur_demographic), unique_union);195 int unique_union = sw_connection.unionUniqueTokens(cur_demographic).size(); 196 SWAdjustScore swas1 = adjust1.get(cur_demographic); 197 SWAdjustScore swas2 = adjust2.get(cur_demographic); 198 SWAdjustScore adjustment = SWAdjustScore.sumTwoScores(swas1, swas2, unique_union); 199 199 200 200 // Adjust the score openmrs-modules/patientmatching/src/org/regenstrief/linkage/db/ScaleWeightDBManager.java
r5267 r5389 54 54 private static final String fields_table = "patientmatching_field"; 55 55 56 private static final String UNION_FREQ_QUERY = "select c.token, sum(frequency) from (select token, frequency from " + fields_table + " as a, " + token_table + " as b where a.label = ? and a.column_id = b.column_id group by token, frequency ) as c group by c.token;"; 56 private static final String UNION_FREQ_QUERY = "select token, frequency from " + token_table + " where column_id = ?;"; 57 private static final String COL_ID_QUERY = "select column_id from " + fields_table + " where label = ?;"; 57 58 private static final String UNION_FREQ_THRESHOLD_ABOVE_QUERY = "select token, sum from (select c.token, sum(frequency) from (select token, frequency from " + fields_table + " as a, " + token_table + " as b where a.label = ? and a.column_id = b.column_id group by token, frequency ) as c group by c.token) as d where sum > ?;"; 58 59 private static final String UNION_FREQ_THRESHOLD_BELOW_QUERY = "select token, sum from (select c.token, sum(frequency) from (select token, frequency from " + fields_table + " as a, " + token_table + " as b where a.label = ? and a.column_id = b.column_id group by token, frequency ) as c group by c.token) as d where sum < ?;"; 59 PreparedStatement union_freq_stmt, union_threshold_stmt ;60 PreparedStatement union_freq_stmt, union_threshold_stmt, col_id_stmt; 60 61 61 62 // hashtable stores the frequencies for each demographic when the data sources … … 68 69 Hashtable<String,Hashtable<Integer,Hashtable<ModifySet,List<String>>>> percentile_tokens; 69 70 71 Hashtable<CountType,Hashtable<DataColumn,Hashtable<Integer,Integer>>> field_counts; 72 70 73 public ScaleWeightDBManager(String driver, String url, String user, String passwd){ 71 74 super(driver, url, user, passwd); 72 75 union_values = new Hashtable<String,Hashtable<String,Integer>>(); 73 76 percentile_tokens = new Hashtable<String,Hashtable<Integer,Hashtable<ModifySet,List<String>>>>(); 77 field_counts = new Hashtable<CountType,Hashtable<DataColumn,Hashtable<Integer,Integer>>>(); 74 78 } 75 79 … … 344 348 */ 345 349 public int getCount(CountType type, DataColumn target_col, int ds_id) { 350 Hashtable<DataColumn,Hashtable<Integer,Integer>> type_table = field_counts.get(type); 351 Hashtable<Integer,Integer> column_table = null; 352 353 if(type_table != null){ 354 column_table = type_table.get(target_col); 355 if(column_table != null){ 356 Integer i = column_table.get(ds_id); 357 if(i != null){ 358 return i; 359 } 360 } 361 } 362 363 346 364 PreparedStatement pstmt; 347 365 try { 348 366 if(type == CountType.NonNull) { 349 pstmt = db.prepareStatement("SELECT non_null_count FROM " + fields_table + " WHERE datasource_id = ? AND column_id = ?"); 367 //pstmt = db.prepareStatement("SELECT non_null_count FROM " + fields_table + " WHERE datasource_id = ? AND column_id = ?"); 368 pstmt = db.prepareStatement("SELECT sum(frequency) FROM " + token_table + " WHERE datasource_id = ? AND column_id = ?"); 350 369 } else if(type == CountType.Unique) { 351 370 pstmt = db.prepareStatement("SELECT unique_count FROM " + fields_table + " WHERE datasource_id = ? AND column_id = ?"); … … 358 377 ResultSet rs = pstmt.executeQuery(); 359 378 if(rs.next()) { 379 int i = rs.getInt(1); 380 if(type_table == null){ 381 type_table = new Hashtable<DataColumn,Hashtable<Integer,Integer>>(); 382 field_counts.put(type, type_table); 383 } 384 if(column_table == null){ 385 column_table = new Hashtable<Integer,Integer>(); 386 type_table.put(target_col, column_table); 387 } 388 389 column_table.put(ds_id, i); 360 390 return rs.getInt(1); 361 391 } … … 463 493 464 494 if(ret == null){ 465 if(union_freq_stmt == null ){495 if(union_freq_stmt == null || col_id_stmt == null){ 466 496 try{ 467 497 union_freq_stmt = db.prepareStatement(UNION_FREQ_QUERY); 498 col_id_stmt = db.prepareStatement(COL_ID_QUERY); 468 499 }catch(SQLException sqle){ 469 500 return null; … … 474 505 try{ 475 506 ret = new Hashtable<String,Integer>(); 476 union_freq_stmt.setString(1, demographic); 507 col_id_stmt.setString(1, demographic); 508 rs = col_id_stmt.executeQuery(); 509 rs.next(); 510 int col_id = rs.getInt(1); 511 union_freq_stmt.setInt(1, col_id); 477 512 rs = union_freq_stmt.executeQuery(); 478 513 while(rs.next()){ 479 514 String dem = rs.getString(1); 480 515 int freq = rs.getInt(2); 481 ret.put(dem, freq); 516 Integer entry = ret.get(dem); 517 if(entry == null){ 518 ret.put(dem, freq); 519 } else { 520 ret.put(dem, entry + freq); 521 } 482 522 } 483 523 union_values.put(demographic, ret); … … 503 543 } 504 544 double avg = sum / freqs.keySet().size(); 505 506 e = freqs.keys(); 545 if(freqs.get(value) < avg){ 546 return true; 547 } 548 549 /*e = freqs.keys(); 507 550 while(e.hasMoreElements()){ 508 551 String token = e.nextElement(); … … 511 554 return true; 512 555 } 513 } 556 }*/ 514 557 return false; 515 558 } … … 518 561 Hashtable<String,Integer> freqs = unionUniqueTokens(demographic); 519 562 Enumeration<String> e = freqs.keys(); 520 intsum = 0;563 double sum = 0; 521 564 while(e.hasMoreElements()){ 522 565 String token = e.nextElement(); … … 524 567 sum += freq; 525 568 } 526 double avg = sum / freqs.keySet().size(); 527 528 e = freqs.keys(); 569 double avg = sum / (double)freqs.keySet().size(); 570 double val_freq = freqs.get(value); 571 572 if(val_freq < avg){ 573 return true; 574 } 575 576 /*e = freqs.keys(); 529 577 while(e.hasMoreElements()){ 530 578 String token = e.nextElement(); 531 579 int freq = freqs.get(token); 532 if(freq < avg && token.equals(value)){ 533 return true; 534 } 535 } 580 581 }*/ 536 582 return false; 583 537 584 } 538 585