1
14 package gate.util;
15
16 import java.util.*;
17
18 import gate.Annotation;
19
20
37 public class AnnotationDiffer {
38
39
40
44 public static interface Pairing{
45
50 public Annotation getKey();
51
52
57 public Annotation getResponse();
58
59
65 public int getType();
66 }
67
68
75 public List calculateDiff(Collection key, Collection response){
76 if(key == null || key.size() == 0)
78 keyList = new ArrayList();
79 else
80 keyList = new ArrayList(key);
81
82 if(response == null || response.size() == 0)
83 responseList = new ArrayList();
84 else
85 responseList = new ArrayList(response);
86
87 keyChoices = new ArrayList(keyList.size());
88 keyChoices.addAll(Collections.nCopies(keyList.size(), null));
89 responseChoices = new ArrayList(responseList.size());
90 responseChoices.addAll(Collections.nCopies(responseList.size(), null));
91
92 possibleChoices = new ArrayList();
93
94 for(int i = 0; i < keyList.size(); i++){
96 for(int j =0; j < responseList.size(); j++){
97 Annotation keyAnn = (Annotation)keyList.get(i);
98 Annotation resAnn = (Annotation)responseList.get(j);
99 PairingImpl choice = null;
100 if(keyAnn.coextensive(resAnn)){
101 if(keyAnn.isCompatible(resAnn, significantFeaturesSet)){
103 choice = new PairingImpl(i, j, CORRECT);
105 }else{
106 choice = new PairingImpl(i, j, WRONG);
109 }
110 }else if(keyAnn.overlaps(resAnn)){
111 if(keyAnn.isPartiallyCompatible(resAnn, significantFeaturesSet)){
113 choice = new PairingImpl(i, j, PARTIALLY_CORRECT);
114 }else{
115 choice = new PairingImpl(i, j, WRONG);
116 }
117 }
118
119
137 if (choice != null) {
139 addPairing(choice, i, keyChoices);
140 addPairing(choice, j, responseChoices);
141 possibleChoices.add(choice);
142 }
143 } }
146 Collections.sort(possibleChoices, new PairingScoreComparator());
149 Collections.reverse(possibleChoices);
150 finalChoices = new ArrayList();
151 correctMatches = 0;
152 partiallyCorrectMatches = 0;
153 missing = 0;
154 spurious = 0;
155
156 while(!possibleChoices.isEmpty()){
157 PairingImpl bestChoice = (PairingImpl)possibleChoices.remove(0);
158 bestChoice.consume();
159 finalChoices.add(bestChoice);
160 switch(bestChoice.type){
161 case CORRECT:{
162 if(correctAnnotations == null) correctAnnotations = new HashSet();
163 correctAnnotations.add(bestChoice.getResponse());
164 correctMatches++;
165 break;
166 }
167 case PARTIALLY_CORRECT:{
168 if(partiallyCorrectAnnotations == null) partiallyCorrectAnnotations = new HashSet();
169 partiallyCorrectAnnotations.add(bestChoice.getResponse());
170 partiallyCorrectMatches++;
171 break;
172 }
173 case WRONG:{
174 if(bestChoice.getKey() != null){
175 if(missingAnnotations == null) missingAnnotations = new HashSet();
177 missingAnnotations.add(bestChoice.getKey());
178 missing ++;
179 }
180 if(bestChoice.getResponse() != null){
181 if(spuriousAnnotations == null) spuriousAnnotations = new HashSet();
183 spuriousAnnotations.add(bestChoice.getResponse());
184 spurious ++;
185 }
186 break;
187 }
188 default:{
189 throw new GateRuntimeException("Invalid pairing type: " +
190 bestChoice.type);
191 }
192 }
193 }
194 for(int i = 0; i < keyChoices.size(); i++){
197 List aList = (List)keyChoices.get(i);
198 if(aList == null || aList.isEmpty()){
199 if(missingAnnotations == null) missingAnnotations = new HashSet();
200 missingAnnotations.add((Annotation)(keyList.get(i)));
201 finalChoices.add(new PairingImpl(i, -1, WRONG));
202 missing ++;
203 }
204 }
205
206 for(int i = 0; i < responseChoices.size(); i++){
208 List aList = (List)responseChoices.get(i);
209 if(aList == null || aList.isEmpty()){
210 if(spuriousAnnotations == null) spuriousAnnotations = new HashSet();
211 spuriousAnnotations.add((Annotation)(responseList.get(i)));
212 finalChoices.add(new PairingImpl(-1, i, WRONG));
213 spurious ++;
214 }
215 }
216
217 return finalChoices;
218 }
219
220
225 public double getPrecisionStrict(){
226 if(responseList.size() == 0) {
227 return 1.0;
228 }
229 return correctMatches/(double)responseList.size();
230 }
231
232
237 public double getRecallStrict(){
238 if(keyList.size() == 0) {
239 return 1.0;
240 }
241 return correctMatches/(double)keyList.size();
242 }
243
244
249 public double getPrecisionLenient(){
250 if(responseList.size() == 0) {
251 return 1.0;
252 }
253 return ((double)correctMatches + partiallyCorrectMatches) / (double)responseList.size();
254 }
255
256
260 public double getPrecisionAverage() {
261 return ((double)getPrecisionLenient() + getPrecisionStrict()) / (double)(2.0);
262 }
263
264
269 public double getRecallLenient(){
270 if(keyList.size() == 0) {
271 return 1.0;
272 }
273 return ((double)correctMatches + partiallyCorrectMatches) / (double)keyList.size();
274 }
275
276
280 public double getRecallAverage() {
281 return ((double) getRecallLenient() + getRecallStrict()) / (double)(2.0);
282 }
283
284
293 public double getFMeasureStrict(double beta){
294 double precision = getPrecisionStrict();
295 double recall = getRecallStrict();
296 double betaSq = beta * beta;
297 double answer = (double)(((double)(betaSq + 1) * precision * recall ) / (double)(betaSq * precision + recall));
298 if(Double.isNaN(answer)) answer = 0.0;
299 return answer;
300 }
301
302
310 public double getFMeasureLenient(double beta){
311 double precision = getPrecisionLenient();
312 double recall = getRecallLenient();
313 double betaSq = beta * beta;
314 double answer = (double)(((double)(betaSq + 1) * precision * recall) / ((double)betaSq * precision + recall));
315 if(Double.isNaN(answer)) answer = 0.0;
316 return answer;
317 }
318
319
326 public double getFMeasureAverage(double beta) {
327 double answer = ((double)getFMeasureLenient(beta) + (double)getFMeasureStrict(beta)) / (double)(2.0);
328 return answer;
329 }
330
331
335 public int getCorrectMatches(){
336 return correctMatches;
337 }
338
339
343 public int getPartiallyCorrectMatches(){
344 return partiallyCorrectMatches;
345 }
346
347
351 public int getMissing(){
352 return missing;
353 }
354
355
359 public int getSpurious(){
360 return spurious;
361 }
362
363
367 public int getFalsePositivesStrict(){
368 return responseList.size() - correctMatches;
369 }
370
371
376 public int getFalsePositivesLenient(){
377 return responseList.size() - correctMatches - partiallyCorrectMatches;
378 }
379
380
384 public int getKeysCount() {
385 return keyList.size();
386 }
387
388
392 public int getResponsesCount() {
393 return responseList.size();
394 }
395
396
399 public void printMissmatches(){
400 Iterator iter = finalChoices.iterator();
402 while(iter.hasNext()){
403 PairingImpl aChoice = (PairingImpl)iter.next();
404 switch(aChoice.type){
405 case PARTIALLY_CORRECT:{
406 System.out.println("Missmatch (partially correct):");
407 System.out.println("Key: " + keyList.get(aChoice.keyIndex).toString());
408 System.out.println("Response: " + responseList.get(aChoice.responseIndex).toString());
409 break;
410 }
411 }
412 }
413
414 for(int i = 0; i < keyChoices.size(); i++){
416 List aList = (List)keyChoices.get(i);
417 if(aList == null || aList.isEmpty()){
418 System.out.println("Missed Key: " + keyList.get(i).toString());
419 }
420 }
421
422 for(int i = 0; i < responseChoices.size(); i++){
424 List aList = (List)responseChoices.get(i);
425 if(aList == null || aList.isEmpty()){
426 System.out.println("Spurious Response: " + responseList.get(i).toString());
427 }
428 }
429 }
430
431
432
433
438 void sanityCheck()throws Exception{
439 Iterator iter =keyChoices.iterator();
441 while(iter.hasNext()){
442 List choices = (List)iter.next();
443 if(choices != null){
444 if(choices.size() > 1){
445 throw new Exception("Multiple choices found!");
446 }else if(!choices.isEmpty()){
447 PairingImpl aChoice = (PairingImpl)choices.get(0);
449 List otherChoices = (List)responseChoices.get(aChoice.responseIndex);
451 if(otherChoices == null ||
452 otherChoices.size() != 1 ||
453 otherChoices.get(0) != aChoice){
454 throw new Exception("Reciprocity error!");
455 }
456 }
457 }
458 }
459
460 iter =responseChoices.iterator();
461 while(iter.hasNext()){
462 List choices = (List)iter.next();
463 if(choices != null){
464 if(choices.size() > 1){
465 throw new Exception("Multiple choices found!");
466 }else if(!choices.isEmpty()){
467 PairingImpl aChoice = (PairingImpl)choices.get(0);
469 List otherChoices = (List)keyChoices.get(aChoice.keyIndex);
471 if(otherChoices == null){
472 throw new Exception("Reciprocity error : null!");
473 }else if(otherChoices.size() != 1){
474 throw new Exception("Reciprocity error: not 1!");
475 }else if(otherChoices.get(0) != aChoice){
476 throw new Exception("Reciprocity error: different!");
477 }
478 }
479 }
480 }
481 }
482
483
490 protected void addPairing(PairingImpl pairing, int index, List listOfPairings){
491 List existingChoices = (List)listOfPairings.get(index);
492 if(existingChoices == null){
493 existingChoices = new ArrayList();
494 listOfPairings.set(index, existingChoices);
495 }
496 existingChoices.add(pairing);
497 }
498
499
503 public java.util.Set getSignificantFeaturesSet() {
504 return significantFeaturesSet;
505 }
506
507
515 public void setSignificantFeaturesSet(java.util.Set significantFeaturesSet) {
516 this.significantFeaturesSet = significantFeaturesSet;
517 }
518
519
523 public class PairingImpl implements Pairing{
524 PairingImpl(int keyIndex, int responseIndex, int type) {
525 this.keyIndex = keyIndex;
526 this.responseIndex = responseIndex;
527 this.type = type;
528 scoreCalculated = false;
529 }
530
531 public int getScore(){
532 if(scoreCalculated) return score;
533 else{
534 calculateScore();
535 return score;
536 }
537 }
538
539 public Annotation getKey(){
540 return keyIndex == -1 ? null : (Annotation)keyList.get(keyIndex);
541 }
542
543 public Annotation getResponse(){
544 return responseIndex == -1 ? null :
545 (Annotation)responseList.get(responseIndex);
546 }
547
548 public int getType(){
549 return type;
550 }
551
552
557 public void consume(){
558 possibleChoices.remove(this);
559 List sameKeyChoices = (List)keyChoices.get(keyIndex);
560 sameKeyChoices.remove(this);
561 possibleChoices.removeAll(sameKeyChoices);
562
563 List sameResponseChoices = (List)responseChoices.get(responseIndex);
564 sameResponseChoices.remove(this);
565 possibleChoices.removeAll(sameResponseChoices);
566
567 Iterator iter = new ArrayList(sameKeyChoices).iterator();
568 while(iter.hasNext()){
569 ((PairingImpl)iter.next()).remove();
570 }
571 iter = new ArrayList(sameResponseChoices).iterator();
572 while(iter.hasNext()){
573 ((PairingImpl)iter.next()).remove();
574 }
575 sameKeyChoices.add(this);
576 sameResponseChoices.add(this);
577 }
578
579
582 protected void remove(){
583 List fromKey = (List)keyChoices.get(keyIndex);
584 fromKey.remove(this);
585 List fromResponse = (List)responseChoices.get(responseIndex);
586 fromResponse.remove(this);
587 }
588
589
593 void calculateScore(){
594 Set conflictSet = new HashSet();
596 conflictSet.addAll((List)responseChoices.get(responseIndex));
598 conflictSet.addAll((List)keyChoices.get(keyIndex));
600 conflictSet.remove(this);
602 score = type;
603 Iterator conflictIter = conflictSet.iterator();
604 while(conflictIter.hasNext()) score -= ((PairingImpl)conflictIter.next()).type;
605 scoreCalculated = true;
606 }
607
608 int keyIndex;
609 int responseIndex;
610 int type;
611 int score;
612 boolean scoreCalculated;
613 }
614
615
621 protected static class PairingScoreComparator implements Comparator{
622
630
631 public int compare(Object o1, Object o2){
632 PairingImpl first = (PairingImpl)o1;
633 PairingImpl second = (PairingImpl)o2;
634 int res = first.getScore() - second.getScore();
636 if(res == 0) res = first.getType() - second.getType();
638 if(res == 0){
641 res = (first.getKey() == null ? 0 : 1) +
642 (first.getResponse() == null ? 0 : 1) +
643 (second.getKey() == null ? 0 : -1) +
644 (second.getResponse() == null ? 0 : -1);
645 }
646 return res;
647 }
648 }
649
650
654 public static class PairingOffsetComparator implements Comparator{
655
659 public int compare(Object o1, Object o2){
660 Pairing first = (Pairing)o1;
661 Pairing second = (Pairing)o2;
662 Annotation key1 = first.getKey();
663 Annotation key2 = second.getKey();
664 Annotation res1 = first.getResponse();
665 Annotation res2 = second.getResponse();
666 Long start1 = key1 == null ? null : key1.getStartNode().getOffset();
667 if(start1 == null) start1 = res1.getStartNode().getOffset();
668 Long start2 = key2 == null ? null : key2.getStartNode().getOffset();
669 if(start2 == null) start2 = res2.getStartNode().getOffset();
670 int res = start1.compareTo(start2);
671 if(res == 0){
672 res = second.getType() - first.getType();
674 }
675
676 return res;
712 }
713
714 }
715
716
721 public Set getAnnotationsOfType(int type) {
722 switch(type) {
723 case CORRECT_TYPE:
724 return (correctAnnotations == null)? new HashSet() : correctAnnotations;
725 case PARTIALLY_CORRECT_TYPE:
726 return (partiallyCorrectAnnotations == null) ? new HashSet() : partiallyCorrectAnnotations;
727 case SPURIOUS_TYPE:
728 return (spuriousAnnotations == null) ? new HashSet() : spuriousAnnotations;
729 case MISSING_TYPE:
730 return (missingAnnotations == null) ? new HashSet() : missingAnnotations;
731 default:
732 return new HashSet();
733 }
734 }
735
736
737 public HashSet correctAnnotations, partiallyCorrectAnnotations,
738 missingAnnotations, spuriousAnnotations;
739
740
741
742 public static final int CORRECT_TYPE = 1;
743
744
749 public static final int PARTIALLY_CORRECT_TYPE = 2;
750
751
754 public static final int SPURIOUS_TYPE = 3;
755
756
759 public static final int MISSING_TYPE = 4;
760
761
764 public static final int CORRECT = 2;
765
766
769 public static final int PARTIALLY_CORRECT = 1;
770
771
774 public static final int WRONG = 0;
775
776
779 private java.util.Set significantFeaturesSet;
780
781
784 protected int correctMatches;
785
786
789 protected int partiallyCorrectMatches;
790
791
794 protected int missing;
795
796
799 protected int spurious;
800
801
804 protected List keyList;
805
806
809 protected List responseList;
810
811
814 protected List keyChoices;
815
816
819 protected List responseChoices;
820
821
824 protected List possibleChoices;
825
826
829 protected List finalChoices;
830
831 }
832