1   /*
2    *  DatabaseCorpusImpl.java
3    *
4    *  Copyright (c) 1998-2005, The University of Sheffield.
5    *
6    *  This file is part of GATE (see http://gate.ac.uk/), and is free
7    *  software, licenced under the GNU Library General Public License,
8    *  Version 2, June 1991 (in the distribution as file licence.html,
9    *  and also available at http://gate.ac.uk/gate/licence.html).
10   *
11   *  Marin Dimitrov, 05/Nov/2001
12   *
13   *  $Id: DatabaseCorpusImpl.java,v 1.20 2005/01/11 13:51:31 ian Exp $
14   */
15  
16  package gate.corpora;
17  
18  import java.util.*;
19  
20  import junit.framework.Assert;
21  
22  import gate.*;
23  import gate.creole.ResourceInstantiationException;
24  import gate.event.*;
25  import gate.persist.*;
26  import gate.security.SecurityInfo;
27  import gate.util.*;
28  
29  
30  public class DatabaseCorpusImpl extends CorpusImpl
31                                  implements DatastoreListener,
32                                             EventAwareCorpus {
33  
  /** Debug flag */
  private static final boolean DEBUG = false;

  /**
   * Raised by setFeatures() and by the feature map listener; lowered again
   * in resourceWritten() when the datastore reports this corpus as written.
   */
  private boolean featuresChanged;
  /**
   * Raised by setName(); lowered again in resourceWritten() when the
   * datastore reports this corpus as written.
   */
  private boolean nameChanged;
  /**
   * The listener for the events coming from the features.
   */
  protected EventsHandler eventHandler;
  /**
   * One DocumentData (name + persistent ID) per corpus position. get()
   * asserts that this list has the same size as supportList; a null slot in
   * supportList means the document at that position is not loaded.
   */
  protected List documentData;
  /**
   * Persistent IDs of documents removed from the corpus that were not newly
   * added; emptied in resourceWritten().
   */
  protected List removedDocuments;
  /**
   * Persistent IDs of already persistent documents that were added to the
   * corpus; emptied in resourceWritten().
   */
  protected List  addedDocuments;
46  
47    public DatabaseCorpusImpl() {
48      super();
49    }
50  
51  
52    public DatabaseCorpusImpl(String _name,
53                              DatabaseDataStore _ds,
54                              Long _persistenceID,
55                              FeatureMap _features,
56                              Vector _dbDocs) {
57  
58      super();
59  
60      this.name = _name;
61      this.dataStore = _ds;
62      this.lrPersistentId = _persistenceID;
63      this.features = _features;
64      this.documentData =  _dbDocs;
65      this.supportList = new ArrayList(this.documentData.size());
66      this.removedDocuments = new ArrayList();
67      //just allocate space for this one, don't initialize it -
68      //invokations of add() will add elements to it
69      this.addedDocuments = new ArrayList();
70  
71      //init the document list
72      for (int i=0; i< this.documentData.size(); i++) {
73        this.supportList.add(null);
74      }
75  
76      this.featuresChanged = false;
77      this.nameChanged = false;
78  
79      //3. add the listeners for the features
80      if (eventHandler == null)
81        eventHandler = new EventsHandler();
82      this.features.addFeatureMapListener(eventHandler);
83  
84  
85      //4. add self as listener for the data store, so that we'll know when the DS is
86      //synced and we'll clear the isXXXChanged flags
87      this.dataStore.addDatastoreListener(this);
88    }
89  
90  
91    public boolean add(Object o){
92  
93      Assert.assertNotNull(o);
94      boolean result = false;
95  
96      //accept only documents
97      if (false == o instanceof Document) {
98        throw new IllegalArgumentException();
99      }
100 
101     Document doc = (Document)o;
102 
103     //assert docs are either transient or from the same datastore
104     if (isValidForAdoption(doc)) {
105       result = super.add(doc);
106     }
107     else {
108       return false;
109     }
110 
111     //add to doc data too
112 /* Was:
113     DocumentData newDocData = new DocumentData(doc.getName(),null);
114 */
115     DocumentData newDocData = new DocumentData(doc.getName(),
116                                                doc.getLRPersistenceId());
117 
118     this.documentData.add(newDocData);
119 
120     //add the LRID to the set of newly added documents so that upon sync() a reference
121     // from the doc to the corpus will be added in the database
122     if (null != doc.getLRPersistenceId()) {
123       this.addedDocuments.add(doc.getLRPersistenceId());
124 //Out.prln("adding [" + doc.getLRPersistenceId() + "] to NewlyAddedDocs...");
125     }
126 
127     if (result) {
128       fireDocumentAdded(new CorpusEvent(this,
129                                         doc,
130                                         this.supportList.size()-1,
131                                         CorpusEvent.DOCUMENT_ADDED));
132     }
133 
134     return result;
135   }
136 
137 
138   public void add(int index, Object element){
139 
140     Assert.assertNotNull(element);
141     Assert.assertTrue(index >= 0);
142 
143     long    collInitialSize = this.supportList.size();
144 
145     //accept only documents
146     if (false == element instanceof Document) {
147       throw new IllegalArgumentException();
148     }
149 
150     Document doc = (Document)element;
151 
152     //assert docs are either transient or from the same datastore
153     if (isValidForAdoption(doc)) {
154       super.add(index,doc);
155 
156       //add to doc data too
157       DocumentData newDocData = new DocumentData(doc.getName(),null);
158       this.documentData.add(index,newDocData);
159 
160       //add the LRID to the set of newly added documents so that upon sync() a reference
161       // from the doc to the corpus will be added in the database
162       if (null != doc.getLRPersistenceId()) {
163 
164       this.addedDocuments.add(doc.getLRPersistenceId());
165 //Out.prln("adding ["+doc.getLRPersistenceId()+"] to NewlyAddedDocs...");
166       }
167 
168       //if added then fire event
169       if (this.supportList.size() > collInitialSize) {
170         fireDocumentAdded(new CorpusEvent(this,
171                                           doc,
172                                           index,
173                                           CorpusEvent.DOCUMENT_ADDED));
174       }
175     }
176   }
177 
178 
179 
180   public boolean addAll(Collection c){
181 
182     boolean collectionChanged = false;
183 
184     Iterator it = c.iterator();
185     while (it.hasNext()) {
186       Document doc = (Document)it.next();
187       if (isValidForAdoption(doc)) {
188         collectionChanged |= add(doc);
189       }
190     }
191 
192     return collectionChanged;
193   }
194 
195 
196   public boolean addAll(int index, Collection c){
197 
198     Assert.assertTrue(index >=0);
199 
200     //funny enough add(index,element) returns void and not boolean
201     //so we can't use it
202     boolean collectionChanged = false;
203     int collInitialSize = this.supportList.size();
204     int currIndex = index;
205 
206     Iterator it = c.iterator();
207     while (it.hasNext()) {
208       Document doc = (Document)it.next();
209       if (isValidForAdoption(doc)) {
210         add(currIndex++,doc);
211       }
212     }
213 
214     return (this.supportList.size() > collInitialSize);
215   }
216 
217 
218   private boolean isValidForAdoption(LanguageResource lr) {
219 
220     Long lrID = (Long)lr.getLRPersistenceId();
221 
222     if (null == lrID ||
223         (this.getDataStore() != null && lr.getDataStore().equals(this.getDataStore()))) {
224       return true;
225     }
226     else {
227       return false;
228     }
229   }
230 
231   public void resourceAdopted(DatastoreEvent evt){
232   }
233 
234   public void resourceDeleted(DatastoreEvent evt){
235 
236     Assert.assertNotNull(evt);
237     Long  deletedID = (Long)evt.getResourceID();
238     Assert.assertNotNull(deletedID);
239 
240     //unregister self as listener from the DataStore
241     if (deletedID.equals(this.getLRPersistenceId())) {
242       //someone deleted this corpus
243       this.supportList.clear();
244       getDataStore().removeDatastoreListener(this);
245     }
246 
247     //check if the ID is of a document the corpus contains
248     Iterator it = this.supportList.iterator();
249     while (it.hasNext()) {
250       Document doc = (Document)it.next();
251       if (doc.getLRPersistenceId().equals(deletedID)) {
252         this.supportList.remove(doc);
253         break;
254       }
255     }
256   }
257 
258   public void resourceWritten(DatastoreEvent evt){
259     Assert.assertNotNull(evt);
260     Assert.assertNotNull(evt.getResourceID());
261 
262     //is the event for us?
263     if (evt.getResourceID().equals(this.getLRPersistenceId())) {
264       //wow, the event is for me
265       //clear all flags, the content is synced with the DB
266       this.featuresChanged =
267         this.nameChanged = false;
268 
269       this.removedDocuments.clear();
270       this.addedDocuments.clear();
271     }
272   }
273 
274 
275   public void resourceUnloaded(CreoleEvent e) {
276 
277     Assert.assertNotNull(e);
278     Assert.assertNotNull(e.getResource());
279 
280     Resource res = e.getResource();
281 
282     if (res instanceof Document) {
283 
284       Document doc = (Document) res;
285 
286       if (DEBUG) {
287         Out.prln("resource Unloaded called ");
288       }
289 
290       //remove from the corpus too, if a transient one
291       if (null == doc.getLRPersistenceId()) {
292         //@FIXME - not sure we need this
293         super.remove(doc);
294       }
295       else {
296         //unload all occurences
297         //see if we can find it first. If not, then judt return
298         int index = findDocument(doc);
299         if (index == -1) {
300           //not our document
301           return;
302         }
303         else {
304           //3. unload from internal data structures
305 
306           //@FIXME - not sure we need this
307           //super.remove(doc);
308 
309           //remove from the list of loaded documents
310 //System.out.println("resourceUnloaded called -- removing doc[" + index +"] from supportList...");
311           //WAS: Document oldDoc = (Document)this.supportList.remove(index);
312           this.supportList.set(index, null);
313 
314           if (DEBUG)
315             Out.prln("corpus: document " + index + " unloaded and set to null");
316         } //else
317       } //else
318     } //if
319   }
320 
321 
322   public boolean isResourceChanged(int changeType) {
323 
324     switch(changeType) {
325 
326       case EventAwareLanguageResource.RES_FEATURES:
327         return this.featuresChanged;
328       case EventAwareLanguageResource.RES_NAME:
329         return this.nameChanged;
330       default:
331         throw new IllegalArgumentException();
332     }
333   }
334 
335   /**
336    * Returns true of an LR has been modified since the last sync.
337    * Always returns false for transient LRs.
338    */
339   public boolean isModified() {
340     return this.isResourceChanged(EventAwareLanguageResource.RES_FEATURES) ||
341             this.isResourceChanged(EventAwareLanguageResource.RES_NAME);
342   }
343 
344 
345 
346   /** Sets the name of this resource*/
347   public void setName(String name){
348     super.setName(name);
349 
350     this.nameChanged = true;
351   }
352 
353 
354   /** Set the feature set */
355   public void setFeatures(FeatureMap features) {
356     //1. save them first, so we can remove the listener
357     FeatureMap oldFeatures = this.features;
358 
359     super.setFeatures(features);
360 
361     this.featuresChanged = true;
362 
363     //4. sort out the listeners
364     if (eventHandler != null)
365       oldFeatures.removeFeatureMapListener(eventHandler);
366     else
367       eventHandler = new EventsHandler();
368     this.features.addFeatureMapListener(eventHandler);
369   }
370 
371 
372   /**
373    * All the events from the features are handled by
374    * this inner class.
375    */
376   class EventsHandler implements gate.event.FeatureMapListener {
377     public void featureMapUpdated(){
378       //tell the document that its features have been updated
379       featuresChanged = true;
380     }
381   }
382 
383   /**
384    * Overriden to remove the features listener, when the document is closed.
385    */
386   public void cleanup() {
387     super.cleanup();
388     if (eventHandler != null)
389       this.features.removeFeatureMapListener(eventHandler);
390   }///inner class EventsHandler
391 
392 
393 
394   public void setInitData__$$__(Object data) {
395 
396     HashMap initData = (HashMap)data;
397 
398     this.name = (String)initData.get("CORP_NAME");
399     this.dataStore = (DatabaseDataStore)initData.get("DS");
400     this.lrPersistentId = (Long)initData.get("LR_ID");
401     this.features = (FeatureMap)initData.get("CORP_FEATURES");
402     this.supportList = new ArrayList((List)initData.get("CORP_SUPPORT_LIST"));
403 
404     this.documentData = new ArrayList(this.supportList.size());
405     this.removedDocuments = new ArrayList();
406     this.addedDocuments = new ArrayList();
407 
408     //init the documentData list
409     for (int i=0; i< this.supportList.size(); i++) {
410       Document dbDoc = (Document)this.supportList.get(i);
411       DocumentData dd = new DocumentData(dbDoc.getName(),dbDoc.getLRPersistenceId());
412       this.documentData.add(dd);
413     }
414 
415     this.featuresChanged = false;
416     this.nameChanged = false;
417 
418      //3. add the listeners for the features
419     if (eventHandler == null)
420       eventHandler = new EventsHandler();
421     this.features.addFeatureMapListener(eventHandler);
422 
423 
424     //4. add self as listener for the data store, so that we'll know when the DS is
425     //synced and we'll clear the isXXXChanged flags
426     this.dataStore.addDatastoreListener(this);
427   }
428 
429   public Object getInitData__$$__(Object initData) {
430     return null;
431   }
432 
433   /**
434    * Gets the names of the documents in this corpus.
435    * @return a {@link List} of Strings representing the names of the documents
436    * in this corpus.
437    */
438   public List getDocumentNames(){
439 
440     List docsNames = new ArrayList();
441 
442     if(this.documentData == null)
443       return docsNames;
444 
445     Iterator iter = this.documentData.iterator();
446     while (iter.hasNext()) {
447       DocumentData data = (DocumentData)iter.next();
448       docsNames.add(data.getDocumentName());
449     }
450 
451     return docsNames;
452   }
453 
454 
455   /**
456    * Gets the name of a document in this corpus.
457    * @param index the index of the document
458    * @return a String value representing the name of the document at
459    * <tt>index</tt> in this corpus.<P>
460    */
461   public String getDocumentName(int index){
462 
463     if (index >= this.documentData.size()) return "No such document";
464 
465     return ((DocumentData)this.documentData.get(index)).getDocumentName();
466   }
467 
468   /**
469    * returns a document in the coprus by index
470    * @param index the index of the document
471    * @return an Object value representing DatabaseDocumentImpl
472    */
473   public Object get(int index){
474 
475     //0. preconditions
476     Assert.assertTrue(index >= 0);
477     Assert.assertTrue(index < this.documentData.size());
478     Assert.assertTrue(index < this.supportList.size());
479     Assert.assertTrue(this.documentData.size() == this.supportList.size());
480 
481     if (index >= this.documentData.size())
482       return null;
483 
484     Object res = this.supportList.get(index);
485 
486     //if the document is null, then I must get it from the database
487     if (null == res) {
488       Long currLRID = (Long)((DocumentData)this.documentData.get(index)).getPersistentID();
489       FeatureMap params = Factory.newFeatureMap();
490       params.put(DataStore.DATASTORE_FEATURE_NAME, this.getDataStore());
491       params.put(DataStore.LR_ID_FEATURE_NAME, currLRID);
492 
493       try {
494         Document dbDoc = (Document)Factory.createResource(DBHelper.DOCUMENT_CLASS, params);
495 
496         if (DEBUG) {
497           Out.prln("Loaded document :" + dbDoc.getName());
498         }
499 
500         //change the result to the newly loaded doc
501         res = dbDoc;
502 
503         //finally replace the doc with the instantiated version
504         Assert.assertNull(this.supportList.get(index));
505         this.supportList.set(index, dbDoc);
506       }
507       catch (ResourceInstantiationException ex) {
508         Err.prln("Error reading document inside a serialised corpus.");
509         throw new GateRuntimeException(ex.getMessage());
510       }
511     }
512 
513     return res;
514   }
515 
516   public Object remove(int index){
517 
518     //1. get the persistent id and add it to the removed list
519     DocumentData docData = (DocumentData)this.documentData.get(index);
520     Long removedID = (Long)docData.getPersistentID();
521 //    Assert.assertTrue(null != removedID);
522     //removedID may be NULL if the doc is still transient
523 
524     //2. add to the list of removed documents but only if it's not newly added
525     //othewrwise just ignore
526     if (null != removedID && false == this.addedDocuments.contains(removedID)) {
527       this.removedDocuments.add(removedID);
528 //Out.prln("adding ["+removedID+"] to RemovedDocs...");
529     }
530 
531     //3. delete
532     this.documentData.remove(index);
533     Document res = (Document)this.supportList.remove(index);
534 
535     //4. remove the LRID to the set of newly added documents (if there) so that upon sync() a reference
536     // from the doc to the corpus will NOT be added in the database
537     if (this.addedDocuments.contains(removedID)) {
538       this.addedDocuments.remove(removedID);
539 //Out.prln("removing ["+removedID+"] from NewlyAddedDocs...");
540     }
541 
542     //5, fire events
543     fireDocumentRemoved(new CorpusEvent(DatabaseCorpusImpl.this,
544                                         res,
545                                         index,
546                                         CorpusEvent.DOCUMENT_REMOVED));
547     return res;
548 
549   }
550 
551 
552   public boolean remove(Object obj){
553 
554     //0. preconditions
555     Assert.assertNotNull(obj);
556     Assert.assertTrue(obj instanceof DatabaseDocumentImpl);
557 
558     if (false == obj instanceof Document) {
559       return false;
560     }
561 
562     Document doc = (Document) obj;
563 
564     //see if we can find it first. If not, then judt return
565     int index = findDocument(doc);
566     if (index == -1) {
567       return false;
568     }
569 
570     if(index < this.documentData.size()) {
571       //we found it, so remove it
572 
573       //1. get the persistent id and add it to the removed list
574       DocumentData docData = (DocumentData)this.documentData.get(index);
575       Long removedID = (Long)docData.getPersistentID();
576       //Assert.assertTrue(null != removedID);
577       //removed ID may be null - doc is still transient
578 
579       //2. add to the list of removed documents
580       if (null != removedID && false == this.addedDocuments.contains(removedID)) {
581         this.removedDocuments.add(removedID);
582 //Out.prln("adding ["+removedID+"] to RemovedDocs...");
583       }
584 
585       //3. delete
586       this.documentData.remove(index);
587       Document oldDoc = (Document) this.supportList.remove(index);
588 
589       //4. remove the LRID to the set of newly added documents (if there) so that upon sync() a reference
590       // from the doc to the corpus will NOT be added in the database
591       if (this.addedDocuments.contains(removedID)) {
592         this.addedDocuments.remove(removedID);
593 //Out.prln("removing ["+removedID+"] from NewlyAddedDocs...");
594       }
595 
596       //5. fire events
597       fireDocumentRemoved(new CorpusEvent(DatabaseCorpusImpl.this,
598                                           oldDoc,
599                                           index,
600                                           CorpusEvent.DOCUMENT_REMOVED));
601     }
602 
603     return true;
604   }
605 
606 
607   public int findDocument(Document doc) {
608 
609     boolean found = false;
610     DocumentData docData = null;
611 
612     //first try finding the document in memory
613     int index = this.supportList.indexOf(doc);
614 
615     if (index > -1 && index < this.documentData.size()) {
616       return index;
617     }
618 
619     //else try finding a document with the same name and persistent ID
620     Iterator iter = this.documentData.iterator();
621 
622     for (index = 0;  iter.hasNext(); index++) {
623       docData = (DocumentData) iter.next();
624       if (docData.getDocumentName().equals(doc.getName()) &&
625           docData.getPersistentID().equals(doc.getLRPersistenceId())) {
626         found = true;
627         break;
628       }
629     }
630 
631     if (found && index < this.documentData.size()) {
632       return index;
633     }
634     else {
635       return -1;
636     }
637   }//findDocument
638 
639 
640   public boolean contains(Object o){
641     //return true if:
642     // - the document data list contains a document with such a name
643     //   and persistent id
644 
645     if(false == o instanceof Document)
646       return false;
647 
648     int index = findDocument((Document) o);
649 
650     if (index < 0) {
651       return false;
652     }
653     else {
654       return true;
655     }
656   }
657 
658   public Iterator iterator(){
659     return new DatabaseCorpusIterator(this.documentData);
660   }
661 
662   public List getLoadedDocuments() {
663     return new ArrayList(this.supportList);
664   }
665 
666   public List getRemovedDocuments() {
667     return new ArrayList(this.removedDocuments);
668   }
669 
670   public List getAddedDocuments() {
671     return new ArrayList(this.addedDocuments);
672   }
673 
674   private class DatabaseCorpusIterator implements Iterator {
675 
676       private Iterator docDataIter;
677       private List docDataList;
678 
679       public DatabaseCorpusIterator(List docDataList) {
680         this.docDataList = docDataList;
681         this.docDataIter = this.docDataList.iterator();
682       }
683 
684       public boolean hasNext() {
685         return docDataIter.hasNext();
686       }
687 
688       public Object next(){
689 
690         //try finding a document with the same name and persistent ID
691         DocumentData docData = (DocumentData)docDataIter.next();
692         int index = this.docDataList.indexOf(docData);
693         return DatabaseCorpusImpl.this.get(index);
694       }
695 
696       public void remove() {
697         throw new UnsupportedOperationException("DatabaseCorpusImpl does not " +
698                     "support remove in the iterators");
699       }
700   }
701 
702 
703   /**
704    * Unloads the document from memory, but calls sync() first, to store the
705    * changes
706    */
707   public void unloadDocument(int index) {
708 
709     //preconditions
710     Assert.assertTrue(index >= 0);
711 
712     //1. check whether its been loaded and is a persistent one
713     // if a persistent doc is not loaded, there's nothing we need to do
714     if ( (! isDocumentLoaded(index)) && isPersistentDocument(index)) {
715       return;
716     }
717 
718     //2. sync the document before releasing it from memory, because the
719     //creole register garbage collects all LRs which are not used any more
720     Document doc = (Document)this.supportList.get(index);
721     Assert.assertNotNull(doc);
722 
723     try {
724 
725       //if the document is not already adopted, we need to do that first
726       if (doc.getLRPersistenceId() == null) {
727 
728         //3.2 get the security info for the corpus
729         SecurityInfo si = this.getDataStore().getSecurityInfo(this);
730         Document dbDoc = (Document) this.getDataStore().adopt(doc, si);
731       }
732       else {
733         //if it is adopted, just sync it
734         this.getDataStore().sync(doc);
735       }
736 
737       //3. remove the document from the memory
738       //do this, only if the saving has succeeded
739       // WAS this.supportList.remove(index);
740       this.supportList.set(index,null);
741     }
742     catch (PersistenceException pex) {
743       throw new GateRuntimeException("Error unloading document from corpus"
744                       + "because document sync failed: " + pex.getMessage());
745     }
746     catch (gate.security.SecurityException sex) {
747       throw new GateRuntimeException("Error unloading document from corpus"
748                       + "because of document access error: " + sex.getMessage());
749     }
750 
751   }
752 
753   /**
754    * Unloads a document from memory
755    */
756   public void unloadDocument(Document doc) {
757 
758     Assert.assertNotNull(doc);
759 
760     //1. determine the index of the document; if not there, do nothing
761     int index = findDocument(doc);
762 
763     if (index == -1) {
764       return;
765     }
766 
767     unloadDocument(index);
768   }
769 
770 
771   /**
772    * This method returns true when the document is already loaded in memory
773    */
774   public boolean isDocumentLoaded(int index) {
775 
776     //preconditions
777     Assert.assertTrue(index >= 0);
778 
779     if (this.supportList == null || this.supportList.isEmpty()) {
780       return false;
781     }
782 
783     return this.supportList.get(index) != null;
784   }
785 
786   /**
787    * This method returns true when the document is already stored on disk
788    * i.e., is not transient
789    */
790   public boolean isPersistentDocument(int index) {
791 
792     //preconditions
793     Assert.assertTrue(index >= 0);
794 
795     if (this.supportList == null || this.supportList.isEmpty()) {
796       return false;
797     }
798 
799     return (((DocumentData)this.documentData.get(index)).getPersistentID() != null);
800   }
801 
802 
803   public boolean equals(Object o){
804 
805     if (! (o instanceof DatabaseCorpusImpl))
806       return false;
807 
808     DatabaseCorpusImpl dbCorp = (DatabaseCorpusImpl)o;
809 
810     if (this.getDataStore() != null && this.getDataStore() != dbCorp.getDataStore()) {
811       return false;
812     }
813 
814     if (this.getLRPersistenceId() != null && this.getLRPersistenceId() != dbCorp.getLRPersistenceId()) {
815       return false;
816     }
817 
818     return supportList.equals(o);
819   }
820 
821 
822 }