1   /*
2    *  DatabaseDocumentImpl.java
3    *
4    *  Copyright (c) 1998-2005, The University of Sheffield.
5    *
6    *  This file is part of GATE (see http://gate.ac.uk/), and is free
7    *  software, licenced under the GNU Library General Public License,
8    *  Version 2, June 1991 (in the distribution as file licence.html,
9    *  and also available at http://gate.ac.uk/gate/licence.html).
10   *
11   *  Marin Dimitrov, 16/Oct/2001
12   *
13   *  $Id: DatabaseDocumentImpl.java,v 1.66 2005/01/11 13:51:31 ian Exp $
14   */
15  
16  package gate.corpora;
17  
18  
19  import java.io.IOException;
20  import java.net.URL;
21  import java.sql.*;
22  import java.util.*;
23  
24  import junit.framework.Assert;
25  import oracle.jdbc.driver.OraclePreparedStatement;
26  
27  import gate.*;
28  import gate.annotation.AnnotationSetImpl;
29  import gate.annotation.DatabaseAnnotationSetImpl;
30  import gate.creole.ResourceInstantiationException;
31  import gate.event.*;
32  import gate.persist.*;
33  import gate.util.*;
34  
35  public class DatabaseDocumentImpl extends DocumentImpl
36                                    implements  //DatastoreListener,
37                                                //Document,
38                                                EventAwareDocument {
39  
  /** Enables the debug output printed while annotations are read from the database. */
  private static final boolean DEBUG = false;

  /** Set to true once the content has been read from the database. */
  private boolean     isContentRead;
  /** Monitor used to serialise the lazy reading of the content. */
  private Object      contentLock;
  /** JDBC connection used for the queries issued by this document. */
  private Connection  jdbcConn;
  /** Schema prefix that is prepended to the view names in the SQL statements. */
  private String      jdbcSchema;
  /** Database type derived from the JDBC URL (Oracle or PostgreSQL). */
  protected int       dbType;

  /** Set when the content is replaced through setContent(). */
  private boolean     contentChanged;
  /** Set when the feature map is replaced through setFeatures(). */
  private boolean     featuresChanged;
  /** Set when the document is renamed through setName(). */
  private boolean     nameChanged;
  // NOTE(review): only cleared by the constructors in this part of the file;
  // presumably set by code further down - confirm
  private boolean     documentChanged;

  // NOTE(review): only initialised (empty) in this part of the file; presumably
  // tracks the annotation sets removed from the document - confirm
  private Collection  removedAnotationSets;
  /** Names of the annotation sets newly created by getAnnotations(String). */
  private Collection  addedAnotationSets;

  /**
   * Parent document, or null. When set, getContent() returns the content of
   * the parent and setContent() refuses to change anything.
   */
  private Document    parentDocument;
  // NOTE(review): not referenced in this part of the file - confirm its use
  private int         maxAnnotationId;

  /**
   * The listener for the events coming from the features.
   */
  protected EventsHandler eventHandler;
63  
64  
65    public DatabaseDocumentImpl() {
66  
67      //super();
68      contentLock = new Object();
69  
70      this.namedAnnotSets = new HashMap();
71  //    this.defaultAnnots = new DatabaseAnnotationSetImpl(this);
72  
73      this.isContentRead = false;
74  
75      this.contentChanged = false;
76      this.featuresChanged = false;
77      this.nameChanged = false;
78      this.documentChanged = false;
79  
80      this.removedAnotationSets = new Vector();
81      this.addedAnotationSets = new Vector();
82  
83      parentDocument = null;
84    }
85  
86    private void setDatabaseInfo(Connection conn)
87      throws PersistenceException {
88  
89      String url = null;
90  
91      try {
92        url = conn.getMetaData().getURL();
93      }
94      catch(SQLException sqle) {
95        throw new PersistenceException("cannot get jdbc metadata: ["+sqle.getMessage()+"]");
96      }
97  
98      this.jdbcSchema = DBHelper.getSchemaPrefix(url);
99      this.dbType = DBHelper.getDatabaseType(url);
100     Assert.assertNotNull(this.jdbcSchema);
101     Assert.assertTrue(this.dbType == DBHelper.ORACLE_DB ||
102                       this.dbType == DBHelper.POSTGRES_DB);
103 
104   }
105 
106 
107   public DatabaseDocumentImpl(Connection conn)
108     throws PersistenceException {
109 
110     //super();
111     contentLock = new Object();
112 
113     this.namedAnnotSets = new HashMap();
114 //    this.defaultAnnots = new DatabaseAnnotationSetImpl(this);
115 
116     this.isContentRead = false;
117     this.jdbcConn = conn;
118     setDatabaseInfo(this.jdbcConn);
119 
120     this.contentChanged = false;
121     this.featuresChanged = false;
122     this.nameChanged = false;
123     this.documentChanged = false;
124 
125     this.removedAnotationSets = new Vector();
126     this.addedAnotationSets = new Vector();
127 
128     parentDocument = null;
129   }
130 
131 
132 /*  public DatabaseDocumentImpl(Connection _conn,
133                               String _name,
134                               DatabaseDataStore _ds,
135                               Long _persistenceID,
136                               DocumentContent _content,
137                               FeatureMap _features,
138                               Boolean _isMarkupAware,
139                               URL _sourceURL,
140                               Long _urlStartOffset,
141                               Long _urlEndOffset,
142                               AnnotationSet _default,
143                               Map _named) {
144 
145     //this.jdbcConn =  _conn;
146     this(_conn);
147 
148     this.name = _name;
149     this.dataStore = _ds;
150     this.lrPersistentId = _persistenceID;
151     this.content = _content;
152     this.isContentRead = true;
153     this.features = _features;
154     this.markupAware = _isMarkupAware;
155     this.sourceUrl = _sourceURL;
156     this.sourceUrlStartOffset = _urlStartOffset;
157     this.sourceUrlEndOffset = _urlEndOffset;
158 
159     //annotations
160     //1. default
161     _setAnnotations(null,_default);
162 
163     //2. named (if any)
164     if (null != _named) {
165       Iterator itNamed = _named.values().iterator();
166       while (itNamed.hasNext()){
167         AnnotationSet currSet = (AnnotationSet)itNamed.next();
168         //add them all to the DBAnnotationSet
169         _setAnnotations(currSet.getName(),currSet);
170       }
171     }
172 
173     //3. add the listeners for the features
174     if (eventHandler == null)
175       eventHandler = new EventsHandler();
176     this.features.addFeatureMapListener(eventHandler);
177 
178     //4. add self as listener for the data store, so that we'll know when the DS is
179     //synced and we'll clear the isXXXChanged flags
180     this.dataStore.addDatastoreListener(this);
181   }
182 */
183 
184   /** The content of the document: a String for text; MPEG for video; etc. */
185   public DocumentContent getContent() {
186 
187     //1. if this is a child document then return the content of the parent resource
188     if (null != this.parentDocument) {
189       return this.parentDocument.getContent();
190     }
191     else {
192       //2. assert that no one is reading from DB now
193       synchronized(this.contentLock) {
194         if (false == this.isContentRead) {
195           _readContent();
196           this.isContentRead = true;
197         }
198       }
199 
200       //return content
201       return super.getContent();
202     }
203   }
204 
205   private void _readContent() {
206 
207     //preconditions
208     if (null == getLRPersistenceId()) {
209       throw new GateRuntimeException("can't construct a DatabaseDocument - not associated " +
210                                     " with any data store");
211     }
212 
213     if (false == getLRPersistenceId() instanceof Long) {
214       throw new GateRuntimeException("can't construct a DatabaseDocument -  " +
215                                       " invalid persistence ID");
216     }
217 
218     Long lrID = (Long)getLRPersistenceId();
219     //0. preconditions
220     Assert.assertNotNull(lrID);
221     Assert.assertTrue(false == this.isContentRead);
222     Assert.assertNotNull(this.content);
223 
224     //1. read from DB
225     PreparedStatement pstmt = null;
226     ResultSet rs = null;
227 
228     try {
229 
230       String sql = " select v1.enc_name, " +
231                    "        v1.dc_character_content, " +
232                    "        v1.dc_binary_content, " +
233                    "        v1.dc_content_type " +
234                    " from  "+this.jdbcSchema+"v_content v1 " +
235                    " where  v1.lr_id = ? ";
236       pstmt = this.jdbcConn.prepareStatement(sql);
237       pstmt.setLong(1,lrID.longValue());
238       pstmt.execute();
239       rs = pstmt.getResultSet();
240 
241       if (false == rs.next()) {
242         throw new SynchronisationException("empty reault set");
243       }
244 
245       if (this.dbType == DBHelper.ORACLE_DB) {
246 
247         String encoding = rs.getString("enc_name");
248         if (encoding.equals(DBHelper.DUMMY_ENCODING)) {
249           //no encoding was specified for this document
250           encoding = "";
251         }
252         Clob   clb = rs.getClob("dc_character_content");
253         Blob   blb = rs.getBlob("dc_binary_content");
254         long   contentType = rs.getLong("dc_content_type");
255 
256         //binary documents are not supported yet
257         Assert.assertTrue(DBHelper.CHARACTER_CONTENT == contentType ||
258                           DBHelper.EMPTY_CONTENT == contentType);
259 
260         StringBuffer buff = new StringBuffer();
261         OracleDataStore.readCLOB(clb,buff);
262 
263         //2. set data members that were not previously initialized
264         this.encoding = encoding;
265 
266         //be aware than document content may be empty
267         if (null != buff) {
268           this.content = new DocumentContentImpl(buff.toString());
269         }
270         else {
271           this.content = new DocumentContentImpl();
272         }
273 
274       }
275 
276       else if (this.dbType == DBHelper.POSTGRES_DB) {
277 
278         String encoding = rs.getString("enc_name");
279         if (encoding.equals(DBHelper.DUMMY_ENCODING)) {
280           //no encoding was specified for this document
281           encoding = "";
282         }
283 
284         String content = rs.getString("dc_character_content");
285         long   contentType = rs.getLong("dc_content_type");
286 
287         //binary documents are not supported yet
288         Assert.assertTrue(DBHelper.CHARACTER_CONTENT == contentType ||
289                           DBHelper.EMPTY_CONTENT == contentType);
290 
291         //2. set data members that were not previously initialized
292 
293         this.encoding = encoding;
294 
295         //be aware than document content may be empty
296         if (null != content) {
297           this.content = new DocumentContentImpl(content);
298         }
299         else {
300           this.content = new DocumentContentImpl();
301         }
302       }
303       else {
304         Assert.fail();
305       }
306     }
307     catch(SQLException sqle) {
308       throw new SynchronisationException("can't read content from DB: ["+ sqle.getMessage()+"]");
309     }
310     catch(IOException ioe) {
311       throw new SynchronisationException(ioe);
312     }
313     finally {
314       try {
315         DBHelper.cleanup(rs);
316         DBHelper.cleanup(pstmt);
317       }
318       catch(PersistenceException pe) {
319         throw new SynchronisationException("JDBC error: ["+ pe.getMessage()+"]");
320       }
321     }
322   }
323 
324 
325   /** Get the encoding of the document content source */
326   public String getEncoding() {
327 
328     //1. assert that no one is reading from DB now
329     synchronized(this.contentLock) {
330       if (false == this.isContentRead) {
331         _readContent();
332 
333         this.isContentRead = true;
334       }
335     }
336 
337     return super.getEncoding();
338   }
339 
340   /** Returns a map with the named annotation sets. It returns <code>null</code>
341    *  if no named annotaton set exists. */
342   public Map getNamedAnnotationSets() {
343 
344     Vector annNames = new Vector();
345 
346     PreparedStatement pstmt = null;
347     ResultSet rs = null;
348 
349     //1. get the names of all sets
350     try {
351       String sql = " select as_name " +
352                    " from  "+this.jdbcSchema+"v_annotation_set " +
353                    " where  lr_id = ? " +
354                    "  and as_name is not null";
355 
356       pstmt = this.jdbcConn.prepareStatement(sql);
357       pstmt.setLong(1,((Long)this.lrPersistentId).longValue());
358       pstmt.execute();
359       rs = pstmt.getResultSet();
360 
361       while (rs.next()) {
362         annNames.add(rs.getString("as_name"));
363       }
364     }
365     catch(SQLException sqle) {
366       throw new SynchronisationException("can't get named annotatios: ["+ sqle.getMessage()+"]");
367     }
368     finally {
369       try {
370         DBHelper.cleanup(rs);
371         DBHelper.cleanup(pstmt);
372       }
373       catch(PersistenceException pe) {
374         throw new SynchronisationException("JDBC error: ["+ pe.getMessage()+"]");
375       }
376     }
377 
378     //2. read annotations
379     for (int i=0; i< annNames.size(); i++) {
380       //delegate because of the data is already read getAnnotations() will just return
381       getAnnotations((String)annNames.elementAt(i));
382     }
383 
384     //3. delegate to the parent method
385     return super.getNamedAnnotationSets();
386 
387   } // getNamedAnnotationSets
388 
389 
390   /** Get the default set of annotations. The set is created if it
391     * doesn't exist yet.
392     */
393   public AnnotationSet getAnnotations() {
394 
395     //1. read from DB
396     _getAnnotations(null);
397 
398     //2. is there such set in the DB?
399     if (null == this.defaultAnnots) {
400       //create a DatabaseAnnotationSetImpl
401       //NOTE: we create the set and then delegate to the super mehtod, otherwise
402       //the super mehtod will create AnnotationSetImpl instead of DatabaseAnnotationSetImpl
403       //which will not work with DatabaseDocumentImpl
404       AnnotationSet aset = new DatabaseAnnotationSetImpl(this);
405 
406       //set internal member
407       this.defaultAnnots = aset;
408 
409       //3. fire events
410       fireAnnotationSetAdded(new DocumentEvent(this,
411                                                 DocumentEvent.ANNOTATION_SET_ADDED,
412                                                 null));
413     }
414 
415     //4. delegate
416     return super.getAnnotations();
417   } // getAnnotations()
418 
419 
420   /** Get a named set of annotations. Creates a new set if one with this
421     * name doesn't exist yet.
422     * If the provided name is null then it returns the default annotation set.
423     */
424   public AnnotationSet getAnnotations(String name) {
425 
426     //0. preconditions
427     Assert.assertNotNull(name);
428 
429     //1. read from DB if the set is there at all
430     _getAnnotations(name);
431 
432     //2. is there such set in the DB?
433     if (false == this.namedAnnotSets.keySet().contains(name)) {
434       //create a DatabaseAnnotationSetImpl
435       //NOTE: we create the set and then delegate to the super mehtod, otherwise
436       //the super mehtod will create AnnotationSetImpl instead of DatabaseAnnotationSetImpl
437       //which will not work with DatabaseDocumentImpl
438       AnnotationSet aset = new DatabaseAnnotationSetImpl(this,name);
439 
440       //add to internal collection
441       this.namedAnnotSets.put(name,aset);
442 
443       //add the set name to the list with the recently created sets
444       this.addedAnotationSets.add(name);
445 
446       //3. fire events
447       DocumentEvent evt = new DocumentEvent(this, DocumentEvent.ANNOTATION_SET_ADDED, name);
448       fireAnnotationSetAdded(evt);
449     }
450 
451     //3. delegate
452     return super.getAnnotations(name);
453   }
454 
455 
456   private void _getAnnotations(String name) {
457 
458     AnnotationSet as = null;
459 
460     //preconditions
461     if (null == getLRPersistenceId()) {
462       throw new GateRuntimeException("can't construct a DatabaseDocument - not associated " +
463                                     " with any data store");
464     }
465 
466     if (false == getLRPersistenceId() instanceof Long) {
467       throw new GateRuntimeException("can't construct a DatabaseDocument -  " +
468                                       " invalid persistence ID");
469     }
470 
471     //have we already read this set?
472 
473     if (null == name) {
474       //default set
475       if (this.defaultAnnots != null) {
476         //the default set is alredy read - do nothing
477         //super methods will take care
478         return;
479       }
480     }
481     else {
482       //named set
483       if (this.namedAnnotSets.containsKey(name)) {
484         //we've already read it - do nothing
485         //super methods will take care
486         return;
487       }
488     }
489 
490     Long lrID = (Long)getLRPersistenceId();
491     Long asetID = null;
492     //0. preconditions
493     Assert.assertNotNull(lrID);
494 
495     //1. read a-set info
496     PreparedStatement pstmt = null;
497     ResultSet rs = null;
498     try {
499       String sql = " select as_id " +
500                    " from  "+this.jdbcSchema+"v_annotation_set " +
501                    " where  lr_id = ? ";
502       //do we have aset name?
503       String clause = null;
504       if (null != name) {
505         clause =   "        and as_name = ? ";
506       }
507       else {
508         clause =   "        and as_name is null ";
509       }
510       sql = sql + clause;
511 
512       pstmt = this.jdbcConn.prepareStatement(sql);
513       pstmt.setLong(1,lrID.longValue());
514       if (null != name) {
515         pstmt.setString(2,name);
516       }
517       pstmt.execute();
518       rs = pstmt.getResultSet();
519 
520       if (rs.next()) {
521         //ok, there is such aset in the DB
522         asetID = new Long(rs.getLong(1));
523       }
524       else {
525         //wow, there is no such aset, so create new ...
526         //... by delegating to the super method
527         return;
528       }
529 
530       //1.5 cleanup
531       DBHelper.cleanup(rs);
532       DBHelper.cleanup(pstmt);
533 
534       //2. read annotation Features
535       HashMap featuresByAnnotationID = _readFeatures(asetID);
536 
537       //3. read annotations
538       AnnotationSetImpl transSet = new AnnotationSetImpl(this);
539 
540       String hint;
541 
542       if (this.dbType == DBHelper.ORACLE_DB) {
543         hint = "/*+ use_nl(v.t_annotation v.t_as_annotation) " +
544               "     use_nl(v.t_annotation_type v.t_annotation) "+
545               " */";
546       }
547       else {
548         hint = "";
549       }
550 
551       String sql1 = " select "+hint+
552                     "        ann_local_id, " +
553                     "        at_name, " +
554                     "        start_offset, " +
555                     "        end_offset " +
556                     " from  "+this.jdbcSchema+"v_annotation  v" +
557                     " where  asann_as_id = ? ";
558 
559       if (DEBUG) Out.println(">>>>> asetID=["+asetID+"]");
560 
561       pstmt = this.jdbcConn.prepareStatement(sql1);
562       pstmt.setLong(1,asetID.longValue());
563 
564       if (this.dbType == DBHelper.ORACLE_DB) {
565         ((OraclePreparedStatement)pstmt).setRowPrefetch(DBHelper.CHINK_SIZE_LARGE);
566       }
567       pstmt.execute();
568       rs = pstmt.getResultSet();
569 
570       while (rs.next()) {
571         //1. read data memebers
572         Integer annID = new Integer(rs.getInt(1));
573         String type = rs.getString(2);
574         Long startOffset = new Long(rs.getLong(3));
575         Long endOffset = new Long(rs.getLong(4));
576 
577         if (DEBUG) Out.println("ann_local_id=["+annID+"]");
578         if (DEBUG) Out.println("start_off=["+startOffset+"]");
579         if (DEBUG) Out.println("end_off=["+endOffset+"]");
580 
581         //2. get the features
582         FeatureMap fm = (FeatureMap)featuresByAnnotationID.get(annID);
583         //fm should NOT be null
584         if (null == fm) {
585           fm =  new SimpleFeatureMapImpl();
586         }
587 
588         //3. add to annotation set
589         transSet.add(annID,startOffset,endOffset,type,fm);
590       }//while
591 
592       //1.5, create a-set
593       if (null == name) {
594         as = new DatabaseAnnotationSetImpl(this, transSet);
595       }
596       else {
597         as = new DatabaseAnnotationSetImpl(this,name, transSet);
598       }
599     }
600     catch(SQLException sqle) {
601       throw new SynchronisationException("can't read annotations from DB: ["+ sqle.getMessage()+"]");
602     }
603     catch(InvalidOffsetException oe) {
604       throw new SynchronisationException(oe);
605     }
606     catch(PersistenceException pe) {
607       throw new SynchronisationException("JDBC error: ["+ pe.getMessage()+"]");
608     }
609     finally {
610       try {
611         DBHelper.cleanup(rs);
612         DBHelper.cleanup(pstmt);
613       }
614       catch(PersistenceException pe) {
615         throw new SynchronisationException("JDBC error: ["+ pe.getMessage()+"]");
616       }
617     }
618 
619 
620     //4. update internal data members
621     if (name == null) {
622       //default as
623       this.defaultAnnots = as;
624     }
625     else {
626       //named as
627       this.namedAnnotSets.put(name,as);
628     }
629 
630     //don't return the new aset, the super method will take care
631     return;
632   }
633 
634 
635 
636 
637   private HashMap _readFeatures(Long asetID) {
638 
639     PreparedStatement pstmt = null;
640     ResultSet rs = null;
641 
642     //1
643     String      prevKey = DBHelper.DUMMY_FEATURE_KEY;
644     String      currKey = null;
645 
646     Integer     prevAnnID = null;
647     Integer     currAnnID = null;
648 
649     Object      currFeatureValue = null;
650     Vector      currFeatureArray = new Vector();
651 
652     HashMap     currFeatures = new HashMap();
653     FeatureMap  annFeatures = null;
654 
655     HashMap     featuresByAnnotID = new HashMap();
656 
657     //2. read the features from DB
658 
659     try {
660 
661       if (this.dbType == DBHelper.ORACLE_DB) {
662         String sql = " select /*+ use_nl(v.t_annotation v.t_as_annotation) "+
663                      "            use_nl(v.t_feature v.t_annotation) "+
664                      "            index(v.t_feature xt_feature_01) "+
665                      "            use_nl(v.t_feature_key v.t_feature) "+
666                      "           full(v.t_feature_key)           "+
667                      "        */                                  "+
668                      "                                            " +
669                      "        ann_local_id, " +
670                      "        key, " +
671                      "        ft_value_type, " +
672                      "        ft_number_value, " +
673                      "        ft_character_value, " +
674                      "        ft_long_character_value, " +
675                      "        ft_binary_value " +
676                      " from  "+this.jdbcSchema+"v_annotation_features v" +
677                      " where  set_id = ? " +
678                      " order by ann_local_id,key ";
679 
680         pstmt = this.jdbcConn.prepareStatement(sql);
681         pstmt.setLong(1,asetID.longValue());
682         ((OraclePreparedStatement)pstmt).setRowPrefetch(DBHelper.CHINK_SIZE_LARGE);
683         pstmt.execute();
684         rs = pstmt.getResultSet();
685       }
686 
687       else if (this.dbType == DBHelper.POSTGRES_DB) {
688 
689         String sql = " select " +
690                      "        ann_local_id, " +
691                      "        key, " +
692                      "        ft_value_type, " +
693                      "        ft_int_value, " +
694                      "        ft_float_value, " +
695                      "        ft_character_value, " +
696                      "        ft_binary_value " +
697                      " from  "+this.jdbcSchema+"v_annotation_features " +
698                      " where  set_id = ? " +
699                      " order by ann_local_id,key ";
700 
701         pstmt = this.jdbcConn.prepareStatement(sql);
702         pstmt.setLong(1,asetID.longValue());
703         pstmt.execute();
704         rs = pstmt.getResultSet();
705       }
706 
707       else {
708         Assert.fail();
709       }
710 
711       while (rs.next()) {
712         //NOTE: because there are LOBs in the resulset
713         //the columns should be read in the order they appear
714         //in the query
715 
716         prevAnnID = currAnnID;
717         currAnnID = new Integer(rs.getInt("ann_local_id"));
718 
719         //2.1 is this a new Annotation?
720         if (!currAnnID.equals(prevAnnID) && prevAnnID != null) {
721           //new one
722           //2.1.1 normalize the hashmap with the features, and add
723           //the elements into a new FeatureMap
724           annFeatures = new SimpleFeatureMapImpl();
725           Set entries = currFeatures.entrySet();
726           Iterator itFeatureArrays = entries.iterator();
727 
728           while(itFeatureArrays.hasNext()) {
729             Map.Entry currEntry = (Map.Entry)itFeatureArrays.next();
730             String key = (String)currEntry.getKey();
731             Vector val = (Vector)currEntry.getValue();
732 
733             //add to feature map normalized array
734             Assert.assertTrue(val.size() >= 1);
735 
736             if (val.size() == 1) {
737               //the single elemnt of the array
738               annFeatures.put(key,val.firstElement());
739             }
740             else {
741               //the whole array
742               annFeatures.put(key,val);
743             }
744           }//while
745 
746           //2.1.2. add the featuremap for this annotation to the hashmap
747           featuresByAnnotID.put(prevAnnID,annFeatures);
748           //2.1.3. clear temp hashtable with feature vectors
749           currFeatures.clear();
750 /*??*/          prevAnnID = currAnnID;
751         }//if -- is new annotation
752 
753         currKey = rs.getString("key");
754         Long valueType = new Long(rs.getLong("ft_value_type"));
755 
756         //we don't quite know what is the type of the NUMBER
757         //stored in DB
758         Object numberValue = null;
759 
760         //for all numeric types + boolean -> read from DB as appropriate
761         //Java object
762         switch(valueType.intValue()) {
763 
764           case DBHelper.VALUE_TYPE_BOOLEAN:
765 
766             if (this.dbType == DBHelper.ORACLE_DB) {
767               numberValue = new Boolean(rs.getBoolean("ft_number_value"));
768             }
769             else if (this.dbType == DBHelper.POSTGRES_DB){
770               numberValue = new Boolean(rs.getBoolean("ft_int_value"));
771             }
772             else {
773               Assert.fail();
774             }
775 
776             break;
777 
778 
779           case DBHelper.VALUE_TYPE_FLOAT:
780 
781             if (this.dbType == DBHelper.ORACLE_DB) {
782               numberValue = new Float(rs.getFloat("ft_number_value"));
783             }
784             else if (this.dbType == DBHelper.POSTGRES_DB){
785               numberValue = new Float(rs.getFloat("ft_float_value"));
786             }
787             else {
788               Assert.fail();
789             }
790 
791             break;
792 
793           case DBHelper.VALUE_TYPE_INTEGER:
794 
795             if (this.dbType == DBHelper.ORACLE_DB) {
796               numberValue = new Integer(rs.getInt("ft_number_value"));
797             }
798             else if (this.dbType == DBHelper.POSTGRES_DB){
799               numberValue = new Integer(rs.getInt("ft_int_value"));
800             }
801             else {
802               Assert.fail();
803             }
804 
805             break;
806 
807           case DBHelper.VALUE_TYPE_LONG:
808 
809             if (this.dbType == DBHelper.ORACLE_DB) {
810               numberValue = new Long(rs.getLong("ft_number_value"));
811             }
812             else if (this.dbType == DBHelper.POSTGRES_DB){
813               numberValue = new Long(rs.getLong("ft_int_value"));
814             }
815             else {
816               Assert.fail();
817             }
818 
819             break;
820 
821           default:
822             //do nothing, will be handled in the next switch statement
823         }
824 
825         //don't forget to read the rest of the current row
826         String stringValue = rs.getString("ft_character_value");
827         Clob clobValue = null;
828         Blob blobValue = null;
829 
830         if (this.dbType == DBHelper.ORACLE_DB) {
831           clobValue = rs.getClob("ft_long_character_value");
832           blobValue = rs.getBlob("ft_binary_value");
833         }
834 
835         switch(valueType.intValue()) {
836 
837           case DBHelper.VALUE_TYPE_NULL:
838             currFeatureValue = null;
839             break;
840 
841           case DBHelper.VALUE_TYPE_BINARY:
842             throw new MethodNotImplementedException();
843 
844           case DBHelper.VALUE_TYPE_BOOLEAN:
845           case DBHelper.VALUE_TYPE_FLOAT:
846           case DBHelper.VALUE_TYPE_INTEGER:
847           case DBHelper.VALUE_TYPE_LONG:
848             currFeatureValue = numberValue;
849             break;
850 
851           case DBHelper.VALUE_TYPE_STRING:
852 
853             if (this.dbType == DBHelper.ORACLE_DB && null == stringValue) {
854               //this one is tricky too
855               //if the string is < 4000 bytes long then it's stored as varchar2
856               //otherwise as CLOB
857 
858               StringBuffer temp = new StringBuffer();
859               OracleDataStore.readCLOB(clobValue,temp);
860               currFeatureValue = temp.toString();
861             }
862             else { /* PostgresDB or (Oracle DB + value is stored in varchar column) */
863               currFeatureValue = stringValue;
864             }
865             break;
866 
867           default:
868             throw new SynchronisationException("Invalid feature type found in DB, value is ["+valueType+"]");
869         }//switch
870 
871         //ok, we got the key/value pair now
872         //2.2 is this a new feature key?
873         if (false == currFeatures.containsKey(currKey)) {
874           //new key
875           Vector keyValue = new Vector();
876           keyValue.add(currFeatureValue);
877           currFeatures.put(currKey,keyValue);
878         }
879         else {
880           //key is present, append to existing vector
881           ((Vector)currFeatures.get(currKey)).add(currFeatureValue);
882         }
883 
884         prevKey = currKey;
885       }//while
886 
887 
888       //2.3 process the last Annotation left
889       annFeatures = new SimpleFeatureMapImpl();
890 
891       Set entries = currFeatures.entrySet();
892       Iterator itFeatureArrays = entries.iterator();
893 
894       while(itFeatureArrays.hasNext()) {
895         Map.Entry currEntry = (Map.Entry)itFeatureArrays.next();
896         String key = (String)currEntry.getKey();
897         Vector val = (Vector)currEntry.getValue();
898 
899         //add to feature map normalized array
900         Assert.assertTrue(val.size() >= 1);
901 
902         if (val.size() == 1) {
903           //the single elemnt of the array
904           annFeatures.put(key,val.firstElement());
905         }
906         else {
907           //the whole array
908           annFeatures.put(key,val);
909         }
910       }//while
911 
912       //2.3.1. add the featuremap for this annotation to the hashmap
913       if (null != currAnnID) {
914         // do we have features at all for this annotation?
915         featuresByAnnotID.put(currAnnID,annFeatures);
916       }
917 
918       //3. return the hashmap
919       return featuresByAnnotID;
920     }
921     catch(SQLException sqle) {
922       throw new SynchronisationException("can't read content from DB: ["+ sqle.getMessage()+"]");
923     }
924     catch(IOException sqle) {
925       throw new SynchronisationException("can't read content from DB: ["+ sqle.getMessage()+"]");
926     }
927     finally {
928       try {
929         DBHelper.cleanup(rs);
930         DBHelper.cleanup(pstmt);
931       }
932       catch(PersistenceException pe) {
933         throw new SynchronisationException("JDBC error: ["+ pe.getMessage()+"]");
934       }
935     }
936   }
937 
938 
939   /** Set method for the document content */
940   public void setContent(DocumentContent content) {
941 
942     //if the document is a child document then setContent()is prohibited
943     if (null != this.parentDocument) {
944       Err.prln("content of document ["+this.name+"] cannot be changed!");
945       return;
946     }
947     else {
948       super.setContent(content);
949       this.contentChanged = true;
950     }
951   }
952 
953   /** Set the feature set */
954   public void setFeatures(FeatureMap features) {
955     //1. save them first, so we can remove the listener
956     FeatureMap oldFeatures = this.features;
957 
958     super.setFeatures(features);
959 
960     this.featuresChanged = true;
961 
962     //4. sort out the listeners
963     if (eventHandler != null)
964       oldFeatures.removeFeatureMapListener(eventHandler);
965     else
966       eventHandler = new EventsHandler();
967     this.features.addFeatureMapListener(eventHandler);
968   }
969 
970   /** Sets the name of this resource*/
971   public void setName(String name){
972     super.setName(name);
973 
974     this.nameChanged = true;
975   }
976 
977 
978   private List getAnnotationsForOffset(AnnotationSet aDumpAnnotSet,Long offset){
979     throw new MethodNotImplementedException();
980   }
981 
982 
983   public void setNextNodeId(int nextID){
984     Assert.assertTrue(nextID >= 0);
985     this.nextNodeId = nextID;
986   }
987 
988 
989   public boolean isResourceChanged(int changeType) {
990 
991     switch(changeType) {
992 
993       case EventAwareLanguageResource.DOC_CONTENT:
994         return this.contentChanged;
995       case EventAwareLanguageResource.RES_FEATURES:
996         return this.featuresChanged;
997       case EventAwareLanguageResource.RES_NAME:
998         return this.nameChanged;
999       case EventAwareLanguageResource.DOC_MAIN:
1000        return this.documentChanged;
1001      default:
1002        throw new IllegalArgumentException();
1003    }
1004
1005  }
1006
1007  private void _setAnnotations(String setName,Collection annotations)
1008    throws InvalidOffsetException {
1009
1010    AnnotationSet tempSet = null;
1011
1012    if (null == setName) {
1013      Assert.assertTrue(null == this.defaultAnnots);
1014//      this.defaultAnnots = new DatabaseAnnotationSetImpl(this,annotations);
1015      tempSet = new DatabaseAnnotationSetImpl(this);
1016      this.defaultAnnots = tempSet;
1017    }
1018    else {
1019      Assert.assertTrue(false == this.namedAnnotSets.containsKey(setName));
1020//      AnnotationSet annSet = new DatabaseAnnotationSetImpl(this,setName,annotations);
1021      tempSet = new DatabaseAnnotationSetImpl(this,setName);
1022      this.namedAnnotSets.put(setName,tempSet);
1023    }
1024
1025    //NOTE - the source aset is not from this document, so we can't use the proper constructor -
1026    //we should iterate all elements from the original aset and create equiva elements in the new aset
1027    Iterator itAnnotations = annotations.iterator();
1028    while (itAnnotations.hasNext()) {
1029      Annotation currAnn = (Annotation)itAnnotations.next();
1030      tempSet.add(currAnn.getId(),
1031                  currAnn.getStartNode().getOffset(),
1032                  currAnn.getEndNode().getOffset(),
1033                  currAnn.getType(),
1034                  currAnn.getFeatures());
1035
1036      //adjust the maxAnnotationID
1037      this.maxAnnotationId = (currAnn.getId().intValue() >= this.maxAnnotationId)
1038                              ? currAnn.getId().intValue()
1039                              : this.maxAnnotationId;
1040    }
1041
1042  }
1043
1044  /** Set method for the document's URL */
1045  public void setSourceUrl(URL sourceUrl) {
1046
1047    this.documentChanged = true;
1048    super.setSourceUrl(sourceUrl);
1049  } // setSourceUrl
1050
1051
1052  /** Documents may be packed within files; in this case an optional pair of
1053    * offsets refer to the location of the document. This method sets the
1054    * end offset.
1055    */
1056  public void setSourceUrlEndOffset(Long sourceUrlEndOffset) {
1057
1058    this.documentChanged = true;
1059    super.setSourceUrlEndOffset(sourceUrlEndOffset);
1060  } // setSourceUrlStartOffset
1061
1062
1063  /** Documents may be packed within files; in this case an optional pair of
1064    * offsets refer to the location of the document. This method sets the
1065    * start offset.
1066    */
1067  public void setSourceUrlStartOffset(Long sourceUrlStartOffset) {
1068
1069    this.documentChanged = true;
1070    super.setSourceUrlStartOffset(sourceUrlStartOffset);
1071  } // setSourceUrlStartOffset
1072
1073  /** Make the document markup-aware. This will trigger the creation
1074   *  of a DocumentFormat object at Document initialisation time; the
1075   *  DocumentFormat object will unpack the markup in the Document and
1076   *  add it as annotations. Documents are <B>not</B> markup-aware by default.
1077   *
1078   *  @param newMarkupAware markup awareness status.
1079   */
1080  public void setMarkupAware(Boolean newMarkupAware) {
1081
1082    this.documentChanged = true;
1083    super.setMarkupAware(newMarkupAware);
1084  }
1085
1086  /**
1087   * All the events from the features are handled by
1088   * this inner class.
1089   */
1090  class EventsHandler implements gate.event.FeatureMapListener {
1091    public void featureMapUpdated(){
1092      //tell the document that its features have been updated
1093      featuresChanged = true;
1094    }
1095  }
1096
1097  /**
1098   * Overriden to remove the features listener, when the document is closed.
1099   */
1100  public void cleanup() {
1101
1102    if (eventHandler != null)
1103
1104    this.features.removeFeatureMapListener(eventHandler);
1105    getDataStore().removeDatastoreListener(this);
1106
1107    //unregister annot-sets
1108    if (null != this.defaultAnnots) {
1109      getDataStore().removeDatastoreListener((DatastoreListener)this.defaultAnnots);
1110    }
1111
1112    Set loadedNamedAnnots = this.namedAnnotSets.entrySet();
1113    Iterator it = loadedNamedAnnots.iterator();
1114    while (it.hasNext()) {
1115      Map.Entry currEntry = (Map.Entry)it.next();
1116      AnnotationSet currSet = (AnnotationSet)currEntry.getValue();
1117      //unregister
1118      getDataStore().removeDatastoreListener((DatastoreListener)currSet);
1119    }
1120
1121    super.cleanup();
1122  }///inner class EventsHandler
1123
1124
1125  /**
1126   * Called by a datastore when a new resource has been adopted
1127   */
1128  public void resourceAdopted(DatastoreEvent evt){
1129  }
1130
1131  /**
1132   * Called by a datastore when a resource has been deleted
1133   */
1134  public void resourceDeleted(DatastoreEvent evt){
1135
1136    Assert.assertNotNull(evt);
1137    Assert.assertNotNull(evt.getResourceID());
1138
1139    //unregister self as listener from the DataStore
1140    if (evt.getResourceID().equals(this.getLRPersistenceId())) {
1141
1142      //someone deleted this document
1143      getDataStore().removeDatastoreListener(this);
1144
1145      //unregister annot-sets
1146      if (null != this.defaultAnnots) {
1147        getDataStore().removeDatastoreListener((DatastoreListener)this.defaultAnnots);
1148      }
1149
1150      Set loadedNamedAnnots = this.namedAnnotSets.entrySet();
1151      Iterator it = loadedNamedAnnots.iterator();
1152      while (it.hasNext()) {
1153        Map.Entry currEntry = (Map.Entry)it.next();
1154        AnnotationSet currSet = (AnnotationSet)currEntry.getValue();
1155        //unregister
1156        getDataStore().removeDatastoreListener((DatastoreListener)currSet);
1157      }
1158    }
1159  }//resourceDeleted
1160
1161  /**
1162   * Called by a datastore when a resource has been wrote into the datastore
1163   */
1164  public void resourceWritten(DatastoreEvent evt){
1165
1166    Assert.assertNotNull(evt);
1167    Assert.assertNotNull(evt.getResourceID());
1168
1169    //is the event for us?
1170    if (evt.getResourceID().equals(this.getLRPersistenceId())) {
1171      //wow, the event is for me
1172      //clear all flags, the content is synced with the DB
1173      this.contentChanged =
1174        this.documentChanged =
1175          this.featuresChanged =
1176            this.nameChanged = false;
1177
1178      this.removedAnotationSets.clear();
1179      this.addedAnotationSets.clear();
1180    }
1181
1182
1183  }
1184
1185  public Collection getLoadedAnnotationSets() {
1186
1187    //never return the data member - return a clone
1188    Assert.assertNotNull(this.namedAnnotSets);
1189    Vector result = new Vector(this.namedAnnotSets.values());
1190    if (null != this.defaultAnnots) {
1191      result.add(this.defaultAnnots);
1192    }
1193
1194    return result;
1195  }
1196
1197
1198  public Collection getRemovedAnnotationSets() {
1199
1200    //return a clone
1201    return new Vector(this.removedAnotationSets);
1202  }
1203
1204  public Collection getAddedAnnotationSets() {
1205
1206    //return a clone
1207    return new Vector(this.addedAnotationSets);
1208  }
1209
1210  public void removeAnnotationSet(String name) {
1211
1212    //1. add to the list of removed a-sets
1213    this.removedAnotationSets.add(name);
1214
1215    //if the set was read from the DB then it is registered as datastore listener and ...
1216    //there may be chnges in it
1217    //NOTE that default set cannot be reoved, so we just ignore it
1218
1219    if (this.namedAnnotSets.keySet().contains(name)) {
1220      //set was loaded
1221      AnnotationSet aset = (AnnotationSet)this.namedAnnotSets.get(name);
1222
1223      Assert.assertNotNull(aset);
1224      Assert.assertTrue(aset instanceof DatabaseAnnotationSetImpl);
1225
1226      //3. unregister it as a DataStoreListener
1227      this.dataStore.removeDatastoreListener((DatastoreListener)aset);
1228    }
1229
1230    //4. delegate
1231    super.removeAnnotationSet(name);
1232  }
1233
1234  /**
1235   * Returns true of an LR has been modified since the last sync.
1236   * Always returns false for transient LRs.
1237   */
1238  public boolean isModified() {
1239    return this.isResourceChanged(EventAwareLanguageResource.DOC_CONTENT) ||
1240            this.isResourceChanged(EventAwareLanguageResource.RES_FEATURES) ||
1241              this.isResourceChanged(EventAwareLanguageResource.RES_NAME) ||
1242                this.isResourceChanged(EventAwareLanguageResource.DOC_MAIN);
1243  }
1244
1245
1246  /**
1247   * Returns the parent LR of this LR.
1248   * Only relevant for LRs that support shadowing. Most do not by default.
1249   */
1250  public LanguageResource getParent()
1251    throws PersistenceException,SecurityException {
1252
1253    return this.parentDocument;
1254  }//getParent
1255
1256  /**
1257   * Sets the parent LR of this LR.
1258   * Only relevant for LRs that support shadowing. Most do not by default.
1259   */
1260  public void setParent(LanguageResource parentLR)
1261    throws PersistenceException,SecurityException {
1262
1263    //0. preconditions
1264    Assert.assertNotNull(parentLR);
1265
1266    if (false == parentLR instanceof DatabaseDocumentImpl) {
1267      throw new IllegalArgumentException("invalid parent resource set");
1268    }
1269
1270    //1.
1271    this.parentDocument = (Document)parentLR;
1272
1273  }//setParent
1274
1275  public void setInitData__$$__(Object data)
1276    throws PersistenceException, InvalidOffsetException {
1277
1278    HashMap initData = (HashMap)data;
1279
1280    this.jdbcConn = (Connection)initData.get("JDBC_CONN");
1281    setDatabaseInfo(this.jdbcConn);
1282    this.dataStore = (DatabaseDataStore)initData.get("DS");
1283    this.lrPersistentId = (Long)initData.get("LR_ID");
1284    this.name = (String)initData.get("DOC_NAME");
1285    this.content = (DocumentContent)initData.get("DOC_CONTENT");
1286    this.isContentRead = true;
1287    this.features = (FeatureMap)initData.get("DOC_FEATURES");
1288    this.markupAware = (Boolean)initData.get("DOC_MARKUP_AWARE");
1289    this.sourceUrl = (URL)initData.get("DOC_SOURCE_URL");
1290    this.sourceUrlStartOffset = (Long)initData.get("DOC_SOURCE_URL_START");
1291    this.sourceUrlEndOffset = (Long)initData.get("DOC_SOURCE_URL_END");
1292    if(initData.containsKey("DOC_STRING_CONTENT"))
1293      this.setStringContent((String)initData.get("DOC_STRING_CONTENT"));
1294
1295
1296    Integer nextNodeID = (Integer)initData.get("DOC_NEXT_NODE_ID");
1297    if (null != nextNodeID) {
1298      this.setNextNodeId(nextNodeID.intValue());
1299    }
1300
1301    Integer nextAnnID = (Integer)initData.get("DOC_NEXT_ANN_ID");
1302    if (null != nextAnnID) {
1303      this.setNextAnnotationId(nextAnnID.intValue());
1304    }
1305
1306    this.parentDocument = (Document)initData.get("PARENT_LR");
1307
1308    //annotations
1309    //1. default
1310    AnnotationSet _default = (AnnotationSet)initData.get("DOC_DEFAULT_ANNOTATIONS");
1311    if (null != _default) {
1312      _setAnnotations(null,_default);
1313    }
1314
1315    //2. named (if any)
1316    Map _named = (Map)initData.get("DOC_NAMED_ANNOTATION_SETS");
1317    if (null != _named) {
1318      Iterator itNamed = _named.values().iterator();
1319      while (itNamed.hasNext()){
1320        AnnotationSet currSet = (AnnotationSet)itNamed.next();
1321        //add them all to the DBAnnotationSet, except the ORIGINAL MARKUPS - handled in the super init()
1322        if (false == currSet.getName().equals(GateConstants.ORIGINAL_MARKUPS_ANNOT_SET_NAME)) {
1323          _setAnnotations(currSet.getName(),currSet);
1324        }
1325      }
1326    }
1327
1328    //3. add the listeners for the features (if any)
1329    if (null != this.features) {
1330      if (eventHandler == null)
1331        eventHandler = new EventsHandler();
1332      this.features.addFeatureMapListener(eventHandler);
1333    }
1334
1335    //4. add self as listener for the data store, so that we'll know when the DS is
1336    //synced and we'll clear the isXXXChanged flags
1337    if (null != this.dataStore) {
1338      this.dataStore.addDatastoreListener(this);
1339    }
1340
1341  }
1342
1343  public Object getInitData__$$__(Object initData) {
1344    return null;
1345  }
1346
1347  /** Initialise this resource, and return it. */
1348  public Resource init() throws ResourceInstantiationException {
1349
1350    Resource result = super.init();
1351
1352    if (this.nextAnnotationId <= this.maxAnnotationId) {
1353      this.nextAnnotationId = this.maxAnnotationId +1;
1354    }
1355
1356    return result;
1357  }
1358
1359}