1
15
16 package gate.corpora;
17
18 import java.io.*;
19 import java.net.URL;
20
21 import gate.DocumentContent;
22 import gate.util.InvalidOffsetException;
23
24
26 public class DocumentContentImpl implements DocumentContent
27 {
28
29 private static final boolean DEBUG = false;
30
31
35 private static final int INTERNAL_BUFFER_SIZE = 16*1024;
36
37
38 public DocumentContentImpl() {
39 content = new String();
40 }
42
43 public DocumentContentImpl(URL u, String encoding, Long start, Long end)
44 throws IOException {
45
46 int readLength = 0;
47 char[] readBuffer = new char[INTERNAL_BUFFER_SIZE];
48
49 BufferedReader uReader = null;
50 StringBuffer buf = new StringBuffer();
51 char c;
52 long s = 0, e = Long.MAX_VALUE, counter = 0;
53 if(start != null && end != null) {
54 s = start.longValue();
55 e = end.longValue();
56 }
57
58 if(encoding != null && !encoding.equalsIgnoreCase("")) {
59 uReader = new BufferedReader(
60 new InputStreamReader(u.openStream(), encoding), INTERNAL_BUFFER_SIZE
61 );
62 } else {
63 uReader = new BufferedReader(
64 new InputStreamReader(u.openStream()), INTERNAL_BUFFER_SIZE
65 );
66 };
67
68 uReader.skip(s);
70
71 long toRead = e - s;
73
74 while (
76 toRead > 0 &&
77 (readLength = uReader.read(readBuffer, 0, INTERNAL_BUFFER_SIZE)) != -1
78 ) {
79 if (toRead < readLength) {
80 readLength = (int)toRead;
83 }
84
85 buf.append(readBuffer, 0, readLength);
86 toRead -= readLength;
87 }
88
89 uReader.close();
91
92 content = new String(buf);
93 originalContent = content;
94 }
96
97 void edit(Long start, Long end, DocumentContent replacement)
98 {
99 int s = start.intValue(), e = end.intValue();
100 String repl = ((DocumentContentImpl) replacement).content;
101 StringBuffer newContent = new StringBuffer(content);
102 newContent.replace(s, e, repl);
103 content = newContent.toString();
104 }
106
107 public DocumentContent getContent(Long start, Long end)
108 throws InvalidOffsetException
109 {
110 if(! isValidOffsetRange(start, end))
111 throw new InvalidOffsetException();
112
113 return new DocumentContentImpl(
114 content.substring(start.intValue(), end.intValue())
115 );
116 }
118
121 public String toString(){
122 return content;
123 }
124
125
128 public Long size() {
129 return new Long(content.length());
130 }
132
133 boolean isValidOffset(Long offset) {
134 if(offset == null)
135 return false;
136
137 long o = offset.longValue();
138 long len = content.length();
139 if(o > len || o < 0)
140 return false;
141
142 return true;
143 }
145
148 boolean isValidOffsetRange(Long start, Long end) {
149 return
150 isValidOffset(start) && isValidOffset(end) &&
151 start.longValue() <= end.longValue();
152 }
154
156 public boolean equals(Object other) {
157 if (!(other instanceof DocumentContentImpl)) return false;
158
159 DocumentContentImpl docImpl = (DocumentContentImpl) other;
160 return content.equals(docImpl.toString());
161 }
163
164 public int hashCode(){ return toString().hashCode(); }
165
166
169 String content;
170
171
177 String originalContent;
178
179
183 public String getOriginalContent() { return originalContent; }
184
185
186 public DocumentContentImpl(String s)
187 { content = s; originalContent = content; }
188
189
190 static final long serialVersionUID = -1426940535575467461L;
191 }