1   /*
2    *  Files.java
3    *
4    *  Copyright (c) 1998-2005, The University of Sheffield.
5    *
6    *  This file is part of GATE (see http://gate.ac.uk/), and is free
7    *  software, licenced under the GNU Library General Public License,
8    *  Version 2, June 1991 (in the distribution as file licence.html,
9    *  and also available at http://gate.ac.uk/gate/licence.html).
10   *
11   *  $Id: Files.java,v 1.36 2006/03/28 14:50:18 ian_roberts Exp $
12   */
13  
14  package gate.util;
15  import java.io.*;
16  import java.util.*;
17  import java.util.regex.Matcher;
18  import java.util.regex.Pattern;
19  import java.net.URL;
20  import java.net.URI;
21  import java.net.URISyntaxException;
22  
23  
24  /** Some utilities for use with Files and with resources.
25    * <P>
26    * <B>Note</B> that there is a terminology conflict between the use
27    * of "resources" here and <TT>gate.Resource</TT> and its inheritors.
28    * <P>
29    * Java "resources" are files that live on the CLASSPATH or in a Jar
30    * file that are <I>not</I> <TT>.class</TT> files. For example: a
31    * <TT>.gif</TT> file that is used by a GUI, or one of the XML files
32    * used for testing GATE's document format facilities. This class
33    * allows you to access these files in various ways (as streams, as
34    * byte arrays, etc.).
35    * <P>
36    * GATE resources are components (Java Beans) that provide all of the
37    * natural language processing capabilities of a GATE-based system, and
38    * the language data that such systems analsyse and produce. For
39    * example: parsers, lexicons, generators, corpora.
40    * <P>
41    * Where we say "resource" in this class we mean Java resource; elsewhere
42    * in the system we almost always mean GATE resource.
43    */
44  public class Files {
45  
46    /** Debug flag */
47    private static final boolean DEBUG = false;
48  
49    /** Used to generate temporary resources names*/
50    static long resourceIndex = 0;
51  
52    /**Where on the classpath the gate resources are to be found*/
53    protected static String resourcePath = "/gate/resources";
54  
55    /**Gets the path for the gate resources within the classpath*/
56    public static String getResourcePath(){
57      return resourcePath;
58    }
59  
60    /** It returns the last component in a file path.
61      * It takes E.g: d:/tmp/file.txt and returns file.txt
62      */
63    public static String getLastPathComponent(String path){
64      if(path == null || path.length() == 0) return "";
65      //we should look both for "/" and "\" as on windows the file separator is"\"
66      //but a path coming from an URL will be separated by "/"
67      int index = path.lastIndexOf('/');
68      if(index == -1) index = path.lastIndexOf('\\');
69      if(index == -1) return path;
70      else return path.substring(index + 1);
71    }// getLastPathComponent()
72  
73    /** Get a string representing the contents of a text file. */
74    public static String getString(String fileName) throws IOException {
75      return getString(new File(fileName));
76    } // getString(fileName)
77  
78    /** Get a string representing the contents of a text file. */
79    public static String getString(File textFile) throws IOException {
80      FileInputStream fis = new FileInputStream(textFile);
81      int len = (int) textFile.length();
82      byte[] textBytes = new byte[len];
83      fis.read(textBytes, 0, len);
84      fis.close();
85      return new String(textBytes);
86    } // getString(File)
87  
88    /** Get a byte array representing the contents of a binary file. */
89    public static byte[] getByteArray(File binaryFile) throws IOException {
90      FileInputStream fis = new FileInputStream(binaryFile);
91      int len = (int) binaryFile.length();
92      byte[] bytes = new byte[len];
93      fis.read(bytes, 0, len);
94      fis.close();
95      return bytes;
96    } // getByteArray(File)
97  
98    /** Get a resource from the classpath as a String.
99      */
100   public static String getResourceAsString(String resourceName)
101   throws IOException {
102     InputStream resourceStream = getResourceAsStream(resourceName);
103     BufferedReader resourceReader =
104       new BufferedReader(new InputStreamReader(resourceStream));
105     StringBuffer resourceBuffer = new StringBuffer();
106 
107     int i;
108 
109     int charsRead = 0;
110     final int size = 1024;
111     char[] charArray = new char[size];
112 
113     while( (charsRead = resourceReader.read(charArray,0,size)) != -1 )
114       resourceBuffer.append (charArray,0,charsRead);
115 
116     while( (i = resourceReader.read()) != -1 )
117       resourceBuffer.append((char) i);
118 
119     resourceReader.close();
120     return resourceBuffer.toString();
121   } // getResourceAsString(String)
122 
123   /** Get a resource from the GATE resources directory as a String.
124     * The resource name should be relative to <code>resourcePath</code> which
125     * is equal with <TT>gate/resources</TT>; e.g.
126     * for a resource stored as <TT>gate/resources/jape/Test11.jape</TT>,
127     * this method should be passed the name <TT>jape/Test11.jape</TT>.
128     */
129   public static String getGateResourceAsString(String resourceName)
130     throws IOException {
131 
132     InputStream resourceStream = getGateResourceAsStream(resourceName);
133     BufferedReader resourceReader =
134       new BufferedReader(new InputStreamReader(resourceStream));
135     StringBuffer resourceBuffer = new StringBuffer();
136 
137     int i;
138 
139     int charsRead = 0;
140     final int size = 1024;
141     char[] charArray = new char[size];
142 
143     while( (charsRead = resourceReader.read(charArray,0,size)) != -1 )
144       resourceBuffer.append (charArray,0,charsRead);
145 
146     while( (i = resourceReader.read()) != -1 )
147       resourceBuffer.append((char) i);
148 
149     resourceReader.close();
150     return resourceBuffer.toString();
151   } // getGateResourceAsString(String)
152 
153   /**
154     * Writes a temporary file into the default temporary directory,
155     * form an InputStream a unique ID is generated and associated automaticaly
156     * with the file name...
157     */
158   public static File writeTempFile(InputStream contentStream)
159     throws IOException {
160 
161     File resourceFile  = null;
162     FileOutputStream resourceFileOutputStream = null;
163 
164     // create a temporary file name
165     resourceFile = File.createTempFile ("gateResource", ".tmp");
166     resourceFileOutputStream = new FileOutputStream(resourceFile);
167     resourceFile.deleteOnExit ();
168 
169     if (contentStream == null)
170       return resourceFile;
171 
172     int bytesRead = 0;
173     final int readSize = 1024;
174     byte[] bytes = new byte[readSize];
175     while( (bytesRead = contentStream.read(bytes,0,readSize) ) != -1 )
176       resourceFileOutputStream.write(bytes,0, bytesRead);
177 
178     resourceFileOutputStream.close();
179     contentStream.close ();
180     return resourceFile;
181   }// writeTempFile()
182 
183   /**
184     * Writes aString into a temporary file located inside
185     * the default temporary directory defined by JVM, using the specific
186     * anEncoding.
187     * An unique ID is generated and associated automaticaly with the file name.
188     * @param aString the String to be written. If is null then the file will be
189     * empty.
190     * @param anEncoding the encoding to be used. If is null then the default
191     * encoding will be used.
192     * @return the tmp file containing the string.
193     */
194   public static File writeTempFile(String aString, String anEncoding) throws
195       UnsupportedEncodingException, IOException{
196     File resourceFile  = null;
197     OutputStreamWriter writer = null;
198 
199     // Create a temporary file name
200     resourceFile = File.createTempFile ("gateResource", ".tmp");
201     resourceFile.deleteOnExit ();
202 
203     if (aString == null) return resourceFile;
204     // Prepare the writer
205     if (anEncoding == null){
206       // Use default encoding
207       writer = new OutputStreamWriter(new FileOutputStream(resourceFile));
208 
209     }else {
210       // Use the specified encoding
211       writer = new OutputStreamWriter(
212                       new FileOutputStream(resourceFile),anEncoding);
213     }// End if
214 
215     // This Action is added only when a gate.Document is created.
216     // So, is for sure that the resource is a gate.Document
217     writer.write(aString);
218     writer.flush();
219     writer.close();
220     return resourceFile;
221   }// writeTempFile()
222 
223   /**
224     * Writes aString into a temporary file located inside
225     * the default temporary directory defined by JVM, using the default
226     * encoding.
227     * An unique ID is generated and associated automaticaly with the file name.
228     * @param aString the String to be written. If is null then the file will be
229     * empty.
230     * @return the tmp file containing the string.
231     */
232   public static File writeTempFile(String aString) throws IOException{
233     return writeTempFile(aString,null);
234   }// writeTempFile()
235 
236 
237   /** Get a resource from the classpath as a byte array.
238     */
239   public static byte[] getResourceAsByteArray(String resourceName)
240     throws IOException, IndexOutOfBoundsException, ArrayStoreException {
241 
242     InputStream resourceInputStream = getResourceAsStream(resourceName);
243     BufferedInputStream resourceStream =
244       new BufferedInputStream(resourceInputStream);
245     byte b;
246     final int bufSize = 1024;
247     byte[] buf = new byte[bufSize];
248     int i = 0;
249 
250     // get the whole resource into buf (expanding the array as needed)
251     while( (b = (byte) resourceStream.read()) != -1 ) {
252       if(i == buf.length) {
253         byte[] newBuf = new byte[buf.length * 2];
254         System.arraycopy (buf,0,newBuf,0,i);
255         buf = newBuf;
256       }
257       buf[i++] = b;
258     }
259 
260     // close the resource stream
261     resourceStream.close();
262 
263     // copy the contents of buf to an array of the correct size
264     byte[] bytes = new byte[i];
265     // copy from buf to bytes
266     System.arraycopy (buf,0,bytes,0,i);
267     return bytes;
268   } // getResourceAsByteArray(String)
269 
270   /** Get a resource from the GATE resources directory as a byte array.
271     * The resource name should be relative to <code>resourcePath<code> which
272     * is equal with <TT>gate/resources</TT>; e.g.
273     * for a resource stored as <TT>gate/resources/jape/Test11.jape</TT>,
274     * this method should be passed the name <TT>jape/Test11.jape</TT>.
275     */
276   public static byte[] getGateResourceAsByteArray(String resourceName)
277     throws IOException, IndexOutOfBoundsException, ArrayStoreException {
278 
279     InputStream resourceInputStream = getGateResourceAsStream(resourceName);
280     BufferedInputStream resourceStream =
281       new BufferedInputStream(resourceInputStream);
282     byte b;
283     final int bufSize = 1024;
284     byte[] buf = new byte[bufSize];
285     int i = 0;
286 
287     // get the whole resource into buf (expanding the array as needed)
288     while( (b = (byte) resourceStream.read()) != -1 ) {
289       if(i == buf.length) {
290         byte[] newBuf = new byte[buf.length * 2];
291         System.arraycopy (buf,0,newBuf,0,i);
292         buf = newBuf;
293       }
294       buf[i++] = b;
295     }
296 
297     // close the resource stream
298     resourceStream.close();
299 
300     // copy the contents of buf to an array of the correct size
301     byte[] bytes = new byte[i];
302 
303     // copy from buf to bytes
304     System.arraycopy (buf,0,bytes,0,i);
305     return bytes;
306   } // getResourceGateAsByteArray(String)
307 
308 
309   /** Get a resource from the classpath as an InputStream.
310     */
311   public static InputStream getResourceAsStream(String resourceName)
312     throws IOException {
313 
314     return  Files.class.getResourceAsStream(resourceName);
315     //return  ClassLoader.getSystemResourceAsStream(resourceName);
316   } // getResourceAsStream(String)
317 
318   /** Get a resource from the GATE resources directory as an InputStream.
319     * The resource name should be relative to <code>resourcePath<code> which
320     * is equal with <TT>gate/resources</TT>; e.g.
321     * for a resource stored as <TT>gate/resources/jape/Test11.jape</TT>,
322     * this method should be passed the name <TT>jape/Test11.jape</TT>.
323     */
324   public static InputStream getGateResourceAsStream(String resourceName)
325     throws IOException {
326 
327     if(resourceName.startsWith("/") || resourceName.startsWith("\\") )
328       return getResourceAsStream(resourcePath + resourceName);
329     else return getResourceAsStream(resourcePath + "/" + resourceName);
330   } // getResourceAsStream(String)
331 
332 
333   /** This method takes a regular expression and a directory name and returns
334     * the set of Files that match the pattern under that directory.
335     */
336   public static Set Find(String regex, String pathFile) {
337     Set regexfinal = new HashSet();
338     String[] tab;
339     File file = null;
340     PrintStream printstr = null;
341     Object obj = new Object();
342     //open a file
343     try {
344       file = new File(pathFile);
345     } catch(NullPointerException npe) {
346       npe.printStackTrace(Err.getPrintWriter());
347     }
348       
349       Pattern pattern = Pattern.compile("^"+regex);
350       
351       if (file.isDirectory()){
352         tab = file.list();
353         for (int i=0;i<=tab.length-1;i++){
354           String finalPath = pathFile+"/"+tab[i];
355           Matcher matcher = pattern.matcher(finalPath);
356           if (matcher.matches()){
357               regexfinal.add(finalPath);
358           }         
359         }
360       }
361       else {
362         if (file.isFile()){
363             Matcher matcher = pattern.matcher(pathFile);
364             if (matcher.matches()){
365                 regexfinal.add(pathFile);
366             }         
367         }
368       }
369 
370     return regexfinal;
371   } //find
372 
373   /** Recursively remove a directory <B>even if it contains other files
374     * or directories</B>. Returns true when the directory and all its
375     * contents are successfully removed, else false.
376     */
377   public static boolean rmdir(File dir) {
378     if(dir == null || ! dir.isDirectory()) // only delete directories
379       return false;
380 
381     // list all the members of the dir
382     String[] members = dir.list();
383 
384     // return value indicating success or failure
385     boolean succeeded = true;
386 
387     // for each member, if is dir then recursively delete; if file then delete
388     for(int i = 0; i<members.length; i++) {
389       File member = new File(dir, members[i]);
390 
391       if(member.isFile()) {
392         if(! member.delete())
393           succeeded = false;
394       } else {
395         if(! Files.rmdir(member))
396           succeeded = false;
397       }
398     }
399 
400     // delete the directory itself
401     dir.delete();
402 
403     // return status value
404     return succeeded;
405   } // rmdir(File)
406 
407   /**
408    * This method updates an XML element with a new set of attributes.
409    * If the element is not found the XML is unchanged. The attributes
410    * keys and values must all be Strings.
411    *
412    * @param xml A stream of the XML data.
413    * @param elementName The name of the element to update.
414    * @param newAttrs The new attributes to place on the element.
415    * @return A string of the whole XML source, with the element updated.
416    */
417   public static String updateXmlElement(
418     BufferedReader xml, String elementName, Map newAttrs
419   ) throws IOException {
420     String line = null;
421     String nl = Strings.getNl();
422     StringBuffer newXml = new StringBuffer();
423 
424     // read the whole source
425     while( ( line = xml.readLine() ) != null ) {
426       newXml.append(line);
427       newXml.append(nl);
428     }
429 
430     // find the location of the element
431     int start = newXml.toString().indexOf("<" + elementName);
432     if(start == -1) return newXml.toString();
433     int end =   newXml.toString().indexOf(">", start);
434     if(end == -1)   return newXml.toString();
435 
436     // check if the old element is empty (ends in "/>") or not
437     boolean isEmpty = false;
438     if(newXml.toString().charAt(end - 1) == '/') isEmpty = true;
439 
440     // create the new element string with the new attributes
441     StringBuffer newElement = new StringBuffer();
442     newElement.append("<");
443     newElement.append(elementName);
444 
445     // add in the new attributes
446     Iterator iter = newAttrs.entrySet().iterator();
447     while(iter.hasNext()) {
448       Map.Entry entry = (Map.Entry) iter.next();
449       String key =   (String) entry.getKey();
450       String value = (String) entry.getValue();
451 
452       newElement.append(" ");newElement.append(key);
453       newElement.append("=\"");
454       newElement.append(value);
455       newElement.append("\"" + nl);
456     }
457 
458     // terminate the element
459     if(isEmpty) newElement.append("/");
460     newElement.append(">");
461 
462     // replace the old string
463     newXml.replace(start, end + 1, newElement.toString());
464 
465     return newXml.toString();
466   } // updateXmlElement(Reader...)
467 
468   /**
469    * This method updates an XML element in an XML file
470    * with a new set of attributes. If the element is not found the XML
471    * file is unchanged. The attributes keys and values must all be Strings.
472    *
473    * @param xmlFile An XML file.
474    * @param elementName The name of the element to update.
475    * @param newAttrs The new attributes to place on the element.
476    * @return A string of the whole XML file, with the element updated (the
477    *   file is also overwritten).
478    */
479   public static String updateXmlElement(
480     File xmlFile, String elementName, Map newAttrs
481   ) throws IOException {
482     BufferedReader fileReader = new BufferedReader(new FileReader(xmlFile));
483     String newXml = updateXmlElement(fileReader, elementName, newAttrs);
484     fileReader.close();
485 
486     FileWriter fileWriter = new FileWriter(xmlFile);
487     fileWriter.write(newXml);
488     fileWriter.close();
489 
490     return newXml;
491   } // updateXmlElement(File...)
492 
493 
494   /**
495    * Convert a file: URL to a <code>java.io.File</code>.  First tries to parse
496    * the URL's toExternalForm as a URI and create the File object from that
497    * URI.  If this fails, just uses the path part of the URL.  This handles
498    * URLs that contain spaces or other unusual characters, both as literals and
499    * when encoded as (e.g.) %20.
500    *
501    * @exception IllegalArgumentException if the URL is not convertable into a
502    * File.
503    */
504   public static File fileFromURL(URL theURL) throws IllegalArgumentException {
505     try {
506       URI uri = new URI(theURL.toExternalForm());
507       return new File(uri);
508     }
509     catch(URISyntaxException use) {
510       try {
511         URI uri = new URI(theURL.getProtocol(), null, theURL.getPath(), null, null);
512         return new File(uri);
513       }
514       catch(URISyntaxException use2) {
515         throw new IllegalArgumentException("Cannot convert " + theURL + " to a file path");
516       }
517     }
518   }
519 
520 } // class Files
521