001/**
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 *     http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018
019package org.apache.hadoop.fs;
020
021import java.io.IOException;
022import java.net.URI;
023import java.net.URISyntaxException;
024import java.util.regex.Pattern;
025
026import org.apache.avro.reflect.Stringable;
027import org.apache.commons.lang.StringUtils;
028import org.apache.hadoop.HadoopIllegalArgumentException;
029import org.apache.hadoop.classification.InterfaceAudience;
030import org.apache.hadoop.classification.InterfaceStability;
031import org.apache.hadoop.conf.Configuration;
032
033/** Names a file or directory in a {@link FileSystem}.
034 * Path strings use slash as the directory separator.
035 */
036@Stringable
037@InterfaceAudience.Public
038@InterfaceStability.Stable
039public class Path implements Comparable {
040
  /** The directory separator, a slash. */
  public static final String SEPARATOR = "/";
  /** The directory separator as a single character. */
  public static final char SEPARATOR_CHAR = '/';
  
  /**
   * The string {@code "."}; {@link #getParent()} uses it as the parent of a
   * path that contains no separator.
   */
  public static final String CUR_DIR = ".";
  
  /**
   * True when the {@code os.name} system property starts with
   * {@code "Windows"}. Evaluated once, when this class is initialized.
   */
  public static final boolean WINDOWS
    = System.getProperty("os.name").startsWith("Windows");
049
050  /**
051   *  Pre-compiled regular expressions to detect path formats.
052   */
053  private static final Pattern hasUriScheme =
054      Pattern.compile("^[a-zA-Z][a-zA-Z0-9+-.]+:");
055  private static final Pattern hasDriveLetterSpecifier =
056      Pattern.compile("^/?[a-zA-Z]:");
057
058  private URI uri;                                // a hierarchical uri
059
060  /**
061   * Pathnames with scheme and relative path are illegal.
062   */
063  void checkNotSchemeWithRelative() {
064    if (toUri().isAbsolute() && !isUriPathAbsolute()) {
065      throw new HadoopIllegalArgumentException(
066          "Unsupported name: has scheme but relative path-part");
067    }
068  }
069
070  void checkNotRelative() {
071    if (!isAbsolute() && toUri().getScheme() == null) {
072      throw new HadoopIllegalArgumentException("Path is relative");
073    }
074  }
075
076  public static Path getPathWithoutSchemeAndAuthority(Path path) {
077    // This code depends on Path.toString() to remove the leading slash before
078    // the drive specification on Windows.
079    Path newPath = path.isUriPathAbsolute() ?
080      new Path(null, null, path.toUri().getPath()) :
081      path;
082    return newPath;
083  }
084
  /**
   * Resolve a child path against a parent path.
   * Both strings are first parsed with {@link #Path(String)}, so a null or
   * empty argument is rejected with an IllegalArgumentException.
   *
   * @param parent the parent path string
   * @param child the child path string, resolved against {@code parent}
   */
  public Path(String parent, String child) {
    this(new Path(parent), new Path(child));
  }
089
  /**
   * Resolve a child path against a parent path.
   * The child string is first parsed with {@link #Path(String)}.
   *
   * @param parent the parent path
   * @param child the child path string, resolved against {@code parent}
   */
  public Path(Path parent, String child) {
    this(parent, new Path(child));
  }
094
  /**
   * Resolve a child path against a parent path.
   * The parent string is first parsed with {@link #Path(String)}.
   *
   * @param parent the parent path string
   * @param child the child path, resolved against {@code parent}
   */
  public Path(String parent, Path child) {
    this(new Path(parent), child);
  }
099
100  /** Resolve a child path against a parent path. */
101  public Path(Path parent, Path child) {
102    // Add a slash to parent's path so resolution is compatible with URI's
103    URI parentUri = parent.uri;
104    String parentPath = parentUri.getPath();
105    if (!(parentPath.equals("/") || parentPath.isEmpty())) {
106      try {
107        parentUri = new URI(parentUri.getScheme(), parentUri.getAuthority(),
108                      parentUri.getPath()+"/", null, parentUri.getFragment());
109      } catch (URISyntaxException e) {
110        throw new IllegalArgumentException(e);
111      }
112    }
113    URI resolved = parentUri.resolve(child.uri);
114    initialize(resolved.getScheme(), resolved.getAuthority(),
115               resolved.getPath(), resolved.getFragment());
116  }
117
118  private void checkPathArg( String path ) throws IllegalArgumentException {
119    // disallow construction of a Path from an empty string
120    if ( path == null ) {
121      throw new IllegalArgumentException(
122          "Can not create a Path from a null string");
123    }
124    if( path.length() == 0 ) {
125       throw new IllegalArgumentException(
126           "Can not create a Path from an empty string");
127    }   
128  }
129  
130  /** Construct a path from a String.  Path strings are URIs, but with
131   * unescaped elements and some additional normalization. */
132  public Path(String pathString) throws IllegalArgumentException {
133    checkPathArg( pathString );
134    
135    // We can't use 'new URI(String)' directly, since it assumes things are
136    // escaped, which we don't require of Paths. 
137    
138    // add a slash in front of paths with Windows drive letters
139    if (hasWindowsDrive(pathString) && pathString.charAt(0) != '/') {
140      pathString = "/" + pathString;
141    }
142
143    // parse uri components
144    String scheme = null;
145    String authority = null;
146
147    int start = 0;
148
149    // parse uri scheme, if any
150    int colon = pathString.indexOf(':');
151    int slash = pathString.indexOf('/');
152    if ((colon != -1) &&
153        ((slash == -1) || (colon < slash))) {     // has a scheme
154      scheme = pathString.substring(0, colon);
155      start = colon+1;
156    }
157
158    // parse uri authority, if any
159    if (pathString.startsWith("//", start) &&
160        (pathString.length()-start > 2)) {       // has authority
161      int nextSlash = pathString.indexOf('/', start+2);
162      int authEnd = nextSlash > 0 ? nextSlash : pathString.length();
163      authority = pathString.substring(start+2, authEnd);
164      start = authEnd;
165    }
166
167    // uri path is the rest of the string -- query & fragment not supported
168    String path = pathString.substring(start, pathString.length());
169
170    initialize(scheme, authority, path, null);
171  }
172
  /**
   * Construct a path from a URI.
   * The URI is stored in the form returned by {@link URI#normalize()}; this
   * constructor applies no other processing to it.
   *
   * @param aUri the URI to wrap
   */
  public Path(URI aUri) {
    uri = aUri.normalize();
  }
179  
180  /** Construct a Path from components. */
181  public Path(String scheme, String authority, String path) {
182    checkPathArg( path );
183
184    // add a slash in front of paths with Windows drive letters
185    if (hasWindowsDrive(path) && path.charAt(0) != '/') {
186      path = "/" + path;
187    }
188
189    // add "./" in front of Linux relative paths so that a path containing
190    // a colon e.q. "a:b" will not be interpreted as scheme "a".
191    if (!WINDOWS && path.charAt(0) != '/') {
192      path = "./" + path;
193    }
194
195    initialize(scheme, authority, path, null);
196  }
197
198  private void initialize(String scheme, String authority, String path,
199      String fragment) {
200    try {
201      this.uri = new URI(scheme, authority, normalizePath(scheme, path), null, fragment)
202        .normalize();
203    } catch (URISyntaxException e) {
204      throw new IllegalArgumentException(e);
205    }
206  }
207
208  /**
209   * Merge 2 paths such that the second path is appended relative to the first.
210   * The returned path has the scheme and authority of the first path.  On
211   * Windows, the drive specification in the second path is discarded.
212   * 
213   * @param path1 Path first path
214   * @param path2 Path second path, to be appended relative to path1
215   * @return Path merged path
216   */
217  public static Path mergePaths(Path path1, Path path2) {
218    String path2Str = path2.toUri().getPath();
219    path2Str = path2Str.substring(startPositionWithoutWindowsDrive(path2Str));
220    // Add path components explicitly, because simply concatenating two path
221    // string is not safe, for example:
222    // "/" + "/foo" yields "//foo", which will be parsed as authority in Path
223    return new Path(path1.toUri().getScheme(), 
224        path1.toUri().getAuthority(), 
225        path1.toUri().getPath() + path2Str);
226  }
227
228  /**
229   * Normalize a path string to use non-duplicated forward slashes as
230   * the path separator and remove any trailing path separators.
231   * @param scheme Supplies the URI scheme. Used to deduce whether we
232   *               should replace backslashes or not.
233   * @param path Supplies the scheme-specific part
234   * @return Normalized path string.
235   */
236  private static String normalizePath(String scheme, String path) {
237    // Remove double forward slashes.
238    path = StringUtils.replace(path, "//", "/");
239
240    // Remove backslashes if this looks like a Windows path. Avoid
241    // the substitution if it looks like a non-local URI.
242    if (WINDOWS &&
243        (hasWindowsDrive(path) ||
244         (scheme == null) ||
245         (scheme.isEmpty()) ||
246         (scheme.equals("file")))) {
247      path = StringUtils.replace(path, "\\", "/");
248    }
249    
250    // trim trailing slash from non-root path (ignoring windows drive)
251    int minLength = startPositionWithoutWindowsDrive(path) + 1;
252    if (path.length() > minLength && path.endsWith(SEPARATOR)) {
253      path = path.substring(0, path.length()-1);
254    }
255    
256    return path;
257  }
258
259  private static boolean hasWindowsDrive(String path) {
260    return (WINDOWS && hasDriveLetterSpecifier.matcher(path).find());
261  }
262
263  private static int startPositionWithoutWindowsDrive(String path) {
264    if (hasWindowsDrive(path)) {
265      return path.charAt(0) ==  SEPARATOR_CHAR ? 3 : 2;
266    } else {
267      return 0;
268    }
269  }
270  
271  /**
272   * Determine whether a given path string represents an absolute path on
273   * Windows. e.g. "C:/a/b" is an absolute path. "C:a/b" is not.
274   *
275   * @param pathString Supplies the path string to evaluate.
276   * @param slashed true if the given path is prefixed with "/".
277   * @return true if the supplied path looks like an absolute path with a Windows
278   * drive-specifier.
279   */
280  public static boolean isWindowsAbsolutePath(final String pathString,
281                                              final boolean slashed) {
282    int start = startPositionWithoutWindowsDrive(pathString);
283    return start > 0
284        && pathString.length() > start
285        && ((pathString.charAt(start) == SEPARATOR_CHAR) ||
286            (pathString.charAt(start) == '\\'));
287  }
288
  /**
   * Convert this to a URI.
   *
   * @return the URI that backs this path (the stored instance, not a copy)
   */
  public URI toUri() { return uri; }
291
292  /** Return the FileSystem that owns this Path. */
293  public FileSystem getFileSystem(Configuration conf) throws IOException {
294    return FileSystem.get(this.toUri(), conf);
295  }
296
297  /**
298   * Is an absolute path (ie a slash relative path part)
299   *  AND  a scheme is null AND  authority is null.
300   */
301  public boolean isAbsoluteAndSchemeAuthorityNull() {
302    return  (isUriPathAbsolute() && 
303        uri.getScheme() == null && uri.getAuthority() == null);
304  }
305  
306  /**
307   *  True if the path component (i.e. directory) of this URI is absolute.
308   */
309  public boolean isUriPathAbsolute() {
310    int start = startPositionWithoutWindowsDrive(uri.getPath());
311    return uri.getPath().startsWith(SEPARATOR, start);
312   }
313  
  /**
   * True if the path is not a relative path and starts with root.
   * This simply delegates to {@link #isUriPathAbsolute()}.
   *
   * @return the result of {@link #isUriPathAbsolute()}
   */
  public boolean isAbsolute() {
     return isUriPathAbsolute();
  }
318
319  /**
320   * @return true if and only if this path represents the root of a file system
321   */
322  public boolean isRoot() {
323    return getParent() == null;
324  }
325
326  /** Returns the final component of this path.*/
327  public String getName() {
328    String path = uri.getPath();
329    int slash = path.lastIndexOf(SEPARATOR);
330    return path.substring(slash+1);
331  }
332
333  /** Returns the parent of a path or null if at root. */
334  public Path getParent() {
335    String path = uri.getPath();
336    int lastSlash = path.lastIndexOf('/');
337    int start = startPositionWithoutWindowsDrive(path);
338    if ((path.length() == start) ||               // empty path
339        (lastSlash == start && path.length() == start+1)) { // at root
340      return null;
341    }
342    String parent;
343    if (lastSlash==-1) {
344      parent = CUR_DIR;
345    } else {
346      parent = path.substring(0, lastSlash==start?start+1:lastSlash);
347    }
348    return new Path(uri.getScheme(), uri.getAuthority(), parent);
349  }
350
351  /** Adds a suffix to the final name in the path.*/
352  public Path suffix(String suffix) {
353    return new Path(getParent(), getName()+suffix);
354  }
355
356  @Override
357  public String toString() {
358    // we can't use uri.toString(), which escapes everything, because we want
359    // illegal characters unescaped in the string, for glob processing, etc.
360    StringBuilder buffer = new StringBuilder();
361    if (uri.getScheme() != null) {
362      buffer.append(uri.getScheme());
363      buffer.append(":");
364    }
365    if (uri.getAuthority() != null) {
366      buffer.append("//");
367      buffer.append(uri.getAuthority());
368    }
369    if (uri.getPath() != null) {
370      String path = uri.getPath();
371      if (path.indexOf('/')==0 &&
372          hasWindowsDrive(path) &&                // has windows drive
373          uri.getScheme() == null &&              // but no scheme
374          uri.getAuthority() == null)             // or authority
375        path = path.substring(1);                 // remove slash before drive
376      buffer.append(path);
377    }
378    if (uri.getFragment() != null) {
379      buffer.append("#");
380      buffer.append(uri.getFragment());
381    }
382    return buffer.toString();
383  }
384
385  @Override
386  public boolean equals(Object o) {
387    if (!(o instanceof Path)) {
388      return false;
389    }
390    Path that = (Path)o;
391    return this.uri.equals(that.uri);
392  }
393
  /**
   * Returns the hash code of the backing URI, the same field that
   * {@link #equals(Object)} compares.
   */
  @Override
  public int hashCode() {
    return uri.hashCode();
  }
398
399  @Override
400  public int compareTo(Object o) {
401    Path that = (Path)o;
402    return this.uri.compareTo(that.uri);
403  }
404  
405  /** Return the number of elements in this path. */
406  public int depth() {
407    String path = uri.getPath();
408    int depth = 0;
409    int slash = path.length()==1 && path.charAt(0)=='/' ? -1 : 0;
410    while (slash != -1) {
411      depth++;
412      slash = path.indexOf(SEPARATOR, slash+1);
413    }
414    return depth;
415  }
416
417  /**
418   *  Returns a qualified path object.
419   *  
420   *  Deprecated - use {@link #makeQualified(URI, Path)}
421   */
422  @Deprecated
423  public Path makeQualified(FileSystem fs) {
424    return makeQualified(fs.getUri(), fs.getWorkingDirectory());
425  }
426  
427  /** Returns a qualified path object. */
428  @InterfaceAudience.LimitedPrivate({"HDFS", "MapReduce"})
429  public Path makeQualified(URI defaultUri, Path workingDir ) {
430    Path path = this;
431    if (!isAbsolute()) {
432      path = new Path(workingDir, this);
433    }
434
435    URI pathUri = path.toUri();
436      
437    String scheme = pathUri.getScheme();
438    String authority = pathUri.getAuthority();
439    String fragment = pathUri.getFragment();
440
441    if (scheme != null &&
442        (authority != null || defaultUri.getAuthority() == null))
443      return path;
444
445    if (scheme == null) {
446      scheme = defaultUri.getScheme();
447    }
448
449    if (authority == null) {
450      authority = defaultUri.getAuthority();
451      if (authority == null) {
452        authority = "";
453      }
454    }
455    
456    URI newUri = null;
457    try {
458      newUri = new URI(scheme, authority , 
459        normalizePath(scheme, pathUri.getPath()), null, fragment);
460    } catch (URISyntaxException e) {
461      throw new IllegalArgumentException(e);
462    }
463    return new Path(newUri);
464  }
465}