001/** 002 * Licensed to the Apache Software Foundation (ASF) under one 003 * or more contributor license agreements. See the NOTICE file 004 * distributed with this work for additional information 005 * regarding copyright ownership. The ASF licenses this file 006 * to you under the Apache License, Version 2.0 (the 007 * "License"); you may not use this file except in compliance 008 * with the License. You may obtain a copy of the License at 009 * 010 * http://www.apache.org/licenses/LICENSE-2.0 011 * 012 * Unless required by applicable law or agreed to in writing, software 013 * distributed under the License is distributed on an "AS IS" BASIS, 014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 015 * See the License for the specific language governing permissions and 016 * limitations under the License. 017 */ 018 019package org.apache.hadoop.fs; 020 021import java.io.IOException; 022import java.net.URI; 023import java.net.URISyntaxException; 024import java.util.regex.Pattern; 025 026import org.apache.avro.reflect.Stringable; 027import org.apache.commons.lang.StringUtils; 028import org.apache.hadoop.HadoopIllegalArgumentException; 029import org.apache.hadoop.classification.InterfaceAudience; 030import org.apache.hadoop.classification.InterfaceStability; 031import org.apache.hadoop.conf.Configuration; 032 033/** Names a file or directory in a {@link FileSystem}. 034 * Path strings use slash as the directory separator. 035 */ 036@Stringable 037@InterfaceAudience.Public 038@InterfaceStability.Stable 039public class Path implements Comparable { 040 041 /** The directory separator, a slash. */ 042 public static final String SEPARATOR = "/"; 043 public static final char SEPARATOR_CHAR = '/'; 044 045 public static final String CUR_DIR = "."; 046 047 public static final boolean WINDOWS 048 = System.getProperty("os.name").startsWith("Windows"); 049 050 /** 051 * Pre-compiled regular expressions to detect path formats. 052 */ 053 private static final Pattern hasUriScheme = 054 Pattern.compile("^[a-zA-Z][a-zA-Z0-9+-.]+:"); 055 private static final Pattern hasDriveLetterSpecifier = 056 Pattern.compile("^/?[a-zA-Z]:"); 057 058 private URI uri; // a hierarchical uri 059 060 /** 061 * Pathnames with scheme and relative path are illegal. 062 */ 063 void checkNotSchemeWithRelative() { 064 if (toUri().isAbsolute() && !isUriPathAbsolute()) { 065 throw new HadoopIllegalArgumentException( 066 "Unsupported name: has scheme but relative path-part"); 067 } 068 } 069 070 void checkNotRelative() { 071 if (!isAbsolute() && toUri().getScheme() == null) { 072 throw new HadoopIllegalArgumentException("Path is relative"); 073 } 074 } 075 076 public static Path getPathWithoutSchemeAndAuthority(Path path) { 077 // This code depends on Path.toString() to remove the leading slash before 078 // the drive specification on Windows. 079 Path newPath = path.isUriPathAbsolute() ? 080 new Path(null, null, path.toUri().getPath()) : 081 path; 082 return newPath; 083 } 084 085 /** Resolve a child path against a parent path. */ 086 public Path(String parent, String child) { 087 this(new Path(parent), new Path(child)); 088 } 089 090 /** Resolve a child path against a parent path. */ 091 public Path(Path parent, String child) { 092 this(parent, new Path(child)); 093 } 094 095 /** Resolve a child path against a parent path. */ 096 public Path(String parent, Path child) { 097 this(new Path(parent), child); 098 } 099 100 /** Resolve a child path against a parent path. */ 101 public Path(Path parent, Path child) { 102 // Add a slash to parent's path so resolution is compatible with URI's 103 URI parentUri = parent.uri; 104 String parentPath = parentUri.getPath(); 105 if (!(parentPath.equals("/") || parentPath.isEmpty())) { 106 try { 107 parentUri = new URI(parentUri.getScheme(), parentUri.getAuthority(), 108 parentUri.getPath()+"/", null, parentUri.getFragment()); 109 } catch (URISyntaxException e) { 110 throw new IllegalArgumentException(e); 111 } 112 } 113 URI resolved = parentUri.resolve(child.uri); 114 initialize(resolved.getScheme(), resolved.getAuthority(), 115 resolved.getPath(), resolved.getFragment()); 116 } 117 118 private void checkPathArg( String path ) throws IllegalArgumentException { 119 // disallow construction of a Path from an empty string 120 if ( path == null ) { 121 throw new IllegalArgumentException( 122 "Can not create a Path from a null string"); 123 } 124 if( path.length() == 0 ) { 125 throw new IllegalArgumentException( 126 "Can not create a Path from an empty string"); 127 } 128 } 129 130 /** Construct a path from a String. Path strings are URIs, but with 131 * unescaped elements and some additional normalization. */ 132 public Path(String pathString) throws IllegalArgumentException { 133 checkPathArg( pathString ); 134 135 // We can't use 'new URI(String)' directly, since it assumes things are 136 // escaped, which we don't require of Paths. 137 138 // add a slash in front of paths with Windows drive letters 139 if (hasWindowsDrive(pathString) && pathString.charAt(0) != '/') { 140 pathString = "/" + pathString; 141 } 142 143 // parse uri components 144 String scheme = null; 145 String authority = null; 146 147 int start = 0; 148 149 // parse uri scheme, if any 150 int colon = pathString.indexOf(':'); 151 int slash = pathString.indexOf('/'); 152 if ((colon != -1) && 153 ((slash == -1) || (colon < slash))) { // has a scheme 154 scheme = pathString.substring(0, colon); 155 start = colon+1; 156 } 157 158 // parse uri authority, if any 159 if (pathString.startsWith("//", start) && 160 (pathString.length()-start > 2)) { // has authority 161 int nextSlash = pathString.indexOf('/', start+2); 162 int authEnd = nextSlash > 0 ? nextSlash : pathString.length(); 163 authority = pathString.substring(start+2, authEnd); 164 start = authEnd; 165 } 166 167 // uri path is the rest of the string -- query & fragment not supported 168 String path = pathString.substring(start, pathString.length()); 169 170 initialize(scheme, authority, path, null); 171 } 172 173 /** 174 * Construct a path from a URI 175 */ 176 public Path(URI aUri) { 177 uri = aUri.normalize(); 178 } 179 180 /** Construct a Path from components. */ 181 public Path(String scheme, String authority, String path) { 182 checkPathArg( path ); 183 184 // add a slash in front of paths with Windows drive letters 185 if (hasWindowsDrive(path) && path.charAt(0) != '/') { 186 path = "/" + path; 187 } 188 189 // add "./" in front of Linux relative paths so that a path containing 190 // a colon e.q. "a:b" will not be interpreted as scheme "a". 191 if (!WINDOWS && path.charAt(0) != '/') { 192 path = "./" + path; 193 } 194 195 initialize(scheme, authority, path, null); 196 } 197 198 private void initialize(String scheme, String authority, String path, 199 String fragment) { 200 try { 201 this.uri = new URI(scheme, authority, normalizePath(scheme, path), null, fragment) 202 .normalize(); 203 } catch (URISyntaxException e) { 204 throw new IllegalArgumentException(e); 205 } 206 } 207 208 /** 209 * Merge 2 paths such that the second path is appended relative to the first. 210 * The returned path has the scheme and authority of the first path. On 211 * Windows, the drive specification in the second path is discarded. 212 * 213 * @param path1 Path first path 214 * @param path2 Path second path, to be appended relative to path1 215 * @return Path merged path 216 */ 217 public static Path mergePaths(Path path1, Path path2) { 218 String path2Str = path2.toUri().getPath(); 219 path2Str = path2Str.substring(startPositionWithoutWindowsDrive(path2Str)); 220 // Add path components explicitly, because simply concatenating two path 221 // string is not safe, for example: 222 // "/" + "/foo" yields "//foo", which will be parsed as authority in Path 223 return new Path(path1.toUri().getScheme(), 224 path1.toUri().getAuthority(), 225 path1.toUri().getPath() + path2Str); 226 } 227 228 /** 229 * Normalize a path string to use non-duplicated forward slashes as 230 * the path separator and remove any trailing path separators. 231 * @param scheme Supplies the URI scheme. Used to deduce whether we 232 * should replace backslashes or not. 233 * @param path Supplies the scheme-specific part 234 * @return Normalized path string. 235 */ 236 private static String normalizePath(String scheme, String path) { 237 // Remove double forward slashes. 238 path = StringUtils.replace(path, "//", "/"); 239 240 // Remove backslashes if this looks like a Windows path. Avoid 241 // the substitution if it looks like a non-local URI. 242 if (WINDOWS && 243 (hasWindowsDrive(path) || 244 (scheme == null) || 245 (scheme.isEmpty()) || 246 (scheme.equals("file")))) { 247 path = StringUtils.replace(path, "\\", "/"); 248 } 249 250 // trim trailing slash from non-root path (ignoring windows drive) 251 int minLength = startPositionWithoutWindowsDrive(path) + 1; 252 if (path.length() > minLength && path.endsWith(SEPARATOR)) { 253 path = path.substring(0, path.length()-1); 254 } 255 256 return path; 257 } 258 259 private static boolean hasWindowsDrive(String path) { 260 return (WINDOWS && hasDriveLetterSpecifier.matcher(path).find()); 261 } 262 263 private static int startPositionWithoutWindowsDrive(String path) { 264 if (hasWindowsDrive(path)) { 265 return path.charAt(0) == SEPARATOR_CHAR ? 3 : 2; 266 } else { 267 return 0; 268 } 269 } 270 271 /** 272 * Determine whether a given path string represents an absolute path on 273 * Windows. e.g. "C:/a/b" is an absolute path. "C:a/b" is not. 274 * 275 * @param pathString Supplies the path string to evaluate. 276 * @param slashed true if the given path is prefixed with "/". 277 * @return true if the supplied path looks like an absolute path with a Windows 278 * drive-specifier. 279 */ 280 public static boolean isWindowsAbsolutePath(final String pathString, 281 final boolean slashed) { 282 int start = startPositionWithoutWindowsDrive(pathString); 283 return start > 0 284 && pathString.length() > start 285 && ((pathString.charAt(start) == SEPARATOR_CHAR) || 286 (pathString.charAt(start) == '\\')); 287 } 288 289 /** Convert this to a URI. */ 290 public URI toUri() { return uri; } 291 292 /** Return the FileSystem that owns this Path. */ 293 public FileSystem getFileSystem(Configuration conf) throws IOException { 294 return FileSystem.get(this.toUri(), conf); 295 } 296 297 /** 298 * Is an absolute path (ie a slash relative path part) 299 * AND a scheme is null AND authority is null. 300 */ 301 public boolean isAbsoluteAndSchemeAuthorityNull() { 302 return (isUriPathAbsolute() && 303 uri.getScheme() == null && uri.getAuthority() == null); 304 } 305 306 /** 307 * True if the path component (i.e. directory) of this URI is absolute. 308 */ 309 public boolean isUriPathAbsolute() { 310 int start = startPositionWithoutWindowsDrive(uri.getPath()); 311 return uri.getPath().startsWith(SEPARATOR, start); 312 } 313 314 /** True if the path is not a relative path and starts with root. */ 315 public boolean isAbsolute() { 316 return isUriPathAbsolute(); 317 } 318 319 /** 320 * @return true if and only if this path represents the root of a file system 321 */ 322 public boolean isRoot() { 323 return getParent() == null; 324 } 325 326 /** Returns the final component of this path.*/ 327 public String getName() { 328 String path = uri.getPath(); 329 int slash = path.lastIndexOf(SEPARATOR); 330 return path.substring(slash+1); 331 } 332 333 /** Returns the parent of a path or null if at root. */ 334 public Path getParent() { 335 String path = uri.getPath(); 336 int lastSlash = path.lastIndexOf('/'); 337 int start = startPositionWithoutWindowsDrive(path); 338 if ((path.length() == start) || // empty path 339 (lastSlash == start && path.length() == start+1)) { // at root 340 return null; 341 } 342 String parent; 343 if (lastSlash==-1) { 344 parent = CUR_DIR; 345 } else { 346 parent = path.substring(0, lastSlash==start?start+1:lastSlash); 347 } 348 return new Path(uri.getScheme(), uri.getAuthority(), parent); 349 } 350 351 /** Adds a suffix to the final name in the path.*/ 352 public Path suffix(String suffix) { 353 return new Path(getParent(), getName()+suffix); 354 } 355 356 @Override 357 public String toString() { 358 // we can't use uri.toString(), which escapes everything, because we want 359 // illegal characters unescaped in the string, for glob processing, etc. 360 StringBuilder buffer = new StringBuilder(); 361 if (uri.getScheme() != null) { 362 buffer.append(uri.getScheme()); 363 buffer.append(":"); 364 } 365 if (uri.getAuthority() != null) { 366 buffer.append("//"); 367 buffer.append(uri.getAuthority()); 368 } 369 if (uri.getPath() != null) { 370 String path = uri.getPath(); 371 if (path.indexOf('/')==0 && 372 hasWindowsDrive(path) && // has windows drive 373 uri.getScheme() == null && // but no scheme 374 uri.getAuthority() == null) // or authority 375 path = path.substring(1); // remove slash before drive 376 buffer.append(path); 377 } 378 if (uri.getFragment() != null) { 379 buffer.append("#"); 380 buffer.append(uri.getFragment()); 381 } 382 return buffer.toString(); 383 } 384 385 @Override 386 public boolean equals(Object o) { 387 if (!(o instanceof Path)) { 388 return false; 389 } 390 Path that = (Path)o; 391 return this.uri.equals(that.uri); 392 } 393 394 @Override 395 public int hashCode() { 396 return uri.hashCode(); 397 } 398 399 @Override 400 public int compareTo(Object o) { 401 Path that = (Path)o; 402 return this.uri.compareTo(that.uri); 403 } 404 405 /** Return the number of elements in this path. */ 406 public int depth() { 407 String path = uri.getPath(); 408 int depth = 0; 409 int slash = path.length()==1 && path.charAt(0)=='/' ? -1 : 0; 410 while (slash != -1) { 411 depth++; 412 slash = path.indexOf(SEPARATOR, slash+1); 413 } 414 return depth; 415 } 416 417 /** 418 * Returns a qualified path object. 419 * 420 * Deprecated - use {@link #makeQualified(URI, Path)} 421 */ 422 @Deprecated 423 public Path makeQualified(FileSystem fs) { 424 return makeQualified(fs.getUri(), fs.getWorkingDirectory()); 425 } 426 427 /** Returns a qualified path object. */ 428 @InterfaceAudience.LimitedPrivate({"HDFS", "MapReduce"}) 429 public Path makeQualified(URI defaultUri, Path workingDir ) { 430 Path path = this; 431 if (!isAbsolute()) { 432 path = new Path(workingDir, this); 433 } 434 435 URI pathUri = path.toUri(); 436 437 String scheme = pathUri.getScheme(); 438 String authority = pathUri.getAuthority(); 439 String fragment = pathUri.getFragment(); 440 441 if (scheme != null && 442 (authority != null || defaultUri.getAuthority() == null)) 443 return path; 444 445 if (scheme == null) { 446 scheme = defaultUri.getScheme(); 447 } 448 449 if (authority == null) { 450 authority = defaultUri.getAuthority(); 451 if (authority == null) { 452 authority = ""; 453 } 454 } 455 456 URI newUri = null; 457 try { 458 newUri = new URI(scheme, authority , 459 normalizePath(scheme, pathUri.getPath()), null, fragment); 460 } catch (URISyntaxException e) { 461 throw new IllegalArgumentException(e); 462 } 463 return new Path(newUri); 464 } 465}