2525
2626import org .apache .hadoop .classification .InterfaceAudience ;
2727import org .apache .hadoop .classification .InterfaceStability ;
28+ import org .apache .hadoop .util .DurationInfo ;
2829
2930import org .apache .htrace .core .TraceScope ;
3031import org .apache .htrace .core .Tracer ;
3132import org .slf4j .Logger ;
3233import org .slf4j .LoggerFactory ;
3334
35+ import static com .google .common .base .Preconditions .checkNotNull ;
36+
37+ /**
38+ * Implementation of {@link FileSystem#globStatus(Path, PathFilter)}.
39+ * This has historically been package-private; it has been opened
40+ * up for object stores within the {@code hadoop-*} codebase ONLY.
41+ * It could be expanded for external store implementations in future.
42+ */
3443@ InterfaceAudience .Private
3544@ InterfaceStability .Unstable
36- class Globber {
45+ public class Globber {
3746 public static final Logger LOG =
3847 LoggerFactory .getLogger (Globber .class .getName ());
3948
@@ -42,21 +51,62 @@ class Globber {
4251 private final Path pathPattern ;
4352 private final PathFilter filter ;
4453 private final Tracer tracer ;
45-
46- public Globber (FileSystem fs , Path pathPattern , PathFilter filter ) {
54+ private final boolean resolveSymlinks ;
55+
56+ Globber (FileSystem fs , Path pathPattern , PathFilter filter ) {
57+ this .fs = fs ;
58+ this .fc = null ;
59+ this .pathPattern = pathPattern ;
60+ this .filter = filter ;
61+ this .tracer = FsTracer .get (fs .getConf ());
62+ this .resolveSymlinks = true ;
63+ }
64+
65+ Globber (FileContext fc , Path pathPattern , PathFilter filter ) {
66+ this .fs = null ;
67+ this .fc = fc ;
68+ this .pathPattern = pathPattern ;
69+ this .filter = filter ;
70+ this .tracer = fc .getTracer ();
71+ this .resolveSymlinks = true ;
72+ }
73+
74+ /**
75+ * Filesystem constructor for use by {@link GlobBuilder}.
76+ * @param fs filesystem
77+ * @param pathPattern path pattern
78+ * @param filter optional filter
79+ * @param resolveSymlinks should symlinks be resolved.
80+ */
81+ private Globber (FileSystem fs , Path pathPattern , PathFilter filter ,
82+ boolean resolveSymlinks ) {
4783 this .fs = fs ;
4884 this .fc = null ;
4985 this .pathPattern = pathPattern ;
5086 this .filter = filter ;
87+ this .resolveSymlinks = resolveSymlinks ;
5188 this .tracer = FsTracer .get (fs .getConf ());
89+ LOG .debug ("Created Globber for path={}, symlinks={}" ,
90+ pathPattern , resolveSymlinks );
5291 }
5392
54- public Globber (FileContext fc , Path pathPattern , PathFilter filter ) {
93+ /**
94+ * File Context constructor for use by {@link GlobBuilder}.
95+ * @param fc file context
96+ * @param pathPattern path pattern
97+ * @param filter optional filter
98+ * @param resolveSymlinks should symlinks be resolved.
99+ */
100+ private Globber (FileContext fc , Path pathPattern , PathFilter filter ,
101+ boolean resolveSymlinks ) {
55102 this .fs = null ;
56103 this .fc = fc ;
57104 this .pathPattern = pathPattern ;
58105 this .filter = filter ;
106+ this .resolveSymlinks = resolveSymlinks ;
59107 this .tracer = fc .getTracer ();
108+ LOG .debug ("Created Globber path={}, symlinks={}" ,
109+ pathPattern , resolveSymlinks );
60110 }
61111
62112 private FileStatus getFileStatus (Path path ) throws IOException {
@@ -67,6 +117,7 @@ private FileStatus getFileStatus(Path path) throws IOException {
67117 return fc .getFileStatus (path );
68118 }
69119 } catch (FileNotFoundException e ) {
120+ LOG .debug ("getFileStatus({}) failed; returning null" , path , e );
70121 return null ;
71122 }
72123 }
@@ -79,6 +130,7 @@ private FileStatus[] listStatus(Path path) throws IOException {
79130 return fc .util ().listStatus (path );
80131 }
81132 } catch (FileNotFoundException e ) {
133+ LOG .debug ("listStatus({}) failed; returning empty array" , path , e );
82134 return new FileStatus [0 ];
83135 }
84136 }
@@ -107,7 +159,7 @@ private static String unescapePathComponent(String name) {
107159 */
108160 private static List <String > getPathComponents (String path )
109161 throws IOException {
110- ArrayList <String > ret = new ArrayList <String >();
162+ ArrayList <String > ret = new ArrayList <>();
111163 for (String component : path .split (Path .SEPARATOR )) {
112164 if (!component .isEmpty ()) {
113165 ret .add (component );
@@ -145,7 +197,8 @@ private String authorityFromPath(Path path) throws IOException {
145197 public FileStatus [] glob () throws IOException {
146198 TraceScope scope = tracer .newScope ("Globber#glob" );
147199 scope .addKVAnnotation ("pattern" , pathPattern .toUri ().getPath ());
148- try {
200+ try (DurationInfo ignored = new DurationInfo (LOG , false ,
201+ "glob %s" , pathPattern )) {
149202 return doGlob ();
150203 } finally {
151204 scope .close ();
@@ -164,24 +217,26 @@ private FileStatus[] doGlob() throws IOException {
164217 String pathPatternString = pathPattern .toUri ().getPath ();
165218 List <String > flattenedPatterns = GlobExpander .expand (pathPatternString );
166219
220+ LOG .debug ("Filesystem glob {}" , pathPatternString );
167221 // Now loop over all flattened patterns. In every case, we'll be trying to
168222 // match them to entries in the filesystem.
169223 ArrayList <FileStatus > results =
170- new ArrayList <FileStatus >(flattenedPatterns .size ());
224+ new ArrayList <>(flattenedPatterns .size ());
171225 boolean sawWildcard = false ;
172226 for (String flatPattern : flattenedPatterns ) {
173227 // Get the absolute path for this flattened pattern. We couldn't do
174228 // this prior to flattening because of patterns like {/,a}, where which
175229 // path you go down influences how the path must be made absolute.
176230 Path absPattern = fixRelativePart (new Path (
177231 flatPattern .isEmpty () ? Path .CUR_DIR : flatPattern ));
232+ LOG .debug ("Pattern: {}" , absPattern );
178233 // Now we break the flattened, absolute pattern into path components.
179234 // For example, /a/*/c would be broken into the list [a, *, c]
180235 List <String > components =
181236 getPathComponents (absPattern .toUri ().getPath ());
182237 // Starting out at the root of the filesystem, we try to match
183238 // filesystem entries against pattern components.
184- ArrayList <FileStatus > candidates = new ArrayList <FileStatus >(1 );
239+ ArrayList <FileStatus > candidates = new ArrayList <>(1 );
185240 // To get the "real" FileStatus of root, we'd have to do an expensive
186241 // RPC to the NameNode. So we create a placeholder FileStatus which has
187242 // the correct path, but defaults for the rest of the information.
@@ -206,12 +261,13 @@ private FileStatus[] doGlob() throws IOException {
206261 for (int componentIdx = 0 ; componentIdx < components .size ();
207262 componentIdx ++) {
208263 ArrayList <FileStatus > newCandidates =
209- new ArrayList <FileStatus >(candidates .size ());
264+ new ArrayList <>(candidates .size ());
210265 GlobFilter globFilter = new GlobFilter (components .get (componentIdx ));
211266 String component = unescapePathComponent (components .get (componentIdx ));
212267 if (globFilter .hasPattern ()) {
213268 sawWildcard = true ;
214269 }
270+ LOG .debug ("Component {}, patterned={}" , component , sawWildcard );
215271 if (candidates .isEmpty () && sawWildcard ) {
216272 // Optimization: if there are no more candidates left, stop examining
217273 // the path components. We can only do this if we've already seen
@@ -245,19 +301,31 @@ private FileStatus[] doGlob() throws IOException {
245301 // incorrectly conclude that /a/b was a file and should not match
246302 // /a/*/*. So we use getFileStatus of the path we just listed to
247303 // disambiguate.
248- Path path = candidate .getPath ();
249- FileStatus status = getFileStatus (path );
250- if (status == null ) {
251- // null means the file was not found
252- LOG .warn ("File/directory {} not found:"
253- + " it may have been deleted."
254- + " If this is an object store, this can be a sign of"
255- + " eventual consistency problems." ,
256- path );
257- continue ;
258- }
259- if (!status .isDirectory ()) {
260- continue ;
304+ if (resolveSymlinks ) {
305+ LOG .debug ("listStatus found one entry; disambiguating {}" ,
306+ children [0 ]);
307+ Path path = candidate .getPath ();
308+ FileStatus status = getFileStatus (path );
309+ if (status == null ) {
310+ // null means the file was not found
311+ LOG .warn ("File/directory {} not found:"
312+ + " it may have been deleted."
313+ + " If this is an object store, this can be a sign of"
314+ + " eventual consistency problems." ,
315+ path );
316+ continue ;
317+ }
318+ if (!status .isDirectory ()) {
319+ LOG .debug ("Resolved entry is a file; skipping: {}" , status );
320+ continue ;
321+ }
322+ } else {
323+ // there's no symlinks in this store, so no need to issue
324+ // another call, just see if the result is a directory or a file
325+ if (children [0 ].getPath ().equals (candidate .getPath ())) {
326+ // the listing status is of a file
327+ continue ;
328+ }
261329 }
262330 }
263331 for (FileStatus child : children ) {
@@ -312,6 +380,8 @@ private FileStatus[] doGlob() throws IOException {
312380 */
313381 if ((!sawWildcard ) && results .isEmpty () &&
314382 (flattenedPatterns .size () <= 1 )) {
383+ LOG .debug ("No matches found and there was no wildcard in the path {}" ,
384+ pathPattern );
315385 return null ;
316386 }
317387 /*
@@ -324,4 +394,98 @@ private FileStatus[] doGlob() throws IOException {
324394 Arrays .sort (ret );
325395 return ret ;
326396 }
397+
398+ /**
399+ * Create a builder for a Globber, bonded to the specific filesystem.
400+ * @param filesystem filesystem
401+ * @return the builder to finish configuring.
402+ */
403+ public static GlobBuilder createGlobber (FileSystem filesystem ) {
404+ return new GlobBuilder (filesystem );
405+ }
406+
407+ /**
408+ * Create a builder for a Globber, bonded to the specific file
409+ * context.
410+ * @param fileContext file context.
411+ * @return the builder to finish configuring.
412+ */
413+ public static GlobBuilder createGlobber (FileContext fileContext ) {
414+ return new GlobBuilder (fileContext );
415+ }
416+
417+ /**
418+ * Builder for Globber instances.
419+ */
420+ @ InterfaceAudience .Private
421+ public static class GlobBuilder {
422+
423+ private final FileSystem fs ;
424+
425+ private final FileContext fc ;
426+
427+ private Path pathPattern ;
428+
429+ private PathFilter filter ;
430+
431+ private boolean resolveSymlinks = true ;
432+
433+ /**
434+ * Construct bonded to a file context.
435+ * @param fc file context.
436+ */
437+ public GlobBuilder (final FileContext fc ) {
438+ this .fs = null ;
439+ this .fc = checkNotNull (fc );
440+ }
441+
442+ /**
443+ * Construct bonded to a filesystem.
444+ * @param fs file system.
445+ */
446+ public GlobBuilder (final FileSystem fs ) {
447+ this .fs = checkNotNull (fs );
448+ this .fc = null ;
449+ }
450+
451+ /**
452+ * Set the path pattern.
453+ * @param pattern pattern to use.
454+ * @return the builder
455+ */
456+ public GlobBuilder withPathPattern (Path pattern ) {
457+ pathPattern = pattern ;
458+ return this ;
459+ }
460+
461+ /**
462+ * Set the path filter.
463+ * @param pathFilter filter
464+ * @return the builder
465+ */
466+ public GlobBuilder withPathFiltern (PathFilter pathFilter ) {
467+ filter = pathFilter ;
468+ return this ;
469+ }
470+
471+ /**
472+ * Set the symlink resolution policy.
473+ * @param resolve resolution flag.
474+ * @return the builder
475+ */
476+ public GlobBuilder withResolveSymlinks (boolean resolve ) {
477+ resolveSymlinks = resolve ;
478+ return this ;
479+ }
480+
481+ /**
482+ * Build the Globber.
483+ * @return a new instance.
484+ */
485+ public Globber build () {
486+ return fs != null
487+ ? new Globber (fs , pathPattern , filter , resolveSymlinks )
488+ : new Globber (fc , pathPattern , filter , resolveSymlinks );
489+ }
490+ }
327491}
0 commit comments