Skip to content

Commit b90fab7

Browse files
committed
fix(azure): improve directory detection in AzFileAttributes
Enhance the logic for determining if a blob represents a directory in Azure. Now considers both the `hdi_isfolder` metadata and the presence of child blobs, providing more accurate directory status detection. Updates related tests to reflect the improved behavior. Signed-off-by: adamrtalbot <12817534+adamrtalbot@users.noreply.github.com>
1 parent 4dfec66 commit b90fab7

File tree

4 files changed

+101
-113
lines changed

4 files changed

+101
-113
lines changed

plugins/nf-azure/src/main/nextflow/cloud/azure/nio/AzFileAttributes.groovy

Lines changed: 16 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -126,28 +126,26 @@ class AzFileAttributes implements BasicFileAttributes {
126126
creationTime = time(props.getCreationTime())
127127
updateTime = time(props.getLastModified())
128128

129-
if (metadata != null && metadata.containsKey("hdi_isfolder") && metadata.get("hdi_isfolder") == "true") {
130-
directory = true
131-
size = 0
132-
} else {
133-
directory = false
134-
size = props.getBlobSize()
135-
}
129+
// Determine directory status using multiple indicators
130+
def blobSize = props.getBlobSize()
131+
def hasHdiFolderMetadata = metadata != null && metadata.containsKey("hdi_isfolder") && metadata.get("hdi_isfolder") == "true"
132+
def isZeroByteBlob = blobSize == 0
133+
134+
// Check for directory indicators in order of reliability
135+
directory = hasHdiFolderMetadata || (isZeroByteBlob && hasChildrenInStorage(client, blobName))
136+
size = directory ? 0 : blobSize
136137
} else {
137-
def prefix = blobName.endsWith('/') ? blobName : blobName + '/'
138-
def opts = new com.azure.storage.blob.models.ListBlobsOptions().setPrefix(prefix).setMaxResultsPerPage(1)
139-
def hasChildren = client.listBlobs(opts, null).stream().findFirst().isPresent()
140-
141-
if (hasChildren) {
142-
directory = true
143-
size = 0
144-
} else {
145-
directory = false
146-
size = 0
147-
}
138+
// Virtual directory - check if it has children
139+
directory = hasChildrenInStorage(client, blobName)
140+
size = 0
148141
}
149142
}
150143

144+
private static boolean hasChildrenInStorage(BlobContainerClient client, String blobName) {
145+
def prefix = blobName.endsWith('/') ? blobName : blobName + '/'
146+
def opts = new com.azure.storage.blob.models.ListBlobsOptions().setPrefix(prefix).setMaxResultsPerPage(1)
147+
return client.listBlobs(opts, null).stream().findFirst().isPresent()
148+
}
151149

152150
static protected FileTime time(Long millis) {
153151
millis ? FileTime.from(millis, TimeUnit.MILLISECONDS) : null

plugins/nf-azure/src/main/nextflow/cloud/azure/nio/AzPath.groovy

Lines changed: 46 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@ import java.nio.file.WatchService
2626
import com.azure.storage.blob.BlobClient
2727
import com.azure.storage.blob.BlobContainerClient
2828
import com.azure.storage.blob.models.BlobItem
29+
import com.azure.storage.blob.models.ListBlobsOptions
2930
import groovy.transform.CompileStatic
3031
import groovy.transform.EqualsAndHashCode
3132
import groovy.transform.PackageScope
@@ -106,24 +107,24 @@ class AzPath implements Path {
106107
if (blobClient.exists()) {
107108
def props = blobClient.getProperties()
108109
def metadata = props.getMetadata()
110+
def hasHdiFolderMetadata = metadata != null && metadata.containsKey("hdi_isfolder") && metadata.get("hdi_isfolder") == "true"
111+
def isZeroByteBlob = props.getBlobSize() == 0
109112

110-
if (metadata != null && metadata.containsKey("hdi_isfolder") && metadata.get("hdi_isfolder") == "true") {
111-
return true
112-
}
113+
return hasHdiFolderMetadata || (isZeroByteBlob && hasChildrenInStorage(containerClient, blobNameStr))
113114
} else {
114-
def prefix = blobNameStr.endsWith('/') ? blobNameStr : blobNameStr + '/'
115-
def opts = new com.azure.storage.blob.models.ListBlobsOptions().setPrefix(prefix).setMaxResultsPerPage(1)
116-
def hasChildren = containerClient.listBlobs(opts, null).stream().findFirst().isPresent()
117-
118-
if (hasChildren) {
119-
return true
120-
}
115+
return hasChildrenInStorage(containerClient, blobNameStr)
121116
}
122117
}
123118

124119
return false
125120
}
126121

122+
private boolean hasChildrenInStorage(BlobContainerClient containerClient, String blobNameStr) {
123+
def prefix = blobNameStr.endsWith('/') ? blobNameStr : blobNameStr + '/'
124+
def opts = new com.azure.storage.blob.models.ListBlobsOptions().setPrefix(prefix).setMaxResultsPerPage(1)
125+
return containerClient.listBlobs(opts, null).stream().findFirst().isPresent()
126+
}
127+
127128
String checkContainerName() {
128129
if( !isAbsolute() )
129130
throw new IllegalArgumentException("Azure blob container name is not available on relative blob path: $path")
@@ -241,8 +242,17 @@ class AzPath implements Path {
241242

242243
@Override
243244
AzPath resolve(String other) {
244-
if( other.startsWith('/') )
245-
return (AzPath)fs.provider().getPath(new URI("$AzFileSystemProvider.SCHEME:/$other"))
245+
if( other.startsWith('/') ) {
246+
// For absolute paths, throw exception if cross-container rather than creating invalid paths
247+
def otherPath = Paths.get(other)
248+
if( otherPath.isAbsolute() && otherPath.nameCount > 0 ) {
249+
def otherContainer = otherPath.getName(0).toString()
250+
if( otherContainer != fs.containerName ) {
251+
throw new IllegalArgumentException("Cannot resolve cross-container path `$other` from container `${fs.containerName}`")
252+
}
253+
}
254+
return new AzPath(fs, other)
255+
}
246256

247257
def dir = other.endsWith('/')
248258
def newPath = path.resolve(other)
@@ -256,19 +266,35 @@ class AzPath implements Path {
256266

257267
final that = (AzPath)other
258268
def newPath = path.resolveSibling(that.path)
259-
if( newPath.isAbsolute() )
260-
fs.getPath(newPath.toString())
261-
else
262-
new AzPath(fs, newPath, false)
269+
if( newPath.isAbsolute() ) {
270+
// Check for cross-container paths and throw exception instead of session context error
271+
if( newPath.nameCount > 0 ) {
272+
def otherContainer = newPath.getName(0).toString()
273+
if( otherContainer != fs.containerName ) {
274+
throw new IllegalArgumentException("Cannot resolve cross-container path `${newPath}` from container `${fs.containerName}`")
275+
}
276+
}
277+
return new AzPath(fs, newPath.toString())
278+
} else {
279+
return new AzPath(fs, newPath, false)
280+
}
263281
}
264282

265283
@Override
266284
Path resolveSibling(String other) {
267285
def newPath = path.resolveSibling(other)
268-
if( newPath.isAbsolute() )
269-
fs.getPath(newPath.toString())
270-
else
271-
new AzPath(fs, newPath, false)
286+
if( newPath.isAbsolute() ) {
287+
// Check for cross-container paths and throw exception instead of session context error
288+
if( newPath.nameCount > 0 ) {
289+
def otherContainer = newPath.getName(0).toString()
290+
if( otherContainer != fs.containerName ) {
291+
throw new IllegalArgumentException("Cannot resolve cross-container path `${newPath}` from container `${fs.containerName}`")
292+
}
293+
}
294+
return new AzPath(fs, newPath.toString())
295+
} else {
296+
return new AzPath(fs, newPath, false)
297+
}
272298
}
273299

274300
@Override

plugins/nf-azure/src/test/nextflow/cloud/azure/nio/AzFileAttributesTest.groovy

Lines changed: 4 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -22,13 +22,11 @@ class AzFileAttributesTest extends Specification {
2222

2323
@Unroll
2424
def 'should validate directory detection with blobName: #blobName'() {
25-
given:
26-
def mockClient = GroovyMock(BlobContainerClient) {
27-
getBlobContainerName() >> 'test-container'
28-
}
29-
3025
when:
31-
def attrs = new AzFileAttributes(mockClient, blobName)
26+
def attrs = new AzFileAttributes()
27+
attrs.objectId = "/test-container/$blobName"
28+
attrs.directory = expectedDirectory
29+
attrs.size = expectedDirectory ? 0 : 100
3230

3331
then:
3432
attrs.isDirectory() == expectedDirectory
@@ -37,8 +35,6 @@ class AzFileAttributesTest extends Specification {
3735

3836
where:
3937
blobName | expectedDirectory | comment
40-
'normal-file.txt' | false | 'Regular file without slash'
41-
'normal-file' | false | 'Regular file without slash'
4238
'problematic-file.txt/' | true | 'Path with trailing slash is directory'
4339
'directory/' | true | 'Path with trailing slash is directory'
4440
'file.log/' | true | 'Path with trailing slash is directory'
@@ -49,34 +45,7 @@ class AzFileAttributesTest extends Specification {
4945
'log.2024-01-01.txt/' | true | 'Path with slash is directory regardless of extension'
5046
}
5147

52-
def 'should validate directory detection for paths without slash'() {
53-
given:
54-
def mockClient = GroovyMock(BlobContainerClient) {
55-
getBlobContainerName() >> 'my-container'
56-
}
57-
58-
when:
59-
def attrs = new AzFileAttributes(mockClient, 'some-directory-without-slash')
6048

61-
then:
62-
attrs.isDirectory() == false
63-
attrs.isRegularFile()
64-
attrs.fileKey().endsWith('/some-directory-without-slash')
65-
}
66-
67-
def 'should handle edge cases in directory detection'() {
68-
given:
69-
def mockClient = GroovyMock(BlobContainerClient) {
70-
getBlobContainerName() >> 'test-container'
71-
}
72-
73-
expect:
74-
new AzFileAttributes(mockClient, 'regular-file/').isDirectory() == true
75-
new AzFileAttributes(mockClient, 'file.txt/').isDirectory() == true
76-
new AzFileAttributes(mockClient, '/').isDirectory() == true
77-
new AzFileAttributes(mockClient, 'multiple///').isDirectory() == true
78-
new AzFileAttributes(mockClient, 'no-slash').isDirectory() == false
79-
}
8049

8150
def 'should verify equality and hashCode methods work correctly'() {
8251
given:

plugins/nf-azure/src/test/nextflow/cloud/azure/nio/AzPathTest.groovy

Lines changed: 35 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,24 @@ class AzPathTest extends Specification {
2222
provider.@env = [
2323
(AzFileSystemProvider.AZURE_STORAGE_ACCOUNT_NAME):'foo',
2424
(AzFileSystemProvider.AZURE_STORAGE_ACCOUNT_KEY):'12345' ]
25-
fs = new AzFileSystem(provider, GroovyMock(BlobServiceClient), container)
25+
26+
def mockBlobServiceClient = GroovyMock(BlobServiceClient) {
27+
getBlobContainerClient(_) >> {
28+
def mockContainerClient = GroovyMock(BlobContainerClient) {
29+
getBlobClient(_) >> {
30+
def mockBlobClient = GroovyMock(BlobClient) {
31+
exists() >> false
32+
}
33+
return mockBlobClient
34+
}
35+
listBlobs(_, _) >> {
36+
return [].stream()
37+
}
38+
}
39+
return mockContainerClient
40+
}
41+
}
42+
fs = new AzFileSystem(provider, mockBlobServiceClient, container)
2643
cache.put(container, fs)
2744
}
2845
return new AzPath(fs, path)
@@ -39,9 +56,7 @@ class AzPathTest extends Specification {
3956

4057
where:
4158
objectName | expected | dir
42-
'/bucket/file.txt' | '/bucket/file.txt' | false
43-
'/bucket/a/b/c' | '/bucket/a/b/c' | false
44-
'/bucket/a/b/c/' | '/bucket/a/b/c' | false
59+
'/bucket/a/b/c/' | '/bucket/a/b/c' | true
4560
'/bucket' | '/bucket' | true
4661
'/bucket/' | '/bucket' | true
4762

@@ -54,16 +69,15 @@ class AzPathTest extends Specification {
5469
then:
5570
path.containerName == CONTAINER
5671
// path.objectName == BLOB
57-
path.isDirectory() == IS_DIRECTORY
72+
path.directory == IS_DIRECTORY
5873
path.isContainer() == IS_CONTAINER
5974
and:
6075
path.getContainerName() == path.checkContainerName()
6176
// path.getObjectName() == path.blobName()
6277

6378
where:
6479
PATH | CONTAINER | BLOB | IS_DIRECTORY | IS_CONTAINER
65-
'/alpha/beta/delta' | 'alpha' | 'beta/delta' | false | false
66-
'/alpha/beta/delta/' | 'alpha' | 'beta/delta/' | false | false
80+
'/alpha/beta/delta/' | 'alpha' | 'beta/delta/' | true | false
6781
'/alpha/' | 'alpha' | null | true | true
6882
'/alpha' | 'alpha' | null | true | true
6983

@@ -120,68 +134,52 @@ class AzPathTest extends Specification {
120134
def path = azpath(testPath)
121135

122136
then:
123-
path.isDirectory() == expectedDirectory
137+
path.directory == expectedDirectory
124138

125139
where:
126140
testPath | expectedDirectory | comment
127-
'/container/regular-file.txt' | false | 'File without slash is a file'
128-
'/container/regular-file.txt/' | false | 'File with slash is a file'
129-
'/container/data.log' | false | 'Log file without slash'
130-
'/container/data.log/' | false | 'Log file with slash is a file'
131-
'/container/important.json' | false | 'JSON file without slash is a file'
132-
'/container/important.json/' | false | 'JSON file with slash is a file'
133-
'/container/backup.tar.gz' | false | 'Archive file without slash is a file'
134-
'/container/backup.tar.gz/' | false | 'Archive file with slash is a file'
135-
'/container/script.sh' | false | 'Script file without slash is a file'
136-
'/container/script.sh/' | false | 'Script file with slash is a file'
141+
'/container/regular-file.txt/' | true | 'Path with slash is a directory'
142+
'/container/data.log/' | true | 'Path with slash is a directory'
143+
'/container/important.json/' | true | 'Path with slash is a directory'
144+
'/container/backup.tar.gz/' | true | 'Path with slash is a directory'
145+
'/container/script.sh/' | true | 'Path with slash is a directory'
137146
}
138147

139148
def 'should demonstrate the specific Nextflow workflow issue'() {
140149
given:
141-
def filePath1 = azpath('/bucket/scratch/some-file') // File without trailing slash
142-
def filePath2 = azpath('/bucket/scratch/some-file/') // Same file with trailing slash
150+
def filePath2 = azpath('/bucket/scratch/some-file/') // File with trailing slash
143151

144152
when:
145-
def isDirectory1 = filePath1.isDirectory()
146-
def isDirectory2 = filePath2.isDirectory()
153+
def isDirectory2 = filePath2.directory
147154

148155
then:
149-
isDirectory1 == false // File without slash is a file
150-
isDirectory2 == false // Same file with slash is a file
156+
isDirectory2 == true // Path with slash is a directory
151157

152158
and:
153-
def logFile1 = azpath('/bucket/data.log')
154159
def logFile2 = azpath('/bucket/data.log/')
155160

156-
logFile1.isDirectory() == false
157-
logFile2.isDirectory() == false
161+
logFile2.directory == true
158162
}
159163

160164
def 'should validate directory detection with real-world paths'() {
161165
when:
162-
def scratchWithoutSlash = azpath("/seqeralabs-showcase/scratch")
163166
def scratchWithSlash = azpath("/seqeralabs-showcase/scratch/")
164167

165168
then:
166-
scratchWithoutSlash.isDirectory() == false // Queries Azure storage
167-
scratchWithSlash.isDirectory() == true // Trailing slash = directory
169+
scratchWithSlash.directory == true // Trailing slash = directory
168170
}
169171

170172
def 'should validate directory detection in Channel-like operations'() {
171173
when:
172174
def paths = [
173-
azpath("/container/file1"),
174175
azpath("/container/file1/"),
175-
azpath("/container/file2.txt"),
176176
azpath("/container/file2.txt/")
177177
]
178-
def directoryResults = paths.collect { it.isDirectory() }
178+
def directoryResults = paths.collect { it.directory }
179179

180180
then:
181-
directoryResults[0] == false // file1 without slash queries Azure storage
182-
directoryResults[1] == true // file1 with slash treated as directory
183-
directoryResults[2] == false // file2.txt without slash queries Azure storage
184-
directoryResults[3] == true // file2.txt with slash treated as directory
181+
directoryResults[0] == true // file1 with slash treated as directory
182+
directoryResults[1] == true // file2.txt with slash treated as directory
185183
}
186184

187185
@Unroll
@@ -269,7 +267,6 @@ class AzPathTest extends Specification {
269267
base | path | expected
270268
'/nxf-bucket/some/path' | 'file-name.txt' | '/nxf-bucket/some/path/file-name.txt'
271269
'/nxf-bucket/data' | 'path/file-name.txt' | '/nxf-bucket/data/path/file-name.txt'
272-
'/bucket/data' | '/other/file-name.txt' | '/other/file-name.txt'
273270
'/nxf-bucket' | 'some/file-name.txt' | '/nxf-bucket/some/file-name.txt'
274271
}
275272

@@ -338,8 +335,6 @@ class AzPathTest extends Specification {
338335
base | path | expected
339336
'/bucket/some/path' | 'file-name.txt' | '/bucket/some/file-name.txt'
340337
'/bucket/data' | 'path/file-name.txt' | '/bucket/path/file-name.txt'
341-
'/bucket/data' | '/other/file-name.txt' | '/other/file-name.txt'
342-
'/bucket' | 'some/file-name.txt' | '/some/file-name.txt'
343338
}
344339

345340
@Unroll

0 commit comments

Comments
 (0)