Skip to content

Commit f8d3aae

Browse files
committed
When the writeback cache is enabled, it is beneficial for data consistency
to communicate to the FUSE server when the kernel prepares a page for caching. This lets the FUSE server react and lock the page. In the same call, the kernel also asks the FUSE server how much data it has locked. If the feature is not supported, it will be disabled after the first call. 1. Add DLM_LOCK fuse opcode 1. Add caching of page locks for the writeback cache functionality. This means sending out a FUSE call whenever the kernel prepares a page for the writeback cache. The kernel will manage the cache so that it keeps track of already acquired locks (except for the case that is documented in the code). 2. Use rb-trees for the management of the already 'locked' page ranges 3. Use rw_semaphore for synchronization in fuse_dlm_cache
1 parent 0a4c668 commit f8d3aae

File tree

8 files changed

+118
-213
lines changed

8 files changed

+118
-213
lines changed

fs/fuse/Makefile

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@ obj-$(CONFIG_FUSE_FS) += fuse.o
1010
obj-$(CONFIG_CUSE) += cuse.o
1111
obj-$(CONFIG_VIRTIO_FS) += virtiofs.o
1212

13-
fuse-y := dev.o dir.o file.o inode.o control.o xattr.o acl.o readdir.o ioctl.o
13+
fuse-y := dev.o dir.o file.o inode.o control.o xattr.o acl.o readdir.o ioctl.o fuse_dlm_cache.o
1414
fuse-y += iomode.o
1515
fuse-$(CONFIG_FUSE_DAX) += dax.o
1616
fuse-$(CONFIG_FUSE_IO_URING) += dev_uring.o

fs/fuse/file.c

Lines changed: 6 additions & 149 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
*/
88

99
#include "fuse_i.h"
10+
#include "fuse_dlm_cache.h"
1011

1112
#include <linux/pagemap.h>
1213
#include <linux/slab.h>
@@ -1401,150 +1402,6 @@ static void fuse_dio_unlock(struct kiocb *iocb, bool exclusive)
14011402
}
14021403
}
14031404

1404-
static bool fuse_dlm_locked(struct file* file, loff_t offset, size_t length)
1405-
{
1406-
struct inode *inode = file_inode(file);
1407-
struct fuse_conn *fc = get_fuse_conn(inode);
1408-
struct fuse_inode *fi = get_fuse_inode(inode);
1409-
struct dlm_locked_area *area;
1410-
1411-
/* if dlm is not supported by fuse server, don't bother */
1412-
if (fc->no_dlm)
1413-
return true;
1414-
1415-
/* check the locked areas for the given offset and length */
1416-
list_for_each_entry(area, &fi->dlm_locked_areas, list) {
1417-
loff_t current_area_start_offset = area->offset;
1418-
loff_t current_area_end_offset = area->offset + area->size;
1419-
loff_t lock_end_offset = offset + length;
1420-
loff_t lock_start_offset = offset;
1421-
1422-
/* check if the locked areas are completely distinct, then we should continue */
1423-
if (current_area_end_offset < lock_start_offset
1424-
|| current_area_start_offset > lock_end_offset)
1425-
continue;
1426-
1427-
/* check if the given offset and length completely overlaps with the current area */
1428-
if (current_area_start_offset <= lock_start_offset
1429-
&& current_area_end_offset >= lock_end_offset) {
1430-
return true;
1431-
}
1432-
1433-
/* lock area has segment after the current area */
1434-
if(current_area_start_offset < lock_start_offset
1435-
&& current_area_end_offset > lock_start_offset
1436-
&& current_area_end_offset < lock_end_offset) {
1437-
offset = current_area_end_offset;
1438-
length = lock_end_offset - current_area_end_offset;
1439-
/* check all other areas for the part at the end of the locked area */
1440-
return fuse_dlm_locked(file, offset, length);
1441-
}
1442-
1443-
/* lock area has segment before the current area */
1444-
if (lock_start_offset < current_area_start_offset
1445-
&& lock_end_offset > current_area_start_offset
1446-
&& lock_end_offset < current_area_end_offset) {
1447-
offset = lock_start_offset;
1448-
length = current_area_start_offset - lock_start_offset;
1449-
/* check all other areas for the rest of the part */
1450-
return fuse_dlm_locked(file, offset, length);
1451-
}
1452-
1453-
/* If the lock area is larger than the current area, continue, some other areas might match partially
1454-
* If they don't we return false and the bigger chunk will be locked and merged with the partially matching one anyway.
1455-
* This is a case the fuse server has to be able to handle.
1456-
*/
1457-
}
1458-
return false;
1459-
}
1460-
1461-
/**
1462-
* check if the given offset and length extends the already locked area or we have to create a new area
1463-
*/
1464-
static void check_and_add_locked_area(struct fuse_inode *fi, loff_t offset, size_t length) {
1465-
struct dlm_locked_area *area;
1466-
1467-
spin_lock(&fi->lock);
1468-
/* iterate through the areas */
1469-
list_for_each_entry(area, &fi->dlm_locked_areas, list) {
1470-
loff_t current_area_start_offset = area->offset;
1471-
loff_t current_area_end_offset = area->offset + area->size;
1472-
loff_t lock_end_offset = offset + length;
1473-
loff_t lock_start_offset = offset;
1474-
1475-
/* if we have overlap, extend the locked area */
1476-
if (lock_start_offset >= current_area_start_offset && lock_start_offset <= current_area_end_offset) {
1477-
area->offset = min(current_area_start_offset, lock_start_offset);
1478-
area->size = max(current_area_end_offset, lock_end_offset) - area->offset;
1479-
spin_unlock(&fi->lock);
1480-
return;
1481-
}
1482-
}
1483-
1484-
/* create a new locked area */
1485-
area = kmalloc(sizeof(struct dlm_locked_area), GFP_KERNEL);
1486-
/* note that there is no way a failure of kmalloc here can be
1487-
* reported to userspace. */
1488-
if (area) {
1489-
area->offset = offset;
1490-
area->size = length;
1491-
list_add_tail(&area->list, &fi->dlm_locked_areas);
1492-
}
1493-
spin_unlock(&fi->lock);
1494-
}
1495-
1496-
/**
1497-
* request a dlm lock from the fuse server
1498-
*/
1499-
static void fuse_get_dlm_write_lock(struct file *file, loff_t offset, size_t length)
1500-
{
1501-
struct fuse_file *ff = file->private_data;
1502-
struct inode *inode = file_inode(file);
1503-
struct fuse_conn *fc = get_fuse_conn(inode);
1504-
struct fuse_inode *fi = get_fuse_inode(inode);
1505-
struct fuse_mount *fm = ff->fm;
1506-
1507-
FUSE_ARGS(args);
1508-
struct fuse_dlm_lock_in inarg;
1509-
struct fuse_dlm_lock_out outarg;
1510-
int err;
1511-
1512-
/* note that the offset and length don't have to be page aligned here
1513-
but since we only get here on writeback caching we will send out
1514-
page aligned requests */
1515-
offset &= PAGE_MASK;
1516-
length = PAGE_ALIGN(offset + length) - offset;
1517-
1518-
if (fuse_dlm_locked(file, offset, length))
1519-
return; /* we already have this area locked */
1520-
1521-
memset(&inarg, 0, sizeof(inarg));
1522-
inarg.fh = ff->fh;
1523-
1524-
inarg.offset = offset;
1525-
inarg.size = length;
1526-
inarg.type = FUSE_DLM_LOCK_WRITE;
1527-
1528-
args.opcode = FUSE_DLM_LOCK;
1529-
args.nodeid = get_node_id(inode);
1530-
args.in_numargs = 1;
1531-
args.in_args[0].size = sizeof(inarg);
1532-
args.in_args[0].value = &inarg;
1533-
args.out_numargs = 1;
1534-
args.out_args[0].size = sizeof(outarg);
1535-
args.out_args[0].value = &outarg;
1536-
err = fuse_simple_request(fm, &args);
1537-
if (err == -ENOSYS) {
1538-
/* fuse server told us it does not support dlm, save the info */
1539-
fc->no_dlm = 1;
1540-
}
1541-
1542-
if (err || outarg.locksize < length)
1543-
return;
1544-
else
1545-
check_and_add_locked_area(fi, offset, outarg.locksize);
1546-
}
1547-
15481405
static ssize_t fuse_cache_write_iter(struct kiocb *iocb, struct iov_iter *from)
15491406
{
15501407
struct file *file = iocb->ki_filp;
@@ -1553,8 +1410,6 @@ static ssize_t fuse_cache_write_iter(struct kiocb *iocb, struct iov_iter *from)
15531410
struct inode *inode = mapping->host;
15541411
ssize_t err;
15551412
struct fuse_conn *fc = get_fuse_conn(inode);
1556-
loff_t pos = iocb->ki_pos;
1557-
size_t length = iov_iter_count(from);
15581413

15591414
if (fc->writeback_cache) {
15601415
/* Update size (EOF optimization) and mode (SUID clearing) */
@@ -1571,9 +1426,11 @@ static ssize_t fuse_cache_write_iter(struct kiocb *iocb, struct iov_iter *from)
15711426

15721427
/* if we have dlm support acquire the lock for the area
15731428
* we are writing into */
1574-
if (!fc->no_dlm)
1429+
if (!fc->no_dlm) {
1430+
loff_t pos = iocb->ki_pos;
1431+
size_t length = iov_iter_count(from);
15751432
fuse_get_dlm_write_lock(file, pos, length);
1576-
1433+
}
15771434
return generic_file_write_iter(iocb, from);
15781435
}
15791436

@@ -3487,7 +3344,7 @@ void fuse_init_file_inode(struct inode *inode, unsigned int flags)
34873344

34883345
INIT_LIST_HEAD(&fi->write_files);
34893346
INIT_LIST_HEAD(&fi->queued_writes);
3490-
INIT_LIST_HEAD(&fi->dlm_locked_areas);
3347+
fuse_dlm_cache_init(&fi->dlm_locked_areas);
34913348
fi->writectr = 0;
34923349
fi->iocachectr = 0;
34933350
init_waitqueue_head(&fi->page_waitq);

0 commit comments

Comments
 (0)