3737
3838#include "ocfs2.h"
3939
40+ #include "aops.h"
4041#include "dlmglue.h"
4142#include "file.h"
4243#include "inode.h"
4344#include "mmap.h"
4445
46+ static inline int ocfs2_vm_op_block_sigs (sigset_t * blocked , sigset_t * oldset )
47+ {
48+ /* Fill *blocked with every signal and block that whole set, handing
49+ * oldset to sigprocmask() so the caller can later pass it to
50+ * ocfs2_vm_op_unblock_sigs(). Blocking upfront is the best way to deal with signals in the vm path, rather than allowing the locking paths to return -ERESTARTSYS. */
51+ sigfillset (blocked );
52+
53+ /* We should technically never get a bad return value
54+ * from sigprocmask; whatever it returns is handed straight back to the caller. */
55+ return sigprocmask (SIG_BLOCK , blocked , oldset );
56+ }
57+
58+ static inline int ocfs2_vm_op_unblock_sigs (sigset_t * oldset )
59+ { /* Counterpart of ocfs2_vm_op_block_sigs(): reinstall *oldset as the signal mask and return sigprocmask()'s result. */
60+ return sigprocmask (SIG_SETMASK , oldset , NULL );
61+ }
62+
4563static struct page * ocfs2_nopage (struct vm_area_struct * area ,
4664 unsigned long address ,
4765 int * type )
@@ -53,51 +71,152 @@ static struct page *ocfs2_nopage(struct vm_area_struct * area,
5371 mlog_entry ("(area=%p, address=%lu, type=%p)\n" , area , address ,
5472 type );
5573
56- /* The best way to deal with signals in this path is
57- * to block them upfront, rather than allowing the
58- * locking paths to return -ERESTARTSYS. */
59- sigfillset (& blocked );
60-
61- /* We should technically never get a bad ret return
62- * from sigprocmask */
63- ret = sigprocmask (SIG_BLOCK , & blocked , & oldset );
74+ ret = ocfs2_vm_op_block_sigs (& blocked , & oldset );
6475 if (ret < 0 ) {
6576 mlog_errno (ret );
6677 goto out ;
6778 }
6879
6980 page = filemap_nopage (area , address , type );
7081
71- ret = sigprocmask ( SIG_SETMASK , & oldset , NULL );
82+ ret = ocfs2_vm_op_unblock_sigs ( & oldset );
7283 if (ret < 0 )
7384 mlog_errno (ret );
7485out :
7586 mlog_exit_ptr (page );
7687 return page ;
7788}
7889
79- static struct vm_operations_struct ocfs2_file_vm_ops = {
80- .nopage = ocfs2_nopage ,
81- };
90+ static int __ocfs2_page_mkwrite (struct inode * inode , struct buffer_head * di_bh ,
91+ struct page * page )
92+ { /* Runs a whole-page (chopped to i_size) write_begin/write_end cycle on an already up-to-date page so its backing range is allocated. Called by ocfs2_page_mkwrite() with the meta lock, ip_alloc_sem and data lock held. Returns 0 on success, -EINVAL if the page lies at or beyond i_size, is not up to date or no longer belongs to this inode's mapping, or the negative error from the write_begin/write_end helpers. */
93+ int ret ;
94+ struct address_space * mapping = inode -> i_mapping ;
95+ loff_t pos = (loff_t )page -> index << PAGE_CACHE_SHIFT ; /* widen before shifting: page->index can be narrower than loff_t (32-bit builds), which would truncate offsets past 4GB */
96+ unsigned int len = PAGE_CACHE_SIZE ;
97+ pgoff_t last_index ;
98+ struct page * locked_page = NULL ;
99+ void * fsdata ;
100+ loff_t size = i_size_read (inode );
82101
83- int ocfs2_mmap (struct file * file , struct vm_area_struct * vma )
102+ /*
103+ * Another node might have truncated while we were waiting on
104+ * cluster locks. Compare byte offsets rather than page indexes so that a page starting exactly at a page-aligned i_size (or any page of an empty file) is rejected as well.
105+ */
106+ last_index = size >> PAGE_CACHE_SHIFT ;
107+ if (pos >= size ) {
108+ ret = - EINVAL ;
109+ goto out ;
110+ }
111+
112+ /*
113+ * The i_size check above doesn't catch the case where nodes
114+ * truncated and then re-extended the file. We'll re-check the
115+ * page mapping after taking the page lock inside of
116+ * ocfs2_write_begin_nolock().
117+ */
118+ if (!PageUptodate (page ) || page -> mapping != inode -> i_mapping ) {
119+ ret = - EINVAL ;
120+ goto out ;
121+ }
122+
123+ /*
124+ * Call ocfs2_write_begin() and ocfs2_write_end() to take
125+ * advantage of the allocation code there. We pass a write
126+ * length of the whole page (chopped to i_size) to make sure
127+ * the whole thing is allocated.
128+ *
129+ * Since we know the page is up to date, we don't have to
130+ * worry about ocfs2_write_begin() skipping some buffer reads
131+ * because the "write" would invalidate their data.
132+ */
133+ if (page -> index == last_index ) /* pos < size was established above, so the chopped length is never 0 here */
134+ len = size & ~PAGE_CACHE_MASK ;
135+
136+ ret = ocfs2_write_begin_nolock (mapping , pos , len , 0 , & locked_page ,
137+ & fsdata , di_bh , page );
138+ if (ret ) {
139+ if (ret != - ENOSPC )
140+ mlog_errno (ret );
141+ goto out ;
142+ }
143+
144+ ret = ocfs2_write_end_nolock (mapping , pos , len , len , locked_page ,
145+ fsdata );
146+ if (ret < 0 ) {
147+ mlog_errno (ret );
148+ goto out ;
149+ }
150+ BUG_ON (ret != len ); /* a short "write" is treated as a fatal inconsistency */
151+ ret = 0 ;
152+ out :
153+ return ret ;
154+ }
155+
156+ static int ocfs2_page_mkwrite (struct vm_area_struct * vma , struct page * page )
84157{
85- int ret = 0 , lock_level = 0 ;
86- struct ocfs2_super * osb = OCFS2_SB (file -> f_dentry -> d_inode -> i_sb );
158+ struct inode * inode = vma -> vm_file -> f_path .dentry -> d_inode ;
159+ struct buffer_head * di_bh = NULL ;
160+ sigset_t blocked , oldset ;
161+ int ret , ret2 ;
162+
163+ ret = ocfs2_vm_op_block_sigs (& blocked , & oldset );
164+ if (ret < 0 ) {
165+ mlog_errno (ret );
166+ return ret ;
167+ }
168+
169+ /*
170+ * Lock order on the way in: meta lock, then ip_alloc_sem, then data lock; the exit path releases them in reverse. The cluster locks taken will block a truncate from another
171+ * node. Taking the data lock will also ensure that we don't
172+ * attempt page truncation as part of a downconvert.
173+ */
174+ ret = ocfs2_meta_lock (inode , & di_bh , 1 );
175+ if (ret < 0 ) {
176+ mlog_errno (ret );
177+ goto out ;
178+ }
87179
88180 /*
89- * Only support shared writeable mmap for local mounts which
90- * don't know about holes.
181+ * The alloc sem should be enough to serialize with
182+ * ocfs2_truncate_file() changing i_size as well as any thread
183+ * modifying the inode btree. It is held across the data lock and the __ocfs2_page_mkwrite() call below.
91184 */
92- if ((!ocfs2_mount_local (osb ) || ocfs2_sparse_alloc (osb )) &&
93- ((vma -> vm_flags & VM_SHARED ) || (vma -> vm_flags & VM_MAYSHARE )) &&
94- ((vma -> vm_flags & VM_WRITE ) || (vma -> vm_flags & VM_MAYWRITE ))) {
95- mlog (0 , "disallow shared writable mmaps %lx\n" , vma -> vm_flags );
96- /* This is -EINVAL because generic_file_readonly_mmap
97- * returns it in a similar situation. */
98- return - EINVAL ;
185+ down_write (& OCFS2_I (inode )-> ip_alloc_sem );
186+
187+ ret = ocfs2_data_lock (inode , 1 );
188+ if (ret < 0 ) {
189+ mlog_errno (ret );
190+ goto out_meta_unlock ;
99191 }
100192
193+ ret = __ocfs2_page_mkwrite (inode , di_bh , page );
194+
195+ ocfs2_data_unlock (inode , 1 );
196+
197+ out_meta_unlock :
198+ up_write (& OCFS2_I (inode )-> ip_alloc_sem );
199+
200+ brelse (di_bh );
201+ ocfs2_meta_unlock (inode , 1 );
202+
203+ out :
204+ ret2 = ocfs2_vm_op_unblock_sigs (& oldset );
205+ if (ret2 < 0 )
206+ mlog_errno (ret2 );
207+ /* A failure to restore the signal mask is only logged; the caller still gets the locking/mkwrite status in ret. */
208+ return ret ;
209+ }
210+
211+ static struct vm_operations_struct ocfs2_file_vm_ops = { /* vm operations for ocfs2 file mappings (presumably installed by ocfs2_mmap(); its body is not visible in this view) */
212+ .nopage = ocfs2_nopage ,
213+ .page_mkwrite = ocfs2_page_mkwrite , /* new hook added here alongside the existing nopage handler */
214+ };
215+
216+ int ocfs2_mmap (struct file * file , struct vm_area_struct * vma )
217+ {
218+ int ret = 0 , lock_level = 0 ;
219+
101220 ret = ocfs2_meta_lock_atime (file -> f_dentry -> d_inode ,
102221 file -> f_vfsmnt , & lock_level );
103222 if (ret < 0 ) {
0 commit comments