#include <linux/swap.h>
#include <linux/swapops.h>
#include <linux/pagemap.h>
+ #include <linux/pagevec.h>
#include <linux/mempolicy.h>
#include <linux/syscalls.h>
#include <linux/sched.h>
#include <linux/export.h>
#include <linux/rmap.h>
#include <linux/mmzone.h>
#include <linux/hugetlb.h>
+ #include <linux/memcontrol.h>
+ #include <linux/mm_inline.h>

#include "internal.h"

@@ -87,6 +90,47 @@ void mlock_vma_page(struct page *page)
	}
}

+ /*
+  * Finish munlock after successful page isolation
+  *
+  * Page must be locked. This is a wrapper for try_to_munlock()
+  * and putback_lru_page() with munlock accounting.
+  */
+ static void __munlock_isolated_page(struct page *page)
+ {
+ 	int ret = SWAP_AGAIN;
+
+ 	/*
+ 	 * Optimization: if the page was mapped just once, that's our mapping
+ 	 * and we don't need to check all the other vmas.
+ 	 */
+ 	if (page_mapcount(page) > 1)
+ 		ret = try_to_munlock(page);
+
+ 	/* Did try_to_munlock() succeed or punt? */
+ 	if (ret != SWAP_MLOCK)
+ 		count_vm_event(UNEVICTABLE_PGMUNLOCKED);
+
+ 	putback_lru_page(page);
+ }
+
+ /*
+  * Accounting for page isolation failure during munlock
+  *
+  * Performs accounting when page isolation fails in munlock. There is nothing
+  * else to do because it means some other task has already removed the page
+  * from the LRU. putback_lru_page() will take care of removing the page from
+  * the unevictable list, if necessary. vmscan [page_referenced()] will move
+  * the page back to the unevictable list if some other vma has it mlocked.
+  */
+ static void __munlock_isolation_failed(struct page *page)
+ {
+ 	if (PageUnevictable(page))
+ 		count_vm_event(UNEVICTABLE_PGSTRANDED);
+ 	else
+ 		count_vm_event(UNEVICTABLE_PGMUNLOCKED);
+ }
+
/**
 * munlock_vma_page - munlock a vma page
 * @page - page to be unlocked
@@ -112,37 +156,10 @@ unsigned int munlock_vma_page(struct page *page)
		unsigned int nr_pages = hpage_nr_pages(page);
		mod_zone_page_state(page_zone(page), NR_MLOCK, -nr_pages);
		page_mask = nr_pages - 1;
- 		if (!isolate_lru_page(page)) {
- 			int ret = SWAP_AGAIN;
-
- 			/*
- 			 * Optimization: if the page was mapped just once,
- 			 * that's our mapping and we don't need to check all the
- 			 * other vmas.
- 			 */
- 			if (page_mapcount(page) > 1)
- 				ret = try_to_munlock(page);
- 			/*
- 			 * did try_to_unlock() succeed or punt?
- 			 */
- 			if (ret != SWAP_MLOCK)
- 				count_vm_event(UNEVICTABLE_PGMUNLOCKED);
-
- 			putback_lru_page(page);
- 		} else {
- 			/*
- 			 * Some other task has removed the page from the LRU.
- 			 * putback_lru_page() will take care of removing the
- 			 * page from the unevictable list, if necessary.
- 			 * vmscan [page_referenced()] will move the page back
- 			 * to the unevictable list if some other vma has it
- 			 * mlocked.
- 			 */
- 			if (PageUnevictable(page))
- 				count_vm_event(UNEVICTABLE_PGSTRANDED);
- 			else
- 				count_vm_event(UNEVICTABLE_PGMUNLOCKED);
- 		}
+ 		if (!isolate_lru_page(page))
+ 			__munlock_isolated_page(page);
+ 		else
+ 			__munlock_isolation_failed(page);
	}

	return page_mask;
@@ -209,6 +226,73 @@ static int __mlock_posix_error_return(long retval)
	return retval;
}

+ /*
+  * Munlock a batch of pages from the same zone
+  *
+  * The work is split into two main phases. The first phase clears the Mlocked
+  * flag and attempts to isolate the pages, all under a single zone lru lock.
+  * The second phase finishes the munlock only for pages where isolation
+  * succeeded.
+  *
+  * Note that pvec is modified during the process. Before returning,
+  * pagevec_reinit() is called on it.
+  */
+ static void __munlock_pagevec(struct pagevec *pvec, struct zone *zone)
+ {
+ 	int i;
+ 	int nr = pagevec_count(pvec);
+
+ 	/* Phase 1: page isolation */
+ 	spin_lock_irq(&zone->lru_lock);
+ 	for (i = 0; i < nr; i++) {
+ 		struct page *page = pvec->pages[i];
+
+ 		if (TestClearPageMlocked(page)) {
+ 			struct lruvec *lruvec;
+ 			int lru;
+
+ 			/* we have disabled interrupts */
+ 			__mod_zone_page_state(zone, NR_MLOCK, -1);
+
+ 			if (PageLRU(page)) {
+ 				lruvec = mem_cgroup_page_lruvec(page, zone);
+ 				lru = page_lru(page);
+
+ 				get_page(page);
+ 				ClearPageLRU(page);
+ 				del_page_from_lru_list(page, lruvec, lru);
+ 			} else {
+ 				__munlock_isolation_failed(page);
+ 				goto skip_munlock;
+ 			}
+
+ 		} else {
+ skip_munlock:
+ 			/*
+ 			 * We won't be munlocking this page in the next phase
+ 			 * but we still need to release the follow_page_mask()
+ 			 * pin.
+ 			 */
+ 			pvec->pages[i] = NULL;
+ 			put_page(page);
+ 		}
+ 	}
+ 	spin_unlock_irq(&zone->lru_lock);
+
+ 	/* Phase 2: page munlock and putback */
+ 	for (i = 0; i < nr; i++) {
+ 		struct page *page = pvec->pages[i];
+
+ 		if (page) {
+ 			lock_page(page);
+ 			__munlock_isolated_page(page);
+ 			unlock_page(page);
+ 			put_page(page); /* pin from follow_page_mask() */
+ 		}
+ 	}
+ 	pagevec_reinit(pvec);
+ }
+
/*
 * munlock_vma_pages_range() - munlock all pages in the vma range.'
 * @vma - vma containing range to be munlock()ed.
@@ -230,11 +314,16 @@ static int __mlock_posix_error_return(long retval)
void munlock_vma_pages_range(struct vm_area_struct *vma,
			     unsigned long start, unsigned long end)
{
+ 	struct pagevec pvec;
+ 	struct zone *zone = NULL;
+
+ 	pagevec_init(&pvec, 0);
	vma->vm_flags &= ~VM_LOCKED;

	while (start < end) {
		struct page *page;
		unsigned int page_mask, page_increm;
+ 		struct zone *pagezone;

		/*
		 * Although FOLL_DUMP is intended for get_dump_page(),
@@ -246,20 +335,47 @@ void munlock_vma_pages_range(struct vm_area_struct *vma,
		page = follow_page_mask(vma, start, FOLL_GET | FOLL_DUMP,
					&page_mask);
		if (page && !IS_ERR(page)) {
- 			lock_page(page);
- 			/*
- 			 * Any THP page found by follow_page_mask() may have
- 			 * gotten split before reaching munlock_vma_page(),
- 			 * so we need to recompute the page_mask here.
- 			 */
- 			page_mask = munlock_vma_page(page);
- 			unlock_page(page);
- 			put_page(page);
+ 			pagezone = page_zone(page);
+ 			/* The whole pagevec must be in the same zone */
+ 			if (pagezone != zone) {
+ 				if (pagevec_count(&pvec))
+ 					__munlock_pagevec(&pvec, zone);
+ 				zone = pagezone;
+ 			}
+ 			if (PageTransHuge(page)) {
+ 				/*
+ 				 * THP pages are not handled by pagevec due
+ 				 * to their possible split (see below).
+ 				 */
+ 				if (pagevec_count(&pvec))
+ 					__munlock_pagevec(&pvec, zone);
+ 				lock_page(page);
+ 				/*
+ 				 * Any THP page found by follow_page_mask() may
+ 				 * have gotten split before reaching
+ 				 * munlock_vma_page(), so we need to recompute
+ 				 * the page_mask here.
+ 				 */
+ 				page_mask = munlock_vma_page(page);
+ 				unlock_page(page);
+ 				put_page(page); /* follow_page_mask() */
+ 			} else {
+ 				/*
+ 				 * Non-huge pages are handled in batches via
+ 				 * pagevec. The pin from follow_page_mask()
+ 				 * prevents them from being collapsed into
+ 				 * a THP.
+ 				 */
+ 				if (pagevec_add(&pvec, page) == 0)
+ 					__munlock_pagevec(&pvec, zone);
+ 			}
		}
		page_increm = 1 + (~(start >> PAGE_SHIFT) & page_mask);
		start += page_increm * PAGE_SIZE;
		cond_resched();
	}
+ 	if (pagevec_count(&pvec))
+ 		__munlock_pagevec(&pvec, zone);
}

/*
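The batching loop added to munlock_vma_pages_range() above relies on the usual pagevec drain-when-full idiom: pagevec_add() returns the remaining capacity of the pagevec, so a return value of 0 means the pagevec has just become full and must be processed. Below is a minimal sketch of that idiom only; the function name example_batch_pages and the process callback are hypothetical and not part of this patch.

#include <linux/pagevec.h>

/*
 * Illustrative sketch (hypothetical names, not from this patch) of the
 * drain-when-full pagevec idiom. The callback is expected to call
 * pagevec_reinit() once it has consumed the batch, just as
 * __munlock_pagevec() does above.
 */
static void example_batch_pages(struct page **pages, int nr,
				void (*process)(struct pagevec *pvec))
{
	struct pagevec pvec;
	int i;

	pagevec_init(&pvec, 0);		/* 0 = not a cold-page pagevec */
	for (i = 0; i < nr; i++) {
		/* queue the page; drain as soon as the pagevec fills up */
		if (pagevec_add(&pvec, pages[i]) == 0)
			process(&pvec);
	}
	if (pagevec_count(&pvec))	/* drain the final partial batch */
		process(&pvec);
}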