@@ -802,8 +802,7 @@ static noinline void folios_trunc(folios *fs, struct folio **fi)
802802static int __bch2_buffered_write (struct bch_inode_info * inode ,
803803 struct address_space * mapping ,
804804 struct iov_iter * iter ,
805- loff_t pos , unsigned len ,
806- bool inode_locked )
805+ loff_t pos , unsigned len )
807806{
808807 struct bch_fs * c = inode -> v .i_sb -> s_fs_info ;
809808 struct bch2_folio_reservation res ;
@@ -827,15 +826,6 @@ static int __bch2_buffered_write(struct bch_inode_info *inode,
827826
828827 BUG_ON (!fs .nr );
829828
830- /*
831- * If we're not using the inode lock, we need to lock all the folios for
832- * atomiticity of writes vs. other writes:
833- */
834- if (!inode_locked && folio_end_pos (darray_last (fs )) < end ) {
835- ret = - BCH_ERR_need_inode_lock ;
836- goto out ;
837- }
838-
839829 f = darray_first (fs );
840830 if (pos != folio_pos (f ) && !folio_test_uptodate (f )) {
841831 ret = bch2_read_single_folio (f , mapping );
@@ -932,10 +922,8 @@ static int __bch2_buffered_write(struct bch_inode_info *inode,
932922 end = pos + copied ;
933923
934924 spin_lock (& inode -> v .i_lock );
935- if (end > inode -> v .i_size ) {
936- BUG_ON (!inode_locked );
925+ if (end > inode -> v .i_size )
937926 i_size_write (& inode -> v , end );
938- }
939927 spin_unlock (& inode -> v .i_lock );
940928
941929 f_pos = pos ;
@@ -979,68 +967,12 @@ static ssize_t bch2_buffered_write(struct kiocb *iocb, struct iov_iter *iter)
979967 struct file * file = iocb -> ki_filp ;
980968 struct address_space * mapping = file -> f_mapping ;
981969 struct bch_inode_info * inode = file_bch_inode (file );
982- loff_t pos ;
983- bool inode_locked = false;
984- ssize_t written = 0 , written2 = 0 , ret = 0 ;
985-
986- /*
987- * We don't take the inode lock unless i_size will be changing. Folio
988- * locks provide exclusion with other writes, and the pagecache add lock
989- * provides exclusion with truncate and hole punching.
990- *
991- * There is one nasty corner case where atomicity would be broken
992- * without great care: when copying data from userspace to the page
993- * cache, we do that with faults disable - a page fault would recurse
994- * back into the filesystem, taking filesystem locks again, and
995- * deadlock; so it's done with faults disabled, and we fault in the user
996- * buffer when we aren't holding locks.
997- *
998- * If we do part of the write, but we then race and in the userspace
999- * buffer have been evicted and are no longer resident, then we have to
1000- * drop our folio locks to re-fault them in, breaking write atomicity.
1001- *
1002- * To fix this, we restart the write from the start, if we weren't
1003- * holding the inode lock.
1004- *
1005- * There is another wrinkle after that; if we restart the write from the
1006- * start, and then get an unrecoverable error, we _cannot_ claim to
1007- * userspace that we did not write data we actually did - so we must
1008- * track (written2) the most we ever wrote.
1009- */
1010-
1011- if ((iocb -> ki_flags & IOCB_APPEND ) ||
1012- (iocb -> ki_pos + iov_iter_count (iter ) > i_size_read (& inode -> v ))) {
1013- inode_lock (& inode -> v );
1014- inode_locked = true;
1015- }
1016-
1017- ret = generic_write_checks (iocb , iter );
1018- if (ret <= 0 )
1019- goto unlock ;
1020-
1021- ret = file_remove_privs_flags (file , !inode_locked ? IOCB_NOWAIT : 0 );
1022- if (ret ) {
1023- if (!inode_locked ) {
1024- inode_lock (& inode -> v );
1025- inode_locked = true;
1026- ret = file_remove_privs_flags (file , 0 );
1027- }
1028- if (ret )
1029- goto unlock ;
1030- }
1031-
1032- ret = file_update_time (file );
1033- if (ret )
1034- goto unlock ;
1035-
1036- pos = iocb -> ki_pos ;
970+ loff_t pos = iocb -> ki_pos ;
971+ ssize_t written = 0 ;
972+ int ret = 0 ;
1037973
1038974 bch2_pagecache_add_get (inode );
1039975
1040- if (!inode_locked &&
1041- (iocb -> ki_pos + iov_iter_count (iter ) > i_size_read (& inode -> v )))
1042- goto get_inode_lock ;
1043-
1044976 do {
1045977 unsigned offset = pos & (PAGE_SIZE - 1 );
1046978 unsigned bytes = iov_iter_count (iter );
@@ -1065,17 +997,12 @@ static ssize_t bch2_buffered_write(struct kiocb *iocb, struct iov_iter *iter)
1065997 }
1066998 }
1067999
1068- if (unlikely (bytes != iov_iter_count (iter ) && !inode_locked ))
1069- goto get_inode_lock ;
1070-
10711000 if (unlikely (fatal_signal_pending (current ))) {
10721001 ret = - EINTR ;
10731002 break ;
10741003 }
10751004
1076- ret = __bch2_buffered_write (inode , mapping , iter , pos , bytes , inode_locked );
1077- if (ret == - BCH_ERR_need_inode_lock )
1078- goto get_inode_lock ;
1005+ ret = __bch2_buffered_write (inode , mapping , iter , pos , bytes );
10791006 if (unlikely (ret < 0 ))
10801007 break ;
10811008
@@ -1096,46 +1023,50 @@ static ssize_t bch2_buffered_write(struct kiocb *iocb, struct iov_iter *iter)
10961023 }
10971024 pos += ret ;
10981025 written += ret ;
1099- written2 = max (written , written2 );
1100-
1101- if (ret != bytes && !inode_locked )
1102- goto get_inode_lock ;
11031026 ret = 0 ;
11041027
11051028 balance_dirty_pages_ratelimited (mapping );
1106-
1107- if (0 ) {
1108- get_inode_lock :
1109- bch2_pagecache_add_put (inode );
1110- inode_lock (& inode -> v );
1111- inode_locked = true;
1112- bch2_pagecache_add_get (inode );
1113-
1114- iov_iter_revert (iter , written );
1115- pos -= written ;
1116- written = 0 ;
1117- ret = 0 ;
1118- }
11191029 } while (iov_iter_count (iter ));
1120- bch2_pagecache_add_put (inode );
1121- unlock :
1122- if (inode_locked )
1123- inode_unlock (& inode -> v );
11241030
1125- iocb -> ki_pos += written ;
1031+ bch2_pagecache_add_put ( inode ) ;
11261032
1127- ret = max (written , written2 ) ?: ret ;
1128- if (ret > 0 )
1129- ret = generic_write_sync (iocb , ret );
1130- return ret ;
1033+ return written ? written : ret ;
11311034}
11321035
1133- ssize_t bch2_write_iter (struct kiocb * iocb , struct iov_iter * iter )
1036+ ssize_t bch2_write_iter (struct kiocb * iocb , struct iov_iter * from )
11341037{
1135- ssize_t ret = iocb -> ki_flags & IOCB_DIRECT
1136- ? bch2_direct_write (iocb , iter )
1137- : bch2_buffered_write (iocb , iter );
1038+ struct file * file = iocb -> ki_filp ;
1039+ struct bch_inode_info * inode = file_bch_inode (file );
1040+ ssize_t ret ;
1041+
1042+ if (iocb -> ki_flags & IOCB_DIRECT ) {
1043+ ret = bch2_direct_write (iocb , from );
1044+ goto out ;
1045+ }
1046+
1047+ inode_lock (& inode -> v );
1048+
1049+ ret = generic_write_checks (iocb , from );
1050+ if (ret <= 0 )
1051+ goto unlock ;
1052+
1053+ ret = file_remove_privs (file );
1054+ if (ret )
1055+ goto unlock ;
1056+
1057+ ret = file_update_time (file );
1058+ if (ret )
1059+ goto unlock ;
1060+
1061+ ret = bch2_buffered_write (iocb , from );
1062+ if (likely (ret > 0 ))
1063+ iocb -> ki_pos += ret ;
1064+ unlock :
1065+ inode_unlock (& inode -> v );
11381066
1067+ if (ret > 0 )
1068+ ret = generic_write_sync (iocb , ret );
1069+ out :
11391070 return bch2_err_class (ret );
11401071}
11411072