November 10, 2015

file write with O_SYNC enabled

Reading one of the questions on stackexchange, thought to verify it with ftrace. So, enabled function tracing for write(2) syscall as below to capture code flow
root@test1:/sys/kernel/debug/tracing# echo SyS_write > set_graph_function 
root@test1:/sys/kernel/debug/tracing# echo function_graph > current_tracer 
root@test1:/sys/kernel/debug/tracing# echo 1 > tracing_on
At this point, I had a simple program that open's a file with O_SYNC flag and performs a write (see below strace output of program)
open("/tmp/testing_o_sync", O_WRONLY|O_CREAT|O_SYNC, 0644) = 3
write(3, "This write has to go to disk dir"..., 100) = 100
Below is the edited version of captured ftrace output (minimal details on un-interesting functions; first column is line number)
     5  1)               |  SyS_write() {
     .
     9  1)               |    vfs_write() {
    10  1)               |      rw_verify_area() {
    14  1)   1.736 us    |      }
    15  1)   0.176 us    |      __sb_start_write();
    30  1)               |      new_sync_write() {
    31  1)   0.105 us    |        iov_iter_init();
    32  1)               |        ext4_file_write_iter [ext4]() {
    33  1)               |          mutex_lock() {
    34  1)   0.100 us    |            _cond_resched();
    35  1)   0.898 us    |          }
    36  1)               |          __generic_file_write_iter() {  # http://lxr.free-electrons.com/source/mm/filemap.c?v=3.16#L2518 
     # As per above source link, most work of writing to file done in this function, however it does not handle O_SYNC
     # Syncing of data need to be handled by caller 
    37  1)   0.105 us    |            file_remove_suid();
    38  1)               |            file_update_time() {
    39  1)               |              current_fs_time() {
    40  1)   0.230 us    |                current_kernel_time();
    41  1)   0.999 us    |              }
    42  1)               |              __mnt_want_write_file() {
    43  1)   0.146 us    |                mnt_clone_write();
    44  1)   1.048 us    |              }
    45  1)               |              update_time() {
    46  1)               |                __mark_inode_dirty() {
    47  1)               |                  ext4_dirty_inode [ext4]() {
   146  1) + 74.959 us   |                  }  # ext4_dirty_inode _ends_here_
   147  1)   0.120 us    |                  _raw_spin_lock();
   148  1)   0.096 us    |                  sb_is_blkdev_sb();
   149  1)   0.276 us    |                  _raw_spin_lock();
   150  1) + 79.373 us   |                }
   151  1) + 80.631 us   |              }
   152  1)   0.176 us    |              __mnt_drop_write_file();
   153  1) + 85.662 us   |            } # file_update_time _ends_here_
   154  1)               |            generic_perform_write() {
   155  1)   0.140 us    |              iov_iter_fault_in_readable();
   156  1)               |              ext4_da_write_begin [ext4]() {
   192  1) + 25.083 us   |              } # ext4_da_write_begin _ends_here_
   193  1)   0.442 us    |              iov_iter_copy_from_user_atomic();
   194  1)               |              ext4_da_write_end [ext4]() {
   234  1) + 22.415 us   |              } # ext4_da_write_end _ends_here_
   235  1)   0.090 us    |              _cond_resched();
   236  1)   0.111 us    |              iov_iter_advance();
   237  1)   0.436 us    |              balance_dirty_pages_ratelimited();
   238  1) + 54.575 us   |            } # generic_perform_write _ends_here_
   239  1) ! 143.216 us  |          }
   240  1)   0.125 us    |          mutex_unlock();
   241  1)               |          vfs_fsync_range() { # http://lxr.free-electrons.com/source/include/linux/fs.h?v=3.16#L2233
     # Called to flush files with O_SYNC flag. vfs_fsync_range() is called from generic_write_sync function and 
     # calls respective fsync_method, as seen below
178 int vfs_fsync_range(struct file *file, loff_t start, loff_t end, int datasync)
179 {
180         if (!file->f_op->fsync)
181                 return -EINVAL;
182         return file->f_op->fsync(file, start, end, datasync);
183 }
   242  1)               |            ext4_sync_file [ext4]() {
   243  1)               |              filemap_write_and_wait_range() {
   244  1)               |                __filemap_fdatawrite_range() {
   245  1)               |                  do_writepages() {
   246  1)               |                    ext4_writepages [ext4]() {
   247  1)   0.211 us    |                      mapping_tagged();
   248  1)   0.096 us    |                      ext4_has_inline_data [ext4]();
   249  1)   0.090 us    |                      ext4_io_submit_init [ext4]();
   254  1)   0.105 us    |                      blk_start_plug();
   255  1)               |                      ext4_init_io_end [ext4]() {
       .
   261  1)   5.532 us    |                      }
   262  1)   0.095 us    |                      jbd2_journal_blocks_per_page [jbd2]();
   263  1)               |                      ext4_meta_trans_blocks [ext4]() {
       .
   267  1)   2.473 us    |                      }
   268  1)               |                      __ext4_journal_start_sb [ext4]() {
   280  1)   7.227 us    |                      }
   281  1)               |                      mpage_prepare_extent_to_map [ext4]() {
       .
   360  1) + 57.845 us   |                      } # mpage_prepare_extent_to_map _ends_here_
   361  1)               |                      __ext4_journal_stop [ext4]() {
       .
   370  1)   5.121 us    |                      }
   371  1)               |                      ext4_io_submit [ext4]() {
   372  1)               |                        submit_bio() {
   373  1)               |                          generic_make_request() {
       .
   517  1) ! 116.884 us  |                          }
   518  1) ! 117.787 us  |                        }  # submit_bio _ends_here_
   519  1)   0.135 us    |                        bio_put();
   520  1) ! 119.573 us  |                      }
   521  1)   0.286 us    |                      mpage_release_unused_pages [ext4]();
   522  1)   0.161 us    |                      ext4_put_io_end [ext4]();
   523  1)               |                      blk_finish_plug() {
   524  1)               |                        blk_flush_plug_list() {
       .
   706  1) ! 141.451 us  |                        }
   707  1) ! 142.228 us  |                      }  # blk_finish_plug _ends_here_
   708  1) ! 355.522 us  |                    }
   709  1) ! 356.280 us  |                  }  # do_writepages _ends_here_
   710  1) ! 357.167 us  |                }
   711  1)               |                filemap_fdatawait_range() {
      .
   816  1) ! 154.310 us  |                }
   817  1) ! 513.164 us  |              } # filemap_write_and_wait_range _ends_here_
   818  1)               |              jbd2_trans_will_send_data_barrier [jbd2]() {
   819  1)   0.150 us    |                _raw_read_lock();
   820  1)   0.973 us    |              }
   821  1)               |              jbd2_complete_transaction [jbd2]() {
   877  1)               |                jbd2_log_wait_commit [jbd2]() {
      .
  1130  1) ! 25593.78 us |                } # jbd2_log_wait_commit _ends_here_
  1131  1) ! 25627.76 us |              } # jbd2_complete_transaction _ends_here_
  1132  1) ! 26144.90 us |            } # ext4_sync_file _ends_here
  1133  1) ! 26145.69 us |          }
  1134  1) ! 26293.56 us |        }
  1135  1) ! 26294.93 us |      }
  1136  1)               |      __fsnotify_parent() {
     .
  1165  1)   9.083 us    |      }
  1166  1)               |      fsnotify() {
  1169  1)   0.858 us    |      }
  1170  1)   0.191 us    |      __sb_end_write();
  1171  1) ! 26321.44 us |    } # vfs_write _ends_here_
  1172  1) ! 26324.27 us |  }  # SyS_write _ends_here_
2233 static inline int generic_write_sync(struct file *file, loff_t pos, loff_t count)
2234 {
2235         if (!(file->f_flags & O_DSYNC) && !IS_SYNC(file->f_mapping->host))
2236                 return 0;
2237         return vfs_fsync_range(file, pos, pos + count - 1,
2238                                (file->f_flags & __O_SYNC) ? 0 : 1);
2239 }
As seen in above function, it returns 0 for buffered writes and calls 
vfs_fsync_range for file writes with O_SYNC flag

1 comment:

  1. I am the guy who asked the question.
    Just wanted to thank you verry much for this!!!!

    ReplyDelete