Reading one of the questions on
stackexchange, thought to verify it with ftrace. So, enabled function tracing for write(2) syscall as below to capture code flow
root@test1:/sys/kernel/debug/tracing# echo SyS_write > set_graph_function
root@test1:/sys/kernel/debug/tracing# echo function_graph > current_tracer
root@test1:/sys/kernel/debug/tracing# echo 1 > tracing_on
At this point, I had a simple program that open's a file with O_SYNC flag and performs a write (see below strace output of program)
open("/tmp/testing_o_sync", O_WRONLY|O_CREAT|O_SYNC, 0644) = 3
write(3, "This write has to go to disk dir"..., 100) = 100
Below is the edited version of captured ftrace output (minimal details on un-interesting functions; first column is line number)
5 1) | SyS_write() {
.
9 1) | vfs_write() {
10 1) | rw_verify_area() {
14 1) 1.736 us | }
15 1) 0.176 us | __sb_start_write();
30 1) | new_sync_write() {
31 1) 0.105 us | iov_iter_init();
32 1) | ext4_file_write_iter [ext4]() {
33 1) | mutex_lock() {
34 1) 0.100 us | _cond_resched();
35 1) 0.898 us | }
36 1) | __generic_file_write_iter() { # http://lxr.free-electrons.com/source/mm/filemap.c?v=3.16#L2518
# As per above source link, most work of writing to file done in this function, however it does not handle O_SYNC
# Syncing of data need to be handled by caller
37 1) 0.105 us | file_remove_suid();
38 1) | file_update_time() {
39 1) | current_fs_time() {
40 1) 0.230 us | current_kernel_time();
41 1) 0.999 us | }
42 1) | __mnt_want_write_file() {
43 1) 0.146 us | mnt_clone_write();
44 1) 1.048 us | }
45 1) | update_time() {
46 1) | __mark_inode_dirty() {
47 1) | ext4_dirty_inode [ext4]() {
146 1) + 74.959 us | } # ext4_dirty_inode _ends_here_
147 1) 0.120 us | _raw_spin_lock();
148 1) 0.096 us | sb_is_blkdev_sb();
149 1) 0.276 us | _raw_spin_lock();
150 1) + 79.373 us | }
151 1) + 80.631 us | }
152 1) 0.176 us | __mnt_drop_write_file();
153 1) + 85.662 us | } # file_update_time _ends_here_
154 1) | generic_perform_write() {
155 1) 0.140 us | iov_iter_fault_in_readable();
156 1) | ext4_da_write_begin [ext4]() {
192 1) + 25.083 us | } # ext4_da_write_begin _ends_here_
193 1) 0.442 us | iov_iter_copy_from_user_atomic();
194 1) | ext4_da_write_end [ext4]() {
234 1) + 22.415 us | } # ext4_da_write_end _ends_here_
235 1) 0.090 us | _cond_resched();
236 1) 0.111 us | iov_iter_advance();
237 1) 0.436 us | balance_dirty_pages_ratelimited();
238 1) + 54.575 us | } # generic_perform_write _ends_here_
239 1) ! 143.216 us | }
240 1) 0.125 us | mutex_unlock();
241 1) | vfs_fsync_range() { # http://lxr.free-electrons.com/source/include/linux/fs.h?v=3.16#L2233
# Called to flush files with O_SYNC flag. vfs_fsync_range() is called from generic_write_sync function and
# calls respective fsync_method, as seen below
178 int vfs_fsync_range(struct file *file, loff_t start, loff_t end, int datasync)
179 {
180 if (!file->f_op->fsync)
181 return -EINVAL;
182 return file->f_op->fsync(file, start, end, datasync);
183 }
242 1) | ext4_sync_file [ext4]() {
243 1) | filemap_write_and_wait_range() {
244 1) | __filemap_fdatawrite_range() {
245 1) | do_writepages() {
246 1) | ext4_writepages [ext4]() {
247 1) 0.211 us | mapping_tagged();
248 1) 0.096 us | ext4_has_inline_data [ext4]();
249 1) 0.090 us | ext4_io_submit_init [ext4]();
254 1) 0.105 us | blk_start_plug();
255 1) | ext4_init_io_end [ext4]() {
.
261 1) 5.532 us | }
262 1) 0.095 us | jbd2_journal_blocks_per_page [jbd2]();
263 1) | ext4_meta_trans_blocks [ext4]() {
.
267 1) 2.473 us | }
268 1) | __ext4_journal_start_sb [ext4]() {
280 1) 7.227 us | }
281 1) | mpage_prepare_extent_to_map [ext4]() {
.
360 1) + 57.845 us | } # mpage_prepare_extent_to_map _ends_here_
361 1) | __ext4_journal_stop [ext4]() {
.
370 1) 5.121 us | }
371 1) | ext4_io_submit [ext4]() {
372 1) | submit_bio() {
373 1) | generic_make_request() {
.
517 1) ! 116.884 us | }
518 1) ! 117.787 us | } # submit_bio _ends_here_
519 1) 0.135 us | bio_put();
520 1) ! 119.573 us | }
521 1) 0.286 us | mpage_release_unused_pages [ext4]();
522 1) 0.161 us | ext4_put_io_end [ext4]();
523 1) | blk_finish_plug() {
524 1) | blk_flush_plug_list() {
.
706 1) ! 141.451 us | }
707 1) ! 142.228 us | } # blk_finish_plug _ends_here_
708 1) ! 355.522 us | }
709 1) ! 356.280 us | } # do_writepages _ends_here_
710 1) ! 357.167 us | }
711 1) | filemap_fdatawait_range() {
.
816 1) ! 154.310 us | }
817 1) ! 513.164 us | } # filemap_write_and_wait_range _ends_here_
818 1) | jbd2_trans_will_send_data_barrier [jbd2]() {
819 1) 0.150 us | _raw_read_lock();
820 1) 0.973 us | }
821 1) | jbd2_complete_transaction [jbd2]() {
877 1) | jbd2_log_wait_commit [jbd2]() {
.
1130 1) ! 25593.78 us | } # jbd2_log_wait_commit _ends_here_
1131 1) ! 25627.76 us | } # jbd2_complete_transaction _ends_here_
1132 1) ! 26144.90 us | } # ext4_sync_file _ends_here
1133 1) ! 26145.69 us | }
1134 1) ! 26293.56 us | }
1135 1) ! 26294.93 us | }
1136 1) | __fsnotify_parent() {
.
1165 1) 9.083 us | }
1166 1) | fsnotify() {
1169 1) 0.858 us | }
1170 1) 0.191 us | __sb_end_write();
1171 1) ! 26321.44 us | } # vfs_write _ends_here_
1172 1) ! 26324.27 us | } # SyS_write _ends_here_
2233 static inline int generic_write_sync(struct file *file, loff_t pos, loff_t count)
2234 {
2235 if (!(file->f_flags & O_DSYNC) && !IS_SYNC(file->f_mapping->host))
2236 return 0;
2237 return vfs_fsync_range(file, pos, pos + count - 1,
2238 (file->f_flags & __O_SYNC) ? 0 : 1);
2239 }
As seen in above function, it returns 0 for buffered writes and calls
vfs_fsync_range for file writes with O_SYNC flag
I am the guy who asked the question.
ReplyDeleteJust wanted to thank you verry much for this!!!!