From 52063267dfd3245ea6b6d87af44d407996e889d0 Mon Sep 17 00:00:00 2001 From: Edgar Gabriel Date: Thu, 25 Aug 2011 20:08:17 +0000 Subject: [PATCH] commit of the OMPIO modules and frameworks. This commit was SVN r25079. --- ompi/config/ompi_check_lustre.m4 | 83 + ompi/config/ompi_check_pvfs2.m4 | 91 + ompi/mca/fbtl/Makefile.am | 40 + ompi/mca/fbtl/base/Makefile.am | 28 + ompi/mca/fbtl/base/base.h | 70 + ompi/mca/fbtl/base/fbtl_base_close.c | 56 + ompi/mca/fbtl/base/fbtl_base_file_select.c | 357 +++ ompi/mca/fbtl/base/fbtl_base_file_unselect.c | 41 + ompi/mca/fbtl/base/fbtl_base_find_available.c | 171 ++ ompi/mca/fbtl/base/fbtl_base_open.c | 88 + ompi/mca/fbtl/base/static-components.h | 18 + ompi/mca/fbtl/fbtl.h | 151 + ompi/mca/fbtl/posix/Makefile.am | 50 + ompi/mca/fbtl/posix/fbtl_posix.c | 85 + ompi/mca/fbtl/posix/fbtl_posix.h | 67 + ompi/mca/fbtl/posix/fbtl_posix_component.c | 65 + ompi/mca/fbtl/posix/fbtl_posix_ipreadv.c | 36 + ompi/mca/fbtl/posix/fbtl_posix_ipwritev.c | 199 ++ ompi/mca/fbtl/posix/fbtl_posix_preadv.c | 269 ++ ompi/mca/fbtl/posix/fbtl_posix_pwritev.c | 299 ++ ompi/mca/fbtl/pvfs2/Makefile.am | 54 + ompi/mca/fbtl/pvfs2/configure.m4 | 50 + ompi/mca/fbtl/pvfs2/fbtl_pvfs2.c | 85 + ompi/mca/fbtl/pvfs2/fbtl_pvfs2.h | 77 + ompi/mca/fbtl/pvfs2/fbtl_pvfs2_component.c | 65 + ompi/mca/fbtl/pvfs2/fbtl_pvfs2_ipreadv.c | 35 + ompi/mca/fbtl/pvfs2/fbtl_pvfs2_ipwritev.c | 35 + ompi/mca/fbtl/pvfs2/fbtl_pvfs2_preadv.c | 240 ++ ompi/mca/fbtl/pvfs2/fbtl_pvfs2_pwritev.c | 257 ++ ompi/mca/fcache/Makefile.am | 40 + ompi/mca/fcache/base/Makefile.am | 28 + ompi/mca/fcache/base/base.h | 70 + ompi/mca/fcache/base/fcache_base_close.c | 56 + .../mca/fcache/base/fcache_base_file_select.c | 358 +++ .../fcache/base/fcache_base_file_unselect.c | 41 + .../fcache/base/fcache_base_find_available.c | 170 ++ ompi/mca/fcache/base/fcache_base_open.c | 88 + ompi/mca/fcache/base/static-components.h | 18 + ompi/mca/fcache/fcache.h | 143 + ompi/mca/fcache/ux/Makefile.am | 49 + 
ompi/mca/fcache/ux/fcache_ux.c | 83 + ompi/mca/fcache/ux/fcache_ux.h | 68 + ompi/mca/fcache/ux/fcache_ux_component.c | 63 + .../mca/fcache/ux/fcache_ux_get_file_layout.c | 37 + ompi/mca/fcache/ux/fcache_ux_get_io_servers.c | 36 + .../mca/fcache/ux/fcache_ux_set_file_layout.c | 37 + ompi/mca/fcoll/Makefile.am | 40 + ompi/mca/fcoll/base/Makefile.am | 28 + ompi/mca/fcoll/base/base.h | 71 + ompi/mca/fcoll/base/fcoll_base_close.c | 56 + ompi/mca/fcoll/base/fcoll_base_file_select.c | 390 +++ .../mca/fcoll/base/fcoll_base_file_unselect.c | 41 + .../fcoll/base/fcoll_base_find_available.c | 171 ++ ompi/mca/fcoll/base/fcoll_base_open.c | 88 + ompi/mca/fcoll/base/static-components.h | 18 + ompi/mca/fcoll/dynamic/Makefile.am | 54 + ompi/mca/fcoll/dynamic/fcoll_dynamic.h | 86 + .../fcoll/dynamic/fcoll_dynamic_component.c | 129 + .../dynamic/fcoll_dynamic_file_read_all.c | 622 ++++ .../fcoll_dynamic_file_read_all_begin.c | 36 + .../dynamic/fcoll_dynamic_file_read_all_end.c | 35 + .../dynamic/fcoll_dynamic_file_write_all.c | 687 +++++ .../fcoll_dynamic_file_write_all_begin.c | 36 + .../fcoll_dynamic_file_write_all_end.c | 35 + ompi/mca/fcoll/dynamic/fcoll_dynamic_module.c | 91 + ompi/mca/fcoll/fcoll.h | 166 ++ ompi/mca/fcoll/individual/Makefile.am | 54 + ompi/mca/fcoll/individual/fcoll_individual.h | 85 + .../individual/fcoll_individual_component.c | 121 + .../fcoll_individual_file_read_all.c | 211 ++ .../fcoll_individual_file_read_all_begin.c | 36 + .../fcoll_individual_file_read_all_end.c | 35 + .../fcoll_individual_file_write_all.c | 203 ++ .../fcoll_individual_file_write_all_begin.c | 36 + .../fcoll_individual_file_write_all_end.c | 35 + .../individual/fcoll_individual_module.c | 91 + ompi/mca/fcoll/static/Makefile.am | 54 + ompi/mca/fcoll/static/fcoll_static.h | 85 + .../mca/fcoll/static/fcoll_static_component.c | 132 + .../fcoll/static/fcoll_static_file_read_all.c | 469 +++ .../static/fcoll_static_file_read_all_begin.c | 36 + .../static/fcoll_static_file_read_all_end.c | 35 
+ .../static/fcoll_static_file_write_all.c | 457 +++ .../fcoll_static_file_write_all_begin.c | 36 + .../static/fcoll_static_file_write_all_end.c | 35 + ompi/mca/fcoll/static/fcoll_static_module.c | 91 + ompi/mca/fcoll/two_phase/Makefile.am | 54 + ompi/mca/fcoll/two_phase/fcoll_two_phase.h | 86 + .../two_phase/fcoll_two_phase_component.c | 132 + .../two_phase/fcoll_two_phase_file_read_all.c | 713 +++++ .../fcoll_two_phase_file_read_all_begin.c | 36 + .../fcoll_two_phase_file_read_all_end.c | 35 + .../fcoll_two_phase_file_write_all.c | 736 +++++ .../fcoll_two_phase_file_write_all_begin.c | 36 + .../fcoll_two_phase_file_write_all_end.c | 35 + .../fcoll/two_phase/fcoll_two_phase_module.c | 91 + ompi/mca/fcoll/ylib/Makefile.am | 54 + ompi/mca/fcoll/ylib/fcoll_ylib.h | 86 + ompi/mca/fcoll/ylib/fcoll_ylib_component.c | 132 + .../mca/fcoll/ylib/fcoll_ylib_file_read_all.c | 662 ++++ .../ylib/fcoll_ylib_file_read_all_begin.c | 36 + .../fcoll/ylib/fcoll_ylib_file_read_all_end.c | 35 + .../fcoll/ylib/fcoll_ylib_file_write_all.c | 706 +++++ .../ylib/fcoll_ylib_file_write_all_begin.c | 36 + .../ylib/fcoll_ylib_file_write_all_end.c | 35 + ompi/mca/fcoll/ylib/fcoll_ylib_module.c | 91 + ompi/mca/fs/Makefile.am | 40 + ompi/mca/fs/base/Makefile.am | 28 + ompi/mca/fs/base/base.h | 70 + ompi/mca/fs/base/fs_base_close.c | 56 + ompi/mca/fs/base/fs_base_file_select.c | 357 +++ ompi/mca/fs/base/fs_base_file_unselect.c | 41 + ompi/mca/fs/base/fs_base_find_available.c | 171 ++ ompi/mca/fs/base/fs_base_open.c | 88 + ompi/mca/fs/base/static-components.h | 18 + ompi/mca/fs/fs.h | 146 + ompi/mca/fs/lustre/Makefile.am | 57 + ompi/mca/fs/lustre/configure.m4 | 50 + ompi/mca/fs/lustre/fs_lustre.c | 93 + ompi/mca/fs/lustre/fs_lustre.h | 83 + ompi/mca/fs/lustre/fs_lustre_component.c | 103 + ompi/mca/fs/lustre/fs_lustre_file_close.c | 45 + ompi/mca/fs/lustre/fs_lustre_file_delete.c | 49 + ompi/mca/fs/lustre/fs_lustre_file_get_size.c | 42 + ompi/mca/fs/lustre/fs_lustre_file_open.c | 118 + 
ompi/mca/fs/lustre/fs_lustre_file_set_info.c | 42 + ompi/mca/fs/lustre/fs_lustre_file_set_size.c | 42 + ompi/mca/fs/lustre/fs_lustre_file_sync.c | 48 + ompi/mca/fs/pvfs2/Makefile.am | 57 + ompi/mca/fs/pvfs2/configure.m4 | 50 + ompi/mca/fs/pvfs2/fs_pvfs2.c | 93 + ompi/mca/fs/pvfs2/fs_pvfs2.h | 101 + ompi/mca/fs/pvfs2/fs_pvfs2_component.c | 105 + ompi/mca/fs/pvfs2/fs_pvfs2_file_close.c | 55 + ompi/mca/fs/pvfs2/fs_pvfs2_file_delete.c | 80 + ompi/mca/fs/pvfs2/fs_pvfs2_file_get_size.c | 59 + ompi/mca/fs/pvfs2/fs_pvfs2_file_open.c | 322 ++ ompi/mca/fs/pvfs2/fs_pvfs2_file_set_info.c | 46 + ompi/mca/fs/pvfs2/fs_pvfs2_file_set_size.c | 73 + ompi/mca/fs/pvfs2/fs_pvfs2_file_sync.c | 65 + ompi/mca/fs/ufs/Makefile.am | 53 + ompi/mca/fs/ufs/fs_ufs.c | 93 + ompi/mca/fs/ufs/fs_ufs.h | 81 + ompi/mca/fs/ufs/fs_ufs_component.c | 65 + ompi/mca/fs/ufs/fs_ufs_file_close.c | 51 + ompi/mca/fs/ufs/fs_ufs_file_delete.c | 49 + ompi/mca/fs/ufs/fs_ufs_file_get_size.c | 53 + ompi/mca/fs/ufs/fs_ufs_file_open.c | 95 + ompi/mca/fs/ufs/fs_ufs_file_set_info.c | 42 + ompi/mca/fs/ufs/fs_ufs_file_set_size.c | 56 + ompi/mca/fs/ufs/fs_ufs_file_sync.c | 41 + ompi/mca/io/base/io_base_delete.c | 32 +- ompi/mca/io/base/io_base_file_select.c | 75 +- ompi/mca/io/ompio/Makefile.am | 54 + ompi/mca/io/ompio/io_ompio.c | 2650 +++++++++++++++++ ompi/mca/io/ompio/io_ompio.h | 711 +++++ ompi/mca/io/ompio/io_ompio_coll_array.c | 447 +++ ompi/mca/io/ompio/io_ompio_coll_offset.c | 430 +++ ompi/mca/io/ompio/io_ompio_component.c | 284 ++ ompi/mca/io/ompio/io_ompio_file_open.c | 603 ++++ ompi/mca/io/ompio/io_ompio_file_read.c | 504 ++++ ompi/mca/io/ompio/io_ompio_file_set_view.c | 300 ++ ompi/mca/io/ompio/io_ompio_file_write.c | 621 ++++ ompi/mca/io/ompio/io_ompio_module.c | 95 + ompi/mca/io/ompio/io_ompio_nbc.c | 541 ++++ ompi/mca/io/romio/src/io_romio_component.c | 4 +- ompi/mca/sharedfp/Makefile.am | 40 + ompi/mca/sharedfp/base/Makefile.am | 28 + ompi/mca/sharedfp/base/base.h | 70 + 
ompi/mca/sharedfp/base/sharedfp_base_close.c | 56 + .../sharedfp/base/sharedfp_base_file_select.c | 358 +++ .../base/sharedfp_base_file_unselect.c | 41 + .../base/sharedfp_base_find_available.c | 171 ++ ompi/mca/sharedfp/base/sharedfp_base_open.c | 88 + ompi/mca/sharedfp/base/static-components.h | 18 + ompi/mca/sharedfp/dummy/Makefile.am | 48 + ompi/mca/sharedfp/dummy/sharedfp_dummy.c | 82 + ompi/mca/sharedfp/dummy/sharedfp_dummy.h | 61 + .../sharedfp/dummy/sharedfp_dummy_component.c | 63 + ompi/mca/sharedfp/dummy/sharedfp_dummy_seek.c | 35 + .../sharedfp/dummy/sharedfp_dummy_update.c | 36 + ompi/mca/sharedfp/sharedfp.h | 131 + 182 files changed, 25824 insertions(+), 16 deletions(-) create mode 100644 ompi/config/ompi_check_lustre.m4 create mode 100644 ompi/config/ompi_check_pvfs2.m4 create mode 100644 ompi/mca/fbtl/Makefile.am create mode 100644 ompi/mca/fbtl/base/Makefile.am create mode 100644 ompi/mca/fbtl/base/base.h create mode 100644 ompi/mca/fbtl/base/fbtl_base_close.c create mode 100644 ompi/mca/fbtl/base/fbtl_base_file_select.c create mode 100644 ompi/mca/fbtl/base/fbtl_base_file_unselect.c create mode 100644 ompi/mca/fbtl/base/fbtl_base_find_available.c create mode 100644 ompi/mca/fbtl/base/fbtl_base_open.c create mode 100644 ompi/mca/fbtl/base/static-components.h create mode 100644 ompi/mca/fbtl/fbtl.h create mode 100644 ompi/mca/fbtl/posix/Makefile.am create mode 100644 ompi/mca/fbtl/posix/fbtl_posix.c create mode 100644 ompi/mca/fbtl/posix/fbtl_posix.h create mode 100644 ompi/mca/fbtl/posix/fbtl_posix_component.c create mode 100644 ompi/mca/fbtl/posix/fbtl_posix_ipreadv.c create mode 100644 ompi/mca/fbtl/posix/fbtl_posix_ipwritev.c create mode 100644 ompi/mca/fbtl/posix/fbtl_posix_preadv.c create mode 100644 ompi/mca/fbtl/posix/fbtl_posix_pwritev.c create mode 100644 ompi/mca/fbtl/pvfs2/Makefile.am create mode 100644 ompi/mca/fbtl/pvfs2/configure.m4 create mode 100644 ompi/mca/fbtl/pvfs2/fbtl_pvfs2.c create mode 100644 ompi/mca/fbtl/pvfs2/fbtl_pvfs2.h 
create mode 100644 ompi/mca/fbtl/pvfs2/fbtl_pvfs2_component.c create mode 100644 ompi/mca/fbtl/pvfs2/fbtl_pvfs2_ipreadv.c create mode 100644 ompi/mca/fbtl/pvfs2/fbtl_pvfs2_ipwritev.c create mode 100644 ompi/mca/fbtl/pvfs2/fbtl_pvfs2_preadv.c create mode 100644 ompi/mca/fbtl/pvfs2/fbtl_pvfs2_pwritev.c create mode 100644 ompi/mca/fcache/Makefile.am create mode 100644 ompi/mca/fcache/base/Makefile.am create mode 100644 ompi/mca/fcache/base/base.h create mode 100644 ompi/mca/fcache/base/fcache_base_close.c create mode 100644 ompi/mca/fcache/base/fcache_base_file_select.c create mode 100644 ompi/mca/fcache/base/fcache_base_file_unselect.c create mode 100644 ompi/mca/fcache/base/fcache_base_find_available.c create mode 100644 ompi/mca/fcache/base/fcache_base_open.c create mode 100644 ompi/mca/fcache/base/static-components.h create mode 100644 ompi/mca/fcache/fcache.h create mode 100644 ompi/mca/fcache/ux/Makefile.am create mode 100644 ompi/mca/fcache/ux/fcache_ux.c create mode 100644 ompi/mca/fcache/ux/fcache_ux.h create mode 100644 ompi/mca/fcache/ux/fcache_ux_component.c create mode 100644 ompi/mca/fcache/ux/fcache_ux_get_file_layout.c create mode 100644 ompi/mca/fcache/ux/fcache_ux_get_io_servers.c create mode 100644 ompi/mca/fcache/ux/fcache_ux_set_file_layout.c create mode 100644 ompi/mca/fcoll/Makefile.am create mode 100644 ompi/mca/fcoll/base/Makefile.am create mode 100644 ompi/mca/fcoll/base/base.h create mode 100644 ompi/mca/fcoll/base/fcoll_base_close.c create mode 100644 ompi/mca/fcoll/base/fcoll_base_file_select.c create mode 100644 ompi/mca/fcoll/base/fcoll_base_file_unselect.c create mode 100644 ompi/mca/fcoll/base/fcoll_base_find_available.c create mode 100644 ompi/mca/fcoll/base/fcoll_base_open.c create mode 100644 ompi/mca/fcoll/base/static-components.h create mode 100644 ompi/mca/fcoll/dynamic/Makefile.am create mode 100644 ompi/mca/fcoll/dynamic/fcoll_dynamic.h create mode 100644 ompi/mca/fcoll/dynamic/fcoll_dynamic_component.c create mode 100644 
ompi/mca/fcoll/dynamic/fcoll_dynamic_file_read_all.c create mode 100644 ompi/mca/fcoll/dynamic/fcoll_dynamic_file_read_all_begin.c create mode 100644 ompi/mca/fcoll/dynamic/fcoll_dynamic_file_read_all_end.c create mode 100644 ompi/mca/fcoll/dynamic/fcoll_dynamic_file_write_all.c create mode 100644 ompi/mca/fcoll/dynamic/fcoll_dynamic_file_write_all_begin.c create mode 100644 ompi/mca/fcoll/dynamic/fcoll_dynamic_file_write_all_end.c create mode 100644 ompi/mca/fcoll/dynamic/fcoll_dynamic_module.c create mode 100644 ompi/mca/fcoll/fcoll.h create mode 100644 ompi/mca/fcoll/individual/Makefile.am create mode 100644 ompi/mca/fcoll/individual/fcoll_individual.h create mode 100644 ompi/mca/fcoll/individual/fcoll_individual_component.c create mode 100644 ompi/mca/fcoll/individual/fcoll_individual_file_read_all.c create mode 100644 ompi/mca/fcoll/individual/fcoll_individual_file_read_all_begin.c create mode 100644 ompi/mca/fcoll/individual/fcoll_individual_file_read_all_end.c create mode 100644 ompi/mca/fcoll/individual/fcoll_individual_file_write_all.c create mode 100644 ompi/mca/fcoll/individual/fcoll_individual_file_write_all_begin.c create mode 100644 ompi/mca/fcoll/individual/fcoll_individual_file_write_all_end.c create mode 100644 ompi/mca/fcoll/individual/fcoll_individual_module.c create mode 100644 ompi/mca/fcoll/static/Makefile.am create mode 100644 ompi/mca/fcoll/static/fcoll_static.h create mode 100644 ompi/mca/fcoll/static/fcoll_static_component.c create mode 100644 ompi/mca/fcoll/static/fcoll_static_file_read_all.c create mode 100644 ompi/mca/fcoll/static/fcoll_static_file_read_all_begin.c create mode 100644 ompi/mca/fcoll/static/fcoll_static_file_read_all_end.c create mode 100644 ompi/mca/fcoll/static/fcoll_static_file_write_all.c create mode 100644 ompi/mca/fcoll/static/fcoll_static_file_write_all_begin.c create mode 100644 ompi/mca/fcoll/static/fcoll_static_file_write_all_end.c create mode 100644 ompi/mca/fcoll/static/fcoll_static_module.c create mode 100644 
ompi/mca/fcoll/two_phase/Makefile.am create mode 100644 ompi/mca/fcoll/two_phase/fcoll_two_phase.h create mode 100644 ompi/mca/fcoll/two_phase/fcoll_two_phase_component.c create mode 100644 ompi/mca/fcoll/two_phase/fcoll_two_phase_file_read_all.c create mode 100644 ompi/mca/fcoll/two_phase/fcoll_two_phase_file_read_all_begin.c create mode 100644 ompi/mca/fcoll/two_phase/fcoll_two_phase_file_read_all_end.c create mode 100644 ompi/mca/fcoll/two_phase/fcoll_two_phase_file_write_all.c create mode 100644 ompi/mca/fcoll/two_phase/fcoll_two_phase_file_write_all_begin.c create mode 100644 ompi/mca/fcoll/two_phase/fcoll_two_phase_file_write_all_end.c create mode 100644 ompi/mca/fcoll/two_phase/fcoll_two_phase_module.c create mode 100644 ompi/mca/fcoll/ylib/Makefile.am create mode 100644 ompi/mca/fcoll/ylib/fcoll_ylib.h create mode 100644 ompi/mca/fcoll/ylib/fcoll_ylib_component.c create mode 100644 ompi/mca/fcoll/ylib/fcoll_ylib_file_read_all.c create mode 100644 ompi/mca/fcoll/ylib/fcoll_ylib_file_read_all_begin.c create mode 100644 ompi/mca/fcoll/ylib/fcoll_ylib_file_read_all_end.c create mode 100644 ompi/mca/fcoll/ylib/fcoll_ylib_file_write_all.c create mode 100644 ompi/mca/fcoll/ylib/fcoll_ylib_file_write_all_begin.c create mode 100644 ompi/mca/fcoll/ylib/fcoll_ylib_file_write_all_end.c create mode 100644 ompi/mca/fcoll/ylib/fcoll_ylib_module.c create mode 100644 ompi/mca/fs/Makefile.am create mode 100644 ompi/mca/fs/base/Makefile.am create mode 100644 ompi/mca/fs/base/base.h create mode 100644 ompi/mca/fs/base/fs_base_close.c create mode 100644 ompi/mca/fs/base/fs_base_file_select.c create mode 100644 ompi/mca/fs/base/fs_base_file_unselect.c create mode 100644 ompi/mca/fs/base/fs_base_find_available.c create mode 100644 ompi/mca/fs/base/fs_base_open.c create mode 100644 ompi/mca/fs/base/static-components.h create mode 100644 ompi/mca/fs/fs.h create mode 100644 ompi/mca/fs/lustre/Makefile.am create mode 100644 ompi/mca/fs/lustre/configure.m4 create mode 100644 
ompi/mca/fs/lustre/fs_lustre.c create mode 100644 ompi/mca/fs/lustre/fs_lustre.h create mode 100644 ompi/mca/fs/lustre/fs_lustre_component.c create mode 100644 ompi/mca/fs/lustre/fs_lustre_file_close.c create mode 100644 ompi/mca/fs/lustre/fs_lustre_file_delete.c create mode 100644 ompi/mca/fs/lustre/fs_lustre_file_get_size.c create mode 100644 ompi/mca/fs/lustre/fs_lustre_file_open.c create mode 100644 ompi/mca/fs/lustre/fs_lustre_file_set_info.c create mode 100644 ompi/mca/fs/lustre/fs_lustre_file_set_size.c create mode 100644 ompi/mca/fs/lustre/fs_lustre_file_sync.c create mode 100644 ompi/mca/fs/pvfs2/Makefile.am create mode 100644 ompi/mca/fs/pvfs2/configure.m4 create mode 100644 ompi/mca/fs/pvfs2/fs_pvfs2.c create mode 100644 ompi/mca/fs/pvfs2/fs_pvfs2.h create mode 100644 ompi/mca/fs/pvfs2/fs_pvfs2_component.c create mode 100644 ompi/mca/fs/pvfs2/fs_pvfs2_file_close.c create mode 100644 ompi/mca/fs/pvfs2/fs_pvfs2_file_delete.c create mode 100644 ompi/mca/fs/pvfs2/fs_pvfs2_file_get_size.c create mode 100644 ompi/mca/fs/pvfs2/fs_pvfs2_file_open.c create mode 100644 ompi/mca/fs/pvfs2/fs_pvfs2_file_set_info.c create mode 100644 ompi/mca/fs/pvfs2/fs_pvfs2_file_set_size.c create mode 100644 ompi/mca/fs/pvfs2/fs_pvfs2_file_sync.c create mode 100644 ompi/mca/fs/ufs/Makefile.am create mode 100644 ompi/mca/fs/ufs/fs_ufs.c create mode 100644 ompi/mca/fs/ufs/fs_ufs.h create mode 100644 ompi/mca/fs/ufs/fs_ufs_component.c create mode 100644 ompi/mca/fs/ufs/fs_ufs_file_close.c create mode 100644 ompi/mca/fs/ufs/fs_ufs_file_delete.c create mode 100644 ompi/mca/fs/ufs/fs_ufs_file_get_size.c create mode 100644 ompi/mca/fs/ufs/fs_ufs_file_open.c create mode 100644 ompi/mca/fs/ufs/fs_ufs_file_set_info.c create mode 100644 ompi/mca/fs/ufs/fs_ufs_file_set_size.c create mode 100644 ompi/mca/fs/ufs/fs_ufs_file_sync.c create mode 100644 ompi/mca/io/ompio/Makefile.am create mode 100644 ompi/mca/io/ompio/io_ompio.c create mode 100644 ompi/mca/io/ompio/io_ompio.h create mode 100644 
ompi/mca/io/ompio/io_ompio_coll_array.c create mode 100644 ompi/mca/io/ompio/io_ompio_coll_offset.c create mode 100644 ompi/mca/io/ompio/io_ompio_component.c create mode 100644 ompi/mca/io/ompio/io_ompio_file_open.c create mode 100644 ompi/mca/io/ompio/io_ompio_file_read.c create mode 100644 ompi/mca/io/ompio/io_ompio_file_set_view.c create mode 100644 ompi/mca/io/ompio/io_ompio_file_write.c create mode 100644 ompi/mca/io/ompio/io_ompio_module.c create mode 100644 ompi/mca/io/ompio/io_ompio_nbc.c create mode 100644 ompi/mca/sharedfp/Makefile.am create mode 100644 ompi/mca/sharedfp/base/Makefile.am create mode 100644 ompi/mca/sharedfp/base/base.h create mode 100644 ompi/mca/sharedfp/base/sharedfp_base_close.c create mode 100644 ompi/mca/sharedfp/base/sharedfp_base_file_select.c create mode 100644 ompi/mca/sharedfp/base/sharedfp_base_file_unselect.c create mode 100644 ompi/mca/sharedfp/base/sharedfp_base_find_available.c create mode 100644 ompi/mca/sharedfp/base/sharedfp_base_open.c create mode 100644 ompi/mca/sharedfp/base/static-components.h create mode 100644 ompi/mca/sharedfp/dummy/Makefile.am create mode 100644 ompi/mca/sharedfp/dummy/sharedfp_dummy.c create mode 100644 ompi/mca/sharedfp/dummy/sharedfp_dummy.h create mode 100644 ompi/mca/sharedfp/dummy/sharedfp_dummy_component.c create mode 100644 ompi/mca/sharedfp/dummy/sharedfp_dummy_seek.c create mode 100644 ompi/mca/sharedfp/dummy/sharedfp_dummy_update.c create mode 100644 ompi/mca/sharedfp/sharedfp.h diff --git a/ompi/config/ompi_check_lustre.m4 b/ompi/config/ompi_check_lustre.m4 new file mode 100644 index 0000000000..1ee11f9d5c --- /dev/null +++ b/ompi/config/ompi_check_lustre.m4 @@ -0,0 +1,83 @@ +# -*- shell-script -*- +# +# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana +# University Research and Technology +# Corporation. All rights reserved. +# Copyright (c) 2004-2005 The University of Tennessee and The University +# of Tennessee Research Foundation. All rights +# reserved. 
+# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +# University of Stuttgart. All rights reserved. +# Copyright (c) 2004-2006 The Regents of the University of California. +# All rights reserved. +# Copyright (c) 2009 Cisco Systems, Inc. All rights reserved. +# Copyright (c) 2008-2011 University of Houston. All rights reserved. +# $COPYRIGHT$ +# +# Additional copyrights may follow +# +# $HEADER$ +# + +# OMPI_CHECK_LUSTRE(prefix, [action-if-found], [action-if-not-found]) +# -------------------------------------------------------- +# check if LUSTRE support can be found. sets prefix_{CPPFLAGS, +# LDFLAGS, LIBS} as needed and runs action-if-found if there is +# support, otherwise executes action-if-not-found +AC_DEFUN([OMPI_CHECK_LUSTRE],[ + + check_lustre_CPPFLAGS= + check_lustre_LDFLAGS= + check_lustre_LIBS= + + check_lustre_configuration="none" + ompi_check_lustre_happy="yes" + + # Get some configuration information + AC_ARG_WITH([lustre], + [AC_HELP_STRING([--with-lustre(=DIR)], + [Build Lustre support, optionally adding DIR/include, DIR/lib, and DIR/lib64 to the search path for headers and libraries])]) + OMPI_CHECK_WITHDIR([lustre], [$with_lustre], [include/lustre/liblustreapi.h]) + + AC_ARG_WITH([lustre-libs], + [AC_HELP_STRING([--with-lustre-libs=LIBS], + [Libraries to link with for lustre])]) + + # Add correct -I and -L flags + temp_lustre=$with_lustre + AS_IF([test -n "$with_lustre"], + [AS_IF([test -d "$with_lustre/include"], + [check_lustre_CPPFLAGS="-I$with_lustre/include" + CPPFLAGS="$CPPFLAGS $check_lustre_CPPFLAGS"], []) + AS_IF([test -d "$with_lustre/lib"], + [check_lustre_LDFLAGS="-L$with_lustre/lib" + LDFLAGS="$LDFLAGS $check_lustre_LDFLAGS"], [])], + with_lustre="/usr/local") + + # Try to find all the lustre libraries (this is not fun!) 
+ if test -n "$with_lustre_libs" ; then + check_lustre_LIBS="-llustre -llustreapi" + for lib in $with_lustre_libs ; do + check_lustre_LIBS="$check_lustre_LIBS -l$lib" + done + fi + + # check for lustre: the header must be found and a program including that same header must link + LIBS="$LIBS $check_lustre_LIBS" + AC_CHECK_HEADERS([${check_lustre_header_prefix}lustre/liblustreapi.h], + [AC_MSG_CHECKING([if possible to link LUSTRE]) + AC_LINK_IFELSE([AC_LANG_PROGRAM([#include <${check_lustre_header_prefix}lustre/liblustreapi.h>], + [int i;])], + [AC_MSG_RESULT([yes]) + ompi_check_lustre_happy="yes"], + [AC_MSG_RESULT([no]) + ompi_check_lustre_happy="no"])], + [ompi_check_lustre_happy="no"]) + + AS_IF([test "$ompi_check_lustre_happy" = "yes"], + [$2], + [AS_IF([test ! -z "$with_lustre" -a "$with_lustre" != "no"], + [echo LUSTRE support not found]) + $3]) + with_lustre="$temp_lustre" +]) diff --git a/ompi/config/ompi_check_pvfs2.m4 b/ompi/config/ompi_check_pvfs2.m4 new file mode 100644 index 0000000000..645f77ac30 --- /dev/null +++ b/ompi/config/ompi_check_pvfs2.m4 @@ -0,0 +1,91 @@ +# -*- shell-script -*- +# +# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana +# University Research and Technology +# Corporation. All rights reserved. +# Copyright (c) 2004-2005 The University of Tennessee and The University +# of Tennessee Research Foundation. All rights +# reserved. +# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +# University of Stuttgart. All rights reserved. +# Copyright (c) 2004-2006 The Regents of the University of California. +# All rights reserved. +# Copyright (c) 2009 Cisco Systems, Inc. All rights reserved. +# Copyright (c) 2008-2011 University of Houston. All rights reserved. +# $COPYRIGHT$ +# +# Additional copyrights may follow +# +# $HEADER$ +# + +# OMPI_CHECK_PVFS2(prefix, [action-if-found], [action-if-not-found]) +# -------------------------------------------------------- +# check if PVFS2 support can be found. 
sets prefix_{CPPFLAGS, +# LDFLAGS, LIBS} as needed and runs action-if-found if there is +# support, otherwise executes action-if-not-found +AC_DEFUN([OMPI_CHECK_PVFS2],[ + + check_pvfs2_CPPFLAGS= + check_pvfs2_LDFLAGS= + check_pvfs2_LIBS= + + check_pvfs2_configuration="none" + ompi_check_pvfs2_happy="yes" + + # Get some configuration information + AC_ARG_WITH([pvfs2], + [AC_HELP_STRING([--with-pvfs2(=DIR)], + [Build Pvfs2 support, optionally adding DIR/include, DIR/lib, and DIR/lib64 to the search path for headers and libraries])]) + OMPI_CHECK_WITHDIR([pvfs2], [$with_pvfs2], [include/pvfs2.h]) + + AC_ARG_WITH([pvfs2-libs], + [AC_HELP_STRING([--with-pvfs2-libs=LIBS], + [Libraries to link with for pvfs2])]) + + # Add correct -I and -L flags + temp_pvfs2=$with_pvfs2 + AS_IF([test -n "$with_pvfs2"], + [AS_IF([test -d "$with_pvfs2/include"], + [check_pvfs2_CPPFLAGS="-I$with_pvfs2/include" + CPPFLAGS="$CPPFLAGS $check_pvfs2_CPPFLAGS" + CFLAGS="$CFLAGS $check_pvfs2_CPPFLAGS" + WRAPPER_EXTRA_CPPFLAGS="$WRAPPER_EXTRA_CPPFLAGS $check_pvfs2_CPPFLAGS" + WRAPPER_EXTRA_CFLAGS="$WRAPPER_EXTRA_CFLAGS $check_pvfs2_CPPFLAGS" + ], []) + AS_IF([test -d "$with_pvfs2/lib"], + [check_pvfs2_LDFLAGS="-L$with_pvfs2/lib" + LDFLAGS="$LDFLAGS $check_pvfs2_LDFLAGS" + WRAPPER_EXTRA_LDFLAGS="$WRAPPER_EXTRA_LDFLAGS $check_pvfs2_LDFLAGS" + check_pvfs2_LIBS="-lpvfs2 -lpthread" + ], [])], + with_pvfs2="/usr/local") + + # Try to find all the pvfs2 libraries (this is not fun!) 
+ if test -n "$with_pvfs2_libs" ; then + for lib in $with_pvfs2_libs ; do + check_pvfs2_LIBS="$check_pvfs2_LIBS -l$lib" + done + fi + + LIBS="$LIBS $check_pvfs2_LIBS" + WRAPPER_EXTRA_LIBS="$WRAPPER_EXTRA_LIBS $check_pvfs2_LIBS" + + # check for pvfs2: the header must be found and a program including that same header must link + AC_CHECK_HEADERS([${check_pvfs2_header_prefix}pvfs2.h], + [AC_MSG_CHECKING([if possible to link PVFS2]) + AC_LINK_IFELSE([AC_LANG_PROGRAM([#include <${check_pvfs2_header_prefix}pvfs2.h>], + [int i;])], + [AC_MSG_RESULT([yes]) + ompi_check_pvfs2_happy="yes"], + [AC_MSG_RESULT([no]) + ompi_check_pvfs2_happy="no"])], + [ompi_check_pvfs2_happy="no"]) + + AS_IF([test "$ompi_check_pvfs2_happy" = "yes"], + [$2], + [AS_IF([test ! -z "$with_pvfs2" -a "$with_pvfs2" != "no"], + [echo PVFS2 support not found]) + $3]) + with_pvfs2="$temp_pvfs2" +]) diff --git a/ompi/mca/fbtl/Makefile.am b/ompi/mca/fbtl/Makefile.am new file mode 100644 index 0000000000..5ecef05c0f --- /dev/null +++ b/ompi/mca/fbtl/Makefile.am @@ -0,0 +1,40 @@ +# +# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana +# University Research and Technology +# Corporation. All rights reserved. +# Copyright (c) 2004-2005 The University of Tennessee and The University +# of Tennessee Research Foundation. All rights +# reserved. +# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +# University of Stuttgart. All rights reserved. +# Copyright (c) 2004-2005 The Regents of the University of California. +# All rights reserved. +# Copyright (c) 2008-2011 University of Houston. All rights reserved. +# Copyright (c) 2010 Cisco Systems, Inc. All rights reserved. 
+# $COPYRIGHT$ +# +# Additional copyrights may follow +# +# $HEADER$ +# + +AM_CPPFLAGS = $(LTDLINCL) + +# main library setup +noinst_LTLIBRARIES = libmca_fbtl.la +libmca_fbtl_la_SOURCES = + +# local files +headers = fbtl.h +libmca_fbtl_la_SOURCES += $(headers) + +# Conditionally install the header files +if WANT_INSTALL_HEADERS +ompidir = $(includedir)/openmpi/$(subdir) +nobase_ompi_HEADERS = $(headers) +endif + +include base/Makefile.am + +distclean-local: + rm -f base/static-components.h diff --git a/ompi/mca/fbtl/base/Makefile.am b/ompi/mca/fbtl/base/Makefile.am new file mode 100644 index 0000000000..6481743bfd --- /dev/null +++ b/ompi/mca/fbtl/base/Makefile.am @@ -0,0 +1,28 @@ +# +# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana +# University Research and Technology +# Corporation. All rights reserved. +# Copyright (c) 2004-2005 The University of Tennessee and The University +# of Tennessee Research Foundation. All rights +# reserved. +# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +# University of Stuttgart. All rights reserved. +# Copyright (c) 2004-2005 The Regents of the University of California. +# All rights reserved. +# Copyright (c) 2008-2011 University of Houston. All rights reserved. +# $COPYRIGHT$ +# +# Additional copyrights may follow +# +# $HEADER$ +# + +headers += \ + base/base.h + +libmca_fbtl_la_SOURCES += \ + base/fbtl_base_close.c \ + base/fbtl_base_file_select.c \ + base/fbtl_base_file_unselect.c \ + base/fbtl_base_find_available.c \ + base/fbtl_base_open.c diff --git a/ompi/mca/fbtl/base/base.h b/ompi/mca/fbtl/base/base.h new file mode 100644 index 0000000000..1d3028d9a3 --- /dev/null +++ b/ompi/mca/fbtl/base/base.h @@ -0,0 +1,70 @@ +/* + * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. 
+ * Copyright (c) 2004-2005 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2005 The Regents of the University of California. + * All rights reserved. + * Copyright (c) 2008 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2008-2011 University of Houston. All rights reserved. + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +/** + * @file + * + * MCA fbtl base framework public interface functions. + */ + +#ifndef MCA_FBTL_BASE_H +#define MCA_FBTL_BASE_H + +#include "ompi_config.h" + +#include "mpi.h" +#include "opal/class/opal_list.h" +#include "ompi/mca/fbtl/fbtl.h" +#include "opal/mca/mca.h" + + +BEGIN_C_DECLS + +OMPI_DECLSPEC int mca_fbtl_base_open(void); + +OMPI_DECLSPEC int mca_fbtl_base_close(void); + +OMPI_DECLSPEC int mca_fbtl_base_file_select(struct mca_io_ompio_file_t *file, + mca_base_component_t *preferred); + +OMPI_DECLSPEC int mca_fbtl_base_file_unselect(struct mca_io_ompio_file_t *file); + +OMPI_DECLSPEC int mca_fbtl_base_find_available(bool enable_progress_threads, + bool enable_mpi_threads); + +OMPI_DECLSPEC int mca_fbtl_base_init_file (struct mca_io_ompio_file_t *file); + +OMPI_DECLSPEC int mca_fbtl_base_get_param (struct mca_io_ompio_file_t *file, int keyval); +/* + * Globals shared by the fbtl base sources + */ + +OMPI_DECLSPEC extern int mca_fbtl_base_param; +OMPI_DECLSPEC extern int mca_fbtl_base_output; /* output stream id: passed to opal_output_verbose() and closed by mca_fbtl_base_close() */ + +OMPI_DECLSPEC extern bool mca_fbtl_base_components_opened_valid; /* when true, mca_fbtl_base_close() closes and destructs the opened list */ +OMPI_DECLSPEC extern bool mca_fbtl_base_components_available_valid; /* when true (and the opened list is not valid), mca_fbtl_base_close() closes and destructs the available list */ + +OMPI_DECLSPEC extern opal_list_t mca_fbtl_base_components_opened; +OMPI_DECLSPEC extern opal_list_t mca_fbtl_base_components_available; + +END_C_DECLS + +#endif /* MCA_FBTL_BASE_H */ diff --git a/ompi/mca/fbtl/base/fbtl_base_close.c b/ompi/mca/fbtl/base/fbtl_base_close.c new file mode 100644 
index 0000000000..c838408944 --- /dev/null +++ b/ompi/mca/fbtl/base/fbtl_base_close.c @@ -0,0 +1,56 @@ +/* + * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. + * Copyright (c) 2004-2005 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2005 The Regents of the University of California. + * All rights reserved. + * Copyright (c) 2008-2011 University of Houston. All rights reserved. + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#include "ompi_config.h" + +#include + +#include "ompi/constants.h" +#include "opal/mca/mca.h" +#include "opal/mca/base/base.h" +#include "ompi/mca/fbtl/fbtl.h" +#include "ompi/mca/fbtl/base/base.h" +/* Shut down the fbtl framework: close the components on whichever list (opened, otherwise available) is flagged valid, destruct that list and clear its flag, then close the framework output stream. Always returns OMPI_SUCCESS. */ +int mca_fbtl_base_close(void) +{ + /* + Close all components that are still open. This may be the opened + list (if we're in ompi_info), or it may be the available list (if + we're anywhere else). 
+ */ + + if (mca_fbtl_base_components_opened_valid) { + mca_base_components_close(mca_fbtl_base_output, + &mca_fbtl_base_components_opened, NULL); + OBJ_DESTRUCT(&mca_fbtl_base_components_opened); + mca_fbtl_base_components_opened_valid = false; + } else if (mca_fbtl_base_components_available_valid) { + mca_base_components_close(mca_fbtl_base_output, + &mca_fbtl_base_components_available, NULL); + OBJ_DESTRUCT(&mca_fbtl_base_components_available); + mca_fbtl_base_components_available_valid = false; + } + + /* Close the output stream for this framework */ + opal_output_close (mca_fbtl_base_output); + + /* All done; no failure is reported to the caller */ + + return OMPI_SUCCESS; +} diff --git a/ompi/mca/fbtl/base/fbtl_base_file_select.c b/ompi/mca/fbtl/base/fbtl_base_file_select.c new file mode 100644 index 0000000000..c864f259dc --- /dev/null +++ b/ompi/mca/fbtl/base/fbtl_base_file_select.c @@ -0,0 +1,357 @@ +/* + * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. + * Copyright (c) 2004-2005 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2005 The Regents of the University of California. + * All rights reserved. + * Copyright (c) 2008-2011 University of Houston. All rights reserved. + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#include "ompi_config.h" + +#include + +#include "opal/class/opal_list.h" +#include "opal/util/argv.h" +#include "opal/mca/mca.h" +#include "opal/mca/base/base.h" +#include "orte/util/show_help.h" +#include "ompi/mca/fbtl/fbtl.h" +#include "ompi/mca/fbtl/base/base.h" +#include "ompi/mca/io/ompio/io_ompio.h" + +/* + * This structure is needed so that we can close the modules + * which are not selected but were opened. 
+ * mca_base_modules_close, which does this job for us, requires an
+ * opal_list_t which contains these modules.
+ */
+struct queried_module_t {
+    opal_list_item_t super;
+    mca_fbtl_base_component_t *om_component;
+    mca_fbtl_base_module_t *om_module;
+};
+typedef struct queried_module_t queried_module_t;
+static OBJ_CLASS_INSTANCE(queried_module_t, opal_list_item_t, NULL, NULL);
+
+
+/*
+ * Empty a list that is owned by this file.  Every item is unlinked
+ * from the list first and only then released, so that no item is
+ * destroyed while it is still linked.
+ */
+static void release_list_items (opal_list_t *list)
+{
+    opal_list_item_t *item;
+
+    while (NULL != (item = opal_list_remove_first(list))) {
+        OBJ_RELEASE(item);
+    }
+}
+
+/*
+ * Give up on every module recorded in 'queried': call the owning
+ * component's unquery function (if it has one) to undo the effects of
+ * the earlier file_query, then release the bookkeeping item.
+ */
+static void discard_queried (opal_list_t *queried,
+                             struct mca_io_ompio_file_t *file)
+{
+    opal_list_item_t *item;
+    queried_module_t *om;
+
+    while (NULL != (item = opal_list_remove_first(queried))) {
+        om = (queried_module_t *) item;
+        if (NULL != om->om_component->fbtlm_file_unquery) {
+            (void) om->om_component->fbtlm_file_unquery(file);
+        }
+        OBJ_RELEASE(om);
+    }
+}
+
+/*
+ * Build a private list with one entry for every component on
+ * mca_fbtl_base_components_available whose name appears in the
+ * comma-separated string 'names'.
+ *
+ * The entries are fresh objects (an item cannot be on two lists at
+ * the same time); only the component pointer and the priority are
+ * copied, so the object/list header of the new item stays intact.
+ *
+ * Returns the new list (possibly empty), which the caller owns
+ * together with its items, or NULL if memory could not be allocated.
+ */
+static opal_list_t *filter_available_by_name (char *names)
+{
+    opal_list_t *matches;
+    opal_list_item_t *item;
+    mca_base_component_priority_list_item_t *cpli;
+    mca_base_component_priority_list_item_t *copy;
+    mca_fbtl_base_component_t *component;
+    char **name_array;
+    int num_names;
+    int i;
+
+    matches = OBJ_NEW(opal_list_t);
+    if (NULL == matches) {
+        return NULL;
+    }
+
+    name_array = opal_argv_split (names, ',');
+    num_names = opal_argv_count (name_array);
+
+    for (item = opal_list_get_first(&mca_fbtl_base_components_available);
+         item != opal_list_get_end(&mca_fbtl_base_components_available);
+         item = opal_list_get_next(item)) {
+        cpli = (mca_base_component_priority_list_item_t *) item;
+        component = (mca_fbtl_base_component_t *) cpli->super.cli_component;
+        opal_output_verbose(10, mca_fbtl_base_output,
+                            "select: initialising %s component %s",
+                            component->fbtlm_version.mca_type_name,
+                            component->fbtlm_version.mca_component_name);
+
+        /* was this component asked for by name? */
+        for (i = 0; i < num_names; i++) {
+            if (0 != strcmp(name_array[i],
+                            component->fbtlm_version.mca_component_name)) {
+                continue;
+            }
+
+            copy = OBJ_NEW (mca_base_component_priority_list_item_t);
+            if (NULL == copy) {
+                opal_argv_free (name_array);
+                release_list_items (matches);
+                OBJ_RELEASE (matches);
+                return NULL;
+            }
+            copy->super.cli_component = cpli->super.cli_component;
+            copy->cpli_priority = cpli->cpli_priority;
+            opal_list_append (matches, (opal_list_item_t *) copy);
+            break;
+        }
+    }
+
+    /* the split array is only needed for the comparison above */
+    opal_argv_free (name_array);
+
+    return matches;
+}
+
+
+/*
+ * Only one fbtl module can be attached to each file.
+ *
+ * This function calls the query function on all the components that
+ * were detected by fbtl_base_open.  It is called on a per-file basis
+ * and works as follows:
+ *
+ * 1. If a preferred component is given and its query succeeds, use it
+ *    right away.
+ * 2. Otherwise iterate over the available components (restricted to
+ *    the names given by the fbtl MCA parameter, if any).
+ * 3. Call the query function on each of these components; it returns
+ *    the module and its priority.
+ * 4. Select the module with the highest priority.
+ * 5. Call the init function of the selected module so that it does
+ *    the right setup for the file.
+ * 6. Call unquery on all the other components which returned a module
+ *    but were not selected.
+ *
+ * Returns the result of the selected module's init function,
+ * OMPI_ERR_OUT_OF_RESOURCE if bookkeeping memory cannot be allocated,
+ * or OMPI_ERROR if no component could be selected.
+ */
+
+int mca_fbtl_base_file_select (struct mca_io_ompio_file_t *file,
+                               mca_base_component_t *preferred)
+{
+    int priority;
+    int best_priority;
+    opal_list_item_t *item;
+    char *names;
+    mca_base_component_priority_list_item_t *cpli;
+    mca_fbtl_base_component_t *component;
+    mca_fbtl_base_component_t *best_component;
+    mca_fbtl_base_module_t *module;
+    opal_list_t queried;
+    queried_module_t *om;
+    opal_list_t *selectable;
+    int err = OMPI_SUCCESS;
+    bool was_selectable_constructed = false;
+
+    /* Check and see if a preferred component was provided.  If it was
+       provided then it should be used (if possible) */
+
+    if (NULL != preferred) {
+
+        opal_output_verbose(10, mca_fbtl_base_output,
+                            "fbtl:base:file_select: Checking preferred component: %s",
+                            preferred->mca_component_name);
+
+        /* query the component for its priority and get its module
+           structure; a component without a query function cannot be
+           used */
+
+        component = (mca_fbtl_base_component_t *) preferred;
+        module = NULL;
+        if (NULL != component->fbtlm_file_query) {
+            module = component->fbtlm_file_query (file, &priority);
+        }
+        if (NULL != module &&
+            NULL != module->fbtl_module_init) {
+
+            /* this query returned something legitimate, so the file
+               can be initialized with it */
+
+            file->f_fbtl = module;
+            file->f_fbtl_component = preferred;
+
+            return module->fbtl_module_init(file);
+        }
+        /* The preferred component is present, but is unable to run.
+         * Fall through and select from the list of available
+         * components instead.
+         */
+    }
+
+    /*
+     * We get here if one of two things happened:
+     * 1. The preferred component was provided but could not be
+     *    selected
+     * 2. No preferred component was provided
+     *
+     * Go through the list of available components and use the one
+     * with the highest priority for this file.
+     */
+
+    /* Check if anything was requested by means of the name parameter */
+    names = NULL;
+    mca_base_param_lookup_string (mca_fbtl_base_param, &names);
+
+    if (NULL != names && 0 < strlen(names)) {
+        opal_output_verbose(10, mca_fbtl_base_output,
+                            "fbtl:base:file_Select: Checking all available module");
+
+        /* restrict the candidates to the intersection of the
+           requested names and the available components */
+        selectable = filter_available_by_name (names);
+        /* NOTE(review): the looked-up string is treated as a heap copy
+           owned by the caller -- confirm against mca_base_param.h */
+        free (names);
+        if (NULL == selectable) {
+            return OMPI_ERR_OUT_OF_RESOURCE;
+        }
+        was_selectable_constructed = true;
+
+        /* an empty intersection means that none of the requested
+           modules is available */
+        if (0 == opal_list_get_size(selectable)) {
+            OBJ_RELEASE (selectable);
+            opal_output_verbose (10, mca_fbtl_base_output,
+                                 "fbtl:base:file_select: preferred modules were not available");
+            return OMPI_ERROR;
+        }
+    } else {
+        /* nothing was requested by name: every available component is
+           a candidate ('names' is NULL or an empty string here) */
+        free (names);
+        selectable = &mca_fbtl_base_components_available;
+    }
+
+    best_component = NULL;
+    best_priority = -1;
+    OBJ_CONSTRUCT(&queried, opal_list_t);
+
+    for (item = opal_list_get_first(selectable);
+         item != opal_list_get_end(selectable);
+         item = opal_list_get_next(item)) {
+        cpli = (mca_base_component_priority_list_item_t *) item;
+        component = (mca_fbtl_base_component_t *) cpli->super.cli_component;
+        opal_output_verbose(10, mca_fbtl_base_output,
+                            "select: initialising %s component %s",
+                            component->fbtlm_version.mca_type_name,
+                            component->fbtlm_version.mca_component_name);
+
+        /* we can call the query function only if there is one */
+        if (NULL == component->fbtlm_file_query) {
+            opal_output_verbose(10, mca_fbtl_base_output,
+                                "select: no query, ignoring the component");
+            continue;
+        }
+
+        module = component->fbtlm_file_query (file, &priority);
+        if (NULL == module ||
+            NULL == module->fbtl_module_init) {
+            /* query did not return anything which can be used */
+            opal_output_verbose(10, mca_fbtl_base_output,
+                                "select: query returned failure");
+            continue;
+        }
+
+        opal_output_verbose(10, mca_fbtl_base_output,
+                            "select: query returned priority %d",
+                            priority);
+
+        om = OBJ_NEW(queried_module_t);
+        if (NULL == om) {
+            /* out of memory: undo the query that could not be
+               recorded and stop; everything recorded so far is undone
+               below */
+            if (NULL != component->fbtlm_file_unquery) {
+                (void) component->fbtlm_file_unquery(file);
+            }
+            err = OMPI_ERR_OUT_OF_RESOURCE;
+            break;
+        }
+        om->om_component = component;
+        om->om_module = module;
+        opal_list_append(&queried, (opal_list_item_t *)om);
+
+        /* is this the best component we have found till now? */
+        if (priority > best_priority) {
+            best_priority = priority;
+            best_component = component;
+        }
+    }
+
+    /* If the selectable list was built as a private copy (and is not
+     * simply a pointer to mca_fbtl_base_components_available), its
+     * items and the list itself belong to us and must be released.
+     * The global list of available components is never touched. */
+
+    if (was_selectable_constructed) {
+        release_list_items (selectable);
+        OBJ_RELEASE (selectable);
+        was_selectable_constructed = false;
+    }
+
+    /*
+     * Bail out if we ran out of memory, or if no component was able
+     * to run for this file.  Everything that was queried so far is
+     * unqueried again (the list is empty in the second case).
+     */
+    if (OMPI_SUCCESS != err || NULL == best_component) {
+        discard_queried (&queried, file);
+        OBJ_DESTRUCT(&queried);
+        return (OMPI_SUCCESS != err) ? err : OMPI_ERROR;
+    }
+
+    /*
+     * We now have a list of components which have successfully
+     * returned their priorities from the query.  Unquery those which
+     * have not been selected and init the module of the one which
+     * was.  We do not return from inside the loop because the
+     * remaining bookkeeping items still have to be released.
+     */
+    for (item = opal_list_remove_first(&queried);
+         NULL != item;
+         item = opal_list_remove_first(&queried)) {
+        om = (queried_module_t *) item;
+        if (om->om_component == best_component) {
+            /*
+             * this is the chosen component: attach its module to the
+             * file and initialise it.
+             *
+             * ANJU: a component might not have all the functions
+             * defined.  Wherever a function pointer is null in the
+             * module structure we need to fill it in with the base
+             * structure function pointers.  This is yet to be done
+             */
+            file->f_fbtl = om->om_module;
+            err = om->om_module->fbtl_module_init(file);
+            file->f_fbtl_component = (mca_base_component_t *)best_component;
+        } else {
+            /*
+             * not the chosen one: unquery the component, but only if
+             * it has some clean up to do.  Components whose query has
+             * no side effects typically do not provide an unquery.
+             */
+            if (NULL != om->om_component->fbtlm_file_unquery) {
+                (void) om->om_component->fbtlm_file_unquery(file);
+            }
+            opal_output_verbose(10, mca_fbtl_base_output,
+                                "select: component %s is not selected",
+                                om->om_component->fbtlm_version.mca_component_name);
+        }
+        OBJ_RELEASE(om);
+    }
+
+    opal_output_verbose(10, mca_fbtl_base_output,
+                        "select: component %s selected",
+                        best_component->fbtlm_version.mca_component_name);
+
+    OBJ_DESTRUCT(&queried);
+
+    return err;
+}
diff --git a/ompi/mca/fbtl/base/fbtl_base_file_unselect.c b/ompi/mca/fbtl/base/fbtl_base_file_unselect.c
new file mode 100644
index 0000000000..aaa6751be8
--- /dev/null
+++ b/ompi/mca/fbtl/base/fbtl_base_file_unselect.c
@@ -0,0 +1,41 @@
+/*
+ * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
+ *                         University Research and Technology
+ *                         Corporation.  All rights reserved.
+ * Copyright (c) 2004-2005 The University of Tennessee and The University
+ *                         of Tennessee Research Foundation.  All rights
+ *                         reserved.
+ * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
+ *                         University of Stuttgart.  All rights reserved.
+ * Copyright (c) 2004-2005 The Regents of the University of California.
+ *                         All rights reserved.
+ * Copyright (c) 2008-2011 University of Houston. All rights reserved.
+ * $COPYRIGHT$
+ *
+ * Additional copyrights may follow
+ *
+ * $HEADER$
+ */
+
+#include "ompi_config.h"
+
+#include
+#include
+#include
+
+#include "mpi.h"
+#include "ompi/mca/io/ompio/io_ompio.h"
+#include "opal/util/show_help.h"
+#include "opal/mca/mca.h"
+#include "opal/mca/base/base.h"
+#include "ompi/mca/fbtl/fbtl.h"
+#include "ompi/mca/fbtl/base/base.h"
+
+/*
+ * Detach the fbtl module from a file by running the module's finalize
+ * function.  A file without a module, or a module without a finalize
+ * function, is not an error: OMPI_SUCCESS is returned in that case.
+ * Otherwise the return value of the finalize function is passed on.
+ */
+int mca_fbtl_base_file_unselect(mca_io_ompio_file_t *file)
+{
+    /* no module attached: nothing to do */
+    if (NULL == file->f_fbtl) {
+        return OMPI_SUCCESS;
+    }
+
+    /* the module has no per-file cleanup */
+    if (NULL == file->f_fbtl->fbtl_module_finalize) {
+        return OMPI_SUCCESS;
+    }
+
+    return file->f_fbtl->fbtl_module_finalize(file);
+}
diff --git a/ompi/mca/fbtl/base/fbtl_base_find_available.c b/ompi/mca/fbtl/base/fbtl_base_find_available.c
new file mode 100644
index 0000000000..fbdec3b98f
--- /dev/null
+++ b/ompi/mca/fbtl/base/fbtl_base_find_available.c
@@ -0,0 +1,171 @@
+/*
+ * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
+ *                         University Research and Technology
+ *                         Corporation.  All rights reserved.
+ * Copyright (c) 2004-2005 The University of Tennessee and The University
+ *                         of Tennessee Research Foundation.  All rights
+ *                         reserved.
+ * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
+ *                         University of Stuttgart.  All rights reserved.
+ * Copyright (c) 2004-2005 The Regents of the University of California.
+ *                         All rights reserved.
+ * Copyright (c) 2008-2011 University of Houston. All rights reserved.
+ * $COPYRIGHT$
+ *
+ * Additional copyrights may follow
+ *
+ * $HEADER$
+ */
+
+#include "ompi_config.h"
+
+#include
+#include
+
+#include "mpi.h"
+#include "ompi/constants.h"
+#include "opal/class/opal_list.h"
+#include "orte/util/show_help.h"
+#include "opal/mca/mca.h"
+#include "opal/mca/base/base.h"
+#include "opal/mca/base/mca_base_component_repository.h"
+#include "ompi/mca/fbtl/fbtl.h"
+#include "ompi/mca/fbtl/base/base.h"
+
+/* Not referenced anywhere in this file; kept because they have
+   external linkage and may be used elsewhere. */
+opal_list_t mca_fbtl_base_modules_available;
+bool mca_fbtl_base_modules_available_valid = false;
+
+static int init_query(const mca_base_component_t *m,
+                      mca_base_component_priority_list_item_t *entry,
+                      bool enable_progress_threads,
+                      bool enable_mpi_threads);
+static int init_query_2_0_0(const mca_base_component_t *component,
+                            mca_base_component_priority_list_item_t *entry,
+                            bool enable_progress_threads,
+                            bool enable_mpi_threads);
+
+/*
+ * Move every component from the "opened" list to the "available"
+ * list, provided its init_query function reports that it can run.
+ * Components which decline are closed and released from the component
+ * repository.  The opened list is destructed afterwards.
+ *
+ * Returns OMPI_SUCCESS if at least one component is available and
+ * OMPI_ERROR otherwise (the available list is destructed again in
+ * that case).
+ */
+int mca_fbtl_base_find_available(bool enable_progress_threads,
+                                 bool enable_mpi_threads)
+{
+    bool found = false;
+    const mca_base_component_t *component;
+    mca_base_component_priority_list_item_t *entry;
+    opal_list_item_t *p;
+
+    /* Initialize the list */
+
+    OBJ_CONSTRUCT(&mca_fbtl_base_components_available, opal_list_t);
+    mca_fbtl_base_components_available_valid = true;
+
+    /* The list of components which we should check is already present
+       in mca_fbtl_base_components_opened, which was established in
+       mca_fbtl_base_open */
+
+    while (NULL !=
+           (p = opal_list_remove_first (&mca_fbtl_base_components_opened))) {
+        component = ((mca_base_component_list_item_t *) p)->cli_component;
+
+        entry = OBJ_NEW(mca_base_component_priority_list_item_t);
+        if (NULL == entry) {
+            /* Out of memory: the component cannot be tracked on the
+               available list, so retire it exactly like a component
+               that does not want to run */
+            if (NULL != component->mca_close_component) {
+                component->mca_close_component();
+            }
+            mca_base_component_repository_release(component);
+            OBJ_RELEASE(p);
+            continue;
+        }
+        entry->super.cli_component =
+            ((mca_base_component_list_item_t *) p)->cli_component;
+
+        /* Ask the component whether it can run with the requested
+           thread settings */
+
+        if (OMPI_SUCCESS == init_query(entry->super.cli_component, entry,
+                                       enable_progress_threads,
+                                       enable_mpi_threads)) {
+            /* Save the result in the list.  The priority is not
+               relevant at this point in time; it is determined per
+               file when the component is queried */
+            entry->cpli_priority = 0;
+            opal_list_append (&mca_fbtl_base_components_available,
+                              (opal_list_item_t *) entry);
+            found = true;
+        } else {
+            /* The component does not want to run.  init_query() has
+               already invoked its close function, so only release it
+               from the DSO repository (if it is there) */
+            mca_base_component_repository_release(entry->super.cli_component);
+            OBJ_RELEASE(entry);
+        }
+        /* Free entry from the "opened" list */
+        OBJ_RELEASE(p);
+    }
+
+    /* The opened list is no longer necessary, so we can free it */
+    OBJ_DESTRUCT (&mca_fbtl_base_components_opened);
+    mca_fbtl_base_components_opened_valid = false;
+
+    /* There should be at least one fbtl component which is available */
+    if (false == found) {
+        /* nothing was appended, so the list is empty here */
+        OBJ_DESTRUCT(&mca_fbtl_base_components_available);
+        mca_fbtl_base_components_available_valid = false;
+        opal_output_verbose (10, mca_fbtl_base_output,
+                             "fbtl:find_available: no fbtl components available!");
+        return OMPI_ERROR;
+    }
+
+    /* All done */
+    return OMPI_SUCCESS;
+}
+
+
+/*
+ * Query one opened component, dispatching on the fbtl API version it
+ * was built against.  On every failure path (unrecognised API
+ * version included) the component's close function is invoked if it
+ * has one, which is what the caller relies on.
+ *
+ * Returns OMPI_SUCCESS if the component can run, an error otherwise.
+ */
+static int init_query(const mca_base_component_t *m,
+                      mca_base_component_priority_list_item_t *entry,
+                      bool enable_progress_threads,
+                      bool enable_mpi_threads)
+{
+    int ret;
+
+    opal_output_verbose(10, mca_fbtl_base_output,
+                        "fbtl:find_available: querying fbtl component %s",
+                        m->mca_component_name);
+
+    /* This component has been successfully opened, now try to query it */
+    if (2 == m->mca_type_major_version &&
+        0 == m->mca_type_minor_version &&
+        0 == m->mca_type_release_version) {
+        ret = init_query_2_0_0(m, entry, enable_progress_threads,
+                               enable_mpi_threads);
+    } else {
+        /* unrecognised API version: treated like a failed query so
+           that the component is closed below */
+        opal_output_verbose(10, mca_fbtl_base_output,
+                            "fbtl:find_available:unrecognised fbtl API version (%d.%d.%d)",
+                            m->mca_type_major_version,
+                            m->mca_type_minor_version,
+                            m->mca_type_release_version);
+        ret = OMPI_ERROR;
+    }
+
+    /* Query done -- look at return value to see what happened */
+    if (OMPI_SUCCESS != ret) {
+        opal_output_verbose(10, mca_fbtl_base_output,
+                            "fbtl:find_available: fbtl component %s is not available",
+                            m->mca_component_name);
+        if (NULL != m->mca_close_component) {
+            m->mca_close_component();
+        }
+    } else {
+        opal_output_verbose(10, mca_fbtl_base_output,
+                            "fbtl:find_available: fbtl component %s is available",
+                            m->mca_component_name);
+    }
+
+    /* All done */
+    return ret;
+}
+
+
+/*
+ * Query a component which implements version 2.0.0 of the fbtl API by
+ * calling its init_query function.  A component without an init_query
+ * function is reported as not available.  'entry' is not used.
+ */
+static int init_query_2_0_0(const mca_base_component_t *component,
+                            mca_base_component_priority_list_item_t *entry,
+                            bool enable_progress_threads,
+                            bool enable_mpi_threads)
+{
+    mca_fbtl_base_component_2_0_0_t *fbtl =
+        (mca_fbtl_base_component_2_0_0_t *) component;
+
+    if (NULL == fbtl->fbtlm_init_query) {
+        return OMPI_ERROR;
+    }
+
+    return fbtl->fbtlm_init_query(enable_progress_threads,
+                                  enable_mpi_threads);
+}
diff --git a/ompi/mca/fbtl/base/fbtl_base_open.c b/ompi/mca/fbtl/base/fbtl_base_open.c
new file mode 100644
index 0000000000..198ba455d2
--- /dev/null
+++ b/ompi/mca/fbtl/base/fbtl_base_open.c
@@ -0,0 +1,88 @@
+/*
+ * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
+ *                         University Research and Technology
+ *                         Corporation.  All rights reserved.
+ * Copyright (c) 2004-2005 The University of Tennessee and The University
+ *                         of Tennessee Research Foundation.  All rights
+ *                         reserved.
+ * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
+ *                         University of Stuttgart.  All rights reserved.
+ * Copyright (c) 2004-2005 The Regents of the University of California.
+ *                         All rights reserved.
+ * Copyright (c) 2008-2011 University of Houston. All rights reserved.
+ * $COPYRIGHT$
+ *
+ * Additional copyrights may follow
+ *
+ * $HEADER$
+ */
+
+
+#include "ompi_config.h"
+#include
+
+#include "ompi/class/ompi_free_list.h"
+#include "orte/util/show_help.h"
+#include "opal/mca/mca.h"
+#include "opal/mca/base/base.h"
+#include "opal/mca/base/mca_base_param.h"
+
+#include "orte/util/show_help.h"
+
+#include "ompi/mca/fbtl/fbtl.h"
+#include "ompi/mca/fbtl/base/base.h"
+
+
+/*
+ * The following file was created by configure.  It contains extern
+ * statements and the definition of an array of pointers to each
+ * component's public mca_base_component_t struct.
+ */
+#ifdef __WINDOWS__
+    const mca_base_component_t *mca_fbtl_base_static_components[] = {NULL};
+#else
+#include "ompi/mca/fbtl/base/static-components.h"
+#endif
+
+/*
+ * Global variables; most of which are loaded by back-ends of MCA
+ * variables
+ */
+int mca_fbtl_base_param = -1;
+int mca_fbtl_base_output = -1;
+
+opal_list_t mca_fbtl_base_components_opened;
+opal_list_t mca_fbtl_base_components_available;
+
+bool mca_fbtl_base_components_available_valid = false;
+bool mca_fbtl_base_components_opened_valid = false;
+
+mca_fbtl_base_component_t mca_fbtl_base_selected_component;
+mca_fbtl_base_module_t mca_fbtl;
+
+/*
+ * Open the fbtl framework: create its output stream, open the fbtl
+ * components into mca_fbtl_base_components_opened and look up the
+ * index of the MCA parameter which is used for component selection.
+ *
+ * Returns OMPI_SUCCESS, or OMPI_ERROR if the components could not be
+ * opened.
+ */
+int mca_fbtl_base_open(void)
+{
+    int rc;
+
+    /* Output stream used by all verbose messages of this framework */
+    mca_fbtl_base_output = opal_output_open(NULL);
+
+    /* Open up all available components; the opened list only becomes
+       valid if this succeeds */
+    rc = mca_base_components_open("fbtl", mca_fbtl_base_output,
+                                  mca_fbtl_base_static_components,
+                                  &mca_fbtl_base_components_opened, true);
+    if (OMPI_SUCCESS != rc) {
+        return OMPI_ERROR;
+    }
+    mca_fbtl_base_components_opened_valid = true;
+
+    /* Remember the index of the MCA "fbtl" param for selection */
+    mca_fbtl_base_param = mca_base_param_find("fbtl", "base", NULL);
+
+    return OMPI_SUCCESS;
+}
diff --git a/ompi/mca/fbtl/base/static-components.h b/ompi/mca/fbtl/base/static-components.h
new file mode 100644
index 0000000000..36b89b71cf
--- /dev/null
+++ b/ompi/mca/fbtl/base/static-components.h
@@ -0,0 +1,18 @@
+/*
+ * $HEADER$
+ */
+#if defined(c_plusplus) || defined(__cplusplus)
+extern "C" {
+#endif
+
+
+
+const mca_base_component_t *mca_fbtl_base_static_components[] = {
+
+  NULL
+};
+
+#if defined(c_plusplus) || defined(__cplusplus)
+}
+#endif
+
diff --git a/ompi/mca/fbtl/fbtl.h b/ompi/mca/fbtl/fbtl.h
new file mode 100644
index 0000000000..1d88082640
--- /dev/null
+++ b/ompi/mca/fbtl/fbtl.h
@@ -0,0 +1,151 @@
+/*
+ * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
+ *                         University Research and Technology
+ *                         Corporation.  All rights reserved.
+ * Copyright (c) 2004-2005 The University of Tennessee and The University
+ *                         of Tennessee Research Foundation.  All rights
+ *                         reserved.
+ * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
+ *                         University of Stuttgart.  All rights reserved.
+ * Copyright (c) 2004-2005 The Regents of the University of California.
+ *                         All rights reserved.
+ * Copyright (c) 2008-2011 University of Houston. All rights reserved.
+ * $COPYRIGHT$
+ *
+ * Additional copyrights may follow
+ *
+ * $HEADER$
+ */
+
+#ifndef OMPI_MCA_FBTL_H
+#define OMPI_MCA_FBTL_H
+
+#include "ompi_config.h"
+#include "mpi.h"
+#include "opal/mca/mca.h"
+#include "opal/mca/base/base.h"
+#include "ompi/request/request.h"
+#include
+
+BEGIN_C_DECLS
+
+struct mca_io_ompio_file_t;
+
+/*
+ * Macro for use in components that are of type fbtl
+ */
+#define MCA_FBTL_BASE_VERSION_2_0_0 \
+    MCA_BASE_VERSION_2_0_0, \
+    "fbtl", 2, 0, 0
+
+/*
+ * These are the component function prototypes.  These function
+ * pointers go into the component structure.  file_query() and
+ * file_unquery() are called during mca_fbtl_base_file_select():
+ * each component is file_query()'ed and subsequently all the
+ * unselected components are file_unquery()'ed so that any *stuff*
+ * they did during the query can be undone.
+ *
+ * So, to sum it up, every component carries these functions:
+ * 1. open()         - called during MPI_INIT
+ * 2. close()        - called during MPI_FINALIZE
+ * 3. init_query()   - called once to find out whether the component
+ *                     can run at all
+ * 4. file_query()   - called per file to select a particular component
+ * 5. file_unquery() - called when actions taken during file_query()
+ *                     have to be undone
+ */
+
+/*
+ * **************** component struct *******************************
+ * *********** These functions go in the component struct **********
+ * **************** component struct *******************************
+ */
+
+/* Returns OMPI_SUCCESS if the component is able to run. */
+typedef int (*mca_fbtl_base_component_init_query_1_0_0_fn_t)
+    (bool enable_progress_threads,
+     bool enable_mpi_threads);
+
+/* Returns the module to be used for 'file' and stores the
+   component's priority in '*priority'; NULL means "cannot be used". */
+typedef struct mca_fbtl_base_module_1_0_0_t *
+(*mca_fbtl_base_component_file_query_1_0_0_fn_t) (struct mca_io_ompio_file_t *file,
+                                                  int *priority);
+
+/* Undoes the effects of a previous file_query on 'file'. */
+typedef int (*mca_fbtl_base_component_file_unquery_1_0_0_fn_t)
+    (struct mca_io_ompio_file_t *file);
+
+/*
+ * ****************** component struct ******************************
+ * Structure for fbtl v2.0.0 components. This is chained to MCA v2.0.0
+ * ****************** component struct ******************************
+ */
+struct mca_fbtl_base_component_2_0_0_t {
+    mca_base_component_t fbtlm_version;
+    mca_base_component_data_t fbtlm_data;
+
+    mca_fbtl_base_component_init_query_1_0_0_fn_t fbtlm_init_query;
+    mca_fbtl_base_component_file_query_1_0_0_fn_t fbtlm_file_query;
+    mca_fbtl_base_component_file_unquery_1_0_0_fn_t fbtlm_file_unquery;
+};
+typedef struct mca_fbtl_base_component_2_0_0_t mca_fbtl_base_component_2_0_0_t;
+typedef struct mca_fbtl_base_component_2_0_0_t mca_fbtl_base_component_t;
+
+/*
+ * ***********************************************************************
+ * ************************  Interface function definitions **************
+ * These are the typedefs for the function pointers to various fbtl
+ * backend functions which will be used by the various fbtl components
+ * ***********************************************************************
+ */
+
+typedef int (*mca_fbtl_base_module_init_1_0_0_fn_t)
+    (struct mca_io_ompio_file_t *file);
+
+typedef int (*mca_fbtl_base_module_finalize_1_0_0_fn_t)
+    (struct mca_io_ompio_file_t *file);
+
+
+/*
+ * Blocking (preadv/pwritev) and non-blocking (ipreadv/ipwritev) data
+ * transfer functions.  The non-blocking variants additionally hand
+ * back a request object.
+ * NOTE(review): the meaning of 'sorted' and of the size_t return
+ * value is not visible in this header -- confirm against the
+ * component implementations.
+ */
+typedef size_t (*mca_fbtl_base_module_preadv_fn_t)
+    (struct mca_io_ompio_file_t *file,
+     int *sorted);
+typedef size_t (*mca_fbtl_base_module_pwritev_fn_t)
+    (struct mca_io_ompio_file_t *file,
+     int *sorted);
+typedef size_t (*mca_fbtl_base_module_ipreadv_fn_t)
+    (struct mca_io_ompio_file_t *file,
+     int *sorted,
+     ompi_request_t **request);
+typedef size_t (*mca_fbtl_base_module_ipwritev_fn_t)
+    (struct mca_io_ompio_file_t *file,
+     int *sorted,
+     ompi_request_t **request);
+
+/*
+ * ***********************************************************************
+ * ***************************  module structure *************************
+ * ***********************************************************************
+ */
+struct mca_fbtl_base_module_1_0_0_t {
+    /*
+     * Per-file initialization function. This is called only
+     * on the module which is selected. The matching per-file
+     * finalize function is the next member of this struct.
+     */
+    mca_fbtl_base_module_init_1_0_0_fn_t fbtl_module_init;
+    mca_fbtl_base_module_finalize_1_0_0_fn_t fbtl_module_finalize;
+
+    /* FBTL function pointers */
+    mca_fbtl_base_module_preadv_fn_t        fbtl_preadv;
+    mca_fbtl_base_module_ipreadv_fn_t       fbtl_ipreadv;
+    mca_fbtl_base_module_pwritev_fn_t       fbtl_pwritev;
+    mca_fbtl_base_module_ipwritev_fn_t      fbtl_ipwritev;
+    /*
+    mca_fbtl_base_module_test_fn_t          fbtl_test;
+    mca_fbtl_base_module_wait_fn_t          fbtl_wait;
+    mca_fbtl_base_module_progress_fn_t      fbtl_progress;
+    */
+};
+typedef struct mca_fbtl_base_module_1_0_0_t mca_fbtl_base_module_1_0_0_t;
+typedef mca_fbtl_base_module_1_0_0_t mca_fbtl_base_module_t;
+
+END_C_DECLS
+
+#endif /* OMPI_MCA_FBTL_H */
diff --git a/ompi/mca/fbtl/posix/Makefile.am b/ompi/mca/fbtl/posix/Makefile.am
new file mode 100644
index 0000000000..2eb48821c6
--- /dev/null
+++ b/ompi/mca/fbtl/posix/Makefile.am
@@ -0,0 +1,50 @@
+#
+# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
+#                         University Research and Technology
+#                         Corporation.  All rights reserved.
+# Copyright (c) 2004-2005 The University of Tennessee and The University
+#                         of Tennessee Research Foundation.  All rights
+#                         reserved.
+# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
+#                         University of Stuttgart.  All rights reserved.
+# Copyright (c) 2004-2005 The Regents of the University of California.
+#                         All rights reserved.
+# Copyright (c) 2008-2011 University of Houston. All rights reserved.
+# $COPYRIGHT$
+#
+# Additional copyrights may follow
+#
+# $HEADER$
+#
+
+# Make the output library in this directory, and name it either
+# mca_<type>_<name>.la (for DSO builds) or libmca_<type>_<name>.la
+# (for static builds).
+
+# Exactly one of the two variables below names a library: the
+# installable DSO in a DSO build, the non-installed convenience
+# library otherwise.
+if MCA_BUILD_ompi_fbtl_posix_DSO
+component_noinst =
+component_install = mca_fbtl_posix.la
+else
+component_noinst = libmca_fbtl_posix.la
+component_install =
+endif
+
+mcacomponentdir = $(pkglibdir)
+mcacomponent_LTLIBRARIES = $(component_install)
+mca_fbtl_posix_la_SOURCES = $(sources)
+mca_fbtl_posix_la_LDFLAGS = -module -avoid-version
+
+noinst_LTLIBRARIES = $(component_noinst)
+libmca_fbtl_posix_la_SOURCES = $(sources)
+libmca_fbtl_posix_la_LDFLAGS = -module -avoid-version
+
+# Source files (shared by both library flavours above)
+
+sources = \
+    fbtl_posix.h \
+    fbtl_posix.c \
+    fbtl_posix_component.c \
+    fbtl_posix_preadv.c \
+    fbtl_posix_ipreadv.c \
+    fbtl_posix_pwritev.c \
+    fbtl_posix_ipwritev.c
diff --git a/ompi/mca/fbtl/posix/fbtl_posix.c b/ompi/mca/fbtl/posix/fbtl_posix.c
new file mode 100644
index 0000000000..348cb86a10
--- /dev/null
+++ b/ompi/mca/fbtl/posix/fbtl_posix.c
@@ -0,0 +1,85 @@
+/*
+ * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
+ *                         University Research and Technology
+ *                         Corporation.  All rights reserved.
+ * Copyright (c) 2004-2006 The University of Tennessee and The University
+ *                         of Tennessee Research Foundation.  All rights
+ *                         reserved.
+ * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
+ *                         University of Stuttgart.  All rights reserved.
+ * Copyright (c) 2004-2005 The Regents of the University of California.
+ *                         All rights reserved.
+ * Copyright (c) 2008-2011 University of Houston. All rights reserved.
+ * $COPYRIGHT$
+ *
+ * Additional copyrights may follow
+ *
+ * $HEADER$
+ *
+ * These symbols are in a file by themselves to provide nice linker
+ * semantics. Since linkers generally pull in symbols by object files,
+ * keeping these symbols as the only symbols in this file prevents
+ * utility programs such as "ompi_info" from having to import entire
+ * modules just to query their version and parameters
+ */
+
+#include "ompi_config.h"
+#include "mpi.h"
+#include "ompi/mca/fbtl/fbtl.h"
+#include "ompi/mca/fbtl/posix/fbtl_posix.h"
+
+/*
+ * *******************************************************************
+ * ************************ actions structure ************************
+ * *******************************************************************
+ */
+/* The one module instance handed out by file_query(); the entries
+   are positional and follow the member order of the module struct. */
+static mca_fbtl_base_module_1_0_0_t posix =  {
+    mca_fbtl_posix_module_init,     /* per-file setup after selection */
+    mca_fbtl_posix_module_finalize, /* per-file teardown */
+    mca_fbtl_posix_preadv,
+    mca_fbtl_posix_ipreadv,
+    mca_fbtl_posix_pwritev,
+    mca_fbtl_posix_ipwritev
+};
+/*
+ * *******************************************************************
+ * ************************* structure ends **************************
+ * *******************************************************************
+ */
+
+/* The component can always run: there is nothing to check or set up. */
+int mca_fbtl_posix_component_init_query(bool enable_progress_threads,
+                                        bool enable_mpi_threads) {
+    return OMPI_SUCCESS;
+}
+
+/*
+ * Report the priority of this component for the given file and hand
+ * out the posix module.  The configured priority is used, except on
+ * a UFS file system where the priority is raised to at least 50.
+ */
+struct mca_fbtl_base_module_1_0_0_t *
+mca_fbtl_posix_component_file_query (mca_io_ompio_file_t *fh, int *priority) {
+    int chosen = mca_fbtl_posix_priority;
+
+    if (UFS == fh->f_fstype && chosen < 50) {
+        chosen = 50;
+    }
+    *priority = chosen;
+
+    return &posix;
+}
+
+/*
+ * The query above leaves nothing behind which would have to be
+ * undone if this module is not selected, so this is a no-op.
+ */
+int mca_fbtl_posix_component_file_unquery (mca_io_ompio_file_t *file) {
+    return OMPI_SUCCESS;
+}
+
+/* No per-file state is set up by this module. */
+int mca_fbtl_posix_module_init (mca_io_ompio_file_t *file) {
+    return OMPI_SUCCESS;
+}
+
+
+/* No per-file state has to be released by this module. */
+int mca_fbtl_posix_module_finalize (mca_io_ompio_file_t *file) {
+    return OMPI_SUCCESS;
+}
diff --git a/ompi/mca/fbtl/posix/fbtl_posix.h b/ompi/mca/fbtl/posix/fbtl_posix.h
new file mode 100644
index 0000000000..5a36561ec5
--- /dev/null
+++ b/ompi/mca/fbtl/posix/fbtl_posix.h
@@ -0,0 +1,67 @@
+/*
+ * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
+ *                         University Research and Technology
+ *                         Corporation.  All rights reserved.
+ * Copyright (c) 2004-2006 The University of Tennessee and The University
+ *                         of Tennessee Research Foundation.  All rights
+ *                         reserved.
+ * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
+ *                         University of Stuttgart.  All rights reserved.
+ * Copyright (c) 2004-2005 The Regents of the University of California.
+ *                         All rights reserved.
+ * Copyright (c) 2008-2011 University of Houston. All rights reserved.
+ * $COPYRIGHT$
+ *
+ * Additional copyrights may follow
+ *
+ * $HEADER$
+ *
+ * Declarations of the posix fbtl component: its priority variable,
+ * the component/module query hooks and the four vectored I/O entry
+ * points.
+ */
+
+#ifndef MCA_FBTL_POSIX_H
+#define MCA_FBTL_POSIX_H
+
+#include "ompi_config.h"
+#include "opal/mca/mca.h"
+#include "ompi/mca/fbtl/fbtl.h"
+#include "ompi/mca/io/ompio/io_ompio.h"
+
+extern int mca_fbtl_posix_priority; /* defined in fbtl_posix_component.c */
+
+BEGIN_C_DECLS
+
+int mca_fbtl_posix_component_init_query(bool enable_progress_threads,
+                                        bool enable_mpi_threads);
+struct mca_fbtl_base_module_1_0_0_t *
+mca_fbtl_posix_component_file_query (mca_io_ompio_file_t *file, int *priority);
+int mca_fbtl_posix_component_file_unquery (mca_io_ompio_file_t *file);
+
+int mca_fbtl_posix_module_init (mca_io_ompio_file_t *file);
+int mca_fbtl_posix_module_finalize (mca_io_ompio_file_t *file);
+
+OMPI_MODULE_DECLSPEC extern mca_fbtl_base_component_2_0_0_t mca_fbtl_posix_component;
+/*
+ * ******************************************************************
+ * ********* functions which are implemented in this module *********
+ * ******************************************************************
+ *
+ * Each function works on the entries of file->f_io_array.  When
+ * 'sorted' is non-NULL it holds the order in which the entries are
+ * visited; when it is NULL they are visited in array order.
+ *
+ * NOTE(review): the declared return type is size_t, yet the
+ * definitions return OMPI_* status codes -- confirm that callers
+ * compare the result against OMPI_SUCCESS rather than treating it
+ * as a byte count.
+ */
+
+size_t mca_fbtl_posix_preadv (mca_io_ompio_file_t *file,
+                              int *sorted);
+size_t mca_fbtl_posix_pwritev (mca_io_ompio_file_t *file,
+                               int *sorted);
+size_t mca_fbtl_posix_ipreadv (mca_io_ompio_file_t *file,
+                               int *sorted,
+                               ompi_request_t **request);
+size_t mca_fbtl_posix_ipwritev (mca_io_ompio_file_t *file,
+                                int *sorted,
+                                ompi_request_t **request);
+
+/*
+ * ******************************************************************
+ * ************ functions implemented in this module end ************
+ * ******************************************************************
+ */
+
+END_C_DECLS
+
+#endif /* MCA_FBTL_POSIX_H */
diff --git a/ompi/mca/fbtl/posix/fbtl_posix_component.c b/ompi/mca/fbtl/posix/fbtl_posix_component.c
new file mode 100644
index 0000000000..aeb4d93289
--- /dev/null
+++ b/ompi/mca/fbtl/posix/fbtl_posix_component.c
@@ -0,0 +1,65 @@
+/*
+ * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
+ *
University Research and Technology
+ *                         Corporation. All rights reserved.
+ * Copyright (c) 2004-2005 The University of Tennessee and The University
+ *                         of Tennessee Research Foundation. All rights
+ *                         reserved.
+ * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
+ *                         University of Stuttgart. All rights reserved.
+ * Copyright (c) 2004-2005 The Regents of the University of California.
+ *                         All rights reserved.
+ * Copyright (c) 2008-2011 University of Houston. All rights reserved.
+ * $COPYRIGHT$
+ *
+ * Additional copyrights may follow
+ *
+ * $HEADER$
+ *
+ * These symbols are in a file by themselves to provide nice linker
+ * semantics. Since linkers generally pull in symbols by object
+ * files, keeping these symbols as the only symbols in this file
+ * prevents utility programs such as "ompi_info" from having to import
+ * entire components just to query their version and parameters.
+ */
+
+#include "ompi_config.h"
+#include "fbtl_posix.h"
+#include "mpi.h"
+
+/*
+ * Public string showing the fbtl posix component version number
+ */
+const char *mca_fbtl_posix_component_version_string =
+    "OMPI/MPI posix FBTL MCA component version " OMPI_VERSION;
+
+int mca_fbtl_posix_priority = 10; /* starting value that
+ * mca_fbtl_posix_component_file_query() copies into *priority */
+
+/*
+ * Instantiate the public struct with all of our public information
+ * and pointers to our public functions in it.
+ *
+ * The initializer is positional: the component header, the metadata
+ * block, then the init_query, file_query and file_unquery callbacks.
+ */
+mca_fbtl_base_component_2_0_0_t mca_fbtl_posix_component = {
+
+    /* First, the mca_component_t struct containing meta information
+       about the component itself */
+
+    {
+        MCA_FBTL_BASE_VERSION_2_0_0,
+
+        /* Component name and version */
+        "posix",
+        OMPI_MAJOR_VERSION,
+        OMPI_MINOR_VERSION,
+        OMPI_RELEASE_VERSION,
+        NULL, /* presumably the component open hook -- TODO confirm */
+        NULL  /* presumably the component close hook -- TODO confirm */
+    },
+    {
+        /* This component is checkpointable */
+        MCA_BASE_METADATA_PARAM_CHECKPOINT
+    },
+    mca_fbtl_posix_component_init_query,      /* get thread level */
+    mca_fbtl_posix_component_file_query,      /* get priority and actions */
+    mca_fbtl_posix_component_file_unquery     /* undo what was done by previous function */
+};
diff --git a/ompi/mca/fbtl/posix/fbtl_posix_ipreadv.c b/ompi/mca/fbtl/posix/fbtl_posix_ipreadv.c new file mode 100644 index 0000000000..af39955380 --- /dev/null +++ b/ompi/mca/fbtl/posix/fbtl_posix_ipreadv.c @@ -0,0 +1,36 @@ +/* + * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. + * Copyright (c) 2004-2005 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2005 The Regents of the University of California. + * All rights reserved. + * Copyright (c) 2008-2011 University of Houston. All rights reserved. + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + + +#include "ompi_config.h" +#include "fbtl_posix.h" + +#include "mpi.h" +#include "ompi/constants.h" +#include "orte/util/show_help.h" +#include "ompi/mca/fbtl/fbtl.h" + +size_t +mca_fbtl_posix_ipreadv (mca_io_ompio_file_t *file, + int *sorted, + ompi_request_t **request) +{ + printf ("POSIX IPREADV\n"); + return OMPI_SUCCESS; +} diff --git a/ompi/mca/fbtl/posix/fbtl_posix_ipwritev.c b/ompi/mca/fbtl/posix/fbtl_posix_ipwritev.c new file mode 100644 index 0000000000..fbd2732995 --- /dev/null +++ b/ompi/mca/fbtl/posix/fbtl_posix_ipwritev.c @@ -0,0 +1,199 @@ +/* + * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. + * Copyright (c) 2004-2005 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2005 The Regents of the University of California. + * All rights reserved. 
+ * Copyright (c) 2008-2011 University of Houston. All rights reserved. + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + + +#include "ompi_config.h" +#include "fbtl_posix.h" + +#include "mpi.h" +#include +#include +#include +#include "ompi/constants.h" +#include "orte/util/show_help.h" +#include "ompi/mca/fbtl/fbtl.h" + +size_t +mca_fbtl_posix_ipwritev (mca_io_ompio_file_t *fh, + int *sorted, + ompi_request_t **request) +{ + int i; + int num_req = 0; + int merge = 0; + size_t k; + char *merge_buf = NULL; + size_t merge_length = 0; + OMPI_MPI_OFFSET_TYPE merge_offset = 0; + struct aiocb *aiocbp; + + aiocbp = (struct aiocb *) malloc (sizeof(struct aiocb) * + fh->f_num_of_io_entries); + if (NULL == aiocbp) { + opal_output(1, "OUT OF MEMORY\n"); + return OMPI_ERR_OUT_OF_RESOURCE; + } + + if (NULL != sorted) { + for (i=0 ; if_num_of_io_entries ; i++) { + if (fh->f_num_of_io_entries != i+1) { + if (((OMPI_MPI_OFFSET_TYPE)fh->f_io_array[sorted[i]].offset + + (OPAL_PTRDIFF_TYPE)fh->f_io_array[sorted[i]].length) == + (OMPI_MPI_OFFSET_TYPE)fh->f_io_array[sorted[i+1]].offset) { + if (!merge) { + merge_offset = (OMPI_MPI_OFFSET_TYPE) + fh->f_io_array[sorted[i]].offset; + merge_length = fh->f_io_array[sorted[i]].length; + } + merge_length += fh->f_io_array[sorted[i+1]].length; + merge++; + continue; + } + } + + if (merge) { + merge_buf = malloc (merge_length); + if (NULL == merge_buf) { + opal_output(1, "OUT OF MEMORY\n"); + return OMPI_ERR_OUT_OF_RESOURCE; + } + k = 0; + while (merge >= 0) { + memcpy (merge_buf + k, + fh->f_io_array[sorted[i-merge]].memory_address, + fh->f_io_array[sorted[i-merge]].length); + k += fh->f_io_array[sorted[i-merge]].length; + merge --; + } + + aiocbp[num_req].aio_offset = merge_offset; + aiocbp[num_req].aio_buf = merge_buf; + aiocbp[num_req].aio_nbytes = merge_length; + aiocbp[num_req].aio_fildes = fh->fd; + aiocbp[num_req].aio_reqprio = 0; + aiocbp[num_req].aio_sigevent.sigev_notify = SIGEV_NONE; + + if (-1 == 
aio_write(&aiocbp[num_req])) { + perror("aio_write() error"); + return OMPI_ERROR; + } + merge = 0; + merge_offset = 0; + merge_length = 0; + if (NULL != merge_buf) { + free (merge_buf); + merge_buf = NULL; + } + } + else { + aiocbp[num_req].aio_offset = (OMPI_MPI_OFFSET_TYPE) + fh->f_io_array[sorted[i]].offset; + aiocbp[num_req].aio_buf = + fh->f_io_array[sorted[i]].memory_address; + aiocbp[num_req].aio_nbytes = fh->f_io_array[sorted[i]].length; + aiocbp[num_req].aio_fildes = fh->fd; + aiocbp[num_req].aio_reqprio = 0; + aiocbp[num_req].aio_sigevent.sigev_notify = SIGEV_NONE; + + if (-1 == aio_write(&aiocbp[num_req])) { + perror("aio_write() error"); + return OMPI_ERROR; + } + } + num_req ++; + } + } + + else { + for (i=0 ; if_num_of_io_entries ; i++) { + if (fh->f_num_of_io_entries != i+1) { + if (((OMPI_MPI_OFFSET_TYPE)fh->f_io_array[i].offset + + (OPAL_PTRDIFF_TYPE)fh->f_io_array[i].length) == + (OMPI_MPI_OFFSET_TYPE)fh->f_io_array[i+1].offset) { + if (!merge) { + merge_offset = (OMPI_MPI_OFFSET_TYPE) + fh->f_io_array[i].offset; + merge_length = fh->f_io_array[i].length; + } + merge_length += fh->f_io_array[i+1].length; + merge++; + continue; + } + } + + if (merge) { + merge_buf = malloc (merge_length); + if (NULL == merge_buf) { + opal_output(1, "OUT OF MEMORY\n"); + return OMPI_ERR_OUT_OF_RESOURCE; + } + k = 0; + while (merge >= 0) { + memcpy (merge_buf + k, + fh->f_io_array[i-merge].memory_address, + fh->f_io_array[i-merge].length); + k += fh->f_io_array[i-merge].length; + merge --; + } + aiocbp[num_req].aio_offset = merge_offset; + aiocbp[num_req].aio_buf = merge_buf; + aiocbp[num_req].aio_nbytes = merge_length; + aiocbp[num_req].aio_fildes = fh->fd; + aiocbp[num_req].aio_reqprio = 0; + aiocbp[num_req].aio_sigevent.sigev_notify = SIGEV_NONE; + + if (-1 == aio_write(&aiocbp[num_req])) { + perror("aio_write() error"); + return OMPI_ERROR; + } + merge = 0; + merge_offset = 0; + merge_length = 0; + if (NULL != merge_buf) { + free (merge_buf); + merge_buf = NULL; 
+ } + } + else { + aiocbp[num_req].aio_offset = (OMPI_MPI_OFFSET_TYPE) + fh->f_io_array[i].offset; + aiocbp[num_req].aio_buf = fh->f_io_array[i].memory_address; + aiocbp[num_req].aio_nbytes = fh->f_io_array[i].length; + aiocbp[num_req].aio_fildes = fh->fd; + aiocbp[num_req].aio_reqprio = 0; + aiocbp[num_req].aio_sigevent.sigev_notify = SIGEV_NONE; + + if (-1 == aio_write(&aiocbp[num_req])) { + perror("aio_write() error"); + return OMPI_ERROR; + } + } + num_req ++; + } + } + /* + ompi_grequest_start (mca_fbtl_aio_query_fn, + mca_fbtl_aio_free_fn, + mca_fbtl_aio_cancel_fn, + mca_fbtl_aio_poll_fn, + mca_fbtl_aio_wait_fn, + request); + */ + return OMPI_SUCCESS; +} diff --git a/ompi/mca/fbtl/posix/fbtl_posix_preadv.c b/ompi/mca/fbtl/posix/fbtl_posix_preadv.c new file mode 100644 index 0000000000..f137cfb433 --- /dev/null +++ b/ompi/mca/fbtl/posix/fbtl_posix_preadv.c @@ -0,0 +1,269 @@ +/* + * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. + * Copyright (c) 2004-2005 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2005 The Regents of the University of California. + * All rights reserved. + * Copyright (c) 2008-2011 University of Houston. All rights reserved. 
+ * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + + +#include "ompi_config.h" +#include "fbtl_posix.h" + +#include "mpi.h" +#include +#include "ompi/constants.h" +#include "orte/util/show_help.h" +#include "ompi/mca/fbtl/fbtl.h" + +size_t +mca_fbtl_posix_preadv (mca_io_ompio_file_t *fh, + int *sorted) +{ + /*int *fp = NULL;*/ + int i, block=1; + struct iovec *iov = NULL; + int iov_count = 0; + OMPI_MPI_OFFSET_TYPE iov_offset = 0; +#if 0 + int k; + int merge = 0; + char *merge_buf = NULL; + size_t merge_length = 0; + OMPI_MPI_OFFSET_TYPE merge_offset = 0; +#endif + if (NULL == fh->f_io_array) { + return OMPI_ERROR; + } + /* + fp = (int *)fh->fd; + if (0 == *fp) + { + return OMPI_ERROR; + } + */ + +#if 1 + iov = (struct iovec *) malloc + (OMPIO_IOVEC_INITIAL_SIZE * sizeof (struct iovec)); + if (NULL == iov) { + opal_output(1, "OUT OF MEMORY\n"); + return OMPI_ERR_OUT_OF_RESOURCE; + } + + if (NULL != sorted) { + for (i=0 ; if_num_of_io_entries ; i++) { + if (0 == iov_count) { + iov[iov_count].iov_base = fh->f_io_array[sorted[i]].memory_address; + iov[iov_count].iov_len = fh->f_io_array[sorted[i]].length; + iov_offset = (OMPI_MPI_OFFSET_TYPE)fh->f_io_array[sorted[i]].offset; + iov_count ++; + } + + if (OMPIO_IOVEC_INITIAL_SIZE*block <= iov_count) { + block ++; + iov = (struct iovec *)realloc + (iov, OMPIO_IOVEC_INITIAL_SIZE * block * + sizeof(struct iovec)); + if (NULL == iov) { + opal_output(1, "OUT OF MEMORY\n"); + return OMPI_ERR_OUT_OF_RESOURCE; + } + } + + if (fh->f_num_of_io_entries != i+1) { + if (((OMPI_MPI_OFFSET_TYPE)fh->f_io_array[sorted[i]].offset + + (OPAL_PTRDIFF_TYPE)fh->f_io_array[sorted[i]].length) == + (OMPI_MPI_OFFSET_TYPE)fh->f_io_array[sorted[i+1]].offset) { + iov[iov_count].iov_base = + fh->f_io_array[sorted[i+1]].memory_address; + iov[iov_count].iov_len = fh->f_io_array[sorted[i+1]].length; + iov_count ++; + continue; + } + } + + if (-1 == lseek (fh->fd, iov_offset, SEEK_SET)) { + perror ("fseek"); + return 
OMPI_ERROR; + } + if (-1 == readv (fh->fd, iov, iov_count)) { + perror ("writev"); + return OMPI_ERROR; + } + else { + iov_count = 0; + } + } + } + + else { + for (i=0 ; if_num_of_io_entries ; i++) { + if (0 == iov_count) { + iov[iov_count].iov_base = fh->f_io_array[i].memory_address; + iov[iov_count].iov_len = fh->f_io_array[i].length; + iov_offset = (OMPI_MPI_OFFSET_TYPE)fh->f_io_array[i].offset; + iov_count ++; + } + + if (OMPIO_IOVEC_INITIAL_SIZE*block <= iov_count) { + block ++; + iov = (struct iovec *)realloc + (iov, OMPIO_IOVEC_INITIAL_SIZE * block * + sizeof(struct iovec)); + if (NULL == iov) { + opal_output(1, "OUT OF MEMORY\n"); + return OMPI_ERR_OUT_OF_RESOURCE; + } + } + + if (fh->f_num_of_io_entries != i+1) { + if (((OMPI_MPI_OFFSET_TYPE)fh->f_io_array[i].offset + + (OPAL_PTRDIFF_TYPE)fh->f_io_array[i].length) == + (OMPI_MPI_OFFSET_TYPE)fh->f_io_array[i+1].offset) { + iov[iov_count].iov_base = + fh->f_io_array[i+1].memory_address; + iov[iov_count].iov_len = fh->f_io_array[i+1].length; + iov_count ++; + continue; + } + } + + if (-1 == lseek (fh->fd, iov_offset, SEEK_SET)) { + perror ("fseek"); + return OMPI_ERROR; + } + if (-1 == readv (fh->fd, iov, iov_count)) { + perror ("writev"); + return OMPI_ERROR; + } + else { + iov_count = 0; + } + } + } + if (NULL != iov) { + free (iov); + iov = NULL; + } +#endif +#if 0 + if (NULL != sorted) { + for (i=0 ; if_num_of_io_entries ; i++) { + if (fh->f_num_of_io_entries != i+1) { + if (((OMPI_MPI_OFFSET_TYPE)fh->f_io_array[sorted[i]].offset + + (OPAL_PTRDIFF_TYPE)fh->f_io_array[sorted[i]].length) == + (OMPI_MPI_OFFSET_TYPE)fh->f_io_array[sorted[i+1]].offset) { + if (!merge) { + merge_offset = (OMPI_MPI_OFFSET_TYPE) + fh->f_io_array[sorted[i]].offset; + merge_length = fh->f_io_array[sorted[i]].length; + } + merge_length += fh->f_io_array[sorted[i+1]].length; + merge++; + continue; + } + } + if (merge) { + merge_buf = malloc (merge_length); + if (-1 == pread(fh->fd, + merge_buf, + merge_length, + merge_offset)) { + 
perror("pread() error"); + return OMPI_ERROR; + } + k = 0; + while (merge >= 0) { + memcpy (fh->f_io_array[sorted[i-merge]].memory_address, + merge_buf + k, + fh->f_io_array[sorted[i-merge]].length); + k += fh->f_io_array[sorted[i-merge]].length; + merge --; + } + merge = 0; + merge_offset = 0; + merge_length = 0; + if (NULL != merge_buf) { + free (merge_buf); + merge_buf = NULL; + } + } + else { + if (-1 == pread(fh->fd, + fh->f_io_array[sorted[i]].memory_address, + fh->f_io_array[sorted[i]].length, + (OMPI_MPI_OFFSET_TYPE)fh->f_io_array[sorted[i]].offset)) { + perror("pread() error"); + return OMPI_ERROR; + } + } + } + } + + else { + for (i=0 ; if_num_of_io_entries ; i++) { + if (fh->f_num_of_io_entries != i+1) { + if (((OMPI_MPI_OFFSET_TYPE)fh->f_io_array[i].offset + + (OPAL_PTRDIFF_TYPE)fh->f_io_array[i].length) == + (OMPI_MPI_OFFSET_TYPE)fh->f_io_array[i+1].offset) { + if (!merge) { + merge_offset = (OMPI_MPI_OFFSET_TYPE) + fh->f_io_array[i].offset; + merge_length = fh->f_io_array[i].length; + } + merge_length += fh->f_io_array[i+1].length; + merge++; + continue; + } + } + if (merge) { + merge_buf = malloc (merge_length); + if (-1 == pread(fh->fd, + merge_buf, + merge_length, + merge_offset)) { + perror("pread() error"); + return OMPI_ERROR; + } + k = 0; + while (merge >= 0) { + memcpy (fh->f_io_array[i-merge].memory_address, + merge_buf + k, + fh->f_io_array[i-merge].length); + k += fh->f_io_array[i-merge].length; + merge --; + } + merge = 0; + merge_offset = 0; + merge_length = 0; + if (NULL != merge_buf) { + free (merge_buf); + merge_buf = NULL; + } + } + else { + if (-1 == pread(fh->fd, + fh->f_io_array[i].memory_address, + fh->f_io_array[i].length, + (OMPI_MPI_OFFSET_TYPE)fh->f_io_array[i].offset)) { + perror("pread() error"); + return OMPI_ERROR; + } + } + } + } +#endif + return OMPI_SUCCESS; +} diff --git a/ompi/mca/fbtl/posix/fbtl_posix_pwritev.c b/ompi/mca/fbtl/posix/fbtl_posix_pwritev.c new file mode 100644 index 0000000000..d1069ccbdc --- /dev/null 
+++ b/ompi/mca/fbtl/posix/fbtl_posix_pwritev.c @@ -0,0 +1,299 @@ +/* + * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. + * Copyright (c) 2004-2005 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2005 The Regents of the University of California. + * All rights reserved. + * Copyright (c) 2008-2011 University of Houston. All rights reserved. + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + + +#include "ompi_config.h" +#include "fbtl_posix.h" + +#include "mpi.h" +#include +#include +#include "ompi/constants.h" +#include "orte/util/show_help.h" +#include "ompi/mca/fbtl/fbtl.h" + +size_t +mca_fbtl_posix_pwritev (mca_io_ompio_file_t *fh, + int *sorted) +{ + /*int *fp = NULL;*/ + int i, block = 1; + struct iovec *iov = NULL; + int iov_count = 0; + OMPI_MPI_OFFSET_TYPE iov_offset = 0; +#if 0 + int merge = 0; + size_t k; + char *merge_buf = NULL; + size_t merge_length = 0; + OMPI_MPI_OFFSET_TYPE merge_offset = 0; +#endif + + if (NULL == fh->f_io_array) { + return OMPI_ERROR; + } + /* + fp = (int *)fh->fd; + if (0 == *fp) + { + return OMPI_ERROR; + } + */ +#if 1 + + iov = (struct iovec *) malloc + (OMPIO_IOVEC_INITIAL_SIZE * sizeof (struct iovec)); + if (NULL == iov) { + opal_output(1, "OUT OF MEMORY\n"); + return OMPI_ERR_OUT_OF_RESOURCE; + } + + if (NULL != sorted) { + for (i=0 ; if_num_of_io_entries ; i++) { + if (0 == iov_count) { + iov[iov_count].iov_base = fh->f_io_array[sorted[i]].memory_address; + iov[iov_count].iov_len = fh->f_io_array[sorted[i]].length; + iov_offset = (OMPI_MPI_OFFSET_TYPE)fh->f_io_array[sorted[i]].offset; + iov_count ++; + } + + if (OMPIO_IOVEC_INITIAL_SIZE*block <= iov_count) { + block ++; + iov = (struct iovec 
*)realloc + (iov, OMPIO_IOVEC_INITIAL_SIZE * block * + sizeof(struct iovec)); + if (NULL == iov) { + opal_output(1, "OUT OF MEMORY\n"); + return OMPI_ERR_OUT_OF_RESOURCE; + } + } + + if (fh->f_num_of_io_entries != i+1) { + if (((OMPI_MPI_OFFSET_TYPE)fh->f_io_array[sorted[i]].offset + + (OPAL_PTRDIFF_TYPE)fh->f_io_array[sorted[i]].length) == + (OMPI_MPI_OFFSET_TYPE)fh->f_io_array[sorted[i+1]].offset) { + iov[iov_count].iov_base = + fh->f_io_array[sorted[i+1]].memory_address; + iov[iov_count].iov_len = fh->f_io_array[sorted[i+1]].length; + iov_count ++; + continue; + } + } + + if (-1 == lseek (fh->fd, iov_offset, SEEK_SET)) { + perror ("lseek"); + return OMPI_ERROR; + } + + if (-1 == writev (fh->fd, iov, iov_count)) { + perror ("writev"); + return OMPI_ERROR; + } + iov_count = 0; + } + } + + else { + for (i=0 ; if_num_of_io_entries ; i++) { + if (0 == iov_count) { + iov[iov_count].iov_base = fh->f_io_array[i].memory_address; + iov[iov_count].iov_len = fh->f_io_array[i].length; + iov_offset = (OMPI_MPI_OFFSET_TYPE)fh->f_io_array[i].offset; + iov_count ++; + } + + if (OMPIO_IOVEC_INITIAL_SIZE*block <= iov_count) { + block ++; + iov = (struct iovec *)realloc + (iov, OMPIO_IOVEC_INITIAL_SIZE * block * + sizeof(struct iovec)); + if (NULL == iov) { + opal_output(1, "OUT OF MEMORY\n"); + return OMPI_ERR_OUT_OF_RESOURCE; + } + } + + if (fh->f_num_of_io_entries != i+1) { + if (((OMPI_MPI_OFFSET_TYPE)fh->f_io_array[i].offset + + (OPAL_PTRDIFF_TYPE)fh->f_io_array[i].length) == + (OMPI_MPI_OFFSET_TYPE)fh->f_io_array[i+1].offset) { + iov[iov_count].iov_base = + fh->f_io_array[i+1].memory_address; + iov[iov_count].iov_len = fh->f_io_array[i+1].length; + iov_count ++; + continue; + } + } + /* + printf ("RANK: %d Entries: %d count: %d\n", + fh->f_rank, + fh->f_num_of_io_entries, + iov_count); + for (j=0 ; jfd, iov_offset, SEEK_SET)) { + perror ("lseek"); + return OMPI_ERROR; + } + + if (-1 == writev (fh->fd, iov, iov_count)) { + perror ("writev"); + return OMPI_ERROR; + } + 
iov_count = 0; + } + } + + if (NULL != iov) { + free (iov); + iov = NULL; + } + +#endif +#if 0 + if (NULL != sorted) { + for (i=0 ; if_num_of_io_entries ; i++) { + if (fh->f_num_of_io_entries != i+1) { + if (((OMPI_MPI_OFFSET_TYPE)fh->f_io_array[sorted[i]].offset + + (OPAL_PTRDIFF_TYPE)fh->f_io_array[sorted[i]].length) == + (OMPI_MPI_OFFSET_TYPE)fh->f_io_array[sorted[i+1]].offset) { + if (!merge) { + merge_offset = (OMPI_MPI_OFFSET_TYPE) + fh->f_io_array[sorted[i]].offset; + merge_length = fh->f_io_array[sorted[i]].length; + } + merge_length += fh->f_io_array[sorted[i+1]].length; + merge++; + continue; + } + } + /* + printf ("RANK: %d Entries: %d MERGE: %d \n", fh->f_rank,fh->f_num_of_io_entries,merge); + if (merge) + printf ("Merge size: %d\n",merge_length); + else + printf ("entry size: %d\n",fh->f_io_array[sorted[i]].length); + sleep(5); + */ + if (merge) { + merge_buf = malloc (merge_length); + k = 0; + while (merge >= 0) { + memcpy (merge_buf + k, + fh->f_io_array[sorted[i-merge]].memory_address, + fh->f_io_array[sorted[i-merge]].length); + k += fh->f_io_array[sorted[i-merge]].length; + merge --; + } + if (-1 == pwrite(fh->fd, + merge_buf, + merge_length, merge_offset)) { + perror("write() error"); + return OMPI_ERROR; + } + merge = 0; + merge_offset = 0; + merge_length = 0; + if (NULL != merge_buf) { + free (merge_buf); + merge_buf = NULL; + } + } + else { + if (-1 == pwrite(fh->fd, + fh->f_io_array[sorted[i]].memory_address, + fh->f_io_array[sorted[i]].length, + (OMPI_MPI_OFFSET_TYPE) + fh->f_io_array[sorted[i]].offset)) { + perror("pwrite() error"); + return OMPI_ERROR; + } + } + } + } + + else { + for (i=0 ; if_num_of_io_entries ; i++) { + if (fh->f_num_of_io_entries != i+1) { + if (((OMPI_MPI_OFFSET_TYPE)fh->f_io_array[i].offset + + (OPAL_PTRDIFF_TYPE)fh->f_io_array[i].length) == + (OMPI_MPI_OFFSET_TYPE)fh->f_io_array[i+1].offset) { + if (!merge) { + merge_offset = (OMPI_MPI_OFFSET_TYPE) + fh->f_io_array[i].offset; + merge_length = 
fh->f_io_array[i].length; + } + merge_length += fh->f_io_array[i+1].length; + merge++; + continue; + } + } + /* + printf ("RANK: %d Entries: %d MERGE: %d \n", fh->f_rank,fh->f_num_of_io_entries,merge); + if (merge) + printf ("Merge size: %d\n",merge_length); + else + printf ("entry size: %d\n",fh->f_io_array[i].length); + sleep(2); + */ + if (merge) { + merge_buf = malloc (merge_length); + k = 0; + while (merge >= 0) { + memcpy (merge_buf + k, + fh->f_io_array[i-merge].memory_address, + fh->f_io_array[i-merge].length); + k += fh->f_io_array[i-merge].length; + merge --; + } + if (-1 == pwrite(fh->fd, + merge_buf, + merge_length, + merge_offset)) { + perror("write() error"); + return OMPI_ERROR; + } + merge = 0; + merge_offset = 0; + merge_length = 0; + if (NULL != merge_buf) { + free (merge_buf); + merge_buf = NULL; + } + } + else { + if (-1 == pwrite(fh->fd, + fh->f_io_array[i].memory_address, + fh->f_io_array[i].length, + (OMPI_MPI_OFFSET_TYPE) + fh->f_io_array[i].offset)) { + perror("pwrite() error"); + return OMPI_ERROR; + } + } + } + } +#endif + return OMPI_SUCCESS; +} diff --git a/ompi/mca/fbtl/pvfs2/Makefile.am b/ompi/mca/fbtl/pvfs2/Makefile.am new file mode 100644 index 0000000000..3b053f2ce1 --- /dev/null +++ b/ompi/mca/fbtl/pvfs2/Makefile.am @@ -0,0 +1,54 @@ +# +# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana +# University Research and Technology +# Corporation. All rights reserved. +# Copyright (c) 2004-2005 The University of Tennessee and The University +# of Tennessee Research Foundation. All rights +# reserved. +# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +# University of Stuttgart. All rights reserved. +# Copyright (c) 2004-2005 The Regents of the University of California. +# All rights reserved. +# Copyright (c) 2008-2011 University of Houston. All rights reserved. 
+# $COPYRIGHT$
+#
+# Additional copyrights may follow
+#
+# $HEADER$
+#
+
+# Make the output library in this directory, and name it either
+# mca_<type>_<name>.la (for DSO builds) or libmca_<type>_<name>.la
+# (for static builds).
+
+if MCA_BUILD_ompi_fbtl_pvfs2_DSO
+component_noinst =
+component_install = mca_fbtl_pvfs2.la
+else
+component_noinst = libmca_fbtl_pvfs2.la
+component_install =
+endif
+
+# Source files
+
+fbtl_pvfs2_sources = \
+        fbtl_pvfs2.h \
+        fbtl_pvfs2.c \
+        fbtl_pvfs2_component.c \
+        fbtl_pvfs2_preadv.c \
+        fbtl_pvfs2_ipreadv.c \
+        fbtl_pvfs2_pwritev.c \
+        fbtl_pvfs2_ipwritev.c
+
+AM_CPPFLAGS = $(fbtl_pvfs2_CPPFLAGS)
+
+mcacomponentdir = $(pkglibdir)
+mcacomponent_LTLIBRARIES = $(component_install)
+mca_fbtl_pvfs2_la_SOURCES = $(fbtl_pvfs2_sources)
+mca_fbtl_pvfs2_la_LIBADD =
+mca_fbtl_pvfs2_la_LDFLAGS = -module -avoid-version $(fbtl_pvfs2_LDFLAGS)
+
+noinst_LTLIBRARIES = $(component_noinst)
+libmca_fbtl_pvfs2_la_SOURCES = $(fbtl_pvfs2_sources)
+libmca_fbtl_pvfs2_la_LIBADD =
+libmca_fbtl_pvfs2_la_LDFLAGS = -module -avoid-version $(fbtl_pvfs2_LDFLAGS)
diff --git a/ompi/mca/fbtl/pvfs2/configure.m4 b/ompi/mca/fbtl/pvfs2/configure.m4
new file mode 100644
index 0000000000..1123d5ec4f
--- /dev/null
+++ b/ompi/mca/fbtl/pvfs2/configure.m4
@@ -0,0 +1,50 @@
+# -*- shell-script -*-
+#
+# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
+#                         University Research and Technology
+#                         Corporation. All rights reserved.
+# Copyright (c) 2004-2005 The University of Tennessee and The University
+#                         of Tennessee Research Foundation. All rights
+#                         reserved.
+# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
+#                         University of Stuttgart. All rights reserved.
+# Copyright (c) 2004-2005 The Regents of the University of California.
+#                         All rights reserved.
+# Copyright (c) 2010 Cisco Systems, Inc. All rights reserved.
+# Copyright (c) 2008-2011 University of Houston. All rights reserved.
+# $COPYRIGHT$
+#
+# Additional copyrights may follow
+#
+# $HEADER$
+#
+
+
+# MCA_ompi_fbtl_pvfs2_CONFIG(action-if-can-compile,
+#                            [action-if-cant-compile])
+# ------------------------------------------------
+# Runs OMPI_CHECK_PVFS2 for the fbtl_pvfs2 prefix.  On success the
+# detected LDFLAGS and LIBS are copied into the WRAPPER_EXTRA
+# variables and action-if-can-compile ($1) runs; otherwise
+# action-if-cant-compile ($2) runs.
+# NOTE(review): the pvfs2.h header check runs after that decision and
+# nests an identical AC_CHECK_HEADERS call whose failure branch runs
+# $2 again -- confirm this ordering and duplication are intended.
+AC_DEFUN([MCA_ompi_fbtl_pvfs2_CONFIG],[
+    AC_CONFIG_FILES([ompi/mca/fbtl/pvfs2/Makefile])
+
+    OMPI_CHECK_PVFS2([fbtl_pvfs2],
+                     [fbtl_pvfs2_happy="yes"],
+                     [fbtl_pvfs2_happy="no"])
+
+    AS_IF([test "$fbtl_pvfs2_happy" = "yes"],
+          [fbtl_pvfs2_WRAPPER_EXTRA_LDFLAGS="$fbtl_pvfs2_LDFLAGS"
+           fbtl_pvfs2_WRAPPER_EXTRA_LIBS="$fbtl_pvfs2_LIBS"
+           $1],
+          [$2])
+
+    AC_CHECK_HEADERS([pvfs2.h], [],
+                      [AC_CHECK_HEADERS([pvfs2.h], [], [$2],
+                          [AC_INCLUDES_DEFAULT])],
+                      [AC_INCLUDES_DEFAULT])
+
+
+    # substitute in the things needed to build pvfs2
+    AC_SUBST([fbtl_pvfs2_CFLAGS])
+    AC_SUBST([fbtl_pvfs2_CPPFLAGS])
+    AC_SUBST([fbtl_pvfs2_LDFLAGS])
+    AC_SUBST([fbtl_pvfs2_LIBS])
+])dnl
diff --git a/ompi/mca/fbtl/pvfs2/fbtl_pvfs2.c b/ompi/mca/fbtl/pvfs2/fbtl_pvfs2.c
new file mode 100644
index 0000000000..b8481b0fa5
--- /dev/null
+++ b/ompi/mca/fbtl/pvfs2/fbtl_pvfs2.c
@@ -0,0 +1,85 @@
+/*
+ * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
+ *                         University Research and Technology
+ *                         Corporation. All rights reserved.
+ * Copyright (c) 2004-2006 The University of Tennessee and The University
+ *                         of Tennessee Research Foundation. All rights
+ *                         reserved.
+ * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
+ *                         University of Stuttgart. All rights reserved.
+ * Copyright (c) 2004-2005 The Regents of the University of California.
+ *                         All rights reserved.
+ * Copyright (c) 2008-2011 University of Houston. All rights reserved.
+ * $COPYRIGHT$
+ *
+ * Additional copyrights may follow
+ *
+ * $HEADER$
+ *
+ * These symbols are in a file by themselves to provide nice linker
+ * semantics.
Since linkers generally pull in symbols by object fules, + * keeping these symbols as the only symbols in this file prevents + * utility programs such as "ompi_info" from having to import entire + * modules just to query their version and parameters + */ + +#include "ompi_config.h" +#include "mpi.h" +#include "ompi/mca/fbtl/fbtl.h" +#include "ompi/mca/fbtl/pvfs2/fbtl_pvfs2.h" + +/* + * ******************************************************************* + * ************************ actions structure ************************ + * ******************************************************************* + */ +static mca_fbtl_base_module_1_0_0_t pvfs2 = { + mca_fbtl_pvfs2_module_init, /* initalise after being selected */ + mca_fbtl_pvfs2_module_finalize, /* close a module on a communicator */ + mca_fbtl_pvfs2_preadv, + mca_fbtl_pvfs2_ipreadv, + mca_fbtl_pvfs2_pwritev, + mca_fbtl_pvfs2_ipwritev +}; +/* + * ******************************************************************* + * ************************* structure ends ************************** + * ******************************************************************* + */ + +int mca_fbtl_pvfs2_component_init_query(bool enable_progress_threads, + bool enable_mpi_threads) { + /* Nothing to do */ + + return OMPI_SUCCESS; +} + +struct mca_fbtl_base_module_1_0_0_t * +mca_fbtl_pvfs2_component_file_query (mca_io_ompio_file_t *fh, int *priority) { + *priority = mca_fbtl_pvfs2_priority; + + if (PVFS2 == fh->f_fstype) { + if (*priority < 50) { + *priority = 50; + } + } + + return &pvfs2; +} + +int mca_fbtl_pvfs2_component_file_unquery (mca_io_ompio_file_t *file) { + /* This function might be needed for some purposes later. 
for now it + * does not have anything to do since there are no steps which need + * to be undone if this module is not selected */ + + return OMPI_SUCCESS; +} + +int mca_fbtl_pvfs2_module_init (mca_io_ompio_file_t *file) { + return OMPI_SUCCESS; +} + + +int mca_fbtl_pvfs2_module_finalize (mca_io_ompio_file_t *file) { + return OMPI_SUCCESS; +} diff --git a/ompi/mca/fbtl/pvfs2/fbtl_pvfs2.h b/ompi/mca/fbtl/pvfs2/fbtl_pvfs2.h new file mode 100644 index 0000000000..d095971d5c --- /dev/null +++ b/ompi/mca/fbtl/pvfs2/fbtl_pvfs2.h @@ -0,0 +1,77 @@ +/* + * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. + * Copyright (c) 2004-2006 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2005 The Regents of the University of California. + * All rights reserved. + * Copyright (c) 2008-2011 University of Houston. All rights reserved. 
+ * $COPYRIGHT$
+ *
+ * Additional copyrights may follow
+ *
+ * $HEADER$
+ *
+ * Declarations of the pvfs2 fbtl component: its priority variable,
+ * the component/module query hooks and the four vectored I/O entry
+ * points.
+ */
+
+#ifndef MCA_FBTL_PVFS2_H
+#define MCA_FBTL_PVFS2_H
+
+#include "ompi_config.h"
+#include "opal/mca/mca.h"
+#include "ompi/mca/fbtl/fbtl.h"
+#include "ompi/mca/io/ompio/io_ompio.h"
+#include "ompi/mca/fs/pvfs2/fs_pvfs2.h"
+#include "pvfs2.h"
+#include "pvfs2-compat.h"
+
+/*
+ * NOTE(review): the guarded variant below is disabled; the two pvfs2
+ * headers above are included unconditionally instead.
+ *
+#ifdef HAVE_PVFS2_H
+#include "pvfs2.h"
+#endif
+
+#ifdef PVFS2_VERSION_MAJOR
+#include "pvfs2-compat.h"
+#endif
+*/
+extern int mca_fbtl_pvfs2_priority; /* defined in fbtl_pvfs2_component.c */
+
+BEGIN_C_DECLS
+
+int mca_fbtl_pvfs2_component_init_query(bool enable_progress_threads,
+                                        bool enable_mpi_threads);
+struct mca_fbtl_base_module_1_0_0_t *
+mca_fbtl_pvfs2_component_file_query (mca_io_ompio_file_t *file, int *priority);
+int mca_fbtl_pvfs2_component_file_unquery (mca_io_ompio_file_t *file);
+
+int mca_fbtl_pvfs2_module_init (mca_io_ompio_file_t *file);
+int mca_fbtl_pvfs2_module_finalize (mca_io_ompio_file_t *file);
+
+OMPI_MODULE_DECLSPEC extern mca_fbtl_base_component_2_0_0_t mca_fbtl_pvfs2_component;
+/*
+ * ******************************************************************
+ * ********* functions which are implemented in this module *********
+ * ******************************************************************
+ *
+ * The signatures mirror the posix fbtl entry points.
+ * NOTE(review): presumably 'sorted' is an optional visiting order for
+ * file->f_io_array and the size_t result carries an OMPI_* status
+ * code, as in the posix component -- verify against the pvfs2
+ * implementation files.
+ */
+
+size_t mca_fbtl_pvfs2_preadv (mca_io_ompio_file_t *file,
+                              int *sorted);
+size_t mca_fbtl_pvfs2_pwritev (mca_io_ompio_file_t *file,
+                               int *sorted);
+size_t mca_fbtl_pvfs2_ipreadv (mca_io_ompio_file_t *file,
+                               int *sorted, ompi_request_t **request);
+size_t mca_fbtl_pvfs2_ipwritev (mca_io_ompio_file_t *file,
+                                int *sorted, ompi_request_t **request);
+
+/*
+ * ******************************************************************
+ * ************ functions implemented in this module end ************
+ * ******************************************************************
+ */
+
+END_C_DECLS
+
+#endif /* MCA_FBTL_PVFS2_H */
diff --git a/ompi/mca/fbtl/pvfs2/fbtl_pvfs2_component.c b/ompi/mca/fbtl/pvfs2/fbtl_pvfs2_component.c
new file
mode 100644 index 0000000000..9ae5cd22e5 --- /dev/null +++ b/ompi/mca/fbtl/pvfs2/fbtl_pvfs2_component.c @@ -0,0 +1,65 @@ +/* + * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. + * Copyright (c) 2004-2005 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2005 The Regents of the University of California. + * All rights reserved. + * Copyright (c) 2008-2011 University of Houston. All rights reserved. + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + * + * These symbols are in a file by themselves to provide nice linker + * semantics. Since linkers generally pull in symbols by object + * files, keeping these symbols as the only symbols in this file + * prevents utility programs such as "ompi_info" from having to import + * entire components just to query their version and parameters. 
+ */ + +#include "ompi_config.h" +#include "fbtl_pvfs2.h" +#include "mpi.h" + +/* + * Public string showing the fbtl pvfs2 component version number + */ +const char *mca_fbtl_pvfs2_component_version_string = + "OMPI/MPI pvfs2 FBTL MCA component version " OMPI_VERSION; + +int mca_fbtl_pvfs2_priority = 0; + +/* + * Instantiate the public struct with all of our public information + * and pointers to our public functions in it + */ +mca_fbtl_base_component_2_0_0_t mca_fbtl_pvfs2_component = { + + /* First, the mca_component_t struct containing meta information + about the component itself */ + + { + MCA_FBTL_BASE_VERSION_2_0_0, + + /* Component name and version */ + "pvfs2", + OMPI_MAJOR_VERSION, + OMPI_MINOR_VERSION, + OMPI_RELEASE_VERSION, + NULL, + NULL + }, + { + /* This component is checkpointable */ + MCA_BASE_METADATA_PARAM_CHECKPOINT + }, + mca_fbtl_pvfs2_component_init_query, /* get thread level */ + mca_fbtl_pvfs2_component_file_query, /* get priority and actions */ + mca_fbtl_pvfs2_component_file_unquery /* undo what was done by previous function */ +}; diff --git a/ompi/mca/fbtl/pvfs2/fbtl_pvfs2_ipreadv.c b/ompi/mca/fbtl/pvfs2/fbtl_pvfs2_ipreadv.c new file mode 100644 index 0000000000..29015059b8 --- /dev/null +++ b/ompi/mca/fbtl/pvfs2/fbtl_pvfs2_ipreadv.c @@ -0,0 +1,35 @@ +/* + * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. + * Copyright (c) 2004-2005 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2005 The Regents of the University of California. + * All rights reserved. + * Copyright (c) 2008-2011 University of Houston. All rights reserved. 
+ * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + + +#include "ompi_config.h" +#include "fbtl_pvfs2.h" + +#include "mpi.h" +#include "ompi/constants.h" +#include "orte/util/show_help.h" +#include "ompi/mca/fbtl/fbtl.h" + +size_t +mca_fbtl_pvfs2_ipreadv (mca_io_ompio_file_t *file, + int *sorted, ompi_request_t **request) +{ + printf ("PVFS2 IPREADV\n"); + return OMPI_SUCCESS; +} diff --git a/ompi/mca/fbtl/pvfs2/fbtl_pvfs2_ipwritev.c b/ompi/mca/fbtl/pvfs2/fbtl_pvfs2_ipwritev.c new file mode 100644 index 0000000000..b7224e8d0b --- /dev/null +++ b/ompi/mca/fbtl/pvfs2/fbtl_pvfs2_ipwritev.c @@ -0,0 +1,35 @@ +/* + * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. + * Copyright (c) 2004-2005 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2005 The Regents of the University of California. + * All rights reserved. + * Copyright (c) 2008-2011 University of Houston. All rights reserved. + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + + +#include "ompi_config.h" +#include "fbtl_pvfs2.h" + +#include "mpi.h" +#include "ompi/constants.h" +#include "orte/util/show_help.h" +#include "ompi/mca/fbtl/fbtl.h" + +size_t +mca_fbtl_pvfs2_ipwritev (mca_io_ompio_file_t *file, + int *sorted, ompi_request_t **request) +{ + printf ("PVFS2 IPWRITEV\n"); + return OMPI_SUCCESS; +} diff --git a/ompi/mca/fbtl/pvfs2/fbtl_pvfs2_preadv.c b/ompi/mca/fbtl/pvfs2/fbtl_pvfs2_preadv.c new file mode 100644 index 0000000000..e6555a833c --- /dev/null +++ b/ompi/mca/fbtl/pvfs2/fbtl_pvfs2_preadv.c @@ -0,0 +1,240 @@ +/* + * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. 
All rights reserved. + * Copyright (c) 2004-2005 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2005 The Regents of the University of California. + * All rights reserved. + * Copyright (c) 2008-2011 University of Houston. All rights reserved. + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +/* This code is based on the PVFS2 ADIO module in ROMIO + * Copyright (C) 1997 University of Chicago. + * See COPYRIGHT notice in top-level directory. + */ + +#include "ompi_config.h" +#include "fbtl_pvfs2.h" + +#include "mpi.h" +#include +#include "ompi/constants.h" +#include "orte/util/show_help.h" +#include "ompi/mca/fbtl/fbtl.h" + +size_t +mca_fbtl_pvfs2_preadv (mca_io_ompio_file_t *fh, + int *sorted) +{ + int i; + int ret; + size_t k; + int merge = 0; + char *merge_buf = NULL; + size_t merge_length = 0; + OMPI_MPI_OFFSET_TYPE merge_offset = 0; + PVFS_sysresp_io resp_io; + PVFS_Request file_req; + PVFS_Request mem_req; + mca_fs_pvfs2 *pvfs2_fs; + + pvfs2_fs = (mca_fs_pvfs2 *)fh->f_fs_ptr; + + if (NULL == fh->f_io_array) { + return OMPI_ERROR; + } + + if (NULL != sorted) { + for (i=0 ; if_num_of_io_entries ; i++) { + if (fh->f_num_of_io_entries != i+1) { + if (((OMPI_MPI_OFFSET_TYPE)fh->f_io_array[sorted[i]].offset + + (OPAL_PTRDIFF_TYPE)fh->f_io_array[sorted[i]].length) == + (OMPI_MPI_OFFSET_TYPE)fh->f_io_array[sorted[i+1]].offset) { + if (!merge) { + merge_offset = (OMPI_MPI_OFFSET_TYPE) + fh->f_io_array[sorted[i]].offset; + merge_length = fh->f_io_array[sorted[i]].length; + } + merge_length += fh->f_io_array[sorted[i+1]].length; + merge++; + continue; + } + } + if (merge) { + merge_buf = malloc (merge_length); + + ret = PVFS_Request_contiguous (merge_length, + PVFS_BYTE, + &mem_req); + if (ret != 0) { + 
perror("PVFS_Request_contiguous() error"); + return OMPI_ERROR; + } + ret = PVFS_Request_contiguous (merge_length, + PVFS_BYTE, + &file_req); + if (ret != 0) { + perror("PVFS_Request_contiguous() error"); + return OMPI_ERROR; + } + ret = PVFS_sys_read(pvfs2_fs->object_ref, + file_req, + merge_offset, + merge_buf, + mem_req, + &(pvfs2_fs->credentials), + &resp_io); + if (ret != 0) { + perror("PVFS_sys_write() error"); + return OMPI_ERROR; + } + + k = 0; + while (merge >= 0) { + memcpy (fh->f_io_array[sorted[i-merge]].memory_address, + merge_buf + k, + fh->f_io_array[sorted[i-merge]].length); + k += fh->f_io_array[sorted[i-merge]].length; + merge --; + } + merge = 0; + merge_offset = 0; + merge_length = 0; + if (NULL != merge_buf) { + free (merge_buf); + merge_buf = NULL; + } + } + else { + ret = PVFS_Request_contiguous (fh->f_io_array[sorted[i]].length, + PVFS_BYTE, + &mem_req); + if (ret != 0) { + perror("PVFS_Request_contiguous() error"); + return OMPI_ERROR; + } + ret = PVFS_Request_contiguous (fh->f_io_array[sorted[i]].length, + PVFS_BYTE, + &file_req); + if (ret != 0) { + perror("PVFS_Request_contiguous() error"); + return OMPI_ERROR; + } + ret = PVFS_sys_read(pvfs2_fs->object_ref, + file_req, + (OMPI_MPI_OFFSET_TYPE) + fh->f_io_array[sorted[i]].offset, + fh->f_io_array[sorted[i]].memory_address, + mem_req, + &(pvfs2_fs->credentials), + &resp_io); + if (ret != 0) { + perror("PVFS_sys_write() error"); + return OMPI_ERROR; + } + } + } + } + + else { + for (i=0 ; if_num_of_io_entries ; i++) { + if (fh->f_num_of_io_entries != i+1) { + if (((OMPI_MPI_OFFSET_TYPE)fh->f_io_array[i].offset + + (OPAL_PTRDIFF_TYPE)fh->f_io_array[i].length) == + (OMPI_MPI_OFFSET_TYPE)fh->f_io_array[i+1].offset) { + if (!merge) { + merge_offset = (OMPI_MPI_OFFSET_TYPE) + fh->f_io_array[i].offset; + merge_length = fh->f_io_array[i].length; + } + merge_length += fh->f_io_array[i+1].length; + merge++; + continue; + } + } + if (merge) { + merge_buf = malloc (merge_length); + + ret = 
PVFS_Request_contiguous (merge_length, + PVFS_BYTE, + &mem_req); + if (ret != 0) { + perror("PVFS_Request_contiguous() error"); + return OMPI_ERROR; + } + ret = PVFS_Request_contiguous (merge_length, + PVFS_BYTE, + &file_req); + if (ret != 0) { + perror("PVFS_Request_contiguous() error"); + return OMPI_ERROR; + } + ret = PVFS_sys_read (pvfs2_fs->object_ref, + file_req, + merge_offset, + merge_buf, + mem_req, + &(pvfs2_fs->credentials), + &resp_io); + if (ret != 0) { + perror("PVFS_sys_write() error"); + return OMPI_ERROR; + } + + k = 0; + while (merge >= 0) { + memcpy (fh->f_io_array[i-merge].memory_address, + merge_buf + k, + fh->f_io_array[i-merge].length); + k += fh->f_io_array[i-merge].length; + merge --; + } + merge = 0; + merge_offset = 0; + merge_length = 0; + if (NULL != merge_buf) { + free (merge_buf); + merge_buf = NULL; + } + } + else { + ret = PVFS_Request_contiguous (fh->f_io_array[i].length, + PVFS_BYTE, + &mem_req); + if (ret != 0) { + perror("PVFS_Request_contiguous() error"); + return OMPI_ERROR; + } + ret = PVFS_Request_contiguous (fh->f_io_array[i].length, + PVFS_BYTE, + &file_req); + if (ret != 0) { + perror("PVFS_Request_contiguous() error"); + return OMPI_ERROR; + } + ret = PVFS_sys_read (pvfs2_fs->object_ref, + file_req, + (OMPI_MPI_OFFSET_TYPE) + fh ->f_io_array[i].offset, + fh->f_io_array[i].memory_address, + mem_req, + &(pvfs2_fs->credentials), + &resp_io); + if (ret != 0) { + perror("PVFS_sys_write() error"); + return OMPI_ERROR; + } + } + } + } + return OMPI_SUCCESS; +} diff --git a/ompi/mca/fbtl/pvfs2/fbtl_pvfs2_pwritev.c b/ompi/mca/fbtl/pvfs2/fbtl_pvfs2_pwritev.c new file mode 100644 index 0000000000..99ecd5209f --- /dev/null +++ b/ompi/mca/fbtl/pvfs2/fbtl_pvfs2_pwritev.c @@ -0,0 +1,257 @@ +/* + * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. 
+ * Copyright (c) 2004-2005 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2005 The Regents of the University of California. + * All rights reserved. + * Copyright (c) 2008-2011 University of Houston. All rights reserved. + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +/* This code is based on the PVFS2 ADIO module in ROMIO + * Copyright (C) 1997 University of Chicago. + * See COPYRIGHT notice in top-level directory. + */ + +#include "ompi_config.h" +#include "fbtl_pvfs2.h" + +#include "mpi.h" +#include +#include +#include "ompi/constants.h" +#include "orte/util/show_help.h" +#include "ompi/mca/fbtl/fbtl.h" + +size_t +mca_fbtl_pvfs2_pwritev (mca_io_ompio_file_t *fh, + int *sorted) +{ + int i; + int merge = 0; + int ret; + size_t k; + char *merge_buf = NULL; + size_t merge_length = 0; + OMPI_MPI_OFFSET_TYPE merge_offset = 0; + PVFS_sysresp_io resp_io; + PVFS_Request file_req; + PVFS_Request mem_req; + mca_fs_pvfs2 *pvfs2_fs; + + pvfs2_fs = (mca_fs_pvfs2 *)fh->f_fs_ptr; + + if (NULL == fh->f_io_array) { + return OMPI_ERROR; + } + + if (NULL != sorted) { + for (i=0 ; if_num_of_io_entries ; i++) { + if (fh->f_num_of_io_entries != i+1) { + if (((OMPI_MPI_OFFSET_TYPE)fh->f_io_array[sorted[i]].offset + + (OPAL_PTRDIFF_TYPE)fh->f_io_array[sorted[i]].length) == + (OMPI_MPI_OFFSET_TYPE)fh->f_io_array[sorted[i+1]].offset) { + if (!merge) { + merge_offset = (OMPI_MPI_OFFSET_TYPE) + fh->f_io_array[sorted[i]].offset; + merge_length = fh->f_io_array[sorted[i]].length; + } + merge_length += fh->f_io_array[sorted[i+1]].length; + merge++; + continue; + } + } + /* + printf ("RANK: %d Entries: %d MERGE: %d \n", fh->f_rank,fh->f_num_of_io_entries,merge); + if (merge) + printf ("Merge size: %d\n",merge_length); + else + printf ("entry size: 
%d\n",fh->f_io_array[sorted[i]].length); + sleep(5); + */ + if (merge) { + merge_buf = malloc (merge_length); + k = 0; + while (merge >= 0) { + memcpy (merge_buf + k, + fh->f_io_array[sorted[i-merge]].memory_address, + fh->f_io_array[sorted[i-merge]].length); + k += fh->f_io_array[sorted[i-merge]].length; + merge --; + } + + ret = PVFS_Request_contiguous (merge_length, + PVFS_BYTE, + &mem_req); + if (ret != 0) { + perror("PVFS_Request_contiguous() error"); + return OMPI_ERROR; + } + ret = PVFS_Request_contiguous (merge_length, + PVFS_BYTE, + &file_req); + if (ret != 0) { + perror("PVFS_Request_contiguous() error"); + return OMPI_ERROR; + } + ret = PVFS_sys_write (pvfs2_fs->object_ref, + file_req, + merge_offset, + merge_buf, + mem_req, + &(pvfs2_fs->credentials), + &resp_io); + if (ret != 0) { + perror("PVFS_sys_write() error"); + return OMPI_ERROR; + } + + merge = 0; + merge_offset = 0; + merge_length = 0; + if (NULL != merge_buf) { + free (merge_buf); + merge_buf = NULL; + } + } + else { + ret = PVFS_Request_contiguous (fh->f_io_array[sorted[i]].length, + PVFS_BYTE, + &mem_req); + if (ret != 0) { + perror("PVFS_Request_contiguous() error"); + return OMPI_ERROR; + } + ret = PVFS_Request_contiguous (fh->f_io_array[sorted[i]].length, + PVFS_BYTE, + &file_req); + if (ret != 0) { + perror("PVFS_Request_contiguous() error"); + return OMPI_ERROR; + } + ret = PVFS_sys_write (pvfs2_fs->object_ref, + file_req, + (OMPI_MPI_OFFSET_TYPE) + fh->f_io_array[sorted[i]].offset, + fh->f_io_array[sorted[i]].memory_address, + mem_req, + &(pvfs2_fs->credentials), + &resp_io); + if (ret != 0) { + perror("PVFS_sys_write() error"); + return OMPI_ERROR; + } + } + } + } + + else { + for (i=0 ; if_num_of_io_entries ; i++) { + if (fh->f_num_of_io_entries != i+1) { + if (((OMPI_MPI_OFFSET_TYPE)fh->f_io_array[i].offset + + (OPAL_PTRDIFF_TYPE)fh->f_io_array[i].length) == + (OMPI_MPI_OFFSET_TYPE)fh->f_io_array[i+1].offset) { + if (!merge) { + merge_offset = (OMPI_MPI_OFFSET_TYPE) + 
fh->f_io_array[i].offset; + merge_length = fh->f_io_array[i].length; + } + merge_length += fh->f_io_array[i+1].length; + merge++; + continue; + } + } + /* + printf ("RANK: %d Entries: %d MERGE: %d \n", fh->f_rank,fh->f_num_of_io_entries,merge); + if (merge) + printf ("Merge size: %lld\n",merge_length); + else + printf ("entry size: %lld\n",fh->f_io_array[i].length); + sleep(2); + */ + if (merge) { + merge_buf = malloc (merge_length); + k = 0; + while (merge >= 0) { + memcpy (merge_buf + k, + fh->f_io_array[i-merge].memory_address, + fh->f_io_array[i-merge].length); + k += fh->f_io_array[i-merge].length; + merge --; + } + + ret = PVFS_Request_contiguous (merge_length, + PVFS_BYTE, + &mem_req); + if (ret != 0) { + perror("PVFS_Request_contiguous() error"); + return OMPI_ERROR; + } + ret = PVFS_Request_contiguous (merge_length, + PVFS_BYTE, + &file_req); + if (ret != 0) { + perror("PVFS_Request_contiguous() error"); + return OMPI_ERROR; + } + ret = PVFS_sys_write (pvfs2_fs->object_ref, + file_req, + merge_offset, + merge_buf, + mem_req, + &(pvfs2_fs->credentials), + &resp_io); + if (ret != 0) { + perror("PVFS_sys_write() error"); + return OMPI_ERROR; + } + + merge = 0; + merge_offset = 0; + merge_length = 0; + if (NULL != merge_buf) { + free (merge_buf); + merge_buf = NULL; + } + } + else { + ret = PVFS_Request_contiguous (fh->f_io_array[i].length, + PVFS_BYTE, + &mem_req); + if (ret != 0) { + perror("PVFS_Request_contiguous() error"); + return OMPI_ERROR; + } + ret = PVFS_Request_contiguous (fh->f_io_array[i].length, + PVFS_BYTE, + &file_req); + if (ret != 0) { + perror("PVFS_Request_contiguous() error"); + return OMPI_ERROR; + } + ret = PVFS_sys_write (pvfs2_fs->object_ref, + file_req, + (OMPI_MPI_OFFSET_TYPE) + fh ->f_io_array[i].offset, + fh->f_io_array[i].memory_address, + mem_req, + &(pvfs2_fs->credentials), + &resp_io); + if (ret != 0) { + perror("PVFS_sys_write() error"); + return OMPI_ERROR; + } + } + } + } + return OMPI_SUCCESS; +} diff --git 
a/ompi/mca/fcache/Makefile.am b/ompi/mca/fcache/Makefile.am new file mode 100644 index 0000000000..2e8fe219a5 --- /dev/null +++ b/ompi/mca/fcache/Makefile.am @@ -0,0 +1,40 @@ +# +# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana +# University Research and Technology +# Corporation. All rights reserved. +# Copyright (c) 2004-2005 The University of Tennessee and The University +# of Tennessee Research Foundation. All rights +# reserved. +# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +# University of Stuttgart. All rights reserved. +# Copyright (c) 2004-2005 The Regents of the University of California. +# All rights reserved. +# Copyright (c) 2008-2011 University of Houston. All rights reserved. +# Copyright (c) 2010 Cisco Systems, Inc. All rights reserved. +# $COPYRIGHT$ +# +# Additional copyrights may follow +# +# $HEADER$ +# + +AM_CPPFLAGS = $(LTDLINCL) + +# main library setup +noinst_LTLIBRARIES = libmca_fcache.la +libmca_fcache_la_SOURCES = + +# local files +headers = fcache.h +libmca_fcache_la_SOURCES += $(headers) + +# Conditionally install the header files +if WANT_INSTALL_HEADERS +ompidir = $(includedir)/openmpi/$(subdir) +nobase_ompi_HEADERS = $(headers) +endif + +include base/Makefile.am + +distclean-local: + rm -f base/static-components.h diff --git a/ompi/mca/fcache/base/Makefile.am b/ompi/mca/fcache/base/Makefile.am new file mode 100644 index 0000000000..a133dac6a0 --- /dev/null +++ b/ompi/mca/fcache/base/Makefile.am @@ -0,0 +1,28 @@ +# +# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana +# University Research and Technology +# Corporation. All rights reserved. +# Copyright (c) 2004-2005 The University of Tennessee and The University +# of Tennessee Research Foundation. All rights +# reserved. +# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +# University of Stuttgart. All rights reserved. +# Copyright (c) 2004-2005 The Regents of the University of California. 
+# All rights reserved. +# Copyright (c) 2008-2011 University of Houston. All rights reserved. +# $COPYRIGHT$ +# +# Additional copyrights may follow +# +# $HEADER$ +# + +headers += \ + base/base.h + +libmca_fcache_la_SOURCES += \ + base/fcache_base_close.c \ + base/fcache_base_file_select.c \ + base/fcache_base_file_unselect.c \ + base/fcache_base_find_available.c \ + base/fcache_base_open.c diff --git a/ompi/mca/fcache/base/base.h b/ompi/mca/fcache/base/base.h new file mode 100644 index 0000000000..66d9a6bccb --- /dev/null +++ b/ompi/mca/fcache/base/base.h @@ -0,0 +1,70 @@ +/* + * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. + * Copyright (c) 2004-2005 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2005 The Regents of the University of California. + * All rights reserved. + * Copyright (c) 2008 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2008-2011 University of Houston. All rights reserved. + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +/** + * @file + * + * MCA fcache base framework public interface functions. 
+ */ + +#ifndef MCA_FCACHE_BASE_H +#define MCA_FCACHE_BASE_H + +#include "ompi_config.h" + +#include "mpi.h" +#include "opal/class/opal_list.h" +#include "ompi/mca/fcache/fcache.h" +#include "opal/mca/mca.h" + + +BEGIN_C_DECLS + +OMPI_DECLSPEC int mca_fcache_base_open(void); + +OMPI_DECLSPEC int mca_fcache_base_close(void); + +OMPI_DECLSPEC int mca_fcache_base_file_select(struct mca_io_ompio_file_t *file, + mca_base_component_t *preferred); + +OMPI_DECLSPEC int mca_fcache_base_file_unselect(struct mca_io_ompio_file_t *file); + +OMPI_DECLSPEC int mca_fcache_base_find_available(bool enable_progress_threads, + bool enable_mpi_threads); + +OMPI_DECLSPEC int mca_fcache_base_init_file (struct mca_io_ompio_file_t *file); + +OMPI_DECLSPEC int mca_fcache_base_get_param (struct mca_io_ompio_file_t *file, int keyval); +/* + * Globals + */ + +OMPI_DECLSPEC extern int mca_fcache_base_param; +OMPI_DECLSPEC extern int mca_fcache_base_output; + +OMPI_DECLSPEC extern bool mca_fcache_base_components_opened_valid; +OMPI_DECLSPEC extern bool mca_fcache_base_components_available_valid; + +OMPI_DECLSPEC extern opal_list_t mca_fcache_base_components_opened; +OMPI_DECLSPEC extern opal_list_t mca_fcache_base_components_available; + +END_C_DECLS + +#endif /* MCA_BASE_FCACHE_H */ diff --git a/ompi/mca/fcache/base/fcache_base_close.c b/ompi/mca/fcache/base/fcache_base_close.c new file mode 100644 index 0000000000..3b749210d2 --- /dev/null +++ b/ompi/mca/fcache/base/fcache_base_close.c @@ -0,0 +1,56 @@ +/* + * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. + * Copyright (c) 2004-2005 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2005 The Regents of the University of California. 
+ * All rights reserved. + * Copyright (c) 2008-2011 University of Houston. All rights reserved. + * $COPYRIGHTOB$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#include "ompi_config.h" + +#include + +#include "ompi/constants.h" +#include "opal/mca/mca.h" +#include "opal/mca/base/base.h" +#include "ompi/mca/fcache/fcache.h" +#include "ompi/mca/fcache/base/base.h" + +int mca_fcache_base_close(void) +{ + /* + Close all components that are still open. This may be the opened + list (if we're in ompi_info), or it may be the available list (if + we're anywhere else). + */ + + if (mca_fcache_base_components_opened_valid) { + mca_base_components_close(mca_fcache_base_output, + &mca_fcache_base_components_opened, NULL); + OBJ_DESTRUCT(&mca_fcache_base_components_opened); + mca_fcache_base_components_opened_valid = false; + } else if (mca_fcache_base_components_available_valid) { + mca_base_components_close(mca_fcache_base_output, + &mca_fcache_base_components_available, NULL); + OBJ_DESTRUCT(&mca_fcache_base_components_available); + mca_fcache_base_components_available_valid = false; + } + + /* Close the output stream for this framework */ + opal_output_close (mca_fcache_base_output); + + /* All done */ + + return OMPI_SUCCESS; +} diff --git a/ompi/mca/fcache/base/fcache_base_file_select.c b/ompi/mca/fcache/base/fcache_base_file_select.c new file mode 100644 index 0000000000..6f50d96aab --- /dev/null +++ b/ompi/mca/fcache/base/fcache_base_file_select.c @@ -0,0 +1,358 @@ +/* + * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. + * Copyright (c) 2004-2005 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2005 The Regents of the University of California. 
+ * All rights reserved. + * Copyright (c) 2008-2011 University of Houston. All rights reserved. + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#include "ompi_config.h" + +#include + +#include "opal/class/opal_list.h" +#include "opal/util/argv.h" +#include "opal/mca/mca.h" +#include "opal/mca/base/base.h" +#include "orte/util/show_help.h" +#include "ompi/mca/fcache/fcache.h" +#include "ompi/mca/fcache/base/base.h" +#include "ompi/mca/io/ompio/io_ompio.h" + +/* + * This structure is needed so that we can close the modules + * which are not selected but were opened. mca_base_modules_close + * which does this job for us requires a opal_list_t which contains + * these modules + */ +struct queried_module_t { + opal_list_item_t super; + mca_fcache_base_component_t *om_component; + mca_fcache_base_module_t *om_module; +}; +typedef struct queried_module_t queried_module_t; +static OBJ_CLASS_INSTANCE(queried_module_t, opal_list_item_t, NULL, NULL); + + +/* + * Only one fcache module can be attached to each file. + * + * This module calls the query funtion on all the components that were + * detected by fcache_base_open. This function is called on a + * per-file basis. This function has the following function. + * + * 1. Iterate over the list of available_components + * 2. Call the query function on each of these components. + * 3. query function returns the structure containing pointers + * to its module and its priority + * 4. Select the module with the highest priority + * 5. Call the init function on the selected module so that it does the + * right setup for the file + * 6. 
Call finalize on all the other modules which returned + * their module but were unfortunate to not get selected + */ + +int mca_fcache_base_file_select (struct mca_io_ompio_file_t *file, + mca_base_component_t *preferred) +{ + int priority; + int best_priority; + opal_list_item_t *item; + opal_list_item_t *next_item; + mca_base_component_priority_list_item_t *selectable_item; + char *names, **name_array; + int num_names; + mca_base_component_priority_list_item_t *cpli; + mca_fcache_base_component_t *component; + mca_fcache_base_component_t *best_component; + mca_fcache_base_module_t *module; + opal_list_t queried; + queried_module_t *om; + opal_list_t *selectable; + char *str; + int err = MPI_SUCCESS; + int i; + bool was_selectable_constructed = false; + + /* Check and see if a preferred component was provided. If it was + provided then it should be used (if possible) */ + + if (NULL != preferred) { + + /* We have a preferred component. Check if it is available + and if so, whether it wants to run */ + + str = &(preferred->mca_component_name[0]); + + opal_output_verbose(10, mca_fcache_base_output, + "fcache:base:file_select: Checking preferred component: %s", + str); + + /* query the component for its priority and get its module + structure. This is necessary to proceed */ + + component = (mca_fcache_base_component_t *)preferred; + module = component->fcachem_file_query (&priority); + if (NULL != module && + NULL != module->fcache_module_init) { + + /* this query seems to have returned something legitimate + * and we can now go ahead and initialize the + * file with it * but first, the functions which + * are null need to be filled in */ + + /*fill_null_pointers (module);*/ + file->f_fcache = module; + file->f_fcache_component = preferred; + + return module->fcache_module_init(file); + } + /* His preferred component is present, but is unable to + * run. This is not a good sign. 
We should try selecting + * some other component We let it fall through and select + * from the list of available components + */ + } /*end of selection for preferred component */ + + /* + * We fall till here if one of the two things happened: + * 1. The preferred component was provided but for some reason was + * not able to be selected + * 2. No preferred component was provided + * + * All we need to do is to go through the list of available + * components and find the one which has the highest priority and + * use that for this file + */ + + /* Check if anything was requested by means on the name parameters */ + names = NULL; + mca_base_param_lookup_string (mca_fcache_base_param, &names); + + if (NULL != names && 0 < strlen(names)) { + name_array = opal_argv_split (names, ','); + num_names = opal_argv_count (name_array); + + opal_output_verbose(10, mca_fcache_base_output, + "fcache:base:file_Select: Checking all available module"); + + /* since there are somethings which the mca requested through the + if the intersection is NULL, then we barf saying that the requested + modules are not being available */ + + selectable = OBJ_NEW(opal_list_t); + was_selectable_constructed = true; + + /* go through the compoents_available list and check against the names + * to see whether this can be added or not */ + + for (item = opal_list_get_first(&mca_fcache_base_components_available); + item != opal_list_get_end(&mca_fcache_base_components_available); + item = opal_list_get_next(item)) { + /* convert the opal_list_item_t returned into the proper type */ + cpli = (mca_base_component_priority_list_item_t *) item; + component = (mca_fcache_base_component_t *) cpli->super.cli_component; + opal_output_verbose(10, mca_fcache_base_output, + "select: initialising %s component %s", + component->fcachem_version.mca_type_name, + component->fcachem_version.mca_component_name); + + /* check if this name is present in the mca_base_params */ + for (i=0; i < num_names; i++) { + if (0 == 
strcmp(name_array[i], component->fcachem_version.mca_component_name)) { + /* this is present, and should be added o the selectable list */ + + /* We need to create a seperate object to initialise this list with + * since we cannot have the same item in 2 lists */ + + selectable_item = OBJ_NEW (mca_base_component_priority_list_item_t); + *selectable_item = *cpli; + opal_list_append (selectable, (opal_list_item_t *)selectable_item); + break; + } + } + } + + /* check for a NULL intersection between the available list and the + * list which was asked for */ + + if (0 == opal_list_get_size(selectable)) { + was_selectable_constructed = true; + OBJ_RELEASE (selectable); + opal_output_verbose (10, mca_fcache_base_output, + "fcache:base:file_select: preferred modules were not available"); + return OMPI_ERROR; + } + } else { /* if there was no name_array, then we need to simply initialize + selectable to mca_fcache_base_components_available */ + selectable = &mca_fcache_base_components_available; + } + + best_component = NULL; + best_priority = -1; + OBJ_CONSTRUCT(&queried, opal_list_t); + + for (item = opal_list_get_first(selectable); + item != opal_list_get_end(selectable); + item = opal_list_get_next(item)) { + /* + * convert the opal_list_item_t returned into the proper type + */ + cpli = (mca_base_component_priority_list_item_t *) item; + component = (mca_fcache_base_component_t *) cpli->super.cli_component; + opal_output_verbose(10, mca_fcache_base_output, + "select: initialising %s component %s", + component->fcachem_version.mca_type_name, + component->fcachem_version.mca_component_name); + + /* + * we can call the query function only if there is a function :-) + */ + if (NULL == component->fcachem_file_query) { + opal_output_verbose(10, mca_fcache_base_output, + "select: no query, ignoring the component"); + } else { + /* + * call the query function and see what it returns + */ + module = component->fcachem_file_query (&priority); + + if (NULL == module || + NULL == 
module->fcache_module_init) { + /* + * query did not return any action which can be used + */ + opal_output_verbose(10, mca_fcache_base_output, + "select: query returned failure"); + } else { + opal_output_verbose(10, mca_fcache_base_output, + "select: query returned priority %d", + priority); + /* + * is this the best component we have found till now? + */ + if (priority > best_priority) { + best_priority = priority; + best_component = component; + } + + om = OBJ_NEW(queried_module_t); + /* + * check if we have run out of space + */ + if (NULL == om) { + OBJ_DESTRUCT(&queried); + return OMPI_ERR_OUT_OF_RESOURCE; + } + om->om_component = component; + om->om_module = module; + opal_list_append(&queried, (opal_list_item_t *)om); + } /* end else of if (NULL == module) */ + } /* end else of if (NULL == component->fcachem_init) */ + } /* end for ... end of traversal */ + + /* We have to remove empty out the selectable list if the selectable + * list was constructed as a duplicate and not as a pointer to the + * mca_base_components_available list. So, check and destroy */ + + if (was_selectable_constructed) { + + /* remove all the items first */ + for (item = opal_list_get_first(&mca_fcache_base_components_available); + item != opal_list_get_end(&mca_fcache_base_components_available); + item = next_item) { + next_item = opal_list_get_next(item); + OBJ_RELEASE (item); + } + + /* release the list itself */ + OBJ_RELEASE (selectable); + was_selectable_constructed = false; + } + + /* + * Now we have alist of components which successfully returned + * their module struct. One of these components has the best + * priority. The rest have to be comm_unqueried to counter the + * effects of file_query'ing them. Finalize happens only on + * components which should are initialized. + */ + if (NULL == best_component) { + /* + * This typically means that there was no component which was + * able to run properly this time. 
So, we need to abort + * JMS replace with show_help + */ + OBJ_DESTRUCT(&queried); + return OMPI_ERROR; + } + + /* + * We now have a list of components which have successfully + * returned their priorities from the query. We now have to + * unquery() those components which have not been selected and + * init() the component which was selected + */ + for (item = opal_list_remove_first(&queried); + NULL != item; + item = opal_list_remove_first(&queried)) { + om = (queried_module_t *) item; + if (om->om_component == best_component) { + /* + * this is the chosen component, we have to initialise the + * module of this component. + * + * ANJU: a component might not have all the functions + * defined. Whereever a function pointer is null in the + * module structure we need to fill it in with the base + * structure function pointers. This is yet to be done + */ + + /* + * We don return here coz we still need to go through and + * elease the other objects + */ + + /*fill_null_pointers (om->om_module);*/ + file->f_fcache = om->om_module; + err = om->om_module->fcache_module_init(file); + file->f_fcache_component = (mca_base_component_t *)best_component; + + } else { + /* + * this is not the "choosen one", finalize + */ + if (NULL != om->om_component->fcachem_file_unquery) { + /* unquery the component only if they have some clean + * up job to do. Components which are queried but do + * not actually do anything typically do not have a + * unquery. 
Hence this check is necessary + */ + (void) om->om_component->fcachem_file_unquery(file); + opal_output_verbose(10, mca_fcache_base_output, + "select: component %s is not selected", + om->om_component->fcachem_version.mca_component_name); + } /* end if */ + } /* if not best component */ + OBJ_RELEASE(om); + } /* traversing through the entire list */ + + opal_output_verbose(10, mca_fcache_base_output, + "select: component %s selected", + best_component->fcachem_version.mca_component_name); + + OBJ_DESTRUCT(&queried); + + return err; +} diff --git a/ompi/mca/fcache/base/fcache_base_file_unselect.c b/ompi/mca/fcache/base/fcache_base_file_unselect.c new file mode 100644 index 0000000000..8cfa967aa3 --- /dev/null +++ b/ompi/mca/fcache/base/fcache_base_file_unselect.c @@ -0,0 +1,41 @@ +/* + * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. + * Copyright (c) 2004-2005 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2005 The Regents of the University of California. + * All rights reserved. + * Copyright (c) 2008-2011 University of Houston. All rights reserved. 
+ * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#include "ompi_config.h" + +#include +#include +#include + +#include "mpi.h" +#include "ompi/mca/io/ompio/io_ompio.h" +#include "opal/util/show_help.h" +#include "opal/mca/mca.h" +#include "opal/mca/base/base.h" +#include "ompi/mca/fcache/fcache.h" +#include "ompi/mca/fcache/base/base.h" + +int mca_fcache_base_file_unselect(mca_io_ompio_file_t *file) +{ + if (NULL != file->f_fcache && NULL != file->f_fcache->fcache_module_finalize) { + return file->f_fcache->fcache_module_finalize(file); + } + + return OMPI_SUCCESS; +} diff --git a/ompi/mca/fcache/base/fcache_base_find_available.c b/ompi/mca/fcache/base/fcache_base_find_available.c new file mode 100644 index 0000000000..ef3276b97e --- /dev/null +++ b/ompi/mca/fcache/base/fcache_base_find_available.c @@ -0,0 +1,170 @@ +/* + * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. + * Copyright (c) 2004-2005 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2005 The Regents of the University of California. + * All rights reserved. + * Copyright (c) 2008-2011 University of Houston. All rights reserved. 
+ * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#include "ompi_config.h" + +#include +#include + +#include "mpi.h" +#include "ompi/constants.h" +#include "opal/class/opal_list.h" +#include "orte/util/show_help.h" +#include "opal/mca/mca.h" +#include "opal/mca/base/base.h" +#include "opal/mca/base/mca_base_component_repository.h" +#include "ompi/mca/fcache/fcache.h" +#include "ompi/mca/fcache/base/base.h" + +opal_list_t mca_fcache_base_modules_available; +bool mca_fcache_base_modules_available_valid = false; + +static int init_query(const mca_base_component_t *m, + mca_base_component_priority_list_item_t *entry, + bool enable_progress_threads, + bool enable_mpi_threads); +static int init_query_2_0_0(const mca_base_component_t *component, + mca_base_component_priority_list_item_t *entry, + bool enable_progress_threads, + bool enable_mpi_threads); + +int mca_fcache_base_find_available(bool enable_progress_threads, + bool enable_mpi_threads) +{ + bool found = false; + mca_base_component_priority_list_item_t *entry; + opal_list_item_t *p; + + /* Initialize the list */ + + OBJ_CONSTRUCT(&mca_fcache_base_components_available, opal_list_t); + mca_fcache_base_components_available_valid = true; + + /* The list of components which we should check is already present + in mca_fcache_base_components_opened, which was established in + mca_fcache_base_open */ + + for (found = false, + p = opal_list_remove_first (&mca_fcache_base_components_opened); + NULL != p; + p = opal_list_remove_first (&mca_fcache_base_components_opened)) { + entry = OBJ_NEW(mca_base_component_priority_list_item_t); + entry->super.cli_component = + ((mca_base_component_list_item_t *)p)->cli_component; + + /* Now for this entry, we have to determine the thread level. Call + a subroutine to do the job for us */ + + if (OMPI_SUCCESS == init_query(entry->super.cli_component, entry, + enable_progress_threads, + enable_mpi_threads)) { + /* Save the results in the list. 
The priority is not relvant at + this point in time. But we save the thread arguments so that + the initial selection algorithm can negotiate overall thread + level for this process */ + entry->cpli_priority = 0; + opal_list_append (&mca_fcache_base_components_available, + (opal_list_item_t *) entry); + found = true; + } else { + /* The component does not want to run, so close it. Its close() + has already been invoked. Close it out of the DSO repository + (if it is there in the repository) */ + mca_base_component_repository_release(entry->super.cli_component); + OBJ_RELEASE(entry); + } + /* Free entry from the "opened" list */ + OBJ_RELEASE(p); + } + + /* The opened list is no longer necessary, so we can free it */ + OBJ_DESTRUCT (&mca_fcache_base_components_opened); + mca_fcache_base_components_opened_valid = false; + + /* There should atleast be one fcache component which was available */ + if (false == found) { + /* Need to free all items in the list */ + OBJ_DESTRUCT(&mca_fcache_base_components_available); + mca_fcache_base_components_available_valid = false; + opal_output_verbose (10, mca_fcache_base_output, + "fcache:find_available: no fcache components available!"); + return OMPI_ERROR; + } + + /* All done */ + return OMPI_SUCCESS; +} + + +static int init_query(const mca_base_component_t *m, + mca_base_component_priority_list_item_t *entry, + bool enable_progress_threads, + bool enable_mpi_threads) +{ + int ret; + + opal_output_verbose(10, mca_fcache_base_output, + "fcache:find_available: querying fcache component %s", + m->mca_component_name); + + /* This component has been successfully opened, now try to query it */ + if (2 == m->mca_type_major_version && + 0 == m->mca_type_minor_version && + 0 == m->mca_type_release_version) { + ret = init_query_2_0_0(m, entry, enable_progress_threads, + enable_mpi_threads); + } else { + /* unrecognised API version */ + opal_output_verbose(10, mca_fcache_base_output, + "fcache:find_available:unrecognised fcache API 
version (%d.%d.%d)", + m->mca_type_major_version, + m->mca_type_minor_version, + m->mca_type_release_version); + return OMPI_ERROR; + } + + /* Query done -- look at return value to see what happened */ + if (OMPI_SUCCESS != ret) { + opal_output_verbose(10, mca_fcache_base_output, + "fcache:find_available fcache component %s is not available", + m->mca_component_name); + if (NULL != m->mca_close_component) { + m->mca_close_component(); + } + } else { + opal_output_verbose(10, mca_fcache_base_output, + "fcache:find_avalable: fcache component %s is available", + m->mca_component_name); + + } + /* All done */ + return ret; +} + + +static int init_query_2_0_0(const mca_base_component_t *component, + mca_base_component_priority_list_item_t *entry, + bool enable_progress_threads, + bool enable_mpi_threads) +{ + mca_fcache_base_component_2_0_0_t *fcache = (mca_fcache_base_component_2_0_0_t *) component; + + return fcache->fcachem_init_query(enable_progress_threads, + enable_mpi_threads); +} diff --git a/ompi/mca/fcache/base/fcache_base_open.c b/ompi/mca/fcache/base/fcache_base_open.c new file mode 100644 index 0000000000..259003e3ab --- /dev/null +++ b/ompi/mca/fcache/base/fcache_base_open.c @@ -0,0 +1,88 @@ +/* + * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. + * Copyright (c) 2004-2005 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2005 The Regents of the University of California. + * All rights reserved. + * Copyright (c) 2008-2011 University of Houston. All rights reserved. 
+ * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + + +#include "ompi_config.h" +#include + +#include "ompi/class/ompi_free_list.h" +#include "orte/util/show_help.h" +#include "opal/mca/mca.h" +#include "opal/mca/base/base.h" +#include "opal/mca/base/mca_base_param.h" + +#include "orte/util/show_help.h" + +#include "ompi/mca/fcache/fcache.h" +#include "ompi/mca/fcache/base/base.h" + + +/* + * The following file was created by configure. It contains extern + * statements and the definition of an array of pointers to each + * component's public mca_base_component_t struct. + */ +#ifdef __WINDOWS__ + const mca_base_component_t *mca_fcache_base_static_components[] = {NULL}; +#else +#include "ompi/mca/fcache/base/static-components.h" +#endif + +/* + * Global variables; most of which are loaded by back-ends of MCA + * variables + */ +int mca_fcache_base_param = -1; +int mca_fcache_base_output = -1; + +opal_list_t mca_fcache_base_components_opened; +opal_list_t mca_fcache_base_components_available; + +bool mca_fcache_base_components_available_valid = false; +bool mca_fcache_base_components_opened_valid = false; + +mca_fcache_base_component_t mca_fcache_base_selected_component; +mca_fcache_base_module_t mca_fcache; + +/* + * Function for finding and opening either all MCA components, or the one + * that was specifically requested via a MCA parameter. 
+ */ +int mca_fcache_base_open(void) +{ + /* Open an output stream for this framework */ + + mca_fcache_base_output = opal_output_open(NULL); + + /* Open up all available components */ + + if (OMPI_SUCCESS != + mca_base_components_open("fcache", mca_fcache_base_output, + mca_fcache_base_static_components, + &mca_fcache_base_components_opened, true)) { + return OMPI_ERROR; + } + mca_fcache_base_components_opened_valid = true; + + /* Find the index of the MCA "fcache" param for selection */ + + mca_fcache_base_param = mca_base_param_find("fcache", "base", NULL); + + return OMPI_SUCCESS; +} diff --git a/ompi/mca/fcache/base/static-components.h b/ompi/mca/fcache/base/static-components.h new file mode 100644 index 0000000000..fddbe26106 --- /dev/null +++ b/ompi/mca/fcache/base/static-components.h @@ -0,0 +1,18 @@ +/* + * $HEADER$ + */ +#if defined(c_plusplus) || defined(__cplusplus) +extern "C" { +#endif + + + +const mca_base_component_t *mca_fcache_base_static_components[] = { + + NULL +}; + +#if defined(c_plusplus) || defined(__cplusplus) +} +#endif + diff --git a/ompi/mca/fcache/fcache.h b/ompi/mca/fcache/fcache.h new file mode 100644 index 0000000000..949eac873e --- /dev/null +++ b/ompi/mca/fcache/fcache.h @@ -0,0 +1,143 @@ +/* + * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. + * Copyright (c) 2004-2005 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2005 The Regents of the University of California. + * All rights reserved. + * Copyright (c) 2008-2011 University of Houston. All rights reserved. 
#ifndef OMPI_MCA_FCACHE_H
#define OMPI_MCA_FCACHE_H

#include "ompi_config.h"
#include "mpi.h"
#include "opal/mca/mca.h"
#include "opal/mca/base/base.h"

BEGIN_C_DECLS

/* Only pointers to these types are used below, so forward declarations
 * are sufficient. */
struct mca_io_ompio_file_t;
struct mca_io_ompio_io_servers;
/*
 * Macro for use in components that are of type fcache
 */
#define MCA_FCACHE_BASE_VERSION_2_0_0 \
    MCA_BASE_VERSION_2_0_0, \
    "fcache", 2, 0, 0

/*
 * These are the component function prototypes. These function pointers
 * go into the component structure.
 *
 *  - init_query() is called once per opened component by
 *    mca_fcache_base_find_available(); a component that does not return
 *    OMPI_SUCCESS is closed and dropped.
 *  - file_query() is called on every available component while a module
 *    is selected for a file.  It returns the component's module and
 *    stores its priority through the "priority" argument; the component
 *    with the highest priority is selected.  Returning NULL (or a module
 *    without an init function) takes the component out of the selection.
 *  - file_unquery() is called, if it is not NULL, on every queried
 *    component that was NOT selected, so that any *stuff* done during
 *    file_query() can be undone.
 */

/*
 * **************** component struct *******************************
 * *********** These functions go in the component struct **********
 * **************** component struct *******************************
 */

typedef int (*mca_fcache_base_component_init_query_1_0_0_fn_t)
    (bool enable_progress_threads,
     bool enable_mpi_threads);

typedef struct mca_fcache_base_module_1_0_0_t *
(*mca_fcache_base_component_file_query_1_0_0_fn_t) (int *priority);

typedef int (*mca_fcache_base_component_file_unquery_1_0_0_fn_t)
    (struct mca_io_ompio_file_t *file);

/*
 * ****************** component struct ******************************
 * Structure for fcache v2.0.0 components. This is chained to MCA v2.0.0
 * ****************** component struct ******************************
 */
struct mca_fcache_base_component_2_0_0_t {
    mca_base_component_t fcachem_version;
    mca_base_component_data_t fcachem_data;

    mca_fcache_base_component_init_query_1_0_0_fn_t fcachem_init_query;
    mca_fcache_base_component_file_query_1_0_0_fn_t fcachem_file_query;
    mca_fcache_base_component_file_unquery_1_0_0_fn_t fcachem_file_unquery;
};
typedef struct mca_fcache_base_component_2_0_0_t mca_fcache_base_component_2_0_0_t;
typedef struct mca_fcache_base_component_2_0_0_t mca_fcache_base_component_t;

/*
 * ***********************************************************************
 * ************************  Interface function definitions **************
 * These are the typedefs for the function pointers to various fcache
 * backend functions which will be used by the various fcache components
 * ***********************************************************************
 */

/* Called on the module of the selected component, with the file the
 * module has just been attached to. */
typedef int (*mca_fcache_base_module_init_1_0_0_fn_t)
(struct mca_io_ompio_file_t *file);

/* Called, if it is not NULL, by mca_fcache_base_file_unselect(). */
typedef int (*mca_fcache_base_module_finalize_1_0_0_fn_t)
(struct mca_io_ompio_file_t *file);

/* NOTE(review): the meaning and direction (in/out) of the arguments of
 * the three typedefs below is not documented anywhere in this header --
 * to be confirmed with the component authors. */
typedef int (*mca_fcache_base_module_get_file_layout_fn_t)(
    char* filename,
    int *num_io_servers,
    size_t *depth,
    int *file_io_servers);

typedef int (*mca_fcache_base_module_set_file_layout_fn_t)(
    char* filename,
    int *num_io_servers,
    size_t *depth,
    int *file_io_servers);

typedef int (*mca_fcache_base_module_get_io_servers_fn_t)(
    char* filename,
    struct mca_io_ompio_io_servers *io_servers,
    int num_io_servers);

/*
 * ***********************************************************************
 * ***************************  module structure *************************
 * ***********************************************************************
 */
struct mca_fcache_base_module_1_0_0_t {
    /*
     * Per-file initialization and finalization functions. init is called
     * only on the module which is selected; finalize is called when the
     * module is unselected from the file.
     */
    mca_fcache_base_module_init_1_0_0_fn_t fcache_module_init;
    mca_fcache_base_module_finalize_1_0_0_fn_t fcache_module_finalize;

    /* FCACHE function pointers */
    mca_fcache_base_module_get_file_layout_fn_t fcache_get_file_layout;
    mca_fcache_base_module_set_file_layout_fn_t fcache_set_file_layout;
    mca_fcache_base_module_get_io_servers_fn_t fcache_get_io_servers;
};
typedef struct mca_fcache_base_module_1_0_0_t mca_fcache_base_module_1_0_0_t;
typedef mca_fcache_base_module_1_0_0_t mca_fcache_base_module_t;

END_C_DECLS

#endif /* OMPI_MCA_FCACHE_H */
+# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +# University of Stuttgart. All rights reserved. +# Copyright (c) 2004-2005 The Regents of the University of California. +# All rights reserved. +# Copyright (c) 2008-2011 University of Houston. All rights reserved. +# $COPYRIGHT$ +# +# Additional copyrights may follow +# +# $HEADER$ +# + +# Make the output library in this directory, and name it either +# mca_<type>_<name>.la (for DSO builds) or libmca_<type>_<name>.la +# (for static builds). + +if MCA_BUILD_ompi_fcache_ux_DSO +component_noinst = +component_install = mca_fcache_ux.la +else +component_noinst = libmca_fcache_ux.la +component_install = +endif + +mcacomponentdir = $(pkglibdir) +mcacomponent_LTLIBRARIES = $(component_install) +mca_fcache_ux_la_SOURCES = $(sources) +mca_fcache_ux_la_LDFLAGS = -module -avoid-version + +noinst_LTLIBRARIES = $(component_noinst) +libmca_fcache_ux_la_SOURCES = $(sources) +libmca_fcache_ux_la_LDFLAGS = -module -avoid-version + +# Source files + +sources = \ + fcache_ux.h \ + fcache_ux.c \ + fcache_ux_component.c \ + fcache_ux_get_file_layout.c \ + fcache_ux_set_file_layout.c \ + fcache_ux_get_io_servers.c diff --git a/ompi/mca/fcache/ux/fcache_ux.c b/ompi/mca/fcache/ux/fcache_ux.c new file mode 100644 index 0000000000..d6913b1683 --- /dev/null +++ b/ompi/mca/fcache/ux/fcache_ux.c @@ -0,0 +1,83 @@ +/* + * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. + * Copyright (c) 2004-2006 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2005 The Regents of the University of California. + * All rights reserved. + * Copyright (c) 2008-2011 University of Houston. All rights reserved.
+ * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + * + * These symbols are in a file by themselves to provide nice linker + * semantics. Since linkers generally pull in symbols by object fules, + * keeping these symbols as the only symbols in this file prevents + * utility programs such as "ompi_info" from having to import entire + * modules just to query their version and parameters + */ + +#include "ompi_config.h" +#include "mpi.h" +#include "ompi/mca/fcache/fcache.h" +#include "ompi/mca/fcache/ux/fcache_ux.h" + +/* + * ******************************************************************* + * ************************ actions structure ************************ + * ******************************************************************* + */ +static mca_fcache_base_module_1_0_0_t ux = { + mca_fcache_ux_module_init, /* initalise after being selected */ + mca_fcache_ux_module_finalize, /* close a module on a communicator */ + mca_fcache_ux_get_file_layout, + mca_fcache_ux_set_file_layout, + mca_fcache_ux_get_io_servers +}; +/* + * ******************************************************************* + * ************************* structure ends ************************** + * ******************************************************************* + */ + +int mca_fcache_ux_component_init_query(bool enable_progress_threads, + bool enable_mpi_threads) +{ + /* Nothing to do */ + + return OMPI_SUCCESS; +} + +struct mca_fcache_base_module_1_0_0_t * +mca_fcache_ux_component_file_query (int *priority) +{ + *priority = 20; + + return &ux; +} + +int mca_fcache_ux_component_file_unquery (mca_io_ompio_file_t *file) +{ + /* This function might be needed for some purposes later. 
for now it + * does not have anything to do since there are no steps which need + * to be undone if this module is not selected */ + + return OMPI_SUCCESS; +} + +int mca_fcache_ux_module_init (mca_io_ompio_file_t *file) +{ + return OMPI_SUCCESS; +} + + +int mca_fcache_ux_module_finalize (mca_io_ompio_file_t *file) +{ + return OMPI_SUCCESS; +} diff --git a/ompi/mca/fcache/ux/fcache_ux.h b/ompi/mca/fcache/ux/fcache_ux.h new file mode 100644 index 0000000000..5962fe5342 --- /dev/null +++ b/ompi/mca/fcache/ux/fcache_ux.h @@ -0,0 +1,68 @@ +/* + * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. + * Copyright (c) 2004-2006 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2005 The Regents of the University of California. + * All rights reserved. + * Copyright (c) 2008-2011 University of Houston. All rights reserved. 
#ifndef MCA_FCACHE_UX_H
#define MCA_FCACHE_UX_H

#include "ompi_config.h"
#include "opal/mca/mca.h"
#include "ompi/mca/fcache/fcache.h"
#include "ompi/mca/io/ompio/io_ompio.h"


BEGIN_C_DECLS

/*
 * Component-level entry points of the ux component; these are the
 * functions stored in mca_fcache_ux_component.
 */

/* Availability query. */
int mca_fcache_ux_component_init_query(bool enable_progress_threads,
                                       bool enable_mpi_threads);
/* Selection query: returns the ux module and stores its priority
 * through "priority". */
struct mca_fcache_base_module_1_0_0_t *
mca_fcache_ux_component_file_query (int *priority);
/* Counterpart of the selection query. */
int mca_fcache_ux_component_file_unquery (mca_io_ompio_file_t *file);

/* Per-file init / finalize functions of the ux module. */
int mca_fcache_ux_module_init (mca_io_ompio_file_t *file);
int mca_fcache_ux_module_finalize (mca_io_ompio_file_t *file);

/* The public component structure of the ux component. */
OMPI_MODULE_DECLSPEC extern mca_fcache_base_component_2_0_0_t mca_fcache_ux_component;
/*
 * ******************************************************************
 * ********* functions which are implemented in this module *********
 * ******************************************************************
 */

/* NOTE(review): the argument semantics (which arguments are inputs and
 * which are outputs) of the three functions below are not documented in
 * this header -- to be confirmed with the component authors. */

int mca_fcache_ux_get_file_layout (char* filename,
                                   int *num_io_servers,
                                   size_t *depth,
                                   int *file_io_servers);

int mca_fcache_ux_set_file_layout (char* filename,
                                   int *num_io_servers,
                                   size_t *depth,
                                   int *file_io_servers);

int mca_fcache_ux_get_io_servers (char* filename,
                                  struct mca_io_ompio_io_servers *io_servers,
                                  int num_io_servers);
/*
 * ******************************************************************
 * ************ functions implemented in this module end ************
 * ******************************************************************
 */

END_C_DECLS

#endif /* MCA_FCACHE_UX_H */
/*
 * Public string showing the fcache ux component version number
 */
const char *mca_fcache_ux_component_version_string =
    "OMPI/MPI ux FCACHE MCA component version " OMPI_VERSION;

/*
 * Instantiate the public struct with all of our public information
 * and pointers to our public functions in it.  The initializer is
 * positional, so the order of the entries must match the member order of
 * mca_fcache_base_component_2_0_0_t.
 */
mca_fcache_base_component_2_0_0_t mca_fcache_ux_component = {

    /* First, the mca_component_t struct containing meta information
       about the component itself */

    {
        MCA_FCACHE_BASE_VERSION_2_0_0,

        /* Component name and version */
        "ux",
        OMPI_MAJOR_VERSION,
        OMPI_MINOR_VERSION,
        OMPI_RELEASE_VERSION,
        /* NOTE(review): presumably the component open and close
           functions, neither of which this component provides --
           confirm against the layout of mca_base_component_t */
        NULL,
        NULL
    },
    {
        /* This component is checkpointable */
        MCA_BASE_METADATA_PARAM_CHECKPOINT
    },
    mca_fcache_ux_component_init_query,  /* fcachem_init_query: availability query */
    mca_fcache_ux_component_file_query,  /* fcachem_file_query: get priority and module */
    mca_fcache_ux_component_file_unquery /* fcachem_file_unquery: undo what was done by previous function */
};
mode 100644 index 0000000000..9ae20876a6 --- /dev/null +++ b/ompi/mca/fcache/ux/fcache_ux_get_file_layout.c @@ -0,0 +1,37 @@ +/* + * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. + * Copyright (c) 2004-2005 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2005 The Regents of the University of California. + * All rights reserved. + * Copyright (c) 2008-2011 University of Houston. All rights reserved. + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + + +#include "ompi_config.h" +#include "fcache_ux.h" + +#include "mpi.h" +#include "ompi/constants.h" +#include "orte/util/show_help.h" +#include "ompi/mca/fcache/fcache.h" + +int +mca_fcache_ux_get_file_layout (char* filename, + int *num_io_servers, + size_t *depth, + int *file_io_servers) +{ + printf ("UX GET FILE LAYOUT\n"); + return OMPI_SUCCESS; +} diff --git a/ompi/mca/fcache/ux/fcache_ux_get_io_servers.c b/ompi/mca/fcache/ux/fcache_ux_get_io_servers.c new file mode 100644 index 0000000000..3f9930534c --- /dev/null +++ b/ompi/mca/fcache/ux/fcache_ux_get_io_servers.c @@ -0,0 +1,36 @@ +/* + * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. + * Copyright (c) 2004-2005 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2005 The Regents of the University of California. + * All rights reserved. + * Copyright (c) 2008-2011 University of Houston. All rights reserved. 
+ * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + + +#include "ompi_config.h" +#include "fcache_ux.h" + +#include "mpi.h" +#include "ompi/constants.h" +#include "orte/util/show_help.h" +#include "ompi/mca/fcache/fcache.h" + +int +mca_fcache_ux_get_io_servers (char* filename, + struct mca_io_ompio_io_servers *io_servers, + int num_io_servers) +{ + printf ("UX GET IO SERVERS\n"); + return OMPI_SUCCESS; +} diff --git a/ompi/mca/fcache/ux/fcache_ux_set_file_layout.c b/ompi/mca/fcache/ux/fcache_ux_set_file_layout.c new file mode 100644 index 0000000000..006c6d4074 --- /dev/null +++ b/ompi/mca/fcache/ux/fcache_ux_set_file_layout.c @@ -0,0 +1,37 @@ +/* + * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. + * Copyright (c) 2004-2005 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2005 The Regents of the University of California. + * All rights reserved. + * Copyright (c) 2008-2011 University of Houston. All rights reserved. 
+ * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + + +#include "ompi_config.h" +#include "fcache_ux.h" + +#include "mpi.h" +#include "ompi/constants.h" +#include "orte/util/show_help.h" +#include "ompi/mca/fcache/fcache.h" + +int +mca_fcache_ux_set_file_layout (char* filename, + int *num_io_servers, + size_t *depth, + int *file_io_servers) +{ + printf ("UX SET FILE LAYOUT\n"); + return OMPI_SUCCESS; +} diff --git a/ompi/mca/fcoll/Makefile.am b/ompi/mca/fcoll/Makefile.am new file mode 100644 index 0000000000..0af2ae045e --- /dev/null +++ b/ompi/mca/fcoll/Makefile.am @@ -0,0 +1,40 @@ +# +# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana +# University Research and Technology +# Corporation. All rights reserved. +# Copyright (c) 2004-2005 The University of Tennessee and The University +# of Tennessee Research Foundation. All rights +# reserved. +# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +# University of Stuttgart. All rights reserved. +# Copyright (c) 2004-2005 The Regents of the University of California. +# All rights reserved. +# Copyright (c) 2008-2011 University of Houston. All rights reserved. +# Copyright (c) 2010 Cisco Systems, Inc. All rights reserved. 
+# $COPYRIGHT$ +# +# Additional copyrights may follow +# +# $HEADER$ +# + +AM_CPPFLAGS = $(LTDLINCL) + +# main library setup +noinst_LTLIBRARIES = libmca_fcoll.la +libmca_fcoll_la_SOURCES = + +# local files +headers = fcoll.h +libmca_fcoll_la_SOURCES += $(headers) + +# Conditionally install the header files +if WANT_INSTALL_HEADERS +ompidir = $(includedir)/openmpi/$(subdir) +nobase_ompi_HEADERS = $(headers) +endif + +include base/Makefile.am + +distclean-local: + rm -f base/static-components.h diff --git a/ompi/mca/fcoll/base/Makefile.am b/ompi/mca/fcoll/base/Makefile.am new file mode 100644 index 0000000000..eec2a277b3 --- /dev/null +++ b/ompi/mca/fcoll/base/Makefile.am @@ -0,0 +1,28 @@ +# +# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana +# University Research and Technology +# Corporation. All rights reserved. +# Copyright (c) 2004-2005 The University of Tennessee and The University +# of Tennessee Research Foundation. All rights +# reserved. +# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +# University of Stuttgart. All rights reserved. +# Copyright (c) 2004-2005 The Regents of the University of California. +# All rights reserved. +# Copyright (c) 2008-2011 University of Houston. All rights reserved. +# $COPYRIGHT$ +# +# Additional copyrights may follow +# +# $HEADER$ +# + +headers += \ + base/base.h + +libmca_fcoll_la_SOURCES += \ + base/fcoll_base_close.c \ + base/fcoll_base_file_select.c \ + base/fcoll_base_file_unselect.c \ + base/fcoll_base_find_available.c \ + base/fcoll_base_open.c diff --git a/ompi/mca/fcoll/base/base.h b/ompi/mca/fcoll/base/base.h new file mode 100644 index 0000000000..cd0f94e7be --- /dev/null +++ b/ompi/mca/fcoll/base/base.h @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. 
+ * Copyright (c) 2004-2005 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2005 The Regents of the University of California. + * All rights reserved. + * Copyright (c) 2008 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2008-2011 University of Houston. All rights reserved. + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +/** + * @file + * + * MCA fcoll base framework public interface functions. + */ + +#ifndef MCA_FCOLL_BASE_H +#define MCA_FCOLL_BASE_H + +#include "ompi_config.h" + +#include "mpi.h" +#include "opal/class/opal_list.h" +#include "ompi/mca/fcoll/fcoll.h" +#include "opal/mca/mca.h" + + +BEGIN_C_DECLS + +OMPI_DECLSPEC int mca_fcoll_base_open(void); + +OMPI_DECLSPEC int mca_fcoll_base_close(void); + +OMPI_DECLSPEC int mca_fcoll_base_file_select(struct mca_io_ompio_file_t *file, + mca_base_component_t *preferred); +OMPI_DECLSPEC int mca_fcoll_base_query_table (struct mca_io_ompio_file_t *file, + char *name); +OMPI_DECLSPEC int mca_fcoll_base_file_unselect(struct mca_io_ompio_file_t *file); + +OMPI_DECLSPEC int mca_fcoll_base_find_available(bool enable_progress_threads, + bool enable_mpi_threads); + +OMPI_DECLSPEC int mca_fcoll_base_init_file (struct mca_io_ompio_file_t *file); + +OMPI_DECLSPEC int mca_fcoll_base_get_param (struct mca_io_ompio_file_t *file, int keyval); +/* + * Globals + */ + +OMPI_DECLSPEC extern int mca_fcoll_base_param; +OMPI_DECLSPEC extern int mca_fcoll_base_output; + +OMPI_DECLSPEC extern bool mca_fcoll_base_components_opened_valid; +OMPI_DECLSPEC extern bool mca_fcoll_base_components_available_valid; + +OMPI_DECLSPEC extern opal_list_t mca_fcoll_base_components_opened; +OMPI_DECLSPEC extern opal_list_t mca_fcoll_base_components_available; + +END_C_DECLS + +#endif /* MCA_BASE_FCOLL_H 
*/ diff --git a/ompi/mca/fcoll/base/fcoll_base_close.c b/ompi/mca/fcoll/base/fcoll_base_close.c new file mode 100644 index 0000000000..3ae8e4967e --- /dev/null +++ b/ompi/mca/fcoll/base/fcoll_base_close.c @@ -0,0 +1,56 @@ +/* + * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. + * Copyright (c) 2004-2005 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2005 The Regents of the University of California. + * All rights reserved. + * Copyright (c) 2008-2011 University of Houston. All rights reserved. + * $COPYRIGHTOB$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#include "ompi_config.h" + +#include + +#include "ompi/constants.h" +#include "opal/mca/mca.h" +#include "opal/mca/base/base.h" +#include "ompi/mca/fcoll/fcoll.h" +#include "ompi/mca/fcoll/base/base.h" + +int mca_fcoll_base_close(void) +{ + /* + Close all components that are still open. This may be the opened + list (if we're in ompi_info), or it may be the available list (if + we're anywhere else). 
+ */ + + if (mca_fcoll_base_components_opened_valid) { + mca_base_components_close(mca_fcoll_base_output, + &mca_fcoll_base_components_opened, NULL); + OBJ_DESTRUCT(&mca_fcoll_base_components_opened); + mca_fcoll_base_components_opened_valid = false; + } else if (mca_fcoll_base_components_available_valid) { + mca_base_components_close(mca_fcoll_base_output, + &mca_fcoll_base_components_available, NULL); + OBJ_DESTRUCT(&mca_fcoll_base_components_available); + mca_fcoll_base_components_available_valid = false; + } + + /* Close the output stream for this framework */ + opal_output_close (mca_fcoll_base_output); + + /* All done */ + + return OMPI_SUCCESS; +} diff --git a/ompi/mca/fcoll/base/fcoll_base_file_select.c b/ompi/mca/fcoll/base/fcoll_base_file_select.c new file mode 100644 index 0000000000..9c56a2a8e5 --- /dev/null +++ b/ompi/mca/fcoll/base/fcoll_base_file_select.c @@ -0,0 +1,390 @@ +/* + * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. + * Copyright (c) 2004-2005 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2005 The Regents of the University of California. + * All rights reserved. + * Copyright (c) 2008-2011 University of Houston. All rights reserved. + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#include "ompi_config.h" + +#include + +#include "opal/class/opal_list.h" +#include "opal/util/argv.h" +#include "opal/mca/mca.h" +#include "opal/mca/base/base.h" +#include "orte/util/show_help.h" +#include "ompi/mca/fcoll/fcoll.h" +#include "ompi/mca/fcoll/base/base.h" +#include "ompi/mca/io/ompio/io_ompio.h" + +/* + * This structure is needed so that we can close the modules + * which are not selected but were opened. 
mca_base_modules_close
+ * which does this job for us requires an opal_list_t which contains
+ * these modules
+ */
+struct queried_module_t {
+    opal_list_item_t super;
+    mca_fcoll_base_component_t *om_component;
+    mca_fcoll_base_module_t *om_module;
+};
+typedef struct queried_module_t queried_module_t;
+static OBJ_CLASS_INSTANCE(queried_module_t, opal_list_item_t, NULL, NULL);
+
+
+/*
+ * Query a single component on behalf of "file".  If the component has
+ * a query function and that function hands back a module with an init
+ * function, the module is attached to the file and initialised.
+ *
+ * Returns true if the component took the file (the return value of
+ * the module's init function is then stored in *rc), false otherwise.
+ */
+static bool fcoll_base_try_component (struct mca_io_ompio_file_t *file,
+                                      mca_fcoll_base_component_t *component,
+                                      int *rc)
+{
+    int priority;
+    mca_fcoll_base_module_t *module;
+
+    /* we can call the query function only if there is one */
+    if (NULL == component->fcollm_file_query) {
+        return false;
+    }
+
+    module = component->fcollm_file_query (file, &priority);
+    if (NULL == module || NULL == module->fcoll_module_init) {
+        return false;
+    }
+
+    file->f_fcoll = module;
+    file->f_fcoll_component = (mca_base_component_t *) component;
+    *rc = module->fcoll_module_init (file);
+
+    return true;
+}
+
+/*
+ * Walk the list of available components and hand the file to the first
+ * one whose name appears in the comma separated list "names" and which
+ * is willing to run.
+ *
+ * Returns the result of that component's module init function, or
+ * OMPI_ERROR if none of the requested components could be used.
+ */
+static int fcoll_base_select_by_name (struct mca_io_ompio_file_t *file,
+                                      char *names)
+{
+    char **name_array;
+    int num_names;
+    int i;
+    int rc = OMPI_ERROR;
+    bool selected = false;
+    opal_list_item_t *item;
+    mca_base_component_priority_list_item_t *cpli;
+    mca_fcoll_base_component_t *component;
+
+    name_array = opal_argv_split (names, ',');
+    num_names = opal_argv_count (name_array);
+
+    opal_output_verbose(10, mca_fcoll_base_output,
+                        "fcoll:base:file_Select: Checking all available module");
+
+    /* go through the components_available list and check each entry
+     * against the requested names */
+    for (item = opal_list_get_first(&mca_fcoll_base_components_available);
+         !selected &&
+             item != opal_list_get_end(&mca_fcoll_base_components_available);
+         item = opal_list_get_next(item)) {
+        /* convert the opal_list_item_t returned into the proper type */
+        cpli = (mca_base_component_priority_list_item_t *) item;
+        component = (mca_fcoll_base_component_t *) cpli->super.cli_component;
+        opal_output_verbose(10, mca_fcoll_base_output,
+                            "select: initialising %s component %s",
+                            component->fcollm_version.mca_type_name,
+                            component->fcollm_version.mca_component_name);
+
+        for (i = 0; !selected && i < num_names; i++) {
+            if (0 == strcmp(name_array[i],
+                            component->fcollm_version.mca_component_name)) {
+                /* this component was asked for; use it if it can run */
+                selected = fcoll_base_try_component (file, component, &rc);
+            }
+        }
+    }
+
+    /* the split copy of the names is ours to free on every path */
+    opal_argv_free (name_array);
+
+    if (!selected) {
+        /* NULL intersection between the available list and the list
+         * which was asked for */
+        opal_output_verbose (10, mca_fcoll_base_output,
+                             "fcoll:base:file_select: preferred modules were not available");
+        return OMPI_ERROR;
+    }
+
+    return rc;
+}
+
+/*
+ * Empty the list of queried-but-unused modules: every component on it
+ * is unqueried (if it has an unquery function) to counter the effect
+ * of the earlier query, its list item is released, and finally the
+ * list object itself is destructed.
+ */
+static void fcoll_base_discard_queried (opal_list_t *queried,
+                                        struct mca_io_ompio_file_t *file)
+{
+    opal_list_item_t *item;
+    queried_module_t *om;
+
+    while (NULL != (item = opal_list_remove_first (queried))) {
+        om = (queried_module_t *) item;
+        if (NULL != om->om_component->fcollm_file_unquery) {
+            (void) om->om_component->fcollm_file_unquery (file);
+        }
+        OBJ_RELEASE(om);
+    }
+
+    OBJ_DESTRUCT(queried);
+}
+
+/*
+ * Only one fcoll module can be attached to each file.
+ *
+ * This function is called on a per-file basis and picks the module as
+ * follows:
+ *
+ * 1. If a preferred component was passed in and it accepts the file,
+ *    it is used.
+ * 2. Otherwise, if the MCA parameter identified by
+ *    mca_fcoll_base_param holds a non-empty list of names, the first
+ *    available component with a matching name that accepts the file
+ *    is used; if there is none the selection fails.
+ * 3. Otherwise the query function of every available component is
+ *    called.  Each query returns a module and a priority; the module
+ *    with the highest priority is initialised for the file and all
+ *    other components that returned a module are unqueried.
+ *
+ * Returns the result of the selected module's init function,
+ * OMPI_ERROR if no component could be selected, or
+ * OMPI_ERR_OUT_OF_RESOURCE if a bookkeeping item could not be
+ * allocated.
+ */
+
+int mca_fcoll_base_file_select (struct mca_io_ompio_file_t *file,
+                                mca_base_component_t *preferred)
+{
+    int priority;
+    int best_priority = -1;
+    int err = MPI_SUCCESS;
+    char *names = NULL;
+    opal_list_item_t *item;
+    mca_base_component_priority_list_item_t *cpli;
+    mca_fcoll_base_component_t *component;
+    mca_fcoll_base_component_t *best_component = NULL;
+    mca_fcoll_base_module_t *module;
+    opal_list_t queried;
+    queried_module_t *om;
+
+    /* Check and see if a preferred component was provided. If it was
+       provided then it should be used (if possible) */
+    if (NULL != preferred) {
+        opal_output_verbose(10, mca_fcoll_base_output,
+                            "fcoll:base:file_select: Checking preferred component: %s",
+                            preferred->mca_component_name);
+
+        if (fcoll_base_try_component (file,
+                                      (mca_fcoll_base_component_t *) preferred,
+                                      &err)) {
+            return err;
+        }
+        /* The preferred component is present, but is unable to run.
+         * Fall through and select from the list of available
+         * components instead.
+         */
+    }
+
+    /* Check if anything was requested by means of the name parameter */
+    mca_base_param_lookup_string (mca_fcoll_base_param, &names);
+
+    if (NULL != names && 0 < strlen(names)) {
+        err = fcoll_base_select_by_name (file, names);
+        free (names);
+        return err;
+    }
+    /* the looked-up string (possibly empty, possibly NULL) is no
+     * longer needed */
+    free (names);
+
+    /*
+     * Nothing specific was asked for: go through the list of available
+     * components, find the one which has the highest priority and use
+     * that for this file.
+     */
+    OBJ_CONSTRUCT(&queried, opal_list_t);
+
+    for (item = opal_list_get_first(&mca_fcoll_base_components_available);
+         item != opal_list_get_end(&mca_fcoll_base_components_available);
+         item = opal_list_get_next(item)) {
+        /*
+         * convert the opal_list_item_t returned into the proper type
+         */
+        cpli = (mca_base_component_priority_list_item_t *) item;
+        component = (mca_fcoll_base_component_t *) cpli->super.cli_component;
+        opal_output_verbose(10, mca_fcoll_base_output,
+                            "select: initialising %s component %s",
+                            component->fcollm_version.mca_type_name,
+                            component->fcollm_version.mca_component_name);
+
+        /*
+         * we can call the query function only if there is a function :-)
+         */
+        if (NULL == component->fcollm_file_query) {
+            opal_output_verbose(10, mca_fcoll_base_output,
+                                "select: no query, ignoring the component");
+            continue;
+        }
+
+        /*
+         * call the query function and see what it returns
+         */
+        module = component->fcollm_file_query (file, &priority);
+        if (NULL == module ||
+            NULL == module->fcoll_module_init) {
+            /*
+             * query did not return any action which can be used
+             */
+            opal_output_verbose(10, mca_fcoll_base_output,
+                                "select: query returned failure");
+            continue;
+        }
+
+        opal_output_verbose(10, mca_fcoll_base_output,
+                            "select: query returned priority %d",
+                            priority);
+
+        om = OBJ_NEW(queried_module_t);
+        if (NULL == om) {
+            /*
+             * out of space: undo the query that was just made as well
+             * as all the ones remembered so far
+             */
+            if (NULL != component->fcollm_file_unquery) {
+                (void) component->fcollm_file_unquery (file);
+            }
+            fcoll_base_discard_queried (&queried, file);
+            return OMPI_ERR_OUT_OF_RESOURCE;
+        }
+        om->om_component = component;
+        om->om_module = module;
+        opal_list_append(&queried, (opal_list_item_t *)om);
+
+        /*
+         * is this the best component we have found till now?
+         */
+        if (priority > best_priority) {
+            best_priority = priority;
+            best_component = component;
+        }
+    } /* end for ... end of traversal */
+
+    if (NULL == best_component) {
+        /*
+         * This typically means that there was no component which was
+         * able to run properly this time. So, we need to abort
+         * JMS replace with show_help
+         */
+        fcoll_base_discard_queried (&queried, file);
+        return OMPI_ERROR;
+    }
+
+    /*
+     * We now have a list of components which have successfully
+     * returned their priorities from the query. We now have to
+     * unquery() those components which have not been selected and
+     * init() the component which was selected
+     */
+    while (NULL != (item = opal_list_remove_first(&queried))) {
+        om = (queried_module_t *) item;
+        if (om->om_component == best_component) {
+            /*
+             * this is the chosen component, we have to initialise the
+             * module of this component.
+             *
+             * ANJU: a component might not have all the functions
+             * defined.  Wherever a function pointer is null in the
+             * module structure we need to fill it in with the base
+             * structure function pointers. This is yet to be done
+             *
+             * We do not return here because the remaining list items
+             * still have to be released.
+             */
+            file->f_fcoll = om->om_module;
+            err = om->om_module->fcoll_module_init(file);
+            file->f_fcoll_component = (mca_base_component_t *)best_component;
+        } else {
+            /*
+             * this is not the chosen one: unquery the component, but
+             * only if it has some clean up job to do.  Components
+             * which are queried but do not actually do anything
+             * typically do not have an unquery.
+             */
+            if (NULL != om->om_component->fcollm_file_unquery) {
+                (void) om->om_component->fcollm_file_unquery(file);
+                opal_output_verbose(10, mca_fcoll_base_output,
+                                    "select: component %s is not selected",
+                                    om->om_component->fcollm_version.mca_component_name);
+            }
+        }
+        OBJ_RELEASE(om);
+    } /* traversing through the entire list */
+
+    opal_output_verbose(10, mca_fcoll_base_output,
+                        "select: component %s selected",
+                        best_component->fcollm_version.mca_component_name);
+
+    OBJ_DESTRUCT(&queried);
+
+    return err;
+}
+
+/*
+ * Table lookup used to decide whether the component called "name"
+ * suits the file.  The decision compares the file's f_cc_size with
+ * its f_bytes_per_agg and f_stripe_size:
+ *
+ *   "individual": f_cc_size >= f_bytes_per_agg and >= f_stripe_size
+ *   "dynamic":    f_cc_size <  f_bytes_per_agg and >= f_stripe_size
+ *   "two_phase":  f_cc_size <  f_bytes_per_agg and <  f_stripe_size
+ *
+ * Returns 1 if the named component matches, 0 otherwise (including
+ * for any other name).
+ *
+ * NOTE(review): only the comparison against f_bytes_per_agg casts
+ * f_cc_size to int -- confirm that the field types make the uncast
+ * comparison against f_stripe_size behave as intended.
+ */
+int mca_fcoll_base_query_table (struct mca_io_ompio_file_t *file, char *name)
+{
+    if (!strcmp (name, "individual")) {
+        if ((int)file->f_cc_size >= file->f_bytes_per_agg &&
+            file->f_cc_size >= file->f_stripe_size) {
+            return 1;
+        }
+    }
+    if (!strcmp (name, "dynamic")) {
+        if ((int)file->f_cc_size < file->f_bytes_per_agg &&
+            file->f_cc_size >= file->f_stripe_size) {
+            return 1;
+        }
+    }
+    if (!strcmp (name, "two_phase")) {
+        if ((int)file->f_cc_size < file->f_bytes_per_agg &&
+            file->f_cc_size < file->f_stripe_size) {
+            return 1;
+        }
+    }
+    return 0;
+}
diff --git a/ompi/mca/fcoll/base/fcoll_base_file_unselect.c b/ompi/mca/fcoll/base/fcoll_base_file_unselect.c
new file mode 100644
index 0000000000..c59728cce0
--- /dev/null
+++ b/ompi/mca/fcoll/base/fcoll_base_file_unselect.c
@@ -0,0 +1,41 @@
+/*
+ * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
+ *                         University Research and Technology
+ *                         Corporation.  All rights reserved.
+ * Copyright (c) 2004-2005 The University of Tennessee and The University
+ *                         of Tennessee Research Foundation.  All rights
+ *                         reserved.
+ * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
+ *                         University of Stuttgart.  All rights reserved.
+ * Copyright (c) 2004-2005 The Regents of the University of California.
+ *                         All rights reserved.
+ * Copyright (c) 2008-2011 University of Houston. All rights reserved.
+ * $COPYRIGHT$
+ *
+ * Additional copyrights may follow
+ *
+ * $HEADER$
+ */
+
+#include "ompi_config.h"
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "mpi.h"
+#include "ompi/mca/io/ompio/io_ompio.h"
+#include "opal/util/show_help.h"
+#include "opal/mca/mca.h"
+#include "opal/mca/base/base.h"
+#include "ompi/mca/fcoll/fcoll.h"
+#include "ompi/mca/fcoll/base/base.h"
+
+/*
+ * Detach the fcoll module from a file by calling the module's finalize
+ * function.  Returns whatever finalize returns; if the file has no
+ * fcoll module, or the module has no finalize function, there is
+ * nothing to do and OMPI_SUCCESS is returned.
+ */
+int mca_fcoll_base_file_unselect(mca_io_ompio_file_t *file)
+{
+    if (NULL == file->f_fcoll ||
+        NULL == file->f_fcoll->fcoll_module_finalize) {
+        return OMPI_SUCCESS;
+    }
+
+    return file->f_fcoll->fcoll_module_finalize(file);
+}
diff --git a/ompi/mca/fcoll/base/fcoll_base_find_available.c b/ompi/mca/fcoll/base/fcoll_base_find_available.c
new file mode 100644
index 0000000000..c9ee1d9170
--- /dev/null
+++ b/ompi/mca/fcoll/base/fcoll_base_find_available.c
@@ -0,0 +1,171 @@
+/*
+ * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
+ *                         University Research and Technology
+ *                         Corporation.  All rights reserved.
+ * Copyright (c) 2004-2005 The University of Tennessee and The University
+ *                         of Tennessee Research Foundation.  All rights
+ *                         reserved.
+ * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
+ *                         University of Stuttgart.  All rights reserved.
+ * Copyright (c) 2004-2005 The Regents of the University of California.
+ *                         All rights reserved.
+ * Copyright (c) 2008-2011 University of Houston. All rights reserved.
+ * $COPYRIGHT$
+ *
+ * Additional copyrights may follow
+ *
+ * $HEADER$
+ */
+
+#include "ompi_config.h"
+
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "mpi.h"
+#include "ompi/constants.h"
+#include "opal/class/opal_list.h"
+#include "orte/util/show_help.h"
+#include "opal/mca/mca.h"
+#include "opal/mca/base/base.h"
+#include "opal/mca/base/mca_base_component_repository.h"
+#include "ompi/mca/fcoll/fcoll.h"
+#include "ompi/mca/fcoll/base/base.h"
+
+opal_list_t mca_fcoll_base_modules_available;
+bool mca_fcoll_base_modules_available_valid = false;
+
+static int init_query(const mca_base_component_t *m,
+                      mca_base_component_priority_list_item_t *entry,
+                      bool enable_progress_threads,
+                      bool enable_mpi_threads);
+static int init_query_2_0_0(const mca_base_component_t *component,
+                            mca_base_component_priority_list_item_t *entry,
+                            bool enable_progress_threads,
+                            bool enable_mpi_threads);
+
+/*
+ * Move every component from the "opened" list that answers its init
+ * query with OMPI_SUCCESS to the "available" list; components that
+ * decline are released from the component repository.  Afterwards the
+ * opened list is destructed and flagged invalid.
+ *
+ * Returns OMPI_SUCCESS if at least one component is available,
+ * OMPI_ERROR if none is, and OMPI_ERR_OUT_OF_RESOURCE if a list entry
+ * could not be allocated.
+ */
+int mca_fcoll_base_find_available(bool enable_progress_threads,
+                                  bool enable_mpi_threads)
+{
+    bool found = false;
+    mca_base_component_priority_list_item_t *entry;
+    opal_list_item_t *p;
+
+    /* Initialize the list */
+
+    OBJ_CONSTRUCT(&mca_fcoll_base_components_available, opal_list_t);
+    mca_fcoll_base_components_available_valid = true;
+
+    /* The list of components which we should check is already present
+       in mca_fcoll_base_components_opened, which was established in
+       mca_fcoll_base_open */
+
+    for (p = opal_list_remove_first (&mca_fcoll_base_components_opened);
+         NULL != p;
+         p = opal_list_remove_first (&mca_fcoll_base_components_opened)) {
+        entry = OBJ_NEW(mca_base_component_priority_list_item_t);
+        if (NULL == entry) {
+            /* Out of memory.  Put the component back at the head of
+               the opened list, which is still flagged valid, so that
+               it is not lost, and give up. */
+            opal_list_prepend (&mca_fcoll_base_components_opened, p);
+            return OMPI_ERR_OUT_OF_RESOURCE;
+        }
+        entry->super.cli_component =
+            ((mca_base_component_list_item_t *)p)->cli_component;
+
+        /* Now for this entry, we have to determine the thread level. Call
+           a subroutine to do the job for us */
+
+        if (OMPI_SUCCESS == init_query(entry->super.cli_component, entry,
+                                       enable_progress_threads,
+                                       enable_mpi_threads)) {
+            /* Save the results in the list. The priority is not relevant at
+               this point in time. But we save the thread arguments so that
+               the initial selection algorithm can negotiate overall thread
+               level for this process */
+            entry->cpli_priority = 0;
+            opal_list_append (&mca_fcoll_base_components_available,
+                              (opal_list_item_t *) entry);
+            found = true;
+        } else {
+            /* The component does not want to run, so close it. Its close()
+               has already been invoked. Close it out of the DSO repository
+               (if it is there in the repository) */
+            mca_base_component_repository_release(entry->super.cli_component);
+            OBJ_RELEASE(entry);
+        }
+        /* Free entry from the "opened" list */
+        OBJ_RELEASE(p);
+    }
+
+    /* The opened list is no longer necessary, so we can free it */
+    OBJ_DESTRUCT (&mca_fcoll_base_components_opened);
+    mca_fcoll_base_components_opened_valid = false;
+
+    /* There should at least be one fcoll component which was available */
+    if (false == found) {
+        /* Need to free all items in the list */
+        OBJ_DESTRUCT(&mca_fcoll_base_components_available);
+        mca_fcoll_base_components_available_valid = false;
+        opal_output_verbose (10, mca_fcoll_base_output,
+                             "fcoll:find_available: no fcoll components available!");
+        return OMPI_ERROR;
+    }
+
+    /* All done */
+    return OMPI_SUCCESS;
+}
+
+
+/*
+ * Run the init query that matches the component's fcoll API version.
+ * Whenever the result is not OMPI_SUCCESS -- the query declined or the
+ * API version is not recognised -- the component's close function is
+ * invoked (if it has one), so the caller only has to release the
+ * component.  Returns the query result, or OMPI_ERROR for an
+ * unrecognised API version.
+ */
+static int init_query(const mca_base_component_t *m,
+                      mca_base_component_priority_list_item_t *entry,
+                      bool enable_progress_threads,
+                      bool enable_mpi_threads)
+{
+    int ret;
+
+    opal_output_verbose(10, mca_fcoll_base_output,
+                        "fcoll:find_available: querying fcoll component %s",
+                        m->mca_component_name);
+
+    /* This component has been successfully opened, now try to query it */
+    if (2 == m->mca_type_major_version &&
+        0 == m->mca_type_minor_version &&
+        0 == m->mca_type_release_version) {
+        ret = init_query_2_0_0(m, entry, enable_progress_threads,
+                               enable_mpi_threads);
+    } else {
+        /* unrecognised API version: treat it like a failed query so
+           that the component still gets closed below */
+        opal_output_verbose(10, mca_fcoll_base_output,
+                            "fcoll:find_available:unrecognised fcoll API version (%d.%d.%d)",
+                            m->mca_type_major_version,
+                            m->mca_type_minor_version,
+                            m->mca_type_release_version);
+        ret = OMPI_ERROR;
+    }
+
+    /* Query done -- look at return value to see what happened */
+    if (OMPI_SUCCESS != ret) {
+        opal_output_verbose(10, mca_fcoll_base_output,
+                            "fcoll:find_available fcoll component %s is not available",
+                            m->mca_component_name);
+        if (NULL != m->mca_close_component) {
+            m->mca_close_component();
+        }
+    } else {
+        opal_output_verbose(10, mca_fcoll_base_output,
+                            "fcoll:find_avalable: fcoll component %s is available",
+                            m->mca_component_name);
+
+    }
+    /* All done */
+    return ret;
+}
+
+
+/*
+ * Init query for components built against version 2.0.0 of the fcoll
+ * API: forwards the two thread flags to the component's
+ * fcollm_init_query function and returns its result.  "entry" is not
+ * used.
+ */
+static int init_query_2_0_0(const mca_base_component_t *component,
+                            mca_base_component_priority_list_item_t *entry,
+                            bool enable_progress_threads,
+                            bool enable_mpi_threads)
+{
+    mca_fcoll_base_component_2_0_0_t *fcoll =
+        (mca_fcoll_base_component_2_0_0_t *) component;
+
+    return fcoll->fcollm_init_query(enable_progress_threads,
+                                    enable_mpi_threads);
+}
diff --git a/ompi/mca/fcoll/base/fcoll_base_open.c b/ompi/mca/fcoll/base/fcoll_base_open.c
new file mode 100644
index 0000000000..384c1c74c2
--- /dev/null
+++ b/ompi/mca/fcoll/base/fcoll_base_open.c
@@ -0,0 +1,88 @@
+/*
+ * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
+ *                         University Research and Technology
+ *                         Corporation.  All rights reserved.
+ * Copyright (c) 2004-2005 The University of Tennessee and The University
+ *                         of Tennessee Research Foundation.  All rights
+ *                         reserved.
+ * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
+ *                         University of Stuttgart.  All rights reserved.
+ * Copyright (c) 2004-2005 The Regents of the University of California.
+ *                         All rights reserved.
+ * Copyright (c) 2008-2011 University of Houston. All rights reserved.
+ * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + + +#include "ompi_config.h" +#include + +#include "ompi/class/ompi_free_list.h" +#include "orte/util/show_help.h" +#include "opal/mca/mca.h" +#include "opal/mca/base/base.h" +#include "opal/mca/base/mca_base_param.h" + +#include "orte/util/show_help.h" + +#include "ompi/mca/fcoll/fcoll.h" +#include "ompi/mca/fcoll/base/base.h" + + +/* + * The following file was created by configure. It contains extern + * statements and the definition of an array of pointers to each + * component's public mca_base_component_t struct. + */ +#ifdef __WINDOWS__ + const mca_base_component_t *mca_fcoll_base_static_components[] = {NULL}; +#else +#include "ompi/mca/fcoll/base/static-components.h" +#endif + +/* + * Global variables; most of which are loaded by back-ends of MCA + * variables + */ +int mca_fcoll_base_param = -1; +int mca_fcoll_base_output = -1; + +opal_list_t mca_fcoll_base_components_opened; +opal_list_t mca_fcoll_base_components_available; + +bool mca_fcoll_base_components_available_valid = false; +bool mca_fcoll_base_components_opened_valid = false; + +mca_fcoll_base_component_t mca_fcoll_base_selected_component; +mca_fcoll_base_module_t mca_fcoll; + +/* + * Function for finding and opening either all MCA components, or the one + * that was specifically requested via a MCA parameter. 
+ */
+int mca_fcoll_base_open(void)
+{
+    /* Open an output stream for this framework */
+
+    mca_fcoll_base_output = opal_output_open(NULL);
+
+    /* Open up all available components */
+
+    if (OMPI_SUCCESS !=
+        mca_base_components_open("fcoll", mca_fcoll_base_output,
+                                 mca_fcoll_base_static_components,
+                                 &mca_fcoll_base_components_opened, true)) {
+        /* Do not leave the stream opened above behind on failure */
+        opal_output_close(mca_fcoll_base_output);
+        mca_fcoll_base_output = -1;
+        return OMPI_ERROR;
+    }
+    mca_fcoll_base_components_opened_valid = true;
+
+    /* Find the index of the MCA "fcoll" param for selection */
+
+    mca_fcoll_base_param = mca_base_param_find("fcoll", "base", NULL);
+
+    return OMPI_SUCCESS;
+}
diff --git a/ompi/mca/fcoll/base/static-components.h b/ompi/mca/fcoll/base/static-components.h
new file mode 100644
index 0000000000..e4cb0fa68d
--- /dev/null
+++ b/ompi/mca/fcoll/base/static-components.h
@@ -0,0 +1,18 @@
+/*
+ * $HEADER$
+ */
+#if defined(c_plusplus) || defined(__cplusplus)
+extern "C" {
+#endif
+
+
+
+const mca_base_component_t *mca_fcoll_base_static_components[] = {
+
+  NULL
+};
+
+#if defined(c_plusplus) || defined(__cplusplus)
+}
+#endif
+
diff --git a/ompi/mca/fcoll/dynamic/Makefile.am b/ompi/mca/fcoll/dynamic/Makefile.am
new file mode 100644
index 0000000000..1ef3487360
--- /dev/null
+++ b/ompi/mca/fcoll/dynamic/Makefile.am
@@ -0,0 +1,54 @@
+#
+# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
+#                         University Research and Technology
+#                         Corporation.  All rights reserved.
+# Copyright (c) 2004-2005 The University of Tennessee and The University
+#                         of Tennessee Research Foundation.  All rights
+#                         reserved.
+# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
+#                         University of Stuttgart.  All rights reserved.
+# Copyright (c) 2004-2005 The Regents of the University of California.
+#                         All rights reserved.
+# Copyright (c) 2008-2011 University of Houston. All rights reserved.
+# $COPYRIGHT$
+#
+# Additional copyrights may follow
+#
+# $HEADER$
+#
+
+sources = \
+        fcoll_dynamic.h \
+        fcoll_dynamic_module.c \
+        fcoll_dynamic_component.c \
+        fcoll_dynamic_file_read_all.c \
+        fcoll_dynamic_file_read_all_begin.c \
+        fcoll_dynamic_file_read_all_end.c \
+        fcoll_dynamic_file_write_all.c \
+        fcoll_dynamic_file_write_all_begin.c \
+        fcoll_dynamic_file_write_all_end.c
+
+# Make the output library in this directory, and name it either
+# mca_<type>_<name>.la (for DSO builds) or libmca_<type>_<name>.la
+# (for static builds).
+
+if MCA_BUILD_ompi_fcoll_dynamic_DSO
+component_noinst =
+component_install = mca_fcoll_dynamic.la
+else
+component_noinst = libmca_fcoll_dynamic.la
+component_install =
+endif
+
+mcacomponentdir = $(libdir)/openmpi
+mcacomponent_LTLIBRARIES = $(component_install)
+mca_fcoll_dynamic_la_SOURCES = $(sources)
+mca_fcoll_dynamic_la_LDFLAGS = -module -avoid-version
+mca_fcoll_dynamic_la_LIBADD = \
+        $(top_ompi_builddir)/ompi/libmpi.la \
+        $(top_ompi_builddir)/orte/libopen-rte.la \
+        $(top_ompi_builddir)/opal/libopen-pal.la
+
+noinst_LTLIBRARIES = $(component_noinst)
+libmca_fcoll_dynamic_la_SOURCES =$(sources)
+libmca_fcoll_dynamic_la_LDFLAGS = -module -avoid-version
diff --git a/ompi/mca/fcoll/dynamic/fcoll_dynamic.h b/ompi/mca/fcoll/dynamic/fcoll_dynamic.h
new file mode 100644
index 0000000000..6a581ed2d7
--- /dev/null
+++ b/ompi/mca/fcoll/dynamic/fcoll_dynamic.h
@@ -0,0 +1,86 @@
+/*
+ * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
+ *                         University Research and Technology
+ *                         Corporation.  All rights reserved.
+ * Copyright (c) 2004-2006 The University of Tennessee and The University
+ *                         of Tennessee Research Foundation.  All rights
+ *                         reserved.
+ * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
+ *                         University of Stuttgart.  All rights reserved.
+ * Copyright (c) 2004-2005 The Regents of the University of California.
+ *                         All rights reserved.
+ * Copyright (c) 2008-2011 University of Houston.
All rights reserved. + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#ifndef MCA_FCOLL_DYNAMIC_EXPORT_H +#define MCA_FCOLL_DYNAMIC_EXPORT_H + +#include "ompi_config.h" + +#include "mpi.h" +#include "opal/mca/mca.h" +#include "ompi/mca/fcoll/fcoll.h" +#include "ompi/mca/fcoll/base/base.h" +#include "ompi/mca/io/ompio/io_ompio.h" + +BEGIN_C_DECLS + +/* Globally exported variables */ + +extern int mca_fcoll_dynamic_priority; +extern int mca_fcoll_dynamic_num_io_procs; +extern int mca_fcoll_dynamic_constant_cbs; +extern int mca_fcoll_dynamic_cycle_buffer_size; + +OMPI_MODULE_DECLSPEC extern mca_fcoll_base_component_2_0_0_t mca_fcoll_dynamic_component; + +/* API functions */ + +int mca_fcoll_dynamic_component_init_query(bool enable_progress_threads, + bool enable_mpi_threads); +struct mca_fcoll_base_module_1_0_0_t * +mca_fcoll_dynamic_component_file_query (mca_io_ompio_file_t *fh, int *priority); + +int mca_fcoll_dynamic_component_file_unquery (mca_io_ompio_file_t *file); + +int mca_fcoll_dynamic_module_init (mca_io_ompio_file_t *file); +int mca_fcoll_dynamic_module_finalize (mca_io_ompio_file_t *file); + +int mca_fcoll_dynamic_file_read_all (mca_io_ompio_file_t *fh, + void *buf, + int count, + struct ompi_datatype_t *datatype, + ompi_status_public_t * status); + +int mca_fcoll_dynamic_file_read_all_begin (mca_io_ompio_file_t *fh, + void *buf, + int count, + struct ompi_datatype_t *datatype); + +int mca_fcoll_dynamic_file_read_all_end (mca_io_ompio_file_t *fh, + void *buf, + ompi_status_public_t * status); + +int mca_fcoll_dynamic_file_write_all (mca_io_ompio_file_t *fh, + void *buf, + int count, + struct ompi_datatype_t *datatype, + ompi_status_public_t * status); + +int mca_fcoll_dynamic_file_write_all_begin (mca_io_ompio_file_t *fh, + void *buf, + int count, + struct ompi_datatype_t *datatype); + +int mca_fcoll_dynamic_file_write_all_end (mca_io_ompio_file_t *fh, + void *buf, + ompi_status_public_t * status); + +END_C_DECLS + +#endif 
/* MCA_FCOLL_DYNAMIC_EXPORT_H */
diff --git a/ompi/mca/fcoll/dynamic/fcoll_dynamic_component.c b/ompi/mca/fcoll/dynamic/fcoll_dynamic_component.c
new file mode 100644
index 0000000000..7b4b5677bf
--- /dev/null
+++ b/ompi/mca/fcoll/dynamic/fcoll_dynamic_component.c
@@ -0,0 +1,129 @@
+/*
+ * Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana
+ *                         University Research and Technology
+ *                         Corporation. All rights reserved.
+ * Copyright (c) 2004-2005 The University of Tennessee and The University
+ *                         of Tennessee Research Foundation. All rights
+ *                         reserved.
+ * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
+ *                         University of Stuttgart. All rights reserved.
+ * Copyright (c) 2004-2005 The Regents of the University of California.
+ *                         All rights reserved.
+ * Copyright (c) 2008 Cisco Systems, Inc. All rights reserved.
+ * Copyright (c) 2008-2011 University of Houston. All rights reserved.
+ * $COPYRIGHT$
+ * 
+ * Additional copyrights may follow
+ * 
+ * $HEADER$
+ *
+ * These symbols are in a file by themselves to provide nice linker
+ * semantics. Since linkers generally pull in symbols by object
+ * files, keeping these symbols as the only symbols in this file
+ * prevents utility programs such as "ompi_info" from having to import
+ * entire components just to query their version and parameters.
+ */
+
+#include "ompi_config.h"
+#include "fcoll_dynamic.h"
+#include "mpi.h"
+
+/*
+ * Human-readable version banner of the dynamic fcoll component
+ */
+const char *mca_fcoll_dynamic_component_version_string =
+    "Open MPI dynamic collective MCA component version " OMPI_VERSION;
+
+/*
+ * Tunables and their built-in defaults; dynamic_register() exposes them
+ * as MCA parameters.
+ */
+int mca_fcoll_dynamic_priority = 10;
+int mca_fcoll_dynamic_num_io_procs = -1;
+int mca_fcoll_dynamic_constant_cbs = 0;
+int mca_fcoll_dynamic_cycle_buffer_size = OMPIO_PREALLOC_MAX_BUF_SIZE;
+
+/*
+ * File-local callback, referenced from the component descriptor below
+ */
+static int dynamic_register(void);
+
+/*
+ * The component descriptor: meta information first, then the callbacks
+ * through which the fcoll framework talks to this component.
+ */
+mca_fcoll_base_component_2_0_0_t mca_fcoll_dynamic_component = {
+
+    /* Generic MCA component header */
+    {
+        MCA_FCOLL_BASE_VERSION_2_0_0,
+
+        /* Name and version of this component */
+        "dynamic",
+        OMPI_MAJOR_VERSION,
+        OMPI_MINOR_VERSION,
+        OMPI_RELEASE_VERSION,
+
+        /* Component callbacks */
+        dynamic_register,
+        NULL
+    },
+
+    /* Component metadata */
+    {
+        /* The component is checkpoint ready */
+        MCA_BASE_METADATA_PARAM_CHECKPOINT
+    },
+
+    /* fcoll-specific entry points */
+    mca_fcoll_dynamic_component_init_query,
+    mca_fcoll_dynamic_component_file_query,
+    mca_fcoll_dynamic_component_file_unquery
+};
+
+
+/*
+ * Pick up and register the MCA parameters of this component.
+ *
+ * Pass 1 looks every tunable up under its "fcoll"-level name
+ * ("dynamic_<name>") and, if such a parameter exists, loads its value.
+ * Pass 2 registers each tunable for this component, using the value the
+ * variable holds at that point as the default.
+ *
+ * Always returns OMPI_SUCCESS.
+ */
+static int
+dynamic_register(void)
+{
+    static const struct {
+        const char *lookup_name;   /* name used for the "fcoll" lookup */
+        const char *reg_name;      /* name used for the registration */
+        const char *help;          /* help text shown to the user */
+        int *storage;              /* variable backing the parameter */
+    } tunables[] = {
+        { "dynamic_priority", "priority",
+          "Priority of the dynamic fcoll component",
+          &mca_fcoll_dynamic_priority },
+        { "dynamic_num_io_procs", "num_io_procs",
+          "Number of writers in the dynamic fcoll component",
+          &mca_fcoll_dynamic_num_io_procs },
+        { "dynamic_constant_cbs", "constant_cbs",
+          "wether we are using constant or scaling cycle buffer size in the dynamic fcoll component",
+          &mca_fcoll_dynamic_constant_cbs },
+        { "dynamic_cycle_buffer_size", "cycle_buffer_size",
+          "Cycle Buffer Size of the dynamic fcoll component",
+          &mca_fcoll_dynamic_cycle_buffer_size }
+    };
+    const size_t num_tunables = sizeof(tunables) / sizeof(tunables[0]);
+    size_t t;
+
+    /* Pass 1: load values of already existing parameters */
+    for (t = 0; t < num_tunables; ++t) {
+        int handle = mca_base_param_find ("fcoll", NULL, tunables[t].lookup_name);
+
+        if (handle >= 0) {
+            mca_base_param_lookup_int (handle, tunables[t].storage);
+        }
+    }
+
+    /* Pass 2: register, in the same order, with the current values as
+       defaults */
+    for (t = 0; t < num_tunables; ++t) {
+        mca_base_param_reg_int (&mca_fcoll_dynamic_component.fcollm_version,
+                                tunables[t].reg_name,
+                                tunables[t].help,
+                                false, false, *tunables[t].storage,
+                                tunables[t].storage);
+    }
+
+    return OMPI_SUCCESS;
+}
diff --git a/ompi/mca/fcoll/dynamic/fcoll_dynamic_file_read_all.c b/ompi/mca/fcoll/dynamic/fcoll_dynamic_file_read_all.c
new file mode 100644
index 0000000000..4ea9cb1ac9
--- /dev/null
+++ b/ompi/mca/fcoll/dynamic/fcoll_dynamic_file_read_all.c
@@ -0,0 +1,622 @@
+/*
+ * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
+ *                         University Research and Technology
+ *                         Corporation. All rights reserved.
+ * Copyright (c) 2004-2005 The University of Tennessee and The University
+ *                         of Tennessee Research Foundation. All rights
+ *                         reserved.
+ * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
+ *                         University of Stuttgart. All rights reserved.
+ * Copyright (c) 2004-2005 The Regents of the University of California.
+ *                         All rights reserved.
+ * Copyright (c) 2008-2011 University of Houston. All rights reserved.
+ * $COPYRIGHT$
+ * 
+ * Additional copyrights may follow
+ * 
+ * $HEADER$
+ */
+
+#include "ompi_config.h"
+#include "fcoll_dynamic.h"
+
+#include "mpi.h"
+#include "ompi/constants.h"
+#include "ompi/mca/fcoll/fcoll.h"
+#include "ompi/mca/io/ompio/io_ompio.h"
+#include "ompi/mca/io/io.h"
+#include "math.h"
+#include "ompi/mca/pml/pml.h"
+#include <unistd.h>
+
+/*
+ * Map entry `entry` of the gathered file view to the group-local index
+ * of the process that contributed it.  The gathered view is laid out
+ * process by process, fview_count[j] entries for process j.  `fallback`
+ * is returned when the entry lies beyond the range of the last process.
+ * Unlike the open-coded search it replaces, this never indexes
+ * fview_count past nprocs-1.
+ */
+static int
+dynamic_read_all_find_owner (const int *fview_count,
+                             int nprocs,
+                             int entry,
+                             int fallback)
+{
+    int upper = 0; /* one past the last entry owned by process j */
+    int j;
+
+    for (j = 0 ; j < nprocs ; j++) {
+        upper += fview_count[j];
+        if (entry < upper) {
+            return j;
+        }
+    }
+    return fallback;
+}
+
+/*
+ * Collective read using the "dynamic" aggregation scheme.
+ *
+ * Every process of the group publishes the file regions of its current
+ * file view; the gathered regions are ordered by
+ * ompi_io_ompio_sort_iovec() and consumed in cycles of at most
+ * mca_fcoll_dynamic_cycle_buffer_size bytes.  In each cycle the
+ * aggregator reads the regions of that cycle through the fbtl and
+ * scatters the bytes to the processes that asked for them.
+ *
+ *  fh        file handle; f_flags, f_io_array and f_num_of_io_entries
+ *            are modified
+ *  buf       user buffer receiving the data
+ *  count     number of `datatype' elements to read
+ *  datatype  element type; data of a type that is not contiguous in
+ *            memory is received into a bounce buffer and unpacked
+ *  status    not updated by this implementation
+ *
+ * Returns OMPI_SUCCESS, OMPI_ERR_BAD_PARAM if the cycle buffer size is
+ * not positive, OMPI_ERR_OUT_OF_RESOURCE if an allocation fails and
+ * OMPI_ERROR if the fbtl read fails.  All temporary buffers are released
+ * on every path.
+ *
+ * NOTE(review): the return codes of the ompi_io_ompio_* helpers are
+ * still ignored, as before.
+ */
+int
+mca_fcoll_dynamic_file_read_all (mca_io_ompio_file_t *fh, 
+                                 void *buf, 
+                                 int count, 
+                                 struct ompi_datatype_t *datatype, 
+                                 ompi_status_public_t *status)
+{
+    int ret = OMPI_SUCCESS;
+    int is_aggregator = 0;
+    int io_array_owned = 0;   /* fh->f_io_array was allocated in here */
+    int nprocs = 0;
+
+    MPI_Aint position = 0;    /* bytes already delivered into buf */
+    MPI_Aint total_bytes = 0; /* total bytes to be read by the group */
+    MPI_Aint bytes_to_read_in_cycle = 0; /* left to be read in a cycle */
+    MPI_Aint bytes_per_cycle = 0; /* bytes handled in one cycle */
+    int index = 0;
+    int cycles = 0;
+    int i = 0, j = 0;
+    int x = 0;  /* cursor into sorted[] for building the io array */
+    int n = 0;  /* group-local index of the owner of the current entry */
+    MPI_Aint bytes_remaining = 0; /* unaccounted rest of the entry at
+                                     current_index */
+    int bytes_received = 0;
+    /* iovec structure and count of the buffer passed in */
+    uint32_t iov_count = 0;
+    struct iovec *decoded_iov = NULL;
+    int iov_index = 0;
+    size_t current_position = 0;
+    char *receive_buf = NULL;
+
+    /* global iovec at the readers that contain the iovecs created from
+       file_set_view */
+    uint32_t total_fview_count = 0;
+    struct iovec *global_fview = NULL;
+    int local_count = 0;
+    struct iovec *iov = NULL;
+    int *fview_count = NULL;
+    int current_index = 0; /* cursor into sorted[] for the accounting */
+
+    char *global_buf = NULL;
+    MPI_Aint global_count = 0;
+
+    /* array that contains the sorted indices of the global_iov */
+    int *sorted = NULL;
+    int *displs = NULL;
+    size_t max_data = 0;
+    int *bytes_per_process = NULL;
+    MPI_Aint bytes_left = 0; /* unread rest of the entry at x */
+    MPI_Aint *total_bytes_per_process = NULL;
+    MPI_Aint *temp = NULL;   /* per-process fill level of global_buf */
+
+    /* A cycle size of zero would divide by zero below and a negative one
+       would silently read nothing at all. */
+    bytes_per_cycle = mca_fcoll_dynamic_cycle_buffer_size;
+    if (bytes_per_cycle <= 0) {
+        opal_output (1, "fcoll:dynamic: cycle buffer size must be positive\n");
+        return OMPI_ERR_BAD_PARAM;
+    }
+
+    if (opal_datatype_is_contiguous_memory_layout(&datatype->super,1)) {
+        fh->f_flags |= OMPIO_CONTIGUOUS_MEMORY;
+    }
+    /**************************************************************************
+     ** In case the data is not contiguous in memory, decode it into an iovec **
+     **************************************************************************/
+    if (! (fh->f_flags & OMPIO_CONTIGUOUS_MEMORY)) {
+        ompi_io_ompio_decode_datatype (fh,
+                                       datatype,
+                                       count,
+                                       buf,
+                                       &max_data,
+                                       &decoded_iov,
+                                       &iov_count);
+    }
+    else {
+        max_data = count * datatype->super.size;
+    }
+
+    if (! (fh->f_flags & OMPIO_AGGREGATOR_IS_SET)) {
+        ompi_io_ompio_set_aggregator_props (fh, 
+                                            mca_fcoll_dynamic_num_io_procs,
+                                            max_data);
+    }
+
+    nprocs = fh->f_procs_per_group;
+    is_aggregator =
+        (fh->f_procs_in_group[fh->f_aggregator_index] == fh->f_rank);
+
+    /*********************************************************************
+     *** Sum up the number of bytes requested by the whole group       ***
+     *********************************************************************/
+    total_bytes_per_process = (MPI_Aint*)malloc (nprocs*sizeof(MPI_Aint));
+    if (NULL == total_bytes_per_process) {
+        opal_output (1, "OUT OF MEMORY\n");
+        ret = OMPI_ERR_OUT_OF_RESOURCE;
+        goto cleanup;
+    }
+
+    ompi_io_ompio_allgather_array (&max_data,
+                                   1,
+                                   MPI_LONG,
+                                   total_bytes_per_process,
+                                   1,
+                                   MPI_LONG,
+                                   fh->f_aggregator_index,
+                                   fh->f_procs_in_group,
+                                   fh->f_procs_per_group,
+                                   fh->f_comm);
+
+    for (i=0 ; i<nprocs ; i++) {
+        total_bytes += total_bytes_per_process[i];
+    }
+
+    free (total_bytes_per_process);
+    total_bytes_per_process = NULL;
+
+    /*********************************************************************
+     *** Generate the File offsets/lengths corresponding to this read  ***
+     ********************************************************************/
+    ompi_io_ompio_generate_current_file_view (fh, max_data, &iov, &local_count);
+
+    /*************************************************************
+     *** ALLGather the File View information at all processes  ***
+     *************************************************************/
+    fview_count = (int *) malloc (nprocs * sizeof (int));
+    if (NULL == fview_count) {
+        opal_output (1, "OUT OF MEMORY\n");
+        ret = OMPI_ERR_OUT_OF_RESOURCE;
+        goto cleanup;
+    }
+
+    ompi_io_ompio_allgather_array (&local_count,
+                                   1,
+                                   MPI_INT,
+                                   fview_count,
+                                   1,
+                                   MPI_INT,
+                                   fh->f_aggregator_index,
+                                   fh->f_procs_in_group,
+                                   fh->f_procs_per_group,
+                                   fh->f_comm);
+
+    displs = (int*)malloc (nprocs*sizeof(int));
+    if (NULL == displs) {
+        opal_output (1, "OUT OF MEMORY\n");
+        ret = OMPI_ERR_OUT_OF_RESOURCE;
+        goto cleanup;
+    }
+    displs[0] = 0;
+    total_fview_count = fview_count[0];
+    for (i=1 ; i<nprocs ; i++) {
+        total_fview_count += fview_count[i];
+        displs[i] = displs[i-1] + fview_count[i-1];
+    }
+
+    /* allocate the global iovec */
+    if (0 != total_fview_count) {
+        global_fview = (struct iovec*)malloc (total_fview_count *
+                                              sizeof(struct iovec));
+        if (NULL == global_fview) {
+            opal_output (1, "OUT OF MEMORY\n");
+            ret = OMPI_ERR_OUT_OF_RESOURCE;
+            goto cleanup;
+        }
+    }
+    if (fh->f_flags & OMPIO_UNIFORM_FVIEW) {
+        ompi_io_ompio_allgather_array (iov,
+                                       local_count,
+                                       fh->f_iov_type,
+                                       global_fview,
+                                       local_count,
+                                       fh->f_iov_type,
+                                       fh->f_aggregator_index,
+                                       fh->f_procs_in_group,
+                                       fh->f_procs_per_group,
+                                       fh->f_comm);
+    }
+    else {
+        ompi_io_ompio_allgatherv_array (iov,
+                                        local_count,
+                                        fh->f_iov_type,
+                                        global_fview,
+                                        fview_count,
+                                        displs,
+                                        fh->f_iov_type,
+                                        fh->f_aggregator_index,
+                                        fh->f_procs_in_group,
+                                        fh->f_procs_per_group,
+                                        fh->f_comm);
+    }
+    /* sort it */
+    if (0 != total_fview_count) {
+        sorted = (int *)malloc (total_fview_count * sizeof(int));
+        if (NULL == sorted) {
+            opal_output (1, "OUT OF MEMORY\n");
+            ret = OMPI_ERR_OUT_OF_RESOURCE;
+            goto cleanup;
+        }
+        ompi_io_ompio_sort_iovec (global_fview, total_fview_count, sorted);
+    }
+    free (iov);
+    iov = NULL;
+
+    if (is_aggregator) {
+        bytes_per_process = (int *)malloc (nprocs * sizeof (int));
+        if (NULL == bytes_per_process) {
+            opal_output (1, "OUT OF MEMORY\n");
+            ret = OMPI_ERR_OUT_OF_RESOURCE;
+            goto cleanup;
+        }
+    }
+
+    /*
+     * Calculate how many cycles are needed to read everything
+     */
+    cycles = ceil((double)total_bytes/bytes_per_cycle);
+
+    n = 0;
+    bytes_remaining = 0;
+    current_index = 0;
+
+    for (index = 0; index < cycles; index++) {
+        int k = 0; /* number of io array entries of this cycle */
+
+        if (is_aggregator) {
+            memset(displs, 0x0, nprocs*sizeof(int));
+            memset(bytes_per_process, 0x0, nprocs*sizeof(int));
+        }
+
+        if (cycles-1 == index) {
+            bytes_to_read_in_cycle = total_bytes - bytes_per_cycle*index;
+        }
+        else {
+            bytes_to_read_in_cycle = bytes_per_cycle;
+        }
+
+        /* Calculate how much data will be contributed in this cycle 
+           by each process.  Entries are consumed in sorted order; an
+           entry that does not fit into the cycle is split and its rest
+           (bytes_remaining) is carried into the next cycle. */
+        bytes_received = 0;
+
+        while (bytes_to_read_in_cycle) {
+            MPI_Aint available; /* unaccounted bytes of the current entry */
+            MPI_Aint chunk;     /* bytes of it accounted in this step */
+
+            n = dynamic_read_all_find_owner (fview_count, nprocs,
+                                             sorted[current_index], n);
+
+            available = bytes_remaining ? bytes_remaining :
+                (MPI_Aint) global_fview[sorted[current_index]].iov_len;
+
+            if (available <= bytes_to_read_in_cycle) {
+                /* the (rest of the) entry fits: finish it, move on */
+                chunk = available;
+                bytes_remaining = 0;
+                current_index ++;
+            }
+            else {
+                /* the cycle is full: split the entry */
+                chunk = bytes_to_read_in_cycle;
+                bytes_remaining = available - chunk;
+            }
+
+            if (is_aggregator) {
+                bytes_per_process[n] += chunk;
+            }
+            if (fh->f_procs_in_group[n] == fh->f_rank) {
+                bytes_received += chunk;
+            }
+            bytes_to_read_in_cycle -= chunk;
+        }
+
+        /* Calculate the displacement on where to put the data and allocate
+           the receive buffer (global_buf) */
+        if (is_aggregator) {
+            displs[0] = 0;
+            global_count = bytes_per_process[0];
+            for (i=1 ; i<nprocs ; i++) {
+                global_count += bytes_per_process[i];
+                displs[i] = displs[i-1] + bytes_per_process[i-1];
+            }
+            global_buf = malloc (global_count);
+            if (NULL == global_buf) {
+                opal_output (1, "OUT OF MEMORY\n");
+                ret = OMPI_ERR_OUT_OF_RESOURCE;
+                goto cleanup;
+            }
+        }
+
+        /**********************************************************
+         ******* Create the io array, and pass it to fbtl *********
+         *********************************************************/
+        if (is_aggregator) {
+            MPI_Aint bytes_to_read = global_count;
+            int block = 1;
+            k = 0;
+
+            temp = (MPI_Aint *)calloc (nprocs, sizeof(MPI_Aint));
+            if (NULL == temp) {
+                opal_output(1, "OUT OF MEMORY\n");
+                ret = OMPI_ERR_OUT_OF_RESOURCE;
+                goto cleanup;
+            }
+            fh->f_io_array = (mca_io_ompio_io_array_t *) malloc 
+                (OMPIO_IOVEC_INITIAL_SIZE * sizeof (mca_io_ompio_io_array_t));
+            if (NULL == fh->f_io_array) {
+                opal_output(1, "OUT OF MEMORY\n");
+                ret = OMPI_ERR_OUT_OF_RESOURCE;
+                goto cleanup;
+            }
+            io_array_owned = 1;
+
+            while (bytes_to_read) {
+                int start = 0;      /* offset of process n in global_buf */
+                size_t entry_len;   /* full length of the current entry */
+                MPI_Aint available; /* unread bytes of the current entry */
+                MPI_Aint chunk;     /* bytes of it read in this cycle */
+
+                if (OMPIO_IOVEC_INITIAL_SIZE*block <= k) {
+                    /* grow through a temporary so that the old array is
+                       not lost if realloc fails */
+                    mca_io_ompio_io_array_t *grown = NULL;
+
+                    block ++;
+                    grown = (mca_io_ompio_io_array_t *)realloc
+                        (fh->f_io_array, OMPIO_IOVEC_INITIAL_SIZE * block *
+                         sizeof(mca_io_ompio_io_array_t));
+                    if (NULL == grown) {
+                        opal_output(1, "OUT OF MEMORY\n");
+                        ret = OMPI_ERR_OUT_OF_RESOURCE;
+                        goto cleanup;
+                    }
+                    fh->f_io_array = grown;
+                }
+
+                n = dynamic_read_all_find_owner (fview_count, nprocs,
+                                                 sorted[x], n);
+                for (j=0 ; j<n ; j++) {
+                    start += bytes_per_process[j];
+                }
+
+                entry_len = global_fview[sorted[x]].iov_len;
+                available = bytes_left ? bytes_left : (MPI_Aint) entry_len;
+                chunk = (available <= bytes_to_read) ? available : bytes_to_read;
+
+                /* continue where a previous cycle stopped in this entry */
+                fh->f_io_array[k].offset = (IOVBASE_TYPE *)
+                    ((OPAL_PTRDIFF_TYPE)global_fview[sorted[x]].iov_base +
+                     (entry_len - available));
+                fh->f_io_array[k].length = chunk;
+                fh->f_io_array[k].memory_address = &global_buf[start+temp[n]];
+                temp[n] += chunk;
+                k ++;
+
+                bytes_to_read -= chunk;
+                if (chunk == available) {
+                    /* entry completely read */
+                    bytes_left = 0;
+                    x ++;
+                }
+                else {
+                    /* cycle is full, the rest follows in the next one */
+                    bytes_left = available - chunk;
+                }
+            }
+
+            fh->f_num_of_io_entries = k;
+            if (fh->f_num_of_io_entries) {
+                if (OMPI_SUCCESS != fh->f_fbtl->fbtl_preadv (fh, NULL)) {
+                    opal_output (1, "READ FAILED\n");
+                    ret = OMPI_ERROR;
+                    goto cleanup;
+                }
+            }
+            free (temp);
+            temp = NULL;
+        }
+        /**********************************************************
+         ******************** DONE READING ************************
+         *********************************************************/
+
+        /**********************************************************
+         ********* Scatter the Data from the readers **************
+         *********************************************************/
+        if (fh->f_flags & OMPIO_CONTIGUOUS_MEMORY) {
+            receive_buf = &((char*)buf)[position];
+        }
+        else if (bytes_received) {
+            /* allocate a receive buffer and copy the data that needs
+               to be received into it in case the data is non-contiguous
+               in memory */
+            receive_buf = malloc (bytes_received);
+            if (NULL == receive_buf) {
+                opal_output (1, "OUT OF MEMORY\n");
+                ret = OMPI_ERR_OUT_OF_RESOURCE;
+                goto cleanup;
+            }
+        }
+        ompi_io_ompio_scatterv_array (global_buf,
+                                      bytes_per_process,
+                                      displs,
+                                      MPI_BYTE,
+                                      receive_buf,
+                                      bytes_received,
+                                      MPI_BYTE,
+                                      fh->f_aggregator_index,
+                                      fh->f_procs_in_group,
+                                      fh->f_procs_per_group,
+                                      fh->f_comm);
+        position += bytes_received;
+
+        /* If data is not contiguous in memory, copy the data from the 
+           receive buffer into the buffer passed in */
+        if (!(fh->f_flags & OMPIO_CONTIGUOUS_MEMORY)) {
+            size_t remaining = bytes_received;
+            size_t temp_position = 0;
+
+            while (remaining) {
+                OPAL_PTRDIFF_TYPE mem_address = (OPAL_PTRDIFF_TYPE)
+                    (decoded_iov[iov_index].iov_base) + current_position;
+                size_t room = decoded_iov[iov_index].iov_len - current_position;
+
+                if (remaining >= room) {
+                    /* fill the rest of this element, go to the next */
+                    memcpy ((IOVBASE_TYPE *) mem_address,
+                            receive_buf+temp_position,
+                            room);
+                    remaining -= room;
+                    temp_position += room;
+                    iov_index = iov_index + 1;
+                    current_position = 0;
+                }
+                else {
+                    memcpy ((IOVBASE_TYPE *) mem_address,
+                            receive_buf+temp_position,
+                            remaining);
+                    current_position = current_position + remaining;
+                    remaining = 0;
+                }
+            }
+
+            free (receive_buf);
+            receive_buf = NULL;
+        }
+
+        /**********************************************************
+         **************** DONE SCATTERING OF DATA *****************
+         *********************************************************/
+
+        if (is_aggregator) {
+            fh->f_num_of_io_entries = 0;
+            free (fh->f_io_array);
+            fh->f_io_array = NULL;
+            io_array_owned = 0;
+            free (global_buf);
+            global_buf = NULL;
+        }
+    }
+
+cleanup:
+    /* Only reached with an io array still in place if a cycle failed */
+    if (io_array_owned) {
+        fh->f_num_of_io_entries = 0;
+        free (fh->f_io_array);
+        fh->f_io_array = NULL;
+    }
+    /* With contiguous memory receive_buf points into the user buffer and
+       must not be released */
+    if (!(fh->f_flags & OMPIO_CONTIGUOUS_MEMORY)) {
+        free (receive_buf);
+    }
+    free (temp);
+    free (global_buf);
+    free (total_bytes_per_process);
+    free (iov);
+    free (sorted);
+    free (global_fview);
+    free (fview_count);
+    free (decoded_iov);
+    free (bytes_per_process);
+    free (displs);
+
+    return ret;
+}
diff --git a/ompi/mca/fcoll/dynamic/fcoll_dynamic_file_read_all_begin.c b/ompi/mca/fcoll/dynamic/fcoll_dynamic_file_read_all_begin.c
new file mode 100644
index 0000000000..1faffe7c6d
--- /dev/null
+++ b/ompi/mca/fcoll/dynamic/fcoll_dynamic_file_read_all_begin.c
@@ -0,0 +1,36 @@
+/*
+ * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
+ *                         University Research and Technology
+ *                         Corporation. All rights reserved.
+ * Copyright (c) 2004-2005 The University of Tennessee and The University
+ *                         of Tennessee Research Foundation. All rights
+ *                         reserved.
+ * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
+ *                         University of Stuttgart. All rights reserved.
+ * Copyright (c) 2004-2005 The Regents of the University of California.
+ *                         All rights reserved.
+ * Copyright (c) 2008-2011 University of Houston. All rights reserved.
+ * $COPYRIGHT$
+ * 
+ * Additional copyrights may follow
+ * 
+ * $HEADER$
+ */
+
+#include "ompi_config.h"
+#include "fcoll_dynamic.h"
+
+#include "mpi.h"
+#include "ompi/constants.h"
+#include "ompi/mca/fcoll/fcoll.h"
+
+/* Placeholder: prints a marker, moves no data, returns OMPI_SUCCESS. */
+int
+mca_fcoll_dynamic_file_read_all_begin (mca_io_ompio_file_t *fh,
+                                       void *buf,
+                                       int count,
+                                       struct ompi_datatype_t *datatype)
+{
+    printf ("DYNAMIC READ ALL BEGIN\n");
+    return OMPI_SUCCESS;
+}
diff --git a/ompi/mca/fcoll/dynamic/fcoll_dynamic_file_read_all_end.c b/ompi/mca/fcoll/dynamic/fcoll_dynamic_file_read_all_end.c
new file mode 100644
index 0000000000..e4ecd3b4c7
--- /dev/null
+++ b/ompi/mca/fcoll/dynamic/fcoll_dynamic_file_read_all_end.c
@@ -0,0 +1,35 @@
+/*
+ * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
+ *                         University Research and Technology
+ *                         Corporation. All rights reserved.
+ * Copyright (c) 2004-2005 The University of Tennessee and The University
+ *                         of Tennessee Research Foundation. All rights
+ *                         reserved.
+ * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
+ *                         University of Stuttgart. All rights reserved.
+ * Copyright (c) 2004-2005 The Regents of the University of California.
+ *                         All rights reserved.
+ * Copyright (c) 2008-2011 University of Houston. All rights reserved.
+ * $COPYRIGHT$
+ * 
+ * Additional copyrights may follow
+ * 
+ * $HEADER$
+ */
+
+#include "ompi_config.h"
+#include "fcoll_dynamic.h"
+
+#include "mpi.h"
+#include "ompi/constants.h"
+#include "ompi/mca/fcoll/fcoll.h"
+
+/* Placeholder: prints a marker, ignores its arguments, returns OMPI_SUCCESS. */
+int
+mca_fcoll_dynamic_file_read_all_end (mca_io_ompio_file_t *fh, void *buf,
+                                     ompi_status_public_t *status)
+{
+    (void) fh; (void) buf; (void) status;
+    printf ("DYNAMIC READ ALL END\n");
+    return OMPI_SUCCESS;
+}
diff --git a/ompi/mca/fcoll/dynamic/fcoll_dynamic_file_write_all.c b/ompi/mca/fcoll/dynamic/fcoll_dynamic_file_write_all.c
new file mode 100644
index 0000000000..62daa21a94
--- /dev/null
+++ b/ompi/mca/fcoll/dynamic/fcoll_dynamic_file_write_all.c
@@ -0,0 +1,687 @@
+/*
+ * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
+ *                         University Research and Technology
+ *                         Corporation. All rights reserved.
+ * Copyright (c) 2004-2005 The University of Tennessee and The University
+ *                         of Tennessee Research Foundation. All rights
+ *                         reserved.
+ * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
+ *                         University of Stuttgart. All rights reserved.
+ * Copyright (c) 2004-2005 The Regents of the University of California.
+ *                         All rights reserved.
+ * Copyright (c) 2008-2011 University of Houston. All rights reserved.
+ * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#include "ompi_config.h" +#include "fcoll_dynamic.h" + +#include "mpi.h" +#include "ompi/constants.h" +#include "ompi/mca/fcoll/fcoll.h" +#include "ompi/mca/io/ompio/io_ompio.h" +#include "ompi/mca/io/io.h" +#include "math.h" +#include "ompi/mca/pml/pml.h" +#include + +#define TIME_BREAKDOWN 0 + +int +mca_fcoll_dynamic_file_write_all (mca_io_ompio_file_t *fh, + void *buf, + int count, + struct ompi_datatype_t *datatype, + ompi_status_public_t *status) +{ + MPI_Aint total_bytes_written = 0; /* total bytes that have been written*/ + MPI_Aint total_bytes = 0; /* total bytes to be written */ + MPI_Aint bytes_to_write_in_cycle = 0; /* left to be written in a cycle*/ + MPI_Aint bytes_per_cycle = 0; /* total written in each cycle by each process*/ + int index = 0; + int cycles = 0; + int i=0, j=0, x=0; + int n=0; /* current position in total_bytes_per_process array */ + MPI_Aint bytes_remaining = 0; /* how many bytes have been written from the current + value from total_bytes_per_process */ + int bytes_sent = 0; + int blocks = 0; + /* iovec structure and count of the buffer passed in */ + uint32_t iov_count = 0; + struct iovec *decoded_iov = NULL; + int iov_index = 0; + char *send_buf = NULL; + size_t current_position = 0; + + /* global iovec at the writers that contain the iovecs created from + file_set_view */ + uint32_t total_fview_count = 0; + struct iovec *global_fview = NULL; + int local_count = 0; + struct iovec *iov = NULL; + int *fview_count = NULL; + int current_index; + + char *global_buf = NULL; + MPI_Aint global_count = 0; + + /* array that contains the sorted indices of the global_iov */ + int *sorted = NULL; + int *displs = NULL; + size_t max_data = 0; + int *bytes_per_process = NULL; + MPI_Aint bytes_left = 0; + MPI_Aint *total_bytes_per_process = NULL; +#if TIME_BREAKDOWN + double start_time=0, end_time=0, start_time2=0, end_time2=0; + double total=0 , total_io=0; +#endif + 
+ if (opal_datatype_is_contiguous_memory_layout(&datatype->super,1)) { + fh->f_flags |= OMPIO_CONTIGUOUS_MEMORY; + } + /************************************************************************** + ** In case the data is not contigous in memory, decode it into an iovec ** + **************************************************************************/ + if (! (fh->f_flags & OMPIO_CONTIGUOUS_MEMORY)) { + ompi_io_ompio_decode_datatype (fh, + datatype, + count, + buf, + &max_data, + &decoded_iov, + &iov_count); + } + else { + max_data = count * datatype->super.size; + } + + if (! (fh->f_flags & OMPIO_AGGREGATOR_IS_SET)) { + ompi_io_ompio_set_aggregator_props (fh, + mca_fcoll_dynamic_num_io_procs, + max_data); + } + +#if TIME_BREAKDOWN + if (fh->f_procs_in_group[fh->f_aggregator_index] == fh->f_rank) { + start_time = MPI_Wtime(); + } +#endif + + total_bytes_per_process = (MPI_Aint*)malloc + (fh->f_procs_per_group*sizeof(MPI_Aint)); + if (NULL == total_bytes_per_process) { + opal_output (1, "OUT OF MEMORY\n"); + return OMPI_ERR_OUT_OF_RESOURCE; + } + + ompi_io_ompio_allgather_array (&max_data, + 1, + MPI_LONG, + total_bytes_per_process, + 1, + MPI_LONG, + fh->f_aggregator_index, + fh->f_procs_in_group, + fh->f_procs_per_group, + fh->f_comm); + + for (i=0 ; if_procs_per_group ; i++) { + total_bytes += total_bytes_per_process[i]; + } + + if (NULL != total_bytes_per_process) { + free (total_bytes_per_process); + total_bytes_per_process = NULL; + } + + /********************************************************************* + *** Generate the File offsets/lengths corresponding to this write *** + ********************************************************************/ + ompi_io_ompio_generate_current_file_view (fh, max_data, &iov, &local_count); + /* + for (i=0 ; if_rank, + iov[i].iov_base, + iov[i].iov_len); + } + */ + /************************************************************* + *** ALLGather the File View information at all processes *** + 
*************************************************************/ + + fview_count = (int *) malloc (fh->f_procs_per_group * sizeof (int)); + if (NULL == fview_count) { + opal_output (1, "OUT OF MEMORY\n"); + return OMPI_ERR_OUT_OF_RESOURCE; + } + + ompi_io_ompio_allgather_array (&local_count, + 1, + MPI_INT, + fview_count, + 1, + MPI_INT, + fh->f_aggregator_index, + fh->f_procs_in_group, + fh->f_procs_per_group, + fh->f_comm); + displs = (int*) malloc (fh->f_procs_per_group * sizeof (int)); + if (NULL == displs) { + opal_output (1, "OUT OF MEMORY\n"); + return OMPI_ERR_OUT_OF_RESOURCE; + } + displs[0] = 0; + total_fview_count = fview_count[0]; + for (i=1 ; if_procs_per_group ; i++) { + total_fview_count += fview_count[i]; + displs[i] = displs[i-1] + fview_count[i-1]; + } + /* + if (fh->f_procs_in_group[fh->f_aggregator_index] == fh->f_rank) { + for (i=0 ; if_procs_per_group ; i++) { + printf ("%d: PROCESS: %d ELEMENTS: %d DISPLS: %d\n", + fh->f_rank, + i, + fview_count[i], + displs[i]); + } + } + */ + /* allocate the global iovec */ + if (0 != total_fview_count) { + global_fview = (struct iovec*)malloc (total_fview_count * + sizeof(struct iovec)); + if (NULL == global_fview) { + opal_output (1, "OUT OF MEMORY\n"); + return OMPI_ERR_OUT_OF_RESOURCE; + } + } + if (fh->f_flags & OMPIO_UNIFORM_FVIEW) { + ompi_io_ompio_allgather_array (iov, + local_count, + fh->f_iov_type, + global_fview, + local_count, + fh->f_iov_type, + fh->f_aggregator_index, + fh->f_procs_in_group, + fh->f_procs_per_group, + fh->f_comm); + } + else { + ompi_io_ompio_allgatherv_array (iov, + local_count, + fh->f_iov_type, + global_fview, + fview_count, + displs, + fh->f_iov_type, + fh->f_aggregator_index, + fh->f_procs_in_group, + fh->f_procs_per_group, + fh->f_comm); + } + /* sort it */ + if (0 != total_fview_count) { + sorted = (int *)malloc (total_fview_count * sizeof(int)); + if (NULL == sorted) { + opal_output (1, "OUT OF MEMORY\n"); + return OMPI_ERR_OUT_OF_RESOURCE; + } + 
ompi_io_ompio_sort_iovec (global_fview, total_fview_count, sorted); + } + + if (NULL != iov) { + free (iov); + iov = NULL; + } + /* + if (fh->f_procs_in_group[fh->f_aggregator_index] == fh->f_rank) { + for (i=0 ; if_flags |= OMPIO_UNIFORM_FVIEW; + if (ompi_ddt_is_contiguous_memory_layout (datatype, count)) + fh->f_flags |= OMPIO_CONTIGUOUS_MEMORY; + */ + if (fh->f_procs_in_group[fh->f_aggregator_index] == fh->f_rank) { + bytes_per_process = (int *)malloc (fh->f_procs_per_group * sizeof (int)); + if (NULL == bytes_per_process) { + opal_output (1, "OUT OF MEMORY\n"); + return OMPI_ERR_OUT_OF_RESOURCE; + } + } + + /* MSC THIS does not work at the moment + * Calculate how many bytes are written in each cycle + if (mca_fcoll_dynamic_constant_cbs) { + bytes_per_cycle = + mca_fcoll_dynamic_cycle_buffer_size/mca_fcoll_dynamic_num_io_procs; + } + else { + */ + bytes_per_cycle = mca_fcoll_dynamic_cycle_buffer_size; + + cycles = ceil((double)total_bytes/bytes_per_cycle); + + n = 0; + bytes_remaining = 0; + current_index = 0; + +#if TIME_BREAKDOWN + if (fh->f_procs_in_group[fh->f_aggregator_index] == fh->f_rank) { + end_time = MPI_Wtime(); + total = end_time-start_time; + printf ("%d: Preprocessing --- %f\n", fh->f_rank, total); + total = 0; + } +#endif + + for (index = 0; index < cycles; index++) { + int k; + +#if TIME_BREAKDOWN + if (fh->f_procs_in_group[fh->f_aggregator_index] == fh->f_rank) { + start_time = MPI_Wtime(); + } +#endif + + if (fh->f_procs_in_group[fh->f_aggregator_index] == fh->f_rank) { + memset(displs, 0x0, fh->f_procs_per_group*sizeof(int)); + memset(bytes_per_process, 0x0, fh->f_procs_per_group*sizeof(int)); + } + + if (cycles-1 == index) { + bytes_to_write_in_cycle = total_bytes - bytes_per_cycle*index; + } + else { + bytes_to_write_in_cycle = bytes_per_cycle; + } + /* + if (fh->f_procs_in_group[fh->f_aggregator_index] == fh->f_rank) { + printf ("****%d: CYCLE %d Bytes %lld**********\n", + fh->f_rank, + index, + bytes_to_write_in_cycle); + } + */ + 
/********************************************************** + **Gather the Data from all the processes at the writers ** + *********************************************************/ + + /* Calculate how much data will be contributed in this cycle + by each process*/ + bytes_sent = 0; + + while (bytes_to_write_in_cycle) { + blocks = fview_count[0]; + for (j=0 ; jf_procs_per_group ; j++) { + if (sorted[current_index] < blocks) { + n = j; + break; + } + else { + blocks += fview_count[j+1]; + } + } + if (bytes_remaining) { + if (bytes_remaining <= bytes_to_write_in_cycle) { + if (fh->f_procs_in_group[fh->f_aggregator_index] == + fh->f_rank) { + bytes_per_process[n] += bytes_remaining; + } + if (fh->f_procs_in_group[n] == fh->f_rank) { + bytes_sent += bytes_remaining; + } + current_index ++; + bytes_to_write_in_cycle -= bytes_remaining; + bytes_remaining = 0; + continue; + } + else { + if (fh->f_procs_in_group[fh->f_aggregator_index] == + fh->f_rank) { + bytes_per_process[n] += bytes_to_write_in_cycle; + } + if (fh->f_procs_in_group[n] == fh->f_rank) { + bytes_sent += bytes_to_write_in_cycle; + } + bytes_remaining -= bytes_to_write_in_cycle; + bytes_to_write_in_cycle = 0; + break; + } + } + else { + if (bytes_to_write_in_cycle < + global_fview[sorted[current_index]].iov_len) { + if (fh->f_procs_in_group[fh->f_aggregator_index] == + fh->f_rank) { + bytes_per_process[n] += bytes_to_write_in_cycle; + } + if (fh->f_procs_in_group[n] == fh->f_rank) { + bytes_sent += bytes_to_write_in_cycle; + } + bytes_remaining = global_fview[sorted[current_index]].iov_len - + bytes_to_write_in_cycle; + bytes_to_write_in_cycle = 0; + break; + } + else { + if (fh->f_procs_in_group[fh->f_aggregator_index] == + fh->f_rank) { + bytes_per_process[n] += + global_fview[sorted[current_index]].iov_len; + } + if (fh->f_procs_in_group[n] == fh->f_rank) { + bytes_sent += global_fview[sorted[current_index]].iov_len; + } + bytes_to_write_in_cycle -= + global_fview[sorted[current_index]].iov_len; + 
current_index ++; + continue; + } + } + } + /* Calculate the displacement on where to put the data and allocate + the recieve buffer (global_buf) */ + if (fh->f_procs_in_group[fh->f_aggregator_index] == fh->f_rank) { + displs[0] = 0; + global_count = bytes_per_process[0]; + for (i=1 ; if_procs_per_group ; i++) { + global_count += bytes_per_process[i]; + displs[i] = displs[i-1] + bytes_per_process[i-1]; + } + /* + for (i=0 ; if_procs_per_group ; i++) { + printf ("Proc %d sending %lld at %lld\n", + i, + bytes_per_process[i], + displs[i]); + } + */ + global_buf = malloc (global_count); + if (NULL == global_buf) { + opal_output (1, "OUT OF MEMORY\n"); + return OMPI_ERR_OUT_OF_RESOURCE; + } + } + + if (fh->f_flags & OMPIO_CONTIGUOUS_MEMORY) { + send_buf = &((char*)buf)[total_bytes_written]; + } + else if (bytes_sent) { + /* allocate a send buffer and copy the data that needs + to be sent into it in case the data is non-contigous + in memory */ + OPAL_PTRDIFF_TYPE mem_address; + size_t remaining = 0; + size_t temp_position = 0; + + send_buf = malloc (bytes_sent); + if (NULL == send_buf) { + opal_output (1, "OUT OF MEMORY\n"); + return OMPI_ERR_OUT_OF_RESOURCE; + } + + remaining = bytes_sent; + + while (remaining) { + mem_address = (OPAL_PTRDIFF_TYPE) + (decoded_iov[iov_index].iov_base) + current_position; + + if (remaining >= + (decoded_iov[iov_index].iov_len - current_position)) { + memcpy (send_buf+temp_position, + (IOVBASE_TYPE *)mem_address, + decoded_iov[iov_index].iov_len - current_position); + remaining = remaining - + (decoded_iov[iov_index].iov_len - current_position); + temp_position = temp_position + + (decoded_iov[iov_index].iov_len - current_position); + iov_index = iov_index + 1; + current_position = 0; + } + else { + memcpy (send_buf+temp_position, + (IOVBASE_TYPE *) mem_address, + remaining); + current_position = current_position + remaining; + remaining = 0; + } + } + } + total_bytes_written += bytes_sent; + + /* Get the data from all processes to the 
writer*/ + ompi_io_ompio_gatherv_array (send_buf, + bytes_sent, + MPI_BYTE, + global_buf, + bytes_per_process, + displs, + MPI_BYTE, + fh->f_aggregator_index, + fh->f_procs_in_group, + fh->f_procs_per_group, + fh->f_comm); + + if (! (fh->f_flags & OMPIO_CONTIGUOUS_MEMORY)) { + if (NULL != send_buf) { + free (send_buf); + send_buf = NULL; + } + } + /* + if (fh->f_procs_in_group[fh->f_aggregator_index] == fh->f_rank) + for (i=0 ; if_procs_in_group[fh->f_aggregator_index] == fh->f_rank) { + MPI_Aint bytes_to_write = global_count; + MPI_Aint *temp = NULL; + int block = 1; + k = 0; + + temp = (MPI_Aint *)malloc (sizeof(MPI_Aint) * fh->f_procs_per_group); + if (NULL == temp) { + opal_output(1, "OUT OF MEMORY\n"); + return OMPI_ERR_OUT_OF_RESOURCE; + } + memset(temp, 0x0, fh->f_procs_per_group*sizeof(MPI_Aint)); + fh->f_io_array = (mca_io_ompio_io_array_t *) malloc + (OMPIO_IOVEC_INITIAL_SIZE * sizeof (mca_io_ompio_io_array_t)); + if (NULL == fh->f_io_array) { + opal_output(1, "OUT OF MEMORY\n"); + return OMPI_ERR_OUT_OF_RESOURCE; + } + + while (bytes_to_write) { + int start = 0; + + if (OMPIO_IOVEC_INITIAL_SIZE*block <= k) { + block ++; + fh->f_io_array = (mca_io_ompio_io_array_t *)realloc + (fh->f_io_array, OMPIO_IOVEC_INITIAL_SIZE * block * + sizeof(mca_io_ompio_io_array_t)); + if (NULL == fh->f_io_array) { + opal_output(1, "OUT OF MEMORY\n"); + return OMPI_ERR_OUT_OF_RESOURCE; + } + } + + blocks = fview_count[0]; + for (j=0 ; jf_procs_per_group ; j++) { + if (sorted[x] < blocks) { + n = j; + break; + } + else { + blocks += fview_count[j+1]; + } + } + for (j=0 ; jf_io_array[k].offset = (IOVBASE_TYPE *) + ((OPAL_PTRDIFF_TYPE)global_fview[sorted[x]].iov_base + + (global_fview[sorted[x]].iov_len - bytes_left)); + fh->f_io_array[k].length = bytes_left; + fh->f_io_array[k].memory_address = + &global_buf[start+temp[n]]; + temp[n] += fh->f_io_array[k].length; + bytes_to_write -= bytes_left; + bytes_left = 0; + k ++; + x ++; + continue; + } + else { + fh->f_io_array[k].offset 
= (IOVBASE_TYPE *) + ((OPAL_PTRDIFF_TYPE)global_fview[sorted[x]].iov_base + + (global_fview[sorted[x]].iov_len - bytes_left)); + fh->f_io_array[k].length = bytes_to_write; + fh->f_io_array[k].memory_address = + &global_buf[start+temp[n]]; + temp[n] += fh->f_io_array[k].length; + bytes_left -= bytes_to_write; + bytes_to_write = 0;; + k ++; + break; + } + } + else { + if (bytes_to_write < global_fview[sorted[x]].iov_len) { + fh->f_io_array[k].offset = global_fview[sorted[x]].iov_base; + fh->f_io_array[k].length = bytes_to_write; + fh->f_io_array[k].memory_address = + &global_buf[start+temp[n]]; + bytes_left = + global_fview[sorted[x]].iov_len - bytes_to_write; + bytes_to_write = 0; + k ++; + break; + } + else { + fh->f_io_array[k].offset = global_fview[sorted[x]].iov_base; + fh->f_io_array[k].length = global_fview[sorted[x]].iov_len; + fh->f_io_array[k].memory_address = + &global_buf[start+temp[n]]; + temp[n] += fh->f_io_array[k].length; + bytes_to_write -= global_fview[sorted[x]].iov_len; + k ++; + x ++; + continue; + } + } + } + + fh->f_num_of_io_entries = k; + /* + printf("*************************** %d\n", fh->f_num_of_io_entries); + for (i=0 ; if_num_of_io_entries ; i++) { + printf(" ADDRESS: %p OFFSET: %p LENGTH: %d\n", + fh->f_io_array[i].memory_address, + fh->f_io_array[i].offset, + fh->f_io_array[i].length); + } + */ +#if TIME_BREAKDOWN + if (fh->f_procs_in_group[fh->f_aggregator_index] == fh->f_rank) { + start_time2 = MPI_Wtime(); + } +#endif + if (fh->f_num_of_io_entries) { + if (OMPI_SUCCESS != fh->f_fbtl->fbtl_pwritev (fh, NULL)) { + opal_output (1, "WRITE FAILED\n"); + return OMPI_ERROR; + } + } +#if TIME_BREAKDOWN + if (fh->f_procs_in_group[fh->f_aggregator_index] == fh->f_rank) { + end_time2 = MPI_Wtime(); + total_io += end_time2-start_time2; + } +#endif + if (NULL != temp) { + free (temp); + temp = NULL; + } + } + /********************************************************** + ******************** DONE WRITING ************************ + 
*********************************************************/ + + if (fh->f_procs_in_group[fh->f_aggregator_index] == fh->f_rank) { + fh->f_num_of_io_entries = 0; + if (NULL != fh->f_io_array) { + free (fh->f_io_array); + fh->f_io_array = NULL; + } + if (NULL != global_buf) { + free (global_buf); + global_buf = NULL; + } + } +#if TIME_BREAKDOWN + if (fh->f_procs_in_group[fh->f_aggregator_index] == fh->f_rank) { + end_time = MPI_Wtime(); + total += end_time-start_time; + } +#endif + } +#if TIME_BREAKDOWN + if (fh->f_procs_in_group[fh->f_aggregator_index] == fh->f_rank) { + printf ("%d: Total --- %f I/O ---- %f\n", fh->f_rank, total, total_io); + } +#endif + + if (NULL != sorted) { + free (sorted); + sorted = NULL; + } + if (NULL != global_fview) { + free (global_fview); + global_fview = NULL; + } + if (NULL != fview_count) { + free (fview_count); + fview_count = NULL; + } + if (NULL != decoded_iov) { + free (decoded_iov); + decoded_iov = NULL; + } + if (NULL != bytes_per_process) { + free (bytes_per_process); + bytes_per_process = NULL; + } + if (NULL != displs) { + free (displs); + displs = NULL; + } + + return OMPI_SUCCESS; +} diff --git a/ompi/mca/fcoll/dynamic/fcoll_dynamic_file_write_all_begin.c b/ompi/mca/fcoll/dynamic/fcoll_dynamic_file_write_all_begin.c new file mode 100644 index 0000000000..af59fae369 --- /dev/null +++ b/ompi/mca/fcoll/dynamic/fcoll_dynamic_file_write_all_begin.c @@ -0,0 +1,36 @@ +/* + * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. + * Copyright (c) 2004-2005 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2005 The Regents of the University of California. + * All rights reserved. + * Copyright (c) 2008-2011 University of Houston. 
All rights reserved. + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#include "ompi_config.h" +#include "fcoll_dynamic.h" + +#include "mpi.h" +#include "ompi/constants.h" +#include "ompi/mca/fcoll/fcoll.h" + +/* Stub entry point: ignores fh, buf, count and datatype, prints a trace line to stdout and returns OMPI_SUCCESS. NOTE(review): printf is called but no stdio include is visible in this file - presumably it arrives through one of the headers above; confirm. */ +int +mca_fcoll_dynamic_file_write_all_begin (mca_io_ompio_file_t *fh, + void *buf, + int count, + struct ompi_datatype_t *datatype) +{ + printf ("DYNAMIC WRITE ALL BEGIN\n"); + return OMPI_SUCCESS; +} diff --git a/ompi/mca/fcoll/dynamic/fcoll_dynamic_file_write_all_end.c b/ompi/mca/fcoll/dynamic/fcoll_dynamic_file_write_all_end.c new file mode 100644 index 0000000000..9a6a554d82 --- /dev/null +++ b/ompi/mca/fcoll/dynamic/fcoll_dynamic_file_write_all_end.c @@ -0,0 +1,35 @@ +/* + * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. + * Copyright (c) 2004-2005 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2005 The Regents of the University of California. + * All rights reserved. + * Copyright (c) 2008-2011 University of Houston. All rights reserved. 
+ * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#include "ompi_config.h" +#include "fcoll_dynamic.h" + +#include "mpi.h" +#include "ompi/constants.h" +#include "ompi/mca/fcoll/fcoll.h" + +/* Stub entry point: ignores fh and buf, never writes to status, prints a trace line to stdout and returns OMPI_SUCCESS. NOTE(review): printf is called but no stdio include is visible in this file - presumably it arrives through one of the headers above; confirm. */ +int +mca_fcoll_dynamic_file_write_all_end (mca_io_ompio_file_t *fh, + void *buf, + ompi_status_public_t *status) +{ + printf ("DYNAMIC WRITE ALL END\n"); + return OMPI_SUCCESS; +} diff --git a/ompi/mca/fcoll/dynamic/fcoll_dynamic_module.c b/ompi/mca/fcoll/dynamic/fcoll_dynamic_module.c new file mode 100644 index 0000000000..115cb37184 --- /dev/null +++ b/ompi/mca/fcoll/dynamic/fcoll_dynamic_module.c @@ -0,0 +1,91 @@ +/* + * Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. + * Copyright (c) 2004-2006 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2005 The Regents of the University of California. + * All rights reserved. + * Copyright (c) 2008-2011 University of Houston. All rights reserved. 
+ * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#include "ompi_config.h" +#include "fcoll_dynamic.h" + +#include /* NOTE(review): the header name is missing in this copy of the patch; presumably a system header, judging by the blank-line grouping - restore it from the repository */ + +#include "mpi.h" +#include "opal/mca/base/mca_base_param.h" +#include "ompi/mca/fcoll/fcoll.h" +#include "ompi/mca/fcoll/base/base.h" + + +/* + * ******************************************************************* + * ************************ actions structure ************************ + * ******************************************************************* + * Positional initializer: the entries follow the member order of + * struct mca_fcoll_base_module_1_0_0_t (init, finalize, read_all, + * read_all_begin, read_all_end, write_all, write_all_begin, + * write_all_end), so the two have to be kept in sync. + */ +static mca_fcoll_base_module_1_0_0_t dynamic = { + mca_fcoll_dynamic_module_init, + mca_fcoll_dynamic_module_finalize, + mca_fcoll_dynamic_file_read_all, + mca_fcoll_dynamic_file_read_all_begin, + mca_fcoll_dynamic_file_read_all_end, + mca_fcoll_dynamic_file_write_all, + mca_fcoll_dynamic_file_write_all_begin, + mca_fcoll_dynamic_file_write_all_end +}; + +int +mca_fcoll_dynamic_component_init_query(bool enable_progress_threads, + bool enable_mpi_threads) +{ + /* Nothing to do: both thread flags are ignored and the query always succeeds */ + + return OMPI_SUCCESS; +} + +mca_fcoll_base_module_1_0_0_t * +mca_fcoll_dynamic_component_file_query (mca_io_ompio_file_t *fh, int *priority) +{ /* Reports the configured priority through *priority and declines (returns NULL) when that priority is not positive */ + *priority = mca_fcoll_dynamic_priority; + if (0 >= mca_fcoll_dynamic_priority) { + return NULL; + } + /* A non-zero result from mca_fcoll_base_query_table raises the reported priority to at least 50 */ + if (mca_fcoll_base_query_table (fh, "dynamic")) { + if (*priority < 50) { + *priority = 50; + } + } + + return &dynamic; +} + +int mca_fcoll_dynamic_component_file_unquery (mca_io_ompio_file_t *file) +{ + /* This function might be needed for some purposes later. 
For now it + * does not have anything to do since there are no steps which need + * to be undone if this module is not selected */ + + return OMPI_SUCCESS; +} +/* Per-file module setup: nothing to initialise, always returns OMPI_SUCCESS */ +int mca_fcoll_dynamic_module_init (mca_io_ompio_file_t *file) +{ + return OMPI_SUCCESS; +} + +/* Per-file module teardown: nothing to release, always returns OMPI_SUCCESS */ +int mca_fcoll_dynamic_module_finalize (mca_io_ompio_file_t *file) +{ + return OMPI_SUCCESS; +} diff --git a/ompi/mca/fcoll/fcoll.h b/ompi/mca/fcoll/fcoll.h new file mode 100644 index 0000000000..e6defe8968 --- /dev/null +++ b/ompi/mca/fcoll/fcoll.h @@ -0,0 +1,166 @@ +/* + * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. + * Copyright (c) 2004-2005 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2005 The Regents of the University of California. + * All rights reserved. + * Copyright (c) 2008-2011 University of Houston. All rights reserved. + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#ifndef OMPI_MCA_FCOLL_H +#define OMPI_MCA_FCOLL_H + +#include "ompi_config.h" +#include "mpi.h" +#include "opal/mca/mca.h" +#include "opal/mca/base/base.h" + +BEGIN_C_DECLS + +struct mca_io_ompio_file_t; + +/* + * Macro for use in components that are of type fcoll + */ +#define MCA_FCOLL_BASE_VERSION_2_0_0 \ + MCA_BASE_VERSION_2_0_0, \ + "fcoll", 2, 0, 0 + +/* + * These are the component function prototypes. These function pointers + * go into the component structure. These functions (query() and finalize() + * are called during fcoll_base_select(). Each component is query() ied + * and subsequently, all the unselected components are finalize() 'ed + * so that any *stuff* they did during query() can be undone. 
By + * similar logic, finalize() is also called on the component which + * was selected when the communicator is being destroyed. + * + * So, to sum it up, every component carries 4 functions: + * 1. open() - called during MPI_INIT + * 2. close() - called during MPI_FINALIZE + * 3. query() - called to select a particular component + * 4. finalize() - called when actions taken during query have + * to be undone + */ + +/* + * **************** component struct ******************************* + * *********** These functions go in the component struct ********** + * **************** component struct ******************************* + */ + +typedef int (*mca_fcoll_base_component_init_query_1_0_0_fn_t) + (bool enable_progress_threads, + bool enable_mpi_threads); + +typedef struct mca_fcoll_base_module_1_0_0_t * +(*mca_fcoll_base_component_file_query_1_0_0_fn_t) (struct mca_io_ompio_file_t *file, + int *priority); + +typedef int (*mca_fcoll_base_component_file_unquery_1_0_0_fn_t) + (struct mca_io_ompio_file_t *file); + +/* + * ****************** component struct ****************************** + * Structure for fcoll v2.0.0 components. This is chained to MCA v2.0.0 + * NOTE(review): the overview comment earlier in this header lists + * open/close/query/finalize, while the members declared here are + * init_query, file_query and file_unquery - confirm the intended mapping. + * ****************** component struct ****************************** + */ +struct mca_fcoll_base_component_2_0_0_t { + mca_base_component_t fcollm_version; + mca_base_component_data_t fcollm_data; + + mca_fcoll_base_component_init_query_1_0_0_fn_t fcollm_init_query; + mca_fcoll_base_component_file_query_1_0_0_fn_t fcollm_file_query; + mca_fcoll_base_component_file_unquery_1_0_0_fn_t fcollm_file_unquery; +}; +typedef struct mca_fcoll_base_component_2_0_0_t mca_fcoll_base_component_2_0_0_t; +typedef struct mca_fcoll_base_component_2_0_0_t mca_fcoll_base_component_t; + +/* + * *********************************************************************** + * ************************ Interface function definitions ************** + * These are the typedefs for the function pointers to various fcoll + * backend functions 
which will be used by the various fcoll components + * The read_all and write_all pointer types share one signature + * (fh, buf, count, datatype, status); the _begin variants drop status + * and the _end variants drop count and datatype. + * *********************************************************************** + */ + +typedef int (*mca_fcoll_base_module_init_1_0_0_fn_t) +(struct mca_io_ompio_file_t *file); + +typedef int (*mca_fcoll_base_module_finalize_1_0_0_fn_t) +(struct mca_io_ompio_file_t *file); + +typedef int (*mca_fcoll_base_module_file_read_all_fn_t) +(struct mca_io_ompio_file_t *fh, + void *buf, + int count, + struct ompi_datatype_t *datatype, + ompi_status_public_t *status); + +typedef int (*mca_fcoll_base_module_file_read_all_begin_fn_t) +(struct mca_io_ompio_file_t *fh, + void *buf, + int count, + struct ompi_datatype_t *datatype); + +typedef int (*mca_fcoll_base_module_file_read_all_end_fn_t) +(struct mca_io_ompio_file_t *fh, + void *buf, + ompi_status_public_t *status); + +typedef int (*mca_fcoll_base_module_file_write_all_fn_t) +(struct mca_io_ompio_file_t *fh, + void *buf, + int count, + struct ompi_datatype_t *datatype, + ompi_status_public_t *status); + +typedef int (*mca_fcoll_base_module_file_write_all_begin_fn_t) +(struct mca_io_ompio_file_t *fh, + void *buf, + int count, + struct ompi_datatype_t *datatype); + +typedef int (*mca_fcoll_base_module_file_write_all_end_fn_t) +(struct mca_io_ompio_file_t *fh, + void *buf, + ompi_status_public_t *status); + +/* + * *********************************************************************** + * *************************** module structure ************************* + * *********************************************************************** + */ +struct mca_fcoll_base_module_1_0_0_t { + /* + * Per-file initialization function. This is called only + * on the module which is selected. 
The finalize corresponding to + * this function is present on the component struct above + * NOTE(review): the matching finalize pointer visible in this header is + * the fcoll_module_finalize member that follows; the component struct + * above carries fcollm_file_unquery instead - confirm which was meant. + */ + mca_fcoll_base_module_init_1_0_0_fn_t fcoll_module_init; + mca_fcoll_base_module_finalize_1_0_0_fn_t fcoll_module_finalize; + + /* FCOLL function pointers: blocking read/write plus their _begin/_end variants */ + mca_fcoll_base_module_file_read_all_fn_t fcoll_file_read_all; + mca_fcoll_base_module_file_read_all_begin_fn_t fcoll_file_read_all_begin; + mca_fcoll_base_module_file_read_all_end_fn_t fcoll_file_read_all_end; + mca_fcoll_base_module_file_write_all_fn_t fcoll_file_write_all; + mca_fcoll_base_module_file_write_all_begin_fn_t fcoll_file_write_all_begin; + mca_fcoll_base_module_file_write_all_end_fn_t fcoll_file_write_all_end; +}; +typedef struct mca_fcoll_base_module_1_0_0_t mca_fcoll_base_module_1_0_0_t; +typedef mca_fcoll_base_module_1_0_0_t mca_fcoll_base_module_t; + +END_C_DECLS + +#endif /* OMPI_MCA_FCOLL_H */ diff --git a/ompi/mca/fcoll/individual/Makefile.am b/ompi/mca/fcoll/individual/Makefile.am new file mode 100644 index 0000000000..b8d594c589 --- /dev/null +++ b/ompi/mca/fcoll/individual/Makefile.am @@ -0,0 +1,54 @@ +# +# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana +# University Research and Technology +# Corporation. All rights reserved. +# Copyright (c) 2004-2005 The University of Tennessee and The University +# of Tennessee Research Foundation. All rights +# reserved. +# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +# University of Stuttgart. All rights reserved. +# Copyright (c) 2004-2005 The Regents of the University of California. +# All rights reserved. +# Copyright (c) 2008-2011 University of Houston. All rights reserved. 
+# $COPYRIGHT$ +# +# Additional copyrights may follow +# +# $HEADER$ +# + +sources = \ + fcoll_individual.h \ + fcoll_individual_module.c \ + fcoll_individual_component.c \ + fcoll_individual_file_read_all.c \ + fcoll_individual_file_read_all_begin.c \ + fcoll_individual_file_read_all_end.c \ + fcoll_individual_file_write_all.c \ + fcoll_individual_file_write_all_begin.c \ + fcoll_individual_file_write_all_end.c + +# Make the output library in this directory, and name it either +# mca_<type>_<name>.la (for DSO builds) or libmca_<type>_<name>.la +# (for static builds). + +if MCA_BUILD_ompi_fcoll_individual_DSO +component_noinst = +component_install = mca_fcoll_individual.la +else +component_noinst = libmca_fcoll_individual.la +component_install = +endif + +mcacomponentdir = $(libdir)/openmpi +mcacomponent_LTLIBRARIES = $(component_install) +mca_fcoll_individual_la_SOURCES = $(sources) +mca_fcoll_individual_la_LDFLAGS = -module -avoid-version +mca_fcoll_individual_la_LIBADD = \ + $(top_ompi_builddir)/ompi/libmpi.la \ + $(top_ompi_builddir)/orte/libopen-rte.la \ + $(top_ompi_builddir)/opal/libopen-pal.la + +noinst_LTLIBRARIES = $(component_noinst) +libmca_fcoll_individual_la_SOURCES =$(sources) +libmca_fcoll_individual_la_LDFLAGS = -module -avoid-version diff --git a/ompi/mca/fcoll/individual/fcoll_individual.h b/ompi/mca/fcoll/individual/fcoll_individual.h new file mode 100644 index 0000000000..a333661101 --- /dev/null +++ b/ompi/mca/fcoll/individual/fcoll_individual.h @@ -0,0 +1,85 @@ +/* + * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. + * Copyright (c) 2004-2006 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2005 The Regents of the University of California. + * All rights reserved. 
+ * Copyright (c) 2008-2011 University of Houston. All rights reserved. + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#ifndef MCA_FCOLL_INDIVIDUAL_EXPORT_H +#define MCA_FCOLL_INDIVIDUAL_EXPORT_H + +#include "ompi_config.h" + +#include "mpi.h" +#include "opal/mca/mca.h" +#include "ompi/mca/fcoll/fcoll.h" +#include "ompi/mca/fcoll/base/base.h" +#include "ompi/mca/io/ompio/io_ompio.h" + +BEGIN_C_DECLS + +/* Globally exported variables: tunables defined in fcoll_individual_component.c and registered there as the priority, constant_cbs and cycle_buffer_size MCA parameters */ + +extern int mca_fcoll_individual_priority; +extern int mca_fcoll_individual_constant_cbs; +extern int mca_fcoll_individual_cycle_buffer_size; + +OMPI_MODULE_DECLSPEC extern mca_fcoll_base_component_2_0_0_t mca_fcoll_individual_component; + +/* API functions: component query hooks, per-file module init/finalize, and the read_all/write_all entry points with their _begin/_end variants */ + +int mca_fcoll_individual_component_init_query(bool enable_progress_threads, + bool enable_mpi_threads); +struct mca_fcoll_base_module_1_0_0_t * +mca_fcoll_individual_component_file_query (mca_io_ompio_file_t *fh, int *priority); + +int mca_fcoll_individual_component_file_unquery (mca_io_ompio_file_t *file); + +int mca_fcoll_individual_module_init (mca_io_ompio_file_t *file); +int mca_fcoll_individual_module_finalize (mca_io_ompio_file_t *file); + +int mca_fcoll_individual_file_read_all (mca_io_ompio_file_t *fh, + void *buf, + int count, + struct ompi_datatype_t *datatype, + ompi_status_public_t * status); + +int mca_fcoll_individual_file_read_all_begin (mca_io_ompio_file_t *fh, + void *buf, + int count, + struct ompi_datatype_t *datatype); + +int mca_fcoll_individual_file_read_all_end (mca_io_ompio_file_t *fh, + void *buf, + ompi_status_public_t * status); + +int mca_fcoll_individual_file_write_all (mca_io_ompio_file_t *fh, + void *buf, + int count, + struct ompi_datatype_t *datatype, + ompi_status_public_t * status); + +int mca_fcoll_individual_file_write_all_begin (mca_io_ompio_file_t *fh, + void *buf, + int count, + struct ompi_datatype_t *datatype); + +int mca_fcoll_individual_file_write_all_end (mca_io_ompio_file_t *fh, + void *buf, 
+ ompi_status_public_t * status); + +END_C_DECLS + +#endif /* MCA_FCOLL_INDIVIDUAL_EXPORT_H */ diff --git a/ompi/mca/fcoll/individual/fcoll_individual_component.c b/ompi/mca/fcoll/individual/fcoll_individual_component.c new file mode 100644 index 0000000000..d8f1d6d69c --- /dev/null +++ b/ompi/mca/fcoll/individual/fcoll_individual_component.c @@ -0,0 +1,121 @@ +/* + * Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. + * Copyright (c) 2004-2005 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2005 The Regents of the University of California. + * All rights reserved. + * Copyright (c) 2008 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2008-2011 University of Houston. All rights reserved. + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + * + * These symbols are in a file by themselves to provide nice linker + * semantics. Since linkers generally pull in symbols by object + * files, keeping these symbols as the only symbols in this file + * prevents utility programs such as "ompi_info" from having to import + * entire components just to query their version and parameters. 
+ */ + +#include "ompi_config.h" +#include "fcoll_individual.h" + +#include "mpi.h" +#include "ompi/mca/fcoll/fcoll.h" +#include "fcoll_individual.h" /* NOTE(review): second include of fcoll_individual.h in this file; harmless because the header has an include guard, but redundant */ + +/* + * Public string showing the fcoll ompi_individual component version number + */ +const char *mca_fcoll_individual_component_version_string = + "Open MPI individual collective MCA component version " OMPI_VERSION; + +/* + * Global variables: built-in defaults for the tunables; + * individual_register() may overwrite them with looked-up values + */ +int mca_fcoll_individual_priority = 10; +int mca_fcoll_individual_constant_cbs = 0; +int mca_fcoll_individual_cycle_buffer_size = OMPIO_PREALLOC_MAX_BUF_SIZE; + +/* + * Local function + */ +static int individual_register(void); + +/* + * Instantiate the public struct with all of our public information + * and pointers to our public functions in it + */ +mca_fcoll_base_component_2_0_0_t mca_fcoll_individual_component = { + + /* First, the mca_component_t struct containing meta information + * about the component itself */ + + { + MCA_FCOLL_BASE_VERSION_2_0_0, + + /* Component name and version */ + "individual", + OMPI_MAJOR_VERSION, + OMPI_MINOR_VERSION, + OMPI_RELEASE_VERSION, + individual_register, + NULL + }, + { + /* The component is checkpoint ready */ + MCA_BASE_METADATA_PARAM_CHECKPOINT + }, + + mca_fcoll_individual_component_init_query, + mca_fcoll_individual_component_file_query, + mca_fcoll_individual_component_file_unquery +}; + +/* Parameter hook referenced from the component descriptor above. For each of the three tunables it first searches for an fcoll parameter named individual_NAME and, when one is found (index >= 0), loads its value into the global; it then registers NAME on this component with the global's current value as the default. Always returns OMPI_SUCCESS; the results of the lookup and registration calls are not checked. */ +static int +individual_register(void) +{ + int param; + + param = mca_base_param_find ("fcoll", NULL, "individual_priority"); + if (param >= 0) + { + mca_base_param_lookup_int (param, &mca_fcoll_individual_priority); + } + param = mca_base_param_find ("fcoll", NULL, "individual_constant_cbs"); + if (param >= 0) + { + mca_base_param_lookup_int (param, &mca_fcoll_individual_constant_cbs); + } + param = mca_base_param_find ("fcoll", NULL, "individual_cycle_buffer_size"); + if (param >= 0) + { + mca_base_param_lookup_int (param, &mca_fcoll_individual_cycle_buffer_size); + } + + mca_base_param_reg_int 
(&mca_fcoll_individual_component.fcollm_version, + "priority", + "Priority of the individual fcoll component", + false, false, mca_fcoll_individual_priority, + &mca_fcoll_individual_priority); + mca_base_param_reg_int (&mca_fcoll_individual_component.fcollm_version, + "constant_cbs", + "wether we are using constant or scaling cycle buffer size in the individual fcoll component", /* NOTE(review): the help text above misspells whether; left as is because the string is emitted at run time */ + false, false, mca_fcoll_individual_constant_cbs, + &mca_fcoll_individual_constant_cbs); + mca_base_param_reg_int (&mca_fcoll_individual_component.fcollm_version, + "cycle_buffer_size", + "Cycle Buffer Size of the individual fcoll component", + false, false, mca_fcoll_individual_cycle_buffer_size, + &mca_fcoll_individual_cycle_buffer_size); + + return OMPI_SUCCESS; +} diff --git a/ompi/mca/fcoll/individual/fcoll_individual_file_read_all.c b/ompi/mca/fcoll/individual/fcoll_individual_file_read_all.c new file mode 100644 index 0000000000..2eaa3b1fa0 --- /dev/null +++ b/ompi/mca/fcoll/individual/fcoll_individual_file_read_all.c @@ -0,0 +1,211 @@ +/* + * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. + * Copyright (c) 2004-2005 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2005 The Regents of the University of California. + * All rights reserved. + * Copyright (c) 2008-2011 University of Houston. All rights reserved. 
+ * $COPYRIGHT$
+ *
+ * Additional copyrights may follow
+ *
+ * $HEADER$
+ */
+
+#include "ompi_config.h"
+#include "fcoll_individual.h"
+
+#include "mpi.h"
+#include "ompi/constants.h"
+#include "ompi/mca/fcoll/fcoll.h"
+#include "ompi/mca/io/ompio/io_ompio.h"
+#include "ompi/mca/io/io.h"
+#include "math.h"
+/* NOTE(review): the header name on the next line was lost when this patch
+ * was extracted; <unistd.h> is presumed -- confirm against the repository. */
+#include <unistd.h>
+
+#define TIME_BREAKDOWN 0
+
+/**
+ * Read the data described by (buf, count, datatype) using only the calling
+ * process's own file view; this routine performs no communication itself.
+ *
+ * The request is processed in cycles of at most bytes_per_cycle bytes.  For
+ * every cycle an io_array is built in fh->f_io_array that pairs a piece of
+ * the decoded user buffer with a piece of the decoded file view, and the
+ * array is handed to fh->f_fbtl->fbtl_preadv().  While data is consumed the
+ * file-view cursor kept in fh (f_offset, f_total_bytes,
+ * f_position_in_file_view, f_index_in_file_view) is advanced.
+ *
+ * @param fh        file handle; its io_array and file-view cursor are modified
+ * @param buf       user buffer that receives the data
+ * @param count     number of datatype elements in buf
+ * @param datatype  datatype describing the layout of buf
+ * @param status    not written by this routine
+ *
+ * @return OMPI_SUCCESS on success, OMPI_ERR_OUT_OF_RESOURCE if the io_array
+ *         cannot be (re)allocated, OMPI_ERR_BAD_PARAM if the configured
+ *         cycle buffer size yields zero bytes per cycle.
+ */
+int
+mca_fcoll_individual_file_read_all (mca_io_ompio_file_t *fh,
+                                    void *buf,
+                                    int count,
+                                    struct ompi_datatype_t *datatype,
+                                    ompi_status_public_t *status)
+{
+    size_t total_bytes_read = 0;       /* total bytes that have been read*/
+    size_t bytes_to_read_in_cycle = 0; /* left to be read in a cycle*/
+    size_t bytes_per_cycle = 0;        /* total read in each cycle by each process*/
+    size_t last_cycle_bytes = 0;       /* max_data % bytes_per_cycle */
+    int index = 0;
+    int cycles = 0;
+    int ret = OMPI_SUCCESS;
+
+    uint32_t iov_count = 0;
+    struct iovec *decoded_iov = NULL;
+
+    size_t max_data = 0;
+    int i = 0; /* index into the decoded iovec of the buffer */
+    int j = 0; /* index into the file view iovec */
+    int k = 0; /* index into the io_array */
+    size_t sum_previous_counts = 0;
+    size_t sum_previous_length = 0;
+#if TIME_BREAKDOWN
+    double start = 0, end=0, start_all=0, end_all=0, total_io=0;
+#endif
+
+#if TIME_BREAKDOWN
+    start_all = MPI_Wtime();
+#endif
+
+    ompi_io_ompio_decode_datatype (fh,
+                                   datatype,
+                                   count,
+                                   buf,
+                                   &max_data,
+                                   &decoded_iov,
+                                   &iov_count);
+
+    if (mca_fcoll_individual_constant_cbs) {
+        bytes_per_cycle = mca_fcoll_individual_cycle_buffer_size/fh->f_size;
+    }
+    else {
+        bytes_per_cycle = mca_fcoll_individual_cycle_buffer_size;
+    }
+
+    if (0 != max_data) {
+        /* A zero cycle size (buffer size smaller than the number of
+         * processes, or configured as 0) would divide by zero below. */
+        if (0 == bytes_per_cycle) {
+            opal_output (1, "individual fcoll: cycle buffer size is too small\n");
+            ret = OMPI_ERR_BAD_PARAM;
+            goto exit;
+        }
+
+        /* Integer ceiling.  The previous ceil((float)...) lost precision
+         * once max_data exceeded 2^24 and could under-count the cycles,
+         * which silently truncated the transfer. */
+        last_cycle_bytes = max_data % bytes_per_cycle;
+        cycles = (int)(max_data / bytes_per_cycle);
+        if (0 != last_cycle_bytes) {
+            cycles++;
+        }
+    }
+
+#if 0
+    printf ("Bytes per Cycle: %d Cycles: %d\n",bytes_per_cycle, cycles);
+#endif
+
+    sum_previous_length = fh->f_position_in_file_view;
+    j = fh->f_index_in_file_view;
+
+    for (index = 0; index < cycles; index++) {
+        OPAL_PTRDIFF_TYPE disp;
+        int block = 1;
+
+        k = 0;
+        if ((index == cycles-1) && (0 != last_cycle_bytes)) {
+            bytes_to_read_in_cycle = last_cycle_bytes;
+        }
+        else {
+            bytes_to_read_in_cycle = bytes_per_cycle;
+        }
+
+        /*
+        ompi_io_ompio_create_list (fh->f_decoded_iov, fh->f_iov_count,
+                                   decoded_iov, iov_count,
+                                   &total_bytes_read, &bytes_to_read_in_cycle,
+                                   &sum_previous_counts, &sum_previous_length,
+                                   &decoded_iov_index, &fview_iov_index,
+                                   &fh->f_io_array, &fh->f_num_of_io_entries);
+        */
+
+        fh->f_io_array = (mca_io_ompio_io_array_t *)malloc
+            (OMPIO_IOVEC_INITIAL_SIZE * sizeof (mca_io_ompio_io_array_t));
+        if (NULL == fh->f_io_array) {
+            opal_output(1, "OUT OF MEMORY\n");
+            ret = OMPI_ERR_OUT_OF_RESOURCE;
+            goto exit;
+        }
+
+        while (bytes_to_read_in_cycle) {
+            /* reallocate if needed */
+            if (OMPIO_IOVEC_INITIAL_SIZE*block <= k) {
+                mca_io_ompio_io_array_t *grown = NULL;
+
+                block ++;
+                /* keep the old pointer until realloc has succeeded so it
+                 * can still be released on failure */
+                grown = (mca_io_ompio_io_array_t *)realloc
+                    (fh->f_io_array, OMPIO_IOVEC_INITIAL_SIZE *
+                     block * sizeof (mca_io_ompio_io_array_t));
+                if (NULL == grown) {
+                    opal_output(1, "OUT OF MEMORY\n");
+                    free (fh->f_io_array);
+                    fh->f_io_array = NULL;
+                    ret = OMPI_ERR_OUT_OF_RESOURCE;
+                    goto exit;
+                }
+                fh->f_io_array = grown;
+            }
+
+            /* current buffer iovec entry fully consumed: move to the next */
+            if (decoded_iov[i].iov_len -
+                (total_bytes_read - sum_previous_counts) <= 0) {
+                sum_previous_counts += decoded_iov[i].iov_len;
+                i = i + 1;
+            }
+
+            disp = (OPAL_PTRDIFF_TYPE)decoded_iov[i].iov_base +
+                (total_bytes_read - sum_previous_counts);
+            fh->f_io_array[k].memory_address = (IOVBASE_TYPE *)disp;
+
+            if (decoded_iov[i].iov_len -
+                (total_bytes_read - sum_previous_counts) >=
+                bytes_to_read_in_cycle) {
+                fh->f_io_array[k].length = bytes_to_read_in_cycle;
+            }
+            else {
+                fh->f_io_array[k].length = decoded_iov[i].iov_len -
+                    (total_bytes_read - sum_previous_counts);
+            }
+
+            /* current file-view entry fully consumed: move to the next one,
+             * wrapping to the start of the view shifted by one extent */
+            if (fh->f_decoded_iov[j].iov_len -
+                (fh->f_total_bytes - sum_previous_length) <= 0) {
+                sum_previous_length += fh->f_decoded_iov[j].iov_len;
+                j = j + 1;
+                if (j == (int)fh->f_iov_count) {
+                    j = 0;
+                    sum_previous_length = 0;
+                    fh->f_offset += fh->f_view_extent;
+                    fh->f_position_in_file_view = sum_previous_length;
+                    fh->f_index_in_file_view = j;
+                    fh->f_total_bytes = 0;
+                }
+            }
+
+            disp = (OPAL_PTRDIFF_TYPE)fh->f_decoded_iov[j].iov_base +
+                (fh->f_total_bytes - sum_previous_length);
+            fh->f_io_array[k].offset = (IOVBASE_TYPE *)(disp + fh->f_offset);
+
+            /* never cross the end of the current file-view entry */
+            if (fh->f_decoded_iov[j].iov_len -
+                (fh->f_total_bytes - sum_previous_length)
+                < fh->f_io_array[k].length) {
+                fh->f_io_array[k].length = fh->f_decoded_iov[j].iov_len -
+                    (fh->f_total_bytes - sum_previous_length);
+            }
+            total_bytes_read += fh->f_io_array[k].length;
+            fh->f_total_bytes += fh->f_io_array[k].length;
+            bytes_to_read_in_cycle -= fh->f_io_array[k].length;
+            k = k + 1;
+        }
+        fh->f_position_in_file_view = sum_previous_length;
+        fh->f_index_in_file_view = j;
+        fh->f_num_of_io_entries = k;
+
+#if 0
+        if (fh->f_rank == 0) {
+            int i;
+            printf("*************************** %d\n", fh->f_num_of_io_entries);
+
+            for (i=0 ; i<fh->f_num_of_io_entries ; i++) {
+                printf(" ADDRESS: %p  OFFSET: %p   LENGTH: %d\n",
+                       fh->f_io_array[i].memory_address,
+                       fh->f_io_array[i].offset,
+                       fh->f_io_array[i].length);
+            }
+        }
+#endif
+
+        if (fh->f_num_of_io_entries) {
+            fh->f_fbtl->fbtl_preadv (fh, NULL);
+        }
+
+        fh->f_num_of_io_entries = 0;
+        if (NULL != fh->f_io_array) {
+            free (fh->f_io_array);
+            fh->f_io_array = NULL;
+        }
+    }
+
+exit:
+    /* the decoded buffer description is released on every path */
+    if (NULL != decoded_iov) {
+        free (decoded_iov);
+        decoded_iov = NULL;
+    }
+
+    return ret;
+}
diff --git a/ompi/mca/fcoll/individual/fcoll_individual_file_read_all_begin.c b/ompi/mca/fcoll/individual/fcoll_individual_file_read_all_begin.c
new file mode 100644
index 0000000000..311574f0ec
--- /dev/null
+++ b/ompi/mca/fcoll/individual/fcoll_individual_file_read_all_begin.c
@@ -0,0 +1,36 @@
+/*
+ * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
+ *                         University Research and Technology
+ *                         Corporation.  All rights reserved.
+ * Copyright (c) 2004-2005 The University of Tennessee and The University
+ *                         of Tennessee Research Foundation.  All rights
+ *                         reserved.
+ * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
+ *                         University of Stuttgart.  All rights reserved.
+ * Copyright (c) 2004-2005 The Regents of the University of California.
+ *                         All rights reserved.
+ * Copyright (c) 2008-2011 University of Houston. All rights reserved.
+ * $COPYRIGHT$
+ *
+ * Additional copyrights may follow
+ *
+ * $HEADER$
+ */
+
+#include "ompi_config.h"
+#include "fcoll_individual.h"
+
+#include "mpi.h"
+#include "ompi/constants.h"
+#include "ompi/mca/fcoll/fcoll.h"
+
+
+/*
+ * read_all_begin entry of the individual fcoll component.
+ *
+ * Placeholder implementation: it prints a trace line to stdout and reports
+ * success.  None of the arguments are read or modified.
+ */
+int
+mca_fcoll_individual_file_read_all_begin (mca_io_ompio_file_t *fh,
+                                          void *buf,
+                                          int count,
+                                          struct ompi_datatype_t *datatype)
+{
+    const int rc = OMPI_SUCCESS;
+
+    printf ("%s", "INDIVIDUAL READ ALL BEGIN\n");
+
+    return rc;
+}
diff --git a/ompi/mca/fcoll/individual/fcoll_individual_file_read_all_end.c b/ompi/mca/fcoll/individual/fcoll_individual_file_read_all_end.c
new file mode 100644
index 0000000000..d9b1f7a08d
--- /dev/null
+++ b/ompi/mca/fcoll/individual/fcoll_individual_file_read_all_end.c
@@ -0,0 +1,35 @@
+/*
+ * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
+ *                         University Research and Technology
+ *                         Corporation.  All rights reserved.
+ * Copyright (c) 2004-2005 The University of Tennessee and The University
+ *                         of Tennessee Research Foundation.  All rights
+ *                         reserved.
+ * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
+ *                         University of Stuttgart.  All rights reserved.
+ * Copyright (c) 2004-2005 The Regents of the University of California.
+ *                         All rights reserved.
+ * Copyright (c) 2008-2011 University of Houston. All rights reserved.
+ * $COPYRIGHT$
+ *
+ * Additional copyrights may follow
+ *
+ * $HEADER$
+ */
+
+#include "ompi_config.h"
+#include "fcoll_individual.h"
+
+#include "mpi.h"
+#include "ompi/constants.h"
+#include "ompi/mca/fcoll/fcoll.h"
+
+
+/*
+ * read_all_end entry of the individual fcoll component.
+ *
+ * Placeholder implementation: it prints a trace line to stdout and reports
+ * success.  Neither buf nor status is touched.
+ */
+int
+mca_fcoll_individual_file_read_all_end (mca_io_ompio_file_t *fh,
+                                        void *buf,
+                                        ompi_status_public_t *status)
+{
+    const int rc = OMPI_SUCCESS;
+
+    printf ("%s", "INDIVIDUAL READ ALL END\n");
+
+    return rc;
+}
diff --git a/ompi/mca/fcoll/individual/fcoll_individual_file_write_all.c b/ompi/mca/fcoll/individual/fcoll_individual_file_write_all.c
new file mode 100644
index 0000000000..96c5d20c49
--- /dev/null
+++ b/ompi/mca/fcoll/individual/fcoll_individual_file_write_all.c
@@ -0,0 +1,203 @@
+/*
+ * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
+ *                         University Research and Technology
+ *                         Corporation.  All rights reserved.
+ * Copyright (c) 2004-2005 The University of Tennessee and The University
+ *                         of Tennessee Research Foundation.  All rights
+ *                         reserved.
+ * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
+ *                         University of Stuttgart.  All rights reserved.
+ * Copyright (c) 2004-2005 The Regents of the University of California.
+ *                         All rights reserved.
+ * Copyright (c) 2008-2011 University of Houston. All rights reserved.
+ * $COPYRIGHT$
+ *
+ * Additional copyrights may follow
+ *
+ * $HEADER$
+ */
+
+#include "ompi_config.h"
+#include "fcoll_individual.h"
+
+#include "mpi.h"
+#include "ompi/constants.h"
+#include "ompi/mca/fcoll/fcoll.h"
+#include "ompi/mca/io/ompio/io_ompio.h"
+#include "ompi/mca/io/io.h"
+#include "math.h"
+/* NOTE(review): the header name on the next line was lost when this patch
+ * was extracted; <unistd.h> is presumed -- confirm against the repository. */
+#include <unistd.h>
+
+#define TIME_BREAKDOWN 0
+
+/**
+ * Write the data described by (buf, count, datatype) using only the calling
+ * process's own file view; this routine performs no communication itself.
+ *
+ * The request is processed in cycles of at most bytes_per_cycle bytes.  For
+ * every cycle an io_array is built in fh->f_io_array that pairs a piece of
+ * the decoded user buffer with a piece of the decoded file view, and the
+ * array is handed to fh->f_fbtl->fbtl_pwritev().  While data is consumed the
+ * file-view cursor kept in fh (f_offset, f_total_bytes,
+ * f_position_in_file_view, f_index_in_file_view) is advanced.
+ *
+ * @param fh        file handle; its io_array and file-view cursor are modified
+ * @param buf       user buffer holding the data to write
+ * @param count     number of datatype elements in buf
+ * @param datatype  datatype describing the layout of buf
+ * @param status    not written by this routine
+ *
+ * @return OMPI_SUCCESS on success, OMPI_ERR_OUT_OF_RESOURCE if the io_array
+ *         cannot be (re)allocated, OMPI_ERR_BAD_PARAM if the configured
+ *         cycle buffer size yields zero bytes per cycle.
+ */
+int mca_fcoll_individual_file_write_all (mca_io_ompio_file_t *fh,
+                                         void *buf,
+                                         int count,
+                                         struct ompi_datatype_t *datatype,
+                                         ompi_status_public_t *status)
+{
+    size_t total_bytes_written = 0;     /* total bytes that have been written*/
+    size_t bytes_to_write_in_cycle = 0; /* left to be written in a cycle*/
+    size_t bytes_per_cycle = 0;         /* total written in each cycle by each process*/
+    size_t last_cycle_bytes = 0;        /* max_data % bytes_per_cycle */
+    int index = 0;
+    int cycles = 0;
+    int ret = OMPI_SUCCESS;
+
+    uint32_t iov_count = 0;
+    struct iovec *decoded_iov = NULL;
+
+    size_t max_data = 0;
+    int i = 0; /* index into the decoded iovec of the buffer */
+    int j = 0; /* index into the file view iovec */
+    int k = 0; /* index into the io_array */
+    size_t sum_previous_counts = 0;
+    size_t sum_previous_length = 0;
+#if TIME_BREAKDOWN
+    double start = 0, end=0, start_all=0, end_all=0, total_io=0;
+#endif
+
+#if TIME_BREAKDOWN
+    start_all = MPI_Wtime();
+#endif
+
+    ompi_io_ompio_decode_datatype (fh,
+                                   datatype,
+                                   count,
+                                   buf,
+                                   &max_data,
+                                   &decoded_iov,
+                                   &iov_count);
+
+    if (mca_fcoll_individual_constant_cbs) {
+        bytes_per_cycle = mca_fcoll_individual_cycle_buffer_size/fh->f_size;
+    }
+    else {
+        bytes_per_cycle = mca_fcoll_individual_cycle_buffer_size;
+    }
+
+    if (0 != max_data) {
+        /* A zero cycle size (buffer size smaller than the number of
+         * processes, or configured as 0) would divide by zero below. */
+        if (0 == bytes_per_cycle) {
+            opal_output (1, "individual fcoll: cycle buffer size is too small\n");
+            ret = OMPI_ERR_BAD_PARAM;
+            goto exit;
+        }
+
+        /* Integer ceiling.  The previous ceil((float)...) lost precision
+         * once max_data exceeded 2^24 and could under-count the cycles,
+         * which silently truncated the transfer. */
+        last_cycle_bytes = max_data % bytes_per_cycle;
+        cycles = (int)(max_data / bytes_per_cycle);
+        if (0 != last_cycle_bytes) {
+            cycles++;
+        }
+    }
+
+#if 0
+    printf ("MAX DATA: %d\n", max_data);
+    printf ("Bytes per Cycle: %d Cycles: %d\n",bytes_per_cycle, cycles);
+#endif
+
+    sum_previous_length = fh->f_position_in_file_view;
+    j = fh->f_index_in_file_view;
+
+    for (index = 0; index < cycles; index++) {
+        OPAL_PTRDIFF_TYPE disp;
+        int block = 1;
+
+        k = 0;
+        if ((index == cycles-1) && (0 != last_cycle_bytes)) {
+            bytes_to_write_in_cycle = last_cycle_bytes;
+        }
+        else {
+            bytes_to_write_in_cycle = bytes_per_cycle;
+        }
+
+        fh->f_io_array = (mca_io_ompio_io_array_t *)malloc
+            (OMPIO_IOVEC_INITIAL_SIZE * sizeof (mca_io_ompio_io_array_t));
+        if (NULL == fh->f_io_array) {
+            opal_output(1, "OUT OF MEMORY\n");
+            ret = OMPI_ERR_OUT_OF_RESOURCE;
+            goto exit;
+        }
+
+        while (bytes_to_write_in_cycle) {
+            /* reallocate if needed */
+            if (OMPIO_IOVEC_INITIAL_SIZE*block <= k) {
+                mca_io_ompio_io_array_t *grown = NULL;
+
+                block ++;
+                /* keep the old pointer until realloc has succeeded so it
+                 * can still be released on failure */
+                grown = (mca_io_ompio_io_array_t *)realloc
+                    (fh->f_io_array, OMPIO_IOVEC_INITIAL_SIZE *
+                     block * sizeof (mca_io_ompio_io_array_t));
+                if (NULL == grown) {
+                    opal_output(1, "OUT OF MEMORY\n");
+                    free (fh->f_io_array);
+                    fh->f_io_array = NULL;
+                    ret = OMPI_ERR_OUT_OF_RESOURCE;
+                    goto exit;
+                }
+                fh->f_io_array = grown;
+            }
+
+            /* current buffer iovec entry fully consumed: move to the next */
+            if (decoded_iov[i].iov_len -
+                (total_bytes_written - sum_previous_counts) <= 0) {
+                sum_previous_counts += decoded_iov[i].iov_len;
+                i = i + 1;
+            }
+
+            disp = (OPAL_PTRDIFF_TYPE)decoded_iov[i].iov_base +
+                (total_bytes_written - sum_previous_counts);
+            fh->f_io_array[k].memory_address = (IOVBASE_TYPE *)disp;
+
+            if (decoded_iov[i].iov_len -
+                (total_bytes_written - sum_previous_counts) >=
+                bytes_to_write_in_cycle) {
+                fh->f_io_array[k].length = bytes_to_write_in_cycle;
+            }
+            else {
+                fh->f_io_array[k].length = decoded_iov[i].iov_len -
+                    (total_bytes_written - sum_previous_counts);
+            }
+
+            /* current file-view entry fully consumed: move to the next one,
+             * wrapping to the start of the view shifted by one extent */
+            if (fh->f_decoded_iov[j].iov_len -
+                (fh->f_total_bytes - sum_previous_length) <= 0) {
+                sum_previous_length += fh->f_decoded_iov[j].iov_len;
+                j = j + 1;
+                if (j == (int)fh->f_iov_count) {
+                    j = 0;
+                    sum_previous_length = 0;
+                    fh->f_offset += fh->f_view_extent;
+                    fh->f_position_in_file_view = sum_previous_length;
+                    fh->f_index_in_file_view = j;
+                    fh->f_total_bytes = 0;
+                }
+            }
+
+            disp = (OPAL_PTRDIFF_TYPE)fh->f_decoded_iov[j].iov_base +
+                (fh->f_total_bytes - sum_previous_length);
+            fh->f_io_array[k].offset = (IOVBASE_TYPE *)(disp + fh->f_offset);
+
+            /* never cross the end of the current file-view entry */
+            if (fh->f_decoded_iov[j].iov_len -
+                (fh->f_total_bytes - sum_previous_length)
+                < fh->f_io_array[k].length) {
+                fh->f_io_array[k].length = fh->f_decoded_iov[j].iov_len -
+                    (fh->f_total_bytes - sum_previous_length);
+            }
+            total_bytes_written += fh->f_io_array[k].length;
+            fh->f_total_bytes += fh->f_io_array[k].length;
+            bytes_to_write_in_cycle -= fh->f_io_array[k].length;
+            k = k + 1;
+        }
+        fh->f_position_in_file_view = sum_previous_length;
+        fh->f_index_in_file_view = j;
+        fh->f_num_of_io_entries = k;
+
+#if 0
+        if (fh->f_rank == 0) {
+            int d;
+            printf("*************************** %d\n", fh->f_num_of_io_entries);
+
+            for (d=0 ; d<fh->f_num_of_io_entries ; d++) {
+                printf(" ADDRESS: %p  OFFSET: %p   LENGTH: %d\n",
+                       fh->f_io_array[d].memory_address,
+                       fh->f_io_array[d].offset,
+                       fh->f_io_array[d].length);
+            }
+        }
+#endif
+
+        if (fh->f_num_of_io_entries) {
+            fh->f_fbtl->fbtl_pwritev (fh, NULL);
+        }
+
+        fh->f_num_of_io_entries = 0;
+        if (NULL != fh->f_io_array) {
+            free (fh->f_io_array);
+            fh->f_io_array = NULL;
+        }
+    }
+
+exit:
+    /* the decoded buffer description is released on every path */
+    if (NULL != decoded_iov) {
+        free (decoded_iov);
+        decoded_iov = NULL;
+    }
+
+    return ret;
+}
diff --git a/ompi/mca/fcoll/individual/fcoll_individual_file_write_all_begin.c b/ompi/mca/fcoll/individual/fcoll_individual_file_write_all_begin.c
new file mode 100644
index 0000000000..75050387d2
--- /dev/null
+++ b/ompi/mca/fcoll/individual/fcoll_individual_file_write_all_begin.c
@@ -0,0 +1,36 @@
+/*
+ * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
+ *                         University Research and Technology
+ *                         Corporation.  All rights reserved.
+ * Copyright (c) 2004-2005 The University of Tennessee and The University
+ *                         of Tennessee Research Foundation.  All rights
+ *                         reserved.
+ * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
+ *                         University of Stuttgart.  All rights reserved.
+ * Copyright (c) 2004-2005 The Regents of the University of California.
+ *                         All rights reserved.
+ * Copyright (c) 2008-2011 University of Houston. All rights reserved.
+ * $COPYRIGHT$
+ *
+ * Additional copyrights may follow
+ *
+ * $HEADER$
+ */
+
+#include "ompi_config.h"
+#include "fcoll_individual.h"
+
+#include "mpi.h"
+#include "ompi/constants.h"
+#include "ompi/mca/fcoll/fcoll.h"
+
+
+/*
+ * write_all_begin entry of the individual fcoll component.
+ *
+ * Placeholder implementation: it prints a trace line to stdout and reports
+ * success.  None of the arguments are read or modified.
+ */
+int
+mca_fcoll_individual_file_write_all_begin (mca_io_ompio_file_t *fh,
+                                           void *buf,
+                                           int count,
+                                           struct ompi_datatype_t *datatype)
+{
+    const int rc = OMPI_SUCCESS;
+
+    printf ("%s", "INDIVIDUAL WRITE ALL BEGIN\n");
+
+    return rc;
+}
diff --git a/ompi/mca/fcoll/individual/fcoll_individual_file_write_all_end.c b/ompi/mca/fcoll/individual/fcoll_individual_file_write_all_end.c
new file mode 100644
index 0000000000..b080e801a5
--- /dev/null
+++ b/ompi/mca/fcoll/individual/fcoll_individual_file_write_all_end.c
@@ -0,0 +1,35 @@
+/*
+ * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
+ *                         University Research and Technology
+ *                         Corporation.  All rights reserved.
+ * Copyright (c) 2004-2005 The University of Tennessee and The University
+ *                         of Tennessee Research Foundation.  All rights
+ *                         reserved.
+ * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
+ *                         University of Stuttgart.  All rights reserved.
+ * Copyright (c) 2004-2005 The Regents of the University of California.
+ *                         All rights reserved.
+ * Copyright (c) 2008-2011 University of Houston. All rights reserved.
+ * $COPYRIGHT$
+ *
+ * Additional copyrights may follow
+ *
+ * $HEADER$
+ */
+
+#include "ompi_config.h"
+#include "fcoll_individual.h"
+
+#include "mpi.h"
+#include "ompi/constants.h"
+#include "ompi/mca/fcoll/fcoll.h"
+
+
+/*
+ * write_all_end entry of the individual fcoll component.
+ *
+ * Placeholder implementation: it prints a trace line to stdout and reports
+ * success.  Neither buf nor status is touched.
+ */
+int
+mca_fcoll_individual_file_write_all_end (mca_io_ompio_file_t *fh,
+                                         void *buf,
+                                         ompi_status_public_t *status)
+{
+    const int rc = OMPI_SUCCESS;
+
+    printf ("%s", "INDIVIDUAL WRITE ALL END\n");
+
+    return rc;
+}
diff --git a/ompi/mca/fcoll/individual/fcoll_individual_module.c b/ompi/mca/fcoll/individual/fcoll_individual_module.c
new file mode 100644
index 0000000000..76a1b5a2b8
--- /dev/null
+++ b/ompi/mca/fcoll/individual/fcoll_individual_module.c
@@ -0,0 +1,91 @@
+/*
+ * Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana
+ *                         University Research and Technology
+ *                         Corporation.  All rights reserved.
+ * Copyright (c) 2004-2006 The University of Tennessee and The University
+ *                         of Tennessee Research Foundation.  All rights
+ *                         reserved.
+ * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
+ *                         University of Stuttgart.  All rights reserved.
+ * Copyright (c) 2004-2005 The Regents of the University of California.
+ *                         All rights reserved.
+ * Copyright (c) 2008-2011 University of Houston. All rights reserved.
+ * $COPYRIGHT$
+ *
+ * Additional copyrights may follow
+ *
+ * $HEADER$
+ */
+
+#include "ompi_config.h"
+#include "fcoll_individual.h"
+
+/* NOTE(review): the header name on the next line was lost when this patch
+ * was extracted; <stdio.h> is presumed -- confirm against the repository. */
+#include <stdio.h>
+
+#include "mpi.h"
+#include "opal/mca/base/mca_base_param.h"
+#include "ompi/mca/fcoll/fcoll.h"
+#include "ompi/mca/fcoll/base/base.h"
+
+
+/*
+ * Function table of the individual fcoll module.  A pointer to this table
+ * is what mca_fcoll_individual_component_file_query() hands back.
+ */
+static mca_fcoll_base_module_1_0_0_t individual =  {
+    mca_fcoll_individual_module_init,
+    mca_fcoll_individual_module_finalize,
+    mca_fcoll_individual_file_read_all,
+    mca_fcoll_individual_file_read_all_begin,
+    mca_fcoll_individual_file_read_all_end,
+    mca_fcoll_individual_file_write_all,
+    mca_fcoll_individual_file_write_all_begin,
+    mca_fcoll_individual_file_write_all_end
+};
+
+/*
+ * Component-level initialisation query.  Both thread flags are ignored and
+ * the call always succeeds.
+ */
+int
+mca_fcoll_individual_component_init_query(bool enable_progress_threads,
+                                          bool enable_mpi_threads)
+{
+    return OMPI_SUCCESS;
+}
+
+/*
+ * Per-file query.
+ *
+ * *priority is always set to mca_fcoll_individual_priority first.  When
+ * that value is not positive the component declines (NULL is returned).
+ * Otherwise, if mca_fcoll_base_query_table() reports a match for
+ * "individual", the priority is raised to at least 50, and the module
+ * function table is returned.
+ */
+mca_fcoll_base_module_1_0_0_t *
+mca_fcoll_individual_component_file_query (mca_io_ompio_file_t *fh, int *priority)
+{
+    const int configured = mca_fcoll_individual_priority;
+
+    *priority = configured;
+    if (configured <= 0) {
+        return NULL;
+    }
+
+    if (mca_fcoll_base_query_table (fh, "individual") && (*priority < 50)) {
+        *priority = 50;
+    }
+
+    return &individual;
+}
+
+/*
+ * Counterpart of the file query for the case that this module was not
+ * selected.  The query performs no step that would have to be undone, so
+ * there is nothing to do here for now.
+ */
+int mca_fcoll_individual_component_file_unquery (mca_io_ompio_file_t *file)
+{
+    return OMPI_SUCCESS;
+}
+
+/* Module initialisation: nothing to set up, always succeeds. */
+int mca_fcoll_individual_module_init (mca_io_ompio_file_t *file)
+{
+    return OMPI_SUCCESS;
+}
+
+
+/* Module finalisation: nothing to release, always succeeds. */
+int mca_fcoll_individual_module_finalize (mca_io_ompio_file_t *file)
+{
+    return OMPI_SUCCESS;
+}
diff --git a/ompi/mca/fcoll/static/Makefile.am b/ompi/mca/fcoll/static/Makefile.am
new file mode 100644
index 0000000000..0994429b23
--- /dev/null
+++ b/ompi/mca/fcoll/static/Makefile.am
@@ -0,0 +1,54 @@
+#
+# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
+#                         University Research and Technology
+#                         Corporation.  All rights reserved.
+# Copyright (c) 2004-2005 The University of Tennessee and The University
+#                         of Tennessee Research Foundation.  All rights
+#                         reserved.
+# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
+#                         University of Stuttgart.  All rights reserved.
+# Copyright (c) 2004-2005 The Regents of the University of California.
+#                         All rights reserved.
+# Copyright (c) 2008-2011 University of Houston. All rights reserved.
+# $COPYRIGHT$
+#
+# Additional copyrights may follow
+#
+# $HEADER$
+#
+
+sources = \
+        fcoll_static.h \
+        fcoll_static_module.c \
+        fcoll_static_component.c \
+        fcoll_static_file_read_all.c \
+        fcoll_static_file_read_all_begin.c \
+        fcoll_static_file_read_all_end.c \
+        fcoll_static_file_write_all.c \
+        fcoll_static_file_write_all_begin.c \
+        fcoll_static_file_write_all_end.c
+
+# Make the output library in this directory, and name it either
+# mca_<type>_<name>.la (for DSO builds) or libmca_<type>_<name>.la
+# (for static builds).
+ +if MCA_BUILD_ompi_fcoll_static_DSO +component_noinst = +component_install = mca_fcoll_static.la +else +component_noinst = libmca_fcoll_static.la +component_install = +endif + +mcacomponentdir = $(libdir)/openmpi +mcacomponent_LTLIBRARIES = $(component_install) +mca_fcoll_static_la_SOURCES = $(sources) +mca_fcoll_static_la_LDFLAGS = -module -avoid-version +mca_fcoll_static_la_LIBADD = \ + $(top_ompi_builddir)/ompi/libmpi.la \ + $(top_ompi_builddir)/orte/libopen-rte.la \ + $(top_ompi_builddir)/opal/libopen-pal.la + +noinst_LTLIBRARIES = $(component_noinst) +libmca_fcoll_static_la_SOURCES =$(sources) +libmca_fcoll_static_la_LDFLAGS = -module -avoid-version diff --git a/ompi/mca/fcoll/static/fcoll_static.h b/ompi/mca/fcoll/static/fcoll_static.h new file mode 100644 index 0000000000..43fba1d847 --- /dev/null +++ b/ompi/mca/fcoll/static/fcoll_static.h @@ -0,0 +1,85 @@ +/* + * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. + * Copyright (c) 2004-2006 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2005 The Regents of the University of California. + * All rights reserved. + * Copyright (c) 2008-2011 University of Houston. All rights reserved. 
+ * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#ifndef MCA_FCOLL_STATIC_EXPORT_H +#define MCA_FCOLL_STATIC_EXPORT_H + +#include "ompi_config.h" + +#include "mpi.h" +#include "opal/mca/mca.h" +#include "ompi/mca/fcoll/fcoll.h" +#include "ompi/mca/fcoll/base/base.h" +#include "ompi/mca/io/ompio/io_ompio.h" + +BEGIN_C_DECLS + +/* Globally exported variables */ + +extern int mca_fcoll_static_priority; +extern int mca_fcoll_static_num_io_procs; +extern int mca_fcoll_static_constant_cbs; +extern int mca_fcoll_static_cycle_buffer_size; + +OMPI_MODULE_DECLSPEC extern mca_fcoll_base_component_2_0_0_t mca_fcoll_static_component; +/* API functions */ + +int mca_fcoll_static_component_init_query(bool enable_progress_threads, + bool enable_mpi_threads); +struct mca_fcoll_base_module_1_0_0_t * +mca_fcoll_static_component_file_query (mca_io_ompio_file_t *fh, int *priority); + +int mca_fcoll_static_component_file_unquery (mca_io_ompio_file_t *file); + +int mca_fcoll_static_module_init (mca_io_ompio_file_t *file); +int mca_fcoll_static_module_finalize (mca_io_ompio_file_t *file); + +int mca_fcoll_static_file_read_all (mca_io_ompio_file_t *fh, + void *buf, + int count, + struct ompi_datatype_t *datatype, + ompi_status_public_t * status); + +int mca_fcoll_static_file_read_all_begin (mca_io_ompio_file_t *fh, + void *buf, + int count, + struct ompi_datatype_t *datatype); + +int mca_fcoll_static_file_read_all_end (mca_io_ompio_file_t *fh, + void *buf, + ompi_status_public_t * status); + +int mca_fcoll_static_file_write_all (mca_io_ompio_file_t *fh, + void *buf, + int count, + struct ompi_datatype_t *datatype, + ompi_status_public_t * status); + +int mca_fcoll_static_file_write_all_begin (mca_io_ompio_file_t *fh, + void *buf, + int count, + struct ompi_datatype_t *datatype); + +int mca_fcoll_static_file_write_all_end (mca_io_ompio_file_t *fh, + void *buf, + ompi_status_public_t * status); + +END_C_DECLS + +#endif /* MCA_FCOLL_STATIC_EXPORT_H */ diff 
--git a/ompi/mca/fcoll/static/fcoll_static_component.c b/ompi/mca/fcoll/static/fcoll_static_component.c new file mode 100644 index 0000000000..3a0d74e5fa --- /dev/null +++ b/ompi/mca/fcoll/static/fcoll_static_component.c @@ -0,0 +1,132 @@ +/* + * Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. + * Copyright (c) 2004-2005 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2005 The Regents of the University of California. + * All rights reserved. + * Copyright (c) 2008 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2008-2011 University of Houston. All rights reserved. + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + * + * These symbols are in a file by themselves to provide nice linker + * semantics. Since linkers generally pull in symbols by object + * files, keeping these symbols as the only symbols in this file + * prevents utility programs such as "ompi_info" from having to import + * entire components just to query their version and parameters. 
+ */
+
+#include "ompi_config.h"
+#include "fcoll_static.h"
+
+#include "mpi.h"
+#include "ompi/mca/fcoll/fcoll.h"
+#include "fcoll_static.h"
+
+/*
+ * Public string showing the fcoll ompi_static component version number
+ */
+const char *mca_fcoll_static_component_version_string =
+  "Open MPI static collective MCA component version " OMPI_VERSION;
+
+/*
+ * Global variables
+ */
+int mca_fcoll_static_priority = 10;
+int mca_fcoll_static_num_io_procs = -1;
+int mca_fcoll_static_constant_cbs = 1;
+int mca_fcoll_static_cycle_buffer_size = OMPIO_PREALLOC_MAX_BUF_SIZE;
+
+/*
+ * Local function
+ */
+static int static_register(void);
+
+/*
+ * Instantiate the public struct with all of our public information
+ * and pointers to our public functions in it
+ */
+mca_fcoll_base_component_2_0_0_t mca_fcoll_static_component = {
+
+    /* First, the mca_component_t struct containing meta information
+     * about the component itself */
+
+    {
+        MCA_FCOLL_BASE_VERSION_2_0_0,
+
+        /* Component name and version */
+        "static",
+        OMPI_MAJOR_VERSION,
+        OMPI_MINOR_VERSION,
+        OMPI_RELEASE_VERSION,
+        static_register,
+        NULL
+    },
+    {
+        /* The component is checkpoint ready */
+        MCA_BASE_METADATA_PARAM_CHECKPOINT
+    },
+
+    mca_fcoll_static_component_init_query,
+    mca_fcoll_static_component_file_query,
+    mca_fcoll_static_component_file_unquery
+};
+
+
+/*
+ * Register the MCA parameters of the static fcoll component.
+ *
+ * Two passes over the same parameter table, in table order:
+ *   1. each parameter is searched with mca_base_param_find(); when it is
+ *      found, its value is loaded into the backing global variable;
+ *   2. each parameter is registered with mca_base_param_reg_int(), using
+ *      the current value of the backing variable as default and the
+ *      variable itself as storage.
+ *
+ * Always returns OMPI_SUCCESS.
+ */
+static int
+static_register(void)
+{
+    static const struct {
+        const char *lookup_name;  /* name handed to mca_base_param_find() */
+        const char *reg_name;     /* name handed to mca_base_param_reg_int() */
+        const char *help;         /* help text of the parameter */
+        int *storage;             /* global variable backing the parameter */
+    } params[] = {
+        { "static_priority",
+          "priority",
+          "Priority of the static fcoll component",
+          &mca_fcoll_static_priority },
+        { "static_num_io_procs",
+          "num_io_procs",
+          "Number of writers in the static fcoll component",
+          &mca_fcoll_static_num_io_procs },
+        { "static_constant_cbs",
+          "constant_cbs",
+          "wether we are using constant or scaling cycle buffer size in the static fcoll component",
+          &mca_fcoll_static_constant_cbs },
+        { "static_cycle_buffer_size",
+          "cycle_buffer_size",
+          "Cycle Buffer Size of the static fcoll component",
+          &mca_fcoll_static_cycle_buffer_size }
+    };
+    const size_t num_params = sizeof(params) / sizeof(params[0]);
+    size_t p;
+
+    /* pass 1: pick up values of parameters that can already be found */
+    for (p = 0; p < num_params; p++) {
+        int handle = mca_base_param_find ("fcoll", NULL, params[p].lookup_name);
+
+        if (handle >= 0) {
+            mca_base_param_lookup_int (handle, params[p].storage);
+        }
+    }
+
+    /* pass 2: register, defaulting to whatever pass 1 left in the variable */
+    for (p = 0; p < num_params; p++) {
+        mca_base_param_reg_int (&mca_fcoll_static_component.fcollm_version,
+                                params[p].reg_name,
+                                params[p].help,
+                                false, false, *params[p].storage,
+                                params[p].storage);
+    }
+
+    return OMPI_SUCCESS;
+}
diff --git a/ompi/mca/fcoll/static/fcoll_static_file_read_all.c b/ompi/mca/fcoll/static/fcoll_static_file_read_all.c
new file mode 100644
index 0000000000..b1928df2e0
--- /dev/null
+++ b/ompi/mca/fcoll/static/fcoll_static_file_read_all.c
@@ -0,0 +1,469 @@
+/*
+ * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
+ *                         University Research and Technology
+ *                         Corporation.  All rights reserved.
+ * Copyright (c) 2004-2005 The University of Tennessee and The University
+ *                         of Tennessee Research Foundation.  All rights
+ *                         reserved.
+ * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
+ *                         University of Stuttgart.  All rights reserved.
+ * Copyright (c) 2004-2005 The Regents of the University of California.
+ *                         All rights reserved.
+ * Copyright (c) 2008-2011 University of Houston. All rights reserved.
+ * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#include "ompi_config.h" +#include "fcoll_static.h" + +#include "mpi.h" +#include "ompi/constants.h" +#include "ompi/mca/fcoll/fcoll.h" +#include "ompi/mca/io/ompio/io_ompio.h" +#include "ompi/mca/io/io.h" +#include "math.h" +#include "ompi/mca/pml/pml.h" +#include + +#define TIME_BREAKDOWN 0 + +int +mca_fcoll_static_file_read_all (mca_io_ompio_file_t *fh, + void *buf, + int count, + struct ompi_datatype_t *datatype, + ompi_status_public_t *status) +{ + size_t position = 0; + MPI_Aint bytes_to_read_in_cycle = 0; /* left to be read in a cycle*/ + size_t bytes_per_cycle = 0; /* total read in each cycle by each process*/ + + int index = 0; + int cycles = 0, local_cycles; + int i=0; + int ret; + + /* iovec structure and count of the buffer passed in */ + uint32_t iov_count = 0; + struct iovec *decoded_iov = NULL; + uint32_t iov_index = 0; + size_t current_position = 0; + char *receive_buf = NULL; + + /* global iovec at the readers that contain the iovecs created from + file_set_view */ + uint32_t global_iov_count = 0; + struct iovec *global_iov = NULL; + + char *global_buf = NULL; + MPI_Aint global_count = 0; + + /* array that contains the sorted indices of the global_iov */ + int *sorted = NULL; + + int *displs = NULL; + + size_t max_data = 0; + int *iovec_count_per_process = NULL; + int *bytes_per_process = NULL; + +#if TIME_BREAKDOWN + double start=0, end=0, start_all=0, end_all=0; + double total_gather=0 , total_io=0; +#endif + + if (opal_datatype_is_contiguous_memory_layout(&datatype->super,1)) { + fh->f_flags |= OMPIO_CONTIGUOUS_MEMORY; + } + + /* In case the data is not contigous in memory, decode it into an iovec */ + if (! (fh->f_flags & OMPIO_CONTIGUOUS_MEMORY)) { + ompi_io_ompio_decode_datatype (fh, + datatype, + count, + buf, + &max_data, + &decoded_iov, + &iov_count); + } + else { + max_data = count * datatype->super.size; + } + + if (! 
(fh->f_flags & OMPIO_AGGREGATOR_IS_SET)) { + ompi_io_ompio_set_aggregator_props (fh, + mca_fcoll_static_num_io_procs, + max_data); + } + + if (fh->f_procs_in_group[fh->f_aggregator_index] == fh->f_rank) { + bytes_per_process = (int *)malloc (sizeof(int)*fh->f_procs_per_group); + if (NULL == bytes_per_process) { + opal_output (1, "OUT OF MEMORY\n"); + return OMPI_ERR_OUT_OF_RESOURCE; + } + iovec_count_per_process = (int *)malloc (sizeof(int)*fh->f_procs_per_group); + if (NULL == iovec_count_per_process) { + opal_output (1, "OUT OF MEMORY\n"); + return OMPI_ERR_OUT_OF_RESOURCE; + } + displs = (int*)malloc (fh->f_procs_per_group*sizeof(int)); + if (NULL == displs) { + opal_output (1, "OUT OF MEMORY\n"); + return OMPI_ERR_OUT_OF_RESOURCE; + } + } + + /* + * Calculate how many bytes are read in each cycle + */ + if (mca_fcoll_static_constant_cbs) { + bytes_per_cycle = + mca_fcoll_static_cycle_buffer_size/fh->f_procs_per_group; + } + else { + bytes_per_cycle = mca_fcoll_static_cycle_buffer_size; + } + + /* TODO : number of cycles has to be the same for all processes in a group, + so need to Allreduce the cycle within a group. 
+ This works now if all processes are reading the same amount of data */ + cycles = ceil((double)max_data/bytes_per_cycle); + local_cycles = cycles; + ret = fh->f_comm->c_coll.coll_allreduce (&local_cycles, + &cycles, + 1, + MPI_INT, + MPI_MAX, + fh->f_comm, + fh->f_comm->c_coll.coll_allreduce_module); +#if 0 + printf ("Bytes per Process: %d Cycles: %d Procs_per_group %d\n", + bytes_per_cycle, cycles, fh->f_procs_per_group); +#endif + + for (index = 0; index < cycles; index++) { + struct iovec *iov = NULL; + int iov_size = 0; + + if (fh->f_procs_in_group[fh->f_aggregator_index] == fh->f_rank) { + /* + printf ("********* READER %d PROCS %d CYCLE %d of %d************\n", + fh->f_rank, + fh->f_procs_per_group, + index, + cycles); + */ + memset(displs, 0x0, fh->f_procs_per_group*sizeof(int)); + memset(bytes_per_process, 0x0, fh->f_procs_per_group*sizeof(int)); + memset(iovec_count_per_process, 0x0, fh->f_procs_per_group*sizeof(int)); + } + if (local_cycles > index) { + if ((index == local_cycles-1) && (max_data % bytes_per_cycle)) { + bytes_to_read_in_cycle = max_data % bytes_per_cycle; + } + else if (max_data <= bytes_per_cycle) { + bytes_to_read_in_cycle = max_data; + } + else { + bytes_to_read_in_cycle = bytes_per_cycle; + } + } + else { + bytes_to_read_in_cycle = 0; + } + + /********************************************************** + Gather from each process iovecs to where to write the data + *********************************************************/ + if (bytes_to_read_in_cycle) { + ompi_io_ompio_generate_current_file_view (fh, + bytes_to_read_in_cycle, + &iov, + &iov_size); + } + /* + for (i=0 ; if_rank, + iov[i].iov_base, + iov[i].iov_len); + } + */ + ompi_io_ompio_gather_array (&iov_size, + 1, + MPI_INT, + iovec_count_per_process, + 1, + MPI_INT, + fh->f_aggregator_index, + fh->f_procs_in_group, + fh->f_procs_per_group, + fh->f_comm); + + if (fh->f_procs_in_group[fh->f_aggregator_index] == fh->f_rank) { + displs[0] = 0; + global_iov_count = 
iovec_count_per_process[0]; + for (i=1 ; if_procs_per_group ; i++) { + global_iov_count += iovec_count_per_process[i]; + displs[i] = displs[i-1] + iovec_count_per_process[i-1]; + } + } + + if (fh->f_procs_in_group[fh->f_aggregator_index] == fh->f_rank) { + global_iov = (struct iovec*)malloc (global_iov_count * + sizeof(struct iovec)); + if (NULL == global_iov) { + opal_output (1, "OUT OF MEMORY\n"); + return OMPI_ERR_OUT_OF_RESOURCE; + } + } + + if (fh->f_flags & OMPIO_UNIFORM_FVIEW) { + ompi_io_ompio_gather_array (iov, + iov_size, + fh->f_iov_type, + global_iov, + iov_size, + fh->f_iov_type, + fh->f_aggregator_index, + fh->f_procs_in_group, + fh->f_procs_per_group, + fh->f_comm); + } + else { + ompi_io_ompio_gatherv_array (iov, + iov_size, + fh->f_iov_type, + global_iov, + iovec_count_per_process, + displs, + fh->f_iov_type, + fh->f_aggregator_index, + fh->f_procs_in_group, + fh->f_procs_per_group, + fh->f_comm); + } + /* sort it */ + if (fh->f_procs_in_group[fh->f_aggregator_index] == fh->f_rank) { + sorted = (int *)malloc (global_iov_count * sizeof(int)); + if (NULL == sorted) { + opal_output (1, "OUT OF MEMORY\n"); + return OMPI_ERR_OUT_OF_RESOURCE; + } + ompi_io_ompio_sort_iovec (global_iov, global_iov_count, sorted); + } + + /********************************************************** + **************** DONE GATHERING OF IOVECS **************** + *********************************************************/ + + /* gather from each process how many bytes each will be recieving */ + ompi_io_ompio_gather_array (&bytes_to_read_in_cycle, + 1, + MPI_INT, + bytes_per_process, + 1, + MPI_INT, + fh->f_aggregator_index, + fh->f_procs_in_group, + fh->f_procs_per_group, + fh->f_comm); + + /* Calculate the displacement on where to put the data and allocate + the recieve buffer (global_buf) */ + if (fh->f_procs_in_group[fh->f_aggregator_index] == fh->f_rank) { + displs[0] = 0; + global_count = bytes_per_process[0]; + for (i=1 ; if_procs_per_group ; i++) { + global_count += 
bytes_per_process[i]; + displs[i] = displs[i-1] + bytes_per_process[i-1]; + } + global_buf = malloc (global_count); + if (NULL == global_buf) { + opal_output (1, "OUT OF MEMORY\n"); + return OMPI_ERR_OUT_OF_RESOURCE; + } + } + + /********************************************************** + ** Create the io array, sort it, and pass it to fbtl **** + *********************************************************/ + if (fh->f_procs_in_group[fh->f_aggregator_index] == fh->f_rank) { + MPI_Aint temp = 0; + int x = 0, k = 0; + + fh->f_io_array = (mca_io_ompio_io_array_t *) malloc + (global_iov_count * sizeof (mca_io_ompio_io_array_t)); + if (NULL == fh->f_io_array) { + opal_output(1, "OUT OF MEMORY\n"); + return OMPI_ERR_OUT_OF_RESOURCE; + } + + for (i=0 ; if_procs_per_group ; i++) { + for (x=0 ; xf_io_array[k].offset = global_iov[sorted[k]].iov_base; + fh->f_io_array[k].length = global_iov[sorted[k]].iov_len; + fh->f_io_array[k].memory_address = &global_buf[temp]; + temp += fh->f_io_array[k].length; + k ++; + } + } + fh->f_num_of_io_entries = k; + + /* + printf("%d *************************** %d\n",fh->f_rank, fh->f_num_of_io_entries); + for (i=0 ; if_num_of_io_entries ; i++) + { + printf(" ADDRESS: %p OFFSET: %p LENGTH: %d\n", + fh->f_io_array[i].memory_address, + fh->f_io_array[i].offset, + fh->f_io_array[i].length); + } + + printf("******* SORTED ************ %d\n", fh->f_num_of_io_entries); + for (i=0 ; if_num_of_io_entries ; i++) + { + printf(" ADDRESS: %p OFFSET: %p LENGTH: %d\n", + fh->f_io_array[sorted[i]].memory_address, + fh->f_io_array[sorted[i]].offset, + fh->f_io_array[sorted[i]].length); + } + */ + if (fh->f_num_of_io_entries) { + fh->f_fbtl->fbtl_preadv (fh, NULL); + } + /* + if (fh->f_procs_in_group[fh->f_aggregator_index] == fh->f_rank) + for (i=0 ; if_flags & OMPIO_CONTIGUOUS_MEMORY) { + receive_buf = &((char*)buf)[position]; + } + else if (bytes_to_read_in_cycle) { + /* allocate a receive buffer and copy the data that needs + to be received into it in case 
the data is non-contigous + in memory */ + receive_buf = malloc (bytes_to_read_in_cycle); + if (NULL == receive_buf) { + opal_output (1, "OUT OF MEMORY\n"); + return OMPI_ERR_OUT_OF_RESOURCE; + } + } + + ompi_io_ompio_scatterv_array (global_buf, + bytes_per_process, + displs, + MPI_BYTE, + receive_buf, + bytes_to_read_in_cycle, + MPI_BYTE, + fh->f_aggregator_index, + fh->f_procs_in_group, + fh->f_procs_per_group, + fh->f_comm); + + position += bytes_to_read_in_cycle; + + /* If data is not contigous in memory, copy the data from the + receive buffer into the buffer passed in */ + if (!(fh->f_flags & OMPIO_CONTIGUOUS_MEMORY)) { + OPAL_PTRDIFF_TYPE mem_address; + size_t remaining = 0; + size_t temp_position = 0; + + remaining = bytes_to_read_in_cycle; + + while (remaining && (iov_count > iov_index)) { + mem_address = (OPAL_PTRDIFF_TYPE) + (decoded_iov[iov_index].iov_base) + current_position; + + if (remaining >= + (decoded_iov[iov_index].iov_len - current_position)) { + memcpy ((IOVBASE_TYPE *) mem_address, + receive_buf+temp_position, + decoded_iov[iov_index].iov_len - current_position); + remaining = remaining - + (decoded_iov[iov_index].iov_len - current_position); + temp_position = temp_position + + (decoded_iov[iov_index].iov_len - current_position); + iov_index = iov_index + 1; + current_position = 0; + } + else { + memcpy ((IOVBASE_TYPE *) mem_address, + receive_buf+temp_position, + remaining); + current_position = current_position + remaining; + remaining = 0; + } + } + + if (NULL != receive_buf) { + free (receive_buf); + receive_buf = NULL; + } + } + + /********************************************************** + **************** DONE SCATTERING OF DATA ***************** + *********************************************************/ + + + if (fh->f_procs_in_group[fh->f_aggregator_index] == fh->f_rank) { + fh->f_num_of_io_entries = 0; + if (NULL != fh->f_io_array) { + free (fh->f_io_array); + fh->f_io_array = NULL; + } + if (NULL != global_iov) { + free 
(global_iov); + global_iov = NULL; + } + if (NULL != global_buf) { + free (global_buf); + global_buf = NULL; + } + if (NULL != sorted) { + free (sorted); + sorted = NULL; + } + } + + if (NULL != iov) { + free (iov); + iov = NULL; + } + } + if (fh->f_procs_in_group[fh->f_aggregator_index] == fh->f_rank) { + if (NULL != iovec_count_per_process) { + free (iovec_count_per_process); + iovec_count_per_process = NULL; + } + if (NULL != bytes_per_process) { + free (bytes_per_process); + bytes_per_process = NULL; + } + if (NULL != displs) { + free (displs); + displs = NULL; + } + } + + return OMPI_SUCCESS; +} diff --git a/ompi/mca/fcoll/static/fcoll_static_file_read_all_begin.c b/ompi/mca/fcoll/static/fcoll_static_file_read_all_begin.c new file mode 100644 index 0000000000..a383ce9b76 --- /dev/null +++ b/ompi/mca/fcoll/static/fcoll_static_file_read_all_begin.c @@ -0,0 +1,36 @@ +/* + * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. + * Copyright (c) 2004-2005 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2005 The Regents of the University of California. + * All rights reserved. + * Copyright (c) 2008-2011 University of Houston. All rights reserved. 
+ * $COPYRIGHT$
+ *
+ * Additional copyrights may follow
+ *
+ * $HEADER$
+ */
+
+#include "ompi_config.h"
+#include "fcoll_static.h"
+
+#include "mpi.h"
+#include "ompi/constants.h"
+#include "ompi/mca/fcoll/fcoll.h"
+
+/* Stub for the module's read_all_begin slot: ignores fh, buf, count and
+ * datatype; it only writes a marker line to stdout and reports OMPI_SUCCESS. */
+int
+mca_fcoll_static_file_read_all_begin (mca_io_ompio_file_t *fh, void *buf,
+                                      int count,
+                                      struct ompi_datatype_t *datatype)
+{
+    fputs ("STATIC READ ALL BEGIN\n", stdout);
+    return OMPI_SUCCESS;
+}
diff --git a/ompi/mca/fcoll/static/fcoll_static_file_read_all_end.c b/ompi/mca/fcoll/static/fcoll_static_file_read_all_end.c
new file mode 100644
index 0000000000..7d3101cd2f
--- /dev/null
+++ b/ompi/mca/fcoll/static/fcoll_static_file_read_all_end.c
@@ -0,0 +1,35 @@
+/*
+ * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
+ *                         University Research and Technology
+ *                         Corporation. All rights reserved.
+ * Copyright (c) 2004-2005 The University of Tennessee and The University
+ *                         of Tennessee Research Foundation. All rights
+ *                         reserved.
+ * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
+ *                         University of Stuttgart. All rights reserved.
+ * Copyright (c) 2004-2005 The Regents of the University of California.
+ *                         All rights reserved.
+ * Copyright (c) 2008-2011 University of Houston. All rights reserved.
+ * $COPYRIGHT$
+ *
+ * Additional copyrights may follow
+ *
+ * $HEADER$
+ */
+
+#include "ompi_config.h"
+#include "fcoll_static.h"
+
+#include "mpi.h"
+#include "ompi/constants.h"
+#include "ompi/mca/fcoll/fcoll.h"
+
+/* Stub for the module's read_all_end slot: ignores fh, buf and status; it
+ * only writes a marker line to stdout and reports OMPI_SUCCESS. */
+int
+mca_fcoll_static_file_read_all_end (mca_io_ompio_file_t *fh, void *buf,
+                                    ompi_status_public_t *status)
+{
+    fputs ("STATIC READ ALL END\n", stdout);
+    return OMPI_SUCCESS;
+}
diff --git a/ompi/mca/fcoll/static/fcoll_static_file_write_all.c b/ompi/mca/fcoll/static/fcoll_static_file_write_all.c
new file mode 100644
index 0000000000..75b0e131c2
--- /dev/null
+++ b/ompi/mca/fcoll/static/fcoll_static_file_write_all.c
@@ -0,0 +1,457 @@
+/*
+ * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
+ *                         University Research and Technology
+ *                         Corporation. All rights reserved.
+ * Copyright (c) 2004-2005 The University of Tennessee and The University
+ *                         of Tennessee Research Foundation. All rights
+ *                         reserved.
+ * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
+ *                         University of Stuttgart. All rights reserved.
+ * Copyright (c) 2004-2005 The Regents of the University of California.
+ *                         All rights reserved.
+ * Copyright (c) 2008-2011 University of Houston. All rights reserved.
+ * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#include "ompi_config.h" +#include "fcoll_static.h" + +#include "mpi.h" +#include "ompi/constants.h" +#include "ompi/mca/fcoll/fcoll.h" +#include "ompi/mca/io/ompio/io_ompio.h" +#include "ompi/mca/io/io.h" +#include "math.h" +#include "ompi/mca/pml/pml.h" +#include + +int +mca_fcoll_static_file_write_all (mca_io_ompio_file_t *fh, + void *buf, + int count, + struct ompi_datatype_t *datatype, + ompi_status_public_t *status) +{ + MPI_Aint total_bytes_written = 0; /* total bytes that have been written*/ + MPI_Aint bytes_to_write_in_cycle = 0; /* left to be written in a cycle*/ + size_t bytes_per_cycle = 0; /* total written in each cycle by each process*/ + + int index = 0; + int cycles = 0, local_cycles = 0; + int i=0; + int ret; + + /* iovec structure and count of the buffer passed in */ + uint32_t iov_count = 0; + struct iovec *decoded_iov = NULL; + uint32_t iov_index = 0; + size_t current_position = 0; + char *send_buf = NULL; + + /* global iovec at the writers that contain the iovecs created from + file_set_view */ + uint32_t global_iov_count = 0; + struct iovec *global_iov = NULL; + int *sorted; + + char *global_buf = NULL; + MPI_Aint global_count = 0; + + int *displs = NULL; + + size_t max_data = 0; + int *iovec_count_per_process = NULL; + int *bytes_per_process = NULL; + + if (opal_datatype_is_contiguous_memory_layout(&datatype->super,1)) { + fh->f_flags |= OMPIO_CONTIGUOUS_MEMORY; + } + + /* In case the data is not contigous in memory, decode it into an iovec */ + if (! (fh->f_flags & OMPIO_CONTIGUOUS_MEMORY)) { + ompi_io_ompio_decode_datatype (fh, + datatype, + count, + buf, + &max_data, + &decoded_iov, + &iov_count); + } + else { + max_data = count * datatype->super.size; + } + + if (! 
(fh->f_flags & OMPIO_AGGREGATOR_IS_SET)) { + ompi_io_ompio_set_aggregator_props (fh, + mca_fcoll_static_num_io_procs, + max_data); + } + + if (fh->f_procs_in_group[fh->f_aggregator_index] == fh->f_rank) { + bytes_per_process = (int *)malloc (sizeof(int)*fh->f_procs_per_group); + if (NULL == bytes_per_process) { + opal_output (1, "OUT OF MEMORY\n"); + return OMPI_ERR_OUT_OF_RESOURCE; + } + iovec_count_per_process = (int *)malloc (sizeof(int)*fh->f_procs_per_group); + if (NULL == iovec_count_per_process) { + opal_output (1, "OUT OF MEMORY\n"); + return OMPI_ERR_OUT_OF_RESOURCE; + } + displs = (int*)malloc (fh->f_procs_per_group*sizeof(int)); + if (NULL == displs) { + opal_output (1, "OUT OF MEMORY\n"); + return OMPI_ERR_OUT_OF_RESOURCE; + } + } + + /* + * Calculate how many bytes are written in each cycle + */ + if (mca_fcoll_static_constant_cbs) { + bytes_per_cycle = + mca_fcoll_static_cycle_buffer_size/fh->f_procs_per_group; + } + else { + bytes_per_cycle = mca_fcoll_static_cycle_buffer_size; + } + + /* TODO : number of cycles has to be the same for all processes in a group, + so need to Allreduce the cycle within a group. 
+ This works now if all processes are writing the same amount of data */ + + cycles = ceil((double)max_data/bytes_per_cycle); + local_cycles = cycles; + ret = fh->f_comm->c_coll.coll_allreduce (&local_cycles, + &cycles, + 1, + MPI_INT, + MPI_MAX, + fh->f_comm, + fh->f_comm->c_coll.coll_allreduce_module); +#if 0 + printf ("Max Data: %d Bytes per Process: %d Cycles: %d Procs_per_group %d\n", + max_data, bytes_per_cycle, cycles, fh->f_procs_per_group); +#endif + for (index = 0; index < cycles; index++) { + struct iovec *iov = NULL; + int iov_size = 0; + + if (fh->f_procs_in_group[fh->f_aggregator_index] == fh->f_rank) { + /*printf ("********** CYCLE %d **************\n",index);*/ + memset(displs, 0x0, fh->f_procs_per_group*sizeof(int)); + memset(bytes_per_process, 0x0, fh->f_procs_per_group*sizeof(int)); + memset(iovec_count_per_process, 0x0, fh->f_procs_per_group*sizeof(int)); + } + + if (local_cycles > index) { + if ((index == local_cycles-1) && (max_data % bytes_per_cycle)) { + bytes_to_write_in_cycle = max_data % bytes_per_cycle; + } + else if (max_data <= bytes_per_cycle) { + bytes_to_write_in_cycle = max_data; + } + else { + bytes_to_write_in_cycle = bytes_per_cycle; + } + } + else { + bytes_to_write_in_cycle = 0; + } + /* + if (fh->f_procs_in_group[fh->f_aggregator_index] == fh->f_rank) { + printf ("****%d: CYCLE %d Bytes %d**********\n", + fh->f_rank, + index, + bytes_to_write_in_cycle); + } + */ + + /********************************************************** + **Gather the Data from all the processes at the writers ** + *********************************************************/ + + /* gather from each process how many bytes each will be sending */ + ompi_io_ompio_gather_array (&bytes_to_write_in_cycle, + 1, + MPI_INT, + bytes_per_process, + 1, + MPI_INT, + fh->f_aggregator_index, + fh->f_procs_in_group, + fh->f_procs_per_group, + fh->f_comm); + + /* Calculate the displacement on where to put the data and allocate + the recieve buffer (global_buf) */ + if 
(fh->f_procs_in_group[fh->f_aggregator_index] == fh->f_rank) { + displs[0] = 0; + global_count = bytes_per_process[0]; + for (i=1 ; if_procs_per_group ; i++) { + global_count += bytes_per_process[i]; + displs[i] = displs[i-1] + bytes_per_process[i-1]; + } + + global_buf = malloc (global_count); + if (NULL == global_buf) { + opal_output (1, "OUT OF MEMORY\n"); + return OMPI_ERR_OUT_OF_RESOURCE; + } + } + + if (fh->f_flags & OMPIO_CONTIGUOUS_MEMORY) { + send_buf = &((char*)buf)[total_bytes_written]; + } + else if (bytes_to_write_in_cycle) { + /* allocate a send buffer and copy the data that needs + to be sent into it in case the data is non-contigous + in memory */ + OPAL_PTRDIFF_TYPE mem_address; + size_t remaining = 0; + size_t temp_position = 0; + + send_buf = malloc (bytes_to_write_in_cycle); + if (NULL == send_buf) { + opal_output (1, "OUT OF MEMORY\n"); + return OMPI_ERR_OUT_OF_RESOURCE; + } + + remaining = bytes_to_write_in_cycle; + + while (remaining && (iov_count > iov_index)) { + mem_address = (OPAL_PTRDIFF_TYPE) + (decoded_iov[iov_index].iov_base) + current_position; + + if (remaining >= + (decoded_iov[iov_index].iov_len - current_position)) { + memcpy (send_buf+temp_position, + (IOVBASE_TYPE *)mem_address, + decoded_iov[iov_index].iov_len - current_position); + remaining = remaining - + (decoded_iov[iov_index].iov_len - current_position); + temp_position = temp_position + + (decoded_iov[iov_index].iov_len - current_position); + iov_index = iov_index + 1; + current_position = 0; + } + else { + memcpy (send_buf+temp_position, + (IOVBASE_TYPE *)mem_address, + remaining); + current_position = current_position + remaining; + remaining = 0; + } + } + } + total_bytes_written += bytes_to_write_in_cycle; + + /* Get the data from all processes to the writer*/ + ompi_io_ompio_gatherv_array (send_buf, + bytes_to_write_in_cycle, + MPI_BYTE, + global_buf, + bytes_per_process, + displs, + MPI_BYTE, + fh->f_aggregator_index, + fh->f_procs_in_group, + 
fh->f_procs_per_group, + fh->f_comm); + + if (!(fh->f_flags & OMPIO_CONTIGUOUS_MEMORY)) { + if (NULL != send_buf) { + free (send_buf); + send_buf = NULL; + } + } + /* + if (fh->f_procs_in_group[fh->f_aggregator_index] == fh->f_rank) + for (i=0 ; if_rank, + iov[i].iov_base, + iov[i].iov_len); + } + */ + ompi_io_ompio_gather_array (&iov_size, + 1, + MPI_INT, + iovec_count_per_process, + 1, + MPI_INT, + fh->f_aggregator_index, + fh->f_procs_in_group, + fh->f_procs_per_group, + fh->f_comm); + if (fh->f_procs_in_group[fh->f_aggregator_index] == fh->f_rank) { + displs[0] = 0; + global_iov_count = iovec_count_per_process[0]; + for (i=1 ; if_procs_per_group ; i++) { + global_iov_count += iovec_count_per_process[i]; + displs[i] = displs[i-1] + iovec_count_per_process[i-1]; + } + } + + if (fh->f_procs_in_group[fh->f_aggregator_index] == fh->f_rank) { + global_iov = (struct iovec*)malloc (global_iov_count * + sizeof(struct iovec)); + if (NULL == global_iov) { + opal_output (1, "OUT OF MEMORY\n"); + return OMPI_ERR_OUT_OF_RESOURCE; + } + } + + if (fh->f_flags & OMPIO_UNIFORM_FVIEW) { + ompi_io_ompio_gather_array (iov, + iov_size, + fh->f_iov_type, + global_iov, + iov_size, + fh->f_iov_type, + fh->f_aggregator_index, + fh->f_procs_in_group, + fh->f_procs_per_group, + fh->f_comm); + } + else { + ompi_io_ompio_gatherv_array (iov, + iov_size, + fh->f_iov_type, + global_iov, + iovec_count_per_process, + displs, + fh->f_iov_type, + fh->f_aggregator_index, + fh->f_procs_in_group, + fh->f_procs_per_group, + fh->f_comm); + } + + /* sort it */ + if (fh->f_procs_in_group[fh->f_aggregator_index] == fh->f_rank) { + sorted = (int *)malloc (global_iov_count * sizeof(int)); + if (NULL == sorted) { + opal_output (1, "OUT OF MEMORY\n"); + return OMPI_ERR_OUT_OF_RESOURCE; + } + ompi_io_ompio_sort_iovec (global_iov, global_iov_count, sorted); + } + + /********************************************************** + **************** DONE GATHERING OF IOVECS **************** + 
*********************************************************/ + + /********************************************************** + ** Create the io array, sort it, and pass it to fbtl **** + *********************************************************/ + if (fh->f_procs_in_group[fh->f_aggregator_index] == fh->f_rank) { + MPI_Aint temp = 0; + int x = 0, k = 0; + + fh->f_io_array = (mca_io_ompio_io_array_t *) malloc + (global_iov_count * sizeof (mca_io_ompio_io_array_t)); + if (NULL == fh->f_io_array) { + opal_output(1, "OUT OF MEMORY\n"); + return OMPI_ERR_OUT_OF_RESOURCE; + } + + for (i=0 ; if_procs_per_group ; i++) { + for (x=0 ; xf_io_array[k].offset = global_iov[sorted[k]].iov_base; + fh->f_io_array[k].length = global_iov[sorted[k]].iov_len; + fh->f_io_array[k].memory_address = &global_buf[temp]; + temp += fh->f_io_array[k].length; + k ++; + } + } + fh->f_num_of_io_entries = k; + + /* + printf("*************************** %d\n", fh->f_num_of_io_entries); + for (i=0 ; if_num_of_io_entries ; i++) + { + printf(" ADDRESS: %p OFFSET: %p LENGTH: %d\n", + fh->f_io_array[i].memory_address, + fh->f_io_array[i].offset, + fh->f_io_array[i].length); + } + + printf("******* SORTED ************ %d\n", fh->f_num_of_io_entries); + for (i=0 ; if_num_of_io_entries ; i++) + { + printf(" ADDRESS: %p OFFSET: %p LENGTH: %d\n", + fh->f_io_array[sorted[i]].memory_address, + fh->f_io_array[sorted[i]].offset, + fh->f_io_array[sorted[i]].length); + } + */ + if (fh->f_num_of_io_entries) { + fh->f_fbtl->fbtl_pwritev (fh, NULL); + } + } + /********************************************************** + ******************** DONE WRITING ************************ + *********************************************************/ + if (fh->f_procs_in_group[fh->f_aggregator_index] == fh->f_rank) { + fh->f_num_of_io_entries = 0; + if (NULL != fh->f_io_array) { + free (fh->f_io_array); + fh->f_io_array = NULL; + } + if (NULL != global_iov) { + free (global_iov); + global_iov = NULL; + } + if (NULL != global_buf) { + 
free (global_buf); + global_buf = NULL; + } + if (NULL != sorted) { + free (sorted); + sorted = NULL; + } + } + if (NULL != iov) { + free (iov); + iov = NULL; + } + } + if (fh->f_procs_in_group[fh->f_aggregator_index] == fh->f_rank) { + if (NULL != iovec_count_per_process) { + free (iovec_count_per_process); + iovec_count_per_process = NULL; + } + if (NULL != bytes_per_process) { + free (bytes_per_process); + bytes_per_process = NULL; + } + if (NULL != displs) { + free (displs); + displs = NULL; + } + } + + return OMPI_SUCCESS; +} diff --git a/ompi/mca/fcoll/static/fcoll_static_file_write_all_begin.c b/ompi/mca/fcoll/static/fcoll_static_file_write_all_begin.c new file mode 100644 index 0000000000..5828974b3c --- /dev/null +++ b/ompi/mca/fcoll/static/fcoll_static_file_write_all_begin.c @@ -0,0 +1,36 @@ +/* + * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. + * Copyright (c) 2004-2005 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2005 The Regents of the University of California. + * All rights reserved. + * Copyright (c) 2008-2011 University of Houston. All rights reserved. 
+ * $COPYRIGHT$
+ *
+ * Additional copyrights may follow
+ *
+ * $HEADER$
+ */
+
+#include "ompi_config.h"
+#include "fcoll_static.h"
+
+#include "mpi.h"
+#include "ompi/constants.h"
+#include "ompi/mca/fcoll/fcoll.h"
+
+/* Stub for the module's write_all_begin slot: ignores fh, buf, count and
+ * datatype; it only writes a marker line to stdout and reports OMPI_SUCCESS. */
+int
+mca_fcoll_static_file_write_all_begin (mca_io_ompio_file_t *fh, void *buf,
+                                       int count,
+                                       struct ompi_datatype_t *datatype)
+{
+    fputs ("STATIC WRITE ALL BEGIN\n", stdout);
+    return OMPI_SUCCESS;
+}
diff --git a/ompi/mca/fcoll/static/fcoll_static_file_write_all_end.c b/ompi/mca/fcoll/static/fcoll_static_file_write_all_end.c
new file mode 100644
index 0000000000..3c36d19c8b
--- /dev/null
+++ b/ompi/mca/fcoll/static/fcoll_static_file_write_all_end.c
@@ -0,0 +1,35 @@
+/*
+ * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
+ *                         University Research and Technology
+ *                         Corporation. All rights reserved.
+ * Copyright (c) 2004-2005 The University of Tennessee and The University
+ *                         of Tennessee Research Foundation. All rights
+ *                         reserved.
+ * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
+ *                         University of Stuttgart. All rights reserved.
+ * Copyright (c) 2004-2005 The Regents of the University of California.
+ *                         All rights reserved.
+ * Copyright (c) 2008-2011 University of Houston. All rights reserved.
+ * $COPYRIGHT$
+ *
+ * Additional copyrights may follow
+ *
+ * $HEADER$
+ */
+
+#include "ompi_config.h"
+#include "fcoll_static.h"
+
+#include "mpi.h"
+#include "ompi/constants.h"
+#include "ompi/mca/fcoll/fcoll.h"
+
+/* Stub for the module's write_all_end slot: ignores fh, buf and status; it
+ * only writes a marker line to stdout and reports OMPI_SUCCESS. */
+int
+mca_fcoll_static_file_write_all_end (mca_io_ompio_file_t *fh, void *buf,
+                                     ompi_status_public_t *status)
+{
+    fputs ("STATIC WRITE ALL END\n", stdout);
+    return OMPI_SUCCESS;
+}
diff --git a/ompi/mca/fcoll/static/fcoll_static_module.c b/ompi/mca/fcoll/static/fcoll_static_module.c
new file mode 100644
index 0000000000..882af338ce
--- /dev/null
+++ b/ompi/mca/fcoll/static/fcoll_static_module.c
@@ -0,0 +1,91 @@
+/*
+ * Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana
+ *                         University Research and Technology
+ *                         Corporation. All rights reserved.
+ * Copyright (c) 2004-2006 The University of Tennessee and The University
+ *                         of Tennessee Research Foundation. All rights
+ *                         reserved.
+ * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
+ *                         University of Stuttgart. All rights reserved.
+ * Copyright (c) 2004-2005 The Regents of the University of California.
+ *                         All rights reserved.
+ * Copyright (c) 2008-2011 University of Houston. All rights reserved.
+ * $COPYRIGHT$
+ *
+ * Additional copyrights may follow
+ *
+ * $HEADER$
+ */
+
+#include "ompi_config.h"
+#include "fcoll_static.h"
+
+#include 
+
+#include "mpi.h"
+#include "opal/mca/base/mca_base_param.h"
+#include "ompi/mca/fcoll/fcoll.h"
+#include "ompi/mca/fcoll/base/base.h"
+
+/*
+ * Actions structure: the positional function table of the static fcoll
+ * module.  A pointer to it is what component_file_query() hands back.
+ */
+static mca_fcoll_base_module_1_0_0_t static_t = {
+    mca_fcoll_static_module_init,
+    mca_fcoll_static_module_finalize,
+    mca_fcoll_static_file_read_all,
+    mca_fcoll_static_file_read_all_begin,
+    mca_fcoll_static_file_read_all_end,
+    mca_fcoll_static_file_write_all,
+    mca_fcoll_static_file_write_all_begin,
+    mca_fcoll_static_file_write_all_end
+};
+
+/* Component-wide query: does nothing, ignores both flags, reports success. */
+int
+mca_fcoll_static_component_init_query(bool enable_progress_threads,
+                                      bool enable_mpi_threads)
+{
+    return OMPI_SUCCESS;
+}
+
+/*
+ * Per-file query.  Stores the configured priority in *priority and returns
+ * NULL when that priority is not positive.  Otherwise, when
+ * mca_fcoll_base_query_table() returns non-zero for "static", the reported
+ * priority is raised to at least 50; the module table is then returned.
+ */
+mca_fcoll_base_module_1_0_0_t *
+mca_fcoll_static_component_file_query (mca_io_ompio_file_t *fh, int *priority)
+{
+    const int configured = mca_fcoll_static_priority;
+
+    *priority = configured;
+    if (configured <= 0) {
+        return NULL;
+    }
+
+    if (mca_fcoll_base_query_table (fh, "static") && *priority < 50) {
+        *priority = 50;
+    }
+    return &static_t;
+}
+
+/* No query-time steps need undoing when unselected; hook kept for later. */
+int mca_fcoll_static_component_file_unquery (mca_io_ompio_file_t *file)
+{
+    return OMPI_SUCCESS;
+}
+
+/* Module start-up hook: does nothing with the file and reports success. */
+int mca_fcoll_static_module_init (mca_io_ompio_file_t *file)
+{
+    return OMPI_SUCCESS;
+}
+
+/* Module tear-down hook: does nothing with the file and reports success. */
+int mca_fcoll_static_module_finalize (mca_io_ompio_file_t *file)
+{
+    return OMPI_SUCCESS;
+}
diff --git a/ompi/mca/fcoll/two_phase/Makefile.am b/ompi/mca/fcoll/two_phase/Makefile.am
new file mode 100644
index 0000000000..8765563fec
--- /dev/null
+++ b/ompi/mca/fcoll/two_phase/Makefile.am
@@ -0,0 +1,54 @@
+#
+# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
+#                         University Research and Technology
+#                         Corporation. All rights reserved.
+# Copyright (c) 2004-2005 The University of Tennessee and The University
+#                         of Tennessee Research Foundation. All rights
+#                         reserved.
+# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
+#                         University of Stuttgart. All rights reserved.
+# Copyright (c) 2004-2005 The Regents of the University of California.
+#                         All rights reserved.
+# Copyright (c) 2008-2011 University of Houston. All rights reserved.
+# $COPYRIGHT$
+#
+# Additional copyrights may follow
+#
+# $HEADER$
+#
+
+sources = \
+        fcoll_two_phase.h \
+        fcoll_two_phase_module.c \
+        fcoll_two_phase_component.c \
+        fcoll_two_phase_file_read_all.c \
+        fcoll_two_phase_file_read_all_begin.c \
+        fcoll_two_phase_file_read_all_end.c \
+        fcoll_two_phase_file_write_all.c \
+        fcoll_two_phase_file_write_all_begin.c \
+        fcoll_two_phase_file_write_all_end.c
+
+# Make the output library in this directory, and name it either
+# mca__.la (for DSO builds) or libmca__.la
+# (for static builds).
+ +if MCA_BUILD_ompi_fcoll_two_phase_DSO +component_noinst = +component_install = mca_fcoll_two_phase.la +else +component_noinst = libmca_fcoll_two_phase.la +component_install = +endif + +mcacomponentdir = $(libdir)/openmpi +mcacomponent_LTLIBRARIES = $(component_install) +mca_fcoll_two_phase_la_SOURCES = $(sources) +mca_fcoll_two_phase_la_LDFLAGS = -module -avoid-version +mca_fcoll_two_phase_la_LIBADD = \ + $(top_ompi_builddir)/ompi/libmpi.la \ + $(top_ompi_builddir)/orte/libopen-rte.la \ + $(top_ompi_builddir)/opal/libopen-pal.la + +noinst_LTLIBRARIES = $(component_noinst) +libmca_fcoll_two_phase_la_SOURCES =$(sources) +libmca_fcoll_two_phase_la_LDFLAGS = -module -avoid-version diff --git a/ompi/mca/fcoll/two_phase/fcoll_two_phase.h b/ompi/mca/fcoll/two_phase/fcoll_two_phase.h new file mode 100644 index 0000000000..8da03edd2b --- /dev/null +++ b/ompi/mca/fcoll/two_phase/fcoll_two_phase.h @@ -0,0 +1,86 @@ +/* + * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. + * Copyright (c) 2004-2006 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2005 The Regents of the University of California. + * All rights reserved. + * Copyright (c) 2008-2011 University of Houston. All rights reserved. 
+ * $COPYRIGHT$
+ *
+ * Additional copyrights may follow
+ *
+ * $HEADER$
+ */
+/* Declarations shared by the two_phase fcoll sources: tunables, component struct, entry points. */
+#ifndef MCA_FCOLL_TWO_PHASE_EXPORT_H
+#define MCA_FCOLL_TWO_PHASE_EXPORT_H
+
+#include "ompi_config.h"
+
+#include "mpi.h"
+#include "opal/mca/mca.h"
+#include "ompi/mca/fcoll/fcoll.h"
+#include "ompi/mca/fcoll/base/base.h"
+#include "ompi/mca/io/ompio/io_ompio.h"
+
+BEGIN_C_DECLS
+
+/* Globally exported variables: tunables defined, with initial values, in fcoll_two_phase_component.c */
+
+extern int mca_fcoll_two_phase_priority;          /* component priority; defined as 10 */
+extern int mca_fcoll_two_phase_num_io_procs;      /* number of writers; defined as -1 */
+extern int mca_fcoll_two_phase_constant_cbs;      /* constant vs. scaling cycle buffer size; defined as 0 */
+extern int mca_fcoll_two_phase_cycle_buffer_size; /* defined as OMPIO_PREALLOC_MAX_BUF_SIZE */
+
+OMPI_MODULE_DECLSPEC extern mca_fcoll_base_component_2_0_0_t mca_fcoll_two_phase_component;
+
+/* API functions */
+/* Component-level hooks: these three are the ones stored in mca_fcoll_two_phase_component. */
+int mca_fcoll_two_phase_component_init_query(bool enable_progress_threads,
+                                             bool enable_mpi_threads);
+struct mca_fcoll_base_module_1_0_0_t *
+mca_fcoll_two_phase_component_file_query (mca_io_ompio_file_t *fh, int *priority);
+
+int mca_fcoll_two_phase_component_file_unquery (mca_io_ompio_file_t *file);
+/* Module-level hooks. NOTE(review): presumably gathered into a module table in fcoll_two_phase_module.c -- confirm. */
+int mca_fcoll_two_phase_module_init (mca_io_ompio_file_t *file);
+int mca_fcoll_two_phase_module_finalize (mca_io_ompio_file_t *file);
+/* Read entry points: the full call followed by its _begin/_end variants (same argument split as the static component). */
+int mca_fcoll_two_phase_file_read_all (mca_io_ompio_file_t *fh,
+                                       void *buf,
+                                       int count,
+                                       struct ompi_datatype_t *datatype,
+                                       ompi_status_public_t * status);
+
+int mca_fcoll_two_phase_file_read_all_begin (mca_io_ompio_file_t *fh,
+                                             void *buf,
+                                             int count,
+                                             struct ompi_datatype_t *datatype);
+
+int mca_fcoll_two_phase_file_read_all_end (mca_io_ompio_file_t *fh,
+                                           void *buf,
+                                           ompi_status_public_t * status);
+/* Write entry points: the full call followed by its _begin/_end variants. */
+int mca_fcoll_two_phase_file_write_all (mca_io_ompio_file_t *fh,
+                                        void *buf,
+                                        int count,
+                                        struct ompi_datatype_t *datatype,
+                                        ompi_status_public_t * status);
+
+int mca_fcoll_two_phase_file_write_all_begin (mca_io_ompio_file_t *fh,
+                                              void *buf,
+                                              int count,
+                                              struct ompi_datatype_t *datatype);
+
+int mca_fcoll_two_phase_file_write_all_end (mca_io_ompio_file_t *fh,
+                                            void *buf,
+                                            ompi_status_public_t * status);
+
+END_C_DECLS + +#endif /* MCA_FCOLL_TWO_PHASE_EXPORT_H */ diff --git a/ompi/mca/fcoll/two_phase/fcoll_two_phase_component.c b/ompi/mca/fcoll/two_phase/fcoll_two_phase_component.c new file mode 100644 index 0000000000..1c31a6cfb2 --- /dev/null +++ b/ompi/mca/fcoll/two_phase/fcoll_two_phase_component.c @@ -0,0 +1,132 @@ +/* + * Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. + * Copyright (c) 2004-2005 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2005 The Regents of the University of California. + * All rights reserved. + * Copyright (c) 2008 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2008-2011 University of Houston. All rights reserved. + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + * + * These symbols are in a file by themselves to provide nice linker + * semantics. Since linkers generally pull in symbols by object + * files, keeping these symbols as the only symbols in this file + * prevents utility programs such as "ompi_info" from having to import + * entire components just to query their version and parameters. 
+ */ + +#include "ompi_config.h" +#include "fcoll_two_phase.h" + +#include "mpi.h" +#include "ompi/mca/fcoll/fcoll.h" +#include "fcoll_two_phase.h" + +/* + * Public string showing the fcoll ompi_two_phase component version number + */ +const char *mca_fcoll_two_phase_component_version_string = + "Open MPI two_phase collective MCA component version " OMPI_VERSION; + +/* Global variables: storage for the four MCA parameters handled by + * two_phase_register(); the initializers are the built-in defaults. + * (-1 for num_io_procs is the value the collective read/write code tests for.) */ +int mca_fcoll_two_phase_priority = 10; +int mca_fcoll_two_phase_num_io_procs = -1; +int mca_fcoll_two_phase_constant_cbs = 0; +int mca_fcoll_two_phase_cycle_buffer_size = OMPIO_PREALLOC_MAX_BUF_SIZE; + +/* + * Local function: parameter registration hook, referenced from the component struct below + */ +static int two_phase_register(void); + +/* + * Instantiate the public struct with all of our public information + * and pointers to our public functions in it + */ +mca_fcoll_base_component_2_0_0_t mca_fcoll_two_phase_component = { + + /* First, the mca_component_t struct containing meta information + * about the component itself */ + + { + MCA_FCOLL_BASE_VERSION_2_0_0, + + /* Component name and version */ + "two_phase", + OMPI_MAJOR_VERSION, + OMPI_MINOR_VERSION, + OMPI_RELEASE_VERSION, + two_phase_register, + NULL + }, + { + /* The component is checkpoint ready */ + MCA_BASE_METADATA_PARAM_CHECKPOINT + }, + + mca_fcoll_two_phase_component_init_query, + mca_fcoll_two_phase_component_file_query, + mca_fcoll_two_phase_component_file_unquery +}; + +/* For each of the four parameters: find it by name, load any existing value into its global, then call mca_base_param_reg_int with that global as value and storage (presumably default + destination -- confirm against mca_base_param.h). Always returns OMPI_SUCCESS. */ +static int +two_phase_register(void) +{ + int param; + /* Each lookup below runs only when mca_base_param_find returned a non-negative index. */ + param = mca_base_param_find ("fcoll", NULL, "two_phase_priority"); + if (param >= 0) + { + mca_base_param_lookup_int (param, &mca_fcoll_two_phase_priority); + } + param = mca_base_param_find ("fcoll", NULL, "two_phase_num_io_procs"); + if (param >= 0) + { + mca_base_param_lookup_int (param, &mca_fcoll_two_phase_num_io_procs); + } + param = mca_base_param_find ("fcoll", NULL, "two_phase_constant_cbs"); + if (param >= 0) + { + mca_base_param_lookup_int (param, &mca_fcoll_two_phase_constant_cbs); + } + param = mca_base_param_find ("fcoll", NULL,
"two_phase_cycle_buffer_size"); + if (param >= 0) + { + mca_base_param_lookup_int (param, &mca_fcoll_two_phase_cycle_buffer_size); + } + + mca_base_param_reg_int (&mca_fcoll_two_phase_component.fcollm_version, + "priority", + "Priority of the two_phase fcoll component", + false, false, mca_fcoll_two_phase_priority, + &mca_fcoll_two_phase_priority); + mca_base_param_reg_int (&mca_fcoll_two_phase_component.fcollm_version, + "num_io_procs", + "Number of writers in the two_phase fcoll component", + false, false, mca_fcoll_two_phase_num_io_procs, + &mca_fcoll_two_phase_num_io_procs); + mca_base_param_reg_int (&mca_fcoll_two_phase_component.fcollm_version, + "constant_cbs", + "wether we are using constant or scaling cycle buffer size in the two_phase fcoll component", + false, false, mca_fcoll_two_phase_constant_cbs, + &mca_fcoll_two_phase_constant_cbs); + mca_base_param_reg_int (&mca_fcoll_two_phase_component.fcollm_version, + "cycle_buffer_size", + "Cycle Buffer Size of the two_phase fcoll component", + false, false, mca_fcoll_two_phase_cycle_buffer_size, + &mca_fcoll_two_phase_cycle_buffer_size); + + return OMPI_SUCCESS; +} diff --git a/ompi/mca/fcoll/two_phase/fcoll_two_phase_file_read_all.c b/ompi/mca/fcoll/two_phase/fcoll_two_phase_file_read_all.c new file mode 100644 index 0000000000..bd23ae604d --- /dev/null +++ b/ompi/mca/fcoll/two_phase/fcoll_two_phase_file_read_all.c @@ -0,0 +1,713 @@ +/* + * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. + * Copyright (c) 2004-2005 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2005 The Regents of the University of California. + * All rights reserved. + * Copyright (c) 2008-2011 University of Houston. All rights reserved. 
+ * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#include "ompi_config.h" +#include "fcoll_two_phase.h" + +#include "mpi.h" +#include "ompi/constants.h" +#include "ompi/mca/fcoll/fcoll.h" +#include "ompi/mca/io/ompio/io_ompio.h" +#include "ompi/mca/io/io.h" +#include "math.h" +#include "ompi/mca/pml/pml.h" +#include + +int +mca_fcoll_two_phase_file_read_all (mca_io_ompio_file_t *fh, + void *buf, + int count, + struct ompi_datatype_t *datatype, + ompi_status_public_t *status) +{ + size_t total_bytes_read = 0; /* total bytes that have been read*/ + size_t total_bytes = 0; /* total bytes to be read */ + size_t total_bytes_global = 0; + size_t bytes_to_read_in_cycle = 0; /* left to be read in a cycle*/ + size_t max_data = 0; + size_t bytes_remaining = 0; + size_t *bytes_rem = NULL; + size_t *prev_bytes_rem = NULL; + size_t stripe_size =0; + + int index = 0; + int current_index = 0; + int *current = NULL; + int *previous = NULL; + int cycles = 0; + int i=0, j=0, x=0, n=0; + int blocks = 0; + int bytes_left = 0; + int two_phase_num_io_procs = 0; + + /* array that contains the sorted indices of the global_iov */ + int *sorted = NULL; + int *displs = NULL; + int *bytes_per_process = NULL; + int *bytes_received = NULL; + + /* iovec structure and count of the buffer passed in */ + uint32_t iov_count = 0; + struct iovec *decoded_iov = NULL; + + int global_fview_count = 0; + struct iovec *global_fview = NULL; + + int local_count = 0; + struct iovec *iov = NULL; + + int broken_count = 0; + struct iovec *broken_iovec = NULL; + + int *fview_count = NULL; + + int global_count = 0; + char *global_buf = NULL; + + if (opal_datatype_is_contiguous_memory_layout(&datatype->super,1)) { + fh->f_flags |= OMPIO_CONTIGUOUS_MEMORY; + } + + /************************************************************************** + ** In case the data is not contigous in memory, decode it into an iovec ** + 
**************************************************************************/ + if (!(fh->f_flags & OMPIO_CONTIGUOUS_MEMORY)) { + ompi_io_ompio_decode_datatype (fh, + datatype, + count, + buf, + &max_data, + &decoded_iov, + &iov_count); + } + else { + max_data = count * datatype->super.size; + } + + if (! (fh->f_flags & OMPIO_AGGREGATOR_IS_SET)) { + if (-1 == mca_fcoll_two_phase_num_io_procs) { + ompi_io_ompio_set_aggregator_props (fh, + mca_fcoll_two_phase_num_io_procs, + max_data); + two_phase_num_io_procs = + ceil((float)fh->f_size/fh->f_procs_per_group); + } + fh->f_aggregator_index = + ceil((float)fh->f_size/two_phase_num_io_procs); + if (fh->f_aggregator_index * two_phase_num_io_procs > fh->f_size) { + two_phase_num_io_procs = + ceil((float)fh->f_size/fh->f_aggregator_index); + } + } + + /********************************************************************* + *** Generate the File offsets/lengths corresponding to this read *** + ********************************************************************/ + ompi_io_ompio_generate_current_file_view (fh, + max_data, + &iov, + &local_count); + /* + for (i=0 ; if_rank, + iov[i].iov_base, + iov[i].iov_len); + } + */ + + /************************************************************* + * Breakdown the file view at each process per OST then send * + * each portion of the file view t0 the corresp aggregator * + *************************************************************/ + + fh->f_comm->c_coll.coll_allreduce (&max_data, + &total_bytes, + 1, + MPI_DOUBLE, + MPI_SUM, + fh->f_comm, + fh->f_comm->c_coll.coll_allreduce_module); + + stripe_size = ceil((float)total_bytes/two_phase_num_io_procs); + + ompi_io_ompio_break_file_view (fh, + iov, + local_count, + two_phase_num_io_procs, + stripe_size, + &broken_iovec, + &broken_count); + /* + for (i=0 ; if_rank, + broken_iovec[i].iov_base, + broken_iovec[i].iov_len); + } + */ + + if (NULL != iov) { + free (iov); + iov = NULL; + } + + ompi_io_ompio_distribute_file_view (fh, + broken_iovec, 
+ broken_count, + two_phase_num_io_procs, + stripe_size, + &fview_count, + &global_fview, + &global_fview_count); + /* + for (i=0 ; if_rank, + global_fview[i].iov_base, + global_fview[i].iov_len); + } + */ + + total_bytes = 0; + if (0 == fh->f_rank%fh->f_aggregator_index) { + if (global_fview_count) { + for (i=0 ; if_size * sizeof (int)); + if (NULL == bytes_per_process) { + opal_output (1, "OUT OF MEMORY\n"); + return OMPI_ERR_OUT_OF_RESOURCE; + } + displs = (int *) malloc (fh->f_size * sizeof (int)); + if (NULL == displs) { + opal_output (1, "OUT OF MEMORY\n"); + return OMPI_ERR_OUT_OF_RESOURCE; + } + } + fh->f_comm->c_coll.coll_allreduce (&total_bytes, + &total_bytes_global, + 1, + MPI_DOUBLE, + MPI_MAX, + fh->f_comm, + fh->f_comm->c_coll.coll_allreduce_module); + + bytes_received = (int *)malloc (two_phase_num_io_procs * sizeof (int)); + if (NULL == bytes_received) { + opal_output (1, "OUT OF MEMORY\n"); + return OMPI_ERR_OUT_OF_RESOURCE; + } + current = (int *)malloc (two_phase_num_io_procs * sizeof (int)); + if (NULL == current) { + opal_output (1, "OUT OF MEMORY\n"); + return OMPI_ERR_OUT_OF_RESOURCE; + } + previous = (int *)malloc (two_phase_num_io_procs * sizeof (int)); + if (NULL == previous) { + opal_output (1, "OUT OF MEMORY\n"); + return OMPI_ERR_OUT_OF_RESOURCE; + } + bytes_rem = (size_t *)malloc (two_phase_num_io_procs * sizeof (size_t)); + if (NULL == bytes_rem) { + opal_output (1, "OUT OF MEMORY\n"); + return OMPI_ERR_OUT_OF_RESOURCE; + } + prev_bytes_rem = (size_t *)malloc (two_phase_num_io_procs * sizeof (size_t)); + if (NULL == prev_bytes_rem) { + opal_output (1, "OUT OF MEMORY\n"); + return OMPI_ERR_OUT_OF_RESOURCE; + } + + memset(current, 0x0, two_phase_num_io_procs*sizeof(int)); + memset(previous, 0x0, two_phase_num_io_procs*sizeof(int)); + memset(bytes_rem, 0x0, two_phase_num_io_procs*sizeof(size_t)); + memset(prev_bytes_rem, 0x0, two_phase_num_io_procs*sizeof(size_t)); + + cycles = ceil ((float)total_bytes_global/ + 
mca_fcoll_two_phase_cycle_buffer_size); + + for (index = 0; index < cycles; index++) { + int k = 0; + size_t total_bytes_recv = 0; + size_t temp = 0; + global_count = 0; + + memset(bytes_received, 0x0, two_phase_num_io_procs*sizeof(int)); + if (0 == fh->f_rank%fh->f_aggregator_index) { + memset(displs, 0x0, fh->f_size*sizeof(int)); + memset(bytes_per_process, 0x0, fh->f_size*sizeof(int)); + + if ((int)total_bytes > mca_fcoll_two_phase_cycle_buffer_size) { + bytes_to_read_in_cycle = mca_fcoll_two_phase_cycle_buffer_size; + } + else { + bytes_to_read_in_cycle = total_bytes; + } + } + /* + printf ("****%d: Total_bytes: %d CYCLE %d Bytes %d OFFSET %d******\n", + fh->f_rank, + total_bytes, + index, + bytes_to_read_in_cycle, + fh->f_offset); + sleep(1); + */ + /********************************************************** + **Gather the Data from all the processes at the readers ** + *********************************************************/ + + /* Calculate how much data will be contributed in this cycle + by each process*/ + for (k=0 ; kf_offset; + + while (current[k] < broken_count) { + if (k*stripe_size+fh->f_offset > + (size_t)broken_iovec[current[k]].iov_base || + (k+1)*stripe_size+fh->f_offset <= + (size_t)broken_iovec[current[k]].iov_base) { + if ((k+1)*stripe_size+fh->f_offset <= + (size_t)broken_iovec[current[k]].iov_base) { + break; + } + current[k] ++; + previous[k] = current[k]; + continue; + } + if (temp >= + (size_t)((OPAL_PTRDIFF_TYPE)broken_iovec[current[k]].iov_base + + broken_iovec[current[k]].iov_len)) { + if (bytes_rem[k]) { + bytes_received[k] += bytes_rem[k]; + total_bytes_recv += bytes_rem[k]; + bytes_rem[k] = 0; + } + else { + bytes_received[k] += broken_iovec[current[k]].iov_len; + total_bytes_recv += broken_iovec[current[k]].iov_len; + } + current[k] ++; + } + else { + if (bytes_rem[k]) { + bytes_received[k] += temp - + ((broken_iovec[current[k]].iov_len - bytes_rem[k]) + + (OPAL_PTRDIFF_TYPE)broken_iovec[current[k]].iov_base); + total_bytes_recv 
+= temp - + ((broken_iovec[current[k]].iov_len - bytes_rem[k]) + + (OPAL_PTRDIFF_TYPE)broken_iovec[current[k]].iov_base); + bytes_rem[k] -= temp - + ((broken_iovec[current[k]].iov_len - bytes_rem[k]) + + (OPAL_PTRDIFF_TYPE)broken_iovec[current[k]].iov_base); + break; + } + else { + if (temp > (size_t)broken_iovec[current[k]].iov_base) { + bytes_received[k] += temp - + (OPAL_PTRDIFF_TYPE)broken_iovec[current[k]].iov_base; + total_bytes_recv += temp - + (OPAL_PTRDIFF_TYPE)broken_iovec[current[k]].iov_base; + bytes_rem[k] = broken_iovec[current[k]].iov_len - + (temp - + (OPAL_PTRDIFF_TYPE)broken_iovec[current[k]].iov_base); + break; + } + else { + break; + } + } + } + } + } + /* + for (i=0 ; if_rank, + bytes_received[i], i); + } + sleep(1); + */ + if (0 == fh->f_rank%fh->f_aggregator_index && bytes_to_read_in_cycle) { + /* Calculate how much data will be recieved this cycle + by each aggregator*/ + while (bytes_to_read_in_cycle) { + blocks = fview_count[0]; + for (j=0 ; jf_size ; j++) { + if (sorted[current_index] < blocks) { + n = j; + break; + } + else { + blocks += fview_count[j+1]; + } + } + if (bytes_remaining) { + if (bytes_remaining <= bytes_to_read_in_cycle) { + bytes_per_process[n] += bytes_remaining; + current_index ++; + bytes_to_read_in_cycle -= bytes_remaining; + bytes_remaining = 0; + continue; + } + else { + bytes_per_process[n] += bytes_to_read_in_cycle; + bytes_remaining -= bytes_to_read_in_cycle; + bytes_to_read_in_cycle = 0; + break; + } + } + else { + if (bytes_to_read_in_cycle < + global_fview[sorted[current_index]].iov_len) { + bytes_per_process[n] += bytes_to_read_in_cycle; + bytes_remaining = + global_fview[sorted[current_index]].iov_len - + bytes_to_read_in_cycle; + bytes_to_read_in_cycle = 0; + break; + } + else { + bytes_per_process[n] += + global_fview[sorted[current_index]].iov_len; + bytes_to_read_in_cycle -= + global_fview[sorted[current_index]].iov_len; + current_index ++; + continue; + } + } + } + /* + for (i=0 ; if_size ; i++) { + 
printf ("%d --> expecting %d from %d\n",fh->f_rank, + bytes_per_process[i], i); + } + */ + /* Calculate the displacement on where to put the data and allocate + the recieve buffer (global_buf) */ + displs[0] = 0; + global_count = bytes_per_process[0]; + for (i=1 ; if_size ; i++) { + global_count += bytes_per_process[i]; + displs[i] = displs[i-1] + bytes_per_process[i-1]; + } + /* + for (i=0 ; if_size ; i++) { + printf ("Proc %d sending %d at %d\n", + i, + bytes_per_process[i], + displs[i]); + } + */ + if (0 != global_count) { + global_buf = malloc (global_count); + if (NULL == global_buf) { + opal_output (1, "OUT OF MEMORY\n"); + return OMPI_ERR_OUT_OF_RESOURCE; + } + } + } + /********************************************************** + ******* Create the io array, and pass it to fbtl ********* + *********************************************************/ + if (0 == fh->f_rank%fh->f_aggregator_index && global_count) { + int bytes_to_read = global_count; + int *temp = NULL; + int block = 1; + k = 0; + + temp = (int *)malloc (sizeof(int) * fh->f_size); + if (NULL == temp) { + opal_output(1, "OUT OF MEMORY\n"); + return OMPI_ERR_OUT_OF_RESOURCE; + } + memset(temp, 0x0, fh->f_size*sizeof(int)); + fh->f_io_array = (mca_io_ompio_io_array_t *) malloc + (OMPIO_IOVEC_INITIAL_SIZE * sizeof (mca_io_ompio_io_array_t)); + if (NULL == fh->f_io_array) { + opal_output(1, "OUT OF MEMORY\n"); + return OMPI_ERR_OUT_OF_RESOURCE; + } + + while (bytes_to_read) { + int start = 0; + + if (OMPIO_IOVEC_INITIAL_SIZE*block <= k) { + block ++; + fh->f_io_array = (mca_io_ompio_io_array_t *)realloc + (fh->f_io_array, OMPIO_IOVEC_INITIAL_SIZE * block * + sizeof(mca_io_ompio_io_array_t)); + if (NULL == fh->f_io_array) { + opal_output(1, "OUT OF MEMORY\n"); + return OMPI_ERR_OUT_OF_RESOURCE; + } + } + + blocks = fview_count[0]; + for (j=0 ; jf_size ; j++) { + if (sorted[x] < blocks) { + n = j; + break; + } + else { + blocks += fview_count[j+1]; + } + } + for (j=0 ; jf_io_array[k].offset = 
(IOVBASE_TYPE *) + ((OPAL_PTRDIFF_TYPE)global_fview[sorted[x]].iov_base + + (global_fview[sorted[x]].iov_len - bytes_left)); + fh->f_io_array[k].length = bytes_left; + fh->f_io_array[k].memory_address = + &global_buf[start+temp[n]]; + temp[n] += (int)fh->f_io_array[k].length; + bytes_to_read -= bytes_left; + bytes_left = 0; + k ++; + x ++; + continue; + } + else { + fh->f_io_array[k].offset = (IOVBASE_TYPE *) + ((OPAL_PTRDIFF_TYPE)global_fview[sorted[x]].iov_base + + (global_fview[sorted[x]].iov_len - bytes_left)); + fh->f_io_array[k].length = bytes_to_read; + fh->f_io_array[k].memory_address = + &global_buf[start+temp[n]]; + temp[n] += (int)fh->f_io_array[k].length; + bytes_left -= bytes_to_read; + bytes_to_read = 0;; + k ++; + break; + } + } + else { + if (bytes_to_read < (int)global_fview[sorted[x]].iov_len) { + fh->f_io_array[k].offset = global_fview[sorted[x]].iov_base; + fh->f_io_array[k].length = bytes_to_read; + fh->f_io_array[k].memory_address = + &global_buf[start+temp[n]]; + bytes_left = + global_fview[sorted[x]].iov_len - bytes_to_read; + bytes_to_read = 0; + k ++; + break; + } + else { + fh->f_io_array[k].offset = global_fview[sorted[x]].iov_base; + fh->f_io_array[k].length = global_fview[sorted[x]].iov_len; + fh->f_io_array[k].memory_address = + &global_buf[start+temp[n]]; + temp[n] += (int)fh->f_io_array[k].length; + bytes_to_read -= global_fview[sorted[x]].iov_len; + k ++; + x ++; + continue; + } + } + } + + fh->f_num_of_io_entries = k; + /* + printf("*************************** %d\n", fh->f_num_of_io_entries); + for (i=0 ; if_num_of_io_entries ; i++) { + printf(" ADDRESS: %p OFFSET: %p LENGTH: %d\n", + fh->f_io_array[i].memory_address, + fh->f_io_array[i].offset, + fh->f_io_array[i].length); + } + */ + if (fh->f_num_of_io_entries) { + if (OMPI_SUCCESS != fh->f_fbtl->fbtl_preadv (fh, NULL)) { + opal_output (1, "READ FAILED\n"); + return OMPI_ERROR; + } + } + if (NULL != temp) { + free (temp); + temp = NULL; + } + } + 
/********************************************************** + ******************** DONE READING ************************ + *********************************************************/ + + + /********************************************************** + ********* Scatter the Data from the readers ************** + *********************************************************/ +#if 0 + if (fh->f_flags & OMPIO_CONTIGUOUS_MEMORY) { + receive_buf = &((char*)buf)[total_bytes_read]; + } + else if (total_bytes_recv) { + /* allocate a send buffer and copy the data that needs + to be sent into it in case the data is non-contigous + in memory */ + receive_buf = malloc (total_bytes_recv); + if (NULL == receive_buf) { + opal_output (1, "OUT OF MEMORY\n"); + return OMPI_ERR_OUT_OF_RESOURCE; + } + } +#endif + /* distribute the data to its corresponding processes */ + ompi_io_ompio_receive_data (fh, + buf, + total_bytes_recv, + decoded_iov, + iov_count, + bytes_received, + broken_iovec, + previous, + prev_bytes_rem, + global_buf, + bytes_per_process, + displs, + two_phase_num_io_procs, + stripe_size); + + /* + if (0 == fh->f_rank%fh->f_aggregator_index) { + for (k=0 ; kf_rank, + ((int *)global_buf)[k]); + } + } + */ +#if 0 + /* If data is not contigous in memory, copy the data from the + receive buffer into the buffer passed in */ + if (!(fh->f_flags & OMPIO_CONTIGUOUS_MEMORY)) { + OPAL_PTRDIFF_TYPE mem_address; + size_t remaining = 0; + size_t temp_position = 0; + + remaining = total_bytes_recv; + + while (remaining) { + mem_address = (OPAL_PTRDIFF_TYPE) + (decoded_iov[iov_index].iov_base) + current_position; + + if (remaining >= + (decoded_iov[iov_index].iov_len - current_position)) { + memcpy ((IOVBASE_TYPE *) mem_address, + receive_buf+temp_position, + decoded_iov[iov_index].iov_len - current_position); + remaining = remaining - + (decoded_iov[iov_index].iov_len - current_position); + temp_position = temp_position + + (decoded_iov[iov_index].iov_len - current_position); + iov_index = 
iov_index + 1; + current_position = 0; + } + else { + memcpy ((IOVBASE_TYPE *) mem_address, + receive_buf+temp_position, + remaining); + current_position = current_position + remaining; + remaining = 0; + } + } + if (NULL != receive_buf) { + free (receive_buf); + receive_buf = NULL; + } + } +#endif + total_bytes_read += total_bytes_recv; + total_bytes -= global_count; + + /********************************************************** + **************** DONE GATHERING OF DATA ****************** + *********************************************************/ + + + + if (0 == fh->f_rank%fh->f_aggregator_index) { + fh->f_num_of_io_entries = 0; + if (NULL != fh->f_io_array) { + free (fh->f_io_array); + fh->f_io_array = NULL; + } + if (NULL != global_buf) { + free (global_buf); + global_buf = NULL; + } + } + } + if (NULL != sorted) { + free (sorted); + sorted = NULL; + } + if (NULL != broken_iovec) { + free (broken_iovec); + broken_iovec = NULL; + } + if (NULL != global_fview) { + free (global_fview); + global_fview = NULL; + } + if (NULL != fview_count) { + free (fview_count); + fview_count = NULL; + } + if (NULL != decoded_iov) { + free (decoded_iov); + decoded_iov = NULL; + } + if (NULL != bytes_per_process) { + free (bytes_per_process); + bytes_per_process = NULL; + } + if (NULL != bytes_received) { + free (bytes_received); + bytes_received = NULL; + } + if (NULL != displs) { + free (displs); + displs = NULL; + } + /* + if (NULL != total_bytes_per_process) { + free (total_bytes_per_process); + total_bytes_per_process = NULL; + } + */ + + fh->f_flags ^= OMPIO_AGGREGATOR_IS_SET; + return OMPI_SUCCESS; +} diff --git a/ompi/mca/fcoll/two_phase/fcoll_two_phase_file_read_all_begin.c b/ompi/mca/fcoll/two_phase/fcoll_two_phase_file_read_all_begin.c new file mode 100644 index 0000000000..e41ac3d658 --- /dev/null +++ b/ompi/mca/fcoll/two_phase/fcoll_two_phase_file_read_all_begin.c @@ -0,0 +1,36 @@ +/* + * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana + * 
University Research and Technology + * Corporation. All rights reserved. + * Copyright (c) 2004-2005 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2005 The Regents of the University of California. + * All rights reserved. + * Copyright (c) 2008-2011 University of Houston. All rights reserved. + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#include "ompi_config.h" +#include "fcoll_two_phase.h" + +#include "mpi.h" +#include "ompi/constants.h" +#include "ompi/mca/fcoll/fcoll.h" + +/* Stub for the begin half of the split-collective read: performs no I/O and ignores fh, buf, count and datatype; it only prints a trace line naming this component and returns OMPI_SUCCESS. */ +int +mca_fcoll_two_phase_file_read_all_begin (mca_io_ompio_file_t *fh, + void *buf, + int count, + struct ompi_datatype_t *datatype) +{ + printf ("TWO_PHASE READ ALL BEGIN\n"); + return OMPI_SUCCESS; +} diff --git a/ompi/mca/fcoll/two_phase/fcoll_two_phase_file_read_all_end.c b/ompi/mca/fcoll/two_phase/fcoll_two_phase_file_read_all_end.c new file mode 100644 index 0000000000..d4dbaf4229 --- /dev/null +++ b/ompi/mca/fcoll/two_phase/fcoll_two_phase_file_read_all_end.c @@ -0,0 +1,35 @@ +/* + * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. + * Copyright (c) 2004-2005 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2005 The Regents of the University of California. + * All rights reserved. + * Copyright (c) 2008-2011 University of Houston. All rights reserved.
+ * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#include "ompi_config.h" +#include "fcoll_two_phase.h" + +#include "mpi.h" +#include "ompi/constants.h" +#include "ompi/mca/fcoll/fcoll.h" + +/* Stub for the end half of the split-collective read: performs no I/O, leaves buf and status untouched; it only prints a trace line naming this component and returns OMPI_SUCCESS. */ +int +mca_fcoll_two_phase_file_read_all_end (mca_io_ompio_file_t *fh, + void *buf, + ompi_status_public_t *status) +{ + printf ("TWO_PHASE READ ALL END\n"); + return OMPI_SUCCESS; +} diff --git a/ompi/mca/fcoll/two_phase/fcoll_two_phase_file_write_all.c b/ompi/mca/fcoll/two_phase/fcoll_two_phase_file_write_all.c new file mode 100644 index 0000000000..a315310dd5 --- /dev/null +++ b/ompi/mca/fcoll/two_phase/fcoll_two_phase_file_write_all.c @@ -0,0 +1,736 @@ +/* + * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. + * Copyright (c) 2004-2005 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2005 The Regents of the University of California. + * All rights reserved. + * Copyright (c) 2008-2011 University of Houston. All rights reserved.
+ * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#include "ompi_config.h" +#include "fcoll_two_phase.h" + +#include "mpi.h" +#include "ompi/constants.h" +#include "ompi/mca/fcoll/fcoll.h" +#include "ompi/mca/io/ompio/io_ompio.h" +#include "ompi/mca/io/io.h" +#include "math.h" +#include "ompi/mca/pml/pml.h" +#include + +#define TIME_BREAKDOWN 0 +int +mca_fcoll_two_phase_file_write_all (mca_io_ompio_file_t *fh, + void *buf, + int count, + struct ompi_datatype_t *datatype, + ompi_status_public_t *status) +{ + size_t total_bytes_written = 0; /* total bytes that have been written*/ + size_t total_bytes = 0; /* total bytes to be written */ + size_t total_bytes_global = 0; + size_t bytes_to_write_in_cycle = 0; /* left to be written in a cycle*/ + size_t max_data = 0; + size_t bytes_remaining = 0; + size_t *bytes_rem = 0; + size_t *prev_bytes_rem = 0; + size_t stripe_size =0; + size_t bytes_left = 0; + + int index = 0; + int current_index = 0; + int *current = NULL; + int *previous = NULL; + int cycles = 0; + int i=0, j=0, x=0, n=0; + int blocks = 0; + int two_phase_num_io_procs = 0; + + /* array that contains the sorted indices of the global_iov */ + int *sorted = NULL; + int *displs = NULL; + int *bytes_per_process = NULL; + int *bytes_sent = NULL; + + /* iovec structure and count of the buffer passed in */ + uint32_t iov_count = 0; + struct iovec *decoded_iov = NULL; + + int global_fview_count = 0; + struct iovec *global_fview = NULL; + + int local_count = 0; + struct iovec *iov = NULL; + + int broken_count = 0; + struct iovec *broken_iovec = NULL; + + int *fview_count = NULL; + + int global_count = 0; + char *global_buf = NULL; + +#if TIME_BREAKDOWN + double start_time=0, end_time=0, start_time2=0, end_time2=0; + double total=0 , total_io=0; +#endif + + if (opal_datatype_is_contiguous_memory_layout(&datatype->super,1)) { + fh->f_flags = fh->f_flags | OMPIO_CONTIGUOUS_MEMORY; + } + + + 
/************************************************************************** + ** In case the data is not contigous in memory, decode it into an iovec ** + **************************************************************************/ + if (!(fh->f_flags & OMPIO_CONTIGUOUS_MEMORY)) { + ompi_io_ompio_decode_datatype (fh, + datatype, + count, + buf, + &max_data, + &decoded_iov, + &iov_count); + } + else { + max_data = count * datatype->super.size; + } + + if (! (fh->f_flags & OMPIO_AGGREGATOR_IS_SET)) { + + if (-1 == mca_fcoll_two_phase_num_io_procs) { + ompi_io_ompio_set_aggregator_props (fh, + mca_fcoll_two_phase_num_io_procs, + max_data); + two_phase_num_io_procs = + ceil((float)fh->f_size/fh->f_procs_per_group); + + } + fh->f_aggregator_index = + ceil((float)fh->f_size/two_phase_num_io_procs); + if (fh->f_aggregator_index * two_phase_num_io_procs > fh->f_size) { + two_phase_num_io_procs = + ceil((float)fh->f_size/fh->f_aggregator_index); + } + } + +/* printf("two_phase_num_io_procs : %ld \n", two_phase_num_io_procs);*/ + +#if TIME_BREAKDOWN + if (0 == fh->f_rank%fh->f_aggregator_index) { + start_time = MPI_Wtime(); + } +#endif + /********************************************************************* + *** Generate the File offsets/lengths corresponding to this write *** + ********************************************************************/ + ompi_io_ompio_generate_current_file_view (fh, + max_data, + &iov, + &local_count); + /* + for (i=0 ; if_rank, + iov[i].iov_base, + iov[i].iov_len); + } + */ + /************************************************************** + * Breakdown the file view at each process for each aggregator* + * then send each portion of the file view to the corresp agg.* + **************************************************************/ + + + + fh->f_comm->c_coll.coll_allreduce (&max_data, + &total_bytes, + 1, + MPI_DOUBLE, + MPI_SUM, + fh->f_comm, + fh->f_comm->c_coll.coll_allreduce_module); + + + stripe_size = 
ceil((float)total_bytes/two_phase_num_io_procs); + + + + ompi_io_ompio_break_file_view (fh, + iov, + local_count, + two_phase_num_io_procs, + stripe_size, + &broken_iovec, + &broken_count); + /* + for (i=0 ; if_rank, + broken_iovec[i].iov_base, + broken_iovec[i].iov_len); + } + */ + if (NULL != iov) { + free (iov); + iov = NULL; + } + + ompi_io_ompio_distribute_file_view (fh, + broken_iovec, + broken_count, + two_phase_num_io_procs, + stripe_size, + &fview_count, + &global_fview, + &global_fview_count); + /* + if (0 == fh->f_rank%fh->f_aggregator_index) { + for (i=0; if_size ; i++) { + printf("%d: fview_count[%d] = %d:\n", + fh->f_rank, + i, + fview_count[i]); + } + } + for (i=0 ; if_rank, + global_fview[i].iov_base, + global_fview[i].iov_len); + } + */ + total_bytes = 0; + if (0 == fh->f_rank%fh->f_aggregator_index) { + if (global_fview_count) { + for (i=0 ; if_size * sizeof (int)); + if (NULL == bytes_per_process) { + opal_output (1, "OUT OF MEMORY\n"); + return OMPI_ERR_OUT_OF_RESOURCE; + } + displs = (int *) malloc (fh->f_size * sizeof (int)); + if (NULL == displs) { + opal_output (1, "OUT OF MEMORY\n"); + return OMPI_ERR_OUT_OF_RESOURCE; + } + } + fh->f_comm->c_coll.coll_allreduce (&total_bytes, + &total_bytes_global, + 1, + MPI_DOUBLE, + MPI_MAX, + fh->f_comm, + fh->f_comm->c_coll.coll_allreduce_module); + + bytes_sent = (int *)malloc (two_phase_num_io_procs * sizeof (int)); + if (NULL == bytes_sent) { + opal_output (1, "OUT OF MEMORY\n"); + return OMPI_ERR_OUT_OF_RESOURCE; + } + current = (int *)malloc (two_phase_num_io_procs * sizeof (int)); + if (NULL == current) { + opal_output (1, "OUT OF MEMORY\n"); + return OMPI_ERR_OUT_OF_RESOURCE; + } + previous = (int *)malloc (two_phase_num_io_procs * sizeof (int)); + if (NULL == previous) { + opal_output (1, "OUT OF MEMORY\n"); + return OMPI_ERR_OUT_OF_RESOURCE; + } + bytes_rem = (size_t *)malloc (two_phase_num_io_procs * sizeof (size_t)); + if (NULL == bytes_rem) { + opal_output (1, "OUT OF MEMORY\n"); + return 
OMPI_ERR_OUT_OF_RESOURCE; + } + prev_bytes_rem = (size_t *)malloc (two_phase_num_io_procs * sizeof (size_t)); + if (NULL == prev_bytes_rem) { + opal_output (1, "OUT OF MEMORY\n"); + return OMPI_ERR_OUT_OF_RESOURCE; + } + + memset(current, 0x0, two_phase_num_io_procs*sizeof(int)); + memset(previous, 0x0, two_phase_num_io_procs*sizeof(int)); + memset(bytes_rem, 0x0, two_phase_num_io_procs*sizeof(size_t)); + memset(prev_bytes_rem, 0x0, two_phase_num_io_procs*sizeof(size_t)); + + cycles = ceil ((float)total_bytes_global/ + mca_fcoll_two_phase_cycle_buffer_size); + /* + printf ("%d: Cycles: %d Total Bytes: %lld Global: %lld\n", + fh->f_rank, + cycles, + total_bytes, + total_bytes_global); + */ +#if TIME_BREAKDOWN + if (0 == fh->f_rank%fh->f_aggregator_index) { + end_time = MPI_Wtime(); + total = end_time-start_time; + printf ("%d: Preprocessing --- %f\n", fh->f_rank, total); + total = 0; + } +#endif + + for (index = 0; index < cycles; index++) { + int k = 0; + size_t total_bytes_sent = 0; + size_t temp = 0; + global_count = 0; + +#if TIME_BREAKDOWN + if (0 == fh->f_rank%fh->f_aggregator_index) { + start_time = MPI_Wtime(); + } +#endif + + memset(bytes_sent, 0x0, two_phase_num_io_procs*sizeof(int)); + if (0 == fh->f_rank%fh->f_aggregator_index) { + memset(displs, 0x0, fh->f_size*sizeof(int)); + memset(bytes_per_process, 0x0, fh->f_size*sizeof(int)); + + if (total_bytes > (size_t)mca_fcoll_two_phase_cycle_buffer_size) { + bytes_to_write_in_cycle = mca_fcoll_two_phase_cycle_buffer_size; + } + else { + bytes_to_write_in_cycle = total_bytes; + } + } + /* + printf ("****%d: Total_bytes: %lld CYCLE %d Bytes %lld OFFSET %d******\n", + fh->f_rank, + total_bytes, + index, + bytes_to_write_in_cycle, + fh->f_offset); + */ + /********************************************************** + **Gather the Data from all the processes at the writers ** + *********************************************************/ + /* Calculate how much data will be contributed in this cycle + by each 
process*/ + for (k=0 ; kf_offset; + while (current[k] < broken_count) { + if (k*stripe_size+fh->f_offset > + (size_t)broken_iovec[current[k]].iov_base || + (k+1)*stripe_size+fh->f_offset <= + (size_t)broken_iovec[current[k]].iov_base) { + if ((k+1)*stripe_size+fh->f_offset <= + (size_t)broken_iovec[current[k]].iov_base) { + break; + } + current[k] ++; + previous[k] = current[k]; + continue; + } + if (temp >= + (size_t)((OPAL_PTRDIFF_TYPE)broken_iovec[current[k]].iov_base + + broken_iovec[current[k]].iov_len)) { + if (bytes_rem[k]) { + bytes_sent[k] += bytes_rem[k]; + total_bytes_sent += bytes_rem[k]; + bytes_rem[k] = 0; + } + else { + bytes_sent[k] += broken_iovec[current[k]].iov_len; + total_bytes_sent += broken_iovec[current[k]].iov_len; + } + current[k] ++; + } + else { + if (bytes_rem[k]) { + bytes_sent[k] += temp - + ((broken_iovec[current[k]].iov_len - bytes_rem[k]) + + (OPAL_PTRDIFF_TYPE)broken_iovec[current[k]].iov_base); + total_bytes_sent += temp - + ((broken_iovec[current[k]].iov_len - bytes_rem[k]) + + (OPAL_PTRDIFF_TYPE)broken_iovec[current[k]].iov_base); + bytes_rem[k] -= temp - + ((broken_iovec[current[k]].iov_len - bytes_rem[k]) + + (OPAL_PTRDIFF_TYPE)broken_iovec[current[k]].iov_base); + break; + } + else { + if (temp > (size_t)broken_iovec[current[k]].iov_base) { + bytes_sent[k] += temp - + (OPAL_PTRDIFF_TYPE)broken_iovec[current[k]].iov_base; + total_bytes_sent += temp - + (OPAL_PTRDIFF_TYPE)broken_iovec[current[k]].iov_base; + bytes_rem[k] = broken_iovec[current[k]].iov_len - + (temp - + (OPAL_PTRDIFF_TYPE)broken_iovec[current[k]].iov_base); + break; + } + else { + break; + } + } + } + } + } + /* + if (total_bytes_sent) { + printf ("%d ---> %d\n", fh->f_rank, total_bytes_sent); + } + for (i=0 ; if_rank, + bytes_sent[i], i); + } + } + */ + if (0 == fh->f_rank%fh->f_aggregator_index && bytes_to_write_in_cycle) { + /* Calculate how much data will be recieved this cycle + by each aggregator*/ + while (bytes_to_write_in_cycle) { + blocks = 
fview_count[0]; + for (j=0 ; jf_size ; j++) { + if (sorted[current_index] < blocks) { + n = j; + break; + } + else { + blocks += fview_count[j+1]; + } + } + if (bytes_remaining) { + if (bytes_remaining <= bytes_to_write_in_cycle) { + bytes_per_process[n] += bytes_remaining; + current_index ++; + bytes_to_write_in_cycle -= bytes_remaining; + bytes_remaining = 0; + continue; + } + else { + bytes_per_process[n] += bytes_to_write_in_cycle; + bytes_remaining -= bytes_to_write_in_cycle; + bytes_to_write_in_cycle = 0; + break; + } + } + else { + if (bytes_to_write_in_cycle < + global_fview[sorted[current_index]].iov_len) { + bytes_per_process[n] += bytes_to_write_in_cycle; + bytes_remaining = + global_fview[sorted[current_index]].iov_len - + bytes_to_write_in_cycle; + bytes_to_write_in_cycle = 0; + break; + } + else { + bytes_per_process[n] += + global_fview[sorted[current_index]].iov_len; + bytes_to_write_in_cycle -= + global_fview[sorted[current_index]].iov_len; + current_index ++; + continue; + } + } + } + /* + for (i=0 ; if_size ; i++) { + if (bytes_per_process[i]) { + printf ("%d --> expecting %d from %d\n",fh->f_rank, + bytes_per_process[i], i); + } + } + */ + /* Calculate the displacement on where to put the data and allocate + the recieve buffer (global_buf) */ + displs[0] = 0; + global_count = bytes_per_process[0]; + for (i=1 ; if_size ; i++) { + global_count += bytes_per_process[i]; + displs[i] = displs[i-1] + bytes_per_process[i-1]; + } + + if (0 != global_count) { + global_buf = malloc (global_count); + if (NULL == global_buf) { + opal_output (1, "OUT OF MEMORY\n"); + return OMPI_ERR_OUT_OF_RESOURCE; + } + } + } + + /* Send the data to the corresponding aggregator */ + if ( OMPI_SUCCESS != ompi_io_ompio_send_data (fh, + buf, + total_bytes_sent, + decoded_iov, + iov_count, + bytes_sent, + broken_iovec, + previous, + prev_bytes_rem, + global_buf, + bytes_per_process, + displs, + two_phase_num_io_procs, + stripe_size)) { + opal_output (1, "ERROR IN SENDING 
DATA\n"); + return OMPI_ERROR; + } + + /* + if (0 == fh->f_rank%fh->f_aggregator_index) { + for (k=0 ; kf_rank, + ((int *)global_buf)[k]); + } + } + */ + + total_bytes_written += total_bytes_sent; + total_bytes -= global_count; + + /********************************************************** + **************** DONE GATHERING OF DATA ****************** + *********************************************************/ + + /********************************************************** + ******* Create the io array, and pass it to fbtl ********* + *********************************************************/ + if (0 == fh->f_rank%fh->f_aggregator_index && global_count) { + size_t bytes_to_write = global_count; + int *temp = NULL; + int block = 1; + k = 0; + + temp = (int *)malloc (sizeof(int) * fh->f_size); + if (NULL == temp) { + opal_output(1, "OUT OF MEMORY\n"); + return OMPI_ERR_OUT_OF_RESOURCE; + } + memset(temp, 0x0, fh->f_size*sizeof(int)); + fh->f_io_array = (mca_io_ompio_io_array_t *) malloc + (OMPIO_IOVEC_INITIAL_SIZE * sizeof (mca_io_ompio_io_array_t)); + if (NULL == fh->f_io_array) { + opal_output(1, "OUT OF MEMORY\n"); + return OMPI_ERR_OUT_OF_RESOURCE; + } + + while (bytes_to_write) { + int start = 0; + + if (OMPIO_IOVEC_INITIAL_SIZE*block <= k) { + block ++; + fh->f_io_array = (mca_io_ompio_io_array_t *)realloc + (fh->f_io_array, OMPIO_IOVEC_INITIAL_SIZE * block * + sizeof(mca_io_ompio_io_array_t)); + if (NULL == fh->f_io_array) { + opal_output(1, "OUT OF MEMORY\n"); + return OMPI_ERR_OUT_OF_RESOURCE; + } + } + + blocks = fview_count[0]; + for (j=0 ; jf_size ; j++) { + if (sorted[x] < blocks) { + n = j; + break; + } + else { + blocks += fview_count[j+1]; + } + } + for (j=0 ; jf_io_array[k].offset = (IOVBASE_TYPE *) + ((OPAL_PTRDIFF_TYPE)global_fview[sorted[x]].iov_base + + (global_fview[sorted[x]].iov_len - bytes_left)); + fh->f_io_array[k].length = bytes_left; + fh->f_io_array[k].memory_address = + &global_buf[start+temp[n]]; + temp[n] += 
(int)fh->f_io_array[k].length; + bytes_to_write -= bytes_left; + bytes_left = 0; + k ++; + x ++; + continue; + } + else { + fh->f_io_array[k].offset = (IOVBASE_TYPE *) + ((OPAL_PTRDIFF_TYPE)global_fview[sorted[x]].iov_base + + (global_fview[sorted[x]].iov_len - bytes_left)); + fh->f_io_array[k].length = bytes_to_write; + fh->f_io_array[k].memory_address = + &global_buf[start+temp[n]]; + temp[n] += (int)fh->f_io_array[k].length; + bytes_left -= bytes_to_write; + bytes_to_write = 0;; + k ++; + break; + } + } + else { + if (bytes_to_write < global_fview[sorted[x]].iov_len) { + fh->f_io_array[k].offset = global_fview[sorted[x]].iov_base; + fh->f_io_array[k].length = bytes_to_write; + fh->f_io_array[k].memory_address = + &global_buf[start+temp[n]]; + bytes_left = + global_fview[sorted[x]].iov_len - bytes_to_write; + bytes_to_write = 0; + k ++; + break; + } + else { + fh->f_io_array[k].offset = global_fview[sorted[x]].iov_base; + fh->f_io_array[k].length = global_fview[sorted[x]].iov_len; + fh->f_io_array[k].memory_address = + &global_buf[start+temp[n]]; + temp[n] += (int)fh->f_io_array[k].length; + bytes_to_write -= global_fview[sorted[x]].iov_len; + k ++; + x ++; + continue; + } + } + } + + fh->f_num_of_io_entries = k; + /* + printf("%d: *************************** %d\n", fh->f_rank, fh->f_num_of_io_entries); + for (i=0 ; if_num_of_io_entries ; i++) { + printf(" ADDRESS: %p OFFSET: %d LENGTH: %d\n", + fh->f_io_array[i].memory_address, + fh->f_io_array[i].offset, + fh->f_io_array[i].length); + } + */ +#if TIME_BREAKDOWN + if (0 == fh->f_rank%fh->f_aggregator_index) { + start_time2 = MPI_Wtime(); + } +#endif + if (fh->f_num_of_io_entries) { + if (OMPI_SUCCESS != fh->f_fbtl->fbtl_pwritev (fh, NULL)) { + opal_output (1, "WRITE FAILED\n"); + return OMPI_ERROR; + } + } +#if TIME_BREAKDOWN + if (0 == fh->f_rank%fh->f_aggregator_index) { + end_time2 = MPI_Wtime(); + total_io += end_time2-start_time2; + } +#endif + if (NULL != temp) { + free (temp); + temp = NULL; + } + } + 
/********************************************************** + ******************** DONE WRITING ************************ + *********************************************************/ + + if (0 == fh->f_rank%fh->f_aggregator_index) { + fh->f_num_of_io_entries = 0; + if (NULL != fh->f_io_array) { + free (fh->f_io_array); + fh->f_io_array = NULL; + } + if (NULL != global_buf) { + free (global_buf); + global_buf = NULL; + } + } +#if TIME_BREAKDOWN + if (0 == fh->f_rank%fh->f_aggregator_index) { + end_time = MPI_Wtime(); + total += end_time-start_time; + } +#endif + } + +#if TIME_BREAKDOWN + if (0 == fh->f_rank%fh->f_aggregator_index) { + printf ("%d: Total --- %f I/O ---- %f\n", fh->f_rank, total, total_io); + } +#endif + + if (NULL != sorted) { + free (sorted); + sorted = NULL; + } + if (NULL != broken_iovec) { + free (broken_iovec); + broken_iovec = NULL; + } + if (NULL != global_fview) { + free (global_fview); + global_fview = NULL; + } + if (NULL != fview_count) { + free (fview_count); + fview_count = NULL; + } + if (NULL != decoded_iov) { + free (decoded_iov); + decoded_iov = NULL; + } + if (NULL != bytes_per_process) { + free (bytes_per_process); + bytes_per_process = NULL; + } + if (NULL != bytes_sent) { + free (bytes_sent); + bytes_sent = NULL; + } + if (NULL != current) { + free (current); + current = NULL; + } + if (NULL != previous) { + free (previous); + previous = NULL; + } + if (NULL != bytes_rem) { + free (bytes_rem); + bytes_rem = NULL; + } + if (NULL != prev_bytes_rem) { + free (prev_bytes_rem); + prev_bytes_rem = NULL; + } + if (NULL != displs) { + free (displs); + displs = NULL; + } + + fh->f_flags ^= OMPIO_AGGREGATOR_IS_SET; + + return OMPI_SUCCESS; +} diff --git a/ompi/mca/fcoll/two_phase/fcoll_two_phase_file_write_all_begin.c b/ompi/mca/fcoll/two_phase/fcoll_two_phase_file_write_all_begin.c new file mode 100644 index 0000000000..05f057e965 --- /dev/null +++ b/ompi/mca/fcoll/two_phase/fcoll_two_phase_file_write_all_begin.c @@ -0,0 +1,36 @@ +/* + * 
Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
+ *                         University Research and Technology
+ *                         Corporation. All rights reserved.
+ * Copyright (c) 2004-2005 The University of Tennessee and The University
+ *                         of Tennessee Research Foundation. All rights
+ *                         reserved.
+ * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
+ *                         University of Stuttgart. All rights reserved.
+ * Copyright (c) 2004-2005 The Regents of the University of California.
+ *                         All rights reserved.
+ * Copyright (c) 2008-2011 University of Houston. All rights reserved.
+ * $COPYRIGHT$
+ *
+ * Additional copyrights may follow
+ *
+ * $HEADER$
+ */
+
+#include "ompi_config.h"
+#include "fcoll_two_phase.h"
+
+#include "mpi.h"
+#include "ompi/constants.h"
+#include "ompi/mca/fcoll/fcoll.h"
+/* Stub for the two_phase write_all_begin entry point: performs no I/O, prints
+ * a trace line and returns OMPI_SUCCESS; fh, buf, count, datatype are unused. */
+int
+mca_fcoll_two_phase_file_write_all_begin (mca_io_ompio_file_t *fh,
+                                          void *buf,
+                                          int count,
+                                          struct ompi_datatype_t *datatype)
+{
+    printf ("TWO_PHASE WRITE ALL BEGIN\n");
+    return OMPI_SUCCESS;
+}
diff --git a/ompi/mca/fcoll/two_phase/fcoll_two_phase_file_write_all_end.c b/ompi/mca/fcoll/two_phase/fcoll_two_phase_file_write_all_end.c
new file mode 100644
index 0000000000..687ef8a054
--- /dev/null
+++ b/ompi/mca/fcoll/two_phase/fcoll_two_phase_file_write_all_end.c
@@ -0,0 +1,35 @@
+/*
+ * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
+ *                         University Research and Technology
+ *                         Corporation. All rights reserved.
+ * Copyright (c) 2004-2005 The University of Tennessee and The University
+ *                         of Tennessee Research Foundation. All rights
+ *                         reserved.
+ * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
+ *                         University of Stuttgart. All rights reserved.
+ * Copyright (c) 2004-2005 The Regents of the University of California.
+ *                         All rights reserved.
+ * Copyright (c) 2008-2011 University of Houston. All rights reserved.
+ * $COPYRIGHT$
+ *
+ * Additional copyrights may follow
+ *
+ * $HEADER$
+ */
+
+#include "ompi_config.h"
+#include "fcoll_two_phase.h"
+
+#include "mpi.h"
+#include "ompi/constants.h"
+#include "ompi/mca/fcoll/fcoll.h"
+/* Stub for the two_phase write_all_end entry point: performs no I/O, prints
+ * a trace line and returns OMPI_SUCCESS; fh, buf and status are unused. */
+int
+mca_fcoll_two_phase_file_write_all_end (mca_io_ompio_file_t *fh,
+                                        void *buf,
+                                        ompi_status_public_t *status)
+{
+    printf ("TWO_PHASE WRITE ALL END\n");
+    return OMPI_SUCCESS;
+}
diff --git a/ompi/mca/fcoll/two_phase/fcoll_two_phase_module.c b/ompi/mca/fcoll/two_phase/fcoll_two_phase_module.c
new file mode 100644
index 0000000000..191ee242f0
--- /dev/null
+++ b/ompi/mca/fcoll/two_phase/fcoll_two_phase_module.c
@@ -0,0 +1,91 @@
+/*
+ * Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana
+ *                         University Research and Technology
+ *                         Corporation. All rights reserved.
+ * Copyright (c) 2004-2006 The University of Tennessee and The University
+ *                         of Tennessee Research Foundation. All rights
+ *                         reserved.
+ * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
+ *                         University of Stuttgart. All rights reserved.
+ * Copyright (c) 2004-2005 The Regents of the University of California.
+ *                         All rights reserved.
+ * Copyright (c) 2008-2011 University of Houston. All rights reserved.
+ * $COPYRIGHT$
+ *
+ * Additional copyrights may follow
+ *
+ * $HEADER$
+ */
+
+#include "ompi_config.h"
+#include "fcoll_two_phase.h"
+
+#include /* NOTE(review): header name is missing here (angle-bracket text looks stripped) - restore from upstream */
+
+#include "mpi.h"
+#include "opal/mca/base/mca_base_param.h"
+#include "ompi/mca/fcoll/fcoll.h"
+#include "ompi/mca/fcoll/base/base.h"
+
+
+/*
+ * Actions structure: the function table returned by
+ * mca_fcoll_two_phase_component_file_query. Slots, in order: module init,
+ * module finalize, read_all, read_all_begin/_end, write_all, write_all_begin/_end.
+ */
+static mca_fcoll_base_module_1_0_0_t two_phase = {
+    mca_fcoll_two_phase_module_init,
+    mca_fcoll_two_phase_module_finalize,
+    mca_fcoll_two_phase_file_read_all,
+    mca_fcoll_two_phase_file_read_all_begin,
+    mca_fcoll_two_phase_file_read_all_end,
+    mca_fcoll_two_phase_file_write_all,
+    mca_fcoll_two_phase_file_write_all_begin,
+    mca_fcoll_two_phase_file_write_all_end
+};
+/* Component-level query: ignores both thread flags and returns OMPI_SUCCESS. */
+int
+mca_fcoll_two_phase_component_init_query(bool enable_progress_threads,
+                                         bool enable_mpi_threads)
+{
+    /* Nothing to do */
+
+    return OMPI_SUCCESS;
+}
+/* Per-file query: always writes *priority; returns NULL if the priority is <= 0. */
+mca_fcoll_base_module_1_0_0_t *
+mca_fcoll_two_phase_component_file_query (mca_io_ompio_file_t *fh, int *priority)
+{
+    *priority = mca_fcoll_two_phase_priority;
+    if (0 >= mca_fcoll_two_phase_priority) {
+        return NULL;
+    }
+    /* A non-zero result from the base query table raises *priority to at least 50. */
+    if (mca_fcoll_base_query_table (fh, "two_phase")) {
+        if (*priority < 50) {
+            *priority = 50;
+        }
+    }
+
+    return &two_phase;
+}
+/* Unquery hook: currently does nothing with `file` and returns OMPI_SUCCESS. */
+int mca_fcoll_two_phase_component_file_unquery (mca_io_ompio_file_t *file)
+{
+    /* This function might be needed for some purposes later. For now it
+     * does not have anything to do since there are no steps which need
+     * to be undone if this module is not selected */
+
+    return OMPI_SUCCESS;
+}
+/* Module init hook: nothing to set up, returns OMPI_SUCCESS. */
+int mca_fcoll_two_phase_module_init (mca_io_ompio_file_t *file)
+{
+    return OMPI_SUCCESS;
+}
+
+/* Module finalize hook: nothing to tear down, returns OMPI_SUCCESS. */
+int mca_fcoll_two_phase_module_finalize (mca_io_ompio_file_t *file)
+{
+    return OMPI_SUCCESS;
+}
diff --git a/ompi/mca/fcoll/ylib/Makefile.am b/ompi/mca/fcoll/ylib/Makefile.am
new file mode 100644
index 0000000000..ca4314c38b
--- /dev/null
+++ b/ompi/mca/fcoll/ylib/Makefile.am
@@ -0,0 +1,54 @@
+#
+# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
+#                         University Research and Technology
+#                         Corporation. All rights reserved.
+# Copyright (c) 2004-2005 The University of Tennessee and The University
+#                         of Tennessee Research Foundation. All rights
+#                         reserved.
+# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
+#                         University of Stuttgart. All rights reserved.
+# Copyright (c) 2004-2005 The Regents of the University of California.
+#                         All rights reserved.
+# Copyright (c) 2008-2011 University of Houston. All rights reserved.
+# $COPYRIGHT$
+#
+# Additional copyrights may follow
+#
+# $HEADER$
+#
+
+sources = \
+        fcoll_ylib.h \
+        fcoll_ylib_module.c \
+        fcoll_ylib_component.c \
+        fcoll_ylib_file_read_all.c \
+        fcoll_ylib_file_read_all_begin.c \
+        fcoll_ylib_file_read_all_end.c \
+        fcoll_ylib_file_write_all.c \
+        fcoll_ylib_file_write_all_begin.c \
+        fcoll_ylib_file_write_all_end.c
+
+# Make the output library in this directory, and name it either
+# mca_<type>_<name>.la (for DSO builds) or libmca_<type>_<name>.la
+# (for static builds).
+
+if MCA_BUILD_ompi_fcoll_ylib_DSO
+component_noinst =
+component_install = mca_fcoll_ylib.la
+else
+component_noinst = libmca_fcoll_ylib.la
+component_install =
+endif
+# Installed under $(libdir)/openmpi; $(component_install) is non-empty only in the DSO case.
+mcacomponentdir = $(libdir)/openmpi
+mcacomponent_LTLIBRARIES = $(component_install)
+mca_fcoll_ylib_la_SOURCES = $(sources)
+mca_fcoll_ylib_la_LDFLAGS = -module -avoid-version
+mca_fcoll_ylib_la_LIBADD = \
+        $(top_ompi_builddir)/ompi/libmpi.la \
+        $(top_ompi_builddir)/orte/libopen-rte.la \
+        $(top_ompi_builddir)/opal/libopen-pal.la
+# Built but not installed (noinst); $(component_noinst) is non-empty only in the non-DSO case.
+noinst_LTLIBRARIES = $(component_noinst)
+libmca_fcoll_ylib_la_SOURCES =$(sources)
+libmca_fcoll_ylib_la_LDFLAGS = -module -avoid-version
diff --git a/ompi/mca/fcoll/ylib/fcoll_ylib.h b/ompi/mca/fcoll/ylib/fcoll_ylib.h
new file mode 100644
index 0000000000..c1fcc92950
--- /dev/null
+++ b/ompi/mca/fcoll/ylib/fcoll_ylib.h
@@ -0,0 +1,86 @@
+/*
+ * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
+ *                         University Research and Technology
+ *                         Corporation. All rights reserved.
+ * Copyright (c) 2004-2006 The University of Tennessee and The University
+ *                         of Tennessee Research Foundation. All rights
+ *                         reserved.
+ * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
+ *                         University of Stuttgart. All rights reserved.
+ * Copyright (c) 2004-2005 The Regents of the University of California.
+ *                         All rights reserved.
+ * Copyright (c) 2008-2011 University of Houston. All rights reserved.
+ * $COPYRIGHT$
+ *
+ * Additional copyrights may follow
+ *
+ * $HEADER$
+ */
+
+#ifndef MCA_FCOLL_YLIB_EXPORT_H
+#define MCA_FCOLL_YLIB_EXPORT_H
+
+#include "ompi_config.h"
+
+#include "mpi.h"
+#include "opal/mca/mca.h"
+#include "ompi/mca/fcoll/fcoll.h"
+#include "ompi/mca/fcoll/base/base.h"
+#include "ompi/mca/io/ompio/io_ompio.h"
+
+BEGIN_C_DECLS
+
+/* Globally exported variables: the component's integer tunables, defined
+ * with their initial values in fcoll_ylib_component.c. */
+extern int mca_fcoll_ylib_priority;
+extern int mca_fcoll_ylib_num_io_procs;
+extern int mca_fcoll_ylib_stripe_size;
+extern int mca_fcoll_ylib_blocks_per_cycle;
+
+OMPI_MODULE_DECLSPEC extern mca_fcoll_base_component_2_0_0_t mca_fcoll_ylib_component;
+
+/* API functions: component query hooks, module init/finalize, and the
+ * read_all / write_all entry points with their _begin and _end variants. */
+int mca_fcoll_ylib_component_init_query(bool enable_progress_threads,
+                                        bool enable_mpi_threads);
+struct mca_fcoll_base_module_1_0_0_t *
+mca_fcoll_ylib_component_file_query (mca_io_ompio_file_t *fh, int *priority);
+
+int mca_fcoll_ylib_component_file_unquery (mca_io_ompio_file_t *file);
+
+int mca_fcoll_ylib_module_init (mca_io_ompio_file_t *file);
+int mca_fcoll_ylib_module_finalize (mca_io_ompio_file_t *file);
+
+int mca_fcoll_ylib_file_read_all (mca_io_ompio_file_t *fh,
+                                  void *buf,
+                                  int count,
+                                  struct ompi_datatype_t *datatype,
+                                  ompi_status_public_t * status);
+
+int mca_fcoll_ylib_file_read_all_begin (mca_io_ompio_file_t *fh,
+                                        void *buf,
+                                        int count,
+                                        struct ompi_datatype_t *datatype);
+
+int mca_fcoll_ylib_file_read_all_end (mca_io_ompio_file_t *fh,
+                                      void *buf,
+                                      ompi_status_public_t * status);
+
+int mca_fcoll_ylib_file_write_all (mca_io_ompio_file_t *fh,
+                                   void *buf,
+                                   int count,
+                                   struct ompi_datatype_t *datatype,
+                                   ompi_status_public_t * status);
+
+int mca_fcoll_ylib_file_write_all_begin (mca_io_ompio_file_t *fh,
+                                         void *buf,
+                                         int count,
+                                         struct ompi_datatype_t *datatype);
+
+int mca_fcoll_ylib_file_write_all_end (mca_io_ompio_file_t *fh,
+                                       void *buf,
+                                       ompi_status_public_t * status);
+
+END_C_DECLS
+
+#endif /* MCA_FCOLL_YLIB_EXPORT_H */
diff --git
a/ompi/mca/fcoll/ylib/fcoll_ylib_component.c b/ompi/mca/fcoll/ylib/fcoll_ylib_component.c
new file mode 100644
index 0000000000..faf7be8d8d
--- /dev/null
+++ b/ompi/mca/fcoll/ylib/fcoll_ylib_component.c
@@ -0,0 +1,132 @@
+/*
+ * Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana
+ *                         University Research and Technology
+ *                         Corporation. All rights reserved.
+ * Copyright (c) 2004-2005 The University of Tennessee and The University
+ *                         of Tennessee Research Foundation. All rights
+ *                         reserved.
+ * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
+ *                         University of Stuttgart. All rights reserved.
+ * Copyright (c) 2004-2005 The Regents of the University of California.
+ *                         All rights reserved.
+ * Copyright (c) 2008 Cisco Systems, Inc. All rights reserved.
+ * Copyright (c) 2008-2011 University of Houston. All rights reserved.
+ * $COPYRIGHT$
+ *
+ * Additional copyrights may follow
+ *
+ * $HEADER$
+ *
+ * These symbols are in a file by themselves to provide nice linker
+ * semantics. Since linkers generally pull in symbols by object
+ * files, keeping these symbols as the only symbols in this file
+ * prevents utility programs such as "ompi_info" from having to import
+ * entire components just to query their version and parameters.
+ */
+
+#include "ompi_config.h"
+#include "fcoll_ylib.h"
+
+#include "mpi.h"
+#include "ompi/mca/fcoll/fcoll.h"
+#include "fcoll_ylib.h"
+
+/*
+ * Public string showing the fcoll ylib component version number
+ */
+const char *mca_fcoll_ylib_component_version_string =
+    "Open MPI ylib collective MCA component version " OMPI_VERSION;
+
+/*
+ * Global variables: initial values of the tunables handled by ylib_register()
+ */
+int mca_fcoll_ylib_priority = 0;
+int mca_fcoll_ylib_num_io_procs = 1;
+int mca_fcoll_ylib_stripe_size = 1048576;
+int mca_fcoll_ylib_blocks_per_cycle = 20;
+
+/*
+ * Local function: installed in the component struct below
+ */
+static int ylib_register(void);
+
+/*
+ * Instantiate the public struct with all of our public information
+ * and pointers to our public functions in it
+ */
+mca_fcoll_base_component_2_0_0_t mca_fcoll_ylib_component = {
+
+    /* First, the mca_component_t struct containing meta information
+     * about the component itself */
+
+    {
+        MCA_FCOLL_BASE_VERSION_2_0_0,
+
+        /* Component name and version */
+        "ylib",
+        OMPI_MAJOR_VERSION,
+        OMPI_MINOR_VERSION,
+        OMPI_RELEASE_VERSION,
+        ylib_register,
+        NULL
+    },
+    {
+        /* The component is checkpoint ready */
+        MCA_BASE_METADATA_PARAM_CHECKPOINT
+    },
+
+    mca_fcoll_ylib_component_init_query,
+    mca_fcoll_ylib_component_file_query,
+    mca_fcoll_ylib_component_file_unquery
+};
+/* Handles the four integer parameters priority, num_io_procs, stripe_size and
+ * blocks_per_cycle: looks each up, then registers it; returns OMPI_SUCCESS. */
+static int
+ylib_register(void)
+{
+    int param;
+    /* NOTE(review): lookups run before the registrations below - confirm this order is intended. */
+    param = mca_base_param_find ("fcoll", NULL, "ylib_priority");
+    if (param >= 0)
+    {
+        mca_base_param_lookup_int (param, &mca_fcoll_ylib_priority);
+    }
+    param = mca_base_param_find ("fcoll", NULL, "ylib_num_io_procs");
+    if (param >= 0)
+    {
+        mca_base_param_lookup_int (param, &mca_fcoll_ylib_num_io_procs);
+    }
+    param = mca_base_param_find ("fcoll", NULL, "ylib_stripe_size");
+    if (param >= 0)
+    {
+        mca_base_param_lookup_int (param, &mca_fcoll_ylib_stripe_size);
+    }
+    param = mca_base_param_find ("fcoll", NULL, "ylib_blocks_per_cycle");
+    if (param >= 0)
+    {
+        mca_base_param_lookup_int (param, &mca_fcoll_ylib_blocks_per_cycle);
+    }
+    /* Each global is passed both by value and by address; return codes are not checked. */
+    mca_base_param_reg_int (&mca_fcoll_ylib_component.fcollm_version,
+                            "priority",
+                            "Priority of the ylib fcoll component",
+                            false, false, mca_fcoll_ylib_priority,
+                            &mca_fcoll_ylib_priority);
+    mca_base_param_reg_int (&mca_fcoll_ylib_component.fcollm_version,
+                            "num_io_procs",
+                            "Number of writers in the ylib fcoll component",
+                            false, false, mca_fcoll_ylib_num_io_procs,
+                            &mca_fcoll_ylib_num_io_procs);
+    mca_base_param_reg_int (&mca_fcoll_ylib_component.fcollm_version,
+                            "stripe_size",
+                            "Stripe Size of the ylib fcoll component",
+                            false, false, mca_fcoll_ylib_stripe_size,
+                            &mca_fcoll_ylib_stripe_size);
+    mca_base_param_reg_int (&mca_fcoll_ylib_component.fcollm_version,
+                            "blocks_per_cycle",
+                            "Blocks to write per cycle of the ylib fcoll component",
+                            false, false, mca_fcoll_ylib_blocks_per_cycle,
+                            &mca_fcoll_ylib_blocks_per_cycle);
+
+    return OMPI_SUCCESS;
+}
diff --git a/ompi/mca/fcoll/ylib/fcoll_ylib_file_read_all.c b/ompi/mca/fcoll/ylib/fcoll_ylib_file_read_all.c
new file mode 100644
index 0000000000..b42d00fd41
--- /dev/null
+++ b/ompi/mca/fcoll/ylib/fcoll_ylib_file_read_all.c
@@ -0,0 +1,662 @@
+/*
+ * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
+ *                         University Research and Technology
+ *                         Corporation. All rights reserved.
+ * Copyright (c) 2004-2005 The University of Tennessee and The University
+ *                         of Tennessee Research Foundation. All rights
+ *                         reserved.
+ * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
+ *                         University of Stuttgart. All rights reserved.
+ * Copyright (c) 2004-2005 The Regents of the University of California.
+ *                         All rights reserved.
+ * Copyright (c) 2008-2011 University of Houston. All rights reserved.
+ * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#include "ompi_config.h" +#include "fcoll_ylib.h" + +#include "mpi.h" +#include "ompi/constants.h" +#include "ompi/mca/fcoll/fcoll.h" +#include "ompi/mca/io/ompio/io_ompio.h" +#include "ompi/mca/io/io.h" +#include "math.h" +#include "ompi/mca/pml/pml.h" +#include + +int +mca_fcoll_ylib_file_read_all (mca_io_ompio_file_t *fh, + void *buf, + int count, + struct ompi_datatype_t *datatype, + ompi_status_public_t *status) +{ + size_t total_bytes_read = 0; /* total bytes that have been read*/ + size_t total_bytes = 0; /* total bytes to be read */ + size_t total_bytes_global = 0; + size_t bytes_per_cycle = 0; /* total read in each cycle by each process*/ + size_t bytes_to_read_in_cycle = 0; /* left to be read in a cycle*/ + size_t current_position = 0; + size_t max_data = 0; + size_t bytes_remaining = 0; + size_t bytes_rem = 0; + size_t prev_bytes_rem = 0; + + int index = 0; + int current_index = 0; + int current = 0; + int previous = 0; + int cycles = 0; + int i=0, j=0, x=0, n=0; + int blocks = 0; + int bytes_left = 0; + + /* array that contains the sorted indices of the global_iov */ + int *sorted = NULL; + int *displs = NULL; + int *bytes_per_process = NULL; + int *bytes_received = NULL; + + /* iovec structure and count of the buffer passed in */ + uint32_t iov_count = 0; + struct iovec *decoded_iov = NULL; + int iov_index = 0; + + char *receive_buf = NULL; + + int global_fview_count = 0; + struct iovec *global_fview = NULL; + + int local_count = 0; + struct iovec *iov = NULL; + + int broken_count = 0; + struct iovec *broken_iovec = NULL; + + int *fview_count = NULL; + + int global_count = 0; + char *global_buf = NULL; + + if (opal_datatype_is_contiguous_memory_layout(&datatype->super,1)) { + fh->f_flags |= OMPIO_CONTIGUOUS_MEMORY; + } + + /************************************************************************** + ** In case the data is not contigous in memory, decode it into an iovec 
** + **************************************************************************/ + if (! (fh->f_flags & OMPIO_CONTIGUOUS_MEMORY)) { + ompi_io_ompio_decode_datatype (fh, + datatype, + count, + buf, + &max_data, + &decoded_iov, + &iov_count); + } + else { + max_data = count * datatype->super.size; + } + + if (! (fh->f_flags & OMPIO_AGGREGATOR_IS_SET)) { + ompi_io_ompio_set_aggregator_props (fh, + mca_fcoll_ylib_num_io_procs, + max_data); + mca_fcoll_ylib_num_io_procs = + ceil((float)fh->f_size/fh->f_procs_per_group); + fh->f_aggregator_index = + ceil((float)fh->f_size/mca_fcoll_ylib_num_io_procs); + } + + /********************************************************************* + *** Generate the File offsets/lengths corresponding to this read *** + ********************************************************************/ + ompi_io_ompio_generate_current_file_view (fh, + max_data, + &iov, + &local_count); + /* + for (i=0 ; if_rank, + iov[i].iov_base, + iov[i].iov_len); + } + */ + + /************************************************************* + * Breakdown the file view at each process per OST then send * + * each portion of the file view t0 the corresp aggregator * + *************************************************************/ + ompi_io_ompio_break_file_view (fh, + iov, + local_count, + mca_fcoll_ylib_num_io_procs, + mca_fcoll_ylib_stripe_size, + &broken_iovec, + &broken_count); + /* + for (i=0 ; if_rank, + broken_iovec[i].iov_base, + broken_iovec[i].iov_len); + } + */ + + if (NULL != iov) { + free (iov); + iov = NULL; + } + + ompi_io_ompio_distribute_file_view (fh, + broken_iovec, + broken_count, + mca_fcoll_ylib_num_io_procs, + mca_fcoll_ylib_stripe_size, + &fview_count, + &global_fview, + &global_fview_count); + /* + for (i=0 ; if_rank, + global_fview[i].iov_base, + global_fview[i].iov_len); + } + */ + if (0 == fh->f_rank%fh->f_aggregator_index) { + if (global_fview_count) { + for (i=0 ; if_size * sizeof (int)); + if (NULL == bytes_per_process) { + opal_output (1, 
"OUT OF MEMORY\n"); + return OMPI_ERR_OUT_OF_RESOURCE; + } + displs = (int *) malloc (fh->f_size * sizeof (int)); + if (NULL == displs) { + opal_output (1, "OUT OF MEMORY\n"); + return OMPI_ERR_OUT_OF_RESOURCE; + } + } + fh->f_comm->c_coll.coll_allreduce (&total_bytes, + &total_bytes_global, + 1, + MPI_DOUBLE, + MPI_MAX, + fh->f_comm, + fh->f_comm->c_coll.coll_allreduce_module); + + bytes_received = (int *)malloc (mca_fcoll_ylib_num_io_procs * sizeof (int)); + if (NULL == bytes_received) { + opal_output (1, "OUT OF MEMORY\n"); + return OMPI_ERR_OUT_OF_RESOURCE; + } + + bytes_per_cycle = mca_fcoll_ylib_stripe_size * mca_fcoll_ylib_blocks_per_cycle; + cycles = ceil ((float)total_bytes_global/bytes_per_cycle); + + for (index = 0; index < cycles; index++) { + int k = 0; + size_t total_bytes_recv = 0; + size_t temp = 0; + global_count = 0; + + memset(bytes_received, 0x0, mca_fcoll_ylib_num_io_procs*sizeof(int)); + if (0 == fh->f_rank%fh->f_aggregator_index) { + memset(displs, 0x0, fh->f_size*sizeof(int)); + memset(bytes_per_process, 0x0, fh->f_size*sizeof(int)); + } + + if (0 == fh->f_rank%fh->f_aggregator_index) { + if (total_bytes > bytes_per_cycle) { + bytes_to_read_in_cycle = bytes_per_cycle; + } + else { + bytes_to_read_in_cycle = total_bytes; + } + } + /* + printf ("****%d: Total_bytes: %d CYCLE %d Bytes %d OFFSET %d******\n", + fh->f_rank, + total_bytes, + index, + bytes_to_read_in_cycle, + fh->f_offset); + sleep(1); + */ + /********************************************************** + **Gather the Data from all the processes at the readers ** + *********************************************************/ + + /* Calculate how much data will be contributed in this cycle + by each process*/ + previous = current; + prev_bytes_rem = bytes_rem; + temp = bytes_per_cycle * mca_fcoll_ylib_num_io_procs * (index+1) + + fh->f_offset; + + while (current < broken_count) { + if (temp >= + (size_t)((OPAL_PTRDIFF_TYPE)broken_iovec[current].iov_base + + 
broken_iovec[current].iov_len)) { + k = ((OPAL_PTRDIFF_TYPE)broken_iovec[current].iov_base / + mca_fcoll_ylib_stripe_size) % mca_fcoll_ylib_num_io_procs; + + if (bytes_rem) { + bytes_received[k] += bytes_rem; + total_bytes_recv += bytes_rem; + bytes_rem = 0; + } + else { + bytes_received[k] += broken_iovec[current].iov_len; + total_bytes_recv += broken_iovec[current].iov_len; + } + current ++; + } + else { + k = ((OPAL_PTRDIFF_TYPE)broken_iovec[current].iov_base / + mca_fcoll_ylib_stripe_size) % mca_fcoll_ylib_num_io_procs; + if (bytes_rem) { + bytes_received[k] += temp - + ((broken_iovec[current].iov_len - bytes_rem) + + (OPAL_PTRDIFF_TYPE)broken_iovec[current].iov_base); + total_bytes_recv += temp - + ((broken_iovec[current].iov_len - bytes_rem) + + (OPAL_PTRDIFF_TYPE)broken_iovec[current].iov_base); + bytes_rem -= temp - + ((broken_iovec[current].iov_len - bytes_rem) + + (OPAL_PTRDIFF_TYPE)broken_iovec[current].iov_base); + break; + } + else { + if (temp > (size_t)broken_iovec[current].iov_base) { + bytes_received[k] += temp - + (OPAL_PTRDIFF_TYPE)broken_iovec[current].iov_base; + total_bytes_recv += temp - + (OPAL_PTRDIFF_TYPE)broken_iovec[current].iov_base; + bytes_rem = broken_iovec[current].iov_len - + (temp - + (OPAL_PTRDIFF_TYPE)broken_iovec[current].iov_base); + break; + } + else { + break; + } + } + } + } + /* + for (i=0 ; if_rank, + bytes_received[i], i); + } + sleep(1); + */ + if (0 == fh->f_rank%fh->f_aggregator_index && bytes_to_read_in_cycle) { + /* Calculate how much data will be recieved this cycle + by each aggregator*/ + while (bytes_to_read_in_cycle) { + blocks = fview_count[0]; + for (j=0 ; jf_size ; j++) { + if (sorted[current_index] < blocks) { + n = j; + break; + } + else { + blocks += fview_count[j+1]; + } + } + if (bytes_remaining) { + if (bytes_remaining <= bytes_to_read_in_cycle) { + bytes_per_process[n] += bytes_remaining; + current_index ++; + bytes_to_read_in_cycle -= bytes_remaining; + bytes_remaining = 0; + continue; + } + else { + 
bytes_per_process[n] += bytes_to_read_in_cycle; + bytes_remaining -= bytes_to_read_in_cycle; + bytes_to_read_in_cycle = 0; + break; + } + } + else { + if (bytes_to_read_in_cycle < + global_fview[sorted[current_index]].iov_len) { + bytes_per_process[n] += bytes_to_read_in_cycle; + bytes_remaining = + global_fview[sorted[current_index]].iov_len - + bytes_to_read_in_cycle; + bytes_to_read_in_cycle = 0; + break; + } + else { + bytes_per_process[n] += + global_fview[sorted[current_index]].iov_len; + bytes_to_read_in_cycle -= + global_fview[sorted[current_index]].iov_len; + current_index ++; + continue; + } + } + } + /* + for (i=0 ; if_size ; i++) { + printf ("%d --> expecting %d from %d\n",fh->f_rank, + bytes_per_process[i], i); + } + */ + /* Calculate the displacement on where to put the data and allocate + the recieve buffer (global_buf) */ + displs[0] = 0; + global_count = bytes_per_process[0]; + for (i=1 ; if_size ; i++) { + global_count += bytes_per_process[i]; + displs[i] = displs[i-1] + bytes_per_process[i-1]; + } + /* + for (i=0 ; if_size ; i++) { + printf ("Proc %d sending %d at %d\n", + i, + bytes_per_process[i], + displs[i]); + } + */ + if (0 != global_count) { + global_buf = malloc (global_count); + if (NULL == global_buf) { + opal_output (1, "OUT OF MEMORY\n"); + return OMPI_ERR_OUT_OF_RESOURCE; + } + } + } + /********************************************************** + ******* Create the io array, and pass it to fbtl ********* + *********************************************************/ + if (0 == fh->f_rank%fh->f_aggregator_index && global_count) { + int bytes_to_read = global_count; + int *temp = NULL; + int block = 1; + k = 0; + + temp = (int *)malloc (sizeof(int) * fh->f_size); + if (NULL == temp) { + opal_output(1, "OUT OF MEMORY\n"); + return OMPI_ERR_OUT_OF_RESOURCE; + } + memset(temp, 0x0, fh->f_size*sizeof(int)); + fh->f_io_array = (mca_io_ompio_io_array_t *) malloc + (OMPIO_IOVEC_INITIAL_SIZE * sizeof (mca_io_ompio_io_array_t)); + if (NULL == 
fh->f_io_array) { + opal_output(1, "OUT OF MEMORY\n"); + return OMPI_ERR_OUT_OF_RESOURCE; + } + + while (bytes_to_read) { + int start = 0; + + if (OMPIO_IOVEC_INITIAL_SIZE*block <= k) { + block ++; + fh->f_io_array = (mca_io_ompio_io_array_t *)realloc + (fh->f_io_array, OMPIO_IOVEC_INITIAL_SIZE * block * + sizeof(mca_io_ompio_io_array_t)); + if (NULL == fh->f_io_array) { + opal_output(1, "OUT OF MEMORY\n"); + return OMPI_ERR_OUT_OF_RESOURCE; + } + } + + blocks = fview_count[0]; + for (j=0 ; jf_size ; j++) { + if (sorted[x] < blocks) { + n = j; + break; + } + else { + blocks += fview_count[j+1]; + } + } + for (j=0 ; jf_io_array[k].offset = (IOVBASE_TYPE *) + ((OPAL_PTRDIFF_TYPE)global_fview[sorted[x]].iov_base + + (global_fview[sorted[x]].iov_len - bytes_left)); + fh->f_io_array[k].length = bytes_left; + fh->f_io_array[k].memory_address = + &global_buf[start+temp[n]]; + temp[n] += (int)fh->f_io_array[k].length; + bytes_to_read -= bytes_left; + bytes_left = 0; + k ++; + x ++; + continue; + } + else { + fh->f_io_array[k].offset = (IOVBASE_TYPE *) + ((OPAL_PTRDIFF_TYPE)global_fview[sorted[x]].iov_base + + (global_fview[sorted[x]].iov_len - bytes_left)); + fh->f_io_array[k].length = bytes_to_read; + fh->f_io_array[k].memory_address = + &global_buf[start+temp[n]]; + temp[n] += (int)fh->f_io_array[k].length; + bytes_left -= bytes_to_read; + bytes_to_read = 0;; + k ++; + break; + } + } + else { + if (bytes_to_read < (int)global_fview[sorted[x]].iov_len) { + fh->f_io_array[k].offset = global_fview[sorted[x]].iov_base; + fh->f_io_array[k].length = bytes_to_read; + fh->f_io_array[k].memory_address = + &global_buf[start+temp[n]]; + bytes_left = + global_fview[sorted[x]].iov_len - bytes_to_read; + bytes_to_read = 0; + k ++; + break; + } + else { + fh->f_io_array[k].offset = global_fview[sorted[x]].iov_base; + fh->f_io_array[k].length = global_fview[sorted[x]].iov_len; + fh->f_io_array[k].memory_address = + &global_buf[start+temp[n]]; + temp[n] += (int)fh->f_io_array[k].length; 
+ bytes_to_read -= global_fview[sorted[x]].iov_len; + k ++; + x ++; + continue; + } + } + } + + fh->f_num_of_io_entries = k; + /* + printf("*************************** %d\n", fh->f_num_of_io_entries); + for (i=0 ; if_num_of_io_entries ; i++) { + printf(" ADDRESS: %p OFFSET: %p LENGTH: %d\n", + fh->f_io_array[i].memory_address, + fh->f_io_array[i].offset, + fh->f_io_array[i].length); + } + */ + if (fh->f_num_of_io_entries) { + if (OMPI_SUCCESS != fh->f_fbtl->fbtl_preadv (fh, NULL)) { + opal_output (1, "READ FAILED\n"); + return OMPI_ERROR; + } + } + if (NULL != temp) { + free (temp); + temp = NULL; + } + } + /********************************************************** + ******************** DONE READING ************************ + *********************************************************/ + + + /********************************************************** + ********* Scatter the Data from the readers ************** + *********************************************************/ + + if (fh->f_flags & OMPIO_CONTIGUOUS_MEMORY) { + receive_buf = &((char*)buf)[total_bytes_read]; + } + else if (total_bytes_recv) { + /* allocate a send buffer and copy the data that needs + to be sent into it in case the data is non-contigous + in memory */ + receive_buf = malloc (total_bytes_recv); + if (NULL == receive_buf) { + opal_output (1, "OUT OF MEMORY\n"); + return OMPI_ERR_OUT_OF_RESOURCE; + } + } + + /* distribute the data to its corresponding processes */ + ompi_io_ompio_scatter_data (fh, + receive_buf, + total_bytes_recv, + bytes_received, + broken_iovec, + previous, + prev_bytes_rem, + global_buf, + bytes_per_process, + displs, + mca_fcoll_ylib_num_io_procs, + mca_fcoll_ylib_stripe_size); + + /* + if (0 == fh->f_rank%fh->f_aggregator_index) { + for (k=0 ; kf_rank, + ((int *)global_buf)[k]); + } + } + */ + + /* If data is not contigous in memory, copy the data from the + receive buffer into the buffer passed in */ + if (!(fh->f_flags & OMPIO_CONTIGUOUS_MEMORY)) { + OPAL_PTRDIFF_TYPE 
mem_address; + size_t remaining = 0; + size_t temp_position = 0; + + remaining = total_bytes_recv; + + while (remaining) { + mem_address = (OPAL_PTRDIFF_TYPE) + (decoded_iov[iov_index].iov_base) + current_position; + + if (remaining >= + (decoded_iov[iov_index].iov_len - current_position)) { + memcpy ((IOVBASE_TYPE *) mem_address, + receive_buf+temp_position, + decoded_iov[iov_index].iov_len - current_position); + remaining = remaining - + (decoded_iov[iov_index].iov_len - current_position); + temp_position = temp_position + + (decoded_iov[iov_index].iov_len - current_position); + iov_index = iov_index + 1; + current_position = 0; + } + else { + memcpy ((IOVBASE_TYPE *) mem_address, + receive_buf+temp_position, + remaining); + current_position = current_position + remaining; + remaining = 0; + } + } + if (NULL != receive_buf) { + free (receive_buf); + receive_buf = NULL; + } + } + + total_bytes_read += total_bytes_recv; + total_bytes -= global_count; + + /********************************************************** + **************** DONE GATHERING OF DATA ****************** + *********************************************************/ + + + + if (0 == fh->f_rank%fh->f_aggregator_index) { + fh->f_num_of_io_entries = 0; + if (NULL != fh->f_io_array) { + free (fh->f_io_array); + fh->f_io_array = NULL; + } + if (NULL != global_buf) { + free (global_buf); + global_buf = NULL; + } + } + } + if (NULL != sorted) { + free (sorted); + sorted = NULL; + } + if (NULL != broken_iovec) { + free (broken_iovec); + broken_iovec = NULL; + } + if (NULL != global_fview) { + free (global_fview); + global_fview = NULL; + } + if (NULL != fview_count) { + free (fview_count); + fview_count = NULL; + } + if (NULL != decoded_iov) { + free (decoded_iov); + decoded_iov = NULL; + } + if (NULL != bytes_per_process) { + free (bytes_per_process); + bytes_per_process = NULL; + } + if (NULL != bytes_received) { + free (bytes_received); + bytes_received = NULL; + } + if (NULL != displs) { + free 
(displs); + displs = NULL; + } + /* + if (NULL != total_bytes_per_process) { + free (total_bytes_per_process); + total_bytes_per_process = NULL; + } + */ + + return OMPI_SUCCESS; +} diff --git a/ompi/mca/fcoll/ylib/fcoll_ylib_file_read_all_begin.c b/ompi/mca/fcoll/ylib/fcoll_ylib_file_read_all_begin.c new file mode 100644 index 0000000000..a42b53bd12 --- /dev/null +++ b/ompi/mca/fcoll/ylib/fcoll_ylib_file_read_all_begin.c @@ -0,0 +1,36 @@ +/* + * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. + * Copyright (c) 2004-2005 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2005 The Regents of the University of California. + * All rights reserved. + * Copyright (c) 2008-2011 University of Houston. All rights reserved. + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#include "ompi_config.h" +#include "fcoll_ylib.h" + +#include "mpi.h" +#include "ompi/constants.h" +#include "ompi/mca/fcoll/fcoll.h" + + +int +mca_fcoll_ylib_file_read_all_begin (mca_io_ompio_file_t *fh, + void *buf, + int count, + struct ompi_datatype_t *datatype) +{ + printf ("DYNAMIC READ ALL BEGIN\n"); + return OMPI_SUCCESS; +} diff --git a/ompi/mca/fcoll/ylib/fcoll_ylib_file_read_all_end.c b/ompi/mca/fcoll/ylib/fcoll_ylib_file_read_all_end.c new file mode 100644 index 0000000000..c1d44626e6 --- /dev/null +++ b/ompi/mca/fcoll/ylib/fcoll_ylib_file_read_all_end.c @@ -0,0 +1,35 @@ +/* + * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. + * Copyright (c) 2004-2005 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. 
+ * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2005 The Regents of the University of California. + * All rights reserved. + * Copyright (c) 2008-2011 University of Houston. All rights reserved. + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#include "ompi_config.h" +#include "fcoll_ylib.h" + +#include "mpi.h" +#include "ompi/constants.h" +#include "ompi/mca/fcoll/fcoll.h" + + +int +mca_fcoll_ylib_file_read_all_end (mca_io_ompio_file_t *fh, + void *buf, + ompi_status_public_t *status) +{ + printf ("DYNAMIC READ ALL END\n"); + return OMPI_SUCCESS; +} diff --git a/ompi/mca/fcoll/ylib/fcoll_ylib_file_write_all.c b/ompi/mca/fcoll/ylib/fcoll_ylib_file_write_all.c new file mode 100644 index 0000000000..5a03bd89ad --- /dev/null +++ b/ompi/mca/fcoll/ylib/fcoll_ylib_file_write_all.c @@ -0,0 +1,706 @@ +/* + * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. + * Copyright (c) 2004-2005 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2005 The Regents of the University of California. + * All rights reserved. + * Copyright (c) 2008-2011 University of Houston. All rights reserved. 
+ * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#include "ompi_config.h" +#include "fcoll_ylib.h" + +#include "mpi.h" +#include "ompi/constants.h" +#include "ompi/mca/fcoll/fcoll.h" +#include "ompi/mca/io/ompio/io_ompio.h" +#include "ompi/mca/io/io.h" +#include "math.h" +#include "ompi/mca/pml/pml.h" +#include +/* mca_fcoll_ylib_file_write_all: collective write entry point of the ylib component. As written below it builds this rank's file view, splits it by mca_fcoll_ylib_stripe_size over mca_fcoll_ylib_num_io_procs, and then, cycle by cycle, gathers the data on the ranks where f_rank % f_aggregator_index == 0; those ranks fill fh->f_io_array and call fbtl_pwritev. Returns OMPI_SUCCESS, OMPI_ERR_OUT_OF_RESOURCE when an allocation fails, or OMPI_ERROR when fbtl_pwritev fails. The status argument is not referenced in the visible body. NOTE(review): the error returns do not release buffers allocated earlier in the function. Setting TIME_BREAKDOWN to 1 compiles in the MPI_Wtime timing printfs. */ +#define TIME_BREAKDOWN 0 +int +mca_fcoll_ylib_file_write_all (mca_io_ompio_file_t *fh, + void *buf, + int count, + struct ompi_datatype_t *datatype, + ompi_status_public_t *status) +{ + size_t total_bytes_written = 0; /* total bytes that have been written*/ + size_t total_bytes = 0; /* total bytes to be written */ + size_t total_bytes_global = 0; + size_t bytes_per_cycle = 0; /* total written in each cycle by each process*/ + size_t bytes_to_write_in_cycle = 0; /* left to be written in a cycle*/ + size_t current_position = 0; + size_t max_data = 0; + size_t bytes_remaining = 0; + size_t bytes_rem = 0; + size_t prev_bytes_rem = 0; + + int index = 0; + int current_index = 0; + int current = 0; + int previous = 0; + int cycles = 0; + int i=0, j=0, x=0, n=0; + int blocks = 0; + int bytes_left = 0; + + /* array that contains the sorted indices of the global_iov */ + int *sorted = NULL; + int *displs = NULL; + int *bytes_per_process = NULL; + int *bytes_sent = NULL; + + /* iovec structure and count of the buffer passed in */ + uint32_t iov_count = 0; + struct iovec *decoded_iov = NULL; + int iov_index = 0; + + char *send_buf = NULL; + + int global_fview_count = 0; + struct iovec *global_fview = NULL; + + int local_count = 0; + struct iovec *iov = NULL; + + int broken_count = 0; + struct iovec *broken_iovec = NULL; + + int *fview_count = NULL; + + int global_count = 0; + char *global_buf = NULL; + +#if TIME_BREAKDOWN + double start_time=0, end_time=0, start_time2=0, end_time2=0; + double total=0 , total_io=0; +#endif + +#if TIME_BREAKDOWN + if (0 == fh->f_rank%fh->f_aggregator_index) { + start_time = MPI_Wtime(); + } +#endif + +
if (opal_datatype_is_contiguous_memory_layout(&datatype->super,1)) { + fh->f_flags |= OMPIO_CONTIGUOUS_MEMORY; + } + /************************************************************************** + ** In case the data is not contiguous in memory, decode it into an iovec ** + **************************************************************************/ + if (! (fh->f_flags & OMPIO_CONTIGUOUS_MEMORY)) { + ompi_io_ompio_decode_datatype (fh, + datatype, + count, + buf, + &max_data, + &decoded_iov, + &iov_count); + } + else { + max_data = count * datatype->super.size; + } + + if (! (fh->f_flags & OMPIO_AGGREGATOR_IS_SET)) { + ompi_io_ompio_set_aggregator_props (fh, + mca_fcoll_ylib_num_io_procs, + max_data); + mca_fcoll_ylib_num_io_procs = + ceil((float)fh->f_size/fh->f_procs_per_group); + fh->f_aggregator_index = + ceil((float)fh->f_size/mca_fcoll_ylib_num_io_procs); + } + + /********************************************************************* + *** Generate the File offsets/lengths corresponding to this write *** + ********************************************************************/ + ompi_io_ompio_generate_current_file_view (fh, + max_data, + &iov, + &local_count); + /* + for (i=0 ; if_rank, + iov[i].iov_base, + iov[i].iov_len); + } + */ + + /************************************************************* + * Breakdown the file view at each process per OST then send * + * each portion of the file view to the corresp aggregator * + *************************************************************/ + ompi_io_ompio_break_file_view (fh, + iov, + local_count, + mca_fcoll_ylib_num_io_procs, + mca_fcoll_ylib_stripe_size, + &broken_iovec, + &broken_count); + /* + for (i=0 ; if_rank, + broken_iovec[i].iov_base, + broken_iovec[i].iov_len); + } + */ + + if (NULL != iov) { + free (iov); + iov = NULL; + } + + ompi_io_ompio_distribute_file_view (fh, + broken_iovec, + broken_count, + mca_fcoll_ylib_num_io_procs, + mca_fcoll_ylib_stripe_size, + &fview_count, + &global_fview, +
&global_fview_count); + /* + for (i=0 ; if_rank, + global_fview[i].iov_base, + global_fview[i].iov_len); + } + */ + + if (0 == fh->f_rank%fh->f_aggregator_index) { + if (global_fview_count) { + for (i=0 ; if_size * sizeof (int)); + if (NULL == bytes_per_process) { + opal_output (1, "OUT OF MEMORY\n"); + return OMPI_ERR_OUT_OF_RESOURCE; + } + displs = (int *) malloc (fh->f_size * sizeof (int)); + if (NULL == displs) { + opal_output (1, "OUT OF MEMORY\n"); + return OMPI_ERR_OUT_OF_RESOURCE; + } + } + fh->f_comm->c_coll.coll_allreduce (&total_bytes, + &total_bytes_global, + 1, + MPI_DOUBLE, + MPI_MAX, + fh->f_comm, + fh->f_comm->c_coll.coll_allreduce_module); + /* NOTE(review): the allreduce above passes the size_t variables total_bytes and total_bytes_global with MPI_DOUBLE, so the MPI datatype does not describe the C type of the buffers - confirm and use a matching integer datatype. */ + bytes_sent = (int *)malloc (mca_fcoll_ylib_num_io_procs * sizeof (int)); + if (NULL == bytes_sent) { + opal_output (1, "OUT OF MEMORY\n"); + return OMPI_ERR_OUT_OF_RESOURCE; + } + + bytes_per_cycle = mca_fcoll_ylib_stripe_size * mca_fcoll_ylib_blocks_per_cycle; + cycles = ceil ((float)total_bytes_global/bytes_per_cycle); + +#if TIME_BREAKDOWN + if (0 == fh->f_rank%fh->f_aggregator_index) { + end_time = MPI_Wtime(); + total = end_time-start_time; + printf ("%d: Preprocessing --- %f\n", fh->f_rank, total); + total = 0; + } +#endif + + for (index = 0; index < cycles; index++) { + int k = 0; + size_t total_bytes_sent = 0; + size_t temp = 0; + global_count = 0; + +#if TIME_BREAKDOWN + if (0 == fh->f_rank%fh->f_aggregator_index) { + start_time = MPI_Wtime(); + } +#endif + + memset(bytes_sent, 0x0, mca_fcoll_ylib_num_io_procs*sizeof(int)); + if (0 == fh->f_rank%fh->f_aggregator_index) { + memset(displs, 0x0, fh->f_size*sizeof(int)); + memset(bytes_per_process, 0x0, fh->f_size*sizeof(int)); + + if (total_bytes > bytes_per_cycle) { + bytes_to_write_in_cycle = bytes_per_cycle; + } + else { + bytes_to_write_in_cycle = total_bytes; + } + } + + /* + printf ("****%d: Total_bytes: %d CYCLE %d Bytes %d OFFSET %d******\n", + fh->f_rank, + total_bytes, + index, + bytes_to_write_in_cycle, + fh->f_offset); + sleep(1); + */ +
/********************************************************** + **Gather the Data from all the processes at the writers ** + *********************************************************/ + + /* Calculate how much data will be contributed in this cycle + by each process*/ + previous = current; + prev_bytes_rem = bytes_rem; + temp = bytes_per_cycle * mca_fcoll_ylib_num_io_procs * (index+1) + + fh->f_offset; + /* temp is the upper offset bound of this cycle: a broken_iovec entry that ends at or below temp is counted in full (or with what is left in bytes_rem), an entry that straddles temp is counted up to temp and the rest is carried over in bytes_rem */ + while (current < broken_count) { + if (temp >= + (size_t)((OPAL_PTRDIFF_TYPE)broken_iovec[current].iov_base + + broken_iovec[current].iov_len)) { + k = ((OPAL_PTRDIFF_TYPE)broken_iovec[current].iov_base / + mca_fcoll_ylib_stripe_size) % mca_fcoll_ylib_num_io_procs; + + if (bytes_rem) { + bytes_sent[k] += bytes_rem; + total_bytes_sent += bytes_rem; + bytes_rem = 0; + } + else { + bytes_sent[k] += broken_iovec[current].iov_len; + total_bytes_sent += broken_iovec[current].iov_len; + } + current ++; + } + else { + k = ((OPAL_PTRDIFF_TYPE)broken_iovec[current].iov_base / + mca_fcoll_ylib_stripe_size) % mca_fcoll_ylib_num_io_procs; + if (bytes_rem) { + bytes_sent[k] += temp - + ((broken_iovec[current].iov_len - bytes_rem) + + (OPAL_PTRDIFF_TYPE)broken_iovec[current].iov_base); + total_bytes_sent += temp - + ((broken_iovec[current].iov_len - bytes_rem) + + (OPAL_PTRDIFF_TYPE)broken_iovec[current].iov_base); + bytes_rem -= temp - + ((broken_iovec[current].iov_len - bytes_rem) + + (OPAL_PTRDIFF_TYPE)broken_iovec[current].iov_base); + break; + } + else { + if (temp > (size_t)broken_iovec[current].iov_base) { + bytes_sent[k] += temp - + (OPAL_PTRDIFF_TYPE)broken_iovec[current].iov_base; + total_bytes_sent += temp - + (OPAL_PTRDIFF_TYPE)broken_iovec[current].iov_base; + bytes_rem = broken_iovec[current].iov_len - + (temp - + (OPAL_PTRDIFF_TYPE)broken_iovec[current].iov_base); + break; + } + else { + break; + } + } + } + } + /* + for (i=0 ; if_rank, + bytes_sent[i], i); + } + } + sleep(3); + */ + if (0 == fh->f_rank%fh->f_aggregator_index && bytes_to_write_in_cycle) { + /*
Calculate how much data will be received this cycle + by each aggregator*/ + while (bytes_to_write_in_cycle) { + blocks = fview_count[0]; + for (j=0 ; jf_size ; j++) { + if (sorted[current_index] < blocks) { + n = j; + break; + } + else { + blocks += fview_count[j+1]; + } + } + if (bytes_remaining) { + if (bytes_remaining <= bytes_to_write_in_cycle) { + bytes_per_process[n] += bytes_remaining; + current_index ++; + bytes_to_write_in_cycle -= bytes_remaining; + bytes_remaining = 0; + continue; + } + else { + bytes_per_process[n] += bytes_to_write_in_cycle; + bytes_remaining -= bytes_to_write_in_cycle; + bytes_to_write_in_cycle = 0; + break; + } + } + else { + if (bytes_to_write_in_cycle < + global_fview[sorted[current_index]].iov_len) { + bytes_per_process[n] += bytes_to_write_in_cycle; + bytes_remaining = + global_fview[sorted[current_index]].iov_len - + bytes_to_write_in_cycle; + bytes_to_write_in_cycle = 0; + break; + } + else { + bytes_per_process[n] += + global_fview[sorted[current_index]].iov_len; + bytes_to_write_in_cycle -= + global_fview[sorted[current_index]].iov_len; + current_index ++; + continue; + } + } + } + /* + for (i=0 ; if_size ; i++) { + printf ("%d --> expecting %d from %d\n",fh->f_rank, + bytes_per_process[i], i); + } + */ + /* Calculate the displacement on where to put the data and allocate + the receive buffer (global_buf) */ + displs[0] = 0; + global_count = bytes_per_process[0]; + for (i=1 ; if_size ; i++) { + global_count += bytes_per_process[i]; + displs[i] = displs[i-1] + bytes_per_process[i-1]; + } + /* + for (i=0 ; if_size ; i++) { + printf ("Proc %d sending %d at %d\n", + i, + bytes_per_process[i], + displs[i]); + } + */ + if (0 != global_count) { + global_buf = malloc (global_count); + if (NULL == global_buf) { + opal_output (1, "OUT OF MEMORY\n"); + return OMPI_ERR_OUT_OF_RESOURCE; + } + } + } + + if (fh->f_flags & OMPIO_CONTIGUOUS_MEMORY) { + send_buf = &((char*)buf)[total_bytes_written]; + } + else if (total_bytes_sent) { + /*
allocate a send buffer and copy the data that needs + to be sent into it in case the data is non-contiguous + in memory */ + OPAL_PTRDIFF_TYPE mem_address; + size_t remaining = 0; + size_t temp_position = 0; + + send_buf = malloc (total_bytes_sent); + if (NULL == send_buf) { + opal_output (1, "OUT OF MEMORY\n"); + return OMPI_ERR_OUT_OF_RESOURCE; + } + + remaining = total_bytes_sent; + + while (remaining) { + mem_address = (OPAL_PTRDIFF_TYPE) + (decoded_iov[iov_index].iov_base) + current_position; + + if (remaining >= + (decoded_iov[iov_index].iov_len - current_position)) { + memcpy (send_buf+temp_position, + (IOVBASE_TYPE *)mem_address, + decoded_iov[iov_index].iov_len - current_position); + remaining = remaining - + (decoded_iov[iov_index].iov_len - current_position); + temp_position = temp_position + + (decoded_iov[iov_index].iov_len - current_position); + iov_index = iov_index + 1; + current_position = 0; + } + else { + memcpy (send_buf+temp_position, + (IOVBASE_TYPE *) mem_address, + remaining); + current_position = current_position + remaining; + remaining = 0; + } + } + } + + /* distribute the data to its corresponding aggregator */ + + ompi_io_ompio_gather_data (fh, + send_buf, + total_bytes_sent, + bytes_sent, + broken_iovec, + previous, + prev_bytes_rem, + global_buf, + bytes_per_process, + displs, + mca_fcoll_ylib_num_io_procs, + mca_fcoll_ylib_stripe_size); + /* + if (0 == fh->f_rank%fh->f_aggregator_index) { + for (k=0 ; kf_rank, + ((int *)global_buf)[k]); + } + } + */ + if (!
(fh->f_flags & OMPIO_CONTIGUOUS_MEMORY)) { + if (NULL != send_buf) { + free (send_buf); + send_buf = NULL; + } + } + + total_bytes_written += total_bytes_sent; + total_bytes -= global_count; + + /********************************************************** + **************** DONE GATHERING OF DATA ****************** + *********************************************************/ + + /********************************************************** + ******* Create the io array, and pass it to fbtl ********* + *********************************************************/ + if (0 == fh->f_rank%fh->f_aggregator_index && global_count) { + int bytes_to_write = global_count; + int *temp = NULL; + int block = 1; + k = 0; + + temp = (int *)malloc (sizeof(int) * fh->f_size); + if (NULL == temp) { + opal_output(1, "OUT OF MEMORY\n"); + return OMPI_ERR_OUT_OF_RESOURCE; + } + memset(temp, 0x0, fh->f_size*sizeof(int)); + fh->f_io_array = (mca_io_ompio_io_array_t *) malloc + (OMPIO_IOVEC_INITIAL_SIZE * sizeof (mca_io_ompio_io_array_t)); + if (NULL == fh->f_io_array) { + opal_output(1, "OUT OF MEMORY\n"); + return OMPI_ERR_OUT_OF_RESOURCE; + } + + while (bytes_to_write) { + int start = 0; + /* Grow f_io_array by another OMPIO_IOVEC_INITIAL_SIZE entries once k reaches the current capacity. NOTE(review): the realloc result is stored straight into fh->f_io_array, so the old array is no longer reachable if realloc returns NULL. */ + if (OMPIO_IOVEC_INITIAL_SIZE*block <= k) { + block ++; + fh->f_io_array = (mca_io_ompio_io_array_t *)realloc + (fh->f_io_array, OMPIO_IOVEC_INITIAL_SIZE * block * + sizeof(mca_io_ompio_io_array_t)); + if (NULL == fh->f_io_array) { + opal_output(1, "OUT OF MEMORY\n"); + return OMPI_ERR_OUT_OF_RESOURCE; + } + } + + blocks = fview_count[0]; + for (j=0 ; jf_size ; j++) { + if (sorted[x] < blocks) { + n = j; + break; + } + else { + blocks += fview_count[j+1]; + } + } + for (j=0 ; jf_io_array[k].offset = (IOVBASE_TYPE *) + ((OPAL_PTRDIFF_TYPE)global_fview[sorted[x]].iov_base + + (global_fview[sorted[x]].iov_len - bytes_left)); + fh->f_io_array[k].length = bytes_left; + fh->f_io_array[k].memory_address = + &global_buf[start+temp[n]]; + temp[n] += (int)fh->f_io_array[k].length; + bytes_to_write -= bytes_left; +
bytes_left = 0; + k ++; + x ++; + continue; + } + else { + fh->f_io_array[k].offset = (IOVBASE_TYPE *) + ((OPAL_PTRDIFF_TYPE)global_fview[sorted[x]].iov_base + + (global_fview[sorted[x]].iov_len - bytes_left)); + fh->f_io_array[k].length = bytes_to_write; + fh->f_io_array[k].memory_address = + &global_buf[start+temp[n]]; + temp[n] += (int)fh->f_io_array[k].length; + bytes_left -= bytes_to_write; + bytes_to_write = 0;; + k ++; + break; + } + } + else { + if (bytes_to_write < (int)global_fview[sorted[x]].iov_len) { + fh->f_io_array[k].offset = global_fview[sorted[x]].iov_base; + fh->f_io_array[k].length = bytes_to_write; + fh->f_io_array[k].memory_address = + &global_buf[start+temp[n]]; + bytes_left = + global_fview[sorted[x]].iov_len - bytes_to_write; + bytes_to_write = 0; + k ++; + break; + } + else { + fh->f_io_array[k].offset = global_fview[sorted[x]].iov_base; + fh->f_io_array[k].length = global_fview[sorted[x]].iov_len; + fh->f_io_array[k].memory_address = + &global_buf[start+temp[n]]; + temp[n] += (int)fh->f_io_array[k].length; + bytes_to_write -= global_fview[sorted[x]].iov_len; + k ++; + x ++; + continue; + } + } + } + + fh->f_num_of_io_entries = k; + /* + printf("%d: *************************** %d\n", fh->f_rank, fh->f_num_of_io_entries); + for (i=0 ; if_num_of_io_entries ; i++) { + printf(" ADDRESS: %p OFFSET: %d LENGTH: %d\n", + fh->f_io_array[i].memory_address, + fh->f_io_array[i].offset, + fh->f_io_array[i].length); + } + */ +#if TIME_BREAKDOWN + if (0 == fh->f_rank%fh->f_aggregator_index) { + start_time2 = MPI_Wtime(); + } +#endif + if (fh->f_num_of_io_entries) { + if (OMPI_SUCCESS != fh->f_fbtl->fbtl_pwritev (fh, NULL)) { + opal_output (1, "WRITE FAILED\n"); + return OMPI_ERROR; + } + } +#if TIME_BREAKDOWN + if (0 == fh->f_rank%fh->f_aggregator_index) { + end_time2 = MPI_Wtime(); + total_io += end_time2-start_time2; + } +#endif + if (NULL != temp) { + free (temp); + temp = NULL; + } + } + /********************************************************** +
******************** DONE WRITING ************************ + *********************************************************/ + + if (0 == fh->f_rank%fh->f_aggregator_index) { + fh->f_num_of_io_entries = 0; + if (NULL != fh->f_io_array) { + free (fh->f_io_array); + fh->f_io_array = NULL; + } + if (NULL != global_buf) { + free (global_buf); + global_buf = NULL; + } + } +#if TIME_BREAKDOWN + if (0 == fh->f_rank%fh->f_aggregator_index) { + end_time = MPI_Wtime(); + total += end_time-start_time; + } +#endif + } + +#if TIME_BREAKDOWN + if (0 == fh->f_rank%fh->f_aggregator_index) { + printf ("%d: Total --- %f I/O ---- %f\n", fh->f_rank, total, total_io); + } +#endif + + if (NULL != sorted) { + free (sorted); + sorted = NULL; + } + if (NULL != broken_iovec) { + free (broken_iovec); + broken_iovec = NULL; + } + if (NULL != global_fview) { + free (global_fview); + global_fview = NULL; + } + if (NULL != fview_count) { + free (fview_count); + fview_count = NULL; + } + if (NULL != decoded_iov) { + free (decoded_iov); + decoded_iov = NULL; + } + if (NULL != bytes_per_process) { + free (bytes_per_process); + bytes_per_process = NULL; + } + if (NULL != bytes_sent) { + free (bytes_sent); + bytes_sent = NULL; + } + if (NULL != displs) { + free (displs); + displs = NULL; + } + /* + if (NULL != total_bytes_per_process) { + free (total_bytes_per_process); + total_bytes_per_process = NULL; + } + */ + + return OMPI_SUCCESS; +} diff --git a/ompi/mca/fcoll/ylib/fcoll_ylib_file_write_all_begin.c b/ompi/mca/fcoll/ylib/fcoll_ylib_file_write_all_begin.c new file mode 100644 index 0000000000..e224cf90ee --- /dev/null +++ b/ompi/mca/fcoll/ylib/fcoll_ylib_file_write_all_begin.c @@ -0,0 +1,36 @@ +/* + * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. + * Copyright (c) 2004-2005 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved.
+ * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2005 The Regents of the University of California. + * All rights reserved. + * Copyright (c) 2008-2011 University of Houston. All rights reserved. + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#include "ompi_config.h" +#include "fcoll_ylib.h" + +#include "mpi.h" +#include "ompi/constants.h" +#include "ompi/mca/fcoll/fcoll.h" + + +int +mca_fcoll_ylib_file_write_all_begin (mca_io_ompio_file_t *fh, + void *buf, + int count, + struct ompi_datatype_t *datatype) +{ + printf ("DYNAMIC WRITE ALL BEGIN\n"); + return OMPI_SUCCESS; +} diff --git a/ompi/mca/fcoll/ylib/fcoll_ylib_file_write_all_end.c b/ompi/mca/fcoll/ylib/fcoll_ylib_file_write_all_end.c new file mode 100644 index 0000000000..bb3db38a42 --- /dev/null +++ b/ompi/mca/fcoll/ylib/fcoll_ylib_file_write_all_end.c @@ -0,0 +1,35 @@ +/* + * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. + * Copyright (c) 2004-2005 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2005 The Regents of the University of California. + * All rights reserved. + * Copyright (c) 2008-2011 University of Houston. All rights reserved. 
+ * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#include "ompi_config.h" +#include "fcoll_ylib.h" + +#include "mpi.h" +#include "ompi/constants.h" +#include "ompi/mca/fcoll/fcoll.h" + + +int +mca_fcoll_ylib_file_write_all_end (mca_io_ompio_file_t *fh, + void *buf, + ompi_status_public_t *status) +{ + printf ("DYNAMIC WRITE ALL END\n"); + return OMPI_SUCCESS; +} diff --git a/ompi/mca/fcoll/ylib/fcoll_ylib_module.c b/ompi/mca/fcoll/ylib/fcoll_ylib_module.c new file mode 100644 index 0000000000..c960a6cffe --- /dev/null +++ b/ompi/mca/fcoll/ylib/fcoll_ylib_module.c @@ -0,0 +1,91 @@ +/* + * Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. + * Copyright (c) 2004-2006 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2005 The Regents of the University of California. + * All rights reserved. + * Copyright (c) 2008-2011 University of Houston. All rights reserved. 
+ * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#include "ompi_config.h" +#include "fcoll_ylib.h" + +#include + +#include "mpi.h" +#include "opal/mca/base/mca_base_param.h" +#include "ompi/mca/fcoll/fcoll.h" +#include "ompi/mca/fcoll/base/base.h" + + +/* + * ******************************************************************* + * ************************ actions structure ************************ + * ******************************************************************* + */ +static mca_fcoll_base_module_1_0_0_t ylib = { + mca_fcoll_ylib_module_init, + mca_fcoll_ylib_module_finalize, + mca_fcoll_ylib_file_read_all, + mca_fcoll_ylib_file_read_all_begin, + mca_fcoll_ylib_file_read_all_end, + mca_fcoll_ylib_file_write_all, + mca_fcoll_ylib_file_write_all_begin, + mca_fcoll_ylib_file_write_all_end +}; +/* Component-level init query: ignores both thread flags and always returns OMPI_SUCCESS. */ +int +mca_fcoll_ylib_component_init_query(bool enable_progress_threads, + bool enable_mpi_threads) +{ + /* Nothing to do */ + + return OMPI_SUCCESS; +} +/* Per-file query: stores mca_fcoll_ylib_priority in *priority and returns NULL when that priority is zero or negative; otherwise returns the address of the static ylib function table above. */ +mca_fcoll_base_module_1_0_0_t * +mca_fcoll_ylib_component_file_query (mca_io_ompio_file_t *fh, int *priority) +{ + *priority = mca_fcoll_ylib_priority; + if (0 >= mca_fcoll_ylib_priority) { + return NULL; + } + /* Raise the reported priority to at least 50 when mca_fcoll_base_query_table() returns non-zero for "ylib" (presumably a table lookup that recommends this component for fh; defined elsewhere - TODO confirm). */ + if (mca_fcoll_base_query_table (fh, "ylib")) { + if (*priority < 50) { + *priority = 50; + } + } + + return &ylib; +} + +int mca_fcoll_ylib_component_file_unquery (mca_io_ompio_file_t *file) +{ + /* This function might be needed for some purposes later.
For now it + * does not have anything to do since there are no steps which need + * to be undone if this module is not selected */ + + return OMPI_SUCCESS; +} +/* Module init hook: does nothing with file and returns OMPI_SUCCESS. */ +int mca_fcoll_ylib_module_init (mca_io_ompio_file_t *file) +{ + return OMPI_SUCCESS; +} + +/* Module finalize hook: does nothing with file and returns OMPI_SUCCESS. */ +int mca_fcoll_ylib_module_finalize (mca_io_ompio_file_t *file) +{ + return OMPI_SUCCESS; +} diff --git a/ompi/mca/fs/Makefile.am b/ompi/mca/fs/Makefile.am new file mode 100644 index 0000000000..608888196e --- /dev/null +++ b/ompi/mca/fs/Makefile.am @@ -0,0 +1,40 @@ +# +# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana +# University Research and Technology +# Corporation. All rights reserved. +# Copyright (c) 2004-2005 The University of Tennessee and The University +# of Tennessee Research Foundation. All rights +# reserved. +# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +# University of Stuttgart. All rights reserved. +# Copyright (c) 2004-2005 The Regents of the University of California. +# All rights reserved. +# Copyright (c) 2008-2011 University of Houston. All rights reserved. +# Copyright (c) 2010 Cisco Systems, Inc. All rights reserved. +# $COPYRIGHT$ +# +# Additional copyrights may follow +# +# $HEADER$ +# + +AM_CPPFLAGS = $(LTDLINCL) + +# main library setup +noinst_LTLIBRARIES = libmca_fs.la +libmca_fs_la_SOURCES = + +# local files +headers = fs.h +libmca_fs_la_SOURCES += $(headers) + +# Conditionally install the header files +if WANT_INSTALL_HEADERS +ompidir = $(includedir)/openmpi/$(subdir) +nobase_ompi_HEADERS = $(headers) +endif + +include base/Makefile.am + +distclean-local: + rm -f base/static-components.h diff --git a/ompi/mca/fs/base/Makefile.am b/ompi/mca/fs/base/Makefile.am new file mode 100644 index 0000000000..d356eb33ca --- /dev/null +++ b/ompi/mca/fs/base/Makefile.am @@ -0,0 +1,28 @@ +# +# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana +# University Research and Technology +# Corporation. All rights reserved.
+# Copyright (c) 2004-2005 The University of Tennessee and The University +# of Tennessee Research Foundation. All rights +# reserved. +# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +# University of Stuttgart. All rights reserved. +# Copyright (c) 2004-2005 The Regents of the University of California. +# All rights reserved. +# Copyright (c) 2008-2011 University of Houston. All rights reserved. +# $COPYRIGHT$ +# +# Additional copyrights may follow +# +# $HEADER$ +# + +headers += \ + base/base.h + +libmca_fs_la_SOURCES += \ + base/fs_base_close.c \ + base/fs_base_file_select.c \ + base/fs_base_file_unselect.c \ + base/fs_base_find_available.c \ + base/fs_base_open.c diff --git a/ompi/mca/fs/base/base.h b/ompi/mca/fs/base/base.h new file mode 100644 index 0000000000..0f6edb7d61 --- /dev/null +++ b/ompi/mca/fs/base/base.h @@ -0,0 +1,70 @@ +/* + * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. + * Copyright (c) 2004-2005 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2005 The Regents of the University of California. + * All rights reserved. + * Copyright (c) 2008 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2008-2011 University of Houston. All rights reserved. + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +/** + * @file + * + * MCA fs base framework public interface functions. 
+ */ + +#ifndef MCA_FS_BASE_H +#define MCA_FS_BASE_H + +#include "ompi_config.h" + +#include "mpi.h" +#include "opal/class/opal_list.h" +#include "ompi/mca/fs/fs.h" +#include "opal/mca/mca.h" + + +BEGIN_C_DECLS + +OMPI_DECLSPEC int mca_fs_base_open(void); + +OMPI_DECLSPEC int mca_fs_base_close(void); + +OMPI_DECLSPEC int mca_fs_base_file_select(struct mca_io_ompio_file_t *file, + mca_base_component_t *preferred); + +OMPI_DECLSPEC int mca_fs_base_file_unselect(struct mca_io_ompio_file_t *file); + +OMPI_DECLSPEC int mca_fs_base_find_available(bool enable_progress_threads, + bool enable_mpi_threads); + +OMPI_DECLSPEC int mca_fs_base_init_file (struct mca_io_ompio_file_t *file); + +OMPI_DECLSPEC int mca_fs_base_get_param (struct mca_io_ompio_file_t *file, int keyval); +/* + * Globals + */ + +OMPI_DECLSPEC extern int mca_fs_base_param; +OMPI_DECLSPEC extern int mca_fs_base_output; + +OMPI_DECLSPEC extern bool mca_fs_base_components_opened_valid; +OMPI_DECLSPEC extern bool mca_fs_base_components_available_valid; + +OMPI_DECLSPEC extern opal_list_t mca_fs_base_components_opened; +OMPI_DECLSPEC extern opal_list_t mca_fs_base_components_available; + +END_C_DECLS + +#endif /* MCA_FS_BASE_H */ diff --git a/ompi/mca/fs/base/fs_base_close.c b/ompi/mca/fs/base/fs_base_close.c new file mode 100644 index 0000000000..42281142f9 --- /dev/null +++ b/ompi/mca/fs/base/fs_base_close.c @@ -0,0 +1,56 @@ +/* + * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. + * Copyright (c) 2004-2005 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2005 The Regents of the University of California. + * All rights reserved. + * Copyright (c) 2008-2011 University of Houston. All rights reserved. 
+ * $COPYRIGHTOB$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#include "ompi_config.h" + +#include + +#include "ompi/constants.h" +#include "opal/mca/mca.h" +#include "opal/mca/base/base.h" +#include "ompi/mca/fs/fs.h" +#include "ompi/mca/fs/base/base.h" + +int mca_fs_base_close(void) +{ + /* + Close all components that are still open. This may be the opened + list (if we're in ompi_info), or it may be the available list (if + we're anywhere else). + */ + + if (mca_fs_base_components_opened_valid) { + mca_base_components_close(mca_fs_base_output, + &mca_fs_base_components_opened, NULL); + OBJ_DESTRUCT(&mca_fs_base_components_opened); + mca_fs_base_components_opened_valid = false; + } else if (mca_fs_base_components_available_valid) { + mca_base_components_close(mca_fs_base_output, + &mca_fs_base_components_available, NULL); + OBJ_DESTRUCT(&mca_fs_base_components_available); + mca_fs_base_components_available_valid = false; + } + + /* Close the output stream for this framework */ + opal_output_close (mca_fs_base_output); + + /* All done */ + + return OMPI_SUCCESS; +} diff --git a/ompi/mca/fs/base/fs_base_file_select.c b/ompi/mca/fs/base/fs_base_file_select.c new file mode 100644 index 0000000000..1c6719e20b --- /dev/null +++ b/ompi/mca/fs/base/fs_base_file_select.c @@ -0,0 +1,357 @@ +/* + * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. + * Copyright (c) 2004-2005 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2005 The Regents of the University of California. + * All rights reserved. + * Copyright (c) 2008-2011 University of Houston. All rights reserved. 
+ * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#include "ompi_config.h" + +#include + +#include "opal/class/opal_list.h" +#include "opal/util/argv.h" +#include "opal/mca/mca.h" +#include "opal/mca/base/base.h" +#include "orte/util/show_help.h" +#include "ompi/mca/fs/fs.h" +#include "ompi/mca/fs/base/base.h" +#include "ompi/mca/io/ompio/io_ompio.h" + +/* + * This structure is needed so that we can close the modules + * which are not selected but were opened. mca_base_modules_close + * which does this job for us requires a opal_list_t which contains + * these modules + */ +struct queried_module_t { + opal_list_item_t super; + mca_fs_base_component_t *om_component; + mca_fs_base_module_t *om_module; +}; +typedef struct queried_module_t queried_module_t; +static OBJ_CLASS_INSTANCE(queried_module_t, opal_list_item_t, NULL, NULL); + + +/* + * Only one fs module can be attached to each file. + * + * This module calls the query funtion on all the components that were + * detected by fs_base_open. This function is called on a + * per-file basis. This function has the following function. + * + * 1. Iterate over the list of available_components + * 2. Call the query function on each of these components. + * 3. query function returns the structure containing pointers + * to its module and its priority + * 4. Select the module with the highest priority + * 5. Call the init function on the selected module so that it does the + * right setup for the file + * 6. 
Call finalize on all the other modules which returned + * their module but were unfortunate to not get selected + */ + +int mca_fs_base_file_select (struct mca_io_ompio_file_t *file, + mca_base_component_t *preferred) +{ + int priority; + int best_priority; + opal_list_item_t *item; + opal_list_item_t *next_item; + mca_base_component_priority_list_item_t *selectable_item; + char *names, **name_array; + int num_names; + mca_base_component_priority_list_item_t *cpli; + mca_fs_base_component_t *component; + mca_fs_base_component_t *best_component; + mca_fs_base_module_t *module; + opal_list_t queried; + queried_module_t *om; + opal_list_t *selectable; + char *str; + int err = MPI_SUCCESS; + int i; + bool was_selectable_constructed = false; + + /* Check and see if a preferred component was provided. If it was + provided then it should be used (if possible) */ + + if (NULL != preferred) { + + /* We have a preferred component. Check if it is available + and if so, whether it wants to run */ + + str = &(preferred->mca_component_name[0]); + + opal_output_verbose(10, mca_fs_base_output, + "fs:base:file_select: Checking preferred component: %s", + str); + + /* query the component for its priority and get its module + structure. This is necessary to proceed */ + + component = (mca_fs_base_component_t *)preferred; + module = component->fsm_file_query (file, &priority); + if (NULL != module && + NULL != module->fs_module_init) { + + /* this query seems to have returned something legitimate + * and we can now go ahead and initialize the + * file with it * but first, the functions which + * are null need to be filled in */ + + /*fill_null_pointers (module);*/ + file->f_fs = module; + file->f_fs_component = preferred; + + return module->fs_module_init(file); + } + /* His preferred component is present, but is unable to + * run. This is not a good sign. 
We should try selecting + * some other component We let it fall through and select + * from the list of available components + */ + } /*end of selection for preferred component */ + + /* + * We fall till here if one of the two things happened: + * 1. The preferred component was provided but for some reason was + * not able to be selected + * 2. No preferred component was provided + * + * All we need to do is to go through the list of available + * components and find the one which has the highest priority and + * use that for this file + */ + + /* Check if anything was requested by means on the name parameters */ + names = NULL; + mca_base_param_lookup_string (mca_fs_base_param, &names); + + if (NULL != names && 0 < strlen(names)) { + name_array = opal_argv_split (names, ','); + num_names = opal_argv_count (name_array); + + opal_output_verbose(10, mca_fs_base_output, + "fs:base:file_Select: Checking all available module"); + + /* since there are somethings which the mca requested through the + if the intersection is NULL, then we barf saying that the requested + modules are not being available */ + + selectable = OBJ_NEW(opal_list_t); + was_selectable_constructed = true; + + /* go through the compoents_available list and check against the names + * to see whether this can be added or not */ + + for (item = opal_list_get_first(&mca_fs_base_components_available); + item != opal_list_get_end(&mca_fs_base_components_available); + item = opal_list_get_next(item)) { + /* convert the opal_list_item_t returned into the proper type */ + cpli = (mca_base_component_priority_list_item_t *) item; + component = (mca_fs_base_component_t *) cpli->super.cli_component; + opal_output_verbose(10, mca_fs_base_output, + "select: initialising %s component %s", + component->fsm_version.mca_type_name, + component->fsm_version.mca_component_name); + + /* check if this name is present in the mca_base_params */ + for (i=0; i < num_names; i++) { + if (0 == strcmp(name_array[i], 
component->fsm_version.mca_component_name)) { + /* this is present, and should be added o the selectable list */ + + /* We need to create a seperate object to initialise this list with + * since we cannot have the same item in 2 lists */ + + selectable_item = OBJ_NEW (mca_base_component_priority_list_item_t); + *selectable_item = *cpli; + opal_list_append (selectable, (opal_list_item_t *)selectable_item); + break; + } + } + } + + /* check for a NULL intersection between the available list and the + * list which was asked for */ + + if (0 == opal_list_get_size(selectable)) { + was_selectable_constructed = true; + OBJ_RELEASE (selectable); + opal_output_verbose (10, mca_fs_base_output, + "fs:base:file_select: preferred modules were not available"); + return OMPI_ERROR; + } + } else { /* if there was no name_array, then we need to simply initialize + selectable to mca_fs_base_components_available */ + selectable = &mca_fs_base_components_available; + } + + best_component = NULL; + best_priority = -1; + OBJ_CONSTRUCT(&queried, opal_list_t); + + for (item = opal_list_get_first(selectable); + item != opal_list_get_end(selectable); + item = opal_list_get_next(item)) { + /* + * convert the opal_list_item_t returned into the proper type + */ + cpli = (mca_base_component_priority_list_item_t *) item; + component = (mca_fs_base_component_t *) cpli->super.cli_component; + opal_output_verbose(10, mca_fs_base_output, + "select: initialising %s component %s", + component->fsm_version.mca_type_name, + component->fsm_version.mca_component_name); + + /* + * we can call the query function only if there is a function :-) + */ + if (NULL == component->fsm_file_query) { + opal_output_verbose(10, mca_fs_base_output, + "select: no query, ignoring the component"); + } else { + /* + * call the query function and see what it returns + */ + module = component->fsm_file_query (file, &priority); + + if (NULL == module || + NULL == module->fs_module_init) { + /* + * query did not return any 
action which can be used + */ + opal_output_verbose(10, mca_fs_base_output, + "select: query returned failure"); + } else { + opal_output_verbose(10, mca_fs_base_output, + "select: query returned priority %d", + priority); + /* + * is this the best component we have found till now? + */ + if (priority > best_priority) { + best_priority = priority; + best_component = component; + } + + om = OBJ_NEW(queried_module_t); + /* + * check if we have run out of space + */ + if (NULL == om) { + OBJ_DESTRUCT(&queried); + return OMPI_ERR_OUT_OF_RESOURCE; + } + om->om_component = component; + om->om_module = module; + opal_list_append(&queried, (opal_list_item_t *)om); + } /* end else of if (NULL == module) */ + } /* end else of if (NULL == component->fsm_init) */ + } /* end for ... end of traversal */ + + /* We have to remove empty out the selectable list if the selectable + * list was constructed as a duplicate and not as a pointer to the + * mca_base_components_available list. So, check and destroy */ + + if (was_selectable_constructed) { + + /* remove all the items first */ + for (item = opal_list_get_first(&mca_fs_base_components_available); + item != opal_list_get_end(&mca_fs_base_components_available); + item = next_item) { + next_item = opal_list_get_next(item); + OBJ_RELEASE (item); + } + + /* release the list itself */ + OBJ_RELEASE (selectable); + was_selectable_constructed = false; + } + + /* + * Now we have alist of components which successfully returned + * their module struct. One of these components has the best + * priority. The rest have to be comm_unqueried to counter the + * effects of file_query'ing them. Finalize happens only on + * components which should are initialized. + */ + if (NULL == best_component) { + /* + * This typically means that there was no component which was + * able to run properly this time. 
So, we need to abort + * JMS replace with show_help + */ + OBJ_DESTRUCT(&queried); + return OMPI_ERROR; + } + + /* + * We now have a list of components which have successfully + * returned their priorities from the query. We now have to + * unquery() those components which have not been selected and + * init() the component which was selected + */ + for (item = opal_list_remove_first(&queried); + NULL != item; + item = opal_list_remove_first(&queried)) { + om = (queried_module_t *) item; + if (om->om_component == best_component) { + /* + * this is the chosen component, we have to initialise the + * module of this component. + * + * ANJU: a component might not have all the functions + * defined. Whereever a function pointer is null in the + * module structure we need to fill it in with the base + * structure function pointers. This is yet to be done + */ + + /* + * We don return here coz we still need to go through and + * elease the other objects + */ + + /*fill_null_pointers (om->om_module);*/ + file->f_fs = om->om_module; + err = om->om_module->fs_module_init(file); + file->f_fs_component = (mca_base_component_t *)best_component; + } else { + /* + * this is not the "choosen one", finalize + */ + if (NULL != om->om_component->fsm_file_unquery) { + /* unquery the component only if they have some clean + * up job to do. Components which are queried but do + * not actually do anything typically do not have a + * unquery. 
Hence this check is necessary + */ + (void) om->om_component->fsm_file_unquery(file); + opal_output_verbose(10, mca_fs_base_output, + "select: component %s is not selected", + om->om_component->fsm_version.mca_component_name); + } /* end if */ + } /* if not best component */ + OBJ_RELEASE(om); + } /* traversing through the entire list */ + + opal_output_verbose(10, mca_fs_base_output, + "select: component %s selected", + best_component->fsm_version.mca_component_name); + + OBJ_DESTRUCT(&queried); + + return err; +} diff --git a/ompi/mca/fs/base/fs_base_file_unselect.c b/ompi/mca/fs/base/fs_base_file_unselect.c new file mode 100644 index 0000000000..7197a4e0cd --- /dev/null +++ b/ompi/mca/fs/base/fs_base_file_unselect.c @@ -0,0 +1,41 @@ +/* + * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. + * Copyright (c) 2004-2005 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2005 The Regents of the University of California. + * All rights reserved. + * Copyright (c) 2008-2011 University of Houston. All rights reserved. 
+ * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#include "ompi_config.h" + +#include +#include +#include + +#include "mpi.h" +#include "ompi/mca/io/ompio/io_ompio.h" +#include "opal/util/show_help.h" +#include "opal/mca/mca.h" +#include "opal/mca/base/base.h" +#include "ompi/mca/fs/fs.h" +#include "ompi/mca/fs/base/base.h" + +int mca_fs_base_file_unselect(mca_io_ompio_file_t *file) +{ + if (NULL != file->f_fs && NULL != file->f_fs->fs_module_finalize) { + return file->f_fs->fs_module_finalize(file); + } + + return OMPI_SUCCESS; +} diff --git a/ompi/mca/fs/base/fs_base_find_available.c b/ompi/mca/fs/base/fs_base_find_available.c new file mode 100644 index 0000000000..890eea017d --- /dev/null +++ b/ompi/mca/fs/base/fs_base_find_available.c @@ -0,0 +1,171 @@ +/* + * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. + * Copyright (c) 2004-2005 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2005 The Regents of the University of California. + * All rights reserved. + * Copyright (c) 2008-2011 University of Houston. All rights reserved. 
+ * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#include "ompi_config.h" + +#include +#include + +#include "mpi.h" +#include "ompi/constants.h" +#include "opal/class/opal_list.h" +#include "orte/util/show_help.h" +#include "opal/mca/mca.h" +#include "opal/mca/base/base.h" +#include "opal/mca/base/mca_base_component_repository.h" +#include "ompi/mca/fs/fs.h" +#include "ompi/mca/fs/base/base.h" + +opal_list_t mca_fs_base_modules_available; +bool mca_fs_base_modules_available_valid = false; + +static int init_query(const mca_base_component_t *m, + mca_base_component_priority_list_item_t *entry, + bool enable_progress_threads, + bool enable_mpi_threads); +static int init_query_2_0_0(const mca_base_component_t *component, + mca_base_component_priority_list_item_t *entry, + bool enable_progress_threads, + bool enable_mpi_threads); + +int mca_fs_base_find_available(bool enable_progress_threads, + bool enable_mpi_threads) +{ + bool found = false; + mca_base_component_priority_list_item_t *entry; + opal_list_item_t *p; + + /* Initialize the list */ + + OBJ_CONSTRUCT(&mca_fs_base_components_available, opal_list_t); + mca_fs_base_components_available_valid = true; + + /* The list of components which we should check is already present + in mca_fs_base_components_opened, which was established in + mca_fs_base_open */ + + for (found = false, + p = opal_list_remove_first (&mca_fs_base_components_opened); + NULL != p; + p = opal_list_remove_first (&mca_fs_base_components_opened)) { + entry = OBJ_NEW(mca_base_component_priority_list_item_t); + entry->super.cli_component = + ((mca_base_component_list_item_t *)p)->cli_component; + + /* Now for this entry, we have to determine the thread level. Call + a subroutine to do the job for us */ + + if (OMPI_SUCCESS == init_query(entry->super.cli_component, entry, + enable_progress_threads, + enable_mpi_threads)) { + /* Save the results in the list. The priority is not relvant at + this point in time. 
But we save the thread arguments so that + the initial selection algorithm can negotiate overall thread + level for this process */ + entry->cpli_priority = 0; + opal_list_append (&mca_fs_base_components_available, + (opal_list_item_t *) entry); + found = true; + } else { + /* The component does not want to run, so close it. Its close() + has already been invoked. Close it out of the DSO repository + (if it is there in the repository) */ + mca_base_component_repository_release(entry->super.cli_component); + OBJ_RELEASE(entry); + } + /* Free entry from the "opened" list */ + OBJ_RELEASE(p); + } + + /* The opened list is no longer necessary, so we can free it */ + OBJ_DESTRUCT (&mca_fs_base_components_opened); + mca_fs_base_components_opened_valid = false; + + /* There should atleast be one fs component which was available */ + if (false == found) { + /* Need to free all items in the list */ + OBJ_DESTRUCT(&mca_fs_base_components_available); + mca_fs_base_components_available_valid = false; + opal_output_verbose (10, mca_fs_base_output, + "fs:find_available: no fs components available!"); + return OMPI_ERROR; + } + + /* All done */ + return OMPI_SUCCESS; +} + + +static int init_query(const mca_base_component_t *m, + mca_base_component_priority_list_item_t *entry, + bool enable_progress_threads, + bool enable_mpi_threads) +{ + int ret; + + opal_output_verbose(10, mca_fs_base_output, + "fs:find_available: querying fs component %s", + m->mca_component_name); + + /* This component has been successfully opened, now try to query it */ + if (2 == m->mca_type_major_version && + 0 == m->mca_type_minor_version && + 0 == m->mca_type_release_version) { + ret = init_query_2_0_0(m, entry, enable_progress_threads, + enable_mpi_threads); + } else { + /* unrecognised API version */ + opal_output_verbose(10, mca_fs_base_output, + "fs:find_available:unrecognised fs API version (%d.%d.%d)", + m->mca_type_major_version, + m->mca_type_minor_version, + m->mca_type_release_version); + 
return OMPI_ERROR; + } + + /* Query done -- look at return value to see what happened */ + if (OMPI_SUCCESS != ret) { + opal_output_verbose(10, mca_fs_base_output, + "fs:find_available fs component %s is not available", + m->mca_component_name); + if (NULL != m->mca_close_component) { + m->mca_close_component(); + } + } else { + opal_output_verbose(10, mca_fs_base_output, + "fs:find_avalable: fs component %s is available", + m->mca_component_name); + + } + /* All done */ + return ret; +} + + +static int init_query_2_0_0(const mca_base_component_t *component, + mca_base_component_priority_list_item_t *entry, + bool enable_progress_threads, + bool enable_mpi_threads) +{ + mca_fs_base_component_2_0_0_t *fs = + (mca_fs_base_component_2_0_0_t *) component; + + return fs->fsm_init_query(enable_progress_threads, + enable_mpi_threads); +} diff --git a/ompi/mca/fs/base/fs_base_open.c b/ompi/mca/fs/base/fs_base_open.c new file mode 100644 index 0000000000..c2250d9b8c --- /dev/null +++ b/ompi/mca/fs/base/fs_base_open.c @@ -0,0 +1,88 @@ +/* + * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. + * Copyright (c) 2004-2005 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2005 The Regents of the University of California. + * All rights reserved. + * Copyright (c) 2008-2011 University of Houston. All rights reserved. 
+ * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + + +#include "ompi_config.h" +#include + +#include "ompi/class/ompi_free_list.h" +#include "orte/util/show_help.h" +#include "opal/mca/mca.h" +#include "opal/mca/base/base.h" +#include "opal/mca/base/mca_base_param.h" + +#include "orte/util/show_help.h" + +#include "ompi/mca/fs/fs.h" +#include "ompi/mca/fs/base/base.h" + + +/* + * The following file was created by configure. It contains extern + * statements and the definition of an array of pointers to each + * component's public mca_base_component_t struct. + */ +#ifdef __WINDOWS__ + const mca_base_component_t *mca_fs_base_static_components[] = {NULL}; +#else +#include "ompi/mca/fs/base/static-components.h" +#endif + +/* + * Global variables; most of which are loaded by back-ends of MCA + * variables + */ +int mca_fs_base_param = -1; +int mca_fs_base_output = -1; + +opal_list_t mca_fs_base_components_opened; +opal_list_t mca_fs_base_components_available; + +bool mca_fs_base_components_available_valid = false; +bool mca_fs_base_components_opened_valid = false; + +mca_fs_base_component_t mca_fs_base_selected_component; +mca_fs_base_module_t mca_fs; + +/* + * Function for finding and opening either all MCA components, or the one + * that was specifically requested via a MCA parameter. 
+ */ +int mca_fs_base_open(void) +{ + /* Open an output stream for this framework */ + + mca_fs_base_output = opal_output_open(NULL); + + /* Open up all available components */ + + if (OMPI_SUCCESS != + mca_base_components_open("fs", mca_fs_base_output, + mca_fs_base_static_components, + &mca_fs_base_components_opened, true)) { + return OMPI_ERROR; + } + mca_fs_base_components_opened_valid = true; + + /* Find the index of the MCA "fs" param for selection */ + + mca_fs_base_param = mca_base_param_find("fs", "base", NULL); + + return OMPI_SUCCESS; +} diff --git a/ompi/mca/fs/base/static-components.h b/ompi/mca/fs/base/static-components.h new file mode 100644 index 0000000000..757981885c --- /dev/null +++ b/ompi/mca/fs/base/static-components.h @@ -0,0 +1,18 @@ +/* + * $HEADER$ + */ +#if defined(c_plusplus) || defined(__cplusplus) +extern "C" { +#endif + + + +const mca_base_component_t *mca_fs_base_static_components[] = { + + NULL +}; + +#if defined(c_plusplus) || defined(__cplusplus) +} +#endif + diff --git a/ompi/mca/fs/fs.h b/ompi/mca/fs/fs.h new file mode 100644 index 0000000000..f8c3ea92f5 --- /dev/null +++ b/ompi/mca/fs/fs.h @@ -0,0 +1,146 @@ +/* + * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. + * Copyright (c) 2004-2005 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2005 The Regents of the University of California. + * All rights reserved. + * Copyright (c) 2008-2011 University of Houston. All rights reserved. 
+ * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#ifndef OMPI_MCA_FS_H +#define OMPI_MCA_FS_H + +#include "ompi_config.h" +#include "mpi.h" +#include "opal/mca/mca.h" +#include "opal/mca/base/base.h" + +BEGIN_C_DECLS + +struct mca_io_ompio_file_t; + +/* + * Macro for use in components that are of type fs + */ +#define MCA_FS_BASE_VERSION_2_0_0 \ + MCA_BASE_VERSION_2_0_0, \ + "fs", 2, 0, 0 + +/* + * These are the component function prototypes. These function pointers + * go into the component structure. These functions (query() and finalize()) + * are called during fs_base_select(). Each component is query() ied + * and subsequently, all the unselected components are finalize() 'ed + * so that any *stuff* they did during query() can be undone. By + * similar logic, finalize() is also called on the component which + * was selected when the communicator is being destroyed. + * + * So, to sum it up, every component carries 4 functions: + * 1. open() - called during MPI_INIT + * 2. close() - called during MPI_FINALIZE + * 3. query() - called to select a particular component + * 4. 
finalize() - called when actions taken during query have + * to be undone + */ + +/* + * **************** component struct ******************************* + * *********** These functions go in the component struct ********** + * **************** component struct ******************************* + */ + +typedef int (*mca_fs_base_component_init_query_1_0_0_fn_t) + (bool enable_progress_threads, + bool enable_mpi_threads); + +typedef struct mca_fs_base_module_1_0_0_t * +(*mca_fs_base_component_file_query_1_0_0_fn_t) (struct mca_io_ompio_file_t *file, + int *priority); + +typedef int (*mca_fs_base_component_file_unquery_1_0_0_fn_t) + (struct mca_io_ompio_file_t *file); + +/* + * ****************** component struct ****************************** + * Structure for fs v2.0.0 components. This is chained to MCA v2.0.0 + * ****************** component struct ****************************** + */ +struct mca_fs_base_component_2_0_0_t { + mca_base_component_t fsm_version; + mca_base_component_data_t fsm_data; + + mca_fs_base_component_init_query_1_0_0_fn_t fsm_init_query; + mca_fs_base_component_file_query_1_0_0_fn_t fsm_file_query; + mca_fs_base_component_file_unquery_1_0_0_fn_t fsm_file_unquery; +}; +typedef struct mca_fs_base_component_2_0_0_t mca_fs_base_component_2_0_0_t; +typedef struct mca_fs_base_component_2_0_0_t mca_fs_base_component_t; + +/* + * *********************************************************************** + * ************************ Interface function definitions ************** + * These are the typedefs for the function pointers to various fs + * backend functions which will be used by the various fs components + * *********************************************************************** + */ + +typedef int (*mca_fs_base_module_init_1_0_0_fn_t) +(struct mca_io_ompio_file_t *file); + +typedef int (*mca_fs_base_module_finalize_1_0_0_fn_t) +(struct mca_io_ompio_file_t *file); + +typedef int (*mca_fs_base_module_file_open_fn_t)( + struct ompi_communicator_t 
*comm, char *filename, int amode, + struct ompi_info_t *info, struct mca_io_ompio_file_t *fh); +typedef int (*mca_fs_base_module_file_close_fn_t)(struct mca_io_ompio_file_t *fh); +typedef int (*mca_fs_base_module_file_delete_fn_t)( + char *filename, struct ompi_info_t *info); +typedef int (*mca_fs_base_module_file_set_size_fn_t) + (struct mca_io_ompio_file_t *fh, OMPI_MPI_OFFSET_TYPE size); +typedef int (*mca_fs_base_module_file_get_size_fn_t) + (struct mca_io_ompio_file_t *fh, OMPI_MPI_OFFSET_TYPE *size); +typedef int (*mca_fs_base_module_file_set_info_fn_t) + (struct mca_io_ompio_file_t *fh, struct ompi_info_t *info); +typedef int (*mca_fs_base_module_file_sync_fn_t) + (struct mca_io_ompio_file_t *fh); + +/* + * *********************************************************************** + * *************************** module structure ************************* + * *********************************************************************** + */ +struct mca_fs_base_module_1_0_0_t { + /* + * Per-file initialization function. This is called only + * on the module which is selected. 
The finalize corresponding to + * this function is fs_module_finalize, the next member of this struct + */ + mca_fs_base_module_init_1_0_0_fn_t fs_module_init; + mca_fs_base_module_finalize_1_0_0_fn_t fs_module_finalize; + + /* FS function pointers */ + mca_fs_base_module_file_open_fn_t fs_file_open; + mca_fs_base_module_file_close_fn_t fs_file_close; + mca_fs_base_module_file_delete_fn_t fs_file_delete; + mca_fs_base_module_file_set_size_fn_t fs_file_set_size; + mca_fs_base_module_file_get_size_fn_t fs_file_get_size; + mca_fs_base_module_file_set_info_fn_t fs_file_set_info; + mca_fs_base_module_file_sync_fn_t fs_file_sync; +}; +typedef struct mca_fs_base_module_1_0_0_t mca_fs_base_module_1_0_0_t; +typedef mca_fs_base_module_1_0_0_t mca_fs_base_module_t; + +END_C_DECLS + +#endif /* OMPI_MCA_FS_H */ diff --git a/ompi/mca/fs/lustre/Makefile.am b/ompi/mca/fs/lustre/Makefile.am new file mode 100644 index 0000000000..dda947e32c --- /dev/null +++ b/ompi/mca/fs/lustre/Makefile.am @@ -0,0 +1,57 @@ +# +# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana +# University Research and Technology +# Corporation. All rights reserved. +# Copyright (c) 2004-2005 The University of Tennessee and The University +# of Tennessee Research Foundation. All rights +# reserved. +# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +# University of Stuttgart. All rights reserved. +# Copyright (c) 2004-2005 The Regents of the University of California. +# All rights reserved. +# Copyright (c) 2008-2011 University of Houston. All rights reserved. +# $COPYRIGHT$ +# +# Additional copyrights may follow +# +# $HEADER$ +# + +# Make the output library in this directory, and name it either +# mca__.la (for DSO builds) or libmca__.la +# (for static builds). 
+ +if MCA_BUILD_ompi_fs_lustre_DSO +component_noinst = +component_install = mca_fs_lustre.la +else +component_noinst = libmca_fs_lustre.la +component_install = +endif + +# Source files + +fs_lustre_sources = \ + fs_lustre.h \ + fs_lustre.c \ + fs_lustre_component.c \ + fs_lustre_file_open.c \ + fs_lustre_file_close.c \ + fs_lustre_file_delete.c \ + fs_lustre_file_sync.c \ + fs_lustre_file_set_size.c \ + fs_lustre_file_get_size.c \ + fs_lustre_file_set_info.c + +AM_CPPFLAGS = $(fs_lustre_CPPFLAGS) + +mcacomponentdir = $(pkglibdir) +mcacomponent_LTLIBRARIES = $(component_install) +mca_fs_lustre_la_SOURCES = $(fs_lustre_sources) +mca_fs_lustre_la_LIBADD = +mca_fs_lustre_la_LDFLAGS = -module -avoid-version $(fs_lustre_LDFLAGS) + +noinst_LTLIBRARIES = $(component_noinst) +libmca_fs_lustre_la_SOURCES = $(fs_lustre_sources) +libmca_fs_lustre_la_LIBADD = +libmca_fs_lustre_la_LDFLAGS = -module -avoid-version $(fs_lustre_LDFLAGS) diff --git a/ompi/mca/fs/lustre/configure.m4 b/ompi/mca/fs/lustre/configure.m4 new file mode 100644 index 0000000000..49f7f0bd29 --- /dev/null +++ b/ompi/mca/fs/lustre/configure.m4 @@ -0,0 +1,50 @@ +# -*- shell-script -*- +# +# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana +# University Research and Technology +# Corporation. All rights reserved. +# Copyright (c) 2004-2005 The University of Tennessee and The University +# of Tennessee Research Foundation. All rights +# reserved. +# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +# University of Stuttgart. All rights reserved. +# Copyright (c) 2004-2005 The Regents of the University of California. +# All rights reserved. +# Copyright (c) 2010 Cisco Systems, Inc. All rights reserved. +# Copyright (c) 2008-2011 University of Houston. All rights reserved. 
+# $COPYRIGHT$ +# +# Additional copyrights may follow +# +# $HEADER$ +# + + +# MCA_fs_lustre_CONFIG(action-if-can-compile, +# [action-if-cant-compile]) +# ------------------------------------------------ +AC_DEFUN([MCA_ompi_fs_lustre_CONFIG],[ + AC_CONFIG_FILES([ompi/mca/fs/lustre/Makefile]) + + OMPI_CHECK_LUSTRE([fs_lustre], + [fs_lustre_happy="yes"], + [fs_lustre_happy="no"]) + + AS_IF([test "$fs_lustre_happy" = "yes"], + [fs_lustre_WRAPPER_EXTRA_LDFLAGS="$fs_lustre_LDFLAGS" + fs_lustre_WRAPPER_EXTRA_LIBS="$fs_lustre_LIBS" + $1], + [$2]) + + AC_CHECK_HEADERS([lustre/liblustreapi.h], [], + [AC_CHECK_HEADERS([lustre/liblustreapi.h], [], [$2], + [AC_INCLUDES_DEFAULT])], + [AC_INCLUDES_DEFAULT]) + + + # substitute in the things needed to build lustre + AC_SUBST([fs_lustre_CFLAGS]) + AC_SUBST([fs_lustre_CPPFLAGS]) + AC_SUBST([fs_lustre_LDFLAGS]) + AC_SUBST([fs_lustre_LIBS]) +])dnl diff --git a/ompi/mca/fs/lustre/fs_lustre.c b/ompi/mca/fs/lustre/fs_lustre.c new file mode 100644 index 0000000000..fb5ce7c96b --- /dev/null +++ b/ompi/mca/fs/lustre/fs_lustre.c @@ -0,0 +1,93 @@ +/* + * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. + * Copyright (c) 2004-2006 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2005 The Regents of the University of California. + * All rights reserved. + * Copyright (c) 2008-2011 University of Houston. All rights reserved. + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + * + * These symbols are in a file by themselves to provide nice linker + * semantics. 
Since linkers generally pull in symbols by object files,
+ * keeping these symbols as the only symbols in this file prevents
+ * utility programs such as "ompi_info" from having to import entire
+ * modules just to query their version and parameters
+ */
+
+#include "ompi_config.h"
+#include "mpi.h"
+#include "ompi/mca/fs/fs.h"
+#include "ompi/mca/fs/lustre/fs_lustre.h"
+
+/*
+ * *******************************************************************
+ * ************************ actions structure ************************
+ * *******************************************************************
+ */
+static mca_fs_base_module_1_0_0_t lustre = {
+    mca_fs_lustre_module_init, /* initialise after being selected */
+    mca_fs_lustre_module_finalize, /* finalize the module for a file */
+    mca_fs_lustre_file_open,
+    mca_fs_lustre_file_close,
+    mca_fs_lustre_file_delete,
+    mca_fs_lustre_file_set_size,
+    mca_fs_lustre_file_get_size,
+    mca_fs_lustre_file_set_info,
+    mca_fs_lustre_file_sync
+};
+/*
+ * *******************************************************************
+ * ************************* structure ends **************************
+ * *******************************************************************
+ */
+/* Component-wide query: both thread flags are ignored, always returns OMPI_SUCCESS. */
+int mca_fs_lustre_component_init_query(bool enable_progress_threads,
+                                       bool enable_mpi_threads)
+{
+    /* Nothing to do */
+
+    return OMPI_SUCCESS;
+}
+/* Per-file query: stores this component's priority and returns its module table. */
+struct mca_fs_base_module_1_0_0_t *
+mca_fs_lustre_component_file_query (mca_io_ompio_file_t *fh, int *priority)
+{
+    *priority = mca_fs_lustre_priority;
+    /* NOTE(review): "LUSRE" looks like a typo for LUSTRE - TODO confirm */
+    if (LUSRE == fh->f_fstype) {
+        if (*priority < 50) { /* matching file system type: priority is at least 50 */
+            *priority = 50;
+        }
+    }
+
+    return &lustre;
+}
+/* Per-file unquery hook; currently a no-op that returns OMPI_SUCCESS. */
+int mca_fs_lustre_component_file_unquery (mca_io_ompio_file_t *file)
+{
+    /* This function might be needed for some purposes later. For now it
+     * does not have anything to do since there are no steps which need
+     * to be undone if this module is not selected */
+
+    return OMPI_SUCCESS;
+}
+/* Per-file module initialisation; nothing to set up, returns OMPI_SUCCESS. */
+int mca_fs_lustre_module_init (mca_io_ompio_file_t *file)
+{
+    return OMPI_SUCCESS;
+}
+
+/* Per-file module finalisation; nothing to release, returns OMPI_SUCCESS. */
+int mca_fs_lustre_module_finalize (mca_io_ompio_file_t *file)
+{
+    return OMPI_SUCCESS;
+}
diff --git a/ompi/mca/fs/lustre/fs_lustre.h b/ompi/mca/fs/lustre/fs_lustre.h
new file mode 100644
index 0000000000..a391bdf3e2
--- /dev/null
+++ b/ompi/mca/fs/lustre/fs_lustre.h
@@ -0,0 +1,83 @@
+/*
+ * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
+ *                         University Research and Technology
+ *                         Corporation. All rights reserved.
+ * Copyright (c) 2004-2006 The University of Tennessee and The University
+ *                         of Tennessee Research Foundation. All rights
+ *                         reserved.
+ * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
+ *                         University of Stuttgart. All rights reserved.
+ * Copyright (c) 2004-2005 The Regents of the University of California.
+ *                         All rights reserved.
+ * Copyright (c) 2008-2011 University of Houston. All rights reserved.
+ * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#ifndef MCA_FS_LUSTRE_H +#define MCA_FS_LUSTRE_H + +#include "ompi_config.h" +#include "opal/mca/mca.h" +#include "ompi/mca/fs/fs.h" +#include "ompi/mca/io/ompio/io_ompio.h" + +extern int mca_fs_lustre_priority; +extern int mca_fs_lustre_stripe_size; +extern int mca_fs_lustre_stripe_width; + +BEGIN_C_DECLS + +int mca_fs_lustre_component_init_query(bool enable_progress_threads, + bool enable_mpi_threads); +struct mca_fs_base_module_1_0_0_t * +mca_fs_lustre_component_file_query (mca_io_ompio_file_t *fh, int *priority); +int mca_fs_lustre_component_file_unquery (mca_io_ompio_file_t *file); + +int mca_fs_lustre_module_init (mca_io_ompio_file_t *file); +int mca_fs_lustre_module_finalize (mca_io_ompio_file_t *file); + +OMPI_MODULE_DECLSPEC extern mca_fs_base_component_2_0_0_t mca_fs_lustre_component; +/* + * ****************************************************************** + * ********* functions which are implemented in this module ********* + * ****************************************************************** + */ + +int mca_fs_lustre_file_open (struct ompi_communicator_t *comm, + char *filename, + int amode, + struct ompi_info_t *info, + mca_io_ompio_file_t *fh); + +int mca_fs_lustre_file_close (mca_io_ompio_file_t *fh); + +int mca_fs_lustre_file_delete (char *filename, + struct ompi_info_t *info); + +int mca_fs_lustre_file_set_size (mca_io_ompio_file_t *fh, + OMPI_MPI_OFFSET_TYPE size); + +int mca_fs_lustre_file_get_size (mca_io_ompio_file_t *fh, + OMPI_MPI_OFFSET_TYPE *size); + +int mca_fs_lustre_file_set_info (mca_io_ompio_file_t *fh, + struct ompi_info_t *info); + +int mca_fs_lustre_file_sync (mca_io_ompio_file_t *fh); + +int mca_fs_lustre_file_seek (mca_io_ompio_file_t *fh, + OMPI_MPI_OFFSET_TYPE offset, + int whence); +/* + * ****************************************************************** + * ************ functions implemented in this module end ************ + * 
****************************************************************** + */ + +END_C_DECLS + +#endif /* MCA_FS_LUSTRE_H */ diff --git a/ompi/mca/fs/lustre/fs_lustre_component.c b/ompi/mca/fs/lustre/fs_lustre_component.c new file mode 100644 index 0000000000..0f31ef1b46 --- /dev/null +++ b/ompi/mca/fs/lustre/fs_lustre_component.c @@ -0,0 +1,103 @@ +/* + * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. + * Copyright (c) 2004-2005 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2005 The Regents of the University of California. + * All rights reserved. + * Copyright (c) 2008-2011 University of Houston. All rights reserved. + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + * + * These symbols are in a file by themselves to provide nice linker + * semantics. Since linkers generally pull in symbols by object + * files, keeping these symbols as the only symbols in this file + * prevents utility programs such as "ompi_info" from having to import + * entire components just to query their version and parameters. 
+ */
+
+#include "ompi_config.h"
+#include "fs_lustre.h"
+#include "mpi.h"
+
+/*
+ * Public string showing the fs lustre component version number
+ */
+const char *mca_fs_lustre_component_version_string =
+  "OMPI/MPI lustre FS MCA component version " OMPI_VERSION;
+
+static int lustre_register(void);
+/* Initial values; lustre_register() hands these variables to the MCA parameter calls */
+int mca_fs_lustre_priority = 20;
+int mca_fs_lustre_stripe_size = 0;
+int mca_fs_lustre_stripe_width = 0;
+/*
+ * Instantiate the public struct with all of our public information
+ * and pointers to our public functions in it
+ */
+mca_fs_base_component_2_0_0_t mca_fs_lustre_component = {
+
+    /* First, the mca_component_t struct containing meta information
+       about the component itself */
+
+    {
+        MCA_FS_BASE_VERSION_2_0_0,
+
+        /* Component name and version */
+        "lustre",
+        OMPI_MAJOR_VERSION,
+        OMPI_MINOR_VERSION,
+        OMPI_RELEASE_VERSION,
+        lustre_register,
+        NULL
+    },
+    {
+        /* This component is checkpointable */
+        MCA_BASE_METADATA_PARAM_CHECKPOINT
+    },
+    mca_fs_lustre_component_init_query,      /* get thread level */
+    mca_fs_lustre_component_file_query,      /* get priority and actions */
+    mca_fs_lustre_component_file_unquery     /* undo what was done by previous function */
+};
+/* Loads the stripe settings if they can already be found, then registers the three parameters. */
+static int
+lustre_register(void)
+{
+    int param;
+
+    param = mca_base_param_find ("fs", NULL, "lustre_stripe_size");
+    if (param >= 0)
+    {
+        mca_base_param_lookup_int (param, &mca_fs_lustre_stripe_size);
+    }
+    param = mca_base_param_find ("fs", NULL, "lustre_stripe_width");
+    if (param >= 0)
+    {
+        mca_base_param_lookup_int (param, &mca_fs_lustre_stripe_width);
+    }
+    /* each registration passes the variable's current value and its address */
+    mca_base_param_reg_int (&mca_fs_lustre_component.fsm_version,
+                            "priority",
+                            "Priority of the lustre fs component",
+                            false, false, mca_fs_lustre_priority,
+                            &mca_fs_lustre_priority);
+    mca_base_param_reg_int (&mca_fs_lustre_component.fsm_version,
+                            "stripe_size",
+                            "stripe size of a file over lustre",
+                            false, false, mca_fs_lustre_stripe_size,
+                            &mca_fs_lustre_stripe_size);
+    mca_base_param_reg_int (&mca_fs_lustre_component.fsm_version,
+                            "stripe_width",
+                            "stripe width of a file over lustre",
+                            false, false, mca_fs_lustre_stripe_width,
+                            &mca_fs_lustre_stripe_width);
+
+    return OMPI_SUCCESS;
+}
diff --git a/ompi/mca/fs/lustre/fs_lustre_file_close.c b/ompi/mca/fs/lustre/fs_lustre_file_close.c
new file mode 100644
index 0000000000..1816379ba9
--- /dev/null
+++ b/ompi/mca/fs/lustre/fs_lustre_file_close.c
@@ -0,0 +1,48 @@
+/*
+ * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
+ *                         University Research and Technology
+ *                         Corporation. All rights reserved.
+ * Copyright (c) 2004-2005 The University of Tennessee and The University
+ *                         of Tennessee Research Foundation. All rights
+ *                         reserved.
+ * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
+ *                         University of Stuttgart. All rights reserved.
+ * Copyright (c) 2004-2005 The Regents of the University of California.
+ *                         All rights reserved.
+ * Copyright (c) 2008-2011 University of Houston. All rights reserved.
+ * $COPYRIGHT$
+ *
+ * Additional copyrights may follow
+ *
+ * $HEADER$
+ */
+
+
+#include "ompi_config.h"
+#include "fs_lustre.h"
+
+#include <fcntl.h>
+#include <unistd.h>
+#include "mpi.h"
+#include "ompi/constants.h"
+#include "orte/util/show_help.h"
+#include "ompi/mca/fs/fs.h"
+
+/*
+ *	file_close_lustre
+ *
+ *	Function:	- runs a barrier on the file's communicator, then closes fh->fd
+ *	Accepts:	- file handle
+ *	Returns:	- OMPI_SUCCESS if the descriptor was closed, OMPI_ERROR otherwise
+ */
+int
+mca_fs_lustre_file_close (mca_io_ompio_file_t *fh)
+{
+    /* barrier on the file's communicator before the descriptor is closed */
+    fh->f_comm->c_coll.coll_barrier (fh->f_comm,
+                                     fh->f_comm->c_coll.coll_barrier_module);
+    if (0 > close (fh->fd)) {
+        return OMPI_ERROR;
+    }
+    return OMPI_SUCCESS;
+}
diff --git a/ompi/mca/fs/lustre/fs_lustre_file_delete.c b/ompi/mca/fs/lustre/fs_lustre_file_delete.c
new file mode 100644
index 0000000000..0bc59e6963
--- /dev/null
+++ b/ompi/mca/fs/lustre/fs_lustre_file_delete.c
@@ -0,0 +1,49 @@
+/*
+ * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
+ *                         University Research and Technology
+ *                         Corporation. All rights reserved.
+ * Copyright (c) 2004-2005 The University of Tennessee and The University
+ *                         of Tennessee Research Foundation. All rights
+ *                         reserved.
+ * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
+ *                         University of Stuttgart. All rights reserved.
+ * Copyright (c) 2004-2005 The Regents of the University of California.
+ *                         All rights reserved.
+ * Copyright (c) 2008-2011 University of Houston. All rights reserved.
+ * $COPYRIGHT$
+ *
+ * Additional copyrights may follow
+ *
+ * $HEADER$
+ */
+
+
+#include "ompi_config.h"
+#include "fs_lustre.h"
+#include <unistd.h>
+
+#include "mpi.h"
+#include "ompi/constants.h"
+#include "orte/util/show_help.h"
+#include "ompi/mca/fs/fs.h"
+
+/*
+ *	file_delete_lustre
+ *
+ *	Function:	- deletes a file by unlinking its path
+ *	Accepts:	- file name & info (info is not examined)
+ *	Returns:	- OMPI_SUCCESS if the file was unlinked, OMPI_ERROR otherwise
+ */
+int
+mca_fs_lustre_file_delete (char* file_name,
+                           struct ompi_info_t *info)
+{
+    int ret;
+
+    ret = unlink(file_name); /* unlink() reports failure with a negative return */
+    if (0 > ret) {
+        return OMPI_ERROR;
+    }
+
+    return OMPI_SUCCESS;
+}
diff --git a/ompi/mca/fs/lustre/fs_lustre_file_get_size.c b/ompi/mca/fs/lustre/fs_lustre_file_get_size.c
new file mode 100644
index 0000000000..544a87706e
--- /dev/null
+++ b/ompi/mca/fs/lustre/fs_lustre_file_get_size.c
@@ -0,0 +1,42 @@
+/*
+ * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
+ *                         University Research and Technology
+ *                         Corporation. All rights reserved.
+ * Copyright (c) 2004-2005 The University of Tennessee and The University
+ *                         of Tennessee Research Foundation. All rights
+ *                         reserved.
+ * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
+ *                         University of Stuttgart. All rights reserved.
+ * Copyright (c) 2004-2005 The Regents of the University of California.
+ *                         All rights reserved.
+ * Copyright (c) 2008-2011 University of Houston. All rights reserved.
+ * $COPYRIGHT$
+ *
+ * Additional copyrights may follow
+ *
+ * $HEADER$
+ */
+
+
+#include "ompi_config.h"
+#include "fs_lustre.h"
+
+#include "mpi.h"
+#include "ompi/constants.h"
+#include "orte/util/show_help.h"
+#include "ompi/mca/fs/fs.h"
+
+/*
+ *	file_get_size_lustre
+ *
+ *	Function:	- placeholder for get_size; only prints "LUSTRE GET SIZE"
+ *	Accepts:	- same arguments as MPI_File_get_size(), both unused
+ *	Returns:	- always OMPI_SUCCESS; NOTE(review): *size is never written
+ */
+int
+mca_fs_lustre_file_get_size (mca_io_ompio_file_t *file_handle,
+                             OMPI_MPI_OFFSET_TYPE *size)
+{
+    printf ("LUSTRE GET SIZE\n");
+    return OMPI_SUCCESS;
+}
diff --git a/ompi/mca/fs/lustre/fs_lustre_file_open.c b/ompi/mca/fs/lustre/fs_lustre_file_open.c
new file mode 100644
index 0000000000..a69a25088f
--- /dev/null
+++ b/ompi/mca/fs/lustre/fs_lustre_file_open.c
@@ -0,0 +1,144 @@
+/*
+ * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
+ *                         University Research and Technology
+ *                         Corporation. All rights reserved.
+ * Copyright (c) 2004-2005 The University of Tennessee and The University
+ *                         of Tennessee Research Foundation. All rights
+ *                         reserved.
+ * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
+ *                         University of Stuttgart. All rights reserved.
+ * Copyright (c) 2004-2005 The Regents of the University of California.
+ *                         All rights reserved.
+ * Copyright (c) 2008-2011 University of Houston. All rights reserved.
+ * $COPYRIGHT$
+ *
+ * Additional copyrights may follow
+ *
+ * $HEADER$
+ */
+
+
+#include "ompi_config.h"
+#include "fs_lustre.h"
+
+#include <fcntl.h>
+#include <unistd.h>
+#include "mpi.h"
+#include "ompi/constants.h"
+#include "orte/util/show_help.h"
+#include "ompi/mca/fs/fs.h"
+#include "ompi/communicator/communicator.h"
+#include "ompi/info/info.h"
+
+#include <errno.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <lustre/liblustreapi.h>
+
+/*
+ *	file_open_lustre
+ *
+ *	Function:	- opens a file; when stripe parameters are set and the file
+ *	                  is created read-write, rank 0 first creates it with them
+ *	Accepts:	- same arguments as MPI_File_open(); the mode and permissions
+ *	                  are read from fh (f_amode, f_perm), not from access_mode
+ *	Returns:	- OMPI_SUCCESS with fh->fd and fh->f_stripe_size set,
+ *	                  OMPI_ERROR otherwise
+ */
+int
+mca_fs_lustre_file_open (struct ompi_communicator_t *comm,
+                         char* filename,
+                         int access_mode,
+                         struct ompi_info_t *info,
+                         mca_io_ompio_file_t *fh)
+{
+    int amode;
+    int old_mask, perm;
+    int rc;
+    size_t v1_size, v3_size;
+    struct lov_user_md *lump = NULL;
+
+    if (fh->f_perm == OMPIO_PERM_NULL) {
+        /* umask() only reports the mask while replacing it: set, then restore */
+        old_mask = umask(022);
+        umask(old_mask);
+        /* drop the masked bits; XOR would switch on bits outside 0666 */
+        perm = 0666 & ~old_mask;
+    }
+    else {
+        perm = fh->f_perm;
+    }
+
+    /* translate the MPI access mode bits into open(2) flags */
+    amode = 0;
+    if (fh->f_amode & MPI_MODE_CREATE)
+        amode = amode | O_CREAT;
+    if (fh->f_amode & MPI_MODE_RDONLY)
+        amode = amode | O_RDONLY;
+    if (fh->f_amode & MPI_MODE_WRONLY)
+        amode = amode | O_WRONLY;
+    if (fh->f_amode & MPI_MODE_RDWR)
+        amode = amode | O_RDWR;
+    if (fh->f_amode & MPI_MODE_EXCL)
+        amode = amode | O_EXCL;
+
+    if ((mca_fs_lustre_stripe_size || mca_fs_lustre_stripe_width) &&
+        (amode&O_CREAT) && (amode&O_RDWR)) {
+        if (0 == fh->f_rank) {
+            llapi_file_create(filename,
+                              mca_fs_lustre_stripe_size,
+                              -1, /* MSC need to change that */
+                              mca_fs_lustre_stripe_width,
+                              0); /* MSC need to change that */
+
+            fh->fd = open(filename, O_CREAT | O_RDWR | O_LOV_DELAY_CREATE, perm);
+            if (fh->fd < 0) {
+                /* do not return: the other ranks wait in the barrier below,
+                 * and a persistent failure shows up in the open() after it */
+                fprintf(stderr, "Can't open %s file: %d (%s)\n",
+                        filename, errno, strerror(errno));
+            }
+            else {
+                close (fh->fd);
+            }
+        }
+        fh->f_comm->c_coll.coll_barrier (fh->f_comm,
+                                         fh->f_comm->c_coll.coll_barrier_module);
+    }
+
+    fh->fd = open (filename, amode, perm);
+    if (fh->fd < 0) {
+        return OMPI_ERROR;
+    }
+
+    if (mca_fs_lustre_stripe_size > 0) {
+        fh->f_stripe_size = mca_fs_lustre_stripe_size;
+    }
+    else {
+        /* llapi_file_get_stripe() writes into a caller-provided buffer that
+         * has to hold the layout header plus one entry per possible stripe,
+         * for both the v1 and the v3 layout */
+        v1_size = sizeof(struct lov_user_md_v1) +
+            LOV_MAX_STRIPE_COUNT * sizeof(struct lov_user_ost_data_v1);
+        v3_size = sizeof(struct lov_user_md_v3) +
+            LOV_MAX_STRIPE_COUNT * sizeof(struct lov_user_ost_data_v1);
+        lump = malloc(v1_size > v3_size ? v1_size : v3_size);
+        if (NULL == lump) {
+            close (fh->fd);
+            return OMPI_ERROR;
+        }
+        rc = llapi_file_get_stripe(filename, lump);
+        if (rc != 0) {
+            fprintf(stderr, "get_stripe failed: %d (%s)\n",errno, strerror(errno));
+            free(lump);
+            close (fh->fd);
+            return OMPI_ERROR;
+        }
+        fh->f_stripe_size = lump->lmm_stripe_size;
+        free(lump);
+    }
+    return OMPI_SUCCESS;
+}
diff --git a/ompi/mca/fs/lustre/fs_lustre_file_set_info.c b/ompi/mca/fs/lustre/fs_lustre_file_set_info.c
new file mode 100644
index 0000000000..5830e77f03
--- /dev/null
+++ b/ompi/mca/fs/lustre/fs_lustre_file_set_info.c
@@ -0,0 +1,42 @@
+/*
+ * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
+ *                         University Research and Technology
+ *                         Corporation. All rights reserved.
+ * Copyright (c) 2004-2005 The University of Tennessee and The University
+ *                         of Tennessee Research Foundation. All rights
+ *                         reserved.
+ * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
+ *                         University of Stuttgart. All rights reserved.
+ * Copyright (c) 2004-2005 The Regents of the University of California.
+ *                         All rights reserved.
+ * Copyright (c) 2008-2011 University of Houston. All rights reserved.
+ * $COPYRIGHT$
+ *
+ * Additional copyrights may follow
+ *
+ * $HEADER$
+ */
+
+
+#include "ompi_config.h"
+#include "fs_lustre.h"
+
+#include "mpi.h"
+#include "ompi/constants.h"
+#include "orte/util/show_help.h"
+#include "ompi/mca/fs/fs.h"
+
+/*
+ *	file_set_info_lustre
+ *
+ *	Function:	- placeholder for set_info; only prints "LUSTRE SET INFO"
+ *	Accepts:	- same arguments as MPI_File_set_info(), both unused
+ *	Returns:	- always OMPI_SUCCESS
+ */
+int
+mca_fs_lustre_file_set_info (mca_io_ompio_file_t *file_handle,
+                             struct ompi_info_t *info)
+{
+    printf ("LUSTRE SET INFO\n");
+    return OMPI_SUCCESS;
+}
diff --git a/ompi/mca/fs/lustre/fs_lustre_file_set_size.c b/ompi/mca/fs/lustre/fs_lustre_file_set_size.c
new file mode 100644
index 0000000000..942fffd922
--- /dev/null
+++ b/ompi/mca/fs/lustre/fs_lustre_file_set_size.c
@@ -0,0 +1,42 @@
+/*
+ * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
+ *                         University Research and Technology
+ *                         Corporation. All rights reserved.
+ * Copyright (c) 2004-2005 The University of Tennessee and The University
+ *                         of Tennessee Research Foundation. All rights
+ *                         reserved.
+ * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
+ *                         University of Stuttgart. All rights reserved.
+ * Copyright (c) 2004-2005 The Regents of the University of California.
+ *                         All rights reserved.
+ * Copyright (c) 2008-2011 University of Houston. All rights reserved.
+ * $COPYRIGHT$
+ *
+ * Additional copyrights may follow
+ *
+ * $HEADER$
+ */
+
+
+#include "ompi_config.h"
+#include "fs_lustre.h"
+
+#include "mpi.h"
+#include "ompi/constants.h"
+#include "orte/util/show_help.h"
+#include "ompi/mca/fs/fs.h"
+
+/*
+ *	file_set_size_lustre
+ *
+ *	Function:	- placeholder for set_size; only prints "LUSTRE SET SIZE"
+ *	Accepts:	- same arguments as MPI_File_set_size(), both unused
+ *	Returns:	- always OMPI_SUCCESS; the file is not resized here
+ */
+int
+mca_fs_lustre_file_set_size (mca_io_ompio_file_t *file_handle,
+                             OMPI_MPI_OFFSET_TYPE size)
+{
+    printf ("LUSTRE SET SIZE\n");
+    return OMPI_SUCCESS;
+}
diff --git a/ompi/mca/fs/lustre/fs_lustre_file_sync.c b/ompi/mca/fs/lustre/fs_lustre_file_sync.c
new file mode 100644
index 0000000000..577d1a4306
--- /dev/null
+++ b/ompi/mca/fs/lustre/fs_lustre_file_sync.c
@@ -0,0 +1,48 @@
+/*
+ * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
+ *                         University Research and Technology
+ *                         Corporation. All rights reserved.
+ * Copyright (c) 2004-2005 The University of Tennessee and The University
+ *                         of Tennessee Research Foundation. All rights
+ *                         reserved.
+ * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
+ *                         University of Stuttgart. All rights reserved.
+ * Copyright (c) 2004-2005 The Regents of the University of California.
+ *                         All rights reserved.
+ * Copyright (c) 2008-2009 University of Houston. All rights reserved.
+ * $COPYRIGHT$
+ *
+ * Additional copyrights may follow
+ *
+ * $HEADER$
+ */
+
+
+#include "ompi_config.h"
+#include "fs_lustre.h"
+#include <unistd.h>
+
+#include "mpi.h"
+#include "ompi/constants.h"
+#include "orte/util/show_help.h"
+#include "ompi/mca/fs/fs.h"
+
+/*
+ *	file_sync_lustre
+ *
+ *	Function:	- calls fsync() on the file's descriptor
+ *	Accepts:	- file handle
+ *	Returns:	- OMPI_SUCCESS if fsync() succeeded, OMPI_ERROR otherwise
+ */
+int
+mca_fs_lustre_file_sync (mca_io_ompio_file_t *fh)
+{
+    int err;
+
+    err = fsync(fh->fd);
+
+    if (-1 == err) {
+        return OMPI_ERROR;
+    }
+    return OMPI_SUCCESS;
+}
diff --git a/ompi/mca/fs/pvfs2/Makefile.am b/ompi/mca/fs/pvfs2/Makefile.am
new file mode 100644
index 0000000000..40e283c306
--- /dev/null
+++ b/ompi/mca/fs/pvfs2/Makefile.am
@@ -0,0 +1,57 @@
+#
+# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
+#                         University Research and Technology
+#                         Corporation. All rights reserved.
+# Copyright (c) 2004-2005 The University of Tennessee and The University
+#                         of Tennessee Research Foundation. All rights
+#                         reserved.
+# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
+#                         University of Stuttgart. All rights reserved.
+# Copyright (c) 2004-2005 The Regents of the University of California.
+#                         All rights reserved.
+# Copyright (c) 2008-2011 University of Houston. All rights reserved.
+# $COPYRIGHT$
+#
+# Additional copyrights may follow
+#
+# $HEADER$
+#
+
+# Make the output library in this directory, and name it either
+# mca_<type>_<name>.la (for DSO builds) or libmca_<type>_<name>.la
+# (for static builds).
+ +if MCA_BUILD_ompi_fs_pvfs2_DSO +component_noinst = +component_install = mca_fs_pvfs2.la +else +component_noinst = libmca_fs_pvfs2.la +component_install = +endif + +# Source files + +fs_pvfs2_sources = \ + fs_pvfs2.h \ + fs_pvfs2.c \ + fs_pvfs2_component.c \ + fs_pvfs2_file_open.c \ + fs_pvfs2_file_close.c \ + fs_pvfs2_file_delete.c \ + fs_pvfs2_file_sync.c \ + fs_pvfs2_file_set_size.c \ + fs_pvfs2_file_get_size.c \ + fs_pvfs2_file_set_info.c + +AM_CPPFLAGS = $(fs_pvfs2_CPPFLAGS) + +mcacomponentdir = $(pkglibdir) +mcacomponent_LTLIBRARIES = $(component_install) +mca_fs_pvfs2_la_SOURCES = $(fs_pvfs2_sources) +mca_fs_pvfs2_la_LIBADD = +mca_fs_pvfs2_la_LDFLAGS = -module -avoid-version $(fs_pvfs2_LDFLAGS) + +noinst_LTLIBRARIES = $(component_noinst) +libmca_fs_pvfs2_la_SOURCES = $(fs_pvfs2_sources) +libmca_fs_pvfs2_la_LIBADD = +libmca_fs_pvfs2_la_LDFLAGS = -module -avoid-version $(fs_pvfs2_LDFLAGS) diff --git a/ompi/mca/fs/pvfs2/configure.m4 b/ompi/mca/fs/pvfs2/configure.m4 new file mode 100644 index 0000000000..4786c82926 --- /dev/null +++ b/ompi/mca/fs/pvfs2/configure.m4 @@ -0,0 +1,50 @@ +# -*- shell-script -*- +# +# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana +# University Research and Technology +# Corporation. All rights reserved. +# Copyright (c) 2004-2005 The University of Tennessee and The University +# of Tennessee Research Foundation. All rights +# reserved. +# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +# University of Stuttgart. All rights reserved. +# Copyright (c) 2004-2005 The Regents of the University of California. +# All rights reserved. +# Copyright (c) 2010 Cisco Systems, Inc. All rights reserved. +# Copyright (c) 2008-2011 University of Houston. All rights reserved. 
+# $COPYRIGHT$ +# +# Additional copyrights may follow +# +# $HEADER$ +# + + +# MCA_fs_pvfs2_CONFIG(action-if-can-compile, +# [action-if-cant-compile]) +# ------------------------------------------------ +AC_DEFUN([MCA_ompi_fs_pvfs2_CONFIG],[ + AC_CONFIG_FILES([ompi/mca/fs/pvfs2/Makefile]) + + OMPI_CHECK_PVFS2([fs_pvfs2], + [fs_pvfs2_happy="yes"], + [fs_pvfs2_happy="no"]) + + AS_IF([test "$fs_pvfs2_happy" = "yes"], + [fs_pvfs2_WRAPPER_EXTRA_LDFLAGS="$fs_pvfs2_LDFLAGS" + fs_pvfs2_WRAPPER_EXTRA_LIBS="$fs_pvfs2_LIBS" + $1], + [$2]) + + AC_CHECK_HEADERS([pvfs2.h], [], + [AC_CHECK_HEADERS([pvfs2.h], [], [$2], + [AC_INCLUDES_DEFAULT])], + [AC_INCLUDES_DEFAULT]) + + + # substitute in the things needed to build pvfs2 + AC_SUBST([fs_pvfs2_CFLAGS]) + AC_SUBST([fs_pvfs2_CPPFLAGS]) + AC_SUBST([fs_pvfs2_LDFLAGS]) + AC_SUBST([fs_pvfs2_LIBS]) +])dnl diff --git a/ompi/mca/fs/pvfs2/fs_pvfs2.c b/ompi/mca/fs/pvfs2/fs_pvfs2.c new file mode 100644 index 0000000000..793f0bc3d1 --- /dev/null +++ b/ompi/mca/fs/pvfs2/fs_pvfs2.c @@ -0,0 +1,93 @@ +/* + * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. + * Copyright (c) 2004-2006 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2005 The Regents of the University of California. + * All rights reserved. + * Copyright (c) 2008-2011 University of Houston. All rights reserved. + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + * + * These symbols are in a file by themselves to provide nice linker + * semantics. 
Since linkers generally pull in symbols by object fules, + * keeping these symbols as the only symbols in this file prevents + * utility programs such as "ompi_info" from having to import entire + * modules just to query their version and parameters + */ + +#include "ompi_config.h" +#include "mpi.h" +#include "ompi/mca/fs/fs.h" +#include "ompi/mca/fs/pvfs2/fs_pvfs2.h" + +/* + * ******************************************************************* + * ************************ actions structure ************************ + * ******************************************************************* + */ +static mca_fs_base_module_1_0_0_t pvfs2 = { + mca_fs_pvfs2_module_init, /* initalise after being selected */ + mca_fs_pvfs2_module_finalize, /* close a module on a communicator */ + mca_fs_pvfs2_file_open, + mca_fs_pvfs2_file_close, + mca_fs_pvfs2_file_delete, + mca_fs_pvfs2_file_set_size, + mca_fs_pvfs2_file_get_size, + mca_fs_pvfs2_file_set_info, + mca_fs_pvfs2_file_sync +}; +/* + * ******************************************************************* + * ************************* structure ends ************************** + * ******************************************************************* + */ + +int mca_fs_pvfs2_component_init_query(bool enable_progress_threads, + bool enable_mpi_threads) +{ + /* Nothing to do */ + + return OMPI_SUCCESS; +} + +struct mca_fs_base_module_1_0_0_t * +mca_fs_pvfs2_component_file_query (mca_io_ompio_file_t *fh, int *priority) +{ + *priority = mca_fs_pvfs2_priority; + + if (PVFS2 == fh->f_fstype) { + if (*priority < 50) { + *priority = 50; + } + } + + return &pvfs2; +} + +int mca_fs_pvfs2_component_file_unquery (mca_io_ompio_file_t *file) +{ + /* This function might be needed for some purposes later. 
for now it + * does not have anything to do since there are no steps which need + * to be undone if this module is not selected */ + + return OMPI_SUCCESS; +} + +int mca_fs_pvfs2_module_init (mca_io_ompio_file_t *file) +{ + return OMPI_SUCCESS; +} + + +int mca_fs_pvfs2_module_finalize (mca_io_ompio_file_t *file) +{ + return OMPI_SUCCESS; +} diff --git a/ompi/mca/fs/pvfs2/fs_pvfs2.h b/ompi/mca/fs/pvfs2/fs_pvfs2.h new file mode 100644 index 0000000000..f7bc229ef1 --- /dev/null +++ b/ompi/mca/fs/pvfs2/fs_pvfs2.h @@ -0,0 +1,101 @@ +/* + * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. + * Copyright (c) 2004-2006 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2005 The Regents of the University of California. + * All rights reserved. + * Copyright (c) 2008-2011 University of Houston. All rights reserved. 
+ * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#ifndef MCA_FS_PVFS2_H +#define MCA_FS_PVFS2_H +/* +#ifdef HAVE_PVFS2_H +#include "pvfs2.h" +#endif + +#ifdef PVFS2_VERSION_MAJOR +#include "pvfs2-compat.h" +#endif +*/ +#include "pvfs2.h" +#include "pvfs2-compat.h" + +#include "ompi_config.h" +#include "opal/mca/mca.h" +#include "ompi/mca/fs/fs.h" +#include "ompi/mca/io/ompio/io_ompio.h" + +extern int mca_fs_pvfs2_priority; +extern int mca_fs_pvfs2_stripe_size; +extern int mca_fs_pvfs2_stripe_width; +extern int mca_fs_pvfs2_IS_INITIALIZED; + +BEGIN_C_DECLS + +struct mca_fs_pvfs2_s { + PVFS_credentials credentials; + PVFS_object_ref object_ref; +} mca_fs_pvfs2_s; +typedef struct mca_fs_pvfs2_s mca_fs_pvfs2; + +int mca_fs_pvfs2_component_init_query(bool enable_progress_threads, + bool enable_mpi_threads); +struct mca_fs_base_module_1_0_0_t * +mca_fs_pvfs2_component_file_query (mca_io_ompio_file_t *fh, int *priority); +int mca_fs_pvfs2_component_file_unquery (mca_io_ompio_file_t *file); + +int mca_fs_pvfs2_module_init (mca_io_ompio_file_t *file); +int mca_fs_pvfs2_module_finalize (mca_io_ompio_file_t *file); + +OMPI_MODULE_DECLSPEC extern mca_fs_base_component_2_0_0_t mca_fs_pvfs2_component; +/* + * ****************************************************************** + * ********* functions which are implemented in this module ********* + * ****************************************************************** + */ + +int mca_fs_pvfs2_file_open (struct ompi_communicator_t *comm, + char *filename, + int amode, + struct ompi_info_t *info, + mca_io_ompio_file_t *fh); + +int mca_fs_pvfs2_file_close (mca_io_ompio_file_t *fh); + +int mca_fs_pvfs2_file_delete (char *filename, + struct ompi_info_t *info); + +int mca_fs_pvfs2_file_set_size (mca_io_ompio_file_t *fh, + OMPI_MPI_OFFSET_TYPE size); + +int mca_fs_pvfs2_file_get_size (mca_io_ompio_file_t *fh, + OMPI_MPI_OFFSET_TYPE *size); + +int mca_fs_pvfs2_file_set_info (mca_io_ompio_file_t *fh, + struct 
ompi_info_t *info); + +int mca_fs_pvfs2_file_sync (mca_io_ompio_file_t *fh); + +int mca_fs_pvfs2_file_seek (mca_io_ompio_file_t *fh, + OMPI_MPI_OFFSET_TYPE offset, + int whence); +/* + * ****************************************************************** + * ************ functions implemented in this module end ************ + * ****************************************************************** + */ + +END_C_DECLS + +#endif /* MCA_FS_PVFS2_H */ diff --git a/ompi/mca/fs/pvfs2/fs_pvfs2_component.c b/ompi/mca/fs/pvfs2/fs_pvfs2_component.c new file mode 100644 index 0000000000..a9b80eef90 --- /dev/null +++ b/ompi/mca/fs/pvfs2/fs_pvfs2_component.c @@ -0,0 +1,105 @@ +/* + * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. + * Copyright (c) 2004-2005 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2005 The Regents of the University of California. + * All rights reserved. + * Copyright (c) 2008-2011 University of Houston. All rights reserved. + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + * + * These symbols are in a file by themselves to provide nice linker + * semantics. Since linkers generally pull in symbols by object + * files, keeping these symbols as the only symbols in this file + * prevents utility programs such as "ompi_info" from having to import + * entire components just to query their version and parameters. 
+ */ + +#include "ompi_config.h" +#include "fs_pvfs2.h" +#include "mpi.h" + +/* + * Public string showing the fs pvfs2 component version number + */ +const char *mca_fs_pvfs2_component_version_string = + "OMPI/MPI pvfs2 FS MCA component version " OMPI_VERSION; + +static int pvfs2_register(void); + +int mca_fs_pvfs2_priority = 0; +int mca_fs_pvfs2_stripe_size = -1; +int mca_fs_pvfs2_stripe_width = -1; +int mca_fs_pvfs2_IS_INITIALIZED = 0; + +/* + * Instantiate the public struct with all of our public information + * and pointers to our public functions in it + */ +mca_fs_base_component_2_0_0_t mca_fs_pvfs2_component = { + + /* First, the mca_component_t struct containing meta information + about the component itself */ + + { + MCA_FS_BASE_VERSION_2_0_0, + + /* Component name and version */ + "pvfs2", + OMPI_MAJOR_VERSION, + OMPI_MINOR_VERSION, + OMPI_RELEASE_VERSION, + pvfs2_register, + NULL + }, + { + /* This component is checkpointable */ + MCA_BASE_METADATA_PARAM_CHECKPOINT + }, + mca_fs_pvfs2_component_init_query, /* get thread level */ + mca_fs_pvfs2_component_file_query, /* get priority and actions */ + mca_fs_pvfs2_component_file_unquery /* undo what was done by previous function */ +}; + +static int +pvfs2_register(void) +{ + int param; + + param = mca_base_param_find ("fs", NULL, "pvfs2_stripe_size"); + if (param >= 0) + { + mca_base_param_lookup_int (param, &mca_fs_pvfs2_stripe_size); + } + param = mca_base_param_find ("fs", NULL, "pvfs2_stripe_width"); + if (param >= 0) + { + mca_base_param_lookup_int (param, &mca_fs_pvfs2_stripe_width); + } + + mca_base_param_reg_int (&mca_fs_pvfs2_component.fsm_version, + "priority", + "Priority of the pvfs2 fs component", + false, false, mca_fs_pvfs2_priority, + &mca_fs_pvfs2_priority); + mca_base_param_reg_int (&mca_fs_pvfs2_component.fsm_version, + "stripe_size", + "stripe size of a file over pvfs2", + false, false, mca_fs_pvfs2_stripe_size, + &mca_fs_pvfs2_stripe_size); + mca_base_param_reg_int 
(&mca_fs_pvfs2_component.fsm_version, + "stripe_width", + "stripe width of a file over pvfs2", + false, false, mca_fs_pvfs2_stripe_width, + &mca_fs_pvfs2_stripe_width); + + return OMPI_SUCCESS; +} diff --git a/ompi/mca/fs/pvfs2/fs_pvfs2_file_close.c b/ompi/mca/fs/pvfs2/fs_pvfs2_file_close.c new file mode 100644 index 0000000000..42ef1a6e95 --- /dev/null +++ b/ompi/mca/fs/pvfs2/fs_pvfs2_file_close.c @@ -0,0 +1,55 @@ +/* + * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. + * Copyright (c) 2004-2005 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2005 The Regents of the University of California. + * All rights reserved. + * Copyright (c) 2008-2011 University of Houston. All rights reserved. + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +/* This code is based on the PVFS2 ADIO module in ROMIO + * Copyright (C) 1997 University of Chicago. + * See COPYRIGHT notice in top-level directory. 
+ */ + +#include "ompi_config.h" +#include "fs_pvfs2.h" + +#include +#include +#include "mpi.h" +#include "ompi/constants.h" +#include "orte/util/show_help.h" +#include "ompi/mca/fs/fs.h" + +/* + * file_close_pvfs2 + * + * Function: - closes a new file + * Accepts: - file handle + * Returns: - Success if file closed + */ +int +mca_fs_pvfs2_file_close (mca_io_ompio_file_t *fh) +{ + if (NULL != fh->f_fs_ptr) { + free (fh->f_fs_ptr); + fh->f_fs_ptr = NULL; + } + /* + fh->f_comm->c_coll.coll_barrier (fh->f_comm, + fh->f_comm->c_coll.coll_barrier_module); + close (fh->fd); + */ + return OMPI_SUCCESS; +} diff --git a/ompi/mca/fs/pvfs2/fs_pvfs2_file_delete.c b/ompi/mca/fs/pvfs2/fs_pvfs2_file_delete.c new file mode 100644 index 0000000000..2e0801c141 --- /dev/null +++ b/ompi/mca/fs/pvfs2/fs_pvfs2_file_delete.c @@ -0,0 +1,80 @@ +/* + * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. + * Copyright (c) 2004-2005 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2005 The Regents of the University of California. + * All rights reserved. + * Copyright (c) 2008-2011 University of Houston. All rights reserved. + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +/* This code is based on the PVFS2 ADIO module in ROMIO + * Copyright (C) 1997 University of Chicago. + * See COPYRIGHT notice in top-level directory. 
+ */ + +#include "ompi_config.h" +#include "fs_pvfs2.h" + +#include "mpi.h" +#include "ompi/constants.h" +#include "orte/util/show_help.h" +#include "ompi/mca/fs/fs.h" + +/* + * file_delete_pvfs2 + * + * Function: - deletes a file + * Accepts: - file name & info + * Returns: - Success if file closed + */ +int +mca_fs_pvfs2_file_delete (char* file_name, + struct ompi_info_t *info) +{ + PVFS_credentials credentials; + PVFS_sysresp_getparent resp_getparent; + int ret; + PVFS_fs_id pvfs2_id; + char pvfs2_path[OMPIO_MAX_NAME] = {0}; + char * ncache_timeout; + + if (!mca_fs_pvfs2_IS_INITIALIZED) { + /* disable the pvfs2 ncache */ + ncache_timeout = getenv("PVFS2_NCACHE_TIMEOUT"); + if (ncache_timeout == NULL ) + setenv("PVFS2_NCACHE_TIMEOUT", "0", 1); + + ret = PVFS_util_init_defaults(); + if (ret < 0) { + return OMPI_ERROR; + } + mca_fs_pvfs2_IS_INITIALIZED = 1; + } + + memset (&credentials, 0, sizeof(PVFS_credentials)); + PVFS_util_gen_credentials (&credentials); + + ret = PVFS_util_resolve(file_name, &pvfs2_id, pvfs2_path, OMPIO_MAX_NAME); + if (ret != 0) { + return OMPI_ERROR; + } + + ret = PVFS_sys_getparent(pvfs2_id, pvfs2_path, &credentials, &resp_getparent); + + ret = PVFS_sys_remove(resp_getparent.basename, + resp_getparent.parent_ref, &credentials); + if (ret != 0) { + return OMPI_ERROR; + } + return OMPI_SUCCESS; +} diff --git a/ompi/mca/fs/pvfs2/fs_pvfs2_file_get_size.c b/ompi/mca/fs/pvfs2/fs_pvfs2_file_get_size.c new file mode 100644 index 0000000000..eb2b884f28 --- /dev/null +++ b/ompi/mca/fs/pvfs2/fs_pvfs2_file_get_size.c @@ -0,0 +1,59 @@ +/* + * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. + * Copyright (c) 2004-2005 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. 
+ * Copyright (c) 2004-2005 The Regents of the University of California. + * All rights reserved. + * Copyright (c) 2008-2011 University of Houston. All rights reserved. + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +/* This code is based on the PVFS2 ADIO module in ROMIO + * Copyright (C) 1997 University of Chicago. + * See COPYRIGHT notice in top-level directory. + */ + +#include "ompi_config.h" +#include "fs_pvfs2.h" + +#include "mpi.h" +#include "ompi/constants.h" +#include "orte/util/show_help.h" +#include "ompi/mca/fs/fs.h" + +/* + * file_get_size_pvfs2 + * + * Function: - get_size of a file + * Accepts: - same arguments as MPI_File_get_size() + * Returns: - Success if size is get + */ +int +mca_fs_pvfs2_file_get_size (mca_io_ompio_file_t *fh, + OMPI_MPI_OFFSET_TYPE *size) +{ + int ret; + mca_fs_pvfs2 *pvfs2_fs; + PVFS_sysresp_getattr resp_getattr; + + pvfs2_fs = (mca_fs_pvfs2 *)fh->f_fs_ptr; + + ret = PVFS_sys_getattr (pvfs2_fs->object_ref, PVFS_ATTR_SYS_SIZE, + &(pvfs2_fs->credentials), &resp_getattr); + if (ret != 0 ) { + return OMPI_ERROR; + } + + *size = resp_getattr.attr.size; + + return OMPI_SUCCESS; +} diff --git a/ompi/mca/fs/pvfs2/fs_pvfs2_file_open.c b/ompi/mca/fs/pvfs2/fs_pvfs2_file_open.c new file mode 100644 index 0000000000..2fee1c9625 --- /dev/null +++ b/ompi/mca/fs/pvfs2/fs_pvfs2_file_open.c @@ -0,0 +1,322 @@ +/* + * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. + * Copyright (c) 2004-2005 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2005 The Regents of the University of California. + * All rights reserved. + * Copyright (c) 2008-2011 University of Houston. All rights reserved. 
+ * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +/* This code is based on the PVFS2 ADIO module in ROMIO + * Copyright (C) 1997 University of Chicago. + * See COPYRIGHT notice in top-level directory. + */ + +#include "ompi_config.h" +#include "fs_pvfs2.h" + +#include "mpi.h" +#include "ompi/constants.h" +#include "orte/util/show_help.h" +#include "ompi/mca/fs/fs.h" +#include "ompi/communicator/communicator.h" +#include "ompi/info/info.h" +#include "opal/datatype/opal_convertor.h" +#include "opal/datatype/opal_datatype.h" +#include "ompi/datatype/ompi_datatype.h" + +struct open_status_s { + int error; + PVFS_object_ref object_ref; +}; +typedef struct open_status_s open_status; + +static void fake_an_open(PVFS_fs_id id, + char *pvfs2_name, + int access_mode, + int stripe_width, + PVFS_size stripe_size, + mca_fs_pvfs2 *pvfs2_fs, + open_status *o_status); +/* + * file_open_pvfs2: This is the same strategy as ROMIO's pvfs2 open + * + * Function: - opens a new file + * Accepts: - same arguments as MPI_File_open() + * Returns: - Success if new file handle + */ +int +mca_fs_pvfs2_file_open (struct ompi_communicator_t *comm, + char* filename, + int access_mode, + struct ompi_info_t *info, + mca_io_ompio_file_t *fh) +{ + /* int amode; + int old_mask, perm; + */ + int ret; + mca_fs_pvfs2 *pvfs2_fs; + PVFS_fs_id pvfs2_id; + char pvfs2_path[OMPIO_MAX_NAME] = {0}; + char * ncache_timeout; + open_status o_status = {0, {0, 0}}; + struct ompi_datatype_t *open_status_type; + struct ompi_datatype_t *types[2] = {&ompi_mpi_int.dt, &ompi_mpi_byte.dt}; + int lens[2] = {1, sizeof(PVFS_object_ref)}; + OPAL_PTRDIFF_TYPE offsets[2]; + + /* We are going to do what ROMIO does with one process resolving + * the name and broadcasting to others */ + + pvfs2_fs = (mca_fs_pvfs2 *) malloc(sizeof(mca_fs_pvfs2)); + if (NULL == pvfs2_fs) { + opal_output (1, "OUT OF MEMORY\n"); + return OMPI_ERR_OUT_OF_RESOURCE; + } + + if (!mca_fs_pvfs2_IS_INITIALIZED) { + /* disable 
the pvfs2 ncache */ + ncache_timeout = getenv("PVFS2_NCACHE_TIMEOUT"); + if (ncache_timeout == NULL ) { + setenv("PVFS2_NCACHE_TIMEOUT", "0", 1); + } + ret = PVFS_util_init_defaults(); + if (ret < 0) { + PVFS_perror("PVFS_util_init_defaults", ret); + return OMPI_ERROR; + } + mca_fs_pvfs2_IS_INITIALIZED = 1; + } + + memset(&(pvfs2_fs->credentials), 0, sizeof(PVFS_credentials)); + PVFS_util_gen_credentials(&(pvfs2_fs->credentials)); + + if (OMPIO_ROOT == fh->f_rank) { + ret = PVFS_util_resolve(filename, &pvfs2_id, pvfs2_path, OMPIO_MAX_NAME); + if (ret < 0 ) { + PVFS_perror("PVFS_util_resolve", ret); + o_status.error = -1; + } + else { + fake_an_open (pvfs2_id, + pvfs2_path, + access_mode, + mca_fs_pvfs2_stripe_width, + (PVFS_size)mca_fs_pvfs2_stripe_size, + pvfs2_fs, + &o_status); + } + pvfs2_fs->object_ref = o_status.object_ref; + fh->f_fs_ptr = pvfs2_fs; + } + + /* broadcast status and (possibly valid) object reference */ + MPI_Address(&o_status.error, &offsets[0]); + MPI_Address(&o_status.object_ref, &offsets[1]); + + ompi_datatype_create_struct (2, lens, offsets, types, &open_status_type); + ompi_datatype_commit (&open_status_type); + + fh->f_comm->c_coll.coll_bcast (MPI_BOTTOM, + 1, + open_status_type, + OMPIO_ROOT, + fh->f_comm, + fh->f_comm->c_coll.coll_bcast_module); + + ompi_datatype_destroy (&open_status_type); + + if (o_status.error != 0) + { + if (NULL != pvfs2_fs) { + free(pvfs2_fs); + pvfs2_fs = NULL; + } + return OMPI_ERROR; + } + + pvfs2_fs->object_ref = o_status.object_ref; + fh->f_fs_ptr = pvfs2_fs; + if (mca_fs_pvfs2_stripe_size > 0) { + fh->f_stripe_size = mca_fs_pvfs2_stripe_size; + } + else { + fh->f_stripe_size = 65536; + } + return OMPI_SUCCESS; + + /* + if (fh->f_perm == OMPIO_PERM_NULL) { + old_mask = umask(022); + umask(old_mask); + perm = old_mask ^ 0666; + } + else { + perm = fh->f_perm; + } + + amode = 0; + if (fh->f_amode & MPI_MODE_CREATE) + amode = amode | O_CREAT; + if (fh->f_amode & MPI_MODE_RDONLY) + amode = amode | O_RDONLY; + if 
(fh->f_amode & MPI_MODE_WRONLY) + amode = amode | O_WRONLY; + if (fh->f_amode & MPI_MODE_RDWR) + amode = amode | O_RDWR; + if (fh->f_amode & MPI_MODE_EXCL) + amode = amode | O_EXCL; + + fh->fd = open (filename, amode, perm); + if (fh->fd < 0) { + return OMPI_ERROR; + } + + return OMPI_SUCCESS; + */ +} + +static void fake_an_open(PVFS_fs_id id, + char *pvfs2_name, + int access_mode, + int stripe_width, + PVFS_size stripe_size, + mca_fs_pvfs2 *pvfs2_fs, + open_status *o_status) +{ + int ret; + PVFS_sysresp_lookup resp_lookup; + PVFS_sysresp_getparent resp_getparent; + PVFS_sysresp_create resp_create; + PVFS_sys_attr attribs; + PVFS_sys_dist *dist; + + memset(&attribs, 0, sizeof(PVFS_sys_attr)); + + attribs.owner = geteuid(); + attribs.group = getegid(); + attribs.perms = 0644; + attribs.mask = PVFS_ATTR_SYS_ALL_SETABLE; + attribs.atime = time(NULL); + attribs.mtime = attribs.atime; + attribs.ctime = attribs.atime; + + if (stripe_width > 0 ) { + attribs.dfile_count = stripe_width; + attribs.mask |= PVFS_ATTR_SYS_DFILE_COUNT; + } + + dist = NULL; + + memset(&resp_lookup, 0, sizeof(resp_lookup)); + memset(&resp_getparent, 0, sizeof(resp_getparent)); + memset(&resp_create, 0, sizeof(resp_create)); + + ret = PVFS_sys_lookup(id, + pvfs2_name, + &(pvfs2_fs->credentials), + &resp_lookup, + PVFS2_LOOKUP_LINK_FOLLOW); + + if (ret == (-PVFS_ENOENT)) { + if (access_mode & MPI_MODE_CREATE) { + ret = PVFS_sys_getparent(id, + pvfs2_name, + &(pvfs2_fs->credentials), + &resp_getparent); + if (ret < 0) { + opal_output (1, "pvfs_sys_getparent returns with %d\n", ret); + o_status->error = ret; + return; + } + + /* Set the distribution strip size if specified */ + if (0 < stripe_size) { + /* Note that the distribution is hardcoded here */ + dist = PVFS_sys_dist_lookup ("simple_stripe"); + ret = PVFS_sys_dist_setparam (dist, + "strip_size", + &stripe_size); + if (ret < 0) + { + opal_output (1, + "pvfs_sys_dist_setparam returns with %d\n", ret); + o_status->error = ret; + } + } + + /* 
Perform file creation */ + ret = PVFS_sys_create(resp_getparent.basename, + resp_getparent.parent_ref, + attribs, + &(pvfs2_fs->credentials), + dist, + &resp_create); + /* +#ifdef HAVE_PVFS2_CREATE_WITHOUT_LAYOUT + ret = PVFS_sys_create(resp_getparent.basename, + resp_getparent.parent_ref, + attribs, + &(pvfs2_fs->credentials), + dist, + &resp_create); +#else + ret = PVFS_sys_create(resp_getparent.basename, + resp_getparent.parent_ref, + attribs, + &(pvfs2_fs->credentials), + dist, + NULL, + &resp_create); + #endif + */ + + /* if many creates are happening in this directory, the earlier + * sys_lookup may have returned ENOENT, but the sys_create could + * return EEXISTS. That means the file has been created anyway, so + * less work for us and we can just open it up and return the + * handle */ + if (ret == (-PVFS_EEXIST)) { + ret = PVFS_sys_lookup(id, + pvfs2_name, + &(pvfs2_fs->credentials), + &resp_lookup, + PVFS2_LOOKUP_LINK_FOLLOW); + if ( ret < 0 ) { + o_status->error = ret; + return; + } + o_status->error = ret; + o_status->object_ref = resp_lookup.ref; + return; + } + o_status->object_ref = resp_create.ref; + } + else { + opal_output (1, "cannot create file without MPI_MODE_CREATE\n"); + o_status->error = ret; + return; + } + } + else if (access_mode & MPI_MODE_EXCL) { + /* lookup should not succeed if opened with EXCL */ + o_status->error = -PVFS_EEXIST; + return; + } + else { + o_status->object_ref = resp_lookup.ref; + } + o_status->error = ret; + return; +} diff --git a/ompi/mca/fs/pvfs2/fs_pvfs2_file_set_info.c b/ompi/mca/fs/pvfs2/fs_pvfs2_file_set_info.c new file mode 100644 index 0000000000..d898634c09 --- /dev/null +++ b/ompi/mca/fs/pvfs2/fs_pvfs2_file_set_info.c @@ -0,0 +1,46 @@ +/* + * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. + * Copyright (c) 2004-2005 The University of Tennessee and The University + * of Tennessee Research Foundation. 
All rights + * reserved. + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2005 The Regents of the University of California. + * All rights reserved. + * Copyright (c) 2008-2011 University of Houston. All rights reserved. + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +/* This code is based on the PVFS2 ADIO module in ROMIO + * Copyright (C) 1997 University of Chicago. + * See COPYRIGHT notice in top-level directory. + */ + +#include "ompi_config.h" +#include "fs_pvfs2.h" + +#include "mpi.h" +#include "ompi/constants.h" +#include "orte/util/show_help.h" +#include "ompi/mca/fs/fs.h" + +/* + * file_set_info_pvfs2 + * + * Function: - set_info of a file + * Accepts: - same arguments as MPI_File_set_info() + * Returns: - Success if info is set + */ +int +mca_fs_pvfs2_file_set_info (mca_io_ompio_file_t *file_handle, + struct ompi_info_t *info) +{ + printf ("PVFS2 SET INFO\n"); + return OMPI_SUCCESS; +} diff --git a/ompi/mca/fs/pvfs2/fs_pvfs2_file_set_size.c b/ompi/mca/fs/pvfs2/fs_pvfs2_file_set_size.c new file mode 100644 index 0000000000..f850ff5bf1 --- /dev/null +++ b/ompi/mca/fs/pvfs2/fs_pvfs2_file_set_size.c @@ -0,0 +1,73 @@ +/* + * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. + * Copyright (c) 2004-2005 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2005 The Regents of the University of California. + * All rights reserved. + * Copyright (c) 2008-2011 University of Houston. All rights reserved. 
+ * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +/* This code is based on the PVFS2 ADIO module in ROMIO + * Copyright (C) 1997 University of Chicago. + * See COPYRIGHT notice in top-level directory. + */ + +#include "ompi_config.h" +#include "fs_pvfs2.h" + +#include "mpi.h" +#include "ompi/constants.h" +#include "orte/util/show_help.h" +#include "ompi/mca/fs/fs.h" + +/* + * file_set_size_pvfs2 + * + * Function: - set_size of a file + * Accepts: - same arguments as MPI_File_set_size() + * Returns: - Success if size is set + */ +int +mca_fs_pvfs2_file_set_size (mca_io_ompio_file_t *fh, + OMPI_MPI_OFFSET_TYPE size) +{ + int ret; + mca_fs_pvfs2 *pvfs2_fs; + + pvfs2_fs = (mca_fs_pvfs2 *)fh->f_fs_ptr; + + if (OMPIO_ROOT == fh->f_rank) { + ret = PVFS_sys_truncate(pvfs2_fs->object_ref, + size, &(pvfs2_fs->credentials)); + fh->f_comm->c_coll.coll_bcast (&ret, + 1, + MPI_INT, + OMPIO_ROOT, + fh->f_comm, + fh->f_comm->c_coll.coll_bcast_module); + } + else { + fh->f_comm->c_coll.coll_bcast (&ret, + 1, + MPI_INT, + OMPIO_ROOT, + fh->f_comm, + fh->f_comm->c_coll.coll_bcast_module); + } + + if (ret != 0) { + return OMPI_ERROR; + } + + return OMPI_SUCCESS; +} diff --git a/ompi/mca/fs/pvfs2/fs_pvfs2_file_sync.c b/ompi/mca/fs/pvfs2/fs_pvfs2_file_sync.c new file mode 100644 index 0000000000..1b3fd1f733 --- /dev/null +++ b/ompi/mca/fs/pvfs2/fs_pvfs2_file_sync.c @@ -0,0 +1,65 @@ +/* + * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. + * Copyright (c) 2004-2005 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2005 The Regents of the University of California. + * All rights reserved. + * Copyright (c) 2008-2011 University of Houston. 
All rights reserved. + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +/* This code is based on the PVFS2 ADIO module in ROMIO + * Copyright (C) 1997 University of Chicago. + * See COPYRIGHT notice in top-level directory. + */ + +#include "ompi_config.h" +#include "fs_pvfs2.h" + +#include "mpi.h" +#include "ompi/constants.h" +#include "orte/util/show_help.h" +#include "ompi/mca/fs/fs.h" + +/* + * file_sync_pvfs2 + * + * Function: - closes a new file + * Accepts: - file handle + * Returns: - Success if file closed + */ +int +mca_fs_pvfs2_file_sync (mca_io_ompio_file_t *fh) +{ + int ret; + mca_fs_pvfs2 *pvfs2_fs; + + ret = OMPI_SUCCESS; + + pvfs2_fs = (mca_fs_pvfs2 *)fh->f_fs_ptr; + + if (OMPIO_ROOT == fh->f_rank) { + ret = PVFS_sys_flush(pvfs2_fs->object_ref, &(pvfs2_fs->credentials)); + } + + fh->f_comm->c_coll.coll_bcast (&ret, + 1, + MPI_INT, + OMPIO_ROOT, + fh->f_comm, + fh->f_comm->c_coll.coll_bcast_module); + + if (0 != ret) { + return OMPI_ERROR; + } + return OMPI_SUCCESS; +} diff --git a/ompi/mca/fs/ufs/Makefile.am b/ompi/mca/fs/ufs/Makefile.am new file mode 100644 index 0000000000..fc56912cab --- /dev/null +++ b/ompi/mca/fs/ufs/Makefile.am @@ -0,0 +1,53 @@ +# +# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana +# University Research and Technology +# Corporation. All rights reserved. +# Copyright (c) 2004-2005 The University of Tennessee and The University +# of Tennessee Research Foundation. All rights +# reserved. +# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +# University of Stuttgart. All rights reserved. +# Copyright (c) 2004-2005 The Regents of the University of California. +# All rights reserved. +# Copyright (c) 2008-2011 University of Houston. All rights reserved. +# $COPYRIGHT$ +# +# Additional copyrights may follow +# +# $HEADER$ +# + +# Make the output library in this directory, and name it either +# mca__.la (for DSO builds) or libmca__.la +# (for static builds). 
+ +if MCA_BUILD_ompi_fs_ufs_DSO +component_noinst = +component_install = mca_fs_ufs.la +else +component_noinst = libmca_fs_ufs.la +component_install = +endif + +mcacomponentdir = $(pkglibdir) +mcacomponent_LTLIBRARIES = $(component_install) +mca_fs_ufs_la_SOURCES = $(sources) +mca_fs_ufs_la_LDFLAGS = -module -avoid-version + +noinst_LTLIBRARIES = $(component_noinst) +libmca_fs_ufs_la_SOURCES = $(sources) +libmca_fs_ufs_la_LDFLAGS = -module -avoid-version + +# Source files + +sources = \ + fs_ufs.h \ + fs_ufs.c \ + fs_ufs_component.c \ + fs_ufs_file_open.c \ + fs_ufs_file_close.c \ + fs_ufs_file_delete.c \ + fs_ufs_file_sync.c \ + fs_ufs_file_set_size.c \ + fs_ufs_file_get_size.c \ + fs_ufs_file_set_info.c diff --git a/ompi/mca/fs/ufs/fs_ufs.c b/ompi/mca/fs/ufs/fs_ufs.c new file mode 100644 index 0000000000..3382a30ab8 --- /dev/null +++ b/ompi/mca/fs/ufs/fs_ufs.c @@ -0,0 +1,93 @@ +/* + * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. + * Copyright (c) 2004-2006 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2005 The Regents of the University of California. + * All rights reserved. + * Copyright (c) 2008-2011 University of Houston. All rights reserved. + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + * + * These symbols are in a file by themselves to provide nice linker + * semantics. 
Since linkers generally pull in symbols by object fules, + * keeping these symbols as the only symbols in this file prevents + * utility programs such as "ompi_info" from having to import entire + * modules just to query their version and parameters + */ + +#include "ompi_config.h" +#include "mpi.h" +#include "ompi/mca/fs/fs.h" +#include "ompi/mca/fs/ufs/fs_ufs.h" + +/* + * ******************************************************************* + * ************************ actions structure ************************ + * ******************************************************************* + */ +static mca_fs_base_module_1_0_0_t ufs = { + mca_fs_ufs_module_init, /* initalise after being selected */ + mca_fs_ufs_module_finalize, /* close a module on a communicator */ + mca_fs_ufs_file_open, + mca_fs_ufs_file_close, + mca_fs_ufs_file_delete, + mca_fs_ufs_file_set_size, + mca_fs_ufs_file_get_size, + mca_fs_ufs_file_set_info, + mca_fs_ufs_file_sync +}; +/* + * ******************************************************************* + * ************************* structure ends ************************** + * ******************************************************************* + */ + +int mca_fs_ufs_component_init_query(bool enable_progress_threads, + bool enable_mpi_threads) +{ + /* Nothing to do */ + + return OMPI_SUCCESS; +} + +struct mca_fs_base_module_1_0_0_t * +mca_fs_ufs_component_file_query (mca_io_ompio_file_t *fh, int *priority) +{ + *priority = mca_fs_ufs_priority; + + if (UFS == fh->f_fstype) { + if (*priority < 50) { + *priority = 50; + } + } + + return &ufs; +} + +int mca_fs_ufs_component_file_unquery (mca_io_ompio_file_t *file) +{ + /* This function might be needed for some purposes later. 
for now it + * does not have anything to do since there are no steps which need + * to be undone if this module is not selected */ + + return OMPI_SUCCESS; +} + +int mca_fs_ufs_module_init (mca_io_ompio_file_t *file) +{ + return OMPI_SUCCESS; +} + + +int mca_fs_ufs_module_finalize (mca_io_ompio_file_t *file) +{ + return OMPI_SUCCESS; +} diff --git a/ompi/mca/fs/ufs/fs_ufs.h b/ompi/mca/fs/ufs/fs_ufs.h new file mode 100644 index 0000000000..ec006fe389 --- /dev/null +++ b/ompi/mca/fs/ufs/fs_ufs.h @@ -0,0 +1,81 @@ +/* + * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. + * Copyright (c) 2004-2006 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2005 The Regents of the University of California. + * All rights reserved. + * Copyright (c) 2008-2011 University of Houston. All rights reserved. 
+ * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#ifndef MCA_FS_UFS_H +#define MCA_FS_UFS_H + +#include "ompi_config.h" +#include "opal/mca/mca.h" +#include "ompi/mca/fs/fs.h" +#include "ompi/mca/io/ompio/io_ompio.h" + +extern int mca_fs_ufs_priority; + +BEGIN_C_DECLS + +int mca_fs_ufs_component_init_query(bool enable_progress_threads, + bool enable_mpi_threads); +struct mca_fs_base_module_1_0_0_t * +mca_fs_ufs_component_file_query (mca_io_ompio_file_t *fh, int *priority); +int mca_fs_ufs_component_file_unquery (mca_io_ompio_file_t *file); + +int mca_fs_ufs_module_init (mca_io_ompio_file_t *file); +int mca_fs_ufs_module_finalize (mca_io_ompio_file_t *file); + +OMPI_MODULE_DECLSPEC extern mca_fs_base_component_2_0_0_t mca_fs_ufs_component; +/* + * ****************************************************************** + * ********* functions which are implemented in this module ********* + * ****************************************************************** + */ + +int mca_fs_ufs_file_open (struct ompi_communicator_t *comm, + char *filename, + int amode, + struct ompi_info_t *info, + mca_io_ompio_file_t *fh); + +int mca_fs_ufs_file_close (mca_io_ompio_file_t *fh); + +int mca_fs_ufs_file_delete (char *filename, + struct ompi_info_t *info); + +int mca_fs_ufs_file_set_size (mca_io_ompio_file_t *fh, + OMPI_MPI_OFFSET_TYPE size); + +int mca_fs_ufs_file_get_size (mca_io_ompio_file_t *fh, + OMPI_MPI_OFFSET_TYPE *size); + +int mca_fs_ufs_file_set_info (mca_io_ompio_file_t *fh, + struct ompi_info_t *info); + +int mca_fs_ufs_file_sync (mca_io_ompio_file_t *fh); + +int mca_fs_ufs_file_seek (mca_io_ompio_file_t *fh, + OMPI_MPI_OFFSET_TYPE offset, + int whence); +/* + * ****************************************************************** + * ************ functions implemented in this module end ************ + * ****************************************************************** + */ + +END_C_DECLS + +#endif /* MCA_FS_UFS_H */ diff --git 
a/ompi/mca/fs/ufs/fs_ufs_component.c b/ompi/mca/fs/ufs/fs_ufs_component.c new file mode 100644 index 0000000000..93805f55df --- /dev/null +++ b/ompi/mca/fs/ufs/fs_ufs_component.c @@ -0,0 +1,65 @@ +/* + * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. + * Copyright (c) 2004-2005 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2005 The Regents of the University of California. + * All rights reserved. + * Copyright (c) 2008-2011 University of Houston. All rights reserved. + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + * + * These symbols are in a file by themselves to provide nice linker + * semantics. Since linkers generally pull in symbols by object + * files, keeping these symbols as the only symbols in this file + * prevents utility programs such as "ompi_info" from having to import + * entire components just to query their version and parameters. 
+ */ + +#include "ompi_config.h" +#include "fs_ufs.h" +#include "mpi.h" + +int mca_fs_ufs_priority = 10; + +/* + * Public string showing the fs ufs component version number + */ +const char *mca_fs_ufs_component_version_string = + "OMPI/MPI ufs FS MCA component version " OMPI_VERSION; + +/* + * Instantiate the public struct with all of our public information + * and pointers to our public functions in it + */ +mca_fs_base_component_2_0_0_t mca_fs_ufs_component = { + + /* First, the mca_component_t struct containing meta information + about the component itself */ + + { + MCA_FS_BASE_VERSION_2_0_0, + + /* Component name and version */ + "ufs", + OMPI_MAJOR_VERSION, + OMPI_MINOR_VERSION, + OMPI_RELEASE_VERSION, + NULL, + NULL + }, + { + /* This component is checkpointable */ + MCA_BASE_METADATA_PARAM_CHECKPOINT + }, + mca_fs_ufs_component_init_query, /* get thread level */ + mca_fs_ufs_component_file_query, /* get priority and actions */ + mca_fs_ufs_component_file_unquery /* undo what was done by previous function */ +}; diff --git a/ompi/mca/fs/ufs/fs_ufs_file_close.c b/ompi/mca/fs/ufs/fs_ufs_file_close.c new file mode 100644 index 0000000000..704abe4fe0 --- /dev/null +++ b/ompi/mca/fs/ufs/fs_ufs_file_close.c @@ -0,0 +1,51 @@ +/* + * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. + * Copyright (c) 2004-2005 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2005 The Regents of the University of California. + * All rights reserved. + * Copyright (c) 2008-2011 University of Houston. All rights reserved. 
+ * $COPYRIGHT$
+ *
+ * Additional copyrights may follow
+ *
+ * $HEADER$
+ */
+
+
+#include "ompi_config.h"
+#include "fs_ufs.h"
+
+#include <fcntl.h>   /* NOTE(review): header name lost in extraction -- confirm */
+#include <unistd.h>  /* close() */
+#include "mpi.h"
+#include "ompi/constants.h"
+#include "orte/util/show_help.h"
+#include "ompi/mca/fs/fs.h"
+
+/*
+ *  file_close_ufs
+ *
+ *  Function:   - closes the descriptor of an open file handle
+ *  Accepts:    - file handle
+ *  Returns:    - OMPI_SUCCESS if the descriptor was closed,
+ *                OMPI_ERROR if close() failed
+ */
+int
+mca_fs_ufs_file_close (mca_io_ompio_file_t *fh)
+{
+    /* Barrier over the file's communicator before the descriptor is
+       released. */
+    fh->f_comm->c_coll.coll_barrier (fh->f_comm,
+                                     fh->f_comm->c_coll.coll_barrier_module);
+    /* close() may be the first place a write error shows up. */
+    if (-1 == close (fh->fd)) {
+        return OMPI_ERROR;
+    }
+    return OMPI_SUCCESS;
+}
diff --git a/ompi/mca/fs/ufs/fs_ufs_file_delete.c b/ompi/mca/fs/ufs/fs_ufs_file_delete.c new file mode 100644 index 0000000000..af0a1c4f93 --- /dev/null +++ b/ompi/mca/fs/ufs/fs_ufs_file_delete.c @@ -0,0 +1,49 @@ +/* + * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. + * Copyright (c) 2004-2005 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2005 The Regents of the University of California. + * All rights reserved. + * Copyright (c) 2008-2011 University of Houston. All rights reserved. 
+ * $COPYRIGHT$
+ *
+ * Additional copyrights may follow
+ *
+ * $HEADER$
+ */
+
+
+#include "ompi_config.h"
+#include "fs_ufs.h"
+#include <unistd.h>  /* unlink() */
+
+#include "mpi.h"
+#include "ompi/constants.h"
+#include "orte/util/show_help.h"
+#include "ompi/mca/fs/fs.h"
+
+/*
+ *  file_delete_ufs
+ *
+ *  Function:   - deletes a file with unlink()
+ *  Accepts:    - file name & info (info is not used here)
+ *  Returns:    - OMPI_SUCCESS if the file was removed, else OMPI_ERROR
+ */
+int
+mca_fs_ufs_file_delete (char* file_name,
+                        struct ompi_info_t *info)
+{
+    int ret;
+
+    ret = unlink(file_name);
+    if (0 > ret) {
+        return OMPI_ERROR;
+    }
+
+    return OMPI_SUCCESS;
+}
diff --git a/ompi/mca/fs/ufs/fs_ufs_file_get_size.c b/ompi/mca/fs/ufs/fs_ufs_file_get_size.c new file mode 100644 index 0000000000..3b3be53d50 --- /dev/null +++ b/ompi/mca/fs/ufs/fs_ufs_file_get_size.c @@ -0,0 +1,53 @@ +/* + * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. + * Copyright (c) 2004-2005 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2005 The Regents of the University of California. + * All rights reserved. + * Copyright (c) 2008-2011 University of Houston. All rights reserved. 
+ * $COPYRIGHT$
+ *
+ * Additional copyrights may follow
+ *
+ * $HEADER$
+ */
+
+
+#include "ompi_config.h"
+#include "fs_ufs.h"
+
+#include "mpi.h"
+#include "ompi/constants.h"
+#include "orte/util/show_help.h"
+#include "ompi/mca/fs/fs.h"
+
+#include <unistd.h>  /* lseek() */
+
+/*
+ *  file_get_size_ufs
+ *
+ *  Function:   - reports the size of a file by seeking to its end
+ *  Accepts:    - same arguments as MPI_File_get_size()
+ *  Returns:    - OMPI_SUCCESS, or OMPI_ERROR if an lseek() fails
+ */
+int
+mca_fs_ufs_file_get_size (mca_io_ompio_file_t *fh,
+                          OMPI_MPI_OFFSET_TYPE *size)
+{
+    *size = lseek(fh->fd, 0, SEEK_END);
+    if (-1 == *size) {
+        perror ("lseek");
+        return OMPI_ERROR;
+    }
+    /* The probe moved the file pointer; put it back at fh->f_offset. */
+    if (-1 == (lseek(fh->fd, fh->f_offset, SEEK_SET))) {
+        perror ("lseek");
+        return OMPI_ERROR;
+    }
+    return OMPI_SUCCESS;
+}
diff --git a/ompi/mca/fs/ufs/fs_ufs_file_open.c b/ompi/mca/fs/ufs/fs_ufs_file_open.c new file mode 100644 index 0000000000..672ef7ff1b --- /dev/null +++ b/ompi/mca/fs/ufs/fs_ufs_file_open.c @@ -0,0 +1,95 @@ +/* + * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. + * Copyright (c) 2004-2005 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2005 The Regents of the University of California. + * All rights reserved. + * Copyright (c) 2008-2010 University of Houston. All rights reserved. 
+ * $COPYRIGHT$
+ *
+ * Additional copyrights may follow
+ *
+ * $HEADER$
+ */
+
+
+#include "ompi_config.h"
+#include "fs_ufs.h"
+
+#include <fcntl.h>     /* open() and the O_* flags */
+#include <sys/stat.h>  /* umask() */
+#include "mpi.h"
+#include "ompi/constants.h"
+#include "orte/util/show_help.h"
+#include "ompi/mca/fs/fs.h"
+#include "ompi/communicator/communicator.h"
+#include "ompi/info/info.h"
+
+/*
+ *  file_open_ufs
+ *
+ *  Function:   - opens a file with open() and stores the descriptor
+ *                in fh->fd
+ *  Accepts:    - same arguments as MPI_File_open(); the access mode
+ *                and the permissions are read from fh (f_amode,
+ *                f_perm), comm, access_mode and info are not used
+ *  Returns:    - OMPI_SUCCESS, or OMPI_ERROR if open() failed
+ */
+int
+mca_fs_ufs_file_open (struct ompi_communicator_t *comm,
+                      char* filename,
+                      int access_mode,
+                      struct ompi_info_t *info,
+                      mca_io_ompio_file_t *fh)
+{
+    int amode;
+    int old_mask, perm;
+
+    if (fh->f_perm == OMPIO_PERM_NULL)
+    {
+        /* No permissions requested: use rw for everybody minus the
+           bits of the process umask.  umask() can only be read by
+           setting it, so the old value is put back right away.
+           Clearing the masked bits (instead of XOR-ing them) keeps
+           umask bits that lie outside 0666, e.g. the execute bits,
+           from being switched on in perm. */
+        old_mask = umask(022);
+        umask(old_mask);
+        perm = 0666 & ~old_mask;
+    }
+    else
+    {
+        perm = fh->f_perm;
+    }
+
+    /* Translate the MPI access mode bits into open() flags. */
+    amode = 0;
+    if (fh->f_amode & MPI_MODE_CREATE) {
+        amode = amode | O_CREAT;
+    }
+    if (fh->f_amode & MPI_MODE_RDONLY) {
+        amode = amode | O_RDONLY;
+    }
+    if (fh->f_amode & MPI_MODE_WRONLY) {
+        amode = amode | O_WRONLY;
+    }
+    if (fh->f_amode & MPI_MODE_RDWR) {
+        amode = amode | O_RDWR;
+    }
+    if (fh->f_amode & MPI_MODE_EXCL) {
+        amode = amode | O_EXCL;
+    }
+
+    fh->fd = open (filename, amode, perm);
+    if (-1 == fh->fd)
+    {
+        return OMPI_ERROR;
+    }
+
+    return OMPI_SUCCESS;
+}
diff --git a/ompi/mca/fs/ufs/fs_ufs_file_set_info.c b/ompi/mca/fs/ufs/fs_ufs_file_set_info.c new file mode 100644 index 0000000000..688a7e0f38 --- /dev/null +++ b/ompi/mca/fs/ufs/fs_ufs_file_set_info.c @@ -0,0 +1,42 @@ +/* + * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. + * Copyright (c) 2004-2005 The University of Tennessee and The University + * of Tennessee Research Foundation. 
All rights
+ *                         reserved.
+ * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
+ *                         University of Stuttgart. All rights reserved.
+ * Copyright (c) 2004-2005 The Regents of the University of California.
+ *                         All rights reserved.
+ * Copyright (c) 2008-2011 University of Houston. All rights reserved.
+ * $COPYRIGHT$
+ *
+ * Additional copyrights may follow
+ *
+ * $HEADER$
+ */
+
+
+#include "ompi_config.h"
+#include "fs_ufs.h"
+
+#include "mpi.h"
+#include "ompi/constants.h"
+#include "orte/util/show_help.h"
+#include "ompi/mca/fs/fs.h"
+
+/*
+ *  file_set_info_ufs
+ *
+ *  Function:   - set_info of a file (no info key is acted on here)
+ *  Accepts:    - same arguments as MPI_File_set_info()
+ *  Returns:    - always OMPI_SUCCESS
+ */
+int
+mca_fs_ufs_file_set_info (mca_io_ompio_file_t *file_handle,
+                          struct ompi_info_t *info)
+{
+    /* Nothing to do: no info key is interpreted by this function. */
+    return OMPI_SUCCESS;
+}
diff --git a/ompi/mca/fs/ufs/fs_ufs_file_set_size.c b/ompi/mca/fs/ufs/fs_ufs_file_set_size.c new file mode 100644 index 0000000000..9cb4cd44d8 --- /dev/null +++ b/ompi/mca/fs/ufs/fs_ufs_file_set_size.c @@ -0,0 +1,56 @@ +/* + * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. + * Copyright (c) 2004-2005 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2005 The Regents of the University of California. + * All rights reserved. + * Copyright (c) 2008-2011 University of Houston. All rights reserved. 
+ * $COPYRIGHT$
+ *
+ * Additional copyrights may follow
+ *
+ * $HEADER$
+ */
+
+
+#include "ompi_config.h"
+#include "fs_ufs.h"
+
+#include "mpi.h"
+#include "ompi/constants.h"
+#include "orte/util/show_help.h"
+#include "ompi/mca/fs/fs.h"
+
+#include <unistd.h>  /* ftruncate() */
+
+/*
+ *  file_set_size_ufs
+ *
+ *  Function:   - set_size of a file with ftruncate()
+ *  Accepts:    - same arguments as MPI_File_set_size()
+ *  Returns:    - OMPI_SUCCESS, or OMPI_ERROR if OMPIO_ROOT's ftruncate() failed
+ */
+int
+mca_fs_ufs_file_set_size (mca_io_ompio_file_t *fh,
+                          OMPI_MPI_OFFSET_TYPE size)
+{
+    int err = 0;
+    /* No rank check: every caller truncates through its own descriptor. */
+    err = ftruncate(fh->fd, size);
+    /* err is then replaced everywhere by the value held on OMPIO_ROOT. */
+    fh->f_comm->c_coll.coll_bcast (&err,
+                                   1,
+                                   MPI_INT,
+                                   OMPIO_ROOT,
+                                   fh->f_comm,
+                                   fh->f_comm->c_coll.coll_bcast_module);
+    if (-1 == err) {
+        return OMPI_ERROR;
+    }
+    return OMPI_SUCCESS;
+}
diff --git a/ompi/mca/fs/ufs/fs_ufs_file_sync.c b/ompi/mca/fs/ufs/fs_ufs_file_sync.c new file mode 100644 index 0000000000..30c227ed21 --- /dev/null +++ b/ompi/mca/fs/ufs/fs_ufs_file_sync.c @@ -0,0 +1,41 @@ +/* + * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. + * Copyright (c) 2004-2005 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2005 The Regents of the University of California. + * All rights reserved. + * Copyright (c) 2008-2011 University of Houston. All rights reserved. 
+ * $COPYRIGHT$
+ *
+ * Additional copyrights may follow
+ *
+ * $HEADER$
+ */
+
+
+#include "ompi_config.h"
+#include "fs_ufs.h"
+#include <unistd.h>  /* fsync() */
+
+#include "mpi.h"
+#include "ompi/constants.h"
+#include "orte/util/show_help.h"
+#include "ompi/mca/fs/fs.h"
+
+/*
+ *  Function:   - calls fsync() on the descriptor of the file handle
+ *  Returns:    - OMPI_SUCCESS, or OMPI_ERROR if fsync() failed
+ */
+int
+mca_fs_ufs_file_sync (mca_io_ompio_file_t *fh)
+{
+    if (-1 == fsync (fh->fd)) {
+        return OMPI_ERROR;
+    }
+    return OMPI_SUCCESS;
+}
diff --git a/ompi/mca/io/base/io_base_delete.c b/ompi/mca/io/base/io_base_delete.c index a3c895d73b..1685e409a5 100644 --- a/ompi/mca/io/base/io_base_delete.c +++ b/ompi/mca/io/base/io_base_delete.c @@ -133,14 +133,12 @@ int mca_io_base_delete(char *filename, struct ompi_info_t *info) /* show_help */ return OMPI_ERROR; } - /* Do some kind of collective operation to find a module that everyone has available */ - #if 1 /* For the moment, just take the top module off the list */ - - item = opal_list_remove_first(selectable); + /* MSC actually take the bottom */ + item = opal_list_remove_last(selectable); avail = (avail_io_t *) item; selected = *avail; OBJ_RELEASE(avail); @@ -162,11 +160,11 @@ int mca_io_base_delete(char *filename, struct ompi_info_t *info) OBJ_RELEASE(selectable); /* Finally -- delete the file with the selected component */ - + if (OMPI_SUCCESS != (err = delete_file(&selected, filename, info))) { return err; } - + /* Announce the winner */ opal_output_verbose(10, mca_io_base_output, @@ -231,7 +229,24 @@ static opal_list_t *check_components(opal_list_t *components, /* Put this item on the list in priority order (highest priority first). Should it go first? 
*/ + /* MSC actually put it Lowest priority first */ + for(item2 = opal_list_get_first(selectable); + item2 != opal_list_get_end(selectable); + item2 = opal_list_get_next(item2)) { + avail2 = (avail_io_t*)item2; + if(avail->ai_priority < avail2->ai_priority) { + opal_list_insert_pos(selectable, + item2, (opal_list_item_t*)avail); + break; + } + } + + if(opal_list_get_end(selectable) == item2) { + opal_list_append(selectable, (opal_list_item_t*)avail); + } + + /* item2 = opal_list_get_first(selectable); avail2 = (avail_io_t *) item2; if (opal_list_get_end(selectable) == item2 || @@ -247,16 +262,17 @@ static opal_list_t *check_components(opal_list_t *components, break; } } - + */ /* If we didn't find a place to put it in the list, then append it (because it has the lowest priority found so far) */ - + /* if (opal_list_get_end(selectable) == item2) { opal_list_append(selectable, (opal_list_item_t *) avail); } } + */ } } } diff --git a/ompi/mca/io/base/io_base_file_select.c b/ompi/mca/io/base/io_base_file_select.c index 13a3f3fe11..ee94792a58 100644 --- a/ompi/mca/io/base/io_base_file_select.c +++ b/ompi/mca/io/base/io_base_file_select.c @@ -10,6 +10,7 @@ * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2008 Sun Microsystems, Inc. All rights reserved. + * Copyright (c) 2008-2011 University of Houston. All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow
@@ -34,7 +35,14 @@ #include "ompi/mca/io/io.h" #include "ompi/mca/io/base/base.h" #include "ompi/mca/io/base/io_base_request.h" - +#include "ompi/mca/fs/fs.h" +#include "ompi/mca/fs/base/base.h" +#include "ompi/mca/fcoll/fcoll.h" +#include "ompi/mca/fcoll/base/base.h" +#include "ompi/mca/fbtl/fbtl.h" +#include "ompi/mca/fbtl/base/base.h" +#include "ompi/mca/sharedfp/sharedfp.h" +#include "ompi/mca/sharedfp/base/base.h" /* * Local types
@@ -180,8 +188,8 @@ int mca_io_base_file_select(ompi_file_t *file, #if 1 /* For the moment, just take the top module off the list */ - - item = opal_list_remove_first(selectable); + /* MSC actually take the bottom */ + item = opal_list_remove_last(selectable); avail = (avail_io_t *) item; selected = *avail; OBJ_RELEASE(avail);
@@ -209,6 +217,44 @@ int mca_io_base_file_select(ompi_file_t *file, file->f_io_selected_module = selected.ai_module; file->f_io_selected_data = selected.ai_module_data;
+    if (!strcmp (selected.ai_component.v2_0_0.io_version.mca_component_name,
+                 "ompio")) {
+        int ret;
+        /* Only when "ompio" was selected: open the four frameworks below. */
+        if (OMPI_SUCCESS != (ret = mca_fs_base_open())) {
+            return ret;
+        }
+        if (OMPI_SUCCESS != (ret = mca_fcoll_base_open())) {
+            return ret;
+        }
+        if (OMPI_SUCCESS != (ret = mca_fbtl_base_open())) {
+            return ret;
+        }
+        if (OMPI_SUCCESS != (ret = mca_sharedfp_base_open())) {
+            return ret;
+        }
+        /* A failing call is reported with its own code (ret), not err. */
+        if (OMPI_SUCCESS !=
+            (ret = mca_fs_base_find_available(OMPI_ENABLE_PROGRESS_THREADS,
+                                              OMPI_ENABLE_THREAD_MULTIPLE))) {
+            return ret;
+        }
+        if (OMPI_SUCCESS !=
+            (ret = mca_fcoll_base_find_available(OMPI_ENABLE_PROGRESS_THREADS,
+                                                 OMPI_ENABLE_THREAD_MULTIPLE))) {
+            return ret;
+        }
+        if (OMPI_SUCCESS !=
+            (ret = mca_fbtl_base_find_available(OMPI_ENABLE_PROGRESS_THREADS,
+                                                OMPI_ENABLE_THREAD_MULTIPLE))) {
+            return ret;
+        }
+        if (OMPI_SUCCESS !=
+            (ret = mca_sharedfp_base_find_available(OMPI_ENABLE_PROGRESS_THREADS,
+                                                    OMPI_ENABLE_THREAD_MULTIPLE))) {
+            return ret;
+        }
+    }
/* Finally -- intialize the selected 
module. */ if (OMPI_SUCCESS != (err = module_init(file))) { @@ -279,7 +325,24 @@ static opal_list_t *check_components(opal_list_t *components, /* Put this item on the list in priority order (highest priority first). Should it go first? */ + /* MSC actually put it Lowest priority first */ + for(item2 = opal_list_get_first(selectable); + item2 != opal_list_get_end(selectable); + item2 = opal_list_get_next(item2)) { + avail2 = (avail_io_t*)item2; + if(avail->ai_priority < avail2->ai_priority) { + opal_list_insert_pos(selectable, + item2, (opal_list_item_t*)avail); + break; + } + } + + if(opal_list_get_end(selectable) == item2) { + opal_list_append(selectable, (opal_list_item_t*)avail); + } + + /* item2 = opal_list_get_first(selectable); avail2 = (avail_io_t *) item2; if (opal_list_get_end(selectable) == item2 || @@ -295,16 +358,16 @@ static opal_list_t *check_components(opal_list_t *components, break; } } - + */ /* If we didn't find a place to put it in the list, then append it (because it has the lowest priority found so far) */ - + /* if (opal_list_get_end(selectable) == item2) { opal_list_append(selectable, (opal_list_item_t *) avail); } - } + }*/ } } } diff --git a/ompi/mca/io/ompio/Makefile.am b/ompi/mca/io/ompio/Makefile.am new file mode 100644 index 0000000000..0f52f8e992 --- /dev/null +++ b/ompi/mca/io/ompio/Makefile.am @@ -0,0 +1,54 @@ +# +# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana +# University Research and Technology +# Corporation. All rights reserved. +# Copyright (c) 2004-2005 The University of Tennessee and The University +# of Tennessee Research Foundation. All rights +# reserved. +# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +# University of Stuttgart. All rights reserved. +# Copyright (c) 2004-2005 The Regents of the University of California. +# All rights reserved. +# Copyright (c) 2008-2011 University of Houston. All rights reserved. 
+# $COPYRIGHT$ +# +# Additional copyrights may follow +# +# $HEADER$ +# + +# Make the output library in this directory, and name it either +# mca_<type>_<name>.la (for DSO builds) or libmca_<type>_<name>.la +# (for static builds). + +if MCA_BUILD_ompi_io_ompio_DSO +component_noinst = +component_install = mca_io_ompio.la +else +component_noinst = libmca_io_ompio.la +component_install = +endif + +mcacomponentdir = $(pkglibdir) +mcacomponent_LTLIBRARIES = $(component_install) +mca_io_ompio_la_SOURCES = $(sources) +mca_io_ompio_la_LDFLAGS = -module -avoid-version + +noinst_LTLIBRARIES = $(component_noinst) +libmca_io_ompio_la_SOURCES = $(sources) +libmca_io_ompio_la_LDFLAGS = -module -avoid-version + +# Source files + +sources = \ + io_ompio.h \ + io_ompio.c \ + io_ompio_component.c \ + io_ompio_module.c \ + io_ompio_coll_offset.c \ + io_ompio_coll_array.c \ + io_ompio_file_set_view.c \ + io_ompio_file_open.c \ + io_ompio_file_write.c \ + io_ompio_file_read.c \ + io_ompio_nbc.c diff --git a/ompi/mca/io/ompio/io_ompio.c b/ompi/mca/io/ompio/io_ompio.c new file mode 100644 index 0000000000..88689c6cc9 --- /dev/null +++ b/ompi/mca/io/ompio/io_ompio.c @@ -0,0 +1,2650 @@ +/* -*- Mode: C; c-basic-offset:4 ; -*- */ +/* + * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. + * Copyright (c) 2004-2007 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2005 The Regents of the University of California. + * All rights reserved. + * Copyright (c) 2008-2011 University of Houston. All rights reserved. 
+ * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#include "ompi_config.h" + +#include "ompi/runtime/params.h" +#include "ompi/communicator/communicator.h" +#include "ompi/mca/pml/pml.h" +#include "opal/datatype/opal_convertor.h" +#include "opal/datatype/opal_datatype.h" +#include "ompi/datatype/ompi_datatype.h" +#include "ompi/info/info.h" +#include "ompi/request/request.h" + +#include +#include +#include /* or */ + +#ifdef HAVE_PVFS2_H +#include "pvfs2.h" +#endif + +#include "io_ompio.h" + +static void get_parent_dir (char *filename, char **dirnamep); + +int ompi_io_ompio_set_file_defaults (mca_io_ompio_file_t *fh) +{ + + if (NULL != fh) { + ompi_datatype_t *types[2], *default_file_view; + int blocklen[2] = {1, 1}; + OPAL_PTRDIFF_TYPE d[2], base; + int i; + + fh->f_info = MPI_INFO_NULL; + fh->f_comm = MPI_COMM_NULL; + fh->f_rank = -1; + fh->f_size = 0; + fh->f_io_array = NULL; + fh->f_perm = OMPIO_PERM_NULL; + fh->f_flags = 0; + fh->f_bytes_per_agg = mca_io_ompio_bytes_per_agg; + fh->f_datarep = strdup ("native"); + + fh->f_offset = 0; + fh->f_disp = 0; + fh->f_position_in_file_view = 0; + fh->f_index_in_file_view = 0; + fh->f_total_bytes = 0; + + fh->f_procs_in_group = NULL; + fh->f_procs_per_group = -1; + + + ompi_datatype_create_contiguous(1048576, &ompi_mpi_byte.dt, &default_file_view); + + fh->f_etype = default_file_view; + fh->f_filetype = default_file_view; + + /* Default file View */ + fh->f_iov_type = MPI_DATATYPE_NULL; + fh->f_iov_count = 1; + fh->f_decoded_iov = (struct iovec*)malloc(fh->f_iov_count * + sizeof(struct iovec)); + if (NULL == fh->f_decoded_iov) { + opal_output (1, "OUT OF MEMORY\n"); + return OMPI_ERR_OUT_OF_RESOURCE; + } + fh->f_cc_size = 1; + fh->f_stripe_size = mca_io_ompio_bytes_per_agg; + fh->f_decoded_iov[0].iov_len = 1; + fh->f_decoded_iov[0].iov_base = 0; + + /* + * Create a derived datatype for the created iovec + */ + types[0] = &ompi_mpi_long.dt; + types[1] = &ompi_mpi_long.dt; + MPI_Address( 
fh->f_decoded_iov, d); + MPI_Address( &fh->f_decoded_iov[0].iov_len, d+1); + base = d[0]; + for (i=0 ; i<2 ; i++) { + d[i] -= base; + } + ompi_datatype_create_struct (2, + blocklen, + d, + types, + &fh->f_iov_type); + ompi_datatype_commit (&fh->f_iov_type); + + fh->f_view_extent = 1; + fh->f_view_size = 1; + fh->f_etype_size = 1; + + return OMPI_SUCCESS; + } + else { + return OMPI_ERROR; + } +} + +int ompi_io_ompio_generate_current_file_view (mca_io_ompio_file_t *fh, + size_t max_data, + struct iovec **f_iov, + int *iov_count) +{ + + + + + struct iovec *iov = NULL; + size_t bytes_to_write; + size_t sum_previous_counts = 0; + int j, k; + int block = 1; + + + /* allocate an initial iovec, will grow if needed */ + iov = (struct iovec *) malloc + (OMPIO_IOVEC_INITIAL_SIZE * sizeof (struct iovec)); + if (NULL == iov) { + opal_output(1, "OUT OF MEMORY\n"); + return OMPI_ERR_OUT_OF_RESOURCE; + } + + sum_previous_counts = fh->f_position_in_file_view; + j = fh->f_index_in_file_view; + bytes_to_write = max_data; + k = 0; + + while (bytes_to_write) { + OPAL_PTRDIFF_TYPE disp; + /* reallocate if needed */ + if (OMPIO_IOVEC_INITIAL_SIZE*block <= k) { + block ++; + iov = (struct iovec *)realloc + (iov, OMPIO_IOVEC_INITIAL_SIZE *block *sizeof(struct iovec)); + if (NULL == iov) { + opal_output(1, "OUT OF MEMORY\n"); + return OMPI_ERR_OUT_OF_RESOURCE; + } + } + + if (fh->f_decoded_iov[j].iov_len - + (fh->f_total_bytes - sum_previous_counts) <= 0) { + sum_previous_counts += fh->f_decoded_iov[j].iov_len; + j = j + 1; + if (j == (int)fh->f_iov_count) { + j = 0; + sum_previous_counts = 0; + fh->f_offset += fh->f_view_extent; + fh->f_position_in_file_view = sum_previous_counts; + fh->f_index_in_file_view = j; + fh->f_total_bytes = 0; + } + } + + disp = (OPAL_PTRDIFF_TYPE)(fh->f_decoded_iov[j].iov_base) + + (fh->f_total_bytes - sum_previous_counts); + iov[k].iov_base = (IOVBASE_TYPE *)(disp + fh->f_offset); + + if ((fh->f_decoded_iov[j].iov_len - + (fh->f_total_bytes - 
sum_previous_counts)) + >= bytes_to_write) { + iov[k].iov_len = bytes_to_write; + } + else { + iov[k].iov_len = fh->f_decoded_iov[j].iov_len - + (fh->f_total_bytes - sum_previous_counts); + } + + fh->f_total_bytes += iov[k].iov_len; + bytes_to_write -= iov[k].iov_len; + k = k + 1; + } + fh->f_position_in_file_view = sum_previous_counts; + fh->f_index_in_file_view = j; + + *iov_count = k; + *f_iov = iov; + + return OMPI_SUCCESS; +} + +int ompi_io_ompio_set_explicit_offset (mca_io_ompio_file_t *fh, + OMPI_MPI_OFFSET_TYPE offset) +{ + + + size_t i = 0; + size_t k = 0; + + fh->f_offset += fh->f_view_extent * + (((offset-fh->f_offset)*fh->f_etype_size)/fh->f_view_size); + + fh->f_position_in_file_view = 0; + + fh->f_total_bytes = (offset*fh->f_etype_size) % fh->f_view_size; + + + fh->f_index_in_file_view = 0; + i = fh->f_total_bytes; + k = 0; + while (1) { + k += fh->f_decoded_iov[fh->f_index_in_file_view].iov_len; + if (i >= k) { + i = i - fh->f_decoded_iov[fh->f_index_in_file_view].iov_len; + fh->f_position_in_file_view += + fh->f_decoded_iov[fh->f_index_in_file_view].iov_len; + fh->f_index_in_file_view = fh->f_index_in_file_view+1; + } + else { + break; + } + } + + return OMPI_SUCCESS; +} + +int ompi_io_ompio_decode_datatype (mca_io_ompio_file_t *fh, + ompi_datatype_t *datatype, + int count, + void *buf, + size_t *max_data, + struct iovec **iov, + uint32_t *iovec_count) +{ + + + + opal_convertor_t convertor; + size_t remaining_length = 0; + uint32_t i; + uint32_t temp_count; + struct iovec * temp_iov; + size_t temp_data; + + opal_convertor_clone (fh->f_convertor, &convertor, 0); + + if (OMPI_SUCCESS != opal_convertor_prepare_for_send (&convertor, + &(datatype->super), + count, + buf)) { + opal_output (1, "Cannot attach the datatype to a convertor\n"); + return OMPI_ERROR; + } + remaining_length = count * datatype->super.size; + + temp_count = OMPIO_IOVEC_INITIAL_SIZE; + temp_iov = (struct iovec*)malloc(temp_count * sizeof(struct iovec)); + if (NULL == temp_iov) { + 
opal_output (1, "OUT OF MEMORY\n"); + return OMPI_ERR_OUT_OF_RESOURCE; + } + + while (0 == opal_convertor_raw(&convertor, + temp_iov, + &temp_count, + &temp_data)) { +#if 0 + printf ("New raw extraction (iovec_count = %d, max_data = %d)\n", + temp_count, temp_data); + for (i = 0; i < temp_count; i++) { + printf ("\t{%p, %d}\n", temp_iov[i].iov_base, temp_iov[i].iov_len); + } +#endif + + *iovec_count = *iovec_count + temp_count; + *max_data = *max_data + temp_data; + *iov = (struct iovec *) realloc (*iov, *iovec_count * sizeof(struct iovec)); + if (NULL == *iov) { + opal_output(1, "OUT OF MEMORY\n"); + return OMPI_ERR_OUT_OF_RESOURCE; + } + for (i=0 ; if_rank) { + printf ("%d Entries: \n",*iovec_count); + for (i=0 ; i<*iovec_count ; i++) { + printf ("\t{%p, %d}\n", + (*iov)[i].iov_base, + (*iov)[i].iov_len); + } + } + */ + if (remaining_length != 0) { + printf( "Not all raw description was been extracted (%lu bytes missing)\n", + (unsigned long) remaining_length ); + } + + if (NULL != temp_iov) { + free (temp_iov); + temp_iov = NULL; + } + + return OMPI_SUCCESS; +}
+
+/*
+ * Heap-sorts the indices 0..num_entries-1 by ascending io_array[].offset
+ * into sorted[]; io_array itself is not modified.  Returns OMPI_SUCCESS
+ * (also for num_entries == 0, where nothing is written), or
+ * OMPI_ERR_OUT_OF_RESOURCE if the scratch index array cannot be allocated.
+ */
+int ompi_io_ompio_sort (mca_io_ompio_io_array_t *io_array,
+                        int num_entries,
+                        int *sorted)
+{
+    int i = 0;
+    int j = 0;
+    int left = 0;
+    int right = 0;
+    int largest = 0;
+    int heap_size = num_entries - 1;
+    int temp = 0;
+    unsigned char done = 0;
+    int* temp_arr = NULL;
+
+    /* Nothing to sort; same early return as ompi_io_ompio_sort_iovec. */
+    if (0 == num_entries) {
+        return OMPI_SUCCESS;
+    }
+
+    temp_arr = (int*)malloc(num_entries*sizeof(int));
+    if (NULL == temp_arr) {
+        opal_output (1, "OUT OF MEMORY\n");
+        return OMPI_ERR_OUT_OF_RESOURCE;
+    }
+    for (i = 0; i < num_entries; ++i) {
+        temp_arr[i] = i;
+    }
+    /* num_entries can be a large no. so NO RECURSION */
+    for (i = num_entries/2-1 ; i>=0 ; i--) {
+        done = 0;
+        j = i;
+        largest = j;
+        while (!done) {
+            left = j*2+1;
+            right = j*2+2;
+            if ((left <= heap_size) &&
+                (io_array[temp_arr[left]].offset > io_array[temp_arr[j]].offset)) {
+                largest = left;
+            } else {
+                largest = j;
+            }
+            if ((right <= heap_size) &&
+                (io_array[temp_arr[right]].offset >
+                 io_array[temp_arr[largest]].offset)) {
+                largest = right;
+            }
+            if (largest != j) {
+                temp = temp_arr[largest];
+                temp_arr[largest] = temp_arr[j];
+                temp_arr[j] = temp;
+                j = largest;
+            } else {
+                done = 1;
+            }
+        }
+    }
+
+    for (i = num_entries-1; i >=1; --i) {
+        temp = temp_arr[0];
+        temp_arr[0] = temp_arr[i];
+        temp_arr[i] = temp;
+        heap_size--;
+        done = 0;
+        j = 0;
+        largest = j;
+        while (!done) {
+            left = j*2+1;
+            right = j*2+2;
+            if ((left <= heap_size) &&
+                (io_array[temp_arr[left]].offset >
+                 io_array[temp_arr[j]].offset)) {
+                largest = left;
+            } else {
+                largest = j;
+            }
+            if ((right <= heap_size) &&
+                (io_array[temp_arr[right]].offset >
+                 io_array[temp_arr[largest]].offset)) {
+                largest = right;
+            }
+            if (largest != j) {
+                temp = temp_arr[largest];
+                temp_arr[largest] = temp_arr[j];
+                temp_arr[j] = temp;
+                j = largest;
+            } else {
+                done = 1;
+            }
+        }
+        sorted[i] = temp_arr[i];
+    }
+    sorted[0] = temp_arr[0];
+
+    free (temp_arr);
+    return OMPI_SUCCESS;
+}
+
+int ompi_io_ompio_sort_iovec (struct iovec *iov, + int num_entries, + int *sorted) +{ + int i = 0; + int j = 0; + int left = 0; + int right = 0; + int largest = 0; + int heap_size = num_entries - 1; + int temp = 0; + unsigned char done = 0; + int* temp_arr = NULL; + + if (0 == num_entries) { + return OMPI_SUCCESS; + } + + temp_arr = (int*)malloc(num_entries*sizeof(int)); + if (NULL == temp_arr) { + opal_output (1, "OUT OF MEMORY\n"); + return OMPI_ERR_OUT_OF_RESOURCE; + } + temp_arr[0] = 0; + for (i = 1; i < num_entries; ++i) { + temp_arr[i] = i; + } + /* num_entries can be a large no. 
so NO RECURSION */ + for (i = num_entries/2-1 ; i>=0 ; i--) { + done = 0; + j = i; + largest = j; + + while (!done) { + left = j*2+1; + right = j*2+2; + if ((left <= heap_size) && + (iov[temp_arr[left]].iov_base > iov[temp_arr[j]].iov_base)) { + largest = left; + } + else { + largest = j; + } + if ((right <= heap_size) && + (iov[temp_arr[right]].iov_base > + iov[temp_arr[largest]].iov_base)) { + largest = right; + } + if (largest != j) { + temp = temp_arr[largest]; + temp_arr[largest] = temp_arr[j]; + temp_arr[j] = temp; + j = largest; + } + else { + done = 1; + } + } + } + + for (i = num_entries-1; i >=1; --i) { + temp = temp_arr[0]; + temp_arr[0] = temp_arr[i]; + temp_arr[i] = temp; + heap_size--; + done = 0; + j = 0; + largest = j; + + while (!done) { + left = j*2+1; + right = j*2+2; + + if ((left <= heap_size) && + (iov[temp_arr[left]].iov_base > + iov[temp_arr[j]].iov_base)) { + largest = left; + } + else { + largest = j; + } + if ((right <= heap_size) && + (iov[temp_arr[right]].iov_base > + iov[temp_arr[largest]].iov_base)) { + largest = right; + } + if (largest != j) { + temp = temp_arr[largest]; + temp_arr[largest] = temp_arr[j]; + temp_arr[j] = temp; + j = largest; + } + else { + done = 1; + } + } + sorted[i] = temp_arr[i]; + } + sorted[0] = temp_arr[0]; + + if (NULL != temp_arr) { + free(temp_arr); + temp_arr = NULL; + } + return OMPI_SUCCESS; +} + +int ompi_io_ompio_set_aggregator_props (mca_io_ompio_file_t *fh, + int num_aggregators, + size_t bytes_per_proc) +{ + + + int j; + int root_offset; + int ndims, i=1, n=0, total_groups=0; + int *dims=NULL, *periods=NULL, *coords=NULL, *coords_tmp=NULL; + int procs_per_node = 1; /* MSC TODO - Figure out a way to get this info */ + size_t max_bytes_per_proc = 0; + + + + /* + OMPI_MPI_OFFSET_TYPE temp; + int global_flag, flag; + */ + fh->f_flags |= OMPIO_AGGREGATOR_IS_SET; + + if (-1 == num_aggregators) { + /* Determine Topology Information */ + if (fh->f_comm->c_flags & OMPI_COMM_CART) { + 
MPI_Cartdim_get(fh->f_comm, &ndims); + + dims = (int*)malloc (ndims * sizeof(int)); + if (NULL == dims) { + opal_output (1, "OUT OF MEMORY\n"); + return OMPI_ERR_OUT_OF_RESOURCE; + } + + periods = (int*)malloc (ndims * sizeof(int)); + if (NULL == periods) { + opal_output (1, "OUT OF MEMORY\n"); + return OMPI_ERR_OUT_OF_RESOURCE; + } + + coords = (int*)malloc (ndims * sizeof(int)); + if (NULL == coords) { + opal_output (1, "OUT OF MEMORY\n"); + return OMPI_ERR_OUT_OF_RESOURCE; + } + + coords_tmp = (int*)malloc (ndims * sizeof(int)); + if (NULL == coords_tmp) { + opal_output (1, "OUT OF MEMORY\n"); + return OMPI_ERR_OUT_OF_RESOURCE; + } + + MPI_Cart_get(fh->f_comm, ndims, dims, periods, coords); + + /* + printf ("NDIMS = %d\n", ndims); + for (j=0 ; jf_rank,j,dims[j],j,periods[j],j,coords[j]); + } + */ + + while (1) { + if (fh->f_size/dims[0]*i >= procs_per_node) { + fh->f_procs_per_group = fh->f_size/dims[0]*i; + break; + } + i++; + } + + total_groups = ceil((float)fh->f_size/fh->f_procs_per_group); + + if ((coords[0]/i + 1) == total_groups && 0 != (total_groups%i)) { + fh->f_procs_per_group = (fh->f_size/dims[0]) * (total_groups%i); + } + /* + printf ("BEFORE ADJUSTMENT: %d ---> procs_per_group = %d total_groups = %d\n", + fh->f_rank, fh->f_procs_per_group, total_groups); + */ + /* check if the current grouping needs to be expanded or shrinked */ + if ((size_t)mca_io_ompio_bytes_per_agg < + bytes_per_proc * fh->f_procs_per_group) { + + root_offset = ceil ((float)mca_io_ompio_bytes_per_agg/bytes_per_proc); + if (fh->f_procs_per_group/root_offset != coords[1]/root_offset) { + fh->f_procs_per_group = root_offset; + } + else { + fh->f_procs_per_group = fh->f_procs_per_group%root_offset; + } + } + else if ((size_t)mca_io_ompio_bytes_per_agg > + bytes_per_proc * fh->f_procs_per_group) { + i = ceil ((float)mca_io_ompio_bytes_per_agg/ + (bytes_per_proc * fh->f_procs_per_group)); + root_offset = fh->f_procs_per_group * i; + + if (fh->f_size/root_offset != 
fh->f_rank/root_offset) { + fh->f_procs_per_group = root_offset; + } + else { + fh->f_procs_per_group = fh->f_size%root_offset; + } + } + /* + printf ("AFTER ADJUSTMENT: %d (%d) ---> procs_per_group = %d\n", + fh->f_rank, coords[1], fh->f_procs_per_group); + */ + fh->f_procs_in_group = (int*)malloc (fh->f_procs_per_group * sizeof(int)); + if (NULL == fh->f_procs_in_group) { + opal_output (1, "OUT OF MEMORY\n"); + return OMPI_ERR_OUT_OF_RESOURCE; + } + + for (j=0 ; jf_size ; j++) { + MPI_Cart_coords (fh->f_comm, j, ndims, coords_tmp); + if (coords_tmp[0]/i == coords[0]/i) { + if ((coords_tmp[1]/root_offset)*root_offset == + (coords[1]/root_offset)*root_offset) { + fh->f_procs_in_group[n] = j; + n++; + } + } + } + + fh->f_aggregator_index = 0; + + /* + if (fh->f_procs_in_group[fh->f_aggregator_index] == fh->f_rank) { + for (j=0 ; jf_procs_per_group; j++) { + printf ("%d: Proc %d: %d\n", fh->f_rank, j, fh->f_procs_in_group[j]); + } + } + */ + + if (NULL != dims) { + free (dims); + dims = NULL; + } + if (NULL != periods) { + free (periods); + periods = NULL; + } + if (NULL != coords) { + free (coords); + coords = NULL; + } + if (NULL != coords_tmp) { + free (coords_tmp); + coords_tmp = NULL; + } + return OMPI_SUCCESS; + } + + /* + temp = fh->f_iov_count; + fh->f_comm->c_coll.coll_bcast (&temp, + 1, + MPI_LONG, + OMPIO_ROOT, + fh->f_comm, + fh->f_comm->c_coll.coll_bcast_module); + + if (temp != fh->f_iov_count) { + flag = 0; + } + else { + flag = 1; + } + fh->f_comm->c_coll.coll_allreduce (&flag, + &global_flag, + 1, + MPI_INT, + MPI_MIN, + fh->f_comm, + fh->f_comm->c_coll.coll_allreduce_module); + */ + if (fh->f_flags & OMPIO_UNIFORM_FVIEW) { + OMPI_MPI_OFFSET_TYPE *start_offsets = NULL; + OMPI_MPI_OFFSET_TYPE stride = 0; + + if (OMPIO_ROOT == fh->f_rank) { + start_offsets = malloc (fh->f_size * sizeof(OMPI_MPI_OFFSET_TYPE)); + } + + fh->f_comm->c_coll.coll_gather (&fh->f_decoded_iov[0].iov_base, + 1, + MPI_LONG, + start_offsets, + 1, + MPI_LONG, + OMPIO_ROOT, + 
fh->f_comm, + fh->f_comm->c_coll.coll_gather_module); + if (OMPIO_ROOT == fh->f_rank) { + stride = start_offsets[1] - start_offsets[0]; + for (i=2 ; if_size ; i++) { + if (stride != start_offsets[i]-start_offsets[i-1]) { + break; + } + } + } + + if (NULL != start_offsets) { + free (start_offsets); + start_offsets = NULL; + } + + fh->f_comm->c_coll.coll_bcast (&i, + 1, + MPI_INT, + OMPIO_ROOT, + fh->f_comm, + fh->f_comm->c_coll.coll_bcast_module); + + fh->f_procs_per_group = i; + max_bytes_per_proc = bytes_per_proc; + } + else { + fh->f_procs_per_group = 1; + fh->f_comm->c_coll.coll_allreduce (&bytes_per_proc, + &max_bytes_per_proc, + 1, + MPI_LONG, + MPI_MAX, + fh->f_comm, + fh->f_comm->c_coll.coll_allreduce_module); + } + /* + printf ("BEFORE ADJUSTMENT: %d ---> procs_per_group = %d\n", + fh->f_rank, fh->f_procs_per_group); + + printf ("COMPARING %d to %d x %d = %d\n", + mca_io_ompio_bytes_per_agg, + bytes_per_proc, + fh->f_procs_per_group, + fh->f_procs_per_group*bytes_per_proc); + */ + /* check if the current grouping needs to be expanded or shrinked */ + if ((size_t)mca_io_ompio_bytes_per_agg < + max_bytes_per_proc * fh->f_procs_per_group) { + root_offset = ceil ((float)mca_io_ompio_bytes_per_agg/max_bytes_per_proc); + + if (fh->f_procs_per_group/root_offset != + (fh->f_rank%fh->f_procs_per_group)/root_offset) { + fh->f_procs_per_group = root_offset; + } + else { + fh->f_procs_per_group = fh->f_procs_per_group%root_offset; + } + } + else if ((size_t)mca_io_ompio_bytes_per_agg > + max_bytes_per_proc * fh->f_procs_per_group) { + i = ceil ((float)mca_io_ompio_bytes_per_agg/ + (max_bytes_per_proc * fh->f_procs_per_group)); + root_offset = fh->f_procs_per_group * i; + i = root_offset; + + if (root_offset > fh->f_size) { + root_offset = fh->f_size; + } + + if (fh->f_size/root_offset != fh->f_rank/root_offset) { + fh->f_procs_per_group = root_offset; + } + else { + fh->f_procs_per_group = fh->f_size%root_offset; + } + } + /* + printf ("AFTER ADJUSTMENT: %d ---> 
procs_per_group = %d\n", + fh->f_rank, fh->f_procs_per_group); + */ + fh->f_procs_in_group = (int*)malloc + (fh->f_procs_per_group * sizeof(int)); + if (NULL == fh->f_procs_in_group) { + opal_output (1, "OUT OF MEMORY\n"); + return OMPI_ERR_OUT_OF_RESOURCE; + } + for (j=0 ; jf_size ; j++) { + if (j/i == fh->f_rank/i) { + if (((j%i)/root_offset)*root_offset == + ((fh->f_rank%i)/root_offset)*root_offset) { + fh->f_procs_in_group[n] = j; + n++; + } + } + } + + fh->f_aggregator_index = 0; + /* + if (fh->f_procs_in_group[fh->f_aggregator_index] == fh->f_rank) { + for (j=0 ; jf_procs_per_group; j++) { + printf ("%d: Proc %d: %d\n", fh->f_rank, j, fh->f_procs_in_group[j]); + } + } + */ + return OMPI_SUCCESS; + } + + /* calculate the offset at which each group of processes will start */ + root_offset = ceil ((float)fh->f_size/num_aggregators); + + /* calculate the number of processes in the local group */ + if (fh->f_size/root_offset != fh->f_rank/root_offset) { + fh->f_procs_per_group = root_offset; + } + else { + fh->f_procs_per_group = fh->f_size%root_offset; + } + + fh->f_procs_in_group = (int*)malloc (fh->f_procs_per_group * sizeof(int)); + if (NULL == fh->f_procs_in_group) { + opal_output (1, "OUT OF MEMORY\n"); + return OMPI_ERR_OUT_OF_RESOURCE; + } + + for (j=0 ; jf_procs_per_group ; j++) { + fh->f_procs_in_group[j] = (fh->f_rank/root_offset) * root_offset + j; + } + + fh->f_aggregator_index = 0; + + return OMPI_SUCCESS; +} + +void ompi_io_ompio_resolve_fs_type (mca_io_ompio_file_t *fh, + enum ompio_fs_type *fstype) +{ + /* The code in this function is based on the ADIO FS selection in ROMIO + * Copyright (C) 1997 University of Chicago. + * See COPYRIGHT notice in top-level directory. 
#ifndef PATH_MAX
/* <limits.h> is not required to define PATH_MAX; use a fallback size. */
#define PATH_MAX 4096
#endif

/*
 * Compute the directory part of 'filename' and hand it back in *dirnamep.
 *
 * If 'filename' is a symbolic link whose target can be read, the directory
 * part of the link target is used (whether or not the target exists);
 * otherwise the directory part of 'filename' itself is used.  A name
 * without any '/' yields ".", and a name directly below the root yields "/".
 *
 * filename - path to examine (not modified)
 * dirnamep - output: newly allocated, NUL-terminated directory name that
 *            the caller must release with free(); set to NULL if memory
 *            could not be allocated
 */
static void get_parent_dir (char *filename, char **dirnamep)
{
    struct stat statbuf;
    char *dir = NULL;
    char *slash = NULL;
    size_t len = 0;

    *dirnamep = NULL;

    if (0 == lstat (filename, &statbuf) && S_ISLNK(statbuf.st_mode)) {
        /* filename is a symlink: try to work on the link target instead.
         * One extra byte is reserved because readlink() does not
         * NUL-terminate its result. */
        char *linkbuf = (char *) malloc (PATH_MAX + 1);
        if (NULL != linkbuf) {
            ssize_t namelen = readlink (filename, linkbuf, PATH_MAX);
            if (namelen >= 0) {
                linkbuf[namelen] = '\0';
                dir = linkbuf;          /* ownership moves to 'dir' */
            }
            else {
                /* the link changed or vanished since lstat(); punt and
                 * fall back to the original name below */
                free (linkbuf);
            }
        }
    }

    if (NULL == dir) {
        /* no such file, not a link, or the link could not be read:
         * the "normal" case, work on a private copy of the name.
         * Always reserve room for "." plus terminator so that an empty
         * name cannot overflow the buffer. */
        len = strlen (filename);
        dir = (char *) malloc (len + 2);
        if (NULL == dir) {
            return;
        }
        memcpy (dir, filename, len + 1);
    }

    slash = strrchr (dir, '/');
    if (NULL == slash) {
        /* no directory component at all: current directory */
        dir[0] = '.';
        dir[1] = '\0';
    }
    else if (slash == dir) {
        /* the only slash is the leading one: parent is the root */
        dir[1] = '\0';
    }
    else {
        *slash = '\0';
    }

    *dirnamep = dir;
    return;
}
temp_iov[k].iov_len; + k++; + broken ++; + } + continue; + } + temp = current_offset%stripe_size; + if ((stripe_size-temp) >= remaining) { + temp_iov[k].iov_base = (IOVBASE_TYPE *)current_offset; + temp_iov[k].iov_len = remaining; + i++; + k++; + broken = 0; + current_offset = 0; + remaining = 0; + } + else { + temp_iov[k].iov_base = (IOVBASE_TYPE *)current_offset; + temp_iov[k].iov_len = stripe_size-temp; + current_offset += temp_iov[k].iov_len; + remaining -= temp_iov[k].iov_len; + k++; + broken ++; + } + } + *broken_iov = temp_iov; + *broken_count = k; + + return 1; +} +int ompi_io_ompio_distribute_file_view (mca_io_ompio_file_t *fh, + struct iovec *broken_iov, + int broken_count, + int num_aggregators, + size_t stripe_size, + int **fview_count, + struct iovec **iov, + int *count) +{ + + + int *num_entries = NULL; + int *broken_index = NULL; + int temp = 0; + int *fview_cnt = NULL; + int global_fview_count = 0; + int i = 0; + int *displs = NULL; + int rc = OMPI_SUCCESS; + struct iovec *global_fview = NULL; + struct iovec **broken = NULL; + MPI_Request *req=NULL, *sendreq=NULL; + + + num_entries = (int *) malloc (sizeof (int) * num_aggregators); + if (NULL == num_entries) { + opal_output (1, "OUT OF MEMORY\n"); + return OMPI_ERR_OUT_OF_RESOURCE; + } + broken_index = (int *) malloc (sizeof (int) * num_aggregators); + if (NULL == broken_index) { + opal_output (1, "OUT OF MEMORY\n"); + return OMPI_ERR_OUT_OF_RESOURCE; + } + + memset (num_entries, 0x0, num_aggregators * sizeof (int)); + memset (broken_index, 0x0, num_aggregators * sizeof (int)); + + /* calculate how many entries in the broken iovec belong to each aggregator */ + for (i=0 ; if_rank%fh->f_aggregator_index) { + fview_cnt = (int *) malloc (sizeof (int) * fh->f_size); + if (NULL == fview_cnt) { + opal_output (1, "OUT OF MEMORY\n"); + return OMPI_ERR_OUT_OF_RESOURCE; + } + req = (MPI_Request *)malloc (fh->f_size * sizeof(MPI_Request)); + if (NULL == req) { + return OMPI_ERR_OUT_OF_RESOURCE; + } + } + + 
sendreq = (MPI_Request *)malloc (num_aggregators * sizeof(MPI_Request)); + if (NULL == sendreq) { + return OMPI_ERR_OUT_OF_RESOURCE; + } + + /* gather at each aggregator how many entires from the broken file view it + expects from each process */ + if (0 == fh->f_rank%fh->f_aggregator_index) { + for (i=0; if_size ; i++) { + rc = MCA_PML_CALL(irecv(&fview_cnt[i], + 1, + MPI_INT, + i, + OMPIO_TAG_GATHER, + fh->f_comm, + &req[i])); + if (OMPI_SUCCESS != rc) { + goto exit; + } + } + } + + for (i=0 ; if_aggregator_index, + OMPIO_TAG_GATHER, + MCA_PML_BASE_SEND_STANDARD, + fh->f_comm, + &sendreq[i])); + if (OMPI_SUCCESS != rc) { + goto exit; + } + } + + if (0 == fh->f_rank%fh->f_aggregator_index) { + rc = ompi_request_wait_all (fh->f_size, req, MPI_STATUSES_IGNORE); + if (OMPI_SUCCESS != rc) { + goto exit; + } + } + rc = ompi_request_wait_all (num_aggregators, sendreq, MPI_STATUSES_IGNORE); + if (OMPI_SUCCESS != rc) { + goto exit; + } + + /* + for (i=0 ; if_comm->c_coll.coll_gather (&num_entries[i], + 1, + MPI_INT, + fview_cnt, + 1, + MPI_INT, + i*fh->f_aggregator_index, + fh->f_comm, + fh->f_comm->c_coll.coll_gather_module); + } + */ + + if (0 == fh->f_rank%fh->f_aggregator_index) { + displs = (int*) malloc (fh->f_size * sizeof (int)); + if (NULL == displs) { + opal_output (1, "OUT OF MEMORY\n"); + return OMPI_ERR_OUT_OF_RESOURCE; + } + displs[0] = 0; + global_fview_count = fview_cnt[0]; + for (i=1 ; if_size ; i++) { + global_fview_count += fview_cnt[i]; + displs[i] = displs[i-1] + fview_cnt[i-1]; + } + + if (global_fview_count) { + global_fview = (struct iovec*)malloc (global_fview_count * + sizeof(struct iovec)); + if (NULL == global_fview) { + opal_output (1, "OUT OF MEMORY\n"); + return OMPI_ERR_OUT_OF_RESOURCE; + } + } + } + + broken = (struct iovec**)malloc (num_aggregators * sizeof(struct iovec *)); + if (NULL == broken) { + opal_output (1, "OUT OF MEMORY\n"); + return OMPI_ERR_OUT_OF_RESOURCE; + } + + for (i=0 ; i%d: OFFSET: %d LENGTH: %d\n", + fh->f_rank, + i, 
+ broken[i][j].iov_base, + broken[i][j].iov_len); + } + } + sleep(1); + */ + + if (0 == fh->f_rank%fh->f_aggregator_index) { + ptrdiff_t lb, extent; + rc = ompi_datatype_get_extent(fh->f_iov_type, &lb, &extent); + if (OMPI_SUCCESS != rc) { + goto exit; + } + for (i=0; if_size ; i++) { + if (fview_cnt[i]) { + char *ptmp; + ptmp = ((char *) global_fview) + (extent * displs[i]); + rc = MCA_PML_CALL(irecv(ptmp, + fview_cnt[i], + fh->f_iov_type, + i, + OMPIO_TAG_GATHERV, + fh->f_comm, + &req[i])); + if (OMPI_SUCCESS != rc) { + goto exit; + } + } + } + } + + for (i=0 ; if_iov_type, + i*fh->f_aggregator_index, + OMPIO_TAG_GATHERV, + MCA_PML_BASE_SEND_STANDARD, + fh->f_comm, + &sendreq[i])); + if (OMPI_SUCCESS != rc) { + goto exit; + } + } + } + + if (0 == fh->f_rank%fh->f_aggregator_index) { + for (i=0; if_size ; i++) { + if (fview_cnt[i]) { + rc = ompi_request_wait (&req[i], MPI_STATUS_IGNORE); + if (OMPI_SUCCESS != rc) { + goto exit; + } + } + } + } + + for (i=0; if_comm->c_coll.coll_gatherv (broken[i], + num_entries[i], + fh->f_iov_type, + global_fview, + fview_cnt, + displs, + fh->f_iov_type, + i*fh->f_aggregator_index, + fh->f_comm, + fh->f_comm->c_coll.coll_gatherv_module); + } + */ + /* + for (i=0 ; if_rank, + global_fview[i].iov_base, + global_fview[i].iov_len); + } + */ + exit: + for (i=0 ; i part) { + memcpy ((IOVBASE_TYPE *)((OPAL_PTRDIFF_TYPE)sbuf[temp]+ + temp_position[temp]), + (IOVBASE_TYPE *)((OPAL_PTRDIFF_TYPE)send_buf + + (total_bytes_sent-bytes_remaining)), + part); + bytes_remaining -= part; + temp_position[temp] += part; + part = 0; + current ++; + } + else { + memcpy ((IOVBASE_TYPE *)((OPAL_PTRDIFF_TYPE)sbuf[temp]+ + temp_position[temp]), + (IOVBASE_TYPE *)((OPAL_PTRDIFF_TYPE)send_buf + + (total_bytes_sent-bytes_remaining)), + bytes_remaining); + break; + } + } + else { + if (bytes_remaining > broken_iovec[current].iov_len) { + memcpy ((IOVBASE_TYPE *)((OPAL_PTRDIFF_TYPE)sbuf[temp]+ + temp_position[temp]), + (IOVBASE_TYPE 
*)((OPAL_PTRDIFF_TYPE)send_buf + + (total_bytes_sent-bytes_remaining)), + broken_iovec[current].iov_len); + bytes_remaining -= broken_iovec[current].iov_len; + temp_position[temp] += broken_iovec[current].iov_len; + current ++; + } + else { + memcpy ((IOVBASE_TYPE *)((OPAL_PTRDIFF_TYPE)sbuf[temp]+ + temp_position[temp]), + (IOVBASE_TYPE *)((OPAL_PTRDIFF_TYPE)send_buf + + (total_bytes_sent-bytes_remaining)), + bytes_remaining); + break; + } + } + } + + sendreq = (MPI_Request *)malloc (num_aggregators * sizeof(MPI_Request)); + if (NULL == sendreq) { + return OMPI_ERR_OUT_OF_RESOURCE; + } + + if (0 == fh->f_rank%fh->f_aggregator_index) { + req = (MPI_Request *)malloc (fh->f_size * sizeof(MPI_Request)); + if (NULL == req) { + return OMPI_ERR_OUT_OF_RESOURCE; + } + for (i=0; if_size ; i++) { + if (bytes_per_process[i]) { + rc = MCA_PML_CALL(irecv((char *)global_buf + displs[i], + bytes_per_process[i], + MPI_BYTE, + i, + OMPIO_TAG_GATHERV, + fh->f_comm, + &req[i])); + if (OMPI_SUCCESS != rc) { + goto exit; + } + } + } + } + + for (i=0 ; if_aggregator_index, + OMPIO_TAG_GATHERV, + MCA_PML_BASE_SEND_STANDARD, + fh->f_comm, + &sendreq[i])); + if (OMPI_SUCCESS != rc) { + goto exit; + } + } + } + + if (0 == fh->f_rank%fh->f_aggregator_index) { + for (i=0; if_size ; i++) { + if (bytes_per_process[i]) { + rc = ompi_request_wait (&req[i], MPI_STATUS_IGNORE); + if (OMPI_SUCCESS != rc) { + goto exit; + } + } + } + } + for (i=0; if_comm->c_coll.coll_gatherv (sbuf[i], + bytes_sent[i], + MPI_BYTE, + global_buf, + bytes_per_process, + displs, + MPI_BYTE, + i*fh->f_aggregator_index, + fh->f_comm, + fh->f_comm->c_coll.coll_gatherv_module); + } + */ + + exit: + for (i=0 ; if_aggregator_index, + OMPIO_TAG_SCATTERV, + fh->f_comm, + &recvreq[i])); + if (OMPI_SUCCESS != rc) { + goto exit; + } + } + } + + if (0 == fh->f_rank%fh->f_aggregator_index) { + req = (MPI_Request *)malloc (fh->f_size * sizeof(MPI_Request)); + if (NULL == req) { + return OMPI_ERR_OUT_OF_RESOURCE; + } + for (i=0; 
if_size ; i++) { + if (bytes_per_process[i]) { + rc = MCA_PML_CALL(isend((char *)global_buf + displs[i], + bytes_per_process[i], + MPI_BYTE, + i, + OMPIO_TAG_SCATTERV, + MCA_PML_BASE_SEND_STANDARD, + fh->f_comm, + &req[i])); + if (OMPI_SUCCESS != rc) { + goto exit; + } + } + } + } + + for (i=0; if_rank%fh->f_aggregator_index) { + for (i=0; if_size ; i++) { + if (bytes_per_process[i]) { + rc = ompi_request_wait (&req[i], MPI_STATUS_IGNORE); + if (OMPI_SUCCESS != rc) { + goto exit; + } + } + } + } + /* + for (i=0 ; if_comm->c_coll.coll_scatterv (global_buf, + bytes_per_process, + displs, + MPI_BYTE, + rbuf[i], + bytes_received[i], + MPI_BYTE, + i*fh->f_aggregator_index, + fh->f_comm, + fh->f_comm->c_coll.coll_scatterv_module); + } + */ + bytes_remaining = total_bytes_recv; + + while (bytes_remaining) { + temp = (int)((OPAL_PTRDIFF_TYPE)broken_iovec[current].iov_base/stripe_size) + % num_aggregators; + + if (part) { + if (bytes_remaining > part) { + memcpy ((IOVBASE_TYPE *)((OPAL_PTRDIFF_TYPE)receive_buf + + (total_bytes_recv-bytes_remaining)), + (IOVBASE_TYPE *)((OPAL_PTRDIFF_TYPE)rbuf[temp]+ + temp_position[temp]), + part); + bytes_remaining -= part; + temp_position[temp] += part; + part = 0; + current ++; + } + else { + memcpy ((IOVBASE_TYPE *)((OPAL_PTRDIFF_TYPE)receive_buf + + (total_bytes_recv-bytes_remaining)), + (IOVBASE_TYPE *)((OPAL_PTRDIFF_TYPE)rbuf[temp]+ + temp_position[temp]), + bytes_remaining); + break; + } + } + else { + if (bytes_remaining > broken_iovec[current].iov_len) { + memcpy ((IOVBASE_TYPE *)((OPAL_PTRDIFF_TYPE)receive_buf + + (total_bytes_recv-bytes_remaining)), + (IOVBASE_TYPE *)((OPAL_PTRDIFF_TYPE)rbuf[temp]+ + temp_position[temp]), + broken_iovec[current].iov_len); + bytes_remaining -= broken_iovec[current].iov_len; + temp_position[temp] += broken_iovec[current].iov_len; + current ++; + } + else { + memcpy ((IOVBASE_TYPE *)((OPAL_PTRDIFF_TYPE)receive_buf + + (total_bytes_recv-bytes_remaining)), + (IOVBASE_TYPE 
*)((OPAL_PTRDIFF_TYPE)rbuf[temp]+ + temp_position[temp]), + bytes_remaining); + break; + } + } + } + + exit: + for (i=0 ; if_flags & OMPIO_CONTIGUOUS_MEMORY) && total_bytes_sent) { + for (i=0 ; i part[i]) { + memcpy ((IOVBASE_TYPE *)((OPAL_PTRDIFF_TYPE)sbuf+ + temp_position), + (IOVBASE_TYPE *)((OPAL_PTRDIFF_TYPE)send_buf + + position + + (broken_iovec[current[i]].iov_len + - part[i])), + part[i]); + temp -= part[i]; + temp_position += part[i]; + part[i] = 0; + position += broken_iovec[current[i]].iov_len; + current[i] ++; + } + else { + memcpy ((IOVBASE_TYPE *)((OPAL_PTRDIFF_TYPE)sbuf+ + temp_position), + (IOVBASE_TYPE *)((OPAL_PTRDIFF_TYPE)send_buf + + position + + (broken_iovec[current[i]].iov_len + - part[i])), + temp); + temp_position += temp; + break; + } + } + else { + if (temp > broken_iovec[current[i]].iov_len) { + memcpy ((IOVBASE_TYPE *)((OPAL_PTRDIFF_TYPE)sbuf+ + temp_position), + (IOVBASE_TYPE *)((OPAL_PTRDIFF_TYPE)send_buf + + position), + broken_iovec[current[i]].iov_len); + temp -= broken_iovec[current[i]].iov_len; + temp_position += broken_iovec[current[i]].iov_len; + position += broken_iovec[current[i]].iov_len; + current[i] ++; + } + else { + memcpy ((IOVBASE_TYPE *)((OPAL_PTRDIFF_TYPE)sbuf + + temp_position), + (IOVBASE_TYPE *)((OPAL_PTRDIFF_TYPE)send_buf + + position), + temp); + temp_position += temp; + break; + } + } + } + } + } + else if (total_bytes_sent) { + for (i=0 ; i position) { + break; + } + temp2 += decoded_iov[k].iov_len; + } + current_position = position - temp2; + } + else { + continue; + } + + while (temp) { + mem_address = (OPAL_PTRDIFF_TYPE) + (decoded_iov[k].iov_base) + current_position; + if (temp >= + (decoded_iov[k].iov_len - current_position)) { + memcpy (sbuf+temp_position, + (IOVBASE_TYPE *)mem_address, + decoded_iov[k].iov_len - current_position); + temp -= (decoded_iov[k].iov_len - current_position); + temp_position += + (decoded_iov[k].iov_len - current_position); + k++; + current_position = 0; + } + else { + memcpy 
(sbuf+temp_position, + (IOVBASE_TYPE *)mem_address, + temp); + temp_position += temp; + break; + } + } + } + } + + /* send the data */ + sendreq = (MPI_Request *)malloc (num_aggregators * sizeof(MPI_Request)); + if (NULL == sendreq) { + opal_output (1, "OUT OF MEMORY\n"); + return OMPI_ERR_OUT_OF_RESOURCE; + } + + if (0 == fh->f_rank%fh->f_aggregator_index) { + req = (MPI_Request *)malloc (fh->f_size * sizeof(MPI_Request)); + if (NULL == req) { + opal_output (1, "OUT OF MEMORY\n"); + return OMPI_ERR_OUT_OF_RESOURCE; + } + for (i=0; if_size ; i++) { + if (bytes_per_process[i]) { + rc = MCA_PML_CALL(irecv((char *)global_buf + displs[i], + bytes_per_process[i], + MPI_BYTE, + i, + OMPIO_TAG_GATHERV, + fh->f_comm, + &req[i])); + if (OMPI_SUCCESS != rc) { + opal_output (1, "Aggregator %d failed to recieve data from process %d\n", + fh->f_rank, i); + goto exit; + } + } + } + } + + temp_position = 0; + for (i=0 ; if_aggregator_index, + OMPIO_TAG_GATHERV, + MCA_PML_BASE_SEND_STANDARD, + fh->f_comm, + &sendreq[i])); + if (OMPI_SUCCESS != rc) { + opal_output (1, "Process %d failed to send data to Aggregator %d\n", + fh->f_rank, i*fh->f_aggregator_index); + goto exit; + } + temp_position += bytes_sent[i]; + } + } + + if (0 == fh->f_rank%fh->f_aggregator_index) { + for (i=0; if_size ; i++) { + if (bytes_per_process[i]) { + rc = ompi_request_wait (&req[i], MPI_STATUS_IGNORE); + if (OMPI_SUCCESS != rc) { + opal_output (1, "%d request_wait failed for %d\n", + fh->f_rank, i); + goto exit; + } + } + } + } + for (i=0; if_rank, i*fh->f_aggregator_index); + goto exit; + } + } + } + + exit: + + if (NULL != req) { + free (req); + } + if (NULL != sendreq) { + free (sendreq); + } + if (NULL != sbuf) { + free (sbuf); + sbuf = NULL; + } + return rc; +} + +int ompi_io_ompio_receive_data (mca_io_ompio_file_t *fh, + void *recv_buf, + size_t total_bytes_recv, + struct iovec *decoded_iov, + int decoded_count, + int *bytes_recv, + struct iovec *broken_iovec, + int *current, + size_t *part, + void 
*global_buf, + int *bytes_per_process, + int *displs, + int num_aggregators, + size_t stripe_size) +{ + void *rbuf = NULL; + size_t temp_position = 0; + int i, k; + int rc = OMPI_SUCCESS; + MPI_Request *req=NULL, *recvreq=NULL; + + if (total_bytes_recv) { + rbuf = malloc (total_bytes_recv); + if (NULL == rbuf) { + opal_output (1, "OUT OF MEMORY\n"); + return OMPI_ERR_OUT_OF_RESOURCE; + } + } + + recvreq = (MPI_Request *)malloc (num_aggregators * sizeof(MPI_Request)); + if (NULL == recvreq) { + return OMPI_ERR_OUT_OF_RESOURCE; + } + for (i=0 ; if_aggregator_index, + OMPIO_TAG_SCATTERV, + fh->f_comm, + &recvreq[i])); + if (OMPI_SUCCESS != rc) { + goto exit; + } + temp_position += bytes_recv[i]; + } + } + + if (0 == fh->f_rank%fh->f_aggregator_index) { + req = (MPI_Request *)malloc (fh->f_size * sizeof(MPI_Request)); + if (NULL == req) { + return OMPI_ERR_OUT_OF_RESOURCE; + } + for (i=0; if_size ; i++) { + if (bytes_per_process[i]) { + rc = MCA_PML_CALL(isend((char *)global_buf + displs[i], + bytes_per_process[i], + MPI_BYTE, + i, + OMPIO_TAG_SCATTERV, + MCA_PML_BASE_SEND_STANDARD, + fh->f_comm, + &req[i])); + if (OMPI_SUCCESS != rc) { + goto exit; + } + } + } + } + + for (i=0; if_rank%fh->f_aggregator_index) { + for (i=0; if_size ; i++) { + if (bytes_per_process[i]) { + rc = ompi_request_wait (&req[i], MPI_STATUS_IGNORE); + if (OMPI_SUCCESS != rc) { + goto exit; + } + } + } + } + temp_position = 0; + if ((fh->f_flags & OMPIO_CONTIGUOUS_MEMORY) && total_bytes_recv) { + for (i=0 ; i part[i]) { + memcpy ((IOVBASE_TYPE *)((OPAL_PTRDIFF_TYPE)recv_buf + + position + + (broken_iovec[current[i]].iov_len + - part[i])), + (IOVBASE_TYPE *)((OPAL_PTRDIFF_TYPE)rbuf+ + temp_position), + part[i]); + temp -= part[i]; + temp_position += part[i]; + part[i] = 0; + position += broken_iovec[current[i]].iov_len; + current[i] ++; + } + else { + memcpy ((IOVBASE_TYPE *)((OPAL_PTRDIFF_TYPE)recv_buf + + position + + (broken_iovec[current[i]].iov_len + - part[i])), + (IOVBASE_TYPE 
*)((OPAL_PTRDIFF_TYPE)rbuf+ + temp_position), + temp); + temp_position += temp; + break; + } + } + else { + if (temp > broken_iovec[current[i]].iov_len) { + memcpy ((IOVBASE_TYPE *)((OPAL_PTRDIFF_TYPE)recv_buf + + position), + (IOVBASE_TYPE *)((OPAL_PTRDIFF_TYPE)rbuf+ + temp_position), + broken_iovec[current[i]].iov_len); + temp -= broken_iovec[current[i]].iov_len; + temp_position += broken_iovec[current[i]].iov_len; + position += broken_iovec[current[i]].iov_len; + current[i] ++; + } + else { + memcpy ((IOVBASE_TYPE *)((OPAL_PTRDIFF_TYPE)recv_buf + + position), + (IOVBASE_TYPE *)((OPAL_PTRDIFF_TYPE)rbuf + + temp_position), + temp); + temp_position += temp; + break; + } + } + } + } + } + else if (total_bytes_recv) { + for (i=0 ; i position) { + break; + } + temp2 += decoded_iov[k].iov_len; + } + current_position = position - temp2; + } + else { + continue; + } + + while (temp) { + mem_address = (OPAL_PTRDIFF_TYPE) + (decoded_iov[k].iov_base) + current_position; + if (temp >= + (decoded_iov[k].iov_len - current_position)) { + memcpy ((IOVBASE_TYPE *)mem_address, + (IOVBASE_TYPE *)((OPAL_PTRDIFF_TYPE)rbuf + + temp_position), + decoded_iov[k].iov_len - current_position); + temp -= (decoded_iov[k].iov_len - current_position); + temp_position += + (decoded_iov[k].iov_len - current_position); + k++; + current_position = 0; + } + else { + memcpy ((IOVBASE_TYPE *)mem_address, + (IOVBASE_TYPE *)((OPAL_PTRDIFF_TYPE)rbuf + + temp_position), + temp); + temp_position += temp; + break; + } + } + } + } + exit: + if (NULL != req) { + free (req); + } + if (NULL != recvreq) { + free (recvreq); + } + if (NULL != rbuf) { + free (rbuf); + rbuf = NULL; + } + return rc; +} + + + + + + +#if 0 + +int ompi_io_ompio_receive_data (mca_io_ompio_file_t *fh, + void *recv_buf, + int *bytes_recv, + struct iovec *broken_iovec, + int *current, + size_t *part, + void *global_buf, + int *bytes_per_process, + int *displs, + int num_aggregators, + size_t stripe_size) +{ + void **rbuf = NULL; + size_t 
*temp_position = NULL; + int i, k; + int rc = OMPI_SUCCESS; + MPI_Request *req=NULL, *recvreq=NULL; + + rbuf = (void**) malloc (num_aggregators * sizeof(void *)); + if (NULL == rbuf) { + opal_output (1, "OUT OF MEMORY\n"); + return OMPI_ERR_OUT_OF_RESOURCE; + } + + temp_position = (size_t *) malloc (num_aggregators * sizeof(size_t)); + if (NULL == temp_position) { + opal_output (1, "OUT OF MEMORY\n"); + return OMPI_ERR_OUT_OF_RESOURCE; + } + memset (temp_position, 0x0, num_aggregators * sizeof (size_t)); + + for (i=0 ; if_aggregator_index, + OMPIO_TAG_SCATTERV, + fh->f_comm, + &recvreq[i])); + if (OMPI_SUCCESS != rc) { + goto exit; + } + } + } + + if (0 == fh->f_rank%fh->f_aggregator_index) { + req = (MPI_Request *)malloc (fh->f_size * sizeof(MPI_Request)); + if (NULL == req) { + return OMPI_ERR_OUT_OF_RESOURCE; + } + for (i=0; if_size ; i++) { + if (bytes_per_process[i]) { + rc = MCA_PML_CALL(isend((char *)global_buf + displs[i], + bytes_per_process[i], + MPI_BYTE, + i, + OMPIO_TAG_SCATTERV, + MCA_PML_BASE_SEND_STANDARD, + fh->f_comm, + &req[i])); + if (OMPI_SUCCESS != rc) { + goto exit; + } + } + } + } + + for (i=0; if_rank%fh->f_aggregator_index) { + for (i=0; if_size ; i++) { + if (bytes_per_process[i]) { + rc = ompi_request_wait (&req[i], MPI_STATUS_IGNORE); + if (OMPI_SUCCESS != rc) { + goto exit; + } + } + } + } + /* + for (i=0 ; if_comm->c_coll.coll_scatterv (global_buf, + bytes_per_process, + displs, + MPI_BYTE, + rbuf[i], + bytes_recv[i], + MPI_BYTE, + i*fh->f_aggregator_index, + fh->f_comm, + fh->f_comm->c_coll.coll_scatterv_module); + } + */ + + for (i=0 ; i part[i]) { + memcpy ((IOVBASE_TYPE *)((OPAL_PTRDIFF_TYPE)recv_buf + + position + temp_position[i] + + (broken_iovec[current[i]].iov_len - part[i])), + (IOVBASE_TYPE *)((OPAL_PTRDIFF_TYPE)rbuf[i]+ + temp_position[i]), + part[i]); + temp -= part[i]; + temp_position[i] += part[i]; + part[i] = 0; + current[i] ++; + } + else { + memcpy ((IOVBASE_TYPE *)((OPAL_PTRDIFF_TYPE)recv_buf + + position + 
temp_position[i] + + (broken_iovec[current[i]].iov_len - part[i])), + (IOVBASE_TYPE *)((OPAL_PTRDIFF_TYPE)rbuf[i]+ + temp_position[i]), + temp); + break; + } + } + else { + if (temp > broken_iovec[current[i]].iov_len) { + memcpy ((IOVBASE_TYPE *)((OPAL_PTRDIFF_TYPE)recv_buf + + position + temp_position[i]), + (IOVBASE_TYPE *)((OPAL_PTRDIFF_TYPE)rbuf[i]+ + temp_position[i]), + broken_iovec[current[i]].iov_len); + temp -= broken_iovec[current[i]].iov_len; + temp_position[i] += broken_iovec[current[i]].iov_len; + current[i] ++; + } + else { + memcpy ((IOVBASE_TYPE *)((OPAL_PTRDIFF_TYPE)recv_buf + + position + temp_position[i]), + (IOVBASE_TYPE *)((OPAL_PTRDIFF_TYPE)rbuf[i]+ + temp_position[i]), + temp); + break; + } + } + } + } + + exit: + for (i=0 ; i part[i]) { + memcpy ((IOVBASE_TYPE *)((OPAL_PTRDIFF_TYPE)sbuf[i]+ + temp_position[i]), + (IOVBASE_TYPE *)((OPAL_PTRDIFF_TYPE)send_buf + + position + temp_position[i] + + (broken_iovec[current[i]].iov_len - part[i])), + part[i]); + temp -= part[i]; + temp_position[i] += part[i]; + part[i] = 0; + current[i] ++; + } + else { + memcpy ((IOVBASE_TYPE *)((OPAL_PTRDIFF_TYPE)sbuf[i]+ + temp_position[i]), + (IOVBASE_TYPE *)((OPAL_PTRDIFF_TYPE)send_buf + + position + temp_position[i] + + (broken_iovec[current[i]].iov_len - part[i])), + temp); + break; + } + } + else { + if (temp > broken_iovec[current[i]].iov_len) { + memcpy ((IOVBASE_TYPE *)((OPAL_PTRDIFF_TYPE)sbuf[i]+ + temp_position[i]), + (IOVBASE_TYPE *)((OPAL_PTRDIFF_TYPE)send_buf + + position + temp_position[i]), + broken_iovec[current[i]].iov_len); + temp -= broken_iovec[current[i]].iov_len; + temp_position[i] += broken_iovec[current[i]].iov_len; + current[i] ++; + } + else { + memcpy ((IOVBASE_TYPE *)((OPAL_PTRDIFF_TYPE)sbuf[i]+ + temp_position[i]), + (IOVBASE_TYPE *)((OPAL_PTRDIFF_TYPE)send_buf + + position + temp_position[i]), + temp); + break; + } + } + } + } + sendreq = (MPI_Request *)malloc (num_aggregators * sizeof(MPI_Request)); + if (NULL == sendreq) { + return 
OMPI_ERR_OUT_OF_RESOURCE; + } + + if (0 == fh->f_rank%fh->f_aggregator_index) { + req = (MPI_Request *)malloc (fh->f_size * sizeof(MPI_Request)); + if (NULL == req) { + return OMPI_ERR_OUT_OF_RESOURCE; + } + for (i=0; if_size ; i++) { + if (bytes_per_process[i]) { + rc = MCA_PML_CALL(irecv((char *)global_buf + displs[i], + bytes_per_process[i], + MPI_BYTE, + i, + OMPIO_TAG_GATHERV, + fh->f_comm, + &req[i])); + if (OMPI_SUCCESS != rc) { + goto exit; + } + } + } + } + + for (i=0 ; if_aggregator_index, + OMPIO_TAG_GATHERV, + MCA_PML_BASE_SEND_STANDARD, + fh->f_comm, + &sendreq[i])); + if (OMPI_SUCCESS != rc) { + goto exit; + } + } + } + + if (0 == fh->f_rank%fh->f_aggregator_index) { + for (i=0; if_size ; i++) { + if (bytes_per_process[i]) { + rc = ompi_request_wait (&req[i], MPI_STATUS_IGNORE); + if (OMPI_SUCCESS != rc) { + goto exit; + } + } + } + } + for (i=0; if_comm->c_coll.coll_gatherv (sbuf[i], + bytes_sent[i], + MPI_BYTE, + global_buf, + bytes_per_process, + displs, + MPI_BYTE, + i*fh->f_aggregator_index, + fh->f_comm, + fh->f_comm->c_coll.coll_gatherv_module); + } + */ + + exit: + for (i=0 ; i + +#include "mpi.h" +#include "opal/class/opal_list.h" +#include "ompi/errhandler/errhandler.h" +#include "opal/threads/mutex.h" +#include "ompi/file/file.h" +#include "ompi/mca/io/io.h" +#include "ompi/mca/fs/fs.h" +#include "ompi/mca/fcoll/fcoll.h" +#include "ompi/mca/fbtl/fbtl.h" +#include "ompi/mca/sharedfp/sharedfp.h" +#include "ompi/mca/fcache/fcache.h" +#include "ompi/communicator/communicator.h" +#include "ompi/info/info.h" +#include "opal/datatype/opal_convertor.h" +#include "ompi/datatype/ompi_datatype.h" +#include "ompi/request/request.h" + +extern int mca_io_ompio_cycle_buffer_size; +extern int mca_io_ompio_bytes_per_agg; + +/* + * Flags + */ +#define OMPIO_CONTIGUOUS_MEMORY 0x00000001 +#define OMPIO_UNIFORM_FVIEW 0x00000002 +#define OMPIO_FILE_IS_OPEN 0x00000004 +#define OMPIO_FILE_VIEW_IS_SET 0x00000008 +#define OMPIO_CONTIGUOUS_FVIEW 0x00000010 +#define 
OMPIO_AGGREGATOR_IS_SET 0x00000020 + +/* + * General values + */ +#define OMPIO_PREALLOC_MAX_BUF_SIZE 33554432 +#define OMPIO_PERM_NULL -1 +#define OMPIO_IOVEC_INITIAL_SIZE 100 +#define OMPIO_ROOT 0 +#define OMPIO_MAX_NAME 100 +#define OMPIO_TAG_GATHER -100 +#define OMPIO_TAG_GATHERV -101 +#define OMPIO_TAG_BCAST -102 +#define OMPIO_TAG_SCATTERV -103 + +/* ACCESS MODES --- not needed.. just use MPI_MODE_... */ +#define OMPIO_MODE_CREATE 1 +#define OMPIO_MODE_RDONLY 2 +#define OMPIO_MODE_WRONLY 4 +#define OMPIO_MODE_RDWR 8 +#define OMPIO_MODE_DELETE_ON_CLOSE 16 +#define OMPIO_MODE_UNIQUE_OPEN 32 +#define OMPIO_MODE_EXCL 64 +#define OMPIO_MODE_APPEND 128 +#define OMPIO_MODE_SEQUENTIAL 256 + +BEGIN_C_DECLS + +enum ompio_fs_type +{ + UFS = 1, + PVFS2 = 2, + LUSTRE = 3 +}; + +OMPI_DECLSPEC extern mca_io_base_component_2_0_0_t mca_io_ompio_component; +/* + * global variables, instantiated in module.c + */ +extern opal_mutex_t mca_io_ompio_mutex; +extern mca_io_base_module_2_0_0_t mca_io_ompio_module; +OMPI_DECLSPEC extern mca_io_base_component_2_0_0_t mca_io_ompio_component; + +typedef struct mca_io_ompio_io_array_t { + void *memory_address; + void *offset; /* we need that of type OMPI_MPI_OFFSET_TYPE */ + size_t length; + /*mca_io_ompio_server_t io_server;*/ +} mca_io_ompio_io_array_t; + +/** + * Back-end structure for MPI_File + */ +struct mca_io_ompio_file_t { + /* General parameters */ + int fd; + OMPI_MPI_OFFSET_TYPE f_offset; /* byte offset of current position */ + OMPI_MPI_OFFSET_TYPE f_disp; /* file_view displacement */ + int f_rank; + int f_size; + int f_amode; + int f_perm; + ompi_communicator_t *f_comm; + char *f_filename; + char *f_datarep; + opal_convertor_t *f_convertor; + ompi_info_t *f_info; + int32_t f_flags; + void *f_fs_ptr; + int f_atomicity; + size_t f_stripe_size; + size_t f_cc_size; + int f_bytes_per_agg; + enum ompio_fs_type f_fstype; + + /* process grouping parameters */ + int *f_procs_in_group; + int f_procs_per_group; + int f_aggregator_index; 
+ + /* File View parameters */ + struct iovec *f_decoded_iov; + uint32_t f_iov_count; + ompi_datatype_t *f_iov_type; + size_t f_position_in_file_view; /* in bytes */ + size_t f_total_bytes; /* total bytes read/written within 1 Fview*/ + int f_index_in_file_view; + OPAL_PTRDIFF_TYPE f_view_extent; + size_t f_view_size; + ompi_datatype_t *f_etype; + ompi_datatype_t *f_filetype; + size_t f_etype_size; + + /* contains IO requests that needs to be read/written */ + mca_io_ompio_io_array_t *f_io_array; + int f_num_of_io_entries; + + /* Hooks for modules to hang things */ + mca_base_component_t *f_fs_component; + mca_base_component_t *f_fcoll_component; + mca_base_component_t *f_fcache_component; + mca_base_component_t *f_fbtl_component; + mca_base_component_t *f_sharedfp_component; + + /* structure of function pointers */ + mca_fs_base_module_t *f_fs; + mca_fcoll_base_module_t *f_fcoll; + mca_fcache_base_module_t *f_fcache; + mca_fbtl_base_module_t *f_fbtl; + mca_sharedfp_base_module_t *f_sharedfp; + + /* No Error handling done yet + struct ompi_errhandler_t *error_handler; + ompi_errhandler_type_t errhandler_type; + */ +}; +typedef struct mca_io_ompio_file_t mca_io_ompio_file_t; + +struct mca_io_ompio_data_t { + mca_io_ompio_file_t ompio_fh; +}; +typedef struct mca_io_ompio_data_t mca_io_ompio_data_t; + +OMPI_DECLSPEC int ompi_io_ompio_set_file_defaults (mca_io_ompio_file_t *fh); + +OMPI_DECLSPEC void ompi_io_ompio_resolve_fs_type (mca_io_ompio_file_t *fh, + enum ompio_fs_type *fstype); + +/* + * Function that takes in a datatype and buffer, and decodes that datatype + * into an iovec using the convertor_raw function + */ +OMPI_DECLSPEC int ompi_io_ompio_decode_datatype (mca_io_ompio_file_t *fh, + struct ompi_datatype_t *datatype, + int count, + void *buf, + size_t *max_data, + struct iovec **iov, + uint32_t *iov_count); + +/* + * Function that sorts an io_array according to the offset by filling + * up an array of the indices into the array (HEAP SORT) + */ 
+OMPI_DECLSPEC int ompi_io_ompio_sort (mca_io_ompio_io_array_t *io_array, + int num_entries, + int *sorted); + +OMPI_DECLSPEC int ompi_io_ompio_sort_iovec (struct iovec *iov, + int num_entries, + int *sorted); + +OMPI_DECLSPEC int ompi_io_ompio_set_explicit_offset (mca_io_ompio_file_t *fh, + OMPI_MPI_OFFSET_TYPE offset); + +OMPI_DECLSPEC int ompi_io_ompio_generate_current_file_view (mca_io_ompio_file_t *fh, + size_t max_data, + struct iovec **f_iov, + int *iov_count); + +OMPI_DECLSPEC int ompi_io_ompio_generate_groups (mca_io_ompio_file_t *fh, + int num_aggregators, + int *root, + int *procs_per_group, + int **ranks); +OMPI_DECLSPEC int ompi_io_ompio_set_aggregator_props (mca_io_ompio_file_t *fh, + int num_aggregators, + size_t bytes_per_proc); + +OMPI_DECLSPEC int ompi_io_ompio_break_file_view (mca_io_ompio_file_t *fh, + struct iovec *iov, + int count, + int num_aggregators, + size_t stripe_size, + struct iovec **broken_iov, + int *broken_count); + +OMPI_DECLSPEC int ompi_io_ompio_distribute_file_view (mca_io_ompio_file_t *fh, + struct iovec *broken_iov, + int broken_count, + int num_aggregators, + size_t stripe_size, + int **fview_count, + struct iovec **iov, + int *count); + +OMPI_DECLSPEC int ompi_io_ompio_gather_data (mca_io_ompio_file_t *fh, + void *send_buf, + size_t total_bytes_sent, + int *bytes_sent, + struct iovec *broken_iovec, + int broken_index, + size_t partial, + void *global_buf, + int *bytes_per_process, + int *displs, + int num_aggregators, + size_t stripe_size); + +OMPI_DECLSPEC int ompi_io_ompio_scatter_data (mca_io_ompio_file_t *fh, + void *receive_buf, + size_t total_bytes_recv, + int *bytes_received, + struct iovec *broken_iovec, + int broken_index, + size_t partial, + void *global_buf, + int *bytes_per_process, + int *displs, + int num_aggregators, + size_t stripe_size); + +OMPI_DECLSPEC int ompi_io_ompio_send_data (mca_io_ompio_file_t *fh, + void *send_buf, + size_t total_bytes_sent, + struct iovec *decoded_iov, + int decoded_count, + int 
*bytes_sent, + struct iovec *broken_iovec, + int *current, + size_t *part, + void *global_buf, + int *bytes_per_process, + int *displs, + int num_aggregators, + size_t stripe_size); + +OMPI_DECLSPEC int ompi_io_ompio_receive_data (mca_io_ompio_file_t *fh, + void *recv_buf, + size_t total_bytes_recv, + struct iovec *decoded_iov, + int decoded_count, + int *bytes_recv, + struct iovec *broken_iovec, + int *current, + size_t *part, + void *global_buf, + int *bytes_per_process, + int *displs, + int num_aggregators, + size_t stripe_size); + +/* + * Modified versions of Collective operations + * Based on root offsets + */ +OMPI_DECLSPEC int ompi_io_ompio_gatherv (void *sbuf, + int scount, + ompi_datatype_t *sdtype, + void *rbuf, + int *rcounts, + int *disps, + ompi_datatype_t *rdtype, + int root_offset, + int procs_per_group, + ompi_communicator_t *comm); +OMPI_DECLSPEC int ompi_io_ompio_scatterv (void *sbuf, + int *scounts, + int *disps, + ompi_datatype_t *sdtype, + void *rbuf, + int rcount, + ompi_datatype_t *rdtype, + int root_offset, + int procs_per_group, + ompi_communicator_t *comm); +OMPI_DECLSPEC int ompi_io_ompio_allgather (void *sbuf, + int scount, + ompi_datatype_t *sdtype, + void *rbuf, + int rcount, + ompi_datatype_t *rdtype, + int root_offset, + int procs_per_group, + ompi_communicator_t *comm); +OMPI_DECLSPEC int ompi_io_ompio_allgatherv (void *sbuf, + int scount, + ompi_datatype_t *sdtype, + void *rbuf, + int *rcounts, + int *disps, + ompi_datatype_t *rdtype, + int root_offset, + int procs_per_group, + ompi_communicator_t *comm); +OMPI_DECLSPEC int ompi_io_ompio_gather (void *sbuf, + int scount, + ompi_datatype_t *sdtype, + void *rbuf, + int rcount, + ompi_datatype_t *rdtype, + int root_offset, + int procs_per_group, + ompi_communicator_t *comm); +OMPI_DECLSPEC int ompi_io_ompio_bcast (void *buff, + int count, + ompi_datatype_t *datatype, + int root_offset, + int procs_per_group, + ompi_communicator_t *comm); + +/* + * Modified versions of Collective 
operations + * Based on an array of procs in group + */ +OMPI_DECLSPEC int ompi_io_ompio_gatherv_array (void *sbuf, + int scount, + ompi_datatype_t *sdtype, + void *rbuf, + int *rcounts, + int *disps, + ompi_datatype_t *rdtype, + int root_index, + int *procs_in_group, + int procs_per_group, + ompi_communicator_t *comm); +OMPI_DECLSPEC int ompi_io_ompio_scatterv_array (void *sbuf, + int *scounts, + int *disps, + ompi_datatype_t *sdtype, + void *rbuf, + int rcount, + ompi_datatype_t *rdtype, + int root_index, + int *procs_in_group, + int procs_per_group, + ompi_communicator_t *comm); +OMPI_DECLSPEC int ompi_io_ompio_allgather_array (void *sbuf, + int scount, + ompi_datatype_t *sdtype, + void *rbuf, + int rcount, + ompi_datatype_t *rdtype, + int root_index, + int *procs_in_group, + int procs_per_group, + ompi_communicator_t *comm); +OMPI_DECLSPEC int ompi_io_ompio_allgatherv_array (void *sbuf, + int scount, + ompi_datatype_t *sdtype, + void *rbuf, + int *rcounts, + int *disps, + ompi_datatype_t *rdtype, + int root_index, + int *procs_in_group, + int procs_per_group, + ompi_communicator_t *comm); +OMPI_DECLSPEC int ompi_io_ompio_gather_array (void *sbuf, + int scount, + ompi_datatype_t *sdtype, + void *rbuf, + int rcount, + ompi_datatype_t *rdtype, + int root_index, + int *procs_in_group, + int procs_per_group, + ompi_communicator_t *comm); +OMPI_DECLSPEC int ompi_io_ompio_bcast_array (void *buff, + int count, + ompi_datatype_t *datatype, + int root_index, + int *procs_in_group, + int procs_per_group, + ompi_communicator_t *comm); + +/* Function declaration for get and utility method to use with libNBC + implementation in io_ompio_nbc.c */ +OMPI_DECLSPEC int mca_io_ompio_get_fcoll_dynamic_num_io_procs (int *num_procs); +OMPI_DECLSPEC int mca_io_ompio_get_fcoll_dynamic_cycle_buffer_size (int *cycle_buffer_size); +OMPI_DECLSPEC int mca_io_ompio_get_fcoll_dynamic_constant_cbs (int *constant_cbs); +OMPI_DECLSPEC int mca_io_ompio_get_f_aggregator_index (ompi_file_t *fh); 
+OMPI_DECLSPEC int mca_io_ompio_get_f_procs_in_group (ompi_file_t *fh, + int **value); +OMPI_DECLSPEC int mca_io_ompio_get_f_procs_per_group (ompi_file_t *fh); +OMPI_DECLSPEC int mca_io_ompio_get_f_comm (ompi_file_t *fh, + ompi_communicator_t **value); +OMPI_DECLSPEC int mca_io_ompio_get_iov_type (ompi_file_t *fh, + ompi_datatype_t **value); +OMPI_DECLSPEC signed int mca_io_ompio_get_f_flags (ompi_file_t *fh); +OMPI_DECLSPEC int mca_io_ompio_get_fd (ompi_file_t *fh); +OMPI_DECLSPEC int mca_io_ompio_get_f_num_of_io_entries (ompi_file_t *fh); +OMPI_DECLSPEC int mca_io_ompio_get_f_io_array (ompi_file_t *fh, + mca_io_ompio_io_array_t **f_io_array); +OMPI_DECLSPEC int mca_io_ompio_free_f_io_array (ompi_file_t *fh); + +OMPI_DECLSPEC int mca_io_ompio_get_datatype_size (ompi_datatype_t *datatype); +OMPI_DECLSPEC int mca_io_ompio_decode_datatype_external(ompi_file_t *fh, + struct ompi_datatype_t *datatype, + int count, + void *buf, + size_t *max_data, + struct iovec **iov, + uint32_t *iov_count); +OMPI_DECLSPEC int mca_io_ompio_generate_current_file_view (ompi_file_t *fp, + size_t max_data, + struct iovec **f_iov, + int *iov_count); +OMPI_DECLSPEC int mca_io_ompio_set_aggregator_props (ompi_file_t *fh, + int num_aggregators, + size_t bytes_per_proc); +OMPI_DECLSPEC int mca_io_ompio_generate_io_array (ompi_file_t *file, + struct iovec *global_view, + int *tglobal_count, + int *fview_count, + int *bytes_per_process, + char *global_buf, + int *tblocks, + int *sorted, + int *nvalue, + int *bytes_left, + int *sorted_index); +OMPI_DECLSPEC int mca_io_ompio_datatype_is_contiguous (ompi_datatype_t *datatype, + ompi_file_t *fp); +OMPI_DECLSPEC int mca_io_ompio_non_contiguous_create_send_buf (int *bytes_sent, + struct iovec *decoded_iov, + char *send_buf); +OMPI_DECLSPEC int mca_io_ompio_non_contiguous_create_receive_buf(int *bytes_received, + struct iovec *decoded_iov, + char *receive_buf); + +/* libNBC utility methods declarations ends here */ +/* + * 
****************************************************************** + * ********* functions which are implemented in this module ********* + * ****************************************************************** + */ + +int mca_io_ompio_file_set_view (struct ompi_file_t *fh, + OMPI_MPI_OFFSET_TYPE disp, + struct ompi_datatype_t *etype, + struct ompi_datatype_t *filetype, + char *datarep, + struct ompi_info_t *info); +int mca_io_ompio_file_get_view (struct ompi_file_t *fh, + OMPI_MPI_OFFSET_TYPE *disp, + struct ompi_datatype_t **etype, + struct ompi_datatype_t **filetype, + char *datarep); +int mca_io_ompio_file_open (struct ompi_communicator_t *comm, + char *filename, + int amode, + struct ompi_info_t *info, + struct ompi_file_t *fh); +int mca_io_ompio_file_close (struct ompi_file_t *fh); +int mca_io_ompio_file_delete (char *filename, + struct ompi_info_t *info); +int mca_io_ompio_file_set_size (struct ompi_file_t *fh, + OMPI_MPI_OFFSET_TYPE size); +int mca_io_ompio_file_preallocate (struct ompi_file_t *fh, + OMPI_MPI_OFFSET_TYPE size); +int mca_io_ompio_file_get_size (struct ompi_file_t *fh, + OMPI_MPI_OFFSET_TYPE * size); +int mca_io_ompio_file_get_amode (struct ompi_file_t *fh, + int *amode); +int mca_io_ompio_file_set_info (struct ompi_file_t *fh, + struct ompi_info_t *info); +int mca_io_ompio_file_get_info (struct ompi_file_t *fh, + struct ompi_info_t ** info_used); +int mca_io_ompio_file_sync (struct ompi_file_t *fh); +int mca_io_ompio_file_seek (struct ompi_file_t *fh, + OMPI_MPI_OFFSET_TYPE offet, + int whence); +/* Section 9.3 */ +int mca_io_ompio_file_set_view (struct ompi_file_t *fh, + OMPI_MPI_OFFSET_TYPE disp, + struct ompi_datatype_t *etype, + struct ompi_datatype_t *filetype, + char *datarep, + struct ompi_info_t *info); +int mca_io_ompio_file_get_view (struct ompi_file_t *fh, + OMPI_MPI_OFFSET_TYPE *disp, + struct ompi_datatype_t **etype, + struct ompi_datatype_t **filetype, + char *datarep); + +/* Section 9.4.2 */ +int mca_io_ompio_file_read_at 
(struct ompi_file_t *fh, + OMPI_MPI_OFFSET_TYPE offset, + void *buf, + int count, + struct ompi_datatype_t *datatype, + ompi_status_public_t *status); +int mca_io_ompio_file_read_at_all (struct ompi_file_t *fh, + OMPI_MPI_OFFSET_TYPE offset, + void *buf, + int count, + struct ompi_datatype_t *datatype, + ompi_status_public_t *status); +int mca_io_ompio_file_write_at (struct ompi_file_t *fh, + OMPI_MPI_OFFSET_TYPE offset, + void *buf, + int count, + struct ompi_datatype_t *datatype, + ompi_status_public_t *status); +int mca_io_ompio_file_write_at_all (struct ompi_file_t *fh, + OMPI_MPI_OFFSET_TYPE offset, + void *buf, + int count, + struct ompi_datatype_t *datatype, + ompi_status_public_t *status); +int mca_io_ompio_file_iread_at (struct ompi_file_t *fh, + OMPI_MPI_OFFSET_TYPE offset, + void *buf, + int count, + struct ompi_datatype_t *datatype, + ompi_request_t **request); +int mca_io_ompio_file_iwrite_at (struct ompi_file_t *fh, + OMPI_MPI_OFFSET_TYPE offset, + void *buf, + int count, + struct ompi_datatype_t *datatype, + ompi_request_t **request); + +/* Section 9.4.3 */ +int mca_io_ompio_file_read (struct ompi_file_t *fh, + void *buf, + int count, + struct ompi_datatype_t *datatype, + ompi_status_public_t *status); +int mca_io_ompio_file_read_all (struct ompi_file_t *fh, + void *buf, + int count, + struct ompi_datatype_t *datatype, + ompi_status_public_t *status); +int mca_io_ompio_file_write (struct ompi_file_t *fh, + void *buf, + int count, + struct ompi_datatype_t *datatype, + ompi_status_public_t *status); +int mca_io_ompio_file_write_all (struct ompi_file_t *fh, + void *buf, + int count, + struct ompi_datatype_t *datatype, + ompi_status_public_t *status); +int mca_io_ompio_file_iread (struct ompi_file_t *fh, + void *buf, + int count, + struct ompi_datatype_t *datatype, + ompi_request_t **request); +int mca_io_ompio_file_iwrite (struct ompi_file_t *fh, + void *buf, + int count, + struct ompi_datatype_t *datatype, + ompi_request_t **request); +int 
mca_io_ompio_file_seek (struct ompi_file_t *fh, + OMPI_MPI_OFFSET_TYPE offset, + int whence); +int mca_io_ompio_file_get_position (struct ompi_file_t *fh, + OMPI_MPI_OFFSET_TYPE *offset); +int mca_io_ompio_file_get_byte_offset (struct ompi_file_t *fh, + OMPI_MPI_OFFSET_TYPE offset, + OMPI_MPI_OFFSET_TYPE *disp); + +/* Section 9.4.4 */ +int mca_io_ompio_file_read_shared (struct ompi_file_t *fh, + void *buf, + int count, + struct ompi_datatype_t *datatype, + ompi_status_public_t *status); +int mca_io_ompio_file_write_shared (struct ompi_file_t *fh, + void *buf, + int count, + struct ompi_datatype_t *datatype, + ompi_status_public_t *status); +int mca_io_ompio_file_iread_shared (struct ompi_file_t *fh, + void *buf, + int count, + struct ompi_datatype_t *datatype, + ompi_request_t **request); +int mca_io_ompio_file_iwrite_shared (struct ompi_file_t *fh, + void *buf, + int count, + struct ompi_datatype_t *datatype, + ompi_request_t **request); +int mca_io_ompio_file_read_ordered (struct ompi_file_t *fh, + void *buf, + int count, + struct ompi_datatype_t *datatype, + ompi_status_public_t *status); +int mca_io_ompio_file_write_ordered (struct ompi_file_t *fh, + void *buf, + int count, + struct ompi_datatype_t *datatype, + ompi_status_public_t *status); +int mca_io_ompio_file_seek_shared (struct ompi_file_t *fh, + OMPI_MPI_OFFSET_TYPE offset, + int whence); +int mca_io_ompio_file_get_position_shared (struct ompi_file_t *fh, + OMPI_MPI_OFFSET_TYPE *offset); + +/* Section 9.4.5 */ +int mca_io_ompio_file_read_at_all_begin (struct ompi_file_t *fh, + OMPI_MPI_OFFSET_TYPE offset, + void *buf, + int count, + struct ompi_datatype_t *datatype); +int mca_io_ompio_file_read_at_all_end (struct ompi_file_t *fh, + void *buf, + ompi_status_public_t *status); +int mca_io_ompio_file_write_at_all_begin (struct ompi_file_t *fh, + OMPI_MPI_OFFSET_TYPE offset, + void *buf, + int count, + struct ompi_datatype_t *datatype); +int mca_io_ompio_file_write_at_all_end (struct ompi_file_t *fh, + void 
*buf, + ompi_status_public_t *status); +int mca_io_ompio_file_read_all_begin (struct ompi_file_t *fh, + void *buf, + int count, + struct ompi_datatype_t *datatype); +int mca_io_ompio_file_read_all_end (struct ompi_file_t *fh, + void *buf, + ompi_status_public_t *status); +int mca_io_ompio_file_write_all_begin (struct ompi_file_t *fh, + void *buf, + int count, + struct ompi_datatype_t *datatype); +int mca_io_ompio_file_write_all_end (struct ompi_file_t *fh, + void *buf, + ompi_status_public_t *status); +int mca_io_ompio_file_read_ordered_begin (struct ompi_file_t *fh, + void *buf, + int count, + struct ompi_datatype_t *datatype); +int mca_io_ompio_file_read_ordered_end (struct ompi_file_t *fh, + void *buf, + ompi_status_public_t *status); +int mca_io_ompio_file_write_ordered_begin (struct ompi_file_t *fh, + void *buf, + int count, + struct ompi_datatype_t *datatype); +int mca_io_ompio_file_write_ordered_end (struct ompi_file_t *fh, + void *buf, + struct ompi_status_public_t *status); + +/* Section 9.5.1 */ +int mca_io_ompio_file_get_type_extent (struct ompi_file_t *fh, + struct ompi_datatype_t *datatype, + MPI_Aint *extent); + +/* Section 9.6.1 */ +int mca_io_ompio_file_set_atomicity (struct ompi_file_t *fh, + int flag); +int mca_io_ompio_file_get_atomicity (struct ompi_file_t *fh, + int *flag); +int mca_io_ompio_file_sync (struct ompi_file_t *fh); +/* + * ****************************************************************** + * ************ functions implemented in this module end ************ + * ****************************************************************** + */ + + +END_C_DECLS + +#endif /* MCA_IO_OMPIO_H */ diff --git a/ompi/mca/io/ompio/io_ompio_coll_array.c b/ompi/mca/io/ompio/io_ompio_coll_array.c new file mode 100644 index 0000000000..cf46a656a9 --- /dev/null +++ b/ompi/mca/io/ompio/io_ompio_coll_array.c @@ -0,0 +1,447 @@ +/* -*- Mode: C; c-basic-offset:4 ; -*- */ +/* + * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana + * University 
Research and Technology + * Corporation. All rights reserved. + * Copyright (c) 2004-2007 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2005 The Regents of the University of California. + * All rights reserved. + * Copyright (c) 2008-2011 University of Houston. All rights reserved. + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#include "ompi_config.h" + +#include "ompi/runtime/params.h" +#include "ompi/communicator/communicator.h" +#include "ompi/mca/pml/pml.h" +#include "opal/datatype/opal_datatype.h" +#include "ompi/datatype/ompi_datatype.h" +#include "ompi/request/request.h" + +#include +#include "io_ompio.h" + + +int ompi_io_ompio_allgatherv_array (void *sbuf, + int scount, + ompi_datatype_t *sdtype, + void *rbuf, + int *rcounts, + int *disps, + ompi_datatype_t *rdtype, + int root_index, + int *procs_in_group, + int procs_per_group, + ompi_communicator_t *comm) +{ + int err = OMPI_SUCCESS; + OPAL_PTRDIFF_TYPE extent, lb; + int i, rank, j; + char *send_buf = NULL; + struct ompi_datatype_t *newtype, *send_type; + + rank = ompi_comm_rank (comm); + for (j = 0; j < procs_per_group; j++) { + if (procs_in_group[j] == rank) { + break; + } + } + + if (MPI_IN_PLACE == sbuf) { + err = opal_datatype_get_extent (&rdtype->super, &lb, &extent); + if (OMPI_SUCCESS != err) { + return OMPI_ERROR; + } + send_type = rdtype; + send_buf = (char*)rbuf; + + for (i = 0; i < j; i++) { + send_buf += (rcounts[i] * extent); + } + } + else { + send_buf = (char*)sbuf; + send_type = sdtype; + } + + err = ompi_io_ompio_gatherv_array (send_buf, + rcounts[j], + send_type, + rbuf, + rcounts, + disps, + rdtype, + root_index, + procs_in_group, + procs_per_group, + comm); + if (OMPI_SUCCESS != err) { + return err; + } + + err = ompi_datatype_create_indexed 
(procs_per_group, + rcounts, + disps, + rdtype, + &newtype); + if (MPI_SUCCESS != err) { + return err; + } + err = ompi_datatype_commit (&newtype); + if(MPI_SUCCESS != err) { + return err; + } + + ompi_io_ompio_bcast_array (rbuf, + 1, + newtype, + root_index, + procs_in_group, + procs_per_group, + comm); + + ompi_datatype_destroy (&newtype); + + return OMPI_SUCCESS; +} + +int ompi_io_ompio_gatherv_array (void *sbuf, + int scount, + ompi_datatype_t *sdtype, + void *rbuf, + int *rcounts, + int *disps, + ompi_datatype_t *rdtype, + int root_index, + int *procs_in_group, + int procs_per_group, + struct ompi_communicator_t *comm) +{ + int i, rank; + int err = OMPI_SUCCESS; + char *ptmp; + OPAL_PTRDIFF_TYPE extent, lb; + + rank = ompi_comm_rank (comm); + + if (procs_in_group[root_index] != rank) { + if (scount > 0) { + return MCA_PML_CALL(send(sbuf, + scount, + sdtype, + procs_in_group[root_index], + OMPIO_TAG_GATHERV, + MCA_PML_BASE_SEND_STANDARD, + comm)); + } + return err; + } + + /* writer processes, loop receiving data from proceses + belonging to each corresponding root */ + + err = opal_datatype_get_extent (&rdtype->super, &lb, &extent); + if (OMPI_SUCCESS != err) { + return OMPI_ERROR; + } + + for (i=0; i 0) { + err = MCA_PML_CALL(recv(ptmp, + rcounts[i], + rdtype, + procs_in_group[i], + OMPIO_TAG_GATHERV, + comm, + MPI_STATUS_IGNORE)); + } + } + + if (OMPI_SUCCESS != err) { + return err; + } + } + /* All done */ + + return err; +} + +int ompi_io_ompio_scatterv_array (void *sbuf, + int *scounts, + int *disps, + ompi_datatype_t *sdtype, + void *rbuf, + int rcount, + ompi_datatype_t *rdtype, + int root_index, + int *procs_in_group, + int procs_per_group, + struct ompi_communicator_t *comm) +{ + int i, rank; + int err = OMPI_SUCCESS; + char *ptmp; + OPAL_PTRDIFF_TYPE extent, lb; + + rank = ompi_comm_rank (comm); + + if (procs_in_group[root_index] != rank) { + if (rcount > 0) { + err = MCA_PML_CALL(recv(rbuf, + rcount, + rdtype, + procs_in_group[root_index], + 
OMPIO_TAG_SCATTERV, + comm, + MPI_STATUS_IGNORE)); + } + return err; + } + + /* writer processes, loop sending data to proceses + belonging to each corresponding root */ + + err = opal_datatype_get_extent (&sdtype->super, &lb, &extent); + if (OMPI_SUCCESS != err) { + return OMPI_ERROR; + } + + for (i=0 ; i 0) { + err = MCA_PML_CALL(send(ptmp, + scounts[i], + sdtype, + procs_in_group[i], + OMPIO_TAG_SCATTERV, + MCA_PML_BASE_SEND_STANDARD, + comm)); + } + } + if (OMPI_SUCCESS != err) { + return err; + } + } + /* All done */ + + return err; +} + +int ompi_io_ompio_allgather_array (void *sbuf, + int scount, + ompi_datatype_t *sdtype, + void *rbuf, + int rcount, + ompi_datatype_t *rdtype, + int root_index, + int *procs_in_group, + int procs_per_group, + ompi_communicator_t *comm) +{ + int err = OMPI_SUCCESS; + int rank; + OPAL_PTRDIFF_TYPE extent, lb; + + rank = ompi_comm_rank (comm); + + if (((void *) 1) == sbuf && 0 != rank) { + err = opal_datatype_get_extent (&rdtype->super, &lb, &extent); + if (OMPI_SUCCESS != err) { + return OMPI_ERROR; + } + sbuf = ((char*) rbuf) + (rank * extent * rcount); + sdtype = rdtype; + scount = rcount; + } + + /* Gather and broadcast. */ + err = ompi_io_ompio_gather_array (sbuf, + scount, + sdtype, + rbuf, + rcount, + rdtype, + root_index, + procs_in_group, + procs_per_group, + comm); + + if (OMPI_SUCCESS == err) { + err = ompi_io_ompio_bcast_array (rbuf, + rcount * procs_per_group, + rdtype, + root_index, + procs_in_group, + procs_per_group, + comm); + } + /* All done */ + + return err; +} + +int ompi_io_ompio_gather_array (void *sbuf, + int scount, + ompi_datatype_t *sdtype, + void *rbuf, + int rcount, + ompi_datatype_t *rdtype, + int root_index, + int *procs_in_group, + int procs_per_group, + struct ompi_communicator_t *comm) +{ + int i; + int rank; + char *ptmp; + OPAL_PTRDIFF_TYPE incr; + OPAL_PTRDIFF_TYPE extent, lb; + int err = OMPI_SUCCESS; + + rank = ompi_comm_rank (comm); + + /* Everyone but the writers sends data and returns. 
*/ + if (procs_in_group[root_index] != rank) { + err = MCA_PML_CALL(send(sbuf, + scount, + sdtype, + procs_in_group[root_index], + OMPIO_TAG_GATHER, + MCA_PML_BASE_SEND_STANDARD, + comm)); + return err; + } + + /* writers, loop receiving the data. */ + opal_datatype_get_extent (&rdtype->super, &lb, &extent); + incr = extent * rcount; + + for (i = 0, ptmp = (char *) rbuf; + i < procs_per_group; + ++i, ptmp += incr) { + if (procs_in_group[i] == rank) { + if (MPI_IN_PLACE != sbuf) { + err = ompi_datatype_sndrcv (sbuf, + scount, + sdtype , + ptmp, + rcount, + rdtype); + } + else { + err = OMPI_SUCCESS; + } + } + else { + err = MCA_PML_CALL(recv(ptmp, + rcount, + rdtype, + procs_in_group[i], + OMPIO_TAG_GATHER, + comm, + MPI_STATUS_IGNORE)); + /* + for (k=0 ; k<4 ; k++) + printf ("RECV %p %d \n", + ((struct iovec *)ptmp)[k].iov_base, + ((struct iovec *)ptmp)[k].iov_len); + */ + } + + if (OMPI_SUCCESS != err) { + return err; + } + } + + /* All done */ + + return err; +} + +int ompi_io_ompio_bcast_array (void *buff, + int count, + ompi_datatype_t *datatype, + int root_index, + int *procs_in_group, + int procs_per_group, + ompi_communicator_t *comm) +{ + int i, rank; + int err = OMPI_SUCCESS; + + rank = ompi_comm_rank (comm); + + /* Non-writers receive the data. */ + if (procs_in_group[root_index] != rank) { + err = MCA_PML_CALL(recv(buff, + count, + datatype, + procs_in_group[root_index], + OMPIO_TAG_BCAST, + comm, + MPI_STATUS_IGNORE)); + return err; + } + + /* Writers sends data to all others. 
*/ + + + for (i=0 ; i +#include "io_ompio.h" + + +int ompi_io_ompio_allgatherv (void *sbuf, + int scount, + ompi_datatype_t *sdtype, + void *rbuf, + int *rcounts, + int *disps, + ompi_datatype_t *rdtype, + int root_offset, + int procs_per_group, + ompi_communicator_t *comm) +{ + int err = OMPI_SUCCESS; + OPAL_PTRDIFF_TYPE extent, lb; + int i, rank; + char *send_buf = NULL; + struct ompi_datatype_t *newtype, *send_type; + + rank = ompi_comm_rank (comm); + + if (MPI_IN_PLACE == sbuf) { + err = opal_datatype_get_extent (&rdtype->super, &lb, &extent); + if (OMPI_SUCCESS != err) { + return OMPI_ERROR; + } + send_type = rdtype; + send_buf = (char*)rbuf; + for (i = 0; i < rank%root_offset; ++i) { + send_buf += (rcounts[i] * extent); + } + } + else { + send_buf = (char*)sbuf; + send_type = sdtype; + } + + err = ompi_io_ompio_gatherv (send_buf, + rcounts[rank%root_offset], + send_type, + rbuf, + rcounts, + disps, + rdtype, + root_offset, + procs_per_group, + comm); + if (OMPI_SUCCESS != err) { + return err; + } + + err = ompi_datatype_create_indexed (procs_per_group, + rcounts, + disps, + rdtype, + &newtype); + if (MPI_SUCCESS != err) { + return err; + } + err = ompi_datatype_commit (&newtype); + if(MPI_SUCCESS != err) { + return err; + } + + ompi_io_ompio_bcast (rbuf, + 1, + newtype, + root_offset, + procs_per_group, + comm); + + ompi_datatype_destroy (&newtype); + + return OMPI_SUCCESS; +} + +int ompi_io_ompio_gatherv (void *sbuf, + int scount, + ompi_datatype_t *sdtype, + void *rbuf, + int *rcounts, + int *disps, + ompi_datatype_t *rdtype, + int root_offset, + int procs_per_group, + struct ompi_communicator_t *comm) +{ + int i, rank; + int err = OMPI_SUCCESS; + char *ptmp; + OPAL_PTRDIFF_TYPE extent, lb; + + rank = ompi_comm_rank (comm); + + if (OMPIO_ROOT != rank%root_offset) { + if (scount > 0) { + return MCA_PML_CALL(send(sbuf, + scount, + sdtype, + (rank/root_offset)*root_offset, + OMPIO_TAG_GATHERV, + MCA_PML_BASE_SEND_STANDARD, + comm)); + } + return err; + } + + 
/* writer processes, loop receiving data from proceses + belonging to each corresponding root */ + + err = opal_datatype_get_extent (&rdtype->super, &lb, &extent); + if (OMPI_SUCCESS != err) { + return OMPI_ERROR; + } + + for (i=rank ; i 0) { + err = MCA_PML_CALL(recv(ptmp, + rcounts[i%root_offset], + rdtype, + i, + OMPIO_TAG_GATHERV, + comm, + MPI_STATUS_IGNORE)); + } + } + + if (OMPI_SUCCESS != err) { + return err; + } + } + /* All done */ + + return err; +} + +int ompi_io_ompio_scatterv (void *sbuf, + int *scounts, + int *disps, + ompi_datatype_t *sdtype, + void *rbuf, + int rcount, + ompi_datatype_t *rdtype, + int root_offset, + int procs_per_group, + struct ompi_communicator_t *comm) +{ + int i, rank; + int err = OMPI_SUCCESS; + char *ptmp; + OPAL_PTRDIFF_TYPE extent, lb; + + rank = ompi_comm_rank (comm); + + if (OMPIO_ROOT != rank%root_offset) { + if (rcount > 0) { + err = MCA_PML_CALL(recv(rbuf, + rcount, + rdtype, + (rank/root_offset)*root_offset, + OMPIO_TAG_SCATTERV, + comm, + MPI_STATUS_IGNORE)); + } + return err; + } + + /* writer processes, loop receiving data from proceses + belonging to each corresponding root */ + + err = opal_datatype_get_extent (&sdtype->super, &lb, &extent); + if (OMPI_SUCCESS != err) { + return OMPI_ERROR; + } + + for (i=rank ; i 0) { + err = MCA_PML_CALL(send(ptmp, + scounts[i%root_offset], + sdtype, + i, + OMPIO_TAG_SCATTERV, + MCA_PML_BASE_SEND_STANDARD, + comm)); + } + } + if (OMPI_SUCCESS != err) { + return err; + } + } + /* All done */ + + return err; +} + +int ompi_io_ompio_allgather (void *sbuf, + int scount, + ompi_datatype_t *sdtype, + void *rbuf, + int rcount, + ompi_datatype_t *rdtype, + int root_offset, + int procs_per_group, + ompi_communicator_t *comm) +{ + int err = OMPI_SUCCESS; + int rank; + OPAL_PTRDIFF_TYPE extent, lb; + + rank = ompi_comm_rank (comm); + + if (((void *) 1) == sbuf && 0 != rank) { + err = opal_datatype_get_extent (&rdtype->super, &lb, &extent); + if (OMPI_SUCCESS != err) { + return OMPI_ERROR; 
+ } + sbuf = ((char*) rbuf) + (rank * extent * rcount); + sdtype = rdtype; + scount = rcount; + } + + /* Gather and broadcast. */ + err = ompi_io_ompio_gather (sbuf, + scount, + sdtype, + rbuf, + rcount, + rdtype, + root_offset, + procs_per_group, + comm); + + if (OMPI_SUCCESS == err) { + err = ompi_io_ompio_bcast (rbuf, + rcount * procs_per_group, + rdtype, + root_offset, + procs_per_group, + comm); + } + /* All done */ + + return err; +} + +int ompi_io_ompio_gather (void *sbuf, + int scount, + ompi_datatype_t *sdtype, + void *rbuf, + int rcount, + ompi_datatype_t *rdtype, + int root_offset, + int procs_per_group, + struct ompi_communicator_t *comm) +{ + int i; + int rank; + char *ptmp; + OPAL_PTRDIFF_TYPE incr; + OPAL_PTRDIFF_TYPE extent, lb; + int err = OMPI_SUCCESS; + + rank = ompi_comm_rank (comm); + + /* Everyone but the writers sends data and returns. */ + if (OMPIO_ROOT != rank%root_offset) { + err = MCA_PML_CALL(send(sbuf, + scount, + sdtype, + (rank/root_offset)*root_offset, + OMPIO_TAG_GATHER, + MCA_PML_BASE_SEND_STANDARD, + comm)); + return err; + } + + /* writers, loop receiving the data. 
*/ + /* Bail out on a failed extent lookup (as the gatherv/scatterv + routines do) instead of striding rbuf with an undefined extent. */ + err = opal_datatype_get_extent (&rdtype->super, &lb, &extent); + if (OMPI_SUCCESS != err) { + return OMPI_ERROR; + } + incr = extent * rcount; + + for (i = rank, ptmp = (char *) rbuf; + i < procs_per_group+rank; + ++i, ptmp += incr) { + if (i == rank) { + if (MPI_IN_PLACE != sbuf) { + err = ompi_datatype_sndrcv (sbuf, + scount, + sdtype , + ptmp, + rcount, + rdtype); + } + else { + err = OMPI_SUCCESS; + } + } + else { + err = MCA_PML_CALL(recv(ptmp, + rcount, + rdtype, + i, + OMPIO_TAG_GATHER, + comm, + MPI_STATUS_IGNORE)); + /* + for (k=0 ; k<4 ; k++) + printf ("RECV %p %d \n", + ((struct iovec *)ptmp)[k].iov_base, + ((struct iovec *)ptmp)[k].iov_len); + */ + } + + if (OMPI_SUCCESS != err) { + return err; + } + } + + /* All done */ + + return err; +} +int ompi_io_ompio_bcast (void *buff, + int count, + ompi_datatype_t *datatype, + int root_offset, + int procs_per_group, + ompi_communicator_t *comm) +{ + int i, rank; + int err = OMPI_SUCCESS; + + rank = ompi_comm_rank (comm); + + /* Non-writers receive the data. */ + if (OMPIO_ROOT != rank%root_offset) { + err = MCA_PML_CALL(recv(buff, + count, + datatype, + (rank/root_offset)*root_offset, + OMPIO_TAG_BCAST, + comm, + MPI_STATUS_IGNORE)); + return err; + } + + /* Writers send data to all others. 
*/ + + for (i=rank ; i= 0) { + mca_base_param_lookup_int (param, &mca_io_ompio_cycle_buffer_size); + } + param = mca_base_param_find ("io", NULL, "ompio_bytes_per_agg"); + if (param >= 0) { + mca_base_param_lookup_int (param, &mca_io_ompio_bytes_per_agg); + } + + priority_param = + mca_base_param_reg_int(&mca_io_ompio_component.io_version, + "priority", + "Priority of the io ompio component", + false, false, priority_param, NULL); + delete_priority_param = + mca_base_param_reg_int(&mca_io_ompio_component.io_version, + "delete_priority", + "Delete priority of the io ompio component", + false, false, delete_priority_param, NULL); + + mca_base_param_reg_string(&mca_io_ompio_component.io_version, + "version", + "Version of OMPIO", + false, true, NULL, NULL); + + mca_base_param_reg_int (&mca_io_ompio_component.io_version, + "cycle_buffer_size", + "Cycle Buffer Size of individual reads/writes", + false, false, mca_io_ompio_cycle_buffer_size, + &mca_io_ompio_cycle_buffer_size); + + mca_base_param_reg_int (&mca_io_ompio_component.io_version, + "bytes_per_agg", + "Bytes per aggregator process for automatic selection", + false, false, mca_io_ompio_bytes_per_agg, + &mca_io_ompio_bytes_per_agg); + + /* + mca_base_param_reg_string(&mca_io_ompio_component.io_version, + "user_configure_params", + "User-specified command line parameters passed to OMPIO's configure script", + false, true, + MCA_io_ompio_USER_CONFIGURE_FLAGS, NULL); + mca_base_param_reg_string(&mca_io_ompio_component.io_version, + "complete_configure_params", + "Complete set of command line parameters passed to OMPIO's configure script", + false, true, + MCA_io_ompio_COMPLETE_CONFIGURE_FLAGS, NULL); + */ + /* Create the mutex */ + OBJ_CONSTRUCT(&mca_io_ompio_mutex, opal_mutex_t); + + /* Create the list of pending requests */ + + OBJ_CONSTRUCT(&mca_io_ompio_pending_requests, opal_list_t); + + return OMPI_SUCCESS; +} + + +static int close_component(void) +{ + /* Destroy the list of pending requests */ + /* JMS: Good 
opportunity here to list out all the IO requests that + were not destroyed / completed upon MPI_FINALIZE */ + + OBJ_DESTRUCT(&mca_io_ompio_pending_requests); + + OBJ_DESTRUCT(&mca_io_ompio_mutex); + + return OMPI_SUCCESS; +} + + +static int init_query(bool enable_progress_threads, + bool enable_mpi_threads) +{ + return OMPI_SUCCESS; +} + + +static const struct mca_io_base_module_2_0_0_t * +file_query(struct ompi_file_t *file, + struct mca_io_base_file_t **private_data, + int *priority) +{ + mca_io_ompio_data_t *data; + + /* Lookup our priority */ + + if (OMPI_SUCCESS != mca_base_param_lookup_int(priority_param, + priority)) { + return NULL; + } + + /* Allocate a space for this module to hang private data (e.g., + the OMPIO file handle). The block is zero-filled so that every + pointer member starts out NULL: mca_io_ompio_file_close() frees + several members based on NULL checks. */ + + data = calloc(1, sizeof(mca_io_ompio_data_t)); + if (NULL == data) { + return NULL; + } + + *private_data = (struct mca_io_base_file_t*) data; + + /* All done */ + + return &mca_io_ompio_module; +} + + +static int file_unquery(struct ompi_file_t *file, + struct mca_io_base_file_t *private_data) +{ + /* Free the ompio module-specific data that was allocated in + _file_query(), above */ + + if (NULL != private_data) { + free(private_data); + } + + return OMPI_SUCCESS; +} + + +static int delete_query(char *filename, struct ompi_info_t *info, + struct mca_io_base_delete_t **private_data, + bool *usable, int *priority) +{ + /* Lookup our priority */ + + if (OMPI_SUCCESS != mca_base_param_lookup_int(delete_priority_param, + priority)) { + return OMPI_ERROR; + } + + *usable = true; + *private_data = NULL; + + return OMPI_SUCCESS; +} + +static int delete_select(char *filename, struct ompi_info_t *info, + struct mca_io_base_delete_t *private_data) +{ + int ret; + + OPAL_THREAD_LOCK (&mca_io_ompio_mutex); + ret = mca_io_ompio_file_delete (filename, info); + OPAL_THREAD_UNLOCK (&mca_io_ompio_mutex); + + return ret; +} +/* +static int io_progress (void) +{ + return OMPI_SUCCESS; +} +*/ diff --git a/ompi/mca/io/ompio/io_ompio_file_open.c 
b/ompi/mca/io/ompio/io_ompio_file_open.c new file mode 100644 index 0000000000..4a663df8f7 --- /dev/null +++ b/ompi/mca/io/ompio/io_ompio_file_open.c @@ -0,0 +1,603 @@ +/* + * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. + * Copyright (c) 2004-2005 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2005 The Regents of the University of California. + * All rights reserved. + * Copyright (c) 2008-2011 University of Houston. All rights reserved. + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#include "ompi_config.h" + +#include "ompi/communicator/communicator.h" +#include "ompi/info/info.h" +#include "ompi/file/file.h" +#include "ompi/mca/fs/fs.h" +#include "ompi/mca/fs/base/base.h" +#include "ompi/mca/fcoll/fcoll.h" +#include "ompi/mca/fcoll/base/base.h" +#include "ompi/mca/fbtl/fbtl.h" +#include "ompi/mca/fbtl/base/base.h" + +#include +#include "io_ompio.h" + +int +mca_io_ompio_file_open (ompi_communicator_t *comm, + char *filename, + int amode, + ompi_info_t *info, + ompi_file_t *fh) +{ + int ret = OMPI_SUCCESS; + mca_io_ompio_data_t *data; + int remote_arch; + + if (&ompi_mpi_comm_null.comm == comm) { + ret = MPI_ERR_COMM; + goto fn_fail; + } + + if (OMPI_COMM_IS_INTER (comm)) { + ret = MPI_ERR_COMM; + goto fn_fail; + } + + if ( ((amode&MPI_MODE_RDONLY)?1:0) + ((amode&MPI_MODE_RDWR)?1:0) + + ((amode&MPI_MODE_WRONLY)?1:0) != 1 ) { + ret = MPI_ERR_AMODE; + goto fn_fail; + } + + if ((amode & MPI_MODE_RDONLY) && + ((amode & MPI_MODE_CREATE) || (amode & MPI_MODE_EXCL))) { + ret = MPI_ERR_AMODE; + goto fn_fail; + } + + if ((amode & MPI_MODE_RDWR) && (amode & MPI_MODE_SEQUENTIAL)) { + ret = MPI_ERR_AMODE; + goto fn_fail; + } + + data 
= (mca_io_ompio_data_t *) fh->f_io_selected_data; + + data->ompio_fh.f_iov_type = MPI_DATATYPE_NULL; + ompi_io_ompio_set_file_defaults (&data->ompio_fh); + + /* the private communicator is used by later collective calls on this + file, so a failed duplication must abort the open */ + ret = ompi_comm_dup (comm, &data->ompio_fh.f_comm); + if (OMPI_SUCCESS != ret) { + goto fn_fail; + } + data->ompio_fh.f_rank = ompi_comm_rank (fh->f_comm); + data->ompio_fh.f_size = ompi_comm_size (fh->f_comm); + + data->ompio_fh.f_filename = fh->f_filename; + if (NULL == data->ompio_fh.f_filename) { + ret = OMPI_ERROR; + goto fn_fail; + } + + data->ompio_fh.f_amode = amode; + data->ompio_fh.f_info = fh->f_info; + data->ompio_fh.f_atomicity = 0; + /* + if (MPI_INFO_NULL != info) + { + ret = ompi_info_dup (info, &data->ompio_fh.f_info); + } + if (OMPI_SUCCESS != ret) + { + goto fn_fail; + } + */ + remote_arch = opal_local_arch; + data->ompio_fh.f_convertor = opal_convertor_create (remote_arch, 0); + + data->ompio_fh.f_fstype = 0; + ompi_io_ompio_resolve_fs_type (&data->ompio_fh, &data->ompio_fh.f_fstype); + + if (OMPI_SUCCESS != (ret = mca_fs_base_file_select (&data->ompio_fh, + NULL))) { + opal_output(1, "mca_fs_base_file_select() failed\n"); + goto fn_fail; + } + if (OMPI_SUCCESS != (ret = mca_fbtl_base_file_select (&data->ompio_fh, + NULL))) { + opal_output(1, "mca_fbtl_base_file_select() failed\n"); + goto fn_fail; + } + + if (OMPI_SUCCESS != (ret = mca_fcoll_base_file_select (&data->ompio_fh, + NULL))) { + opal_output(1, "mca_fcoll_base_file_select() failed\n"); + goto fn_fail; + } + + ret = data->ompio_fh.f_fs->fs_file_open (comm, + filename, + amode, + info, + &data->ompio_fh); + if (ret != OMPI_SUCCESS) { + goto fn_fail; + } + + fh->f_flags |= OMPIO_FILE_IS_OPEN; + + return OMPI_SUCCESS; + + fn_fail: + opal_output(1, "OPENING FILE \"%s\" FAILED\n", filename); + /* every jump to this label has stored a specific, non-success code in + ret (MPI_ERR_COMM, MPI_ERR_AMODE, a component error, ...) */ + return ret; +} + +int +mca_io_ompio_file_close (ompi_file_t *fh) +{ + int ret = OMPI_SUCCESS; + mca_io_ompio_data_t *data; + + data = (mca_io_ompio_data_t *) fh->f_io_selected_data; + + ret = data->ompio_fh.f_fs->fs_file_close (&data->ompio_fh); + + mca_fs_base_file_unselect (&data->ompio_fh); 
+ mca_fbtl_base_file_unselect (&data->ompio_fh); + /*mca_fcache_base_file_unselect (&data->ompio_fh);*/ + mca_fcoll_base_file_unselect (&data->ompio_fh); + + if (NULL != data->ompio_fh.f_io_array) { + free (data->ompio_fh.f_io_array); + data->ompio_fh.f_io_array = NULL; + } + + if (NULL != data->ompio_fh.f_procs_in_group) { + free (data->ompio_fh.f_procs_in_group); + data->ompio_fh.f_procs_in_group = NULL; + } + + if (NULL != data->ompio_fh.f_decoded_iov) { + free (data->ompio_fh.f_decoded_iov); + data->ompio_fh.f_decoded_iov = NULL; + } + + if (NULL != data->ompio_fh.f_convertor) { + free (data->ompio_fh.f_convertor); + data->ompio_fh.f_convertor = NULL; + } + + if (NULL != data->ompio_fh.f_datarep) { + free (data->ompio_fh.f_datarep); + data->ompio_fh.f_datarep = NULL; + } + + if (MPI_DATATYPE_NULL != data->ompio_fh.f_iov_type) { + ompi_datatype_destroy (&data->ompio_fh.f_iov_type); + } + + if (NULL != data->ompio_fh.f_comm) { + OBJ_RELEASE(data->ompio_fh.f_comm); + data->ompio_fh.f_comm = NULL; + } + /* + if (MPI_DATATYPE_NULL != data->ompio_fh.f_etype) + { + ompi_datatype_destroy (&data->ompio_fh.f_etype); + } + if (MPI_DATATYPE_NULL != data->ompio_fh.f_filetype) + { + ompi_datatype_destroy (&data->ompio_fh.f_filetype); + } + */ + /* + if (NULL != data->ompio_fh.f_filename) + { + free (data->ompio_fh.f_filename); + data->ompio_fh.f_filename = NULL; + } + + if (MPI_INFO_NULL != data->ompio_fh.f_info) + { + ompi_info_free (&data->ompio_fh.f_info); + } + if (MPI_COMM_NULL != data->ompio_fh.f_comm) + { + ompi_comm_free (&data->ompio_fh.f_comm); + } + */ + + return ret; +} + +int mca_io_ompio_file_delete (char *filename, + struct ompi_info_t *info) +{ + int ret = OMPI_SUCCESS; + + ret = unlink(filename); + + if (0 > ret) { + return OMPI_ERROR; + } + + return OMPI_SUCCESS; +} + +int +mca_io_ompio_file_preallocate (ompi_file_t *fh, + OMPI_MPI_OFFSET_TYPE diskspace) +{ + int ret = OMPI_SUCCESS, cycles, i; + OMPI_MPI_OFFSET_TYPE tmp, current_size, size, written, len; + 
mca_io_ompio_data_t *data; + char *buf = NULL; + ompi_status_public_t *status = NULL; + + data = (mca_io_ompio_data_t *) fh->f_io_selected_data; + + tmp = diskspace; + + data->ompio_fh.f_comm->c_coll.coll_bcast (&tmp, + 1, + MPI_LONG_LONG, + OMPIO_ROOT, + data->ompio_fh.f_comm, + data->ompio_fh.f_comm->c_coll.coll_bcast_module); + + if (tmp != diskspace) { + return OMPI_ERROR; + } + + /* ROMIO explanation + On file systems with no preallocation function, we have to + explicitly write to allocate space. Since there could be holes in the file, + we need to read up to the current file size, write it back, + and then write beyond that depending on how much + preallocation is needed. + */ + if (OMPIO_ROOT == data->ompio_fh.f_rank) { + ret = data->ompio_fh.f_fs->fs_file_get_size (&data->ompio_fh, + &current_size); + if (OMPI_SUCCESS != ret) { + /* current_size is undefined; do not size the loops from it */ + ret = OMPI_ERROR; + goto prealloc_cleanup; + } + + size = diskspace; + if (size > current_size) { + size = current_size; + } + + cycles = (size + OMPIO_PREALLOC_MAX_BUF_SIZE - 1)/ + OMPIO_PREALLOC_MAX_BUF_SIZE; + buf = (char *) malloc (OMPIO_PREALLOC_MAX_BUF_SIZE); + if (NULL == buf) { + opal_output(1, "OUT OF MEMORY\n"); + ret = OMPI_ERR_OUT_OF_RESOURCE; + goto prealloc_cleanup; + } + written = 0; + + /* Failures below jump to prealloc_cleanup rather than returning, so + that buf is released and the root still enters the barrier the + other ranks are waiting in. */ + for (i=0; i<cycles; i++) { + len = OMPIO_PREALLOC_MAX_BUF_SIZE; + if (len > size-written) { + len = size - written; + } + ret = mca_io_ompio_file_read (fh, buf, len, MPI_BYTE, status); + if (ret != OMPI_SUCCESS) { + ret = OMPI_ERROR; + goto prealloc_cleanup; + } + ret = mca_io_ompio_file_write (fh, buf, len, MPI_BYTE, status); + if (ret != OMPI_SUCCESS) { + ret = OMPI_ERROR; + goto prealloc_cleanup; + } + written += len; + } + + if (diskspace > current_size) { + memset(buf, 0, OMPIO_PREALLOC_MAX_BUF_SIZE); + size = diskspace - current_size; + cycles = (size + OMPIO_PREALLOC_MAX_BUF_SIZE - 1) / + OMPIO_PREALLOC_MAX_BUF_SIZE; + for (i=0; i<cycles; i++) { + len = OMPIO_PREALLOC_MAX_BUF_SIZE; + if (len > diskspace-written) { + len = diskspace - written; + } + ret = mca_io_ompio_file_write (fh, buf, len, MPI_BYTE, status); + if (ret != OMPI_SUCCESS) { + ret = OMPI_ERROR; + goto prealloc_cleanup; + } + written += len; + } + } + prealloc_cleanup: + if (NULL != buf) { + free (buf); + buf = NULL; + } + } + fh->f_comm->c_coll.coll_barrier (fh->f_comm, + 
fh->f_comm->c_coll.coll_barrier_module); + return ret; +} + +int +mca_io_ompio_file_set_size (ompi_file_t *fh, + OMPI_MPI_OFFSET_TYPE size) +{ + int ret = OMPI_SUCCESS; + OMPI_MPI_OFFSET_TYPE tmp; + mca_io_ompio_data_t *data; + + data = (mca_io_ompio_data_t *) fh->f_io_selected_data; + + tmp = size; + + data->ompio_fh.f_comm->c_coll.coll_bcast (&tmp, + 1, + MPI_LONG_LONG, + OMPIO_ROOT, + data->ompio_fh.f_comm, + data->ompio_fh.f_comm->c_coll.coll_bcast_module); + + if (tmp != size) { + return OMPI_ERROR; + } + + ret = data->ompio_fh.f_fs->fs_file_set_size (&data->ompio_fh, size); + + return ret; +} + +int +mca_io_ompio_file_get_size (ompi_file_t *fh, + OMPI_MPI_OFFSET_TYPE *size) +{ + int ret = OMPI_SUCCESS; + mca_io_ompio_data_t *data; + + data = (mca_io_ompio_data_t *) fh->f_io_selected_data; + + ret = data->ompio_fh.f_fs->fs_file_get_size (&data->ompio_fh, size); + + return ret; +} + + +int +mca_io_ompio_file_get_amode (ompi_file_t *fh, + int *amode) +{ + mca_io_ompio_data_t *data; + + data = (mca_io_ompio_data_t *) fh->f_io_selected_data; + + *amode = data->ompio_fh.f_amode; + + return OMPI_SUCCESS; +} + + +int +mca_io_ompio_file_set_info (ompi_file_t *fh, + ompi_info_t *info) +{ + int ret = OMPI_SUCCESS; + mca_io_ompio_data_t *data; + + data = (mca_io_ompio_data_t *) fh->f_io_selected_data; + + ret = data->ompio_fh.f_fs->fs_file_set_info (&data->ompio_fh, info); + + return ret; +} + + +int +mca_io_ompio_file_get_info (ompi_file_t *fh, + ompi_info_t ** info_used) +{ + int ret = OMPI_SUCCESS; + mca_io_ompio_data_t *data; + + data = (mca_io_ompio_data_t *) fh->f_io_selected_data; + + ret = ompi_info_dup (data->ompio_fh.f_info, info_used); + + return ret; +} + +int +mca_io_ompio_file_get_type_extent (ompi_file_t *fh, + struct ompi_datatype_t *datatype, + MPI_Aint *extent) +{ + opal_datatype_type_extent (&datatype->super, extent); + return OMPI_SUCCESS; +} + + +int +mca_io_ompio_file_set_atomicity (ompi_file_t *fh, + int flag) +{ + int tmp; + mca_io_ompio_data_t 
*data; + + data = (mca_io_ompio_data_t *) fh->f_io_selected_data; + + if (flag) { + flag = 1; + } + + /* check if the atomicity flag is the same on all processes */ + tmp = flag; + data->ompio_fh.f_comm->c_coll.coll_bcast (&tmp, + 1, + MPI_INT, + OMPIO_ROOT, + data->ompio_fh.f_comm, + data->ompio_fh.f_comm->c_coll.coll_bcast_module); + + if (tmp != flag) { + return OMPI_ERROR; + } + + data->ompio_fh.f_atomicity = flag; + + return OMPI_SUCCESS; +} + +int +mca_io_ompio_file_get_atomicity (ompi_file_t *fh, + int *flag) +{ + mca_io_ompio_data_t *data; + + data = (mca_io_ompio_data_t *) fh->f_io_selected_data; + + *flag = data->ompio_fh.f_atomicity; + + return OMPI_SUCCESS; +} + +int +mca_io_ompio_file_sync (ompi_file_t *fh) +{ + int ret = OMPI_SUCCESS; + mca_io_ompio_data_t *data; + + data = (mca_io_ompio_data_t *) fh->f_io_selected_data; + + ret = data->ompio_fh.f_fs->fs_file_sync (&data->ompio_fh); + + return ret; +} + + +int +mca_io_ompio_file_seek (ompi_file_t *fh, + OMPI_MPI_OFFSET_TYPE off, + int whence) +{ + int ret = OMPI_SUCCESS; + mca_io_ompio_data_t *data; + OMPI_MPI_OFFSET_TYPE offset, temp_offset; + + data = (mca_io_ompio_data_t *) fh->f_io_selected_data; + + offset = off * data->ompio_fh.f_etype_size; + + switch(whence) { + case MPI_SEEK_SET: + if (offset < 0) { + return OMPI_ERROR; + } + break; + case MPI_SEEK_CUR: + offset += data->ompio_fh.f_position_in_file_view; + offset += data->ompio_fh.f_disp; + if (offset < 0) { + return OMPI_ERROR; + } + break; + case MPI_SEEK_END: + ret = data->ompio_fh.f_fs->fs_file_get_size (&data->ompio_fh, + &temp_offset); + offset += temp_offset; + if (offset < 0 || OMPI_SUCCESS != ret) { + return OMPI_ERROR; + } + break; + default: + return OMPI_ERROR; + } + + /*printf ("seeking to: %lld \n", offset);*/ + ret = ompi_io_ompio_set_explicit_offset (&data->ompio_fh, + offset/data->ompio_fh.f_etype_size); + return ret; +} + +int +mca_io_ompio_file_get_position (ompi_file_t *fh, + OMPI_MPI_OFFSET_TYPE *offset) +{ + 
mca_io_ompio_data_t *data; + + data = (mca_io_ompio_data_t *) fh->f_io_selected_data; + + *offset = data->ompio_fh.f_position_in_file_view / data->ompio_fh.f_etype_size; + + return OMPI_SUCCESS; +} + + +int +mca_io_ompio_file_get_byte_offset (ompi_file_t *fh, + OMPI_MPI_OFFSET_TYPE offset, + OMPI_MPI_OFFSET_TYPE *disp) +{ + mca_io_ompio_data_t *data; + int i, k, index; + size_t position; + size_t total_bytes; + size_t temp_offset; + + data = (mca_io_ompio_data_t *) fh->f_io_selected_data; + + temp_offset = data->ompio_fh.f_view_extent * + (offset*data->ompio_fh.f_etype_size / data->ompio_fh.f_view_size); + + position = 0; + total_bytes = (offset*data->ompio_fh.f_etype_size) % data->ompio_fh.f_view_size; + index = 0; + i = total_bytes; + k = 0; + + while (1) { + k += data->ompio_fh.f_decoded_iov[index].iov_len; + if (i >= k) { + i = i - data->ompio_fh.f_decoded_iov[index].iov_len; + position += data->ompio_fh.f_decoded_iov[index].iov_len; + index = index+1; + } + else { + break; + } + } + + *disp = data->ompio_fh.f_disp + temp_offset + + (OMPI_MPI_OFFSET_TYPE)data->ompio_fh.f_decoded_iov[index].iov_base; + + return OMPI_SUCCESS; +} + +int +mca_io_ompio_file_seek_shared (ompi_file_t *fh, + OMPI_MPI_OFFSET_TYPE offset, + int whence) +{ + int ret = OMPI_SUCCESS; + mca_io_ompio_data_t *data; + + data = (mca_io_ompio_data_t *) fh->f_io_selected_data; + + return ret; +} + + +int +mca_io_ompio_file_get_position_shared (ompi_file_t *fh, + OMPI_MPI_OFFSET_TYPE * offset) +{ + int ret = OMPI_SUCCESS; + mca_io_ompio_data_t *data; + + data = (mca_io_ompio_data_t *) fh->f_io_selected_data; + + return ret; +} diff --git a/ompi/mca/io/ompio/io_ompio_file_read.c b/ompi/mca/io/ompio/io_ompio_file_read.c new file mode 100644 index 0000000000..837c3c6488 --- /dev/null +++ b/ompi/mca/io/ompio/io_ompio_file_read.c @@ -0,0 +1,504 @@ +/* + * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. 
+ * Copyright (c) 2004-2005 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2005 The Regents of the University of California. + * All rights reserved. + * Copyright (c) 2008-2011 University of Houston. All rights reserved. + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#include "ompi_config.h" + +#include "ompi/communicator/communicator.h" +#include "ompi/info/info.h" +#include "ompi/file/file.h" +#include "ompi/mca/fs/fs.h" +#include "ompi/mca/fs/base/base.h" +#include "ompi/mca/fcoll/fcoll.h" +#include "ompi/mca/fcoll/base/base.h" +#include "ompi/mca/fcoll/dynamic/fcoll_dynamic.h" +#include "ompi/mca/fcoll/static/fcoll_static.h" +#include "ompi/mca/fcoll/individual/fcoll_individual.h" +#include "ompi/mca/fcoll/two_phase/fcoll_two_phase.h" +#include "ompi/mca/fcoll/ylib/fcoll_ylib.h" +#include "ompi/mca/fbtl/fbtl.h" +#include "ompi/mca/fbtl/base/base.h" + +#include "io_ompio.h" +#include "math.h" + +/* + * Individual read at the current position of the file view. + * + * The user buffer (buf, count, datatype) is decoded into an iovec and the + * request is processed in cycles of mca_io_ompio_cycle_buffer_size bytes. + * For every cycle an f_io_array is built that pairs memory segments of the + * decoded buffer with offsets taken from the decoded file view + * (fh->f_decoded_iov), handed to the selected fbtl's fbtl_preadv, and freed. + * + * NOTE(review): status is accepted but never written by this routine. + */ +int +mca_io_ompio_file_read (ompi_file_t *fp, + void *buf, + int count, + struct ompi_datatype_t *datatype, + ompi_status_public_t *status) +{ + int ret = OMPI_SUCCESS; + mca_io_ompio_data_t *data; + mca_io_ompio_file_t *fh; + + size_t total_bytes_read = 0; /* total bytes that have been read*/ + size_t bytes_to_read_in_cycle = 0; /* left to be read in a cycle*/ + size_t bytes_per_cycle = 0; /* total read in each cycle by each process*/ + int index = 0; + int cycles = 0; + + uint32_t iov_count = 0; + struct iovec *decoded_iov = NULL; + + size_t max_data = 0; + int i = 0; /* index into the decoded iovec of the buffer */ + int j = 0; /* index into the file view iovec */ + int k = 0; /* index into the io_array */ + size_t sum_previous_counts = 0; + size_t sum_previous_length = 0; + + data = (mca_io_ompio_data_t *) 
fp->f_io_selected_data; + fh = &data->ompio_fh; + + ompi_io_ompio_decode_datatype (fh, + datatype, + count, + buf, + &max_data, + &decoded_iov, + &iov_count); + + bytes_per_cycle = mca_io_ompio_cycle_buffer_size; + if (0 == bytes_per_cycle) { + /* a zero cycle buffer cannot make progress (and would divide by 0) */ + free (decoded_iov); + return OMPI_ERROR; + } + /* integer ceiling division: a float quotient carries only 24 significant + bits and can miscount the cycles of a large request */ + cycles = (int) ((max_data + bytes_per_cycle - 1) / bytes_per_cycle); + +#if 0 + printf ("Bytes per Cycle: %d Cycles: %d\n",bytes_per_cycle, cycles); +#endif + + sum_previous_length = fh->f_position_in_file_view; + j = fh->f_index_in_file_view; + + for (index = 0; index < cycles; index++) { + OPAL_PTRDIFF_TYPE disp; + int block = 1; + k = 0; + if ((index == cycles-1) && (max_data % bytes_per_cycle)) { + bytes_to_read_in_cycle = max_data % bytes_per_cycle; + } + else { + bytes_to_read_in_cycle = bytes_per_cycle; + } + + /* + ompi_io_ompio_create_list (fh->f_decoded_iov, fh->f_iov_count, + decoded_iov, iov_count, + &total_bytes_read, &bytes_to_read_in_cycle, + &sum_previous_counts, &sum_previous_length, + &decoded_iov_index, &fview_iov_index, + &fh->f_io_array, &fh->f_num_of_io_entries); + */ + + fh->f_io_array = (mca_io_ompio_io_array_t *)malloc + (OMPIO_IOVEC_INITIAL_SIZE * sizeof (mca_io_ompio_io_array_t)); + if (NULL == fh->f_io_array) { + opal_output(1, "OUT OF MEMORY\n"); + free (decoded_iov); + return OMPI_ERR_OUT_OF_RESOURCE; + } + + while (bytes_to_read_in_cycle) { + /* reallocate if needed */ + if (OMPIO_IOVEC_INITIAL_SIZE*block <= k) { + /* grow through a temporary: on failure realloc leaves the old + array allocated, and it must still be released */ + mca_io_ompio_io_array_t *grown; + block ++; + grown = (mca_io_ompio_io_array_t *)realloc + (fh->f_io_array, OMPIO_IOVEC_INITIAL_SIZE * + block * sizeof (mca_io_ompio_io_array_t)); + if (NULL == grown) { + opal_output(1, "OUT OF MEMORY\n"); + free (fh->f_io_array); + fh->f_io_array = NULL; + free (decoded_iov); + return OMPI_ERR_OUT_OF_RESOURCE; + } + fh->f_io_array = grown; + } + + if (decoded_iov[i].iov_len - + (total_bytes_read - sum_previous_counts) <= 0) { + sum_previous_counts += decoded_iov[i].iov_len; + i = i + 1; + } + + disp = (OPAL_PTRDIFF_TYPE)decoded_iov[i].iov_base + + (total_bytes_read - sum_previous_counts); + fh->f_io_array[k].memory_address = (IOVBASE_TYPE *)disp; + + if (decoded_iov[i].iov_len - + (total_bytes_read - sum_previous_counts) >= + 
bytes_to_read_in_cycle) { + fh->f_io_array[k].length = bytes_to_read_in_cycle; + } + else { + fh->f_io_array[k].length = decoded_iov[i].iov_len - + (total_bytes_read - sum_previous_counts); + } + if (! (fh->f_flags & OMPIO_CONTIGUOUS_FVIEW)) { + if (fh->f_decoded_iov[j].iov_len - + (fh->f_total_bytes - sum_previous_length) <= 0) { + sum_previous_length += fh->f_decoded_iov[j].iov_len; + j = j + 1; + if (j == (int)fh->f_iov_count) { + j = 0; + sum_previous_length = 0; + fh->f_offset += fh->f_view_extent; + fh->f_position_in_file_view = sum_previous_length; + fh->f_index_in_file_view = j; + fh->f_total_bytes = 0; + } + } + } + + disp = (OPAL_PTRDIFF_TYPE)fh->f_decoded_iov[j].iov_base + + (fh->f_total_bytes - sum_previous_length); + fh->f_io_array[k].offset = (IOVBASE_TYPE *)(disp + fh->f_offset); + + if (! (fh->f_flags & OMPIO_CONTIGUOUS_FVIEW)) { + if (fh->f_decoded_iov[j].iov_len - + (fh->f_total_bytes - sum_previous_length) + < fh->f_io_array[k].length) { + fh->f_io_array[k].length = fh->f_decoded_iov[j].iov_len - + (fh->f_total_bytes - sum_previous_length); + } + } + + total_bytes_read += fh->f_io_array[k].length; + fh->f_total_bytes += fh->f_io_array[k].length; + bytes_to_read_in_cycle -= fh->f_io_array[k].length; + k = k + 1; + } + fh->f_position_in_file_view = sum_previous_length; + fh->f_index_in_file_view = j; + fh->f_num_of_io_entries = k; + +#if 0 + if (fh->f_rank == 0) { + int i; + printf("*************************** %d\n", fh->f_num_of_io_entries); + + for (i=0 ; if_num_of_io_entries ; i++) { + printf(" ADDRESS: %p OFFSET: %p LENGTH: %d\n", + fh->f_io_array[i].memory_address, + fh->f_io_array[i].offset, + fh->f_io_array[i].length); + } + } +#endif + + if (fh->f_num_of_io_entries) { + fh->f_fbtl->fbtl_preadv (fh, NULL); + } + + fh->f_num_of_io_entries = 0; + if (NULL != fh->f_io_array) { + free (fh->f_io_array); + fh->f_io_array = NULL; + } + } + + if (NULL != decoded_iov) { + free (decoded_iov); + decoded_iov = NULL; + } + + return ret; +} + +int 
+mca_io_ompio_file_read_at (ompi_file_t *fh, + OMPI_MPI_OFFSET_TYPE offset, + void *buf, + int count, + struct ompi_datatype_t *datatype, + ompi_status_public_t * status) +{ + int ret = OMPI_SUCCESS; + mca_io_ompio_data_t *data; + + data = (mca_io_ompio_data_t *) fh->f_io_selected_data; + + /* position the view first; do not read from a stale offset if that fails */ + ret = ompi_io_ompio_set_explicit_offset (&data->ompio_fh, offset); + if (OMPI_SUCCESS != ret) { + return ret; + } + + /* hand the outcome of the read back to the caller instead of + unconditionally reporting success */ + ret = mca_io_ompio_file_read (fh, + buf, + count, + datatype, + status); + return ret; +} + +int +mca_io_ompio_file_read_all (ompi_file_t *fh, + void *buf, + int count, + struct ompi_datatype_t *datatype, + ompi_status_public_t * status) +{ + int ret = OMPI_SUCCESS; + mca_io_ompio_data_t *data; + + data = (mca_io_ompio_data_t *) fh->f_io_selected_data; + + ret = data->ompio_fh. + f_fcoll->fcoll_file_read_all (&data->ompio_fh, + buf, + count, + datatype, + status); + + return ret; +} + +int +mca_io_ompio_file_read_all_begin (ompi_file_t *fh, + void *buf, + int count, + struct ompi_datatype_t *datatype) +{ + int ret = OMPI_SUCCESS; + mca_io_ompio_data_t *data; + + data = (mca_io_ompio_data_t *) fh->f_io_selected_data; + + ret = data->ompio_fh. + f_fcoll->fcoll_file_read_all_begin (&data->ompio_fh, + buf, + count, + datatype); + + return ret; +} + +int +mca_io_ompio_file_read_all_end (ompi_file_t *fh, + void *buf, + ompi_status_public_t * status) +{ + int ret = OMPI_SUCCESS; + mca_io_ompio_data_t *data; + + data = (mca_io_ompio_data_t *) fh->f_io_selected_data; + + ret = data->ompio_fh. + f_fcoll->fcoll_file_read_all_end (&data->ompio_fh, + buf, + status); + + return ret; +} + +int +mca_io_ompio_file_read_at_all (ompi_file_t *fh, + OMPI_MPI_OFFSET_TYPE offset, + void *buf, + int count, + struct ompi_datatype_t *datatype, + ompi_status_public_t * status) +{ + int ret = OMPI_SUCCESS; + mca_io_ompio_data_t *data; + + data = (mca_io_ompio_data_t *) fh->f_io_selected_data; + + ompi_io_ompio_set_explicit_offset (&data->ompio_fh, offset); + + ret = data->ompio_fh. 
+ f_fcoll->fcoll_file_read_all (&data->ompio_fh, + buf, + count, + datatype, + status); + return ret; +} + +int +mca_io_ompio_file_read_at_all_begin (ompi_file_t *fh, + OMPI_MPI_OFFSET_TYPE offset, + void *buf, + int count, + struct ompi_datatype_t *datatype) +{ + int ret = OMPI_SUCCESS; + mca_io_ompio_data_t *data; + + data = (mca_io_ompio_data_t *) fh->f_io_selected_data; + + ompi_io_ompio_set_explicit_offset (&data->ompio_fh, offset); + + ret = data->ompio_fh. + f_fcoll->fcoll_file_read_all_begin (&data->ompio_fh, + buf, + count, + datatype); + + return ret; +} + +int +mca_io_ompio_file_read_at_all_end (ompi_file_t *fh, + void *buf, + ompi_status_public_t * status) +{ + int ret = OMPI_SUCCESS; + mca_io_ompio_data_t *data; + + data = (mca_io_ompio_data_t *) fh->f_io_selected_data; + + ret = data->ompio_fh. + f_fcoll->fcoll_file_read_all_end (&data->ompio_fh, + buf, + status); + + return ret; +} + +int +mca_io_ompio_file_iread (ompi_file_t *fh, + void *buf, + int count, + struct ompi_datatype_t *datatype, + ompi_request_t **request) +{ + int ret = OMPI_SUCCESS; + mca_io_ompio_data_t *data; + + data = (mca_io_ompio_data_t *) fh->f_io_selected_data; + + return ret; +} + +int +mca_io_ompio_file_iread_at (ompi_file_t *fh, + OMPI_MPI_OFFSET_TYPE offset, + void *buf, + int count, + struct ompi_datatype_t *datatype, + ompi_request_t **request) +{ + int ret = OMPI_SUCCESS; + mca_io_ompio_data_t *data; + + data = (mca_io_ompio_data_t *) fh->f_io_selected_data; + + return ret; +} + +int +mca_io_ompio_file_read_shared (ompi_file_t *fh, + void *buf, + int count, + struct ompi_datatype_t *datatype, + ompi_status_public_t * status) +{ + int ret = OMPI_SUCCESS; + mca_io_ompio_data_t *data; + + data = (mca_io_ompio_data_t *) fh->f_io_selected_data; + + return ret; +} + +int +mca_io_ompio_file_iread_shared (ompi_file_t *fh, + void *buf, + int count, + struct ompi_datatype_t *datatype, + ompi_request_t **request) +{ + int ret = OMPI_SUCCESS; + mca_io_ompio_data_t *data; + + data = 
(mca_io_ompio_data_t *) fh->f_io_selected_data; + + return ret; +} + +int +mca_io_ompio_file_read_ordered (ompi_file_t *fh, + void *buf, + int count, + struct ompi_datatype_t *datatype, + ompi_status_public_t * status) +{ + int ret = OMPI_SUCCESS; + mca_io_ompio_data_t *data; + + data = (mca_io_ompio_data_t *) fh->f_io_selected_data; + + return ret; +} + +int +mca_io_ompio_file_read_ordered_begin (ompi_file_t *fh, + void *buf, + int count, + struct ompi_datatype_t *datatype) +{ + int ret = OMPI_SUCCESS; + mca_io_ompio_data_t *data; + + data = (mca_io_ompio_data_t *) fh->f_io_selected_data; + + return ret; +} + +int +mca_io_ompio_file_read_ordered_end (ompi_file_t *fh, + void *buf, + ompi_status_public_t * status) +{ + int ret = OMPI_SUCCESS; + mca_io_ompio_data_t *data; + + data = (mca_io_ompio_data_t *) fh->f_io_selected_data; + + return ret; +} + + + /* + if (! (fh->f_flags & OMPIO_AGGREGATOR_IS_SET)) { + if (!strcmp (data->ompio_fh.f_fcoll_component->mca_component_name, + "dynamic")) { + printf ("Dynamic: %d\n",mca_fcoll_dynamic_num_io_procs); + ompi_io_ompio_set_aggregator_props (&data->ompio_fh, + mca_fcoll_dynamic_num_io_procs); + } + else if (!strcmp (data->ompio_fh.f_fcoll_component->mca_component_name, + "static")) { + printf ("Static: %d\n",mca_fcoll_static_num_io_procs); + ompi_io_ompio_set_aggregator_props (&data->ompio_fh, + mca_fcoll_static_num_io_procs); + } + else if (!strcmp (data->ompio_fh.f_fcoll_component->mca_component_name, + "two_phase")) { + printf ("Two Phase: %d\n",mca_fcoll_two_phase_num_io_procs); + ompi_io_ompio_set_aggregator_props (&data->ompio_fh, + mca_fcoll_two_phase_num_io_procs); + mca_fcoll_two_phase_num_io_procs = + ceil((float)data->ompio_fh.f_size/data->ompio_fh.f_procs_per_group); + data->ompio_fh.f_aggregator_index = + ceil((float)data->ompio_fh.f_size/mca_fcoll_two_phase_num_io_procs); + } + else if (!strcmp (data->ompio_fh.f_fcoll_component->mca_component_name, + "ylib")) { + ompi_io_ompio_set_aggregator_props 
(&data->ompio_fh, + mca_fcoll_ylib_num_io_procs); + mca_fcoll_ylib_num_io_procs = + ceil((float)data->ompio_fh.f_size/data->ompio_fh.f_procs_per_group); + data->ompio_fh.f_aggregator_index = + ceil((float)data->ompio_fh.f_size/mca_fcoll_ylib_num_io_procs); + } + } + */ diff --git a/ompi/mca/io/ompio/io_ompio_file_set_view.c b/ompi/mca/io/ompio/io_ompio_file_set_view.c new file mode 100644 index 0000000000..85b0cd82ad --- /dev/null +++ b/ompi/mca/io/ompio/io_ompio_file_set_view.c @@ -0,0 +1,300 @@ +/* + * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. + * Copyright (c) 2004-2005 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2005 The Regents of the University of California. + * All rights reserved. + * Copyright (c) 2008-2011 University of Houston. All rights reserved. 
+ * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#include "ompi_config.h" + +#include "ompi/communicator/communicator.h" +#include "ompi/info/info.h" +#include "ompi/file/file.h" +#include "ompi/mca/fs/fs.h" +#include "ompi/mca/fs/base/base.h" +#include "ompi/mca/fcoll/fcoll.h" +#include "ompi/mca/fcoll/base/base.h" + +#include "opal/datatype/opal_convertor.h" +#include "ompi/datatype/ompi_datatype.h" +#include +#include + +#include +#include "io_ompio.h" + +OMPI_MPI_OFFSET_TYPE get_contiguous_chunk_size (mca_io_ompio_file_t *); + +int mca_io_ompio_file_set_view (ompi_file_t *fp, + OMPI_MPI_OFFSET_TYPE disp, + ompi_datatype_t *etype, + ompi_datatype_t *filetype, + char *datarep, + ompi_info_t *info) +{ + mca_io_ompio_data_t *data; + mca_io_ompio_file_t *fh; + size_t max_data = 0; + MPI_Aint lb,ub; + + data = (mca_io_ompio_data_t *) fp->f_io_selected_data; + fh = &data->ompio_fh; + + if (NULL != fh->f_decoded_iov) { + free (fh->f_decoded_iov); + fh->f_decoded_iov = NULL; + } + + if (NULL != fh->f_datarep) { + free (fh->f_datarep); + fh->f_datarep = NULL; + } + + fh->f_flags |= OMPIO_FILE_VIEW_IS_SET; + fh->f_disp = disp; + fh->f_offset += disp; + fh->f_datarep = strdup (datarep); + + fh->f_iov_count = 0; + + if (opal_datatype_is_contiguous_memory_layout(&etype->super,1)) { + if (opal_datatype_is_contiguous_memory_layout(&filetype->super,1)) { + fh->f_flags |= OMPIO_CONTIGUOUS_FVIEW; + } + } + + ompi_io_ompio_decode_datatype (fh, + filetype, + 1, + NULL, + &max_data, + &fh->f_decoded_iov, + &fh->f_iov_count); + /* + if (0 == fh->f_rank) { + int i; + printf ("%d Entries: \n",fh->f_iov_count); + for (i=0 ; if_iov_count ; i++) { + printf ("\t{%p, %lld}\n", + fh->f_decoded_iov[i].iov_base, + fh->f_decoded_iov[i].iov_len); + } + } + */ + /* + * Create a derived datatype for the created iovec + + types[0] = &ompi_mpi_long.dt; + types[1] = &ompi_mpi_long.dt; + MPI_Address( fh->f_decoded_iov, d); + MPI_Address( &fh->f_decoded_iov[0].iov_len, 
d+1); + base = d[0]; + for (i=0 ; i<2 ; i++) { + d[i] -= base; + } + ompi_datatype_create_struct (2, + blocklen, + d, + types, + &fh->f_iov_type); + ompi_datatype_commit (&fh->f_iov_type); + */ + opal_datatype_get_extent(&filetype->super, &lb, &fh->f_view_extent); + opal_datatype_type_ub (&filetype->super, &ub); + opal_datatype_type_size (&etype->super, &fh->f_etype_size); + opal_datatype_type_size (&filetype->super, &fh->f_view_size); + ompi_datatype_duplicate (etype, &fh->f_etype); + ompi_datatype_duplicate (filetype, &fh->f_filetype); + + fh->f_cc_size = get_contiguous_chunk_size (fh); + /* + mca_fcoll_base_param = mca_base_param_find("fcoll", NULL, NULL); + mca_base_param_lookup_string (mca_fcoll_base_param, &names); + + if (NULL == names) { + if ((int)cc_size >= mca_io_ompio_bytes_per_agg && + cc_size >= fh->f_stripe_size) { + mca_base_param_set_string(mca_fcoll_base_param, "individual"); + } + if ((int)cc_size < mca_io_ompio_bytes_per_agg && + cc_size >= fh->f_stripe_size) { + mca_base_param_set_string(mca_fcoll_base_param, "dynamic"); + } + else if ((int)cc_size < mca_io_ompio_bytes_per_agg && + cc_size < fh->f_stripe_size) { + mca_base_param_set_string(mca_fcoll_base_param, "two_phase"); + } + } + */ + if (OMPI_SUCCESS != mca_fcoll_base_file_select (&data->ompio_fh, + NULL)) { + opal_output(1, "mca_fcoll_base_file_select() failed\n"); + return OMPI_ERROR; + } + /* + printf ("%d: LB=%d UB=%d Extent=%d Size=%d\n", + fh->f_rank,lb,ub,fh->f_view_extent,fh->f_view_size); + */ + /* + ompi_ddt_type_extent (fh->f_etype, &fh->f_etype_extent); + ompi_ddt_type_extent (fh->f_filetype, &fh->f_filetype_extent); + */ + + return OMPI_SUCCESS; +} + +int mca_io_ompio_file_get_view (struct ompi_file_t *fp, + OMPI_MPI_OFFSET_TYPE *disp, + struct ompi_datatype_t **etype, + struct ompi_datatype_t **filetype, + char *datarep) +{ + mca_io_ompio_data_t *data; + mca_io_ompio_file_t *fh; + + data = (mca_io_ompio_data_t *) fp->f_io_selected_data; + fh = &data->ompio_fh; + + *disp = 
fh->f_disp;
+    ompi_datatype_duplicate (fh->f_etype, etype);
+    ompi_datatype_duplicate (fh->f_filetype, filetype);
+    strcpy (datarep, fh->f_datarep);
+
+    return OMPI_SUCCESS;
+}
+
+/*
+ * Compute the average length of the entries of the decoded file view
+ * (fh->f_decoded_iov), averaged again over all processes of fh->f_comm.
+ *
+ * Side effect: OMPIO_UNIFORM_FVIEW is set in fh->f_flags when all local
+ * entries have the same length and the local average equals the global one.
+ *
+ * This is a collective call (it performs an allreduce on fh->f_comm).
+ *
+ * Returns the global average chunk length.
+ */
+OMPI_MPI_OFFSET_TYPE get_contiguous_chunk_size (mca_io_ompio_file_t *fh)
+{
+    int uniform = 1;
+    int i = 0;
+    OMPI_MPI_OFFSET_TYPE avg = 0;
+    OMPI_MPI_OFFSET_TYPE global_avg = 0;
+    /* The reduction is declared as MPI_LONG, so the buffers handed to it
+     * must really be C longs, whatever OMPI_MPI_OFFSET_TYPE is configured
+     * to be. */
+    long local_avg = 0;
+    long summed_avg = 0;
+
+    for (i=0 ; i<(int)fh->f_iov_count ; i++) {
+        avg += fh->f_decoded_iov[i].iov_len;
+        if (i && uniform) {
+            if (fh->f_decoded_iov[i].iov_len != fh->f_decoded_iov[i-1].iov_len) {
+                uniform = 0;
+            }
+        }
+    }
+    /* An empty file view has no chunks: keep the average at 0 instead of
+     * dividing by zero. */
+    if (0 != fh->f_iov_count) {
+        avg = avg/fh->f_iov_count;
+    }
+
+    local_avg = (long) avg;
+    /* NOTE(review): the return code of the allreduce is not checked; this
+     * function has no way to report an error to its caller. */
+    fh->f_comm->c_coll.coll_allreduce (&local_avg,
+                                       &summed_avg,
+                                       1,
+                                       MPI_LONG,
+                                       MPI_SUM,
+                                       fh->f_comm,
+                                       fh->f_comm->c_coll.coll_allreduce_module);
+    global_avg = (OMPI_MPI_OFFSET_TYPE) summed_avg;
+    global_avg = global_avg/fh->f_size;
+
+    if (global_avg == avg && uniform) {
+        fh->f_flags |= OMPIO_UNIFORM_FVIEW;
+    }
+
+    return global_avg;
+}
+
+
+
+    /*
+    opal_convertor_clone (fh->f_convertor, &convertor, 0);
+
+    if (OMPI_SUCCESS != opal_convertor_prepare_for_send (&convertor, filetype, 1, NULL))
+    {
+        printf ("Cannot attach the datatype to a convertor\n");
+        return OMPI_ERROR;
+    }
+
+    remaining_length = 1 * filetype->size;
+
+    printf ("FILETYPE SIZE: %d\n",filetype->size);
+    while (0 == opal_convertor_raw(&convertor, fh->f_decoded_iov, &fh->f_iovec_count, &max_data))
+    {
+#if 1
+        printf ("New raw extraction (fh->f_iovec_count = %d, max_data = %d)\n",
+                fh->f_iovec_count, max_data);
+        for (i = 0; i < fh->f_iovec_count; i++)
+        {
+            printf ("\t{%p, %d}\n", fh->f_decoded_iov[i].iov_base, fh->f_decoded_iov[i].iov_len);
+        }
+#endif
+        remaining_length -= max_data;
+        fh->f_iovec_count = iov_num;
+    }
+#if 1
+    printf ("LAST extraction (fh->f_iovec_count = %d, max_data = %d)\n",
+            fh->f_iovec_count, max_data);
+    for (i = 0; i < fh->f_iovec_count; i++)
+    {
+        printf ("\t{%p, %d}\n", fh->f_decoded_iov[i].iov_base, fh->f_decoded_iov[i].iov_len);
+    }
+#endif
+
+    remaining_length -= max_data;
+
+    if (remaining_length != 0) {
+        printf( "Not all raw 
description was been extracted (%lu bytes missing)\n", + (unsigned long) remaining_length ); + } + */ + +/* + + ompi_datatype_t *pdt; + struct iovec *iov; + int iov_count = OMPIO_IOVEC_INITIAL_SIZE; + + remote_arch = ompi_mpi_local_arch; + ompi_ddt_create_vector( 10,1,2, MPI_INT, &pdt ); + ompi_ddt_commit( &pdt ); + + iov = (struct iovec*)malloc(iov_num * sizeof(struct iovec)); + + opal_convertor_clone( fh->f_convertor, &convertor, 0 ); + + if( OMPI_SUCCESS != opal_convertor_prepare_for_send( &convertor, pdt, 1, NULL ) ) { + printf( "Cannot attach the datatype to a convertor\n" ); + return OMPI_ERROR; + } + + remaining_length = 1 * pdt->size; + printf ("PDT SIZE: %d\n",pdt->size); + + while ( 0 == opal_convertor_raw(&convertor, iov, &iov_count, &max_data) ) { + printf( "New raw extraction (iov_count = %d, max_data = %zu)\n", + iov_count, max_data ); + for (i = 0; i < iov_count; i++) { + printf( "\t{%p, %d}\n", iov[i].iov_base, iov[i].iov_len ); + } + remaining_length -= max_data; + iov_count = iov_num; + } + printf( "LAST Extraction (iov_count = %d, max_data = %zu)\n", + iov_count, max_data ); + for (i = 0; i < iov_count; i++) { + printf( "\t{%p, %d}\n", iov[i].iov_base, iov[i].iov_len ); + } + + remaining_length -= max_data; + + if( remaining_length != 0 ) { + printf( "Not all raw description was been extracted (%lu bytes missing)\n", + (unsigned long) remaining_length ); + } + + sleep(3); + exit(0); +*/ diff --git a/ompi/mca/io/ompio/io_ompio_file_write.c b/ompi/mca/io/ompio/io_ompio_file_write.c new file mode 100644 index 0000000000..beec89f8e1 --- /dev/null +++ b/ompi/mca/io/ompio/io_ompio_file_write.c @@ -0,0 +1,621 @@ +/* + * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. + * Copyright (c) 2004-2005 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. 
+ * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
+ * University of Stuttgart. All rights reserved.
+ * Copyright (c) 2004-2005 The Regents of the University of California.
+ * All rights reserved.
+ * Copyright (c) 2008-2011 University of Houston. All rights reserved.
+ * $COPYRIGHT$
+ *
+ * Additional copyrights may follow
+ *
+ * $HEADER$
+ */
+
+#include "ompi_config.h"
+
+#include "ompi/communicator/communicator.h"
+#include "ompi/info/info.h"
+#include "ompi/file/file.h"
+#include "ompi/mca/fs/fs.h"
+#include "ompi/mca/fs/base/base.h"
+#include "ompi/mca/fcoll/fcoll.h"
+#include "ompi/mca/fcoll/base/base.h"
+#include "ompi/mca/fcoll/dynamic/fcoll_dynamic.h"
+#include "ompi/mca/fcoll/static/fcoll_static.h"
+#include "ompi/mca/fcoll/individual/fcoll_individual.h"
+#include "ompi/mca/fcoll/two_phase/fcoll_two_phase.h"
+#include "ompi/mca/fcoll/ylib/fcoll_ylib.h"
+#include "ompi/mca/fbtl/fbtl.h"
+#include "ompi/mca/fbtl/base/base.h"
+
+#include "io_ompio.h"
+#include "math.h"
+#include
+
+/*
+ * Blocking individual write of count elements of datatype from buf at the
+ * current position in the file view.
+ *
+ * The user buffer is decoded into an iovec and written in cycles of at most
+ * mca_io_ompio_cycle_buffer_size bytes.  For every cycle an io_array is
+ * built in fh->f_io_array that pairs each contiguous piece of memory with
+ * its file offset (taken from fh->f_decoded_iov and fh->f_offset), handed
+ * to the selected fbtl's pwritev, and released again.
+ *
+ * The file-view cursor (f_total_bytes, f_position_in_file_view,
+ * f_index_in_file_view and, on wrap-around, f_offset) is advanced as data
+ * is consumed.  status is currently not filled in.
+ *
+ * Returns OMPI_SUCCESS, or OMPI_ERR_OUT_OF_RESOURCE if the io_array cannot
+ * be allocated.
+ */
+int
+mca_io_ompio_file_write (ompi_file_t *fp,
+                         void *buf,
+                         int count,
+                         struct ompi_datatype_t *datatype,
+                         ompi_status_public_t *status)
+{
+    int ret = OMPI_SUCCESS;
+    mca_io_ompio_data_t *data;
+    mca_io_ompio_file_t *fh;
+    size_t total_bytes_written = 0; /* total bytes that have been written*/
+    size_t bytes_to_write_in_cycle = 0; /* left to be written in a cycle*/
+    size_t bytes_per_cycle = 0; /* total written in each cycle by each process*/
+    int index = 0;
+    int cycles = 0;
+
+    uint32_t iov_count = 0;
+    struct iovec *decoded_iov = NULL;
+
+    size_t max_data = 0;
+    int i = 0; /* index into the decoded iovec of the buffer */
+    int j = 0; /* index into the file view iovec */
+    int k = 0; /* index into the io_array */
+    size_t sum_previous_counts = 0;
+    size_t sum_previous_length = 0;
+
+    data = (mca_io_ompio_data_t *) fp->f_io_selected_data;
+    fh = &data->ompio_fh;
+
+    ompi_io_ompio_decode_datatype (fh,
+                                   datatype,
+                                   count,
+                                   buf,
+                                   &max_data,
+                                   &decoded_iov,
+                                   &iov_count);
+
+    bytes_per_cycle = mca_io_ompio_cycle_buffer_size;
+    if (0 == bytes_per_cycle) {
+        /* A zero cycle size would divide by zero below; write everything
+         * in a single cycle instead. */
+        bytes_per_cycle = max_data;
+    }
+    /* Integer ceiling division.  Going through float loses precision for
+     * requests above 2^24 bytes and can drop the final cycle. */
+    if (0 != max_data) {
+        cycles = (int) (max_data / bytes_per_cycle);
+        if (max_data % bytes_per_cycle) {
+            cycles++;
+        }
+    }
+
+#if 0
+    printf ("Bytes per Cycle: %d Cycles: %d\n",bytes_per_cycle, cycles);
+#endif
+
+    /* resume where the previous operation left the file-view cursor */
+    sum_previous_length = fh->f_position_in_file_view;
+    j = fh->f_index_in_file_view;
+
+    for (index = 0; index < cycles; index++) {
+        OPAL_PTRDIFF_TYPE disp;
+        int block = 1;
+
+        k = 0;
+        /* the last cycle only carries the remainder, if there is one */
+        if ((index == cycles-1) && (max_data % bytes_per_cycle)) {
+            bytes_to_write_in_cycle = max_data % bytes_per_cycle;
+        }
+        else {
+            bytes_to_write_in_cycle = bytes_per_cycle;
+        }
+
+        fh->f_io_array = (mca_io_ompio_io_array_t *)malloc
+            (OMPIO_IOVEC_INITIAL_SIZE * sizeof (mca_io_ompio_io_array_t));
+        if (NULL == fh->f_io_array) {
+            opal_output(1, "OUT OF MEMORY\n");
+            ret = OMPI_ERR_OUT_OF_RESOURCE;
+            goto exit;
+        }
+
+        while (bytes_to_write_in_cycle) {
+            /* reallocate if needed */
+            if (OMPIO_IOVEC_INITIAL_SIZE*block <= k) {
+                mca_io_ompio_io_array_t *grown;
+
+                block ++;
+                /* keep the old pointer until realloc succeeded, otherwise
+                 * the array is leaked on failure */
+                grown = (mca_io_ompio_io_array_t *)realloc
+                    (fh->f_io_array, OMPIO_IOVEC_INITIAL_SIZE *
+                     block * sizeof (mca_io_ompio_io_array_t));
+                if (NULL == grown) {
+                    opal_output(1, "OUT OF MEMORY\n");
+                    free (fh->f_io_array);
+                    fh->f_io_array = NULL;
+                    ret = OMPI_ERR_OUT_OF_RESOURCE;
+                    goto exit;
+                }
+                fh->f_io_array = grown;
+            }
+
+            /* current memory iovec entry is used up: move to the next one */
+            if (decoded_iov[i].iov_len -
+                (total_bytes_written - sum_previous_counts) <= 0) {
+                sum_previous_counts += decoded_iov[i].iov_len;
+                i = i + 1;
+            }
+
+            disp = (OPAL_PTRDIFF_TYPE)decoded_iov[i].iov_base +
+                (total_bytes_written - sum_previous_counts);
+            fh->f_io_array[k].memory_address = (IOVBASE_TYPE *)disp;
+
+            /* take what is left in this memory entry, capped by the cycle */
+            if (decoded_iov[i].iov_len -
+                (total_bytes_written - sum_previous_counts) >=
+                bytes_to_write_in_cycle) {
+                fh->f_io_array[k].length = bytes_to_write_in_cycle;
+            }
+            else {
+                fh->f_io_array[k].length = decoded_iov[i].iov_len -
+                    (total_bytes_written - sum_previous_counts);
+            }
+            if (! (fh->f_flags & OMPIO_CONTIGUOUS_FVIEW)) {
+                /* current file-view entry is used up: advance, and start
+                 * the next repetition of the view after the last entry */
+                if (fh->f_decoded_iov[j].iov_len -
+                    (fh->f_total_bytes - sum_previous_length) <= 0) {
+                    sum_previous_length += fh->f_decoded_iov[j].iov_len;
+                    j = j + 1;
+                    if (j == (int)fh->f_iov_count) {
+                        j = 0;
+                        sum_previous_length = 0;
+                        fh->f_offset += fh->f_view_extent;
+                        fh->f_position_in_file_view = sum_previous_length;
+                        fh->f_index_in_file_view = j;
+                        fh->f_total_bytes = 0;
+                    }
+                }
+            }
+
+            disp = (OPAL_PTRDIFF_TYPE)fh->f_decoded_iov[j].iov_base +
+                (fh->f_total_bytes - sum_previous_length);
+            fh->f_io_array[k].offset = (IOVBASE_TYPE *)(disp + fh->f_offset);
+
+            if (! (fh->f_flags & OMPIO_CONTIGUOUS_FVIEW)) {
+                /* never cross the end of the current file-view entry */
+                if (fh->f_decoded_iov[j].iov_len -
+                    (fh->f_total_bytes - sum_previous_length)
+                    < fh->f_io_array[k].length) {
+                    fh->f_io_array[k].length = fh->f_decoded_iov[j].iov_len -
+                        (fh->f_total_bytes - sum_previous_length);
+                }
+            }
+
+            total_bytes_written += fh->f_io_array[k].length;
+            fh->f_total_bytes += fh->f_io_array[k].length;
+            bytes_to_write_in_cycle -= fh->f_io_array[k].length;
+            k = k + 1;
+        }
+        fh->f_position_in_file_view = sum_previous_length;
+        fh->f_index_in_file_view = j;
+        fh->f_num_of_io_entries = k;
+
+#if 0
+        if (fh->f_rank == 0) {
+            int d;
+            printf("*************************** %d\n", fh->f_num_of_io_entries);
+
+            for (d=0 ; d<fh->f_num_of_io_entries ; d++) {
+                printf(" ADDRESS: %p OFFSET: %p LENGTH: %d\n",
+                       fh->f_io_array[d].memory_address,
+                       fh->f_io_array[d].offset,
+                       fh->f_io_array[d].length);
+            }
+        }
+#endif
+
+        /* NOTE(review): the result of fbtl_pwritev is ignored, so a failed
+         * write is still reported as OMPI_SUCCESS -- confirm the return
+         * convention of the fbtl and propagate it. */
+        if (fh->f_num_of_io_entries) {
+            fh->f_fbtl->fbtl_pwritev (fh, NULL);
+        }
+
+        fh->f_num_of_io_entries = 0;
+        if (NULL != fh->f_io_array) {
+            free (fh->f_io_array);
+            fh->f_io_array = NULL;
+        }
+    }
+
+exit:
+    /* shared by the success and the error paths */
+    if (NULL != decoded_iov) {
+        free (decoded_iov);
+        decoded_iov = NULL;
+    }
+
+    return ret;
+}
+
+int
+mca_io_ompio_file_write_at (ompi_file_t *fh,
+                            OMPI_MPI_OFFSET_TYPE offset,
+                            void *buf,
+                            int count,
+                            struct ompi_datatype_t *datatype,
+                            ompi_status_public_t *status)
+{
+    int ret = OMPI_SUCCESS;
+
mca_io_ompio_data_t *data; + + data = (mca_io_ompio_data_t *) fh->f_io_selected_data; + + ompi_io_ompio_set_explicit_offset (&data->ompio_fh, offset); + + ret = mca_io_ompio_file_write (fh, + buf, + count, + datatype, + status); + + return ret; +} + +int +mca_io_ompio_file_write_all (ompi_file_t *fh, + void *buf, + int count, + struct ompi_datatype_t *datatype, + ompi_status_public_t *status) +{ + int ret = OMPI_SUCCESS; + mca_io_ompio_data_t *data; + + data = (mca_io_ompio_data_t *) fh->f_io_selected_data; + + ret = data->ompio_fh. + f_fcoll->fcoll_file_write_all (&data->ompio_fh, + buf, + count, + datatype, + status); + + return ret; +} + +int +mca_io_ompio_file_write_all_begin (ompi_file_t *fh, + void *buf, + int count, + struct ompi_datatype_t *datatype) +{ + int ret = OMPI_SUCCESS; + mca_io_ompio_data_t *data; + + data = (mca_io_ompio_data_t *) fh->f_io_selected_data; + + ret = data->ompio_fh. + f_fcoll->fcoll_file_write_all_begin (&data->ompio_fh, + buf, + count, + datatype); + + return ret; +} + +int +mca_io_ompio_file_write_all_end (ompi_file_t *fh, + void *buf, + ompi_status_public_t *status) +{ + int ret = OMPI_SUCCESS; + mca_io_ompio_data_t *data; + + data = (mca_io_ompio_data_t *) fh->f_io_selected_data; + + ret = data->ompio_fh. + f_fcoll->fcoll_file_write_all_end (&data->ompio_fh, + buf, + status); + + return ret; +} + +int +mca_io_ompio_file_write_at_all (ompi_file_t *fh, + OMPI_MPI_OFFSET_TYPE offset, + void *buf, + int count, + struct ompi_datatype_t *datatype, + ompi_status_public_t *status) +{ + int ret = OMPI_SUCCESS; + mca_io_ompio_data_t *data; + + data = (mca_io_ompio_data_t *) fh->f_io_selected_data; + + ompi_io_ompio_set_explicit_offset (&data->ompio_fh, offset); + + ret = data->ompio_fh. 
+        f_fcoll->fcoll_file_write_all (&data->ompio_fh,
+                                       buf,
+                                       count,
+                                       datatype,
+                                       status);
+    return ret;
+}
+
+int
+mca_io_ompio_file_write_at_all_begin (ompi_file_t *fh,
+                                      OMPI_MPI_OFFSET_TYPE offset,
+                                      void *buf,
+                                      int count,
+                                      struct ompi_datatype_t *datatype)
+{
+    int ret = OMPI_SUCCESS;
+    mca_io_ompio_data_t *data;
+
+    data = (mca_io_ompio_data_t *) fh->f_io_selected_data;
+
+    ompi_io_ompio_set_explicit_offset (&data->ompio_fh, offset);
+
+    ret = data->ompio_fh.
+        f_fcoll->fcoll_file_write_all_begin (&data->ompio_fh,
+                                             buf,
+                                             count,
+                                             datatype);
+
+    return ret;
+}
+
+int
+mca_io_ompio_file_write_at_all_end (ompi_file_t *fh,
+                                    void *buf,
+                                    ompi_status_public_t * status)
+{
+    int ret = OMPI_SUCCESS;
+    mca_io_ompio_data_t *data;
+
+    data = (mca_io_ompio_data_t *) fh->f_io_selected_data;
+
+    ret = data->ompio_fh.
+        f_fcoll->fcoll_file_write_all_end (&data->ompio_fh,
+                                           buf,
+                                           status);
+
+    return ret;
+}
+
+/*
+ * Non-blocking individual write of count elements of datatype from buf at
+ * the current position in the file view.
+ *
+ * The user buffer is decoded into an iovec and the whole request is issued
+ * in one cycle (bytes_per_cycle equals the request size).  An io_array
+ * pairing each contiguous piece of memory with its file offset is built in
+ * fh->f_io_array and handed to the selected fbtl's ipwritev together with
+ * request.
+ *
+ * The file-view cursor (f_total_bytes, f_position_in_file_view,
+ * f_index_in_file_view and, on wrap-around, f_offset) is advanced as data
+ * is consumed.
+ *
+ * Returns OMPI_SUCCESS, or OMPI_ERR_OUT_OF_RESOURCE if the io_array cannot
+ * be allocated.
+ *
+ * NOTE(review): for a zero-byte request nothing is posted and *request is
+ * left untouched -- confirm what callers expect in that case.
+ */
+int
+mca_io_ompio_file_iwrite (ompi_file_t *fp,
+                          void *buf,
+                          int count,
+                          struct ompi_datatype_t *datatype,
+                          ompi_request_t **request)
+{
+    int ret = OMPI_SUCCESS;
+    mca_io_ompio_data_t *data;
+    mca_io_ompio_file_t *fh;
+    size_t total_bytes_written = 0; /* total bytes that have been written*/
+    size_t bytes_to_write_in_cycle = 0; /* left to be written in a cycle*/
+    size_t bytes_per_cycle = 0; /* total written in each cycle by each process*/
+    int index = 0;
+    int cycles = 0;
+
+    uint32_t iov_count = 0;
+    struct iovec *decoded_iov = NULL;
+
+    size_t max_data = 0;
+    int i = 0; /* index into the decoded iovec of the buffer */
+    int j = 0; /* index into the file view iovec */
+    int k = 0; /* index into the io_array */
+    size_t sum_previous_counts = 0;
+    size_t sum_previous_length = 0;
+
+    data = (mca_io_ompio_data_t *) fp->f_io_selected_data;
+    fh = &data->ompio_fh;
+
+    ompi_io_ompio_decode_datatype (fh,
+                                   datatype,
+                                   count,
+                                   buf,
+                                   &max_data,
+                                   &decoded_iov,
+                                   &iov_count);
+
+    bytes_per_cycle = max_data;
+    /* Integer ceiling division.  A zero-byte request yields zero cycles;
+     * the former float computation evaluated 0.0/0.0 here and converted
+     * the resulting NaN to int. */
+    if (0 != max_data) {
+        cycles = (int) (max_data / bytes_per_cycle);
+        if (max_data % bytes_per_cycle) {
+            cycles++;
+        }
+    }
+
+#if 0
+    printf ("Bytes per Cycle: %d Cycles: %d\n",bytes_per_cycle, cycles);
+#endif
+
+    /* resume where the previous operation left the file-view cursor */
+    sum_previous_length = fh->f_position_in_file_view;
+    j = fh->f_index_in_file_view;
+
+    for (index = 0; index < cycles; index++) {
+        OPAL_PTRDIFF_TYPE disp;
+        int block = 1;
+
+        k = 0;
+        /* the last cycle only carries the remainder, if there is one */
+        if ((index == cycles-1) && (max_data % bytes_per_cycle)) {
+            bytes_to_write_in_cycle = max_data % bytes_per_cycle;
+        }
+        else {
+            bytes_to_write_in_cycle = bytes_per_cycle;
+        }
+
+        fh->f_io_array = (mca_io_ompio_io_array_t *)malloc
+            (OMPIO_IOVEC_INITIAL_SIZE * sizeof (mca_io_ompio_io_array_t));
+        if (NULL == fh->f_io_array) {
+            opal_output(1, "OUT OF MEMORY\n");
+            ret = OMPI_ERR_OUT_OF_RESOURCE;
+            goto exit;
+        }
+
+        while (bytes_to_write_in_cycle) {
+            /* reallocate if needed */
+            if (OMPIO_IOVEC_INITIAL_SIZE*block <= k) {
+                mca_io_ompio_io_array_t *grown;
+
+                block ++;
+                /* keep the old pointer until realloc succeeded, otherwise
+                 * the array is leaked on failure */
+                grown = (mca_io_ompio_io_array_t *)realloc
+                    (fh->f_io_array, OMPIO_IOVEC_INITIAL_SIZE *
+                     block * sizeof (mca_io_ompio_io_array_t));
+                if (NULL == grown) {
+                    opal_output(1, "OUT OF MEMORY\n");
+                    free (fh->f_io_array);
+                    fh->f_io_array = NULL;
+                    ret = OMPI_ERR_OUT_OF_RESOURCE;
+                    goto exit;
+                }
+                fh->f_io_array = grown;
+            }
+
+            /* current memory iovec entry is used up: move to the next one */
+            if (decoded_iov[i].iov_len -
+                (total_bytes_written - sum_previous_counts) <= 0) {
+                sum_previous_counts += decoded_iov[i].iov_len;
+                i = i + 1;
+            }
+
+            disp = (OPAL_PTRDIFF_TYPE)decoded_iov[i].iov_base +
+                (total_bytes_written - sum_previous_counts);
+            fh->f_io_array[k].memory_address = (IOVBASE_TYPE *)disp;
+
+            /* take what is left in this memory entry, capped by the cycle */
+            if (decoded_iov[i].iov_len -
+                (total_bytes_written - sum_previous_counts) >=
+                bytes_to_write_in_cycle) {
+                fh->f_io_array[k].length = bytes_to_write_in_cycle;
+            }
+            else {
+                fh->f_io_array[k].length = decoded_iov[i].iov_len -
+                    (total_bytes_written - sum_previous_counts);
+            }
+            if (! (fh->f_flags & OMPIO_CONTIGUOUS_FVIEW)) {
+                /* current file-view entry is used up: advance, and start
+                 * the next repetition of the view after the last entry */
+                if (fh->f_decoded_iov[j].iov_len -
+                    (fh->f_total_bytes - sum_previous_length) <= 0) {
+                    sum_previous_length += fh->f_decoded_iov[j].iov_len;
+                    j = j + 1;
+                    if (j == (int)fh->f_iov_count) {
+                        j = 0;
+                        sum_previous_length = 0;
+                        fh->f_offset += fh->f_view_extent;
+                        fh->f_position_in_file_view = sum_previous_length;
+                        fh->f_index_in_file_view = j;
+                        fh->f_total_bytes = 0;
+                    }
+                }
+            }
+
+            disp = (OPAL_PTRDIFF_TYPE)fh->f_decoded_iov[j].iov_base +
+                (fh->f_total_bytes - sum_previous_length);
+            fh->f_io_array[k].offset = (IOVBASE_TYPE *)(disp + fh->f_offset);
+
+            if (! (fh->f_flags & OMPIO_CONTIGUOUS_FVIEW)) {
+                /* never cross the end of the current file-view entry */
+                if (fh->f_decoded_iov[j].iov_len -
+                    (fh->f_total_bytes - sum_previous_length)
+                    < fh->f_io_array[k].length) {
+                    fh->f_io_array[k].length = fh->f_decoded_iov[j].iov_len -
+                        (fh->f_total_bytes - sum_previous_length);
+                }
+            }
+
+            total_bytes_written += fh->f_io_array[k].length;
+            fh->f_total_bytes += fh->f_io_array[k].length;
+            bytes_to_write_in_cycle -= fh->f_io_array[k].length;
+            k = k + 1;
+        }
+        fh->f_position_in_file_view = sum_previous_length;
+        fh->f_index_in_file_view = j;
+        fh->f_num_of_io_entries = k;
+
+#if 0
+        if (fh->f_rank == 0) {
+            int d;
+            printf("*************************** %d\n", fh->f_num_of_io_entries);
+
+            for (d=0 ; d<fh->f_num_of_io_entries ; d++) {
+                printf(" ADDRESS: %p OFFSET: %p LENGTH: %d\n",
+                       fh->f_io_array[d].memory_address,
+                       fh->f_io_array[d].offset,
+                       fh->f_io_array[d].length);
+            }
+        }
+#endif
+
+        /* NOTE(review): the result of fbtl_ipwritev is ignored, and the
+         * io_array is released right after posting the operation --
+         * confirm the fbtl no longer reads fh->f_io_array once ipwritev
+         * has returned. */
+        if (fh->f_num_of_io_entries) {
+            fh->f_fbtl->fbtl_ipwritev (fh, NULL, request);
+        }
+
+        fh->f_num_of_io_entries = 0;
+        if (NULL != fh->f_io_array) {
+            free (fh->f_io_array);
+            fh->f_io_array = NULL;
+        }
+    }
+
+exit:
+    /* shared by the success and the error paths */
+    if (NULL != decoded_iov) {
+        free (decoded_iov);
+        decoded_iov = NULL;
+    }
+
+    return ret;
+}
+
+int
+mca_io_ompio_file_iwrite_at (ompi_file_t *fh,
+                             OMPI_MPI_OFFSET_TYPE offset,
+                             void *buf,
+                             int count,
+                             struct ompi_datatype_t *datatype,
+                             ompi_request_t **request)
+{
+    int ret = 
OMPI_SUCCESS; + mca_io_ompio_data_t *data; + + data = (mca_io_ompio_data_t *) fh->f_io_selected_data; + + ompi_io_ompio_set_explicit_offset (&data->ompio_fh, offset); + + ret = mca_io_ompio_file_iwrite (fh, + buf, + count, + datatype, + request); + return ret; +} + +int +mca_io_ompio_file_write_shared (ompi_file_t *fh, + void *buf, + int count, + struct ompi_datatype_t *datatype, + ompi_status_public_t * status) +{ + int ret = OMPI_SUCCESS; + mca_io_ompio_data_t *data; + + data = (mca_io_ompio_data_t *) fh->f_io_selected_data; + + return ret; +} + +int +mca_io_ompio_file_iwrite_shared (ompi_file_t *fh, + void *buf, + int count, + struct ompi_datatype_t *datatype, + ompi_request_t **request) +{ + int ret = OMPI_SUCCESS; + mca_io_ompio_data_t *data; + + data = (mca_io_ompio_data_t *) fh->f_io_selected_data; + + return ret; +} + +int +mca_io_ompio_file_write_ordered (ompi_file_t *fh, + void *buf, + int count, + struct ompi_datatype_t *datatype, + ompi_status_public_t * status) +{ + int ret = OMPI_SUCCESS; + mca_io_ompio_data_t *data; + + data = (mca_io_ompio_data_t *) fh->f_io_selected_data; + + return ret; +} + +int +mca_io_ompio_file_write_ordered_begin (ompi_file_t *fh, + void *buf, + int count, + struct ompi_datatype_t *datatype) +{ + int ret = OMPI_SUCCESS; + mca_io_ompio_data_t *data; + + data = (mca_io_ompio_data_t *) fh->f_io_selected_data; + + return ret; +} + +int +mca_io_ompio_file_write_ordered_end (ompi_file_t *fh, + void *buf, + ompi_status_public_t *status) +{ + int ret = OMPI_SUCCESS; + mca_io_ompio_data_t *data; + + data = (mca_io_ompio_data_t *) fh->f_io_selected_data; + + return ret; +} diff --git a/ompi/mca/io/ompio/io_ompio_module.c b/ompi/mca/io/ompio/io_ompio_module.c new file mode 100644 index 0000000000..5f10cfa162 --- /dev/null +++ b/ompi/mca/io/ompio/io_ompio_module.c @@ -0,0 +1,95 @@ +/* + * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. 
+ * Copyright (c) 2004-2005 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2005 The Regents of the University of California. + * All rights reserved. + * Copyright (c) 2008-2011 University of Houston. All rights reserved. + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ +#include "ompi_config.h" + +#include "mpi.h" +#include "opal/threads/mutex.h" +#include "ompi/mca/io/io.h" +#include "io_ompio.h" + +/* + * The OMPIO module operations + */ +mca_io_base_module_2_0_0_t mca_io_ompio_module = { + + mca_io_ompio_file_open, + mca_io_ompio_file_close, + + mca_io_ompio_file_set_size, + mca_io_ompio_file_preallocate, + mca_io_ompio_file_get_size, + mca_io_ompio_file_get_amode, + mca_io_ompio_file_set_info, + mca_io_ompio_file_get_info, + + mca_io_ompio_file_set_view, + mca_io_ompio_file_get_view, + + /* Index IO operations */ + mca_io_ompio_file_read_at, + mca_io_ompio_file_read_at_all, + mca_io_ompio_file_write_at, + mca_io_ompio_file_write_at_all, + + mca_io_ompio_file_iread_at, + mca_io_ompio_file_iwrite_at, + + /* non-indexed IO operations */ + mca_io_ompio_file_read, + mca_io_ompio_file_read_all, + mca_io_ompio_file_write, + mca_io_ompio_file_write_all, + + mca_io_ompio_file_iread, + mca_io_ompio_file_iwrite, + + mca_io_ompio_file_seek, + mca_io_ompio_file_get_position, + mca_io_ompio_file_get_byte_offset, + + mca_io_ompio_file_read_shared, + mca_io_ompio_file_write_shared, + mca_io_ompio_file_iread_shared, + mca_io_ompio_file_iwrite_shared, + mca_io_ompio_file_read_ordered, + mca_io_ompio_file_write_ordered, + mca_io_ompio_file_seek_shared, + mca_io_ompio_file_get_position_shared, + + /* Split IO operations */ + mca_io_ompio_file_read_at_all_begin, + mca_io_ompio_file_read_at_all_end, + mca_io_ompio_file_write_at_all_begin, + 
mca_io_ompio_file_write_at_all_end, + mca_io_ompio_file_read_all_begin, + mca_io_ompio_file_read_all_end, + mca_io_ompio_file_write_all_begin, + mca_io_ompio_file_write_all_end, + mca_io_ompio_file_read_ordered_begin, + mca_io_ompio_file_read_ordered_end, + mca_io_ompio_file_write_ordered_begin, + mca_io_ompio_file_write_ordered_end, + + mca_io_ompio_file_get_type_extent, + + /* Sync/atomic IO operations */ + mca_io_ompio_file_set_atomicity, + mca_io_ompio_file_get_atomicity, + mca_io_ompio_file_sync +}; diff --git a/ompi/mca/io/ompio/io_ompio_nbc.c b/ompi/mca/io/ompio/io_ompio_nbc.c new file mode 100644 index 0000000000..5a3b7c4397 --- /dev/null +++ b/ompi/mca/io/ompio/io_ompio_nbc.c @@ -0,0 +1,541 @@ +/* -*- Mode: C; c-basic-offset:4 ; -*- */ +/* + * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. + * Copyright (c) 2004-2007 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2005 The Regents of the University of California. + * All rights reserved. + * Copyright (c) 2008-2011 University of Houston. All rights reserved. 
+ * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#include "ompi_config.h" + +#include "ompi/runtime/params.h" +#include "ompi/communicator/communicator.h" +#include "opal/datatype/opal_convertor.h" +#include "opal/datatype/opal_datatype.h" +#include "ompi/datatype/ompi_datatype.h" +#include "ompi/info/info.h" +#include "ompi/request/request.h" + +#include +#include +#include "io_ompio.h" + + + +int mca_io_ompio_get_f_aggregator_index (ompi_file_t *fh) +{ + mca_io_ompio_data_t *data; + mca_io_ompio_file_t *file; + + data = (mca_io_ompio_data_t *) fh->f_io_selected_data; + file = &data->ompio_fh; + + return file->f_aggregator_index; +} + +int mca_io_ompio_get_f_num_of_io_entries(ompi_file_t *fh) +{ + + mca_io_ompio_data_t *data; + mca_io_ompio_file_t *file; + + data = (mca_io_ompio_data_t *) fh->f_io_selected_data; + file = &data->ompio_fh; + + return file->f_num_of_io_entries; +} + +int mca_io_ompio_get_fcoll_dynamic_num_io_procs (int *num_procs) +{ + int param; + + param = mca_base_param_find("fcoll", "dynamic", "num_io_procs"); + if (param >= 0){ + mca_base_param_lookup_int(param, num_procs); +/* printf("num procs : %d\n", num_procs);*/ + return OMPI_SUCCESS; + } + else + return -1; + +} + +int mca_io_ompio_get_fcoll_dynamic_constant_cbs (int *constant_cbs) +{ + int param; + + param = mca_base_param_find("fcoll", "dynamic", "constant_cbs"); + if (param >= 0){ + mca_base_param_lookup_int(param, constant_cbs); +/* printf ("constant_cbs: %d\n", constant_cbs);*/ + return OMPI_SUCCESS; + } + else{ + constant_cbs[0] = -1; + return OMPI_SUCCESS; + } + +} + +int mca_io_ompio_get_fcoll_dynamic_cycle_buffer_size (int *cycle_buffer_size) +{ + + int param; + + param = mca_base_param_find("fcoll", "dynamic", "cycle_buffer_size"); + if (param >= 0){ + mca_base_param_lookup_int(param, cycle_buffer_size); +/* printf ("cycle_buffer_size : %d\n", *cycle_buffer_size);*/ + return OMPI_SUCCESS; + } + else + return -1; + +} + +int 
mca_io_ompio_get_f_io_array(ompi_file_t *fh, + mca_io_ompio_io_array_t **f_io_array) +{ + + mca_io_ompio_data_t *data; + mca_io_ompio_file_t *file; + + data = (mca_io_ompio_data_t *) fh->f_io_selected_data; + file = &data->ompio_fh; + *f_io_array = file->f_io_array; + return OMPI_SUCCESS; +} + +int mca_io_ompio_get_f_comm(ompi_file_t *fh, ompi_communicator_t **value) +{ + mca_io_ompio_data_t *data; + mca_io_ompio_file_t *file; + + data = (mca_io_ompio_data_t *)fh->f_io_selected_data; + file = &(data->ompio_fh); + *value = file->f_comm; + + return OMPI_SUCCESS; +} + +int mca_io_ompio_get_iov_type(ompi_file_t *fh, ompi_datatype_t **value) +{ + mca_io_ompio_data_t *data; + mca_io_ompio_file_t *file; + + data = (mca_io_ompio_data_t *)fh->f_io_selected_data; + file = &data->ompio_fh; + + *value = file->f_iov_type; + return OMPI_SUCCESS; +} + +int mca_io_ompio_get_f_procs_in_group(ompi_file_t *fh, int **value) +{ + mca_io_ompio_data_t *data; + mca_io_ompio_file_t *file; + + data = (mca_io_ompio_data_t *)fh->f_io_selected_data; + file = &data->ompio_fh; + + *value = file->f_procs_in_group; + return OMPI_SUCCESS; +} + +int mca_io_ompio_get_f_procs_per_group(ompi_file_t *fh) +{ + mca_io_ompio_data_t *data; + mca_io_ompio_file_t *file; + + data = (mca_io_ompio_data_t *)fh->f_io_selected_data; + file = &data->ompio_fh; + + return file->f_procs_per_group; +} + +signed int mca_io_ompio_get_f_flags(ompi_file_t *fh) +{ + mca_io_ompio_data_t *data; + mca_io_ompio_file_t *file; + + data = (mca_io_ompio_data_t *) fh->f_io_selected_data; + file = &data->ompio_fh; + + return file->f_flags; +} + +int mca_io_ompio_get_fd(ompi_file_t *fh) +{ + mca_io_ompio_data_t *data; + mca_io_ompio_file_t *file; + + data = (mca_io_ompio_data_t *) fh->f_io_selected_data; + file = &data->ompio_fh; + + return file->fd; +} + +int mca_io_ompio_generate_io_array(ompi_file_t *file, + struct iovec *global_fview, + int *tglobal_count, + int *fview_count, + int *bytes_per_process, + char *global_buf, + int 
*tblocks, + int *sorted, + int *nvalue, + int *bytes_left_ptr, + int *sorted_index) +{ + mca_io_ompio_data_t *data; + mca_io_ompio_file_t *fh; + int k, j, x=sorted_index[0]; + int blocks = *tblocks; + int bytes_left = bytes_left_ptr[0]; + + + data = (mca_io_ompio_data_t *) file->f_io_selected_data; + fh = &data->ompio_fh; + + + if (fh->f_procs_in_group[fh->f_aggregator_index] == fh->f_rank) { + int global_count = *tglobal_count; + int bytes_to_write = global_count; + int *temp = NULL; + int block = 1; + k = 0; + temp = (int *)malloc (sizeof(int) * fh->f_procs_per_group); + if (NULL == temp) { + opal_output(1, "OUT OF MEMORY\n"); + return OMPI_ERR_OUT_OF_RESOURCE; + } + memset(temp, 0x0, fh->f_procs_per_group*sizeof(int)); + if (NULL != fh->f_io_array){ + fh->f_num_of_io_entries = 0; + free (fh->f_io_array); + fh->f_io_array = NULL; + } + + fh->f_io_array = (mca_io_ompio_io_array_t *) malloc + (OMPIO_IOVEC_INITIAL_SIZE * sizeof (mca_io_ompio_io_array_t)); + if (NULL == fh->f_io_array) { + opal_output(1, "OUT OF MEMORY\n"); + return OMPI_ERR_OUT_OF_RESOURCE; + } + while (bytes_to_write) { + int start = 0; + if (OMPIO_IOVEC_INITIAL_SIZE*block <= k) { + block ++; + fh->f_io_array = (mca_io_ompio_io_array_t *)realloc + (fh->f_io_array, OMPIO_IOVEC_INITIAL_SIZE * block * + sizeof(mca_io_ompio_io_array_t)); + if (NULL == fh->f_io_array) { + opal_output(1, "OUT OF MEMORY\n"); + return OMPI_ERR_OUT_OF_RESOURCE; + } + } + blocks= fview_count[0]; + for (j=0 ; jf_procs_per_group ; j++) { + if (sorted[x] < blocks) { + nvalue[0] = j; + break; + } + else { + blocks += fview_count[j+1]; + } + } + for (j=0 ; jf_io_array[k].offset = (IOVBASE_TYPE *) + ((OPAL_PTRDIFF_TYPE)global_fview[sorted[x]].iov_base + + (global_fview[sorted[x]].iov_len - bytes_left)); + + fh->f_io_array[k].length = bytes_left; + fh->f_io_array[k].memory_address = &global_buf[start+temp[nvalue[0]]]; +/* printf("global_buf[%d] : %d\n", + (start+temp[nvalue[0]]),(int)global_buf[start+temp[nvalue[0]]]);*/ + + 
temp[nvalue[0]] += (int)fh->f_io_array[k].length; + bytes_to_write -= bytes_left; + bytes_left = 0; + k ++; + x ++; + continue; + } + else { + fh->f_io_array[k].offset = (IOVBASE_TYPE *) + ((OPAL_PTRDIFF_TYPE)global_fview[sorted[x]].iov_base + (global_fview[sorted[x]].iov_len - bytes_left)); + + fh->f_io_array[k].length = bytes_to_write; + fh->f_io_array[k].memory_address = + &global_buf[start+temp[nvalue[0]]]; +/* printf("global_buf[%d] : %d\n", + (start+temp[nvalue[0]]),(int)global_buf[start+temp[nvalue[0]]]);*/ + + temp[nvalue[0]] += (int)fh->f_io_array[k].length; + bytes_left -= bytes_to_write; + bytes_to_write = 0;; + k ++; + break; + } + } + else { + if (bytes_to_write < (int)global_fview[sorted[x]].iov_len) { + fh->f_io_array[k].offset = global_fview[sorted[x]].iov_base; + + fh->f_io_array[k].length = bytes_to_write; + fh->f_io_array[k].memory_address = &global_buf[start+temp[nvalue[0]]]; +/* printf("global_buf[%d] : %d\n", + (start+temp[nvalue[0]]),(int)global_buf[start+temp[nvalue[0]]]);*/ + + bytes_left = + global_fview[sorted[x]].iov_len - bytes_to_write; + bytes_to_write = 0; + k ++; + break; + } + else { + fh->f_io_array[k].offset = global_fview[sorted[x]].iov_base; + + fh->f_io_array[k].length = global_fview[sorted[x]].iov_len; + fh->f_io_array[k].memory_address = &global_buf[start+temp[nvalue[0]]]; + temp[nvalue[0]] += (int)fh->f_io_array[k].length; +/* printf("global_buf[%d] : %d\n", + (start+temp[nvalue[0]]),(int)global_buf[start+temp[nvalue[0]]]);*/ + + bytes_to_write -= global_fview[sorted[x]].iov_len; + k ++; + x ++; + continue; + } + } + } + fh->f_num_of_io_entries = k; +/* for (i=0 ; if_num_of_io_entries ; i++) { + printf("OFFSET: %lu LENGTH: %d\n", + fh->f_io_array[i].offset, + fh->f_io_array[i].length); + }*/ + + + bytes_left_ptr[0] = bytes_left; + sorted_index[0] = x; + + if (NULL != temp) { + free (temp); + temp = NULL; + } + + } + return OMPI_SUCCESS; +} + +int mca_io_ompio_non_contiguous_create_receive_buf(int *bytes_received, + struct 
iovec *decoded_iov, + char *receive_buf) +{ + + OPAL_PTRDIFF_TYPE mem_address; + size_t remaining = 0; + size_t temp_position = 0; + int current_position = 0, iov_index = 0; + + remaining = *bytes_received; + + while (remaining) { + mem_address = (OPAL_PTRDIFF_TYPE) + (decoded_iov[iov_index].iov_base) + current_position; + + if (remaining >= + (decoded_iov[iov_index].iov_len - current_position)) { + memcpy ((IOVBASE_TYPE *) mem_address, + receive_buf+temp_position, + decoded_iov[iov_index].iov_len - current_position); + remaining = remaining - + (decoded_iov[iov_index].iov_len - current_position); + temp_position = temp_position + + (decoded_iov[iov_index].iov_len - current_position); + iov_index = iov_index + 1; + current_position = 0; + } + else { + memcpy ((IOVBASE_TYPE *) mem_address, + receive_buf+temp_position, + remaining); + current_position = current_position + remaining; + remaining = 0; + } + } + if (NULL != receive_buf) { + free (receive_buf); + receive_buf = NULL; + } + return OMPI_SUCCESS; +} + + +int mca_io_ompio_non_contiguous_create_send_buf(int *bytes_sent, + struct iovec *decoded_iov, + char *send_buf) +{ + + OPAL_PTRDIFF_TYPE mem_address; + size_t remaining = 0; + size_t temp_position = 0; + int current_position = 0, iov_index = 0; + + remaining = *bytes_sent; + while (remaining) { + mem_address = (OPAL_PTRDIFF_TYPE) + (decoded_iov[iov_index].iov_base) + current_position; + if (remaining >= + (decoded_iov[iov_index].iov_len - current_position)) { + memcpy (send_buf+temp_position, + (IOVBASE_TYPE *)mem_address, + decoded_iov[iov_index].iov_len - current_position); + remaining = remaining - + (decoded_iov[iov_index].iov_len - current_position); + temp_position = temp_position + + (decoded_iov[iov_index].iov_len - current_position); + iov_index = iov_index + 1; + current_position = 0; + } + else { + memcpy (send_buf+temp_position, (IOVBASE_TYPE *) mem_address, + remaining); + current_position = current_position + remaining; + remaining = 0; + } + } 
+ return OMPI_SUCCESS; +} + + + +int mca_io_ompio_get_datatype_size (ompi_datatype_t * datatype) +{ + return datatype->super.size; +} + +int mca_io_ompio_decode_datatype_external (ompi_file_t *fp, + ompi_datatype_t *datatype, + int count, + void *buf, + size_t *max_data, + struct iovec **iov, + uint32_t *iovec_count) +{ + + int res; + mca_io_ompio_data_t *data; + mca_io_ompio_file_t *fh; + + data = (mca_io_ompio_data_t *) fp->f_io_selected_data; + fh = &data->ompio_fh; + res = ompi_io_ompio_decode_datatype (fh, + datatype, + count, + buf, + max_data, + iov, + iovec_count); + if(res != OMPI_SUCCESS){ + printf("Error in ompio decode datatype\n"); + return res; + } + return OMPI_SUCCESS; + +} + +int mca_io_ompio_datatype_is_contiguous(ompi_datatype_t *datatype, + ompi_file_t *fp) +{ + mca_io_ompio_data_t *data; + mca_io_ompio_file_t *fh; + + data = (mca_io_ompio_data_t *) fp->f_io_selected_data; + fh = &data->ompio_fh; + + if (opal_datatype_is_contiguous_memory_layout(&datatype->super,1)){ + fh->f_flags |= OMPIO_CONTIGUOUS_MEMORY; + return 1; + } + else + return 0; +} + + +int mca_io_ompio_set_aggregator_props (ompi_file_t *fp, + int num_aggregators, + size_t bytes_per_proc) +{ + int res; + mca_io_ompio_data_t *data; + mca_io_ompio_file_t *fh; + + data = (mca_io_ompio_data_t *) fp->f_io_selected_data; + fh = &data->ompio_fh; + res = ompi_io_ompio_set_aggregator_props (fh, + num_aggregators, + bytes_per_proc); + if(res != OMPI_SUCCESS){ + printf("Error in aggregator props external\n"); + return res; + } + + return OMPI_SUCCESS; +} + +int mca_io_ompio_generate_current_file_view (ompi_file_t *fp, + size_t max_data, + struct iovec **f_iov, + int *iov_count) +{ + int res; + mca_io_ompio_data_t *data; + mca_io_ompio_file_t *fh; + + data = (mca_io_ompio_data_t *) fp->f_io_selected_data; + fh = &data->ompio_fh; + res = ompi_io_ompio_generate_current_file_view (fh, + max_data, + f_iov, + iov_count); + if(res != OMPI_SUCCESS){ + printf("Error in 
ompi_io_generate_current_file_view\n"); + return res; + } + + return OMPI_SUCCESS; +} + +int mca_io_ompio_free_f_io_array (ompi_file_t *fp){ + + mca_io_ompio_data_t *data; + mca_io_ompio_file_t *fh; + + data = (mca_io_ompio_data_t *) fp->f_io_selected_data; + fh = &data->ompio_fh; + + if (NULL != fh->f_io_array) { + free (fh->f_io_array); + fh->f_io_array = NULL; + } + + return OMPI_SUCCESS; +} diff --git a/ompi/mca/io/romio/src/io_romio_component.c b/ompi/mca/io/romio/src/io_romio_component.c index 9024d3a8cb..9ed7ad7cd3 100644 --- a/ompi/mca/io/romio/src/io_romio_component.c +++ b/ompi/mca/io/romio/src/io_romio_component.c @@ -57,8 +57,8 @@ static int register_datarep(char *, /* * Private variables */ -static int priority_param = -1; -static int delete_priority_param = -1; +static int priority_param = 20; +static int delete_priority_param = 20; /* diff --git a/ompi/mca/sharedfp/Makefile.am b/ompi/mca/sharedfp/Makefile.am new file mode 100644 index 0000000000..011b8037db --- /dev/null +++ b/ompi/mca/sharedfp/Makefile.am @@ -0,0 +1,40 @@ +# +# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana +# University Research and Technology +# Corporation. All rights reserved. +# Copyright (c) 2004-2005 The University of Tennessee and The University +# of Tennessee Research Foundation. All rights +# reserved. +# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +# University of Stuttgart. All rights reserved. +# Copyright (c) 2004-2005 The Regents of the University of California. +# All rights reserved. +# Copyright (c) 2008-2011 University of Houston. All rights reserved. +# Copyright (c) 2010 Cisco Systems, Inc. All rights reserved. 
+# $COPYRIGHT$ +# +# Additional copyrights may follow +# +# $HEADER$ +# + +AM_CPPFLAGS = $(LTDLINCL) + +# main library setup +noinst_LTLIBRARIES = libmca_sharedfp.la +libmca_sharedfp_la_SOURCES = + +# local files +headers = sharedfp.h +libmca_sharedfp_la_SOURCES += $(headers) + +# Conditionally install the header files +if WANT_INSTALL_HEADERS +ompidir = $(includedir)/openmpi/$(subdir) +nobase_ompi_HEADERS = $(headers) +endif + +include base/Makefile.am + +distclean-local: + rm -f base/static-components.h diff --git a/ompi/mca/sharedfp/base/Makefile.am b/ompi/mca/sharedfp/base/Makefile.am new file mode 100644 index 0000000000..a3362bae86 --- /dev/null +++ b/ompi/mca/sharedfp/base/Makefile.am @@ -0,0 +1,28 @@ +# +# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana +# University Research and Technology +# Corporation. All rights reserved. +# Copyright (c) 2004-2005 The University of Tennessee and The University +# of Tennessee Research Foundation. All rights +# reserved. +# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +# University of Stuttgart. All rights reserved. +# Copyright (c) 2004-2005 The Regents of the University of California. +# All rights reserved. +# Copyright (c) 2008-2011 University of Houston. All rights reserved. +# $COPYRIGHT$ +# +# Additional copyrights may follow +# +# $HEADER$ +# + +headers += \ + base/base.h + +libmca_sharedfp_la_SOURCES += \ + base/sharedfp_base_close.c \ + base/sharedfp_base_file_select.c \ + base/sharedfp_base_file_unselect.c \ + base/sharedfp_base_find_available.c \ + base/sharedfp_base_open.c diff --git a/ompi/mca/sharedfp/base/base.h b/ompi/mca/sharedfp/base/base.h new file mode 100644 index 0000000000..15f053413b --- /dev/null +++ b/ompi/mca/sharedfp/base/base.h @@ -0,0 +1,70 @@ +/* + * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. 
+ * Copyright (c) 2004-2005 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2005 The Regents of the University of California. + * All rights reserved. + * Copyright (c) 2008 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2008-2011 University of Houston. All rights reserved. + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +/** + * @file + * + * MCA sharedfp base framework public interface functions. + */ + +#ifndef MCA_SHAREDFP_BASE_H +#define MCA_SHAREDFP_BASE_H + +#include "ompi_config.h" + +#include "mpi.h" +#include "opal/class/opal_list.h" +#include "ompi/mca/sharedfp/sharedfp.h" +#include "opal/mca/mca.h" + + +BEGIN_C_DECLS + +OMPI_DECLSPEC int mca_sharedfp_base_open(void); + +OMPI_DECLSPEC int mca_sharedfp_base_close(void); + +OMPI_DECLSPEC int mca_sharedfp_base_file_select(struct mca_io_ompio_file_t *file, + mca_base_component_t *preferred); + +OMPI_DECLSPEC int mca_sharedfp_base_file_unselect(struct mca_io_ompio_file_t *file); + +OMPI_DECLSPEC int mca_sharedfp_base_find_available(bool enable_progress_threads, + bool enable_mpi_threads); + +OMPI_DECLSPEC int mca_sharedfp_base_init_file (struct mca_io_ompio_file_t *file); + +OMPI_DECLSPEC int mca_sharedfp_base_get_param (struct mca_io_ompio_file_t *file, int keyval); +/* + * Globals + */ + +OMPI_DECLSPEC extern int mca_sharedfp_base_param; +OMPI_DECLSPEC extern int mca_sharedfp_base_output; + +OMPI_DECLSPEC extern bool mca_sharedfp_base_components_opened_valid; +OMPI_DECLSPEC extern bool mca_sharedfp_base_components_available_valid; + +OMPI_DECLSPEC extern opal_list_t mca_sharedfp_base_components_opened; +OMPI_DECLSPEC extern opal_list_t mca_sharedfp_base_components_available; + +END_C_DECLS + +#endif /* MCA_SHAREDFP_BASE_H */ diff --git
a/ompi/mca/sharedfp/base/sharedfp_base_close.c b/ompi/mca/sharedfp/base/sharedfp_base_close.c new file mode 100644 index 0000000000..f660f34522 --- /dev/null +++ b/ompi/mca/sharedfp/base/sharedfp_base_close.c @@ -0,0 +1,56 @@ +/* + * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. + * Copyright (c) 2004-2005 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2005 The Regents of the University of California. + * All rights reserved. + * Copyright (c) 2008-2011 University of Houston. All rights reserved. + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#include "ompi_config.h" + +#include + +#include "ompi/constants.h" +#include "opal/mca/mca.h" +#include "opal/mca/base/base.h" +#include "ompi/mca/sharedfp/sharedfp.h" +#include "ompi/mca/sharedfp/base/base.h" + +int mca_sharedfp_base_close(void) +{ + /* + Close all components that are still open. This may be the opened + list (if we're in ompi_info), or it may be the available list (if + we're anywhere else). Only one of the two lists is closed per call.
+ */ + + if (mca_sharedfp_base_components_opened_valid) { + mca_base_components_close(mca_sharedfp_base_output, + &mca_sharedfp_base_components_opened, NULL); + OBJ_DESTRUCT(&mca_sharedfp_base_components_opened); + mca_sharedfp_base_components_opened_valid = false; + } else if (mca_sharedfp_base_components_available_valid) { + mca_base_components_close(mca_sharedfp_base_output, + &mca_sharedfp_base_components_available, NULL); + OBJ_DESTRUCT(&mca_sharedfp_base_components_available); + mca_sharedfp_base_components_available_valid = false; + } + + /* Close the output stream for this framework */ + opal_output_close (mca_sharedfp_base_output); + + /* All done */ + + return OMPI_SUCCESS; +} diff --git a/ompi/mca/sharedfp/base/sharedfp_base_file_select.c b/ompi/mca/sharedfp/base/sharedfp_base_file_select.c new file mode 100644 index 0000000000..305903875f --- /dev/null +++ b/ompi/mca/sharedfp/base/sharedfp_base_file_select.c @@ -0,0 +1,358 @@ +/* + * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. + * Copyright (c) 2004-2005 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2005 The Regents of the University of California. + * All rights reserved. + * Copyright (c) 2008-2011 University of Houston. All rights reserved.
+ * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#include "ompi_config.h" + +#include + +#include "opal/class/opal_list.h" +#include "opal/util/argv.h" +#include "opal/mca/mca.h" +#include "opal/mca/base/base.h" +#include "orte/util/show_help.h" +#include "ompi/mca/sharedfp/sharedfp.h" +#include "ompi/mca/sharedfp/base/base.h" +#include "ompi/mca/io/ompio/io_ompio.h" + +/* + * This structure is needed so that we can close the modules + * which are not selected but were opened. mca_base_modules_close + * which does this job for us requires an opal_list_t which contains + * these modules + */ +struct queried_module_t { + opal_list_item_t super; /* list linkage; items are cast to and from opal_list_item_t */ + mca_sharedfp_base_component_t *om_component; /* component that was queried */ + mca_sharedfp_base_module_t *om_module; /* module returned by that component's query */ +}; +typedef struct queried_module_t queried_module_t; +static OBJ_CLASS_INSTANCE(queried_module_t, opal_list_item_t, NULL, NULL); + + +/* + * Only one sharedfp module can be attached to each file. + * + * This module calls the query function on all the components that were + * detected by sharedfp_base_open. This function is called on a + * per-file basis. This function performs the following steps. + * + * 1. Iterate over the list of available_components + * 2. Call the query function on each of these components. + * 3. query function returns the structure containing pointers + * to its module and its priority + * 4. Select the module with the highest priority + * 5. Call the init function on the selected module so that it does the + * right setup for the file + * 6.
Call finalize on all the other modules which returned + * their module but were unfortunate to not get selected + */ + +int mca_sharedfp_base_file_select (struct mca_io_ompio_file_t *file, + mca_base_component_t *preferred) +{ + int priority; + int best_priority; + opal_list_item_t *item; + opal_list_item_t *next_item; + mca_base_component_priority_list_item_t *selectable_item; + char *names, **name_array; + int num_names; + mca_base_component_priority_list_item_t *cpli; + mca_sharedfp_base_component_t *component; + mca_sharedfp_base_component_t *best_component; + mca_sharedfp_base_module_t *module; + opal_list_t queried; + queried_module_t *om; + opal_list_t *selectable; + char *str; + int err = MPI_SUCCESS; + int i; + bool was_selectable_constructed = false; + + /* Check and see if a preferred component was provided. If it was + provided then it should be used (if possible) */ + + if (NULL != preferred) { + + /* We have a preferred component. Check if it is available + and if so, whether it wants to run */ + + str = &(preferred->mca_component_name[0]); + + opal_output_verbose(10, mca_sharedfp_base_output, + "sharedfp:base:file_select: Checking preferred component: %s", + str); + + /* query the component (if it provides a query function at all) for + its priority and its module structure. This is necessary to proceed */ + + component = (mca_sharedfp_base_component_t *)preferred; + module = (NULL != component->sharedfpm_file_query) ? component->sharedfpm_file_query (&priority) : NULL; + if (NULL != module && + NULL != module->sharedfp_module_init) { + + /* this query seems to have returned something legitimate + * and we can now go ahead and initialize the + * file with it * but first, the functions which + * are null need to be filled in */ + + /*fill_null_pointers (module);*/ + file->f_sharedfp = module; + file->f_sharedfp_component = preferred; + + return module->sharedfp_module_init(file); + } + /* His preferred component is present, but is unable to + * run. This is not a good sign.
We should try selecting + * some other component. We let it fall through and select + * from the list of available components + */ + } /*end of selection for preferred component */ + + /* + * We fall till here if one of the two things happened: + * 1. The preferred component was provided but for some reason was + * not able to be selected + * 2. No preferred component was provided + * + * All we need to do is to go through the list of available + * components and find the one which has the highest priority and + * use that for this file + */ + + /* Check if anything was requested by means of the name parameters */ + names = NULL; + mca_base_param_lookup_string (mca_sharedfp_base_param, &names); + + if (NULL != names && 0 < strlen(names)) { + name_array = opal_argv_split (names, ','); /* NOTE(review): name_array (and names) are never released in this function -- possible leak, confirm ownership */ + num_names = opal_argv_count (name_array); + + opal_output_verbose(10, mca_sharedfp_base_output, + "sharedfp:base:file_Select: Checking all available module"); + + /* specific components were requested through the MCA parameter; + if the intersection with the available list is empty, then we fail + saying that the requested modules are not available */ + + selectable = OBJ_NEW(opal_list_t); + was_selectable_constructed = true; + + /* go through the components_available list and check against the names + * to see whether this can be added or not */ + + for (item = opal_list_get_first(&mca_sharedfp_base_components_available); + item != opal_list_get_end(&mca_sharedfp_base_components_available); + item = opal_list_get_next(item)) { + /* convert the opal_list_item_t returned into the proper type */ + cpli = (mca_base_component_priority_list_item_t *) item; + component = (mca_sharedfp_base_component_t *) cpli->super.cli_component; + opal_output_verbose(10, mca_sharedfp_base_output, + "select: initialising %s component %s", + component->sharedfpm_version.mca_type_name, + component->sharedfpm_version.mca_component_name); + + /* check if this name is present in the mca_base_params */ + for (i=0; i < num_names;
i++) { + if (0 == strcmp(name_array[i], component->sharedfpm_version.mca_component_name)) { + /* this is present, and should be added to the selectable list */ + + /* We need to create a separate object to initialise this list with + * since we cannot have the same item in 2 lists */ + + selectable_item = OBJ_NEW (mca_base_component_priority_list_item_t); + *selectable_item = *cpli; + opal_list_append (selectable, (opal_list_item_t *)selectable_item); + break; + } + } + } + + /* check for a NULL intersection between the available list and the + * list which was asked for */ + + if (0 == opal_list_get_size(selectable)) { + was_selectable_constructed = false; + OBJ_RELEASE (selectable); + opal_output_verbose (10, mca_sharedfp_base_output, + "sharedfp:base:file_select: preferred modules were not available"); + return OMPI_ERROR; + } + } else { /* if there was no name_array, then we need to simply initialize + selectable to mca_sharedfp_base_components_available */ + selectable = &mca_sharedfp_base_components_available; + } + + best_component = NULL; + best_priority = -1; + OBJ_CONSTRUCT(&queried, opal_list_t); + + for (item = opal_list_get_first(selectable); + item != opal_list_get_end(selectable); + item = opal_list_get_next(item)) { + /* + * convert the opal_list_item_t returned into the proper type + */ + cpli = (mca_base_component_priority_list_item_t *) item; + component = (mca_sharedfp_base_component_t *) cpli->super.cli_component; + opal_output_verbose(10, mca_sharedfp_base_output, + "select: initialising %s component %s", + component->sharedfpm_version.mca_type_name, + component->sharedfpm_version.mca_component_name); + + /* + * we can call the query function only if there is a function :-) + */ + if (NULL == component->sharedfpm_file_query) { + opal_output_verbose(10, mca_sharedfp_base_output, + "select: no query, ignoring the component"); + } else { + /* + * call the query function and see what it returns + */ + module = component->sharedfpm_file_query
(&priority); + + if (NULL == module || + NULL == module->sharedfp_module_init) { + /* + * query did not return any action which can be used + */ + opal_output_verbose(10, mca_sharedfp_base_output, + "select: query returned failure"); + } else { + opal_output_verbose(10, mca_sharedfp_base_output, + "select: query returned priority %d", + priority); + /* + * is this the best component we have found till now? + */ + if (priority > best_priority) { + best_priority = priority; + best_component = component; + } + + om = OBJ_NEW(queried_module_t); + /* + * check if we have run out of space + */ + if (NULL == om) { + OBJ_DESTRUCT(&queried); + return OMPI_ERR_OUT_OF_RESOURCE; + } + om->om_component = component; + om->om_module = module; + opal_list_append(&queried, (opal_list_item_t *)om); + } /* end else of if (NULL == module) */ + } /* end else of if (NULL == component->sharedfpm_file_query) */ + } /* end for ... end of traversal */ + + /* We have to empty out the selectable list if the selectable + * list was constructed as a duplicate and not as a pointer to the + * mca_sharedfp_base_components_available list. So, check and destroy */ + + if (was_selectable_constructed) { + + /* release the private copies held in selectable, never the global list's entries */ + for (item = opal_list_get_first(selectable); + item != opal_list_get_end(selectable); + item = next_item) { + next_item = opal_list_get_next(item); + OBJ_RELEASE (item); + } + + /* release the list itself */ + OBJ_RELEASE (selectable); + was_selectable_constructed = false; + } + + /* + * Now we have a list of components which successfully returned + * their module struct. One of these components has the best + * priority. The rest have to be file_unqueried to counter the + * effects of file_query'ing them. Finalize happens only on + * components which are initialized. + */ + if (NULL == best_component) { + /* + * This typically means that there was no component which was + * able to run properly this time.
So, we need to abort + * JMS replace with show_help + */ + OBJ_DESTRUCT(&queried); + return OMPI_ERROR; + } + + /* + * We now have a list of components which have successfully + * returned their priorities from the query. We now have to + * unquery() those components which have not been selected and + * init() the component which was selected + */ + for (item = opal_list_remove_first(&queried); + NULL != item; + item = opal_list_remove_first(&queried)) { + om = (queried_module_t *) item; + if (om->om_component == best_component) { + /* + * this is the chosen component, we have to initialise the + * module of this component. + * + * ANJU: a component might not have all the functions + * defined. Wherever a function pointer is null in the + * module structure we need to fill it in with the base + * structure function pointers. This is yet to be done + */ + + /* + * We don't return here because we still need to go through and + * release the other objects + */ + + /*fill_null_pointers (om->om_module);*/ + file->f_sharedfp = om->om_module; + err = om->om_module->sharedfp_module_init(file); + file->f_sharedfp_component = (mca_base_component_t *)best_component; + + } else { + /* + * this is not the "chosen one", finalize + */ + if (NULL != om->om_component->sharedfpm_file_unquery) { + /* unquery the component only if they have some clean + * up job to do. Components which are queried but do + * not actually do anything typically do not have a + * unquery.
Hence this check is necessary + */ + (void) om->om_component->sharedfpm_file_unquery(file); + opal_output_verbose(10, mca_sharedfp_base_output, + "select: component %s is not selected", + om->om_component->sharedfpm_version.mca_component_name); + } /* end if */ + } /* if not best component */ + OBJ_RELEASE(om); + } /* traversing through the entire list */ + + opal_output_verbose(10, mca_sharedfp_base_output, + "select: component %s selected", + best_component->sharedfpm_version.mca_component_name); + + OBJ_DESTRUCT(&queried); + + return err; +} diff --git a/ompi/mca/sharedfp/base/sharedfp_base_file_unselect.c b/ompi/mca/sharedfp/base/sharedfp_base_file_unselect.c new file mode 100644 index 0000000000..1776caa4b5 --- /dev/null +++ b/ompi/mca/sharedfp/base/sharedfp_base_file_unselect.c @@ -0,0 +1,41 @@ +/* + * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. + * Copyright (c) 2004-2005 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2005 The Regents of the University of California. + * All rights reserved. + * Copyright (c) 2008-2011 University of Houston. All rights reserved.
+ * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#include "ompi_config.h" + +#include +#include +#include + +#include "mpi.h" +#include "ompi/mca/io/ompio/io_ompio.h" +#include "opal/util/show_help.h" +#include "opal/mca/mca.h" +#include "opal/mca/base/base.h" +#include "ompi/mca/sharedfp/sharedfp.h" +#include "ompi/mca/sharedfp/base/base.h" + +int mca_sharedfp_base_file_unselect(mca_io_ompio_file_t *file) +{ + if (NULL == file->f_sharedfp || NULL == file->f_sharedfp->sharedfp_module_finalize) { + return OMPI_SUCCESS; /* no module attached, or it has no finalize hook */ + } + + return file->f_sharedfp->sharedfp_module_finalize(file); +} diff --git a/ompi/mca/sharedfp/base/sharedfp_base_find_available.c b/ompi/mca/sharedfp/base/sharedfp_base_find_available.c new file mode 100644 index 0000000000..65b61b944f --- /dev/null +++ b/ompi/mca/sharedfp/base/sharedfp_base_find_available.c @@ -0,0 +1,171 @@ +/* + * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. + * Copyright (c) 2004-2005 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2005 The Regents of the University of California. + * All rights reserved. + * Copyright (c) 2008-2011 University of Houston. All rights reserved.
+ * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#include "ompi_config.h" + +#include +#include + +#include "mpi.h" +#include "ompi/constants.h" +#include "opal/class/opal_list.h" +#include "orte/util/show_help.h" +#include "opal/mca/mca.h" +#include "opal/mca/base/base.h" +#include "opal/mca/base/mca_base_component_repository.h" +#include "ompi/mca/sharedfp/sharedfp.h" +#include "ompi/mca/sharedfp/base/base.h" + +opal_list_t mca_sharedfp_base_modules_available; /* NOTE(review): not referenced by the code visible in this file -- confirm it is still needed */ +bool mca_sharedfp_base_modules_available_valid = false; /* NOTE(review): same as above */ + +static int init_query(const mca_base_component_t *m, + mca_base_component_priority_list_item_t *entry, + bool enable_progress_threads, + bool enable_mpi_threads); +static int init_query_2_0_0(const mca_base_component_t *component, + mca_base_component_priority_list_item_t *entry, + bool enable_progress_threads, + bool enable_mpi_threads); + +int mca_sharedfp_base_find_available(bool enable_progress_threads, + bool enable_mpi_threads) +{ + bool found = false; + mca_base_component_priority_list_item_t *entry; + opal_list_item_t *p; + + /* Initialize the list */ + + OBJ_CONSTRUCT(&mca_sharedfp_base_components_available, opal_list_t); + mca_sharedfp_base_components_available_valid = true; + + /* The list of components which we should check is already present + in mca_sharedfp_base_components_opened, which was established in + mca_sharedfp_base_open */ + + for (found = false, + p = opal_list_remove_first (&mca_sharedfp_base_components_opened); + NULL != p; + p = opal_list_remove_first (&mca_sharedfp_base_components_opened)) { + entry = OBJ_NEW(mca_base_component_priority_list_item_t); + entry->super.cli_component = + ((mca_base_component_list_item_t *)p)->cli_component; + + /* Now for this entry, we have to determine the thread level.
Call + a subroutine to do the job for us */ + + if (OMPI_SUCCESS == init_query(entry->super.cli_component, entry, + enable_progress_threads, + enable_mpi_threads)) { + /* Save the results in the list. The priority is not relevant at + this point in time. But we save the thread arguments so that + the initial selection algorithm can negotiate overall thread + level for this process */ + entry->cpli_priority = 0; + opal_list_append (&mca_sharedfp_base_components_available, + (opal_list_item_t *) entry); + found = true; + } else { + /* The component does not want to run, so close it. Its close() + has already been invoked. Close it out of the DSO repository + (if it is there in the repository) */ + mca_base_component_repository_release(entry->super.cli_component); + OBJ_RELEASE(entry); + } + /* Free entry from the "opened" list */ + OBJ_RELEASE(p); + } + + /* The opened list is no longer necessary, so we can free it */ + OBJ_DESTRUCT (&mca_sharedfp_base_components_opened); + mca_sharedfp_base_components_opened_valid = false; + + /* There should be at least one sharedfp component which was available */ + if (false == found) { + /* Need to free all items in the list */ + OBJ_DESTRUCT(&mca_sharedfp_base_components_available); + mca_sharedfp_base_components_available_valid = false; + opal_output_verbose (10, mca_sharedfp_base_output, + "sharedfp:find_available: no sharedfp components available!"); + return OMPI_ERROR; + } + + /* All done */ + return OMPI_SUCCESS; +} + + +static int init_query(const mca_base_component_t *m, + mca_base_component_priority_list_item_t *entry, + bool enable_progress_threads, + bool enable_mpi_threads) +{ + int ret; + + opal_output_verbose(10, mca_sharedfp_base_output, + "sharedfp:find_available: querying sharedfp component %s", + m->mca_component_name); + + /* This component has been successfully opened, now try to query it */ + if (2 == m->mca_type_major_version && + 0 == m->mca_type_minor_version && + 0 == m->mca_type_release_version) { + ret
= init_query_2_0_0(m, entry, enable_progress_threads, + enable_mpi_threads); + } else { + /* unrecognised API version */ + opal_output_verbose(10, mca_sharedfp_base_output, + "sharedfp:find_available:unrecognised sharedfp API version (%d.%d.%d)", + m->mca_type_major_version, + m->mca_type_minor_version, + m->mca_type_release_version); + return OMPI_ERROR; + } + + /* Query done -- look at return value to see what happened */ + if (OMPI_SUCCESS != ret) { + opal_output_verbose(10, mca_sharedfp_base_output, + "sharedfp:find_available sharedfp component %s is not available", + m->mca_component_name); + if (NULL != m->mca_close_component) { + m->mca_close_component(); + } + } else { + opal_output_verbose(10, mca_sharedfp_base_output, + "sharedfp:find_avalable: sharedfp component %s is available", + m->mca_component_name); + + } + /* All done */ + return ret; +} + + +static int init_query_2_0_0(const mca_base_component_t *component, + mca_base_component_priority_list_item_t *entry, + bool enable_progress_threads, + bool enable_mpi_threads) +{ + mca_sharedfp_base_component_2_0_0_t *sharedfp = + (mca_sharedfp_base_component_2_0_0_t *) component; + + return sharedfp->sharedfpm_init_query(enable_progress_threads, + enable_mpi_threads); +} diff --git a/ompi/mca/sharedfp/base/sharedfp_base_open.c b/ompi/mca/sharedfp/base/sharedfp_base_open.c new file mode 100644 index 0000000000..c6d4bd8d48 --- /dev/null +++ b/ompi/mca/sharedfp/base/sharedfp_base_open.c @@ -0,0 +1,88 @@ +/* + * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. + * Copyright (c) 2004-2005 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2005 The Regents of the University of California. + * All rights reserved.
+ * Copyright (c) 2008-2011 University of Houston. All rights reserved. + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + + +#include "ompi_config.h" +#include <stdio.h> + +#include "ompi/class/ompi_free_list.h" +#include "orte/util/show_help.h" +#include "opal/mca/mca.h" +#include "opal/mca/base/base.h" +#include "opal/mca/base/mca_base_param.h" + +#include "orte/util/show_help.h" + +#include "ompi/mca/sharedfp/sharedfp.h" +#include "ompi/mca/sharedfp/base/base.h" + + +/* + * The following file was created by configure. It contains extern + * statements and the definition of an array of pointers to each + * component's public mca_base_component_t struct. + */ +#ifdef __WINDOWS__ + const mca_base_component_t *mca_sharedfp_base_static_components[] = {NULL}; +#else +#include "ompi/mca/sharedfp/base/static-components.h" +#endif + +/* + * Global variables; most of which are loaded by back-ends of MCA + * variables + */ +int mca_sharedfp_base_param = -1; /* index of the "sharedfp" selection parameter; set in mca_sharedfp_base_open() */ +int mca_sharedfp_base_output = -1; /* output stream id; set in mca_sharedfp_base_open() */ + +opal_list_t mca_sharedfp_base_components_opened; +opal_list_t mca_sharedfp_base_components_available; + +bool mca_sharedfp_base_components_available_valid = false; +bool mca_sharedfp_base_components_opened_valid = false; + +mca_sharedfp_base_component_t mca_sharedfp_base_selected_component; +mca_sharedfp_base_module_t mca_sharedfp; + +/* + * Function for finding and opening either all MCA components, or the one + * that was specifically requested via a MCA parameter. Returns OMPI_SUCCESS, or OMPI_ERROR if the components cannot be opened.
+ */ +int mca_sharedfp_base_open(void) +{ + /* Open an output stream for this framework */ + + mca_sharedfp_base_output = opal_output_open(NULL); + + /* Open up all available components */ + + if (OMPI_SUCCESS != + mca_base_components_open("sharedfp", mca_sharedfp_base_output, + mca_sharedfp_base_static_components, + &mca_sharedfp_base_components_opened, true)) { + return OMPI_ERROR; + } + mca_sharedfp_base_components_opened_valid = true; + + /* Find the index of the MCA "sharedfp" param for selection */ + + mca_sharedfp_base_param = mca_base_param_find("sharedfp", "base", NULL); + + return OMPI_SUCCESS; +} diff --git a/ompi/mca/sharedfp/base/static-components.h b/ompi/mca/sharedfp/base/static-components.h new file mode 100644 index 0000000000..79164d2bab --- /dev/null +++ b/ompi/mca/sharedfp/base/static-components.h @@ -0,0 +1,18 @@ +/* + * $HEADER$ + */ +#if defined(c_plusplus) || defined(__cplusplus) +extern "C" { +#endif + + + +const mca_base_component_t *mca_sharedfp_base_static_components[] = { + + NULL +}; + +#if defined(c_plusplus) || defined(__cplusplus) +} +#endif + diff --git a/ompi/mca/sharedfp/dummy/Makefile.am b/ompi/mca/sharedfp/dummy/Makefile.am new file mode 100644 index 0000000000..91319edaca --- /dev/null +++ b/ompi/mca/sharedfp/dummy/Makefile.am @@ -0,0 +1,48 @@ +# +# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana +# University Research and Technology +# Corporation. All rights reserved. +# Copyright (c) 2004-2005 The University of Tennessee and The University +# of Tennessee Research Foundation. All rights +# reserved. +# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +# University of Stuttgart. All rights reserved. +# Copyright (c) 2004-2005 The Regents of the University of California. +# All rights reserved. +# Copyright (c) 2008-2011 University of Houston. All rights reserved.
+# $COPYRIGHT$ +# +# Additional copyrights may follow +# +# $HEADER$ +# + +# Make the output library in this directory, and name it either +# mca_<type>_<name>.la (for DSO builds) or libmca_<type>_<name>.la +# (for static builds). + +if MCA_BUILD_ompi_sharedfp_dummy_DSO +component_noinst = +component_install = mca_sharedfp_dummy.la +else +component_noinst = libmca_sharedfp_dummy.la +component_install = +endif + +mcacomponentdir = $(pkglibdir) +mcacomponent_LTLIBRARIES = $(component_install) +mca_sharedfp_dummy_la_SOURCES = $(sources) +mca_sharedfp_dummy_la_LDFLAGS = -module -avoid-version + +noinst_LTLIBRARIES = $(component_noinst) +libmca_sharedfp_dummy_la_SOURCES = $(sources) +libmca_sharedfp_dummy_la_LDFLAGS = -module -avoid-version + +# Source files + +sources = \ + sharedfp_dummy.h \ + sharedfp_dummy.c \ + sharedfp_dummy_component.c \ + sharedfp_dummy_update.c \ + sharedfp_dummy_seek.c diff --git a/ompi/mca/sharedfp/dummy/sharedfp_dummy.c b/ompi/mca/sharedfp/dummy/sharedfp_dummy.c new file mode 100644 index 0000000000..157f511194 --- /dev/null +++ b/ompi/mca/sharedfp/dummy/sharedfp_dummy.c @@ -0,0 +1,82 @@ +/* + * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. + * Copyright (c) 2004-2006 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2005 The Regents of the University of California. + * All rights reserved. + * Copyright (c) 2008-2011 University of Houston. All rights reserved. + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + * + * These symbols are in a file by themselves to provide nice linker + * semantics.
Since linkers generally pull in symbols by object files, + * keeping these symbols as the only symbols in this file prevents + * utility programs such as "ompi_info" from having to import entire + * modules just to query their version and parameters + */ + +#include "ompi_config.h" +#include "mpi.h" +#include "ompi/mca/sharedfp/sharedfp.h" +#include "ompi/mca/sharedfp/dummy/sharedfp_dummy.h" + +/* + * ******************************************************************* + * ************************ actions structure ************************ + * ******************************************************************* + */ +static mca_sharedfp_base_module_1_0_0_t dummy = { + mca_sharedfp_dummy_module_init, /* initialise after being selected */ + mca_sharedfp_dummy_module_finalize, /* per-file finalize of the module */ + mca_sharedfp_dummy_update, /* sharedfp_update slot */ + mca_sharedfp_dummy_seek /* sharedfp_seek slot */ +}; +/* + * ******************************************************************* + * ************************* structure ends ************************** + * ******************************************************************* + */ + +int mca_sharedfp_dummy_component_init_query(bool enable_progress_threads, + bool enable_mpi_threads) +{ + /* Nothing to do */ + + return OMPI_SUCCESS; +} + +struct mca_sharedfp_base_module_1_0_0_t * +mca_sharedfp_dummy_component_file_query (int *priority) +{ + *priority = 20; /* fixed priority reported on every query */ + + return &dummy; +} + +int mca_sharedfp_dummy_component_file_unquery (mca_io_ompio_file_t *file) +{ + /* This function might be needed for some purposes later.
For now it + * does not have anything to do since there are no steps which need + * to be undone if this module is not selected */ + + return OMPI_SUCCESS; +} + +int mca_sharedfp_dummy_module_init (mca_io_ompio_file_t *file) +{ + return OMPI_SUCCESS; /* no per-file state to set up */ +} + +/* Per-file teardown counterpart of module_init; nothing to release. */ +int mca_sharedfp_dummy_module_finalize (mca_io_ompio_file_t *file) +{ + return OMPI_SUCCESS; +} diff --git a/ompi/mca/sharedfp/dummy/sharedfp_dummy.h b/ompi/mca/sharedfp/dummy/sharedfp_dummy.h new file mode 100644 index 0000000000..d5d427f1b7 --- /dev/null +++ b/ompi/mca/sharedfp/dummy/sharedfp_dummy.h @@ -0,0 +1,61 @@ +/* + * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. + * Copyright (c) 2004-2006 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2005 The Regents of the University of California. + * All rights reserved. + * Copyright (c) 2008-2011 University of Houston. All rights reserved.
+ * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#ifndef MCA_SHAREDFP_DUMMY_H +#define MCA_SHAREDFP_DUMMY_H + +#include "ompi_config.h" +#include "opal/mca/mca.h" +#include "ompi/mca/sharedfp/sharedfp.h" +#include "ompi/mca/io/ompio/io_ompio.h" + + +BEGIN_C_DECLS +/* Component hooks: thread-support query, per-file query (reports a priority), per-file unquery */ +int mca_sharedfp_dummy_component_init_query(bool enable_progress_threads, + bool enable_mpi_threads); +struct mca_sharedfp_base_module_1_0_0_t * +mca_sharedfp_dummy_component_file_query (int *priority); +int mca_sharedfp_dummy_component_file_unquery (mca_io_ompio_file_t *file); +/* Per-file module init/finalize */ +int mca_sharedfp_dummy_module_init (mca_io_ompio_file_t *file); +int mca_sharedfp_dummy_module_finalize (mca_io_ompio_file_t *file); +/* The component descriptor instance */ +OMPI_MODULE_DECLSPEC extern mca_sharedfp_base_component_2_0_0_t mca_sharedfp_dummy_component; +/* + * ****************************************************************** + * ********* functions which are implemented in this module ********* + * ****************************************************************** + */ + +int mca_sharedfp_dummy_update (mca_io_ompio_file_t *fh, + int num_bytes, + OMPI_MPI_OFFSET_TYPE current_position); +int mca_sharedfp_dummy_seek (mca_io_ompio_file_t *fh, + OMPI_MPI_OFFSET_TYPE position); + +/* + * ****************************************************************** + * ************ functions implemented in this module end ************ + * ****************************************************************** + */ + +END_C_DECLS + +#endif /* MCA_SHAREDFP_DUMMY_H */ diff --git a/ompi/mca/sharedfp/dummy/sharedfp_dummy_component.c b/ompi/mca/sharedfp/dummy/sharedfp_dummy_component.c new file mode 100644 index 0000000000..a8109f3264 --- /dev/null +++ b/ompi/mca/sharedfp/dummy/sharedfp_dummy_component.c @@ -0,0 +1,63 @@ +/* + * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved.
+ * Copyright (c) 2004-2005 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2005 The Regents of the University of California. + * All rights reserved. + * Copyright (c) 2008-2011 University of Houston. All rights reserved. + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + * + * These symbols are in a file by themselves to provide nice linker + * semantics. Since linkers generally pull in symbols by object + * files, keeping these symbols as the only symbols in this file + * prevents utility programs such as "ompi_info" from having to import + * entire components just to query their version and parameters. + */ + +#include "ompi_config.h" +#include "sharedfp_dummy.h" +#include "mpi.h" + +/* + * Public string showing the sharedfp dummy component version number + */ +const char *mca_sharedfp_dummy_component_version_string = + "OMPI/MPI dummy SHAREDFP MCA component version " OMPI_VERSION; + +/* + * Instantiate the public struct with all of our public information + * and pointers to our public functions in it + */ +mca_sharedfp_base_component_2_0_0_t mca_sharedfp_dummy_component = { + + /* First, the mca_base_component_t struct containing meta information + about the component itself */ + + { + MCA_SHAREDFP_BASE_VERSION_2_0_0, + + /* Component name and version */ + "dummy", + OMPI_MAJOR_VERSION, + OMPI_MINOR_VERSION, + OMPI_RELEASE_VERSION, + NULL, + NULL /* NOTE(review): these two NULLs look like the open/close hooks left unset -- confirm against mca_base_component_t */ + }, + { + /* This component is checkpointable */ + MCA_BASE_METADATA_PARAM_CHECKPOINT + }, + mca_sharedfp_dummy_component_init_query, /* get thread level */ + mca_sharedfp_dummy_component_file_query, /* get priority and actions */ + mca_sharedfp_dummy_component_file_unquery /* undo what was done by previous function */ +}; diff --git a/ompi/mca/sharedfp/dummy/sharedfp_dummy_seek.c 
b/ompi/mca/sharedfp/dummy/sharedfp_dummy_seek.c new file mode 100644 index 0000000000..d1341849ce --- /dev/null +++ b/ompi/mca/sharedfp/dummy/sharedfp_dummy_seek.c @@ -0,0 +1,35 @@ +/* + * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. + * Copyright (c) 2004-2005 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2005 The Regents of the University of California. + * All rights reserved. + * Copyright (c) 2008-2011 University of Houston. All rights reserved. + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + + +#include "ompi_config.h" +#include "sharedfp_dummy.h" +#include <stdio.h> +#include "mpi.h" +#include "ompi/constants.h" +#include "orte/util/show_help.h" +#include "ompi/mca/sharedfp/sharedfp.h" +/* Placeholder seek: prints a marker line to stdout and returns OMPI_SUCCESS; fh and position are ignored. */ +int +mca_sharedfp_dummy_seek (mca_io_ompio_file_t *fh, + OMPI_MPI_OFFSET_TYPE position) +{ + printf ("DUMMY SEEK\n"); + return OMPI_SUCCESS; +} diff --git a/ompi/mca/sharedfp/dummy/sharedfp_dummy_update.c b/ompi/mca/sharedfp/dummy/sharedfp_dummy_update.c new file mode 100644 index 0000000000..586c618a0b --- /dev/null +++ b/ompi/mca/sharedfp/dummy/sharedfp_dummy_update.c @@ -0,0 +1,36 @@ +/* + * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. + * Copyright (c) 2004-2005 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2005 The Regents of the University of California. + * All rights reserved. + * Copyright (c) 2008-2011 University of Houston. 
All rights reserved. + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + + +#include "ompi_config.h" +#include "sharedfp_dummy.h" +#include <stdio.h> +#include "mpi.h" +#include "ompi/constants.h" +#include "orte/util/show_help.h" +#include "ompi/mca/sharedfp/sharedfp.h" +/* Placeholder update: prints a marker line to stdout and returns OMPI_SUCCESS; all arguments are ignored. */ +int +mca_sharedfp_dummy_update (mca_io_ompio_file_t *fh, + int num_bytes, + OMPI_MPI_OFFSET_TYPE current_position) +{ + printf ("DUMMY UPDATING\n"); + return OMPI_SUCCESS; +} diff --git a/ompi/mca/sharedfp/sharedfp.h b/ompi/mca/sharedfp/sharedfp.h new file mode 100644 index 0000000000..edea6828f7 --- /dev/null +++ b/ompi/mca/sharedfp/sharedfp.h @@ -0,0 +1,131 @@ +/* + * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. + * Copyright (c) 2004-2005 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2005 The Regents of the University of California. + * All rights reserved. + * Copyright (c) 2008-2011 University of Houston. All rights reserved. + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#ifndef OMPI_MCA_SHAREDFP_H +#define OMPI_MCA_SHAREDFP_H + +#include "ompi_config.h" +#include "mpi.h" +#include "opal/mca/mca.h" +#include "opal/mca/base/base.h" + +BEGIN_C_DECLS + +struct mca_io_ompio_file_t; + +/* + * Macro for use in components that are of type sharedfp + */ +#define MCA_SHAREDFP_BASE_VERSION_2_0_0 \ + MCA_BASE_VERSION_2_0_0, \ + "sharedfp", 2, 0, 0 + +/* + * These are the component function prototypes. These function pointers + * go into the component structure. These functions (query() and finalize()) + * are called during sharedfp_base_select(). 
Each component is query()'ed + * and subsequently, all the unselected components are finalize()'ed + * so that any *stuff* they did during query() can be undone. By + * similar logic, finalize() is also called on the component which + * was selected when the communicator is being destroyed. + * NOTE(review): the component struct below names its hooks init_query/file_query/file_unquery -- confirm how they map to the names used here. + * So, to sum it up, every component carries 4 functions: + * 1. open() - called during MPI_INIT + * 2. close() - called during MPI_FINALIZE + * 3. query() - called to select a particular component + * 4. finalize() - called when actions taken during query have + * to be undone + */ + +/* + * **************** component struct ******************************* + * *********** These functions go in the component struct ********** + * **************** component struct ******************************* + */ + +typedef int (*mca_sharedfp_base_component_init_query_1_0_0_fn_t) + (bool enable_progress_threads, + bool enable_mpi_threads); + +typedef struct mca_sharedfp_base_module_1_0_0_t * +(*mca_sharedfp_base_component_file_query_1_0_0_fn_t) (int *priority); + +typedef int (*mca_sharedfp_base_component_file_unquery_1_0_0_fn_t) + (struct mca_io_ompio_file_t *file); + +/* + * ****************** component struct ****************************** + * Structure for sharedfp v2.0.0 components. This is chained to MCA v2.0.0 + * ****************** component struct ****************************** + */ +struct mca_sharedfp_base_component_2_0_0_t { + mca_base_component_t sharedfpm_version; + mca_base_component_data_t sharedfpm_data; + + mca_sharedfp_base_component_init_query_1_0_0_fn_t sharedfpm_init_query; + mca_sharedfp_base_component_file_query_1_0_0_fn_t sharedfpm_file_query; + mca_sharedfp_base_component_file_unquery_1_0_0_fn_t sharedfpm_file_unquery; +}; +typedef struct mca_sharedfp_base_component_2_0_0_t mca_sharedfp_base_component_2_0_0_t; +typedef struct mca_sharedfp_base_component_2_0_0_t mca_sharedfp_base_component_t; + +/* + * 
*********************************************************************** + * ************************ Interface function definitions ************** + * These are the typedefs for the function pointers to various sharedfp + * backend functions which will be used by the various sharedfp components + * *********************************************************************** + */ + +typedef int (*mca_sharedfp_base_module_init_1_0_0_fn_t) +(struct mca_io_ompio_file_t *file); + +typedef int (*mca_sharedfp_base_module_finalize_1_0_0_fn_t) +(struct mca_io_ompio_file_t *file); + +typedef int (*mca_sharedfp_base_module_update_fn_t)( + struct mca_io_ompio_file_t *fh, int num_bytes, + OMPI_MPI_OFFSET_TYPE current_position); +typedef int (*mca_sharedfp_base_module_seek_fn_t)( + struct mca_io_ompio_file_t *fh, OMPI_MPI_OFFSET_TYPE position); + +/* + * *********************************************************************** + * *************************** module structure ************************* + * *********************************************************************** + */ +struct mca_sharedfp_base_module_1_0_0_t { + /* + * Per-file initialization function. This is called only + * on the module which is selected. The finalize corresponding to + * this function is the next member of this struct + */ + mca_sharedfp_base_module_init_1_0_0_fn_t sharedfp_module_init; + mca_sharedfp_base_module_finalize_1_0_0_fn_t sharedfp_module_finalize; + + /* SHAREDFP function pointers */ + mca_sharedfp_base_module_update_fn_t sharedfp_update; + mca_sharedfp_base_module_seek_fn_t sharedfp_seek; +}; +typedef struct mca_sharedfp_base_module_1_0_0_t mca_sharedfp_base_module_1_0_0_t; +typedef mca_sharedfp_base_module_1_0_0_t mca_sharedfp_base_module_t; + +END_C_DECLS + +#endif /* OMPI_MCA_SHAREDFP_H */