1
1
openmpi/ompi/mca/sharedfp/individual/sharedfp_individual_iwrite.c
George Bosilca 366d64b7e5 Move the collective structure outside the communicator.
As we changed the ABI (forcing a major release), we can limit
the size of the predefined communicators by moving the collective
structure outside the communicator. This might have a minimal,
but unnoticeable, impact on performance. This approach has been
discussed during the January 2017 devel meeting.

Signed-off-by: George Bosilca <bosilca@icl.utk.edu>
Signed-off-by: Joshua Hursey <jhursey@us.ibm.com>
2017-02-27 11:54:17 -06:00

227 строки
8.2 KiB
C

/*
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2017 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* Copyright (c) 2013-2016 University of Houston. All rights reserved.
* Copyright (c) 2015 Research Organization for Information Science
* and Technology (RIST). All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#include "ompi_config.h"
#include "sharedfp_individual.h"
#include "mpi.h"
#include "ompi/constants.h"
#include "ompi/mca/sharedfp/sharedfp.h"
#include "ompi/mca/sharedfp/base/base.h"
int mca_sharedfp_individual_iwrite(mca_io_ompio_file_t *fh,
const void *buf,
int count,
ompi_datatype_t *datatype,
MPI_Request * request)
{
int ret = OMPI_SUCCESS;
size_t numofbytes = 0;
OMPI_MPI_OFFSET_TYPE totalbytes = 0;
mca_sharedfp_individual_header_record *headnode = NULL;
struct mca_sharedfp_base_data_t *sh = NULL;
mca_sharedfp_base_module_t * shared_fp_base_module = NULL;
if(fh->f_sharedfp_data==NULL){
if ( mca_sharedfp_individual_verbose ) {
opal_output(ompi_sharedfp_base_framework.framework_output,
"mca_sharedfp_individual_iwrite: opening the shared file pointer\n");
}
shared_fp_base_module = fh->f_sharedfp;
ret = shared_fp_base_module->sharedfp_file_open(fh->f_comm,
fh->f_filename,
fh->f_amode,
fh->f_info,
fh);
if (ret != OMPI_SUCCESS) {
opal_output(0,"mca_sharedfp_individual_iwrite - error opening the shared file pointer\n");
return ret;
}
}
/* Calculate the number of bytes of data that needs to be written*/
opal_datatype_type_size ( &datatype->super, &numofbytes);
totalbytes = count * numofbytes;
sh = fh->f_sharedfp_data;
headnode = (mca_sharedfp_individual_header_record*)sh->selected_module_data;
if ( NULL == headnode) {
opal_output (0, "sharedfp_individual_iwrite: headnode is NULL but file is open\n");
return OMPI_ERROR;
}
/*Insert metadata record into a queue*/
ret = mca_sharedfp_individual_insert_metadata(OMPI_FILE_WRITE_SHARED,totalbytes,sh);
/*Write the data into individual file*/
ret = mca_common_ompio_file_iwrite_at ( headnode->datafilehandle, headnode->datafile_offset,
buf, count, datatype, request);
if ( OMPI_SUCCESS != ret ) {
opal_output(0,"sharedfp_individual_iwrite: Error while iwriting the datafile \n");
return ret;
}
/* Update the datafileoffset */
headnode->datafile_offset = headnode->datafile_offset + totalbytes;
return ret;
}
int mca_sharedfp_individual_write_ordered_begin(mca_io_ompio_file_t *fh,
const void *buf,
int count,
struct ompi_datatype_t *datatype)
{
int ret = OMPI_SUCCESS;
int size = 0, rank = 0;
int i = 0;
size_t numofbytes = 0;
size_t totalbytes = 0;
OMPI_MPI_OFFSET_TYPE *offbuff=NULL;
OMPI_MPI_OFFSET_TYPE global_offset = 0;
OMPI_MPI_OFFSET_TYPE prev_offset = 0;
OMPI_MPI_OFFSET_TYPE temp = 0, offset = 0;
mca_sharedfp_individual_header_record *headnode = NULL;
struct mca_sharedfp_base_data_t *sh = NULL;
mca_sharedfp_base_module_t * shared_fp_base_module = NULL;
if(fh->f_sharedfp_data==NULL){
if ( mca_sharedfp_individual_verbose ) {
opal_output(ompi_sharedfp_base_framework.framework_output,
"sharedfp_individual_write_ordered_begin - opening the shared file pointer\n");
}
shared_fp_base_module = fh->f_sharedfp;
ret = shared_fp_base_module->sharedfp_file_open(fh->f_comm,
fh->f_filename,
fh->f_amode,
fh->f_info,
fh);
if ( OMPI_SUCCESS != ret ) {
opal_output(0,"sharedfp_individual_write_ordered_begin - error opening the shared file pointer\n");
return ret;
}
}
if ( true == fh->f_split_coll_in_use ) {
opal_output(0, "Only one split collective I/O operation allowed per file handle at any given point in time!\n");
return MPI_ERR_REQUEST;
}
/*Retrieve the sharedfp data structures*/
sh = fh->f_sharedfp_data;
rank = ompi_comm_rank ( sh->comm );
size = ompi_comm_size ( sh->comm );
/* Calculate the number of bytes of data that needs to be written*/
opal_datatype_type_size ( &datatype->super, &numofbytes);
totalbytes = count * numofbytes;
headnode = (mca_sharedfp_individual_header_record*)sh->selected_module_data;
if ( NULL == headnode) {
opal_output (0, "sharedfp_individual_write_ordered_begin: headnode is NULL but file is open\n");
return OMPI_ERROR;
}
/* Data from all the metadata is combined and written to the main file */
ret = mca_sharedfp_individual_collaborate_data ( sh );
if ( OMPI_SUCCESS != ret) {
return ret;
}
if ( 0 == rank ) {
offbuff = (OMPI_MPI_OFFSET_TYPE *)malloc ( sizeof(OMPI_MPI_OFFSET_TYPE) * size);
if (NULL == offbuff ) {
return OMPI_ERR_OUT_OF_RESOURCE;
}
}
/*collect the total bytes to be written*/
sh->comm->c_coll->coll_gather ( &totalbytes, 1, OMPI_OFFSET_DATATYPE,
offbuff, 1, OMPI_OFFSET_DATATYPE, 0,
sh->comm, sh->comm->c_coll->coll_gather_module );
if ( 0 == rank ) {
prev_offset = offbuff[0];
offbuff[0] = sh->global_offset;
for (i = 1; i < size ; i++){
temp = offbuff[i];
offbuff[i] = offbuff[i - 1] + prev_offset;
prev_offset = temp;
}
for (i = 0; i < size; i++){
global_offset = offbuff[size - 1] + prev_offset;
}
}
/* Scatter the results to the other processes */
ret = sh->comm->c_coll->coll_scatter ( offbuff, 1, OMPI_OFFSET_DATATYPE,
&offset, 1, OMPI_OFFSET_DATATYPE, 0,
sh->comm, sh->comm->c_coll->coll_scatter_module );
if ( OMPI_SUCCESS != ret ) {
opal_output(0,"sharedfp_individual_write_ordered_begin: Error in scattering offsets \n");
goto exit;
}
ret = sh->comm->c_coll->coll_bcast ( &global_offset, 1, OMPI_OFFSET_DATATYPE,
0, sh->comm, sh->comm->c_coll->coll_bcast_module );
if ( OMPI_SUCCESS != ret ) {
opal_output(0,"sharedfp_individual_write_ordered_begin: Error while bcasting global offset \n");
goto exit;
}
sh->global_offset = global_offset;
/*use file_write_at_all to ensure the order*/
ret = mca_common_ompio_file_iwrite_at_all(sh->sharedfh,offset, buf,count,datatype,
&fh->f_split_coll_req);
fh->f_split_coll_in_use = true;
if ( OMPI_SUCCESS != ret ) {
opal_output(0,"sharedfp_individual_write_ordered_begin: Error while writing the datafile \n");
}
exit:
if ( NULL != offbuff ) {
free ( offbuff);
}
return ret;
}
int mca_sharedfp_individual_write_ordered_end(mca_io_ompio_file_t *fh,
const void *buf,
ompi_status_public_t *status)
{
int ret = OMPI_SUCCESS;
ret = ompi_request_wait ( &fh->f_split_coll_req, status );
/* remove the flag again */
fh->f_split_coll_in_use = false;
return ret;
}