1
1
openmpi/ompi/mca/coll/ml/coll_ml_copy_fns.c
2015-09-11 13:20:44 +09:00

132 строки
4.4 KiB
C

/*
* Copyright (c) 2009-2012 Oak Ridge National Laboratory. All rights reserved.
* Copyright (c) 2009-2012 Mellanox Technologies. All rights reserved.
* Copyright (c) 2015 Research Organization for Information Science
* and Technology (RIST). All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
/** @file */
#include "ompi_config.h"
#include "ompi/constants.h"
#include "opal/threads/mutex.h"
#include "ompi/communicator/communicator.h"
#include "ompi/mca/coll/coll.h"
#include "ompi/mca/bcol/bcol.h"
#include "opal/sys/atomic.h"
#include "ompi/mca/coll/ml/coll_ml.h"
#include "ompi/mca/coll/ml/coll_ml_inlines.h"
#include "ompi/mca/coll/ml/coll_ml_allocation.h"
#include "coll_ml_colls.h"
#include <unistd.h>
#include <sys/uio.h>
/**
 * Re-order and pack per-rank user data into the ML library buffer.
 *
 * For each position i in the topology's sorted list, the fragment that
 * belongs to rank sort_list[i] is copied out of the user's source buffer
 * and stored in slot i of the ML source descriptor buffer.  The same
 * number of bytes (fragment_data.per_rank_fragment_size) is copied for
 * every rank, and each rank's region in the user buffer is addressed as
 * one block of full_message.n_bytes_per_proc_total bytes.
 *
 * @param coll_op  Collective operation descriptor.  The routine reads the
 *                 module/communicator, the schedule's topology info, the
 *                 full-message and fragment bookkeeping, and writes into
 *                 variable_fn_params.src_desc->data_addr.
 *
 * @return OMPI_SUCCESS (this routine has no failure path).
 */
int mca_coll_ml_pack_reorder_contiguous_data(mca_coll_ml_collective_operation_progress_t *coll_op)
{
    mca_coll_ml_module_t *ml_module = (mca_coll_ml_module_t *) coll_op->coll_module;
    mca_coll_ml_topology_t *topo_info = coll_op->coll_schedule->topo_info;

    /* Everything below is invariant across the loop, so read it once. */
    const int comm_size = ompi_comm_size(ml_module->comm);
    const size_t bytes_per_proc = coll_op->fragment_data.per_rank_fragment_size;
    const char *user_base = (const char *) coll_op->full_message.src_user_addr;
    char *library_base = (char *) coll_op->variable_fn_params.src_desc->data_addr;
    int i;

    /* walk the sorted list: slot i of the library buffer receives the
     * fragment of rank sort_list[i] */
    for (i = 0; i < comm_size; i++) {
        const int rank = topo_info->sort_list[i];

        /* Offset of this rank's fragment in the user buffer: start of the
         * rank's region plus the amount already processed by earlier
         * fragments.  The rank is widened first so the product is never
         * evaluated in plain int. */
        const ptrdiff_t ptr_dif =
            (ptrdiff_t) rank * coll_op->full_message.n_bytes_per_proc_total +
            coll_op->fragment_data.offset_into_user_buffer_per_proc;

        memcpy(library_base + i * bytes_per_proc, user_base + ptr_dif, bytes_per_proc);
    }

    return OMPI_SUCCESS;
}
/**
 * Re-order and pack per-rank user data into the ML library buffer, with
 * the per-rank stride in the user buffer derived from the send count and
 * send extent.
 *
 * For each position i in the topology's sorted list, the fragment that
 * belongs to rank sort_list[i] is copied out of the user's source buffer
 * and stored in slot i of the ML source descriptor buffer.  A rank's
 * region in the user buffer starts at
 * rank * full_message.send_count * full_message.send_extent, and
 * fragment_data.per_rank_fragment_size bytes are copied for every rank.
 *
 * NOTE(review): the copy itself is a single flat memcpy per rank, so the
 * bytes inside one fragment are presumably laid out contiguously; only
 * the stride between ranks differs from the contiguous variant.  Confirm
 * against the callers.
 *
 * @param coll_op  Collective operation descriptor.  The routine reads the
 *                 module/communicator, the schedule's topology info, the
 *                 full-message and fragment bookkeeping, and writes into
 *                 variable_fn_params.src_desc->data_addr.
 *
 * @return OMPI_SUCCESS (this routine has no failure path).
 */
int mca_coll_ml_pack_reorder_noncontiguous_data(mca_coll_ml_collective_operation_progress_t *coll_op)
{
    mca_coll_ml_module_t *ml_module = (mca_coll_ml_module_t *) coll_op->coll_module;
    mca_coll_ml_topology_t *topo_info = coll_op->coll_schedule->topo_info;

    /* Everything below is invariant across the loop, so read it once. */
    const int comm_size = ompi_comm_size(ml_module->comm);
    const size_t bytes_per_proc = coll_op->fragment_data.per_rank_fragment_size;
    const char *user_base = (const char *) coll_op->full_message.src_user_addr;
    char *library_base = (char *) coll_op->variable_fn_params.src_desc->data_addr;
    int i;

    /* walk the sorted list: slot i of the library buffer receives the
     * fragment of rank sort_list[i] */
    for (i = 0; i < comm_size; i++) {
        const int rank = topo_info->sort_list[i];

        /* Offset of this rank's fragment in the user buffer: the rank's
         * region starts at rank * send_count * send_extent, followed by
         * the amount already processed by earlier fragments.  The rank is
         * widened first so that no part of the left-to-right product is
         * evaluated in plain int. */
        const ptrdiff_t ptr_dif =
            (ptrdiff_t) rank * coll_op->full_message.send_count *
            coll_op->full_message.send_extent +
            coll_op->fragment_data.offset_into_user_buffer_per_proc;

        memcpy(library_base + i * bytes_per_proc, user_base + ptr_dif, bytes_per_proc);
    }

    return OMPI_SUCCESS;
}