1
1
openmpi/orte/test/system/ofi_stress.c
Ralph Castain a2919174d0 Bring the RML modifications across. This is the first step in a revamp of the ORTE messaging subsystem to support fabric-based communications during launch and wireup phases. When completed, the grpcomm and plm frameworks will each have their own "conduit" for communication - each conduit corresponds to a particular RML messaging transport. This can be the active OOB-based component, or a provider from within the RML/OFI component. Messages sent down the conduit will flow across the associated transport.
Multiple conduits can exist at the same time, and can even point to the same base transport. Each conduit can have its own characteristics (e.g., flow control) based on the info keys provided to the "open_conduit" call. For ease during the transition period, the "legacy" RML interfaces remain as wrappers over the new conduit-based APIs using a default conduit opened during orte_init - this default conduit is tied to the OOB framework so that current behaviors are preserved. Once the transition has been completed, a one-time cleanup will be done to update all RML calls to the new APIs and the "legacy" interfaces will be deleted.

While we are at it: Remove oob/usock component to eliminate the TMPDIR length problem - get all working, including oob_stress
2016-10-11 16:01:02 -07:00

122 строки
3.7 KiB
C

#include "orte_config.h"
#include <stdio.h>
#include <signal.h>
#include <math.h>
#include <sys/time.h>
#include "opal/runtime/opal_progress.h"
#include "orte/util/proc_info.h"
#include "orte/util/name_fns.h"
#include "orte/runtime/orte_globals.h"
#include "orte/mca/rml/rml.h"
#include "orte/mca/errmgr/errmgr.h"
#include "orte/runtime/runtime.h"
#include "orte/runtime/orte_wait.h"
#define MY_TAG 12345
#define MAX_COUNT 3
static bool msg_recvd;
static volatile bool msg_active;
static void send_callback(int status, orte_process_name_t *peer,
opal_buffer_t* buffer, orte_rml_tag_t tag,
void* cbdata)
{
OBJ_RELEASE(buffer);
if (ORTE_SUCCESS != status) {
exit(1);
}
msg_active = false;
}
int
main(int argc, char *argv[]){
int count;
int msgsize;
uint8_t *msg;
int i, j, rc;
orte_process_name_t peer;
double maxpower;
opal_buffer_t *buf;
orte_rml_recv_cb_t blob;
int sock_conduit_id = 0; //use the first conduit
struct timeval start, end;
/*
* Init
*/
orte_init(&argc, &argv, ORTE_PROC_NON_MPI);
if (argc > 1) {
count = atoi(argv[1]);
if (count < 0) {
count = INT_MAX-1;
}
} else {
count = MAX_COUNT;
}
peer.jobid = ORTE_PROC_MY_NAME->jobid;
peer.vpid = ORTE_PROC_MY_NAME->vpid + 1;
if (peer.vpid == orte_process_info.num_procs) {
peer.vpid = 0;
}
gettimeofday(&start, NULL);
for (j=1; j < count+1; j++) {
/* rank0 starts ring */
if (ORTE_PROC_MY_NAME->vpid == 0) {
/* setup the initiating buffer - put random sized message in it */
buf = OBJ_NEW(opal_buffer_t);
maxpower = (double)(j%7);
msgsize = (int)pow(10.0, maxpower);
opal_output(0, "Ring %d message size %d bytes", j, msgsize);
msg = (uint8_t*)malloc(msgsize);
opal_dss.pack(buf, msg, msgsize, OPAL_BYTE);
free(msg);
orte_rml.send_buffer_transport_nb(sock_conduit_id,&peer, buf, MY_TAG, orte_rml_send_callback, NULL);
/* wait for it to come around */
OBJ_CONSTRUCT(&blob, orte_rml_recv_cb_t);
blob.active = true;
orte_rml.recv_buffer_nb(ORTE_NAME_WILDCARD, MY_TAG,
ORTE_RML_NON_PERSISTENT,
orte_rml_recv_callback, &blob);
ORTE_WAIT_FOR_COMPLETION(blob.active);
OBJ_DESTRUCT(&blob);
opal_output(0, "%s Ring %d completed", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), j);
} else {
/* wait for msg */
OBJ_CONSTRUCT(&blob, orte_rml_recv_cb_t);
blob.active = true;
orte_rml.recv_buffer_nb(ORTE_NAME_WILDCARD, MY_TAG,
ORTE_RML_NON_PERSISTENT,
orte_rml_recv_callback, &blob);
ORTE_WAIT_FOR_COMPLETION(blob.active);
opal_output(0, "%s received message %d from %s", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), j, ORTE_NAME_PRINT(&blob.name));
/* send it along */
buf = OBJ_NEW(opal_buffer_t);
opal_dss.copy_payload(buf, &blob.data);
OBJ_DESTRUCT(&blob);
msg_active = true;
orte_rml.send_buffer_transport_nb(sock_conduit_id,&peer, buf, MY_TAG, send_callback, NULL);
ORTE_WAIT_FOR_COMPLETION(msg_active);
}
}
gettimeofday(&end, NULL);
orte_finalize();
printf("start: %d secs, %d usecs\n",start.tv_sec,start.tv_usec);
printf("end: %d secs, %d usecs\n",end.tv_sec,end.tv_usec);
printf("Total minutes = %d, Total seconds = %d", (end.tv_sec - start.tv_sec)/60, (end.tv_sec - start.tv_sec) );
return 0;
}