1
1

Bring orte tests up to date with revised rml system.

Make first cut at fixing non-direct xcast modes.

This commit was SVN r15553.
Этот коммит содержится в:
Ralph Castain 2007-07-23 13:05:34 +00:00
родитель c3be7376c5
Коммит 6c800d452d
4 изменённых файла: 18 добавлений и 31 удаление

Просмотреть файл

@@ -478,42 +478,28 @@ static int xcast_linear(orte_jobid_t job,
*/ */
OPAL_THREAD_LOCK(&orte_grpcomm_basic.mutex); OPAL_THREAD_LOCK(&orte_grpcomm_basic.mutex);
orte_grpcomm_basic.num_active += range; orte_grpcomm_basic.num_active += range;
if (orte_process_info.daemon ||
orte_process_info.seed ||
orte_process_info.singleton) {
/* we never send to ourselves,
* so we need to adjust the number of sends
* we are expecting to complete
*/
orte_grpcomm_basic.num_active--;
if (orte_grpcomm_basic.num_active <= 0) {
OPAL_THREAD_UNLOCK(&orte_grpcomm_basic.mutex);
rc = ORTE_SUCCESS;
goto CLEANUP;
}
}
OPAL_THREAD_UNLOCK(&orte_grpcomm_basic.mutex); OPAL_THREAD_UNLOCK(&orte_grpcomm_basic.mutex);
/* send the message to each daemon as fast as we can */ /* send the message to each daemon as fast as we can */
dummy.jobid = 0; dummy.jobid = 0;
for (i=0; i < range; i++) { for (i=0; i < range; i++) {
if (ORTE_PROC_MY_NAME->vpid != i) { /* don't send to myself */ dummy.vpid = i;
dummy.vpid = i; opal_output(orte_grpcomm_basic.output, "%s xcast to %s",
if (0 > (rc = orte_rml.send_buffer_nb(&dummy, buf, ORTE_RML_TAG_ORTED_ROUTED, ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ORTE_NAME_PRINT(&dummy));
0, xcast_send_cb, NULL))) { if (0 > (rc = orte_rml.send_buffer_nb(&dummy, buf, ORTE_RML_TAG_ORTED_ROUTED,
if (ORTE_ERR_ADDRESSEE_UNKNOWN != rc) { 0, xcast_send_cb, NULL))) {
ORTE_ERROR_LOG(ORTE_ERR_COMM_FAILURE); if (ORTE_ERR_ADDRESSEE_UNKNOWN != rc) {
rc = ORTE_ERR_COMM_FAILURE; ORTE_ERROR_LOG(ORTE_ERR_COMM_FAILURE);
OPAL_THREAD_LOCK(&orte_grpcomm_basic.mutex); rc = ORTE_ERR_COMM_FAILURE;
orte_grpcomm_basic.num_active -= (range-i);
OPAL_THREAD_UNLOCK(&orte_grpcomm_basic.mutex);
goto CLEANUP;
}
/* decrement the number we are waiting to see */
OPAL_THREAD_LOCK(&orte_grpcomm_basic.mutex); OPAL_THREAD_LOCK(&orte_grpcomm_basic.mutex);
orte_grpcomm_basic.num_active--; orte_grpcomm_basic.num_active -= (range-i);
OPAL_THREAD_UNLOCK(&orte_grpcomm_basic.mutex); OPAL_THREAD_UNLOCK(&orte_grpcomm_basic.mutex);
goto CLEANUP;
} }
/* decrement the number we are waiting to see */
OPAL_THREAD_LOCK(&orte_grpcomm_basic.mutex);
orte_grpcomm_basic.num_active--;
OPAL_THREAD_UNLOCK(&orte_grpcomm_basic.mutex);
} }
} }
rc = ORTE_SUCCESS; rc = ORTE_SUCCESS;

Просмотреть файл

@@ -91,7 +91,7 @@ main(int argc, char *argv[]){
&msg, &msg,
1, 1,
MY_TAG, MY_TAG,
MCA_OOB_ALLOC) ) { ORTE_RML_ALLOC) ) {
printf("error A... %d\n", __LINE__); printf("error A... %d\n", __LINE__);
} }

Просмотреть файл

@@ -20,6 +20,7 @@
#include "orte/mca/smr/smr.h" #include "orte/mca/smr/smr.h"
#include "orte/mca/errmgr/errmgr.h" #include "orte/mca/errmgr/errmgr.h"
#include "orte/mca/rml/rml.h" #include "orte/mca/rml/rml.h"
#include "orte/mca/grpcomm/grpcomm.h"
int main(int argc, char* argv[]) int main(int argc, char* argv[])
{ {
@@ -93,7 +94,7 @@ int main(int argc, char* argv[])
} }
/* FIRST BARRIER - WAIT FOR MSG FROM RMGR_PROC_STAGE_GATE_MGR TO ARRIVE */ /* FIRST BARRIER - WAIT FOR MSG FROM RMGR_PROC_STAGE_GATE_MGR TO ARRIVE */
if (ORTE_SUCCESS != (rc = orte_rml.xcast_gate(orte_gpr.deliver_notify_msg))) { if (ORTE_SUCCESS != (rc = orte_grpcomm.xcast_gate(orte_gpr.deliver_notify_msg))) {
ORTE_ERROR_LOG(rc); ORTE_ERROR_LOG(rc);
error = "failed to see all procs register\n"; error = "failed to see all procs register\n";
goto error; goto error;

Просмотреть файл

@@ -30,7 +30,7 @@ int main(int argc, char* argv[])
pid = getpid(); pid = getpid();
/* wait for message from our parent */ /* wait for message from our parent */
if (0 > orte_rml.recv(ORTE_NAME_WILDCARD, &msg, 1, MY_TAG, MCA_OOB_ALLOC)) { if (0 > orte_rml.recv(ORTE_NAME_WILDCARD, &msg, 1, MY_TAG, ORTE_RML_ALLOC)) {
printf("error at line %d\n", __LINE__); printf("error at line %d\n", __LINE__);
} }