diff --git a/ompi/mca/pml/ob1/Makefile.am b/ompi/mca/pml/ob1/Makefile.am index d0044bb6b6..3364d001b1 100644 --- a/ompi/mca/pml/ob1/Makefile.am +++ b/ompi/mca/pml/ob1/Makefile.am @@ -47,7 +47,14 @@ ob1_sources = \ pml_ob1_recvreq.h \ pml_ob1_sendreq.c \ pml_ob1_sendreq.h \ - pml_ob1_start.c + pml_ob1_start.c \ + custommatch/pml_ob1_custom_match.h \ + custommatch/pml_ob1_custom_match_arrays.h \ + custommatch/pml_ob1_custom_match_vectors.h \ + custommatch/pml_ob1_custom_match_linkedlist.h \ + custommatch/pml_ob1_custom_match_fuzzy512-byte.h \ + custommatch/pml_ob1_custom_match_fuzzy512-short.h \ + custommatch/pml_ob1_custom_match_fuzzy512-word.h # If we have CUDA support requested, build the CUDA file also if OPAL_cuda_support diff --git a/ompi/mca/pml/ob1/configure.m4 b/ompi/mca/pml/ob1/configure.m4 index a8a8ad1e04..a05ce86156 100644 --- a/ompi/mca/pml/ob1/configure.m4 +++ b/ompi/mca/pml/ob1/configure.m4 @@ -22,6 +22,44 @@ AC_DEFUN([MCA_ompi_pml_ob1_POST_CONFIG], [ # ------------------------------------------------ # We can always build, unless we were explicitly disabled. AC_DEFUN([MCA_ompi_pml_ob1_CONFIG],[ + OPAL_VAR_SCOPE_PUSH([pml_ob1_matching_engine]) + AC_ARG_WITH([pml-ob1-matching], [AC_HELP_STRING([--with-pml-ob1-matching=type], + [Configure pml/ob1 to use an alternate matching engine. Only valid on x86_64 systems. + Valid values are: none, default, arrays, fuzzy-byte, fuzzy-short, fuzzy-word, vector (default: none)])]) + + pml_ob1_matching_engine=MCA_PML_OB1_CUSTOM_MATCHING_NONE + + if test -n "$with_pml_ob1_matching" ; then + case $with_pml_ob1_matching in + none) + pml_ob1_matching_engine=MCA_PML_OB1_CUSTOM_MATCHING_NONE + ;; + default) + pml_ob1_matching_engine=MCA_PML_OB1_CUSTOM_MATCHING_LINKEDLIST + ;; + arrays) + pml_ob1_matching_engine=MCA_PML_OB1_CUSTOM_MATCHING_ARRAYS + ;; + fuzzy-byte) + pml_ob1_matching_engine=MCA_PML_OB1_CUSTOM_MATCHING_FUZZY_BYTE + ;; + fuzzy-short) + pml_ob1_matching_engine=MCA_PML_OB1_CUSTOM_MATCHING_FUZZY_SHORT + ;; + fuzzy-word) + pml_ob1_matching_engine=MCA_PML_OB1_CUSTOM_MATCHING_FUZZY_WORD + ;; + vector) + pml_ob1_matching_engine=MCA_PML_OB1_CUSTOM_MATCHING_VECTOR + ;; + *) + AC_ERROR([invalid matching type specified for --pml-ob1-matching: $with_pml_ob1_matching]) + ;; + esac + fi + + AC_DEFINE_UNQUOTED([MCA_PML_OB1_CUSTOM_MATCHING], [$pml_ob1_matching_engine], [Custom matching engine to use in pml/ob1]) + AC_CONFIG_FILES([ompi/mca/pml/ob1/Makefile]) [$1] ])dnl diff --git a/ompi/mca/pml/ob1/custommatch/arrays.h b/ompi/mca/pml/ob1/custommatch/arrays.h deleted file mode 100644 index fad65b800c..0000000000 --- a/ompi/mca/pml/ob1/custommatch/arrays.h +++ /dev/null @@ -1,590 +0,0 @@ -/* - * Copyright (c) 2018 Sandia National Laboratories. All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#include - - -#ifndef __CUSTOM_MATCH -#define __CUSTOM_MATCH - - -#include "../pml_ob1_recvreq.h" -#include "../pml_ob1_recvfrag.h" - -#define PRQ_SIZE 2 - -#define CUSTOM_MATCH_DEBUG 0 -#define CUSTOM_MATCH_DEBUG_VERBOSE 0 - -typedef struct custom_match_prq_node -{ - int32_t tags[PRQ_SIZE]; - int32_t tmask[PRQ_SIZE]; - int32_t srcs[PRQ_SIZE]; - int32_t smask[PRQ_SIZE]; - struct custom_match_prq_node* next; - int8_t start, end; - void* value[PRQ_SIZE]; -} custom_match_prq_node; - -typedef struct custom_match_prq -{ - custom_match_prq_node* head; - custom_match_prq_node* tail; - custom_match_prq_node* pool; - int size; -} custom_match_prq; - -static inline int custom_match_prq_cancel(custom_match_prq* list, void* req) -{ -#if CUSTOM_MATCH_DEBUG - printf("custom_match_prq_cancel - list: %x req: %x\n", list, req); -#endif - custom_match_prq_node* prev = 0; - custom_match_prq_node* elem = list->head; - int i; - while(elem) - { - for(i = elem->start; i <= elem->end; i++) - { - if(elem->value[i] == req) - { -#if CUSTOM_MATCH_DEBUG - printf("Canceled!");// %x %x %x\n", req, req->req_tag, req->req_peer); -#endif - elem->tags[i] = ~0; - elem->tmask[i] = ~0; - elem->srcs[i] = ~0; - elem->smask[i] = ~0; - elem->value[i] = 0; - if(i == elem->start || i == elem->end) - { - while((elem->start <= elem->end) && (!(elem->value[elem->start]))) elem->start++; - while((elem->start <= elem->end) && (!(elem->value[elem->end]))) elem->end--; - if(elem->start > elem->end) - { - if(prev) - { - prev->next = elem->next; - } - else - { - list->head = elem->next; - } - if(!elem->next) - { - list->tail = prev; - } - elem->next = list->pool; - list->pool = elem; - } - } - list->size--; - return 1; - } - } - prev = elem; - elem = elem->next; - } - return 0; -} - -static inline void* custom_match_prq_find_verify(custom_match_prq* list, int tag, int peer) -{ - int result; -#if CUSTOM_MATCH_DEBUG - // printf("custom_match_prq_find_verify list: %x tag: %x peer: %x\n", list, tag, peer); -#endif - custom_match_prq_node* elem = list->head; - int i; - - while(elem) - { - for(i = elem->start; i <= elem->end; i++) - { - if(elem->value[i]) - { - result = ((elem->tags[i] & elem->tmask[i]) == (tag & elem->tmask[i])) && ((elem->srcs[i] & elem->smask[i]) == (peer & elem->smask[i])); - if(result) - { - return elem->value[i]; - } - } - elem = elem->next; - } - } - return 0; -} - -static inline void* custom_match_prq_find_dequeue_verify(custom_match_prq* list, int tag, int peer) -{ - int result; -#if CUSTOM_MATCH_DEBUG - // printf("custom_match_prq_find_dequeue_verify list: %x:%d tag: %x peer: %x\n", list, list->size, tag, peer); -#endif - custom_match_prq_node* prev = 0; - custom_match_prq_node* elem = list->head; - int i; - while(elem) - { - for(i = elem->start; i <= elem->end; i++) - { - if(elem->value[i]) - { - result = ((elem->tags[i] & elem->tmask[i]) == (tag & elem->tmask[i])) && ((elem->srcs[i] & elem->smask[i]) == (peer & elem->smask[i])); - if(result) - { - void* payload = elem->value[i]; - elem->tags[i] = ~0; - elem->tmask[i] = ~0; - elem->srcs[i] = ~0; - elem->smask[i] = ~0; - elem->value[i] = 0; - if(i == elem->start || i == elem->end) - { - while((elem->start <= elem->end) && (!(elem->value[elem->start]))) elem->start++; - while((elem->start <= elem->end) && (!(elem->value[elem->end]))) elem->end--; - if(elem->start > elem->end) - { - if(prev) - { - prev->next = elem->next; - } - else - { - list->head = elem->next; - } - if(!elem->next) - { - list->tail = prev; - } - elem->next = list->pool; - list->pool = elem; - } - } - list->size--; - //printf("Found list: %x tag: %x peer: %x\n", list, req->req_tag, req->req_peer); - return payload; - } - } - } - prev = elem; - elem = elem->next; - } - return 0; -} - - -static inline void custom_match_prq_append(custom_match_prq* list, void* payload, int tag, int source) -{ - int32_t mask_tag, mask_src; - if(source == OMPI_ANY_SOURCE) - { - mask_src = 0; - } - else - { - mask_src = ~0; - } - if(tag == OMPI_ANY_TAG) - { - mask_tag = 0; - } - else - { - mask_tag = ~0; - } - mca_pml_base_request_t *req = (mca_pml_base_request_t *)payload; -#if CUSTOM_MATCH_DEBUG - printf("custom_match_prq_append list: %x mask: %x tag: %x peer: %x\n", list, mask_tag, tag, source); -#endif - int i; - custom_match_prq_node* elem; - if((!list->tail) || list->tail->end == PRQ_SIZE-1) - { - if(list->pool) - { - elem = list->pool; - list->pool = list->pool->next; - } - else - { - elem = malloc(sizeof(custom_match_prq_node)); - } - elem->next = 0; - elem->start = 0; - elem->end = -1; // we don't have an element yet - for(i = 0; i < PRQ_SIZE; i++) - { - elem->value[i] = 0; - elem->tags[i] = ~0; // TODO: we only have to do this type of initialization for freshly malloc'd entries. - elem->tmask[i] = ~0; - elem->srcs[i] = ~0; - elem->smask[i] = ~0; - } - - if(list->tail) - { - list->tail->next = elem; - list->tail = elem; - } - else - { - list->head = elem; - list->tail = elem; - } - } - - elem = list->tail; - elem->end++; - elem->tags[elem->end] = tag; - elem->tmask[elem->end] = mask_tag; - elem->srcs[elem->end] = source; - elem->smask[elem->end] = mask_src; - elem->value[elem->end] = payload; - list->size++; -#if CUSTOM_MATCH_DEBUG - printf("Exiting custom_match_prq_append\n"); -#endif -} - -static inline int custom_match_prq_size(custom_match_prq* list) -{ - return list->size; -} - -static inline custom_match_prq* custom_match_prq_init() -{ -#if CUSTOM_MATCH_DEBUG - printf("custom_match_prq_init\n"); -#endif - custom_match_prq* list = malloc(sizeof(custom_match_prq)); - list->head = 0; - list->tail = 0; - list->pool = 0; - list->size = 0; - return list; -} - -static inline void custom_match_prq_destroy(custom_match_prq* list) -{ -#if CUSTOM_MATCH_DEBUG - printf("custom_match_prq_destroy\n"); -#endif - custom_match_prq_node* elem; - while(list->head) - { - elem = list->head; - list->head = list->head->next; - free(elem); - } - while(list->pool) - { - elem = list->pool; - list->pool = list->pool->next; - free(elem); - } - free(list); -} - -static inline void custom_match_print(custom_match_prq* list) -{ - custom_match_prq_node* elem; - int i = 0; - int j = 0; - printf("Elements in the list (this is currenly only partialy implemented):\n"); - for(elem = list->head; elem; elem = elem->next) - { - printf("This is the %d linked list element\n", ++i); - for(j = 0; j < PRQ_SIZE; j++) - { - printf("%d:%d The key is %d, the mask is %d, the value is %ld\n", i, j, elem->tags[j], elem->tmask[j], elem->value[j]); - } - i++; - } -} - -static inline void custom_match_prq_dump(custom_match_prq* list) -{ - opal_list_item_t* item; - char cpeer[64], ctag[64]; - - custom_match_prq_node* elem; - int i = 0; - int j = 0; - printf("Elements in the list:\n"); - for(elem = list->head; elem; elem = elem->next) - { - printf("This is the %d linked list element\n", ++i); - for(j = 0; j < PRQ_SIZE; j++) - { - if(elem->value[j]) - { - mca_pml_base_request_t *req = (mca_pml_base_request_t *)elem->value[j]; - if( OMPI_ANY_SOURCE == req->req_peer ) snprintf(cpeer, 64, "%s", "ANY_SOURCE"); - else snprintf(cpeer, 64, "%d", req->req_peer); - if( OMPI_ANY_TAG == req->req_tag ) snprintf(ctag, 64, "%s", "ANY_TAG"); - else snprintf(ctag, 64, "%d", req->req_tag); - opal_output(0, "req %p peer %s tag %s addr %p count %lu datatype %s [%p] [%s %s] req_seq %" PRIu64, - (void*) req, cpeer, ctag, - (void*) req->req_addr, req->req_count, - (0 != req->req_count ? req->req_datatype->name : "N/A"), - (void*) req->req_datatype, - (req->req_pml_complete ? "pml_complete" : ""), - (req->req_free_called ? "freed" : ""), - req->req_sequence); - - } - } - } -} - - -// UMQ below. - -#define UMQ_SIZE 3 - -typedef struct custom_match_umq_node -{ - int32_t tags[UMQ_SIZE]; - int32_t srcs[UMQ_SIZE]; - struct custom_match_umq_node* next; - int8_t start, end; - void* value[UMQ_SIZE]; -} custom_match_umq_node; - -typedef struct custom_match_umq -{ - custom_match_umq_node* head; - custom_match_umq_node* tail; - custom_match_umq_node* pool; - int size; -} custom_match_umq; - -static inline void custom_match_umq_dump(custom_match_umq* list); - -static inline void* custom_match_umq_find_verify_hold(custom_match_umq* list, int tag, int peer, custom_match_umq_node** hold_prev, custom_match_umq_node** hold_elem, int* hold_index) -{ - int result; -#if CUSTOM_MATCH_DEBUG - printf("custom_match_umq_find_verify_hold list: %x:%d tag: %x peer: %x\n", list, list->size, tag, peer); -#if CUSTOM_MATCH_DEBUG_VERBOSE - custom_match_umq_dump(list); -#endif -#endif - custom_match_umq_node* prev = 0; - custom_match_umq_node* elem = list->head; - int i; - - int tmask = ~0; - int smask = ~0; - if(peer == OMPI_ANY_SOURCE) - { - smask = 0; - } - - if(tag == OMPI_ANY_TAG) - { - tmask = 0; - } - - - tag = tag & tmask; - peer = peer & smask; - - while(elem) - { - for(i = elem->start; i <= elem->end; i++) - { - if(elem->value[i]) - { - result = (tag == (elem->tags[i] & tmask)) && (peer == (elem->srcs[i] & smask)); - if(result) - { - *hold_prev = prev; - *hold_elem = elem; - *hold_index = i; - return elem->value[i]; - } - } - } - prev = elem; - elem = elem->next; - } - return 0; -} - - -static inline void custom_match_umq_remove_hold(custom_match_umq* list, custom_match_umq_node* prev, custom_match_umq_node* elem, int i) -{ -#if CUSTOM_MATCH_DEBUG - printf("custom_match_umq_find_remove_hold %x %x %x\n", prev, elem, i); -#endif - elem->tags[i] = ~0; - elem->srcs[i] = ~0; - elem->value[i] = 0; - if(i == elem->start || i == elem->end) - { - while((elem->start <= elem->end) && (!(elem->value[elem->start]))) elem->start++; - while((elem->start <= elem->end) && (!(elem->value[elem->end]))) elem->end--; - if(elem->start > elem->end) - { - if(prev) - { - prev->next = elem->next; - } - else - { - list->head = elem->next; - } - if(!elem->next) - { - list->tail = prev; - } - elem->next = list->pool; - list->pool = elem; - } - } - list->size--; -} - -static inline void custom_match_umq_append(custom_match_umq* list, int tag, int source, void* payload) -{ -#if CUSTOM_MATCH_DEBUG - printf("custom_match_umq_append list: %x payload: %x tag: %d src: %d\n", list, payload, tag, source); -#endif - int i; - custom_match_umq_node* elem; - list->size++; - if((!list->tail) || list->tail->end == UMQ_SIZE-1) - { - if(list->pool) - { -#if CUSTOM_MATCH_DEBUG_VERBOSE - printf("Grab an element from the pool\n"); -#endif - elem = list->pool; - list->pool = list->pool->next; - } - else - { -#if CUSTOM_MATCH_DEBUG_VERBOSE - printf("Make a new element\n"); -#endif - elem = malloc(sizeof(custom_match_umq_node)); - } - elem->next = 0; - elem->start = 0; - elem->end = -1; // we don't have an element yet - for(i = 0; i < UMQ_SIZE; i++) - { - elem->tags[i] = 0; - elem->srcs[i] = 0; - elem->value[i] = 0; - } - if(list->tail) - { - //printf("Append to list of elems\n"); - list->tail->next = elem; - list->tail = elem; - } - else - { - //printf("New Elem is only Elem\n"); - list->head = elem; - list->tail = elem; - } - } - - elem = list->tail; - elem->end++; - elem->tags[elem->end] = tag; - elem->srcs[elem->end] = source; - elem->value[elem->end] = payload; -#if CUSTOM_MATCH_DEBUG_VERBOSE - custom_match_umq_dump(list); -#endif -} - -static inline custom_match_umq* custom_match_umq_init() -{ -#if CUSTOM_MATCH_DEBUG - printf("custom_match_umq_init\n"); -#endif - custom_match_umq* list = malloc(sizeof(custom_match_umq)); - list->head = 0; - list->tail = 0; - list->pool = 0; - list->size = 0; - return list; -} - -static inline void custom_match_umq_destroy(custom_match_umq* list) -{ -#if CUSTOM_MATCH_DEBUG - printf("custom_match_umq_destroy\n"); -#endif - custom_match_umq_node* elem; - while(list->head) - { - elem = list->head; - list->head = list->head->next; - free(elem); - } - while(list->pool) - { - elem = list->pool; - list->pool = list->pool->next; - free(elem); - } - free(list); -} - -static inline int custom_match_umq_size(custom_match_umq* list) -{ - return list->size; -} - -static inline void custom_match_umq_dump(custom_match_umq* list) -{ - char cpeer[64], ctag[64]; - - custom_match_umq_node* elem; - int i = 0; - int j = 0; - printf("Elements in the list:\n"); - for(elem = list->head; elem; elem = elem->next) - { - printf("This is the %d linked list element\n", ++i); - for(j = 0; j < UMQ_SIZE; j++) - { - if(elem->value[j]) - { - mca_pml_ob1_recv_frag_t *req = (mca_pml_ob1_recv_frag_t *)elem->value[j]; - printf("%x %x %x\n", elem->value[j], req->hdr.hdr_match.hdr_tag, req->hdr.hdr_match.hdr_src); - if( OMPI_ANY_SOURCE == req->hdr.hdr_match.hdr_src ) snprintf(cpeer, 64, "%s", "ANY_SOURCE"); - else snprintf(cpeer, 64, "%d", req->hdr.hdr_match.hdr_src); - if( OMPI_ANY_TAG == req->hdr.hdr_match.hdr_tag ) snprintf(ctag, 64, "%s", "ANY_TAG"); - else snprintf(ctag, 64, "%d", req->hdr.hdr_match.hdr_tag); - // opal_output(0, "peer %s tag %s",// addr %p count %lu datatype %s [%p] [%s %s] req_seq %" PRIu64, - // /*(void*) req,*/ cpeer, ctag, - //(void*) req->req_addr, req->req_count, - //(0 != req->req_count ? req->req_datatype->name : "N/A"), - //(void*) req->req_datatype, - //(req->req_pml_complete ? "pml_complete" : ""), - //(req->req_free_called ? "freed" : ""), - //req->req_sequence); - // ); - - } - } - } -} - -#endif diff --git a/ompi/mca/pml/ob1/custommatch/fuzzy512-byte.h b/ompi/mca/pml/ob1/custommatch/fuzzy512-byte.h deleted file mode 100644 index b6970e8b28..0000000000 --- a/ompi/mca/pml/ob1/custommatch/fuzzy512-byte.h +++ /dev/null @@ -1,595 +0,0 @@ -/* - * Copyright (c) 2018 Sandia National Laboratories. All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#include - - -#ifndef __CUSTOM_MATCH -#define __CUSTOM_MATCH - - -#include "../pml_ob1_recvreq.h" -#include "../pml_ob1_recvfrag.h" - -#define CUSTOM_MATCH_DEBUG 0 -#define CUSTOM_MATCH_DEBUG_VERBOSE 0 - -typedef struct custom_match_prq_node -{ - __m512i keys; - __m512i mask; - struct custom_match_prq_node* next; - int start, end; - void* value[64]; -} custom_match_prq_node; - -typedef struct custom_match_prq -{ - custom_match_prq_node* head; - custom_match_prq_node* tail; - custom_match_prq_node* pool; - int size; -} custom_match_prq; - -static inline int custom_match_prq_cancel(custom_match_prq* list, void* req) -{ -#if CUSTOM_MATCH_DEBUG - printf("custom_match_prq_cancel - list: %x req: %x\n", list, req); -#endif - __mmask64 result = 0; - custom_match_prq_node* prev = 0; - custom_match_prq_node* elem = list->head; - int i; - while(elem) - { - for(i = elem->start; i <= elem->end; i++) - { - if(elem->value[i] == req) - { -#if CUSTOM_MATCH_DEBUG - printf("Canceled!");// %x %x %x\n", req, req->req_tag, req->req_peer); -#endif - void* payload = elem->value[i]; - ((int8_t*)(&(elem->keys)))[i] = ~0; - ((int8_t*)(&(elem->mask)))[i] = ~0; - elem->value[i] = 0; - if(i == elem->start || i == elem->end) - { - while((elem->start <= elem->end) && (!(elem->value[elem->start]))) elem->start++; - while((elem->start <= elem->end) && (!(elem->value[elem->end]))) elem->end--; - if(elem->start > elem->end) - { - if(prev) - { - prev->next = elem->next; - } - else - { - list->head = elem->next; - } - if(!elem->next) - { - list->tail = prev; - } - elem->next = list->pool; - list->pool = elem; - } - } - list->size--; - return 1; - } - } - prev = elem; - elem = elem->next; - } - return 0; -} - -static inline void* custom_match_prq_find_verify(custom_match_prq* list, int tag, int peer) -{ -#if CUSTOM_MATCH_DEBUG - printf("custom_match_prq_find_verify list: %x tag: %x peer: %x\n", list, tag, peer); -#endif - __mmask64 result = 0; - custom_match_prq_node* elem = list->head; - int i; - int8_t key = peer ^ tag; - __m512i search = _mm512_set1_epi8(key); - while(elem) - { - result = _mm512_cmpeq_epi8_mask(_mm512_and_epi32(elem->keys, elem->mask), _mm512_and_epi32(search, elem->mask)); - if(result) - { - for(i = elem->start; i <= elem->end; i++) - { - if((0x1 << i & result) && elem->value[i]) - { - mca_pml_base_request_t *req = (mca_pml_base_request_t *)elem->value[i]; - if((req->req_peer == peer || req->req_peer == OMPI_ANY_SOURCE) && (req->req_tag == tag || req->req_tag == OMPI_ANY_TAG)) - { -#if CUSTOM_MATCH_DEBUG_VERBOSE - printf("Found list: %x tag: %x peer: %x\n", list, req->req_tag, req->req_peer); -#endif - return elem->value[i]; - } - } - } - } - elem = elem->next; - } - return 0; -} - -static inline void* custom_match_prq_find_dequeue_verify(custom_match_prq* list, int tag, int peer) -{ -#if CUSTOM_MATCH_DEBUG - printf("custom_match_prq_find_dequeue_verify list: %x:%d tag: %x peer: %x\n", list, list->size, tag, peer); -#endif - __mmask64 result = 0; - custom_match_prq_node* prev = 0; - custom_match_prq_node* elem = list->head; - int i; - int8_t key = peer ^ tag; - __m512i search = _mm512_set1_epi8(key); - while(elem) - { -#if CUSTOM_MATCH_DEBUG_VERBOSE - for(int iter = elem->start; iter <= elem->end; iter++) - { - printf("Search = %x, Element Key = %x, Element mask = %x\n", ((int8_t*) &search)[iter], ((int8_t*) &elem->keys)[iter], ((int8_t*) &elem->mask)[iter]); - } -#endif - result = _mm512_cmpeq_epi8_mask(_mm512_and_epi32(elem->keys, elem->mask), _mm512_and_epi32(search, elem->mask)); -#if CUSTOM_MATCH_DEBUG_VERBOSE - printf("Search Result: %lx\n",result); -#endif - if(result) - { -#if CUSTOM_MATCH_DEBUG_VERBOSE - printf("Search Result: %lx\n",result); -#endif - for(i = elem->start; i <= elem->end; i++) - { - mca_pml_base_request_t *req = (mca_pml_base_request_t *)elem->value[i]; - if(((0x1l << i) & result) && req && ((req->req_peer == peer || req->req_peer == OMPI_ANY_SOURCE) && (req->req_tag == tag || req->req_tag == OMPI_ANY_TAG))) - { - void* payload = elem->value[i]; - ((int8_t*)(&(elem->keys)))[i] = ~0; - ((int8_t*)(&(elem->mask)))[i] = ~0; - elem->value[i] = 0; - if(i == elem->start || i == elem->end) - { - while((elem->start <= elem->end) && (!(elem->value[elem->start]))) elem->start++; - while((elem->start <= elem->end) && (!(elem->value[elem->end]))) elem->end--; - if(elem->start > elem->end) - { - if(prev) - { - prev->next = elem->next; - } - else - { - list->head = elem->next; - } - if(!elem->next) - { - list->tail = prev; - } - elem->next = list->pool; - list->pool = elem; - } - } - list->size--; -#if CUSTOM_MATCH_DEBUG_VERBOSE - printf("Index: %d Found list: %x tag: %x peer: %x\n", i, list, req->req_tag, req->req_peer); -#endif - return payload; - } - } - } - prev = elem; - elem = elem->next; - } - return 0; -} - - -static inline void custom_match_prq_append(custom_match_prq* list, void* payload, int tag, int source) -{ - int8_t key, mask; - key = source ^ tag; - if(source == OMPI_ANY_SOURCE || tag == OMPI_ANY_TAG) - { - mask = 0; - } - else - { - mask = ~0; - } - mca_pml_base_request_t *req = (mca_pml_base_request_t *)payload; -#if CUSTOM_MATCH_DEBUG - printf("custom_match_prq_append list: %x key: %x mask: %x tag: %x peer: %x\n", list, key, mask, req->req_tag, req->req_peer); -#endif - int i; - custom_match_prq_node* elem; - if((!list->tail) || list->tail->end == 63) - { -#if CUSTOM_MATCH_DEBUG_VERBOSE - printf("Need a new element\n"); -#endif - if(list->pool) - { - elem = list->pool; - list->pool = list->pool->next; - } - else - { - elem = _mm_malloc(sizeof(custom_match_prq_node),64); - } - elem->keys = _mm512_set1_epi8(~0); - elem->mask = _mm512_set1_epi8(~0); - elem->next = 0; - elem->start = 0; - elem->end = -1; // we don't have an element yet - for(i = 0; i < 64; i++) elem->value[i] = 0; - if(list->tail) - { - list->tail->next = elem; - list->tail = elem; - } - else - { - list->head = elem; - list->tail = elem; - } - } - - elem = list->tail; - elem->end++; - ((int8_t*)(&(elem->keys)))[elem->end] = key; - ((int8_t*)(&(elem->mask)))[elem->end] = mask; - elem->value[elem->end] = payload; - list->size++; -#if CUSTOM_MATCH_DEBUG_VERBOSE - printf("Exiting custom_match_prq_append\n"); -#endif -} - -static inline int custom_match_prq_size(custom_match_prq* list) -{ - return list->size; -} - -static inline custom_match_prq* custom_match_prq_init() -{ -#if CUSTOM_MATCH_DEBUG - printf("custom_match_prq_init\n"); -#endif - custom_match_prq* list = _mm_malloc(sizeof(custom_match_prq),64); - list->head = 0; - list->tail = 0; - list->pool = 0; - list->size = 0; - return list; -} - -static inline void custom_match_prq_destroy(custom_match_prq* list) -{ -#if CUSTOM_MATCH_DEBUG - printf("custom_match_prq_destroy\n"); -#endif - custom_match_prq_node* elem; - while(list->head) - { - elem = list->head; - list->head = list->head->next; - _mm_free(elem); - } - while(list->pool) - { - elem = list->pool; - list->pool = list->pool->next; - _mm_free(elem); - } - _mm_free(list); -} - -static inline void custom_match_print(custom_match_prq* list) -{ - custom_match_prq_node* elem; - int i = 0; - int j = 0; - printf("Elements in the list:\n"); - for(elem = list->head; elem; elem = elem->next) - { - printf("This is the %d linked list element\n", ++i); - for(j = 0; j < 64; j++) - { - printf("%d:%d The key is %d, the mask is %d, the value is %ld\n", i, j, ((int8_t*)(&(elem->keys)))[j], ((int8_t*)(&(elem->mask)))[j], elem->value[j]); - } - i++; - } -} - -static inline void custom_match_prq_dump(custom_match_prq* list) -{ - opal_list_item_t* item; - char cpeer[64], ctag[64]; - - custom_match_prq_node* elem; - int i = 0; - int j = 0; - printf("Elements in the list:\n"); - for(elem = list->head; elem; elem = elem->next) - { - printf("This is the %d linked list element\n", ++i); - for(j = 0; j < 64; j++) - { - if(elem->value[j]) - { - mca_pml_base_request_t *req = (mca_pml_base_request_t *)elem->value[j]; - if( OMPI_ANY_SOURCE == req->req_peer ) snprintf(cpeer, 64, "%s", "ANY_SOURCE"); - else snprintf(cpeer, 64, "%d", req->req_peer); - if( OMPI_ANY_TAG == req->req_tag ) snprintf(ctag, 64, "%s", "ANY_TAG"); - else snprintf(ctag, 64, "%d", req->req_tag); - opal_output(0, "req %p peer %s tag %s addr %p count %lu datatype %s [%p] [%s %s] req_seq %" PRIu64, - (void*) req, cpeer, ctag, - (void*) req->req_addr, req->req_count, - (0 != req->req_count ? req->req_datatype->name : "N/A"), - (void*) req->req_datatype, - (req->req_pml_complete ? "pml_complete" : ""), - (req->req_free_called ? "freed" : ""), - req->req_sequence); - - } - } - } -} - - -// UMQ below. - -typedef struct custom_match_umq_node -{ - __m512i keys; - struct custom_match_umq_node* next; - int start, end; - void* value[64]; -} custom_match_umq_node; - -typedef struct custom_match_umq -{ - custom_match_umq_node* head; - custom_match_umq_node* tail; - custom_match_umq_node* pool; - int size; -} custom_match_umq; - -static inline void custom_match_umq_dump(custom_match_umq* list); - -static inline void* custom_match_umq_find_verify_hold(custom_match_umq* list, int tag, int peer, custom_match_umq_node** hold_prev, custom_match_umq_node** hold_elem, int* hold_index) -{ -#if CUSTOM_MATCH_DEBUG - printf("custom_match_umq_find_verify_hold list: %x:%d tag: %x peer: %x\n", list, list->size, tag, peer); -#if CUSTOM_MATCH_DEBUG_VERBOSE - custom_match_umq_dump(list); -#endif -#endif - __mmask64 result = 0; - custom_match_umq_node* prev = 0; - custom_match_umq_node* elem = list->head; - int i; - int8_t key = peer ^ tag; - int8_t mask; - if(peer == OMPI_ANY_SOURCE || tag == OMPI_ANY_TAG) - { - mask = 0; - } - else - { - mask = ~0; - } - __m512i search = _mm512_set1_epi8(key); - __m512i msearch = _mm512_set1_epi8(mask); - search = _mm512_and_epi32(search, msearch); - - while(elem) - { - result = _mm512_cmpeq_epi8_mask(_mm512_and_epi32(elem->keys,msearch), search); - if(result) - { - for(i = elem->start; i <= elem->end; i++) - { - if((0x1l << i & result) && elem->value[i]) - { - mca_pml_ob1_recv_frag_t *req = (mca_pml_ob1_recv_frag_t *)elem->value[i]; - if((req->hdr.hdr_match.hdr_src == peer || peer == OMPI_ANY_SOURCE) && (req->hdr.hdr_match.hdr_tag == tag || tag == OMPI_ANY_TAG)) - { -#if CUSTOM_MATCH_DEBUG_VERBOSE - printf("Found list: %x tag: %x peer: %x\n", list, req->hdr.hdr_match.hdr_tag, req->hdr.hdr_match.hdr_src); -#endif - *hold_prev = prev; - *hold_elem = elem; - *hold_index = i; - return elem->value[i]; - } - } - } - } - prev = elem; - elem = elem->next; - } - return 0; -} - - -static inline void custom_match_umq_remove_hold(custom_match_umq* list, custom_match_umq_node* prev, custom_match_umq_node* elem, int i) -{ -#if CUSTOM_MATCH_DEBUG - printf("custom_match_umq_find_remove_hold %x %x %x\n", prev, elem, i); -#endif - ((int8_t*)(&(elem->keys)))[i] = ~0; - elem->value[i] = 0; - if(i == elem->start || i == elem->end) - { - while((elem->start <= elem->end) && (!(elem->value[elem->start]))) elem->start++; - while((elem->start <= elem->end) && (!(elem->value[elem->end]))) elem->end--; - if(elem->start > elem->end) - { - if(prev) - { - prev->next = elem->next; - } - else - { - list->head = elem->next; - } - if(!elem->next) - { - list->tail = prev; - } - elem->next = list->pool; - list->pool = elem; - } - } - list->size--; -} - -static inline void custom_match_umq_append(custom_match_umq* list, int tag, int source, void* payload) -{ - int8_t key = source ^ tag; -#if CUSTOM_MATCH_DEBUG - ((int8_t*)&key)[3] = (int8_t) tag; // MGFD TODO verify this set higer order bits... - mca_pml_ob1_recv_frag_t *req = (mca_pml_ob1_recv_frag_t *)payload; - printf("custom_match_umq_append list: %x key: %x payload: %x tag: %d src: %d\n", list, key, payload, req->hdr.hdr_match.hdr_tag, req->hdr.hdr_match.hdr_src); -#endif - int i; - custom_match_umq_node* elem; - list->size++; - if((!list->tail) || list->tail->end == 63) - { - if(list->pool) - { -#if CUSTOM_MATCH_DEBUG_VERBOSE - printf("Grab an element from the pool\n"); -#endif - elem = list->pool; - list->pool = list->pool->next; - } - else - { -#if CUSTOM_MATCH_DEBUG_VERBOSE - printf("Make a new element\n"); -#endif - elem = _mm_malloc(sizeof(custom_match_umq_node),64); - } - elem->keys = _mm512_set1_epi8(~0); // TODO: we may only have to do this type of initialization for freshly malloc'd entries. - elem->next = 0; - elem->start = 0; - elem->end = -1; // we don't have an element yet - for(i = 0; i < 64; i++) elem->value[i] = 0; - if(list->tail) - { - //printf("Append to list of elems\n"); - list->tail->next = elem; - list->tail = elem; - } - else - { - //printf("New Elem is only Elem\n"); - list->head = elem; - list->tail = elem; - } - } - - elem = list->tail; - elem->end++; - ((int8_t*)(&(elem->keys)))[elem->end] = key; - elem->value[elem->end] = payload; -#if CUSTOM_MATCH_DEBUG_VERBOSE - custom_match_umq_dump(list); -#endif -} - -static inline custom_match_umq* custom_match_umq_init() -{ -#if CUSTOM_MATCH_DEBUG - printf("custom_match_umq_init\n"); -#endif - custom_match_umq* list = _mm_malloc(sizeof(custom_match_umq),64); - list->head = 0; - list->tail = 0; - list->pool = 0; - list->size = 0; - return list; -} - -static inline void custom_match_umq_destroy(custom_match_umq* list) -{ -#if CUSTOM_MATCH_DEBUG - printf("custom_match_umq_destroy\n"); -#endif - custom_match_umq_node* elem; - while(list->head) - { - elem = list->head; - list->head = list->head->next; - _mm_free(elem); - } - while(list->pool) - { - elem = list->pool; - list->pool = list->pool->next; - _mm_free(elem); - } - _mm_free(list); -} - -static inline int custom_match_umq_size(custom_match_umq* list) -{ - return list->size; -} - -static inline void custom_match_umq_dump(custom_match_umq* list) -{ - char cpeer[64], ctag[64]; - - custom_match_umq_node* elem; - int i = 0; - int j = 0; - printf("Elements in the list:\n"); - for(elem = list->head; elem; elem = elem->next) - { - printf("This is the %d linked list element\n", ++i); - for(j = 0; j < 64; j++) - { - if(elem->value[j]) - { - mca_pml_ob1_recv_frag_t *req = (mca_pml_ob1_recv_frag_t *)elem->value[j]; - printf("%x %x %x\n", elem->value[j], req->hdr.hdr_match.hdr_tag, req->hdr.hdr_match.hdr_src); - if( OMPI_ANY_SOURCE == req->hdr.hdr_match.hdr_src ) snprintf(cpeer, 64, "%s", "ANY_SOURCE"); - else snprintf(cpeer, 64, "%d", req->hdr.hdr_match.hdr_src); - if( OMPI_ANY_TAG == req->hdr.hdr_match.hdr_tag ) snprintf(ctag, 64, "%s", "ANY_TAG"); - else snprintf(ctag, 64, "%d", req->hdr.hdr_match.hdr_tag); - // opal_output(0, "peer %s tag %s",// addr %p count %lu datatype %s [%p] [%s %s] req_seq %" PRIu64, - // /*(void*) req,*/ cpeer, ctag, - //(void*) req->req_addr, req->req_count, - //(0 != req->req_count ? req->req_datatype->name : "N/A"), - //(void*) req->req_datatype, - //(req->req_pml_complete ? "pml_complete" : ""), - //(req->req_free_called ? "freed" : ""), - //req->req_sequence); - // ); - - } - } - } -} - -#endif diff --git a/ompi/mca/pml/ob1/custommatch/fuzzy512-short.h b/ompi/mca/pml/ob1/custommatch/fuzzy512-short.h deleted file mode 100644 index edad1378dd..0000000000 --- a/ompi/mca/pml/ob1/custommatch/fuzzy512-short.h +++ /dev/null @@ -1,591 +0,0 @@ -/* - * Copyright (c) 2018 Sandia National Laboratories. All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#include - - -#ifndef __CUSTOM_MATCH -#define __CUSTOM_MATCH - - - -#include "../pml_ob1_recvreq.h" -#include "../pml_ob1_recvfrag.h" - -#define CUSTOM_MATCH_DEBUG 0 -#define CUSTOM_MATCH_DEBUG_VERBOSE 0 - -typedef struct custom_match_prq_node -{ - __m512i keys; - __m512i mask; - struct custom_match_prq_node* next; - int start, end; - void* value[32]; -} custom_match_prq_node; - -typedef struct custom_match_prq -{ - custom_match_prq_node* head; - custom_match_prq_node* tail; - custom_match_prq_node* pool; - int size; -} custom_match_prq; - -static inline int custom_match_prq_cancel(custom_match_prq* list, void* req) -{ -#if CUSTOM_MATCH_DEBUG_VERBOSE - printf("custom_match_prq_cancel - list: %x req: %x\n", list, req); -#endif - __mmask32 result = 0; - custom_match_prq_node* prev = 0; - custom_match_prq_node* elem = list->head; - int i; - while(elem) - { - for(i = elem->start; i <= elem->end; i++) - { - if(elem->value[i] == req) - { -#if CUSTOM_MATCH_DEBUG_VERBOSE - printf("Canceled!");// %x %x %x\n", req, req->req_tag, req->req_peer); -#endif - void* payload = elem->value[i]; - ((short*)(&(elem->keys)))[i] = ~0; - ((short*)(&(elem->mask)))[i] = ~0; - elem->value[i] = 0; - if(i == elem->start || i == elem->end) - { - while((elem->start <= elem->end) && (!(elem->value[elem->start]))) elem->start++; - while((elem->start <= elem->end) && (!(elem->value[elem->end]))) elem->end--; - if(elem->start > elem->end) - { - if(prev) - { - prev->next = elem->next; - } - else - { - list->head = elem->next; - } - if(!elem->next) - { - list->tail = prev; - } - elem->next = list->pool; - list->pool = elem; - } - } - list->size--; - return 1; - } - } - prev = elem; - elem = elem->next; - } - return 0; -} - -static inline void* custom_match_prq_find_verify(custom_match_prq* list, int tag, int peer) -{ -#if CUSTOM_MATCH_DEBUG_VERBOSE - printf("custom_match_prq_find_verify list: %x tag: %x peer: %x\n", list, tag, peer); -#endif - __mmask32 result = 0; - custom_match_prq_node* elem = list->head; - int i; - int16_t key = peer ^ tag; - __m512i search = _mm512_set1_epi16(key); - while(elem) - { - result = _mm512_cmpeq_epi16_mask(_mm512_and_epi32(elem->keys, elem->mask), _mm512_and_epi32(search, elem->mask)); - if(result) - { - for(i = elem->start; i <= elem->end; i++) - { - if((0x1 << i & result) && elem->value[i]) - { - mca_pml_base_request_t *req = (mca_pml_base_request_t *)elem->value[i]; - if((req->req_peer == peer || req->req_peer == OMPI_ANY_SOURCE) && (req->req_tag == tag || req->req_tag == OMPI_ANY_TAG)) - { -#if CUSTOM_MATCH_DEBUG_VERBOSE - printf("Found list: %x tag: %x peer: %x\n", list, req->req_tag, req->req_peer); -#endif - return elem->value[i]; - } - } - } - } - elem = elem->next; - } - return 0; -} - -static inline void* custom_match_prq_find_dequeue_verify(custom_match_prq* list, int tag, int peer) -{ -#if CUSTOM_MATCH_DEBUG_VERBOSE - printf("custom_match_prq_find_dequeue_verify list: %x:%d tag: %x peer: %x\n", list, list->size, tag, peer); -#endif - __mmask32 result = 0; - custom_match_prq_node* prev = 0; - custom_match_prq_node* elem = list->head; - int i; - int16_t key = peer ^ tag; - __m512i search = _mm512_set1_epi16(key); - while(elem) - { -#if CUSTOM_MATCH_DEBUG_VERBOSE - for(int iter = elem->start; iter <= elem->end; iter++) - { - printf("Search = %x, Element Key = %x, Element mask = %x", ((int32_t*) &search)[iter], ((int32_t*) &elem->keys)[iter], ((int32_t*) &elem->mask)[iter]); - } -#endif - result = _mm512_cmpeq_epi16_mask(_mm512_and_epi32(elem->keys, elem->mask), _mm512_and_epi32(search, elem->mask)); - if(result) - { - for(i = elem->start; i <= elem->end; i++) - { - mca_pml_base_request_t *req = (mca_pml_base_request_t *)elem->value[i]; - if((0x1 << i & result) && req && ((req->req_peer == peer || req->req_peer == OMPI_ANY_SOURCE) && (req->req_tag == tag || req->req_tag == OMPI_ANY_TAG))) - { - void* payload = elem->value[i]; - ((short*)(&(elem->keys)))[i] = ~0; - ((short*)(&(elem->mask)))[i] = ~0; - elem->value[i] = 0; - if(i == elem->start || i == elem->end) - { - while((elem->start <= elem->end) && (!(elem->value[elem->start]))) elem->start++; - while((elem->start <= elem->end) && (!(elem->value[elem->end]))) elem->end--; - if(elem->start > elem->end) - { - if(prev) - { - prev->next = elem->next; - } - else - { - list->head = elem->next; - } - if(!elem->next) - { - list->tail = prev; - } - elem->next = list->pool; - list->pool = elem; - } - } - list->size--; -#if CUSTOM_MATCH_DEBUG_VERBOSE - printf("Found list: %x tag: %x peer: %x\n", list, req->req_tag, req->req_peer); -#endif - return payload; - } - } - } - prev = elem; - elem = elem->next; - } - return 0; -} - - -static inline void custom_match_prq_append(custom_match_prq* list, void* payload, int tag, int source) -{ - int16_t key, mask; - key = source ^ tag; - if(source == OMPI_ANY_SOURCE || tag == OMPI_ANY_TAG) - { - mask = 0; - } - else - { - mask = ~0; - } - mca_pml_base_request_t *req = (mca_pml_base_request_t *)payload; -#if CUSTOM_MATCH_DEBUG_VERBOSE - printf("custom_match_prq_append list: %x key: %x mask: %x tag: %x peer: %x\n", list, key, mask, req->req_tag, req->req_peer); -#endif - int i; - custom_match_prq_node* elem; - if((!list->tail) || list->tail->end == 31) - { - if(list->pool) - { - elem = list->pool; - list->pool = list->pool->next; - } - else - { - elem = _mm_malloc(sizeof(custom_match_prq_node),64); - } - elem->keys = _mm512_set1_epi16(~0); - elem->mask = _mm512_set1_epi16(~0); - elem->next = 0; - elem->start = 0; - elem->end = -1; // we don't have an element yet - for(i = 0; i < 32; i++) elem->value[i] = 0; - if(list->tail) - { - list->tail->next = elem; - list->tail = elem; - } - else - { - list->head = elem; - list->tail = elem; - } - } - - elem = list->tail; - elem->end++; - ((short*)(&(elem->keys)))[elem->end] = key; - ((short*)(&(elem->mask)))[elem->end] = mask; - elem->value[elem->end] = payload; - list->size++; -#if CUSTOM_MATCH_DEBUG_VERBOSE - printf("Exiting custom_match_prq_append\n"); -#endif -} - - -static inline int custom_match_prq_size(custom_match_prq* list) -{ - return list->size; -} - -static inline custom_match_prq* custom_match_prq_init() -{ -#if CUSTOM_MATCH_DEBUG_VERBOSE - printf("custom_match_prq_init\n"); -#endif - custom_match_prq* list = _mm_malloc(sizeof(custom_match_prq),64); - list->head = 0; - list->tail = 0; - list->pool = 0; - list->size = 0; - return list; -} - -static inline void custom_match_prq_destroy(custom_match_prq* list) -{ -#if CUSTOM_MATCH_DEBUG_VERBOSE - printf("custom_match_prq_destroy\n"); -#endif - custom_match_prq_node* elem; - while(list->head) - { - elem = list->head; - list->head = list->head->next; - _mm_free(elem); - } - while(list->pool) - { - elem = list->pool; - list->pool = list->pool->next; - _mm_free(elem); - } - _mm_free(list); -} - -static inline void custom_match_print(custom_match_prq* list) -{ - custom_match_prq_node* elem; - int i = 0; - int j = 0; - printf("Elements in the list:\n"); - for(elem = list->head; elem; elem = elem->next) - { - printf("This is the %d linked list element\n", ++i); - for(j = 0; j < 32; j++) - { - printf("%d:%d The key is %d, the mask is %d, the value is %ld\n", i, j, ((short*)(&(elem->keys)))[j], ((short*)(&(elem->mask)))[j], elem->value[j]); - } - i++; - } -} - -static inline void custom_match_prq_dump(custom_match_prq* list) -{ - opal_list_item_t* item; - char cpeer[64], ctag[64]; - - custom_match_prq_node* elem; - int i = 0; - int j = 0; - printf("Elements in the list:\n"); - for(elem = list->head; elem; elem = elem->next) - { - printf("This is the %d linked list element\n", ++i); - for(j = 0; j < 32; j++) - { - if(elem->value[j]) - { - mca_pml_base_request_t *req = (mca_pml_base_request_t *)elem->value[j]; - if( OMPI_ANY_SOURCE == req->req_peer ) snprintf(cpeer, 64, "%s", "ANY_SOURCE"); - else snprintf(cpeer, 64, "%d", req->req_peer); - if( OMPI_ANY_TAG == req->req_tag ) snprintf(ctag, 64, "%s", "ANY_TAG"); - else snprintf(ctag, 64, "%d", req->req_tag); - opal_output(0, "req %p peer %s tag %s addr %p count %lu datatype %s [%p] [%s %s] req_seq %" PRIu64, - (void*) req, cpeer, ctag, - (void*) req->req_addr, req->req_count, - (0 != req->req_count ? req->req_datatype->name : "N/A"), - (void*) req->req_datatype, - (req->req_pml_complete ? "pml_complete" : ""), - (req->req_free_called ? "freed" : ""), - req->req_sequence); - - } - } - } -} - - -// UMQ below. - -typedef struct custom_match_umq_node -{ - __m512i keys; - struct custom_match_umq_node* next; - int start, end; - void* value[32]; -} custom_match_umq_node; - -typedef struct custom_match_umq -{ - custom_match_umq_node* head; - custom_match_umq_node* tail; - custom_match_umq_node* pool; - int size; -} custom_match_umq; - -static inline void custom_match_umq_dump(custom_match_umq* list); - -static inline void* custom_match_umq_find_verify_hold(custom_match_umq* list, int tag, int peer, custom_match_umq_node** hold_prev, custom_match_umq_node** hold_elem, int* hold_index) -{ -#if CUSTOM_MATCH_DEBUG_VERBOSE - printf("custom_match_umq_find_verify_hold list: %x:%d tag: %x peer: %x\n", list, list->size, tag, peer); - custom_match_umq_dump(list); -#endif - __mmask32 result = 0; - custom_match_umq_node* prev = 0; - custom_match_umq_node* elem = list->head; - int i; - int16_t key = peer ^ tag; - __m512i search = _mm512_set1_epi16(key); - - int16_t mask = ~0; - if(peer == OMPI_ANY_SOURCE || tag == OMPI_ANY_TAG) - { - mask = 0; - } - else - { - mask = ~0; - } - __m512i msearch = _mm512_set1_epi16(mask); - search = _mm512_and_epi32(search, msearch); - - while(elem) - { - result = _mm512_cmpeq_epi16_mask(_mm512_and_epi32(elem->keys,msearch), search); - if(result) - { - for(i = elem->start; i <= elem->end; i++) - { - if((0x1 << i & result) && elem->value[i]) - { - mca_pml_ob1_recv_frag_t *req = (mca_pml_ob1_recv_frag_t *)elem->value[i]; - if((req->hdr.hdr_match.hdr_src == peer || peer == OMPI_ANY_SOURCE) && (req->hdr.hdr_match.hdr_tag == tag || tag == OMPI_ANY_TAG)) - { -#if CUSTOM_MATCH_DEBUG_VERBOSE - printf("Found list: %x tag: %x peer: %x\n", list, req->hdr.hdr_match.hdr_tag, req->hdr.hdr_match.hdr_src); -#endif - *hold_prev = prev; - *hold_elem = elem; - *hold_index = i; - return elem->value[i]; - } - } - } - } - prev = elem; - elem = elem->next; - } - return 0; -} - - -static inline void custom_match_umq_remove_hold(custom_match_umq* list, custom_match_umq_node* prev, custom_match_umq_node* elem, int i) -{ -#if CUSTOM_MATCH_DEBUG_VERBOSE - printf("custom_match_umq_find_remove_hold %x %x %x\n", prev, elem, i); -#endif - ((short*)(&(elem->keys)))[i] = ~0; - elem->value[i] = 0; - if(i == elem->start || i == elem->end) - { - while((elem->start <= elem->end) && (!(elem->value[elem->start]))) elem->start++; - while((elem->start <= elem->end) && (!(elem->value[elem->end]))) elem->end--; - if(elem->start > elem->end) - { - if(prev) - { - prev->next = elem->next; - } - else - { - list->head = elem->next; - } - if(!elem->next) - { - list->tail = prev; - } - elem->next = list->pool; - list->pool = elem; - } - } - list->size--; -} - -static inline void custom_match_umq_append(custom_match_umq* list, int tag, int source, void* payload) -{ - int16_t key = source ^ tag; -#if CUSTOM_MATCH_DEBUG_VERBOSE - ((int8_t*)&key)[3] = (int8_t) tag; // MGFD TODO verify this set higer order bits... - mca_pml_ob1_recv_frag_t *req = (mca_pml_ob1_recv_frag_t *)payload; - printf("custom_match_umq_append list: %x key: %x payload: %x tag: %d src: %d\n", list, key, payload, req->hdr.hdr_match.hdr_tag, req->hdr.hdr_match.hdr_src); -#endif - int i; - custom_match_umq_node* elem; - list->size++; - if((!list->tail) || list->tail->end == 31) - { - if(list->pool) - { -#if CUSTOM_MATCH_DEBUG_VERBOSE - printf("Grab an element from the pool\n"); -#endif - elem = list->pool; - list->pool = list->pool->next; - } - else - { -#if CUSTOM_MATCH_DEBUG_VERBOSE - printf("Make a new element\n"); -#endif - elem = _mm_malloc(sizeof(custom_match_umq_node),64); - } - elem->keys = _mm512_set1_epi16(~0); // TODO: we may only have to do this type of initialization for freshly malloc'd entries. - elem->next = 0; - elem->start = 0; - elem->end = -1; // we don't have an element yet - for(i = 0; i < 32; i++) elem->value[i] = 0; - if(list->tail) - { -#if CUSTOM_MATCH_DEBUG_VERBOSE - printf("Append to list of elems\n"); -#endif - list->tail->next = elem; - list->tail = elem; - } - else - { -#if CUSTOM_MATCH_DEBUG_VERBOSE - printf("New Elem is only Elem\n"); -#endif - list->head = elem; - list->tail = elem; - } - } - - elem = list->tail; - elem->end++; - ((short*)(&(elem->keys)))[elem->end] = key; - elem->value[elem->end] = payload; -#if CUSTOM_MATCH_DEBUG_VERBOSE - custom_match_umq_dump(list); -#endif -} - -static inline custom_match_umq* custom_match_umq_init() -{ -#if CUSTOM_MATCH_DEBUG_VERBOSE - printf("custom_match_umq_init\n"); -#endif - custom_match_umq* list = _mm_malloc(sizeof(custom_match_umq),64); - list->head = 0; - list->tail = 0; - list->pool = 0; - list->size = 0; - return list; -} - -static inline void custom_match_umq_destroy(custom_match_umq* list) -{ -#if CUSTOM_MATCH_DEBUG_VERBOSE - printf("custom_match_umq_destroy\n"); -#endif - custom_match_umq_node* elem; - while(list->head) - { - elem = list->head; - list->head = list->head->next; - _mm_free(elem); - } - while(list->pool) - { - elem = list->pool; - list->pool = list->pool->next; - _mm_free(elem); - } - _mm_free(list); -} - -static inline int custom_match_umq_size(custom_match_umq* list) -{ - return list->size; -} - -static inline void custom_match_umq_dump(custom_match_umq* list) -{ - char cpeer[64], ctag[64]; - - custom_match_umq_node* elem; - int i = 0; - int j = 0; - printf("Elements in the list:\n"); - for(elem = list->head; elem; elem = elem->next) - { - printf("This is the %d linked list element\n", ++i); - for(j = 0; j < 32; j++) - { - if(elem->value[j]) - { - mca_pml_ob1_recv_frag_t *req = (mca_pml_ob1_recv_frag_t *)elem->value[j]; - printf("%x %x %x\n", elem->value[j], req->hdr.hdr_match.hdr_tag, req->hdr.hdr_match.hdr_src); - if( OMPI_ANY_SOURCE == req->hdr.hdr_match.hdr_src ) snprintf(cpeer, 64, "%s", "ANY_SOURCE"); - else snprintf(cpeer, 64, "%d", req->hdr.hdr_match.hdr_src); - if( OMPI_ANY_TAG == req->hdr.hdr_match.hdr_tag ) snprintf(ctag, 64, "%s", "ANY_TAG"); - else snprintf(ctag, 64, "%d", req->hdr.hdr_match.hdr_tag); - // opal_output(0, "peer %s tag %s",// addr %p count %lu datatype %s [%p] [%s %s] req_seq %" PRIu64, - // /*(void*) req,*/ cpeer, ctag, - //(void*) req->req_addr, req->req_count, - //(0 != req->req_count ? req->req_datatype->name : "N/A"), - //(void*) req->req_datatype, - //(req->req_pml_complete ? "pml_complete" : ""), - //(req->req_free_called ? "freed" : ""), - //req->req_sequence); - // ); - - } - } - } -} - -#endif diff --git a/ompi/mca/pml/ob1/custommatch/fuzzy512-word.h b/ompi/mca/pml/ob1/custommatch/fuzzy512-word.h deleted file mode 100644 index a74c3a7347..0000000000 --- a/ompi/mca/pml/ob1/custommatch/fuzzy512-word.h +++ /dev/null @@ -1,610 +0,0 @@ -/* - * Copyright (c) 2018 Sandia National Laboratories. All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#include - - -#ifndef __CUSTOM_MATCH -#define __CUSTOM_MATCH - -#include "../pml_ob1_recvreq.h" -#include "../pml_ob1_recvfrag.h" - -#define CUSTOM_MATCH_DEBUG 0 -#define CUSTOM_MATCH_DEBUG_VERBOSE 0 - -typedef struct custom_match_prq_node -{ - __m512i keys; - __m512i mask; - struct custom_match_prq_node* next; - int start, end; - void* value[16]; -} custom_match_prq_node; - -typedef struct custom_match_prq -{ - custom_match_prq_node* head; - custom_match_prq_node* tail; - custom_match_prq_node* pool; - int size; -} custom_match_prq; - -static inline int custom_match_prq_cancel(custom_match_prq* list, void* req) -{ -#if CUSTOM_MATCH_DEBUG_VERBOSE - printf("custom_match_prq_cancel - list: %x req: %x\n", list, req); -#endif - __mmask16 result = 0; - custom_match_prq_node* prev = 0; - custom_match_prq_node* elem = list->head; - int i; - while(elem) - { - for(i = elem->start; i <= elem->end; i++) - { - if(elem->value[i] == req) - { -#if CUSTOM_MATCH_DEBUG_VERBOSE - printf("Canceled!");// %x %x %x\n", req, req->req_tag, req->req_peer); -#endif - void* payload = elem->value[i]; - ((int*)(&(elem->keys)))[i] = ~0; - ((int*)(&(elem->mask)))[i] = ~0; - elem->value[i] = 0; - if(i == elem->start || i == elem->end) - { - while((elem->start <= elem->end) && (!(elem->value[elem->start]))) elem->start++; - while((elem->start <= elem->end) && (!(elem->value[elem->end]))) elem->end--; - if(elem->start > elem->end) - { - if(prev) - { - prev->next = elem->next; - } - else - { - list->head = elem->next; - } - if(!elem->next) - { - list->tail = prev; - } - elem->next = list->pool; - list->pool = elem; - } - } - list->size--; - return 1; - } - } - prev = elem; - elem = elem->next; - } - return 0; -} - -static inline void* custom_match_prq_find_verify(custom_match_prq* list, int tag, int peer) -{ -#if CUSTOM_MATCH_DEBUG_VERBOSE - printf("custom_match_prq_find_verify list: %x tag: %x peer: %x\n", list, tag, peer); -#endif - __mmask16 result = 0; - custom_match_prq_node* elem = list->head; - int i; - int32_t key = peer; - ((int8_t*)&key)[3] = (int8_t) tag; // MGFD TODO verify this set higer order bits... - __m512i search = _mm512_set1_epi32(key); - while(elem) - { - result = _mm512_cmpeq_epi32_mask(_mm512_and_epi32(elem->keys, elem->mask), _mm512_and_epi32(search, elem->mask)); - if(result) - { - for(i = elem->start; i <= elem->end; i++) - { - if((0x1 << i & result) && elem->value[i]) - { - mca_pml_base_request_t *req = (mca_pml_base_request_t *)elem->value[i]; - if((req->req_peer == peer || req->req_peer == OMPI_ANY_SOURCE) && (req->req_tag == tag || req->req_tag == OMPI_ANY_TAG)) - { -#if CUSTOM_MATCH_DEBUG_VERBOSE - printf("Found list: %x tag: %x peer: %x\n", list, req->req_tag, req->req_peer); -#endif - return elem->value[i]; - } - } - } - } - elem = elem->next; - } - return 0; -} - -static inline void* custom_match_prq_find_dequeue_verify(custom_match_prq* list, int tag, int peer) -{ -#if CUSTOM_MATCH_DEBUG_VERBOSE - printf("custom_match_prq_find_dequeue_verify list: %x:%d tag: %x peer: %x\n", list, list->size, tag, peer); -#endif - __mmask16 result = 0; - custom_match_prq_node* prev = 0; - custom_match_prq_node* elem = list->head; - int i; - int32_t key = peer; - ((int8_t*)&key)[3] = (int8_t) tag; // MGFD TODO verify this set higer order bits... - __m512i search = _mm512_set1_epi32(key); - while(elem) - { -#if CUSTOM_MATCH_DEBUG_VERBOSE - for(int iter = elem->start; iter <= elem->end; iter++) - { - printf("Search = %x, Element Key = %x, Element mask = %x", ((int32_t*) &search)[iter], ((int32_t*) &elem->keys)[iter], ((int32_t*) &elem->mask)[iter]); - } -#endif - result = _mm512_cmpeq_epi32_mask(_mm512_and_epi32(elem->keys, elem->mask), _mm512_and_epi32(search, elem->mask)); - if(result) - { - for(i = elem->start; i <= elem->end; i++) - { - mca_pml_base_request_t *req = (mca_pml_base_request_t *)elem->value[i]; - if((0x1 << i & result) && req && ((req->req_peer == peer || req->req_peer == OMPI_ANY_SOURCE) && (req->req_tag == tag || req->req_tag == OMPI_ANY_TAG))) - { - void* payload = elem->value[i]; - ((int*)(&(elem->keys)))[i] = ~0; - ((int*)(&(elem->mask)))[i] = ~0; - elem->value[i] = 0; - if(i == elem->start || i == elem->end) - { - while((elem->start <= elem->end) && (!(elem->value[elem->start]))) elem->start++; - while((elem->start <= elem->end) && (!(elem->value[elem->end]))) elem->end--; - if(elem->start > elem->end) - { - if(prev) - { - prev->next = elem->next; - } - else - { - list->head = elem->next; - } - if(!elem->next) - { - list->tail = prev; - } - elem->next = list->pool; - list->pool = elem; - } - } - list->size--; -#if CUSTOM_MATCH_DEBUG_VERBOSE - printf("Found list: %x tag: %x peer: %x\n", list, req->req_tag, req->req_peer); -#endif - return payload; - } - } - } - prev = elem; - elem = elem->next; - } - return 0; -} - - -static inline void custom_match_prq_append(custom_match_prq* list, void* payload, int tag, int source) -{ - int32_t key, mask; - if(source == OMPI_ANY_SOURCE) - { - key = source; - mask = 0; - } - else - { - key = source; - mask = ~0; - } - if(tag == OMPI_ANY_TAG) - { - ((int8_t*)&key)[3] = (int8_t)tag; - ((int8_t*)&mask)[3] = (int8_t)0; - } - else - { - ((int8_t*)&key)[3] = (int8_t)tag; - ((int8_t*)&mask)[3] = (int8_t)~0; - } - mca_pml_base_request_t *req = (mca_pml_base_request_t *)payload; -#if CUSTOM_MATCH_DEBUG_VERBOSE - printf("custom_match_prq_append list: %x key: %x mask: %x tag: %x peer: %x\n", list, key, mask, req->req_tag, req->req_peer); -#endif - int i; - custom_match_prq_node* elem; - if((!list->tail) || list->tail->end == 15) - { - if(list->pool) - { - elem = list->pool; - list->pool = list->pool->next; - } - else - { - elem = _mm_malloc(sizeof(custom_match_prq_node),64); - } - elem->keys = _mm512_set1_epi32(~0); - elem->mask = _mm512_set1_epi32(~0); - elem->next = 0; - elem->start = 0; - elem->end = -1; // we don't have an element yet - for(i = 0; i < 16; i++) elem->value[i] = 0; - if(list->tail) - { - list->tail->next = elem; - list->tail = elem; - } - else - { - list->head = elem; - list->tail = elem; - } - } - - elem = list->tail; - elem->end++; - ((int*)(&(elem->keys)))[elem->end] = key; - ((int*)(&(elem->mask)))[elem->end] = mask; - elem->value[elem->end] = payload; - list->size++; -#if CUSTOM_MATCH_DEBUG_VERBOSE - printf("Exiting custom_match_prq_append\n"); -#endif -} - -static inline int custom_match_prq_size(custom_match_prq* list) -{ - return list->size; -} - -static inline custom_match_prq* custom_match_prq_init() -{ -#if CUSTOM_MATCH_DEBUG_VERBOSE - printf("custom_match_prq_init\n"); -#endif - custom_match_prq* list = _mm_malloc(sizeof(custom_match_prq),64); - list->head = 0; - list->tail = 0; - list->pool = 0; - list->size = 0; - return list; -} - -static inline void custom_match_prq_destroy(custom_match_prq* list) -{ -#if CUSTOM_MATCH_DEBUG_VERBOSE - printf("custom_match_prq_destroy\n"); -#endif - custom_match_prq_node* elem; - while(list->head) - { - elem = list->head; - list->head = list->head->next; - _mm_free(elem); - } - while(list->pool) - { - elem = list->pool; - list->pool = list->pool->next; - _mm_free(elem); - } - _mm_free(list); -} - -static inline void custom_match_print(custom_match_prq* list) -{ - custom_match_prq_node* elem; - int i = 0; - int j = 0; - printf("Elements in the list:\n"); - for(elem = list->head; elem; elem = elem->next) - { - printf("This is the %d linked list element\n", ++i); - for(j = 0; j < 16; j++) - { - printf("%d:%d The key is %d, the mask is %d, the value is %ld\n", i, j, ((int*)(&(elem->keys)))[j], ((int*)(&(elem->mask)))[j], elem->value[j]); - } - i++; - } -} - -static inline void custom_match_prq_dump(custom_match_prq* list) -{ - opal_list_item_t* item; - char cpeer[64], ctag[64]; - - custom_match_prq_node* elem; - int i = 0; - int j = 0; - printf("Elements in the list:\n"); - for(elem = list->head; elem; elem = elem->next) - { - printf("This is the %d linked list element\n", ++i); - for(j = 0; j < 16; j++) - { - if(elem->value[j]) - { - mca_pml_base_request_t *req = (mca_pml_base_request_t *)elem->value[j]; - if( OMPI_ANY_SOURCE == req->req_peer ) snprintf(cpeer, 64, "%s", "ANY_SOURCE"); - else snprintf(cpeer, 64, "%d", req->req_peer); - if( OMPI_ANY_TAG == req->req_tag ) snprintf(ctag, 64, "%s", "ANY_TAG"); - else snprintf(ctag, 64, "%d", req->req_tag); - opal_output(0, "req %p peer %s tag %s addr %p count %lu datatype %s [%p] [%s %s] req_seq %" PRIu64, - (void*) req, cpeer, ctag, - (void*) req->req_addr, req->req_count, - (0 != req->req_count ? req->req_datatype->name : "N/A"), - (void*) req->req_datatype, - (req->req_pml_complete ? "pml_complete" : ""), - (req->req_free_called ? "freed" : ""), - req->req_sequence); - - } - } - } -} - - -// UMQ below. - -typedef struct custom_match_umq_node -{ - __m512i keys; - struct custom_match_umq_node* next; - int start, end; - void* value[16]; -} custom_match_umq_node; - -typedef struct custom_match_umq -{ - custom_match_umq_node* head; - custom_match_umq_node* tail; - custom_match_umq_node* pool; - int size; -} custom_match_umq; - -static inline void custom_match_umq_dump(custom_match_umq* list); - -static inline void* custom_match_umq_find_verify_hold(custom_match_umq* list, int tag, int peer, custom_match_umq_node** hold_prev, custom_match_umq_node** hold_elem, int* hold_index) -{ -#if CUSTOM_MATCH_DEBUG_VERBOSE - printf("custom_match_umq_find_verify_hold list: %x:%d tag: %x peer: %x\n", list, list->size, tag, peer); - custom_match_umq_dump(list); -#endif - __mmask16 result = 0; - custom_match_umq_node* prev = 0; - custom_match_umq_node* elem = list->head; - int i; - int32_t key = peer; - ((int8_t*)&key)[3] = (int8_t) tag; // MGFD TODO verify this set higer order bits... - __m512i search = _mm512_set1_epi32(key); - - int32_t mask = ~0; - if(peer == OMPI_ANY_SOURCE) - { - mask = 0; - } - else - { - mask = ~0; - } - if(tag == OMPI_ANY_TAG) - { - ((int8_t*)&mask)[3] = (int8_t)0; - } - else - { - ((int8_t*)&mask)[3] = (int8_t)~0; - } - __m512i msearch = _mm512_set1_epi32(mask); - search = _mm512_and_epi32(search, msearch); - - while(elem) - { - result = _mm512_cmpeq_epi32_mask(_mm512_and_epi32(elem->keys,msearch), search); - if(result) - { - for(i = elem->start; i <= elem->end; i++) - { - if((0x1 << i & result) && elem->value[i]) - { - mca_pml_ob1_recv_frag_t *req = (mca_pml_ob1_recv_frag_t *)elem->value[i]; - if((req->hdr.hdr_match.hdr_src == peer || peer == OMPI_ANY_SOURCE) && (req->hdr.hdr_match.hdr_tag == tag || tag == OMPI_ANY_TAG)) - { -#if CUSTOM_MATCH_DEBUG_VERBOSE - printf("Found list: %x tag: %x peer: %x\n", list, req->hdr.hdr_match.hdr_tag, req->hdr.hdr_match.hdr_src); -#endif - *hold_prev = prev; - *hold_elem = elem; - *hold_index = i; - return elem->value[i]; - } - } - } - } - prev = elem; - elem = elem->next; - } - return 0; -} - - -static inline void custom_match_umq_remove_hold(custom_match_umq* list, custom_match_umq_node* prev, custom_match_umq_node* elem, int i) -{ -#if CUSTOM_MATCH_DEBUG_VERBOSE - printf("custom_match_umq_find_remove_hold %x %x %x\n", prev, elem, i); -#endif - ((int*)(&(elem->keys)))[i] = ~0; - elem->value[i] = 0; - if(i == elem->start || i == elem->end) - { - while((elem->start <= elem->end) && (!(elem->value[elem->start]))) elem->start++; - while((elem->start <= elem->end) && (!(elem->value[elem->end]))) elem->end--; - if(elem->start > elem->end) - { - if(prev) - { - prev->next = elem->next; - } - else - { - list->head = elem->next; - } - if(!elem->next) - { - list->tail = prev; - } - elem->next = list->pool; - list->pool = elem; - } - } - list->size--; -} - -static inline void custom_match_umq_append(custom_match_umq* list, int tag, int source, void* payload) -{ - int32_t key = source; - ((int8_t*)&key)[3] = (int8_t) tag; // MGFD TODO verify this set higer order bits... -#if CUSTOM_MATCH_DEBUG_VERBOSE - mca_pml_ob1_recv_frag_t *req = (mca_pml_ob1_recv_frag_t *)payload; - printf("custom_match_umq_append list: %x key: %x payload: %x tag: %d src: %d\n", list, key, payload, req->hdr.hdr_match.hdr_tag, req->hdr.hdr_match.hdr_src); -#endif - int i; - custom_match_umq_node* elem; - list->size++; - if((!list->tail) || list->tail->end == 15) - { - if(list->pool) - { -#if CUSTOM_MATCH_DEBUG_VERBOSE - printf("Grab an element from the pool\n"); -#endif - elem = list->pool; - list->pool = list->pool->next; - } - else - { -#if CUSTOM_MATCH_DEBUG_VERBOSE - printf("Make a new element\n"); -#endif - elem = _mm_malloc(sizeof(custom_match_umq_node),64); - } - elem->keys = _mm512_set1_epi32(~0); // TODO: we only have to do this type of initialization for freshly malloc'd entries. - elem->next = 0; - elem->start = 0; - elem->end = -1; // we don't have an element yet - for(i = 0; i < 16; i++) elem->value[i] = 0; - if(list->tail) - { -#if CUSTOM_MATCH_DEBUG_VERBOSE - printf("Append to list of elems\n"); -#endif - list->tail->next = elem; - list->tail = elem; - } - else - { -#if CUSTOM_MATCH_DEBUG_VERBOSE - printf("New Elem is only Elem\n"); -#endif - list->head = elem; - list->tail = elem; - } - } - - elem = list->tail; - elem->end++; - ((int*)(&(elem->keys)))[elem->end] = key; - elem->value[elem->end] = payload; -#if CUSTOM_MATCH_DEBUG_VERBOSE - custom_match_umq_dump(list); -#endif -} - -static inline custom_match_umq* custom_match_umq_init() -{ -#if CUSTOM_MATCH_DEBUG_VERBOSE - printf("custom_match_umq_init\n"); -#endif - custom_match_umq* list = _mm_malloc(sizeof(custom_match_umq),64); - list->head = 0; - list->tail = 0; - list->pool = 0; - list->size = 0; - return list; -} - -static inline void custom_match_umq_destroy(custom_match_umq* list) -{ -#if CUSTOM_MATCH_DEBUG_VERBOSE - printf("custom_match_umq_destroy\n"); -#endif - custom_match_umq_node* elem; - while(list->head) - { - elem = list->head; - list->head = list->head->next; - _mm_free(elem); - } - while(list->pool) - { - elem = list->pool; - list->pool = list->pool->next; - _mm_free(elem); - } - _mm_free(list); -} - -static inline int custom_match_umq_size(custom_match_umq* list) -{ - return list->size; -} - -static inline void custom_match_umq_dump(custom_match_umq* list) -{ - char cpeer[64], ctag[64]; - - custom_match_umq_node* elem; - int i = 0; - int j = 0; - printf("Elements in the list:\n"); - for(elem = list->head; elem; elem = elem->next) - { - printf("This is the %d linked list element\n", ++i); - for(j = 0; j < 16; j++) - { - if(elem->value[j]) - { - mca_pml_ob1_recv_frag_t *req = (mca_pml_ob1_recv_frag_t *)elem->value[j]; - printf("%x %x %x\n", elem->value[j], req->hdr.hdr_match.hdr_tag, req->hdr.hdr_match.hdr_src); - if( OMPI_ANY_SOURCE == req->hdr.hdr_match.hdr_src ) snprintf(cpeer, 64, "%s", "ANY_SOURCE"); - else snprintf(cpeer, 64, "%d", req->hdr.hdr_match.hdr_src); - if( OMPI_ANY_TAG == req->hdr.hdr_match.hdr_tag ) snprintf(ctag, 64, "%s", "ANY_TAG"); - else snprintf(ctag, 64, "%d", req->hdr.hdr_match.hdr_tag); - // opal_output(0, "peer %s tag %s",// addr %p count %lu datatype %s [%p] [%s %s] req_seq %" PRIu64, - // /*(void*) req,*/ cpeer, ctag, - //(void*) req->req_addr, req->req_count, - //(0 != req->req_count ? req->req_datatype->name : "N/A"), - //(void*) req->req_datatype, - //(req->req_pml_complete ? "pml_complete" : ""), - //(req->req_free_called ? "freed" : ""), - //req->req_sequence); - // ); - - } - } - } -} - -#endif diff --git a/ompi/mca/pml/ob1/custommatch/linkedlist.h b/ompi/mca/pml/ob1/custommatch/linkedlist.h deleted file mode 100644 index c1db925d62..0000000000 --- a/ompi/mca/pml/ob1/custommatch/linkedlist.h +++ /dev/null @@ -1,531 +0,0 @@ -/* - * Copyright (c) 2018 Sandia National Laboratories. All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#include - - -#ifndef __CUSTOM_MATCH -#define __CUSTOM_MATCH - -#include "../pml_ob1_recvreq.h" -#include "../pml_ob1_recvfrag.h" - -#define CUSTOM_MATCH_DEBUG 0 -#define CUSTOM_MATCH_DEBUG_VERBOSE 0 - -typedef struct custom_match_prq_node -{ - int tag; - int tmask; - int src; - int smask; - struct custom_match_prq_node* next; - void* value; -} custom_match_prq_node; - -typedef struct custom_match_prq -{ - custom_match_prq_node* head; - custom_match_prq_node* tail; - custom_match_prq_node* pool; - int size; -} custom_match_prq; - -static inline int custom_match_prq_cancel(custom_match_prq* list, void* req) -{ -#if CUSTOM_MATCH_DEBUG_VERBOSE - printf("custom_match_prq_cancel - list: %x req: %x\n", list, req); -#endif - custom_match_prq_node* prev = 0; - custom_match_prq_node* elem = list->head; - int i; - while(elem) - { - if(elem->value == req) - { - // printf("Canceled!");// %x %x %x\n", req, req->req_tag, req->req_peer); - elem->tag = ~0; - elem->tmask = ~0; - elem->src = ~0; - elem->smask = ~0; - elem->value = 0; - if(prev) - { - prev->next = elem->next; - } - else - { - list->head = elem->next; - } - if(!elem->next) - { - list->tail = prev; - } - elem->next = list->pool; - list->pool = elem; - list->size--; - return 1; - } - prev = elem; - elem = elem->next; - } - return 0; -} - -static inline void* custom_match_prq_find_verify(custom_match_prq* list, int tag, int peer) -{ -#if CUSTOM_MATCH_DEBUG_VERBOSE - printf("custom_match_prq_find_verify list: %x tag: %x peer: %x\n", list, tag, peer); -#endif - custom_match_prq_node* elem = list->head; - int result; - - while(elem) - { - result = ((elem->tag & elem->tmask) == (tag & elem->tmask)) && - ((elem->src & elem->smask) == (peer & elem->smask)); - if(result) - { - return elem->value; - } - elem = elem->next; - } - return 0; -} - -static inline void* custom_match_prq_find_dequeue_verify(custom_match_prq* list, int tag, int peer) -{ -#if CUSTOM_MATCH_DEBUG_VERBOSE - printf("custom_match_prq_find_dequeue_verify list: %x:%d tag: %x peer: %x\n", list, list->size, tag, peer); -#endif - custom_match_prq_node* prev = 0; - custom_match_prq_node* elem = list->head; - int result; - while(elem) - { -#if CUSTOM_MATCH_DEBUG_VERBOSE - for(int iter = elem->start; iter <= elem->end; iter++) - { - printf("Search = %x, Element Key = %x, Element mask = %x", ((int32_t*) &search)[iter], ((int32_t*) &elem->keys)[iter], ((int32_t*) &elem->mask)[iter]); - } -#endif - result = ((elem->tag & elem->tmask) == (tag & elem->tmask)) && - ((elem->src & elem->smask) == (peer & elem->smask)); - if(result) - { - void* payload = elem->value; - elem->tag = ~0; - elem->tmask = ~0; - elem->src = ~0; - elem->smask = ~0; - elem->value = 0; - if(prev) - { - prev->next = elem->next; - } - else - { - list->head = elem->next; - } - if(!elem->next) - { - list->tail = prev; - } - elem->next = list->pool; - list->pool = elem; -#if CUSTOM_MATCH_DEBUG_VERBOSE - printf("%x == %x added to the pool\n", elem, list->pool); -#endif - list->size--; - mca_pml_base_request_t *req = (mca_pml_base_request_t *)payload; -#if CUSTOM_MATCH_DEBUG_VERBOSE - printf("Found list: %x tag: %x peer: %x\n", list, req->req_tag, req->req_peer); -#endif - return payload; - } - prev = elem; - elem = elem->next; - } - return 0; -} - - -static inline void custom_match_prq_append(custom_match_prq* list, void* payload, int tag, int source) -{ - int32_t mask_tag, mask_src; - if(source == OMPI_ANY_SOURCE) - { - mask_src = 0; - } - else - { - mask_src = ~0; - } - if(tag == OMPI_ANY_TAG) - { - mask_tag = 0; - } - else - { - mask_tag = ~0; - } - mca_pml_base_request_t *req = (mca_pml_base_request_t *)payload; -#if CUSTOM_MATCH_DEBUG_VERBOSE - printf("custom_match_prq_append list: %x tag: %x soruce: %x tag: %x peer: %x\n", list, tag, source, req->req_tag, req->req_peer); -#endif - int i; - custom_match_prq_node* elem; -#if CUSTOM_MATCH_DEBUG_VERBOSE - printf("%x next elem in the pool\n", list->pool); -#endif - if(list->pool) - { - elem = list->pool; - list->pool = list->pool->next; - } - else - { - elem = malloc(sizeof(custom_match_prq_node)); - } - elem->next = 0; - if(list->tail) - { - list->tail->next = elem; - list->tail = elem; - } - else - { - list->head = elem; - list->tail = elem; - } - - elem = list->tail; - elem->tag = tag; - elem->tmask = mask_tag; - elem->src = source; - elem->smask = mask_src; - elem->value = payload; - list->size++; -#if CUSTOM_MATCH_DEBUG_VERBOSE - printf("Exiting custom_match_prq_append\n"); -#endif -} - -static inline int custom_match_prq_size(custom_match_prq* list) -{ - return list->size; -} - -static inline custom_match_prq* custom_match_prq_init() -{ -#if CUSTOM_MATCH_DEBUG_VERBOSE - printf("custom_match_prq_init\n"); -#endif - custom_match_prq* list = malloc(sizeof(custom_match_prq)); - list->head = 0; - list->tail = 0; - list->pool = 0; - list->size = 0; - return list; -} - -static inline void custom_match_prq_destroy(custom_match_prq* list) -{ -#if CUSTOM_MATCH_DEBUG_VERBOSE - printf("custom_match_prq_destroy\n"); -#endif - custom_match_prq_node* elem; - int i = 0; - int j = 0; - while(list->head) - { - elem = list->head; - list->head = list->head->next; - free(elem); - i++; - } - while(list->pool) - { - elem = list->pool; - list->pool = list->pool->next; - free(elem); - j++; - } - free(list); -#if CUSTOM_MATCH_DEBUG_VERBOSE - printf("Number of prq elements destroyed = %d %d\n", i, j); -#endif -} - -static inline void custom_match_print(custom_match_prq* list) -{ - custom_match_prq_node* elem; - int i = 0; - int j = 0; - printf("Elements in the list (this is currenly only partialy implemented):\n"); - for(elem = list->head; elem; elem = elem->next) - { - printf("This is the %d linked list element\n", ++i); - printf("%d The key is %d, the mask is %d, the value is %ld\n", i, elem->tag, elem->tmask, elem->value); - i++; - } -} - -static inline void custom_match_prq_dump(custom_match_prq* list) -{ - opal_list_item_t* item; - char cpeer[64], ctag[64]; - - custom_match_prq_node* elem; - int i = 0; - int j = 0; - printf("Elements in the list:\n"); - for(elem = list->head; elem; elem = elem->next) - { - printf("This is the %d linked list element\n", ++i); - if(elem->value) - { - mca_pml_base_request_t *req = (mca_pml_base_request_t *)elem->value; - if( OMPI_ANY_SOURCE == req->req_peer ) snprintf(cpeer, 64, "%s", "ANY_SOURCE"); - else snprintf(cpeer, 64, "%d", req->req_peer); - if( OMPI_ANY_TAG == req->req_tag ) snprintf(ctag, 64, "%s", "ANY_TAG"); - else snprintf(ctag, 64, "%d", req->req_tag); - opal_output(0, "req %p peer %s tag %s addr %p count %lu datatype %s [%p] [%s %s] req_seq %" PRIu64, - (void*) req, cpeer, ctag, - (void*) req->req_addr, req->req_count, - (0 != req->req_count ? req->req_datatype->name : "N/A"), - (void*) req->req_datatype, - (req->req_pml_complete ? "pml_complete" : ""), - (req->req_free_called ? "freed" : ""), - req->req_sequence); - - } - } -} - - -// UMQ below. - -typedef struct custom_match_umq_node -{ - int tag; - int src; - struct custom_match_umq_node* next; - void* value; -} custom_match_umq_node; - -typedef struct custom_match_umq -{ - custom_match_umq_node* head; - custom_match_umq_node* tail; - custom_match_umq_node* pool; - int size; -} custom_match_umq; - -static inline void custom_match_umq_dump(custom_match_umq* list); - -static inline void* custom_match_umq_find_verify_hold(custom_match_umq* list, int tag, int peer, custom_match_umq_node** hold_prev, custom_match_umq_node** hold_elem, int* hold_index) -{ -#if CUSTOM_MATCH_DEBUG_VERBOSE - printf("custom_match_umq_find_verify_hold list: %x:%d tag: %x peer: %x\n", list, list->size, tag, peer); - custom_match_umq_dump(list); -#endif - custom_match_umq_node* prev = 0; - custom_match_umq_node* elem = list->head; - int result; - - int tmask = ~0; - int smask = ~0; - if(peer == OMPI_ANY_SOURCE) - { - smask = 0; - } - - if(tag == OMPI_ANY_TAG) - { - tmask = 0; - } - - tag = tag & tmask; - peer = peer & smask; - - while(elem) - { - result = ((elem->tag & tmask) == tag) && - ((elem->src & smask) == peer); - if(result) - { -#if CUSTOM_MATCH_DEBUG_VERBOSE - printf("Found list: %x tag: %x peer: %x\n", list, req->hdr.hdr_match.hdr_tag, req->hdr.hdr_match.hdr_src); -#endif - *hold_prev = prev; - *hold_elem = elem; - *hold_index = 0; - return elem->value; - } - prev = elem; - elem = elem->next; - } - return 0; -} - - -static inline void custom_match_umq_remove_hold(custom_match_umq* list, custom_match_umq_node* prev, custom_match_umq_node* elem, int i) -{ -#if CUSTOM_MATCH_DEBUG_VERBOSE - printf("custom_match_umq_find_remove_hold %x %x %x\n", prev, elem, i); -#endif - elem->tag = ~0; - elem->src = ~0; - elem->value = 0; - if(prev) - { - prev->next = elem->next; - } - else - { - list->head = elem->next; - } - if(!elem->next) - { - list->tail = prev; - } - elem->next = list->pool; - list->pool = elem; - list->size--; -} - -static inline void custom_match_umq_append(custom_match_umq* list, int tag, int source, void* payload) -{ -#if CUSTOM_MATCH_DEBUG_VERBOSE - printf("custom_match_umq_append list: %x payload: %x tag: %d src: %d\n", list, payload, tag, source); -#endif - int i; - custom_match_umq_node* elem; - list->size++; - if(list->pool) - { -#if CUSTOM_MATCH_DEBUG_VERBOSE - printf("Grab an element from the pool\n"); -#endif - elem = list->pool; - list->pool = list->pool->next; - } - else - { -#if CUSTOM_MATCH_DEBUG_VERBOSE - printf("Make a new element\n"); -#endif - elem = malloc(sizeof(custom_match_umq_node)); - } - elem->next = 0; - if(list->tail) - { -#if CUSTOM_MATCH_DEBUG_VERBOSE - printf("Append to list of elems\n"); -#endif - list->tail->next = elem; - list->tail = elem; - } - else - { -#if CUSTOM_MATCH_DEBUG_VERBOSE - printf("New Elem is only Elem\n"); -#endif - list->head = elem; - list->tail = elem; - } - - elem = list->tail; - elem->tag = tag; - elem->src = source; - elem->value = payload; -#if CUSTOM_MATCH_DEBUG_VERBOSE - custom_match_umq_dump(list); -#endif -} - -static inline custom_match_umq* custom_match_umq_init() -{ -#if CUSTOM_MATCH_DEBUG_VERBOSE - printf("custom_match_umq_init\n"); -#endif - custom_match_umq* list = malloc(sizeof(custom_match_umq)); - list->head = 0; - list->tail = 0; - list->pool = 0; - list->size = 0; - return list; -} - -static inline void custom_match_umq_destroy(custom_match_umq* list) -{ -#if CUSTOM_MATCH_DEBUG_VERBOSE - printf("custom_match_umq_destroy\n"); -#endif - custom_match_umq_node* elem; - int i = 0; - int j = 0; - while(list->head) - { - elem = list->head; - list->head = list->head->next; - free(elem); - i++; - } - while(list->pool) - { - elem = list->pool; - list->pool = list->pool->next; - free(elem); - j++; - } - free(list); - printf("Number of umq elements destroyed = %d %d\n", i, j); -} - -static inline int custom_match_umq_size(custom_match_umq* list) -{ - return list->size; -} - -static inline void custom_match_umq_dump(custom_match_umq* list) -{ - char cpeer[64], ctag[64]; - - custom_match_umq_node* elem; - int i = 0; - int j = 0; - printf("Elements in the list:\n"); - for(elem = list->head; elem; elem = elem->next) - { - printf("This is the %d linked list element\n", ++i); - if(elem->value) - { - mca_pml_ob1_recv_frag_t *req = (mca_pml_ob1_recv_frag_t *)elem->value; - printf("%x %x %x\n", elem->value, req->hdr.hdr_match.hdr_tag, req->hdr.hdr_match.hdr_src); - if( OMPI_ANY_SOURCE == req->hdr.hdr_match.hdr_src ) snprintf(cpeer, 64, "%s", "ANY_SOURCE"); - else snprintf(cpeer, 64, "%d", req->hdr.hdr_match.hdr_src); - if( OMPI_ANY_TAG == req->hdr.hdr_match.hdr_tag ) snprintf(ctag, 64, "%s", "ANY_TAG"); - else snprintf(ctag, 64, "%d", req->hdr.hdr_match.hdr_tag); - // opal_output(0, "peer %s tag %s",// addr %p count %lu datatype %s [%p] [%s %s] req_seq %" PRIu64, - // /*(void*) req,*/ cpeer, ctag, - //(void*) req->req_addr, req->req_count, - //(0 != req->req_count ? req->req_datatype->name : "N/A"), - //(void*) req->req_datatype, - //(req->req_pml_complete ? "pml_complete" : ""), - //(req->req_free_called ? "freed" : ""), - //req->req_sequence); - // ); - - } - } -} - -#endif diff --git a/ompi/mca/pml/ob1/custommatch/pml_ob1_custom_match.h b/ompi/mca/pml/ob1/custommatch/pml_ob1_custom_match.h new file mode 100644 index 0000000000..5a2fba20a3 --- /dev/null +++ b/ompi/mca/pml/ob1/custommatch/pml_ob1_custom_match.h @@ -0,0 +1,58 @@ +/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ +/* + * Copyright (c) 2018 Los Alamos National Security, LLC. All rights + * reserved. + * Copyright (c) 2018 Sandia National Laboratories. All rights reserved. + * + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#ifndef PML_OB1_CUSTOM_MATCH_H +#define PML_OB1_CUSTOM_MATCH_H + +#include "ompi_config.h" +#include "ompi/mca/pml/ob1/pml_ob1.h" + +#define CUSTOM_MATCH_DEBUG 1 +#define CUSTOM_MATCH_DEBUG_VERBOSE 1 + +/** + * Custom match types + */ +#define MCA_PML_OB1_CUSTOM_MATCHING_NONE 0 +#define MCA_PML_OB1_CUSTOM_MATCHING_LINKEDLIST 1 +#define MCA_PML_OB1_CUSTOM_MATCHING_ARRAYS 2 +#define MCA_PML_OB1_CUSTOM_MATCHING_FUZZY_BYTE 3 +#define MCA_PML_OB1_CUSTOM_MATCHING_FUZZY_SHORT 4 +#define MCA_PML_OB1_CUSTOM_MATCHING_FUZZY_WORD 5 +#define MCA_PML_OB1_CUSTOM_MATCHING_VECTOR 6 + +#if MCA_PML_OB1_CUSTOM_MATCHING != MCA_PML_OB1_CUSTOM_MATCHING_NONE + +#define MCA_PML_OB1_CUSTOM_MATCH 1 + +#if MCA_PML_OB1_CUSTOM_MATCHING == MCA_PML_OB1_CUSTOM_MATCHING_LINKEDLIST +#include "pml_ob1_custom_match_linkedlist.h" +#elif MCA_PML_OB1_CUSTOM_MATCHING == MCA_PML_OB1_CUSTOM_MATCHING_ARRAYS +#include "pml_ob1_custom_match_arrays.h" +#elif MCA_PML_OB1_CUSTOM_MATCHING == MCA_PML_OB1_CUSTOM_MATCHING_FUZZY_BYTE +#include "pml_ob1_custom_match_fuzzy512-byte.h" +#elif MCA_PML_OB1_CUSTOM_MATCHING == MCA_PML_OB1_CUSTOM_MATCHING_FUZZY_SHORT +#include "pml_ob1_custom_match_fuzzy512-short.h" +#elif MCA_PML_OB1_CUSTOM_MATCHING == MCA_PML_OB1_CUSTOM_MATCHING_FUZZY_WORD +#include "pml_ob1_custom_match_fuzzy512-word.h" +#elif MCA_PML_OB1_CUSTOM_MATCHING == MCA_PML_OB1_CUSTOM_MATCHING_VECTOR +#include "pml_ob1_custom_match_vectors.h" +#endif + +#else + +#define MCA_PML_OB1_CUSTOM_MATCH 0 + +#endif + +#endif diff --git a/ompi/mca/pml/ob1/custommatch/pml_ob1_custom_match_arrays.h b/ompi/mca/pml/ob1/custommatch/pml_ob1_custom_match_arrays.h new file mode 100644 index 0000000000..3e59a5a35e --- /dev/null +++ b/ompi/mca/pml/ob1/custommatch/pml_ob1_custom_match_arrays.h @@ -0,0 +1,589 @@ +/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ +/* + * Copyright (c) 2018 Los Alamos National Security, LLC. All rights + * reserved. + * Copyright (c) 2018 Sandia National Laboratories. All rights reserved. + * + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#ifndef PML_OB1_CUSTOM_MATCH_ARRAYS_H +#define PML_OB1_CUSTOM_MATCH_ARRAYS_H + +#include + +#include "../pml_ob1_recvreq.h" +#include "../pml_ob1_recvfrag.h" + +#define PRQ_SIZE 2 + +typedef struct custom_match_prq_node +{ + int32_t tags[PRQ_SIZE]; + int32_t tmask[PRQ_SIZE]; + int32_t srcs[PRQ_SIZE]; + int32_t smask[PRQ_SIZE]; + struct custom_match_prq_node* next; + int8_t start, end; + void* value[PRQ_SIZE]; +} custom_match_prq_node; + +typedef struct custom_match_prq +{ + custom_match_prq_node* head; + custom_match_prq_node* tail; + custom_match_prq_node* pool; + int size; +} custom_match_prq; + +static inline int custom_match_prq_cancel(custom_match_prq* list, void* req) +{ +#if CUSTOM_MATCH_DEBUG + printf("custom_match_prq_cancel - list: %x req: %x\n", list, req); +#endif + custom_match_prq_node* prev = 0; + custom_match_prq_node* elem = list->head; + int i; + while(elem) + { + for(i = elem->start; i <= elem->end; i++) + { + if(elem->value[i] == req) + { +#if CUSTOM_MATCH_DEBUG + printf("Canceled!");// %x %x %x\n", req, req->req_tag, req->req_peer); +#endif + elem->tags[i] = ~0; + elem->tmask[i] = ~0; + elem->srcs[i] = ~0; + elem->smask[i] = ~0; + elem->value[i] = 0; + if(i == elem->start || i == elem->end) + { + while((elem->start <= elem->end) && (!(elem->value[elem->start]))) elem->start++; + while((elem->start <= elem->end) && (!(elem->value[elem->end]))) elem->end--; + if(elem->start > elem->end) + { + if(prev) + { + prev->next = elem->next; + } + else + { + list->head = elem->next; + } + if(!elem->next) + { + list->tail = prev; + } + elem->next = list->pool; + list->pool = elem; + } + } + list->size--; + return 1; + } + } + prev = elem; + elem = elem->next; + } + return 0; +} + +static inline void* custom_match_prq_find_verify(custom_match_prq* list, int tag, int peer) +{ + int result; +#if CUSTOM_MATCH_DEBUG + // printf("custom_match_prq_find_verify list: %x tag: %x peer: %x\n", list, tag, peer); +#endif + custom_match_prq_node* elem = list->head; + int i; + + while(elem) + { + for(i = elem->start; i <= elem->end; i++) + { + if(elem->value[i]) + { + result = ((elem->tags[i] & elem->tmask[i]) == (tag & elem->tmask[i])) && ((elem->srcs[i] & elem->smask[i]) == (peer & elem->smask[i])); + if(result) + { + return elem->value[i]; + } + } + elem = elem->next; + } + } + return 0; +} + +static inline void* custom_match_prq_find_dequeue_verify(custom_match_prq* list, int tag, int peer) +{ + int result; +#if CUSTOM_MATCH_DEBUG + // printf("custom_match_prq_find_dequeue_verify list: %x:%d tag: %x peer: %x\n", list, list->size, tag, peer); +#endif + custom_match_prq_node* prev = 0; + custom_match_prq_node* elem = list->head; + int i; + while(elem) + { + for(i = elem->start; i <= elem->end; i++) + { + if(elem->value[i]) + { + result = ((elem->tags[i] & elem->tmask[i]) == (tag & elem->tmask[i])) && ((elem->srcs[i] & elem->smask[i]) == (peer & elem->smask[i])); + if(result) + { + void* payload = elem->value[i]; + elem->tags[i] = ~0; + elem->tmask[i] = ~0; + elem->srcs[i] = ~0; + elem->smask[i] = ~0; + elem->value[i] = 0; + if(i == elem->start || i == elem->end) + { + while((elem->start <= elem->end) && (!(elem->value[elem->start]))) elem->start++; + while((elem->start <= elem->end) && (!(elem->value[elem->end]))) elem->end--; + if(elem->start > elem->end) + { + if(prev) + { + prev->next = elem->next; + } + else + { + list->head = elem->next; + } + if(!elem->next) + { + list->tail = prev; + } + elem->next = list->pool; + list->pool = elem; + } + } + list->size--; + //printf("Found list: %x tag: %x peer: %x\n", list, req->req_tag, req->req_peer); + return payload; + } + } + } + prev = elem; + elem = elem->next; + } + return 0; +} + + +static inline void custom_match_prq_append(custom_match_prq* list, void* payload, int tag, int source) +{ + int32_t mask_tag, mask_src; + if(source == OMPI_ANY_SOURCE) + { + mask_src = 0; + } + else + { + mask_src = ~0; + } + if(tag == OMPI_ANY_TAG) + { + mask_tag = 0; + } + else + { + mask_tag = ~0; + } + mca_pml_base_request_t *req = (mca_pml_base_request_t *)payload; +#if CUSTOM_MATCH_DEBUG + printf("custom_match_prq_append list: %x mask: %x tag: %x peer: %x\n", list, mask_tag, tag, source); +#endif + int i; + custom_match_prq_node* elem; + if((!list->tail) || list->tail->end == PRQ_SIZE-1) + { + if(list->pool) + { + elem = list->pool; + list->pool = list->pool->next; + } + else + { + elem = malloc(sizeof(custom_match_prq_node)); + } + elem->next = 0; + elem->start = 0; + elem->end = -1; // we don't have an element yet + for(i = 0; i < PRQ_SIZE; i++) + { + elem->value[i] = 0; + elem->tags[i] = ~0; // TODO: we only have to do this type of initialization for freshly malloc'd entries. + elem->tmask[i] = ~0; + elem->srcs[i] = ~0; + elem->smask[i] = ~0; + } + + if(list->tail) + { + list->tail->next = elem; + list->tail = elem; + } + else + { + list->head = elem; + list->tail = elem; + } + } + + elem = list->tail; + elem->end++; + elem->tags[elem->end] = tag; + elem->tmask[elem->end] = mask_tag; + elem->srcs[elem->end] = source; + elem->smask[elem->end] = mask_src; + elem->value[elem->end] = payload; + list->size++; +#if CUSTOM_MATCH_DEBUG + printf("Exiting custom_match_prq_append\n"); +#endif +} + +static inline int custom_match_prq_size(custom_match_prq* list) +{ + return list->size; +} + +static inline custom_match_prq* custom_match_prq_init() +{ +#if CUSTOM_MATCH_DEBUG + printf("custom_match_prq_init\n"); +#endif + custom_match_prq* list = malloc(sizeof(custom_match_prq)); + list->head = 0; + list->tail = 0; + list->pool = 0; + list->size = 0; + return list; +} + +static inline void custom_match_prq_destroy(custom_match_prq* list) +{ +#if CUSTOM_MATCH_DEBUG + printf("custom_match_prq_destroy\n"); +#endif + custom_match_prq_node* elem; + while(list->head) + { + elem = list->head; + list->head = list->head->next; + free(elem); + } + while(list->pool) + { + elem = list->pool; + list->pool = list->pool->next; + free(elem); + } + free(list); +} + +static inline void custom_match_print(custom_match_prq* list) +{ + custom_match_prq_node* elem; + int i = 0; + int j = 0; + printf("Elements in the list (this is currenly only partialy implemented):\n"); + for(elem = list->head; elem; elem = elem->next) + { + printf("This is the %d linked list element\n", ++i); + for(j = 0; j < PRQ_SIZE; j++) + { + printf("%d:%d The key is %d, the mask is %d, the value is %ld\n", i, j, elem->tags[j], elem->tmask[j], elem->value[j]); + } + i++; + } +} + +static inline void custom_match_prq_dump(custom_match_prq* list) +{ + opal_list_item_t* item; + char cpeer[64], ctag[64]; + + custom_match_prq_node* elem; + int i = 0; + int j = 0; + printf("Elements in the list:\n"); + for(elem = list->head; elem; elem = elem->next) + { + printf("This is the %d linked list element\n", ++i); + for(j = 0; j < PRQ_SIZE; j++) + { + if(elem->value[j]) + { + mca_pml_base_request_t *req = (mca_pml_base_request_t *)elem->value[j]; + if( OMPI_ANY_SOURCE == req->req_peer ) snprintf(cpeer, 64, "%s", "ANY_SOURCE"); + else snprintf(cpeer, 64, "%d", req->req_peer); + if( OMPI_ANY_TAG == req->req_tag ) snprintf(ctag, 64, "%s", "ANY_TAG"); + else snprintf(ctag, 64, "%d", req->req_tag); + opal_output(0, "req %p peer %s tag %s addr %p count %lu datatype %s [%p] [%s %s] req_seq %" PRIu64, + (void*) req, cpeer, ctag, + (void*) req->req_addr, req->req_count, + (0 != req->req_count ? req->req_datatype->name : "N/A"), + (void*) req->req_datatype, + (req->req_pml_complete ? "pml_complete" : ""), + (req->req_free_called ? "freed" : ""), + req->req_sequence); + + } + } + } +} + + +// UMQ below. + +#define UMQ_SIZE 3 + +typedef struct custom_match_umq_node +{ + int32_t tags[UMQ_SIZE]; + int32_t srcs[UMQ_SIZE]; + struct custom_match_umq_node* next; + int8_t start, end; + void* value[UMQ_SIZE]; +} custom_match_umq_node; + +typedef struct custom_match_umq +{ + custom_match_umq_node* head; + custom_match_umq_node* tail; + custom_match_umq_node* pool; + int size; +} custom_match_umq; + +static inline void custom_match_umq_dump(custom_match_umq* list); + +static inline void* custom_match_umq_find_verify_hold(custom_match_umq* list, int tag, int peer, custom_match_umq_node** hold_prev, custom_match_umq_node** hold_elem, int* hold_index) +{ + int result; +#if CUSTOM_MATCH_DEBUG + printf("custom_match_umq_find_verify_hold list: %x:%d tag: %x peer: %x\n", list, list->size, tag, peer); +#if CUSTOM_MATCH_DEBUG_VERBOSE + custom_match_umq_dump(list); +#endif +#endif + custom_match_umq_node* prev = 0; + custom_match_umq_node* elem = list->head; + int i; + + int tmask = ~0; + int smask = ~0; + if(peer == OMPI_ANY_SOURCE) + { + smask = 0; + } + + if(tag == OMPI_ANY_TAG) + { + tmask = 0; + } + + + tag = tag & tmask; + peer = peer & smask; + + while(elem) + { + for(i = elem->start; i <= elem->end; i++) + { + if(elem->value[i]) + { + result = (tag == (elem->tags[i] & tmask)) && (peer == (elem->srcs[i] & smask)); + if(result) + { + *hold_prev = prev; + *hold_elem = elem; + *hold_index = i; + return elem->value[i]; + } + } + } + prev = elem; + elem = elem->next; + } + return 0; +} + + +static inline void custom_match_umq_remove_hold(custom_match_umq* list, custom_match_umq_node* prev, custom_match_umq_node* elem, int i) +{ +#if CUSTOM_MATCH_DEBUG + printf("custom_match_umq_find_remove_hold %x %x %x\n", prev, elem, i); +#endif + elem->tags[i] = ~0; + elem->srcs[i] = ~0; + elem->value[i] = 0; + if(i == elem->start || i == elem->end) + { + while((elem->start <= elem->end) && (!(elem->value[elem->start]))) elem->start++; + while((elem->start <= elem->end) && (!(elem->value[elem->end]))) elem->end--; + if(elem->start > elem->end) + { + if(prev) + { + prev->next = elem->next; + } + else + { + list->head = elem->next; + } + if(!elem->next) + { + list->tail = prev; + } + elem->next = list->pool; + list->pool = elem; + } + } + list->size--; +} + +static inline void custom_match_umq_append(custom_match_umq* list, int tag, int source, void* payload) +{ +#if CUSTOM_MATCH_DEBUG + printf("custom_match_umq_append list: %x payload: %x tag: %d src: %d\n", list, payload, tag, source); +#endif + int i; + custom_match_umq_node* elem; + list->size++; + if((!list->tail) || list->tail->end == UMQ_SIZE-1) + { + if(list->pool) + { +#if CUSTOM_MATCH_DEBUG_VERBOSE + printf("Grab an element from the pool\n"); +#endif + elem = list->pool; + list->pool = list->pool->next; + } + else + { +#if CUSTOM_MATCH_DEBUG_VERBOSE + printf("Make a new element\n"); +#endif + elem = malloc(sizeof(custom_match_umq_node)); + } + elem->next = 0; + elem->start = 0; + elem->end = -1; // we don't have an element yet + for(i = 0; i < UMQ_SIZE; i++) + { + elem->tags[i] = 0; + elem->srcs[i] = 0; + elem->value[i] = 0; + } + if(list->tail) + { + //printf("Append to list of elems\n"); + list->tail->next = elem; + list->tail = elem; + } + else + { + //printf("New Elem is only Elem\n"); + list->head = elem; + list->tail = elem; + } + } + + elem = list->tail; + elem->end++; + elem->tags[elem->end] = tag; + elem->srcs[elem->end] = source; + elem->value[elem->end] = payload; +#if CUSTOM_MATCH_DEBUG_VERBOSE + custom_match_umq_dump(list); +#endif +} + +static inline custom_match_umq* custom_match_umq_init() +{ +#if CUSTOM_MATCH_DEBUG + printf("custom_match_umq_init\n"); +#endif + custom_match_umq* list = malloc(sizeof(custom_match_umq)); + list->head = 0; + list->tail = 0; + list->pool = 0; + list->size = 0; + return list; +} + +static inline void custom_match_umq_destroy(custom_match_umq* list) +{ +#if CUSTOM_MATCH_DEBUG + printf("custom_match_umq_destroy\n"); +#endif + custom_match_umq_node* elem; + while(list->head) + { + elem = list->head; + list->head = list->head->next; + free(elem); + } + while(list->pool) + { + elem = list->pool; + list->pool = list->pool->next; + free(elem); + } + free(list); +} + +static inline int custom_match_umq_size(custom_match_umq* list) +{ + return list->size; +} + +static inline void custom_match_umq_dump(custom_match_umq* list) +{ + char cpeer[64], ctag[64]; + + custom_match_umq_node* elem; + int i = 0; + int j = 0; + printf("Elements in the list:\n"); + for(elem = list->head; elem; elem = elem->next) + { + printf("This is the %d linked list element\n", ++i); + for(j = 0; j < UMQ_SIZE; j++) + { + if(elem->value[j]) + { + mca_pml_ob1_recv_frag_t *req = (mca_pml_ob1_recv_frag_t *)elem->value[j]; + printf("%x %x %x\n", elem->value[j], req->hdr.hdr_match.hdr_tag, req->hdr.hdr_match.hdr_src); + if( OMPI_ANY_SOURCE == req->hdr.hdr_match.hdr_src ) snprintf(cpeer, 64, "%s", "ANY_SOURCE"); + else snprintf(cpeer, 64, "%d", req->hdr.hdr_match.hdr_src); + if( OMPI_ANY_TAG == req->hdr.hdr_match.hdr_tag ) snprintf(ctag, 64, "%s", "ANY_TAG"); + else snprintf(ctag, 64, "%d", req->hdr.hdr_match.hdr_tag); + // opal_output(0, "peer %s tag %s",// addr %p count %lu datatype %s [%p] [%s %s] req_seq %" PRIu64, + // /*(void*) req,*/ cpeer, ctag, + //(void*) req->req_addr, req->req_count, + //(0 != req->req_count ? req->req_datatype->name : "N/A"), + //(void*) req->req_datatype, + //(req->req_pml_complete ? "pml_complete" : ""), + //(req->req_free_called ? "freed" : ""), + //req->req_sequence); + // ); + + } + } + } +} + +#endif diff --git a/ompi/mca/pml/ob1/custommatch/pml_ob1_custom_match_fuzzy512-byte.h b/ompi/mca/pml/ob1/custommatch/pml_ob1_custom_match_fuzzy512-byte.h new file mode 100644 index 0000000000..7ca59d89ab --- /dev/null +++ b/ompi/mca/pml/ob1/custommatch/pml_ob1_custom_match_fuzzy512-byte.h @@ -0,0 +1,594 @@ +/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ +/* + * Copyright (c) 2018 Los Alamos National Security, LLC. All rights + * reserved. + * Copyright (c) 2018 Sandia National Laboratories. All rights reserved. + * + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#ifndef PML_OB1_CUSTOM_MATCH_FUZZY512_BYTE_H +#define PML_OB1_CUSTOM_MATCH_FUZZY512_BYTE_H + +#include + +#include "../pml_ob1_recvreq.h" +#include "../pml_ob1_recvfrag.h" + +typedef struct custom_match_prq_node +{ + __m512i keys; + __m512i mask; + struct custom_match_prq_node* next; + int start, end; + void* value[64]; +} custom_match_prq_node; + +typedef struct custom_match_prq +{ + custom_match_prq_node* head; + custom_match_prq_node* tail; + custom_match_prq_node* pool; + int size; +} custom_match_prq; + +static inline int custom_match_prq_cancel(custom_match_prq* list, void* req) +{ +#if CUSTOM_MATCH_DEBUG + printf("custom_match_prq_cancel - list: %x req: %x\n", list, req); +#endif + __mmask64 result = 0; + custom_match_prq_node* prev = 0; + custom_match_prq_node* elem = list->head; + int i; + while(elem) + { + for(i = elem->start; i <= elem->end; i++) + { + if(elem->value[i] == req) + { +#if CUSTOM_MATCH_DEBUG + printf("Canceled!");// %x %x %x\n", req, req->req_tag, req->req_peer); +#endif + void* payload = elem->value[i]; + ((int8_t*)(&(elem->keys)))[i] = ~0; + ((int8_t*)(&(elem->mask)))[i] = ~0; + elem->value[i] = 0; + if(i == elem->start || i == elem->end) + { + while((elem->start <= elem->end) && (!(elem->value[elem->start]))) elem->start++; + while((elem->start <= elem->end) && (!(elem->value[elem->end]))) elem->end--; + if(elem->start > elem->end) + { + if(prev) + { + prev->next = elem->next; + } + else + { + list->head = elem->next; + } + if(!elem->next) + { + list->tail = prev; + } + elem->next = list->pool; + list->pool = elem; + } + } + list->size--; + return 1; + } + } + prev = elem; + elem = elem->next; + } + return 0; +} + +static inline void* custom_match_prq_find_verify(custom_match_prq* list, int tag, int peer) +{ +#if CUSTOM_MATCH_DEBUG + printf("custom_match_prq_find_verify list: %x tag: %x peer: %x\n", list, tag, peer); +#endif + __mmask64 result = 0; + custom_match_prq_node* elem = list->head; + int i; + int8_t key = peer ^ tag; + __m512i search = _mm512_set1_epi8(key); + while(elem) + { + result = _mm512_cmpeq_epi8_mask(_mm512_and_epi32(elem->keys, elem->mask), _mm512_and_epi32(search, elem->mask)); + if(result) + { + for(i = elem->start; i <= elem->end; i++) + { + if((0x1 << i & result) && elem->value[i]) + { + mca_pml_base_request_t *req = (mca_pml_base_request_t *)elem->value[i]; + if((req->req_peer == peer || req->req_peer == OMPI_ANY_SOURCE) && (req->req_tag == tag || req->req_tag == OMPI_ANY_TAG)) + { +#if CUSTOM_MATCH_DEBUG_VERBOSE + printf("Found list: %x tag: %x peer: %x\n", list, req->req_tag, req->req_peer); +#endif + return elem->value[i]; + } + } + } + } + elem = elem->next; + } + return 0; +} + +static inline void* custom_match_prq_find_dequeue_verify(custom_match_prq* list, int tag, int peer) +{ +#if CUSTOM_MATCH_DEBUG + printf("custom_match_prq_find_dequeue_verify list: %x:%d tag: %x peer: %x\n", list, list->size, tag, peer); +#endif + __mmask64 result = 0; + custom_match_prq_node* prev = 0; + custom_match_prq_node* elem = list->head; + int i; + int8_t key = peer ^ tag; + __m512i search = _mm512_set1_epi8(key); + while(elem) + { +#if CUSTOM_MATCH_DEBUG_VERBOSE + for(int iter = elem->start; iter <= elem->end; iter++) + { + printf("Search = %x, Element Key = %x, Element mask = %x\n", ((int8_t*) &search)[iter], ((int8_t*) &elem->keys)[iter], ((int8_t*) &elem->mask)[iter]); + } +#endif + result = _mm512_cmpeq_epi8_mask(_mm512_and_epi32(elem->keys, elem->mask), _mm512_and_epi32(search, elem->mask)); +#if CUSTOM_MATCH_DEBUG_VERBOSE + printf("Search Result: %lx\n",result); +#endif + if(result) + { +#if CUSTOM_MATCH_DEBUG_VERBOSE + printf("Search Result: %lx\n",result); +#endif + for(i = elem->start; i <= elem->end; i++) + { + mca_pml_base_request_t *req = (mca_pml_base_request_t *)elem->value[i]; + if(((0x1l << i) & result) && req && ((req->req_peer == peer || req->req_peer == OMPI_ANY_SOURCE) && (req->req_tag == tag || req->req_tag == OMPI_ANY_TAG))) + { + void* payload = elem->value[i]; + ((int8_t*)(&(elem->keys)))[i] = ~0; + ((int8_t*)(&(elem->mask)))[i] = ~0; + elem->value[i] = 0; + if(i == elem->start || i == elem->end) + { + while((elem->start <= elem->end) && (!(elem->value[elem->start]))) elem->start++; + while((elem->start <= elem->end) && (!(elem->value[elem->end]))) elem->end--; + if(elem->start > elem->end) + { + if(prev) + { + prev->next = elem->next; + } + else + { + list->head = elem->next; + } + if(!elem->next) + { + list->tail = prev; + } + elem->next = list->pool; + list->pool = elem; + } + } + list->size--; +#if CUSTOM_MATCH_DEBUG_VERBOSE + printf("Index: %d Found list: %x tag: %x peer: %x\n", i, list, req->req_tag, req->req_peer); +#endif + return payload; + } + } + } + prev = elem; + elem = elem->next; + } + return 0; +} + + +static inline void custom_match_prq_append(custom_match_prq* list, void* payload, int tag, int source) +{ + int8_t key, mask; + key = source ^ tag; + if(source == OMPI_ANY_SOURCE || tag == OMPI_ANY_TAG) + { + mask = 0; + } + else + { + mask = ~0; + } + mca_pml_base_request_t *req = (mca_pml_base_request_t *)payload; +#if CUSTOM_MATCH_DEBUG + printf("custom_match_prq_append list: %x key: %x mask: %x tag: %x peer: %x\n", list, key, mask, req->req_tag, req->req_peer); +#endif + int i; + custom_match_prq_node* elem; + if((!list->tail) || list->tail->end == 63) + { +#if CUSTOM_MATCH_DEBUG_VERBOSE + printf("Need a new element\n"); +#endif + if(list->pool) + { + elem = list->pool; + list->pool = list->pool->next; + } + else + { + elem = _mm_malloc(sizeof(custom_match_prq_node),64); + } + elem->keys = _mm512_set1_epi8(~0); + elem->mask = _mm512_set1_epi8(~0); + elem->next = 0; + elem->start = 0; + elem->end = -1; // we don't have an element yet + for(i = 0; i < 64; i++) elem->value[i] = 0; + if(list->tail) + { + list->tail->next = elem; + list->tail = elem; + } + else + { + list->head = elem; + list->tail = elem; + } + } + + elem = list->tail; + elem->end++; + ((int8_t*)(&(elem->keys)))[elem->end] = key; + ((int8_t*)(&(elem->mask)))[elem->end] = mask; + elem->value[elem->end] = payload; + list->size++; +#if CUSTOM_MATCH_DEBUG_VERBOSE + printf("Exiting custom_match_prq_append\n"); +#endif +} + +static inline int custom_match_prq_size(custom_match_prq* list) +{ + return list->size; +} + +static inline custom_match_prq* custom_match_prq_init() +{ +#if CUSTOM_MATCH_DEBUG + printf("custom_match_prq_init\n"); +#endif + custom_match_prq* list = _mm_malloc(sizeof(custom_match_prq),64); + list->head = 0; + list->tail = 0; + list->pool = 0; + list->size = 0; + return list; +} + +static inline void custom_match_prq_destroy(custom_match_prq* list) +{ +#if CUSTOM_MATCH_DEBUG + printf("custom_match_prq_destroy\n"); +#endif + custom_match_prq_node* elem; + while(list->head) + { + elem = list->head; + list->head = list->head->next; + _mm_free(elem); + } + while(list->pool) + { + elem = list->pool; + list->pool = list->pool->next; + _mm_free(elem); + } + _mm_free(list); +} + +static inline void custom_match_print(custom_match_prq* list) +{ + custom_match_prq_node* elem; + int i = 0; + int j = 0; + printf("Elements in the list:\n"); + for(elem = list->head; elem; elem = elem->next) + { + printf("This is the %d linked list element\n", ++i); + for(j = 0; j < 64; j++) + { + printf("%d:%d The key is %d, the mask is %d, the value is %ld\n", i, j, ((int8_t*)(&(elem->keys)))[j], ((int8_t*)(&(elem->mask)))[j], elem->value[j]); + } + i++; + } +} + +static inline void custom_match_prq_dump(custom_match_prq* list) +{ + opal_list_item_t* item; + char cpeer[64], ctag[64]; + + custom_match_prq_node* elem; + int i = 0; + int j = 0; + printf("Elements in the list:\n"); + for(elem = list->head; elem; elem = elem->next) + { + printf("This is the %d linked list element\n", ++i); + for(j = 0; j < 64; j++) + { + if(elem->value[j]) + { + mca_pml_base_request_t *req = (mca_pml_base_request_t *)elem->value[j]; + if( OMPI_ANY_SOURCE == req->req_peer ) snprintf(cpeer, 64, "%s", "ANY_SOURCE"); + else snprintf(cpeer, 64, "%d", req->req_peer); + if( OMPI_ANY_TAG == req->req_tag ) snprintf(ctag, 64, "%s", "ANY_TAG"); + else snprintf(ctag, 64, "%d", req->req_tag); + opal_output(0, "req %p peer %s tag %s addr %p count %lu datatype %s [%p] [%s %s] req_seq %" PRIu64, + (void*) req, cpeer, ctag, + (void*) req->req_addr, req->req_count, + (0 != req->req_count ? req->req_datatype->name : "N/A"), + (void*) req->req_datatype, + (req->req_pml_complete ? "pml_complete" : ""), + (req->req_free_called ? "freed" : ""), + req->req_sequence); + + } + } + } +} + + +// UMQ below. + +typedef struct custom_match_umq_node +{ + __m512i keys; + struct custom_match_umq_node* next; + int start, end; + void* value[64]; +} custom_match_umq_node; + +typedef struct custom_match_umq +{ + custom_match_umq_node* head; + custom_match_umq_node* tail; + custom_match_umq_node* pool; + int size; +} custom_match_umq; + +static inline void custom_match_umq_dump(custom_match_umq* list); + +static inline void* custom_match_umq_find_verify_hold(custom_match_umq* list, int tag, int peer, custom_match_umq_node** hold_prev, custom_match_umq_node** hold_elem, int* hold_index) +{ +#if CUSTOM_MATCH_DEBUG + printf("custom_match_umq_find_verify_hold list: %x:%d tag: %x peer: %x\n", list, list->size, tag, peer); +#if CUSTOM_MATCH_DEBUG_VERBOSE + custom_match_umq_dump(list); +#endif +#endif + __mmask64 result = 0; + custom_match_umq_node* prev = 0; + custom_match_umq_node* elem = list->head; + int i; + int8_t key = peer ^ tag; + int8_t mask; + if(peer == OMPI_ANY_SOURCE || tag == OMPI_ANY_TAG) + { + mask = 0; + } + else + { + mask = ~0; + } + __m512i search = _mm512_set1_epi8(key); + __m512i msearch = _mm512_set1_epi8(mask); + search = _mm512_and_epi32(search, msearch); + + while(elem) + { + result = _mm512_cmpeq_epi8_mask(_mm512_and_epi32(elem->keys,msearch), search); + if(result) + { + for(i = elem->start; i <= elem->end; i++) + { + if((0x1l << i & result) && elem->value[i]) + { + mca_pml_ob1_recv_frag_t *req = (mca_pml_ob1_recv_frag_t *)elem->value[i]; + if((req->hdr.hdr_match.hdr_src == peer || peer == OMPI_ANY_SOURCE) && (req->hdr.hdr_match.hdr_tag == tag || tag == OMPI_ANY_TAG)) + { +#if CUSTOM_MATCH_DEBUG_VERBOSE + printf("Found list: %x tag: %x peer: %x\n", list, req->hdr.hdr_match.hdr_tag, req->hdr.hdr_match.hdr_src); +#endif + *hold_prev = prev; + *hold_elem = elem; + *hold_index = i; + return elem->value[i]; + } + } + } + } + prev = elem; + elem = elem->next; + } + return 0; +} + + +static inline void custom_match_umq_remove_hold(custom_match_umq* list, custom_match_umq_node* prev, custom_match_umq_node* elem, int i) +{ +#if CUSTOM_MATCH_DEBUG + printf("custom_match_umq_find_remove_hold %x %x %x\n", prev, elem, i); +#endif + ((int8_t*)(&(elem->keys)))[i] = ~0; + elem->value[i] = 0; + if(i == elem->start || i == elem->end) + { + while((elem->start <= elem->end) && (!(elem->value[elem->start]))) elem->start++; + while((elem->start <= elem->end) && (!(elem->value[elem->end]))) elem->end--; + if(elem->start > elem->end) + { + if(prev) + { + prev->next = elem->next; + } + else + { + list->head = elem->next; + } + if(!elem->next) + { + list->tail = prev; + } + elem->next = list->pool; + list->pool = elem; + } + } + list->size--; +} + +static inline void custom_match_umq_append(custom_match_umq* list, int tag, int source, void* payload) +{ + int8_t key = source ^ tag; +#if CUSTOM_MATCH_DEBUG + ((int8_t*)&key)[3] = (int8_t) tag; // MGFD TODO verify this set higer order bits... + mca_pml_ob1_recv_frag_t *req = (mca_pml_ob1_recv_frag_t *)payload; + printf("custom_match_umq_append list: %x key: %x payload: %x tag: %d src: %d\n", list, key, payload, req->hdr.hdr_match.hdr_tag, req->hdr.hdr_match.hdr_src); +#endif + int i; + custom_match_umq_node* elem; + list->size++; + if((!list->tail) || list->tail->end == 63) + { + if(list->pool) + { +#if CUSTOM_MATCH_DEBUG_VERBOSE + printf("Grab an element from the pool\n"); +#endif + elem = list->pool; + list->pool = list->pool->next; + } + else + { +#if CUSTOM_MATCH_DEBUG_VERBOSE + printf("Make a new element\n"); +#endif + elem = _mm_malloc(sizeof(custom_match_umq_node),64); + } + elem->keys = _mm512_set1_epi8(~0); // TODO: we may only have to do this type of initialization for freshly malloc'd entries. + elem->next = 0; + elem->start = 0; + elem->end = -1; // we don't have an element yet + for(i = 0; i < 64; i++) elem->value[i] = 0; + if(list->tail) + { + //printf("Append to list of elems\n"); + list->tail->next = elem; + list->tail = elem; + } + else + { + //printf("New Elem is only Elem\n"); + list->head = elem; + list->tail = elem; + } + } + + elem = list->tail; + elem->end++; + ((int8_t*)(&(elem->keys)))[elem->end] = key; + elem->value[elem->end] = payload; +#if CUSTOM_MATCH_DEBUG_VERBOSE + custom_match_umq_dump(list); +#endif +} + +static inline custom_match_umq* custom_match_umq_init() +{ +#if CUSTOM_MATCH_DEBUG + printf("custom_match_umq_init\n"); +#endif + custom_match_umq* list = _mm_malloc(sizeof(custom_match_umq),64); + list->head = 0; + list->tail = 0; + list->pool = 0; + list->size = 0; + return list; +} + +static inline void custom_match_umq_destroy(custom_match_umq* list) +{ +#if CUSTOM_MATCH_DEBUG + printf("custom_match_umq_destroy\n"); +#endif + custom_match_umq_node* elem; + while(list->head) + { + elem = list->head; + list->head = list->head->next; + _mm_free(elem); + } + while(list->pool) + { + elem = list->pool; + list->pool = list->pool->next; + _mm_free(elem); + } + _mm_free(list); +} + +static inline int custom_match_umq_size(custom_match_umq* list) +{ + return list->size; +} + +static inline void custom_match_umq_dump(custom_match_umq* list) +{ + char cpeer[64], ctag[64]; + + custom_match_umq_node* elem; + int i = 0; + int j = 0; + printf("Elements in the list:\n"); + for(elem = list->head; elem; elem = elem->next) + { + printf("This is the %d linked list element\n", ++i); + for(j = 0; j < 64; j++) + { + if(elem->value[j]) + { + mca_pml_ob1_recv_frag_t *req = (mca_pml_ob1_recv_frag_t *)elem->value[j]; + printf("%x %x %x\n", elem->value[j], req->hdr.hdr_match.hdr_tag, req->hdr.hdr_match.hdr_src); + if( OMPI_ANY_SOURCE == req->hdr.hdr_match.hdr_src ) snprintf(cpeer, 64, "%s", "ANY_SOURCE"); + else snprintf(cpeer, 64, "%d", req->hdr.hdr_match.hdr_src); + if( OMPI_ANY_TAG == req->hdr.hdr_match.hdr_tag ) snprintf(ctag, 64, "%s", "ANY_TAG"); + else snprintf(ctag, 64, "%d", req->hdr.hdr_match.hdr_tag); + // opal_output(0, "peer %s tag %s",// addr %p count %lu datatype %s [%p] [%s %s] req_seq %" PRIu64, + // /*(void*) req,*/ cpeer, ctag, + //(void*) req->req_addr, req->req_count, + //(0 != req->req_count ? req->req_datatype->name : "N/A"), + //(void*) req->req_datatype, + //(req->req_pml_complete ? "pml_complete" : ""), + //(req->req_free_called ? "freed" : ""), + //req->req_sequence); + // ); + + } + } + } +} + +#endif diff --git a/ompi/mca/pml/ob1/custommatch/pml_ob1_custom_match_fuzzy512-short.h b/ompi/mca/pml/ob1/custommatch/pml_ob1_custom_match_fuzzy512-short.h new file mode 100644 index 0000000000..73d47debf1 --- /dev/null +++ b/ompi/mca/pml/ob1/custommatch/pml_ob1_custom_match_fuzzy512-short.h @@ -0,0 +1,589 @@ +/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ +/* + * Copyright (c) 2018 Los Alamos National Security, LLC. All rights + * reserved. + * Copyright (c) 2018 Sandia National Laboratories. All rights reserved. + * + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#ifndef PML_OB1_CUSTOM_MATCH_FUZZY512_SHORT_H +#define PML_OB1_CUSTOM_MATCH_FUZZY512_SHORT_H + +#include + +#include "../pml_ob1_recvreq.h" +#include "../pml_ob1_recvfrag.h" + +typedef struct custom_match_prq_node +{ + __m512i keys; + __m512i mask; + struct custom_match_prq_node* next; + int start, end; + void* value[32]; +} custom_match_prq_node; + +typedef struct custom_match_prq +{ + custom_match_prq_node* head; + custom_match_prq_node* tail; + custom_match_prq_node* pool; + int size; +} custom_match_prq; + +static inline int custom_match_prq_cancel(custom_match_prq* list, void* req) +{ +#if CUSTOM_MATCH_DEBUG_VERBOSE + printf("custom_match_prq_cancel - list: %x req: %x\n", list, req); +#endif + __mmask32 result = 0; + custom_match_prq_node* prev = 0; + custom_match_prq_node* elem = list->head; + int i; + while(elem) + { + for(i = elem->start; i <= elem->end; i++) + { + if(elem->value[i] == req) + { +#if CUSTOM_MATCH_DEBUG_VERBOSE + printf("Canceled!");// %x %x %x\n", req, req->req_tag, req->req_peer); +#endif + void* payload = elem->value[i]; + ((short*)(&(elem->keys)))[i] = ~0; + ((short*)(&(elem->mask)))[i] = ~0; + elem->value[i] = 0; + if(i == elem->start || i == elem->end) + { + while((elem->start <= elem->end) && (!(elem->value[elem->start]))) elem->start++; + while((elem->start <= elem->end) && (!(elem->value[elem->end]))) elem->end--; + if(elem->start > elem->end) + { + if(prev) + { + prev->next = elem->next; + } + else + { + list->head = elem->next; + } + if(!elem->next) + { + list->tail = prev; + } + elem->next = list->pool; + list->pool = elem; + } + } + list->size--; + return 1; + } + } + prev = elem; + elem = elem->next; + } + return 0; +} + +static inline void* custom_match_prq_find_verify(custom_match_prq* list, int tag, int peer) +{ +#if CUSTOM_MATCH_DEBUG_VERBOSE + printf("custom_match_prq_find_verify list: %x tag: %x peer: %x\n", list, tag, peer); +#endif + __mmask32 result = 0; + custom_match_prq_node* elem = list->head; + int i; + int16_t key = peer ^ tag; + __m512i search = _mm512_set1_epi16(key); + while(elem) + { + result = _mm512_cmpeq_epi16_mask(_mm512_and_epi32(elem->keys, elem->mask), _mm512_and_epi32(search, elem->mask)); + if(result) + { + for(i = elem->start; i <= elem->end; i++) + { + if((0x1 << i & result) && elem->value[i]) + { + mca_pml_base_request_t *req = (mca_pml_base_request_t *)elem->value[i]; + if((req->req_peer == peer || req->req_peer == OMPI_ANY_SOURCE) && (req->req_tag == tag || req->req_tag == OMPI_ANY_TAG)) + { +#if CUSTOM_MATCH_DEBUG_VERBOSE + printf("Found list: %x tag: %x peer: %x\n", list, req->req_tag, req->req_peer); +#endif + return elem->value[i]; + } + } + } + } + elem = elem->next; + } + return 0; +} + +static inline void* custom_match_prq_find_dequeue_verify(custom_match_prq* list, int tag, int peer) +{ +#if CUSTOM_MATCH_DEBUG_VERBOSE + printf("custom_match_prq_find_dequeue_verify list: %x:%d tag: %x peer: %x\n", list, list->size, tag, peer); +#endif + __mmask32 result = 0; + custom_match_prq_node* prev = 0; + custom_match_prq_node* elem = list->head; + int i; + int16_t key = peer ^ tag; + __m512i search = _mm512_set1_epi16(key); + while(elem) + { +#if CUSTOM_MATCH_DEBUG_VERBOSE + for(int iter = elem->start; iter <= elem->end; iter++) + { + printf("Search = %x, Element Key = %x, Element mask = %x", ((int32_t*) &search)[iter], ((int32_t*) &elem->keys)[iter], ((int32_t*) &elem->mask)[iter]); + } +#endif + result = _mm512_cmpeq_epi16_mask(_mm512_and_epi32(elem->keys, elem->mask), _mm512_and_epi32(search, elem->mask)); + if(result) + { + for(i = elem->start; i <= elem->end; i++) + { + mca_pml_base_request_t *req = (mca_pml_base_request_t *)elem->value[i]; + if((0x1 << i & result) && req && ((req->req_peer == peer || req->req_peer == OMPI_ANY_SOURCE) && (req->req_tag == tag || req->req_tag == OMPI_ANY_TAG))) + { + void* payload = elem->value[i]; + ((short*)(&(elem->keys)))[i] = ~0; + ((short*)(&(elem->mask)))[i] = ~0; + elem->value[i] = 0; + if(i == elem->start || i == elem->end) + { + while((elem->start <= elem->end) && (!(elem->value[elem->start]))) elem->start++; + while((elem->start <= elem->end) && (!(elem->value[elem->end]))) elem->end--; + if(elem->start > elem->end) + { + if(prev) + { + prev->next = elem->next; + } + else + { + list->head = elem->next; + } + if(!elem->next) + { + list->tail = prev; + } + elem->next = list->pool; + list->pool = elem; + } + } + list->size--; +#if CUSTOM_MATCH_DEBUG_VERBOSE + printf("Found list: %x tag: %x peer: %x\n", list, req->req_tag, req->req_peer); +#endif + return payload; + } + } + } + prev = elem; + elem = elem->next; + } + return 0; +} + + +static inline void custom_match_prq_append(custom_match_prq* list, void* payload, int tag, int source) +{ + int16_t key, mask; + key = source ^ tag; + if(source == OMPI_ANY_SOURCE || tag == OMPI_ANY_TAG) + { + mask = 0; + } + else + { + mask = ~0; + } + mca_pml_base_request_t *req = (mca_pml_base_request_t *)payload; +#if CUSTOM_MATCH_DEBUG_VERBOSE + printf("custom_match_prq_append list: %x key: %x mask: %x tag: %x peer: %x\n", list, key, mask, req->req_tag, req->req_peer); +#endif + int i; + custom_match_prq_node* elem; + if((!list->tail) || list->tail->end == 31) + { + if(list->pool) + { + elem = list->pool; + list->pool = list->pool->next; + } + else + { + elem = _mm_malloc(sizeof(custom_match_prq_node),64); + } + elem->keys = _mm512_set1_epi16(~0); + elem->mask = _mm512_set1_epi16(~0); + elem->next = 0; + elem->start = 0; + elem->end = -1; // we don't have an element yet + for(i = 0; i < 32; i++) elem->value[i] = 0; + if(list->tail) + { + list->tail->next = elem; + list->tail = elem; + } + else + { + list->head = elem; + list->tail = elem; + } + } + + elem = list->tail; + elem->end++; + ((short*)(&(elem->keys)))[elem->end] = key; + ((short*)(&(elem->mask)))[elem->end] = mask; + elem->value[elem->end] = payload; + list->size++; +#if CUSTOM_MATCH_DEBUG_VERBOSE + printf("Exiting custom_match_prq_append\n"); +#endif +} + + +static inline int custom_match_prq_size(custom_match_prq* list) +{ + return list->size; +} + +static inline custom_match_prq* custom_match_prq_init() +{ +#if CUSTOM_MATCH_DEBUG_VERBOSE + printf("custom_match_prq_init\n"); +#endif + custom_match_prq* list = _mm_malloc(sizeof(custom_match_prq),64); + list->head = 0; + list->tail = 0; + list->pool = 0; + list->size = 0; + return list; +} + +static inline void custom_match_prq_destroy(custom_match_prq* list) +{ +#if CUSTOM_MATCH_DEBUG_VERBOSE + printf("custom_match_prq_destroy\n"); +#endif + custom_match_prq_node* elem; + while(list->head) + { + elem = list->head; + list->head = list->head->next; + _mm_free(elem); + } + while(list->pool) + { + elem = list->pool; + list->pool = list->pool->next; + _mm_free(elem); + } + _mm_free(list); +} + +static inline void custom_match_print(custom_match_prq* list) +{ + custom_match_prq_node* elem; + int i = 0; + int j = 0; + printf("Elements in the list:\n"); + for(elem = list->head; elem; elem = elem->next) + { + printf("This is the %d linked list element\n", ++i); + for(j = 0; j < 32; j++) + { + printf("%d:%d The key is %d, the mask is %d, the value is %ld\n", i, j, ((short*)(&(elem->keys)))[j], ((short*)(&(elem->mask)))[j], elem->value[j]); + } + i++; + } +} + +static inline void custom_match_prq_dump(custom_match_prq* list) +{ + opal_list_item_t* item; + char cpeer[64], ctag[64]; + + custom_match_prq_node* elem; + int i = 0; + int j = 0; + printf("Elements in the list:\n"); + for(elem = list->head; elem; elem = elem->next) + { + printf("This is the %d linked list element\n", ++i); + for(j = 0; j < 32; j++) + { + if(elem->value[j]) + { + mca_pml_base_request_t *req = (mca_pml_base_request_t *)elem->value[j]; + if( OMPI_ANY_SOURCE == req->req_peer ) snprintf(cpeer, 64, "%s", "ANY_SOURCE"); + else snprintf(cpeer, 64, "%d", req->req_peer); + if( OMPI_ANY_TAG == req->req_tag ) snprintf(ctag, 64, "%s", "ANY_TAG"); + else snprintf(ctag, 64, "%d", req->req_tag); + opal_output(0, "req %p peer %s tag %s addr %p count %lu datatype %s [%p] [%s %s] req_seq %" PRIu64, + (void*) req, cpeer, ctag, + (void*) req->req_addr, req->req_count, + (0 != req->req_count ? req->req_datatype->name : "N/A"), + (void*) req->req_datatype, + (req->req_pml_complete ? "pml_complete" : ""), + (req->req_free_called ? "freed" : ""), + req->req_sequence); + + } + } + } +} + + +// UMQ below. + +typedef struct custom_match_umq_node +{ + __m512i keys; + struct custom_match_umq_node* next; + int start, end; + void* value[32]; +} custom_match_umq_node; + +typedef struct custom_match_umq +{ + custom_match_umq_node* head; + custom_match_umq_node* tail; + custom_match_umq_node* pool; + int size; +} custom_match_umq; + +static inline void custom_match_umq_dump(custom_match_umq* list); + +static inline void* custom_match_umq_find_verify_hold(custom_match_umq* list, int tag, int peer, custom_match_umq_node** hold_prev, custom_match_umq_node** hold_elem, int* hold_index) +{ +#if CUSTOM_MATCH_DEBUG_VERBOSE + printf("custom_match_umq_find_verify_hold list: %x:%d tag: %x peer: %x\n", list, list->size, tag, peer); + custom_match_umq_dump(list); +#endif + __mmask32 result = 0; + custom_match_umq_node* prev = 0; + custom_match_umq_node* elem = list->head; + int i; + int16_t key = peer ^ tag; + __m512i search = _mm512_set1_epi16(key); + + int16_t mask = ~0; + if(peer == OMPI_ANY_SOURCE || tag == OMPI_ANY_TAG) + { + mask = 0; + } + else + { + mask = ~0; + } + __m512i msearch = _mm512_set1_epi16(mask); + search = _mm512_and_epi32(search, msearch); + + while(elem) + { + result = _mm512_cmpeq_epi16_mask(_mm512_and_epi32(elem->keys,msearch), search); + if(result) + { + for(i = elem->start; i <= elem->end; i++) + { + if((0x1 << i & result) && elem->value[i]) + { + mca_pml_ob1_recv_frag_t *req = (mca_pml_ob1_recv_frag_t *)elem->value[i]; + if((req->hdr.hdr_match.hdr_src == peer || peer == OMPI_ANY_SOURCE) && (req->hdr.hdr_match.hdr_tag == tag || tag == OMPI_ANY_TAG)) + { +#if CUSTOM_MATCH_DEBUG_VERBOSE + printf("Found list: %x tag: %x peer: %x\n", list, req->hdr.hdr_match.hdr_tag, req->hdr.hdr_match.hdr_src); +#endif + *hold_prev = prev; + *hold_elem = elem; + *hold_index = i; + return elem->value[i]; + } + } + } + } + prev = elem; + elem = elem->next; + } + return 0; +} + + +static inline void custom_match_umq_remove_hold(custom_match_umq* list, custom_match_umq_node* prev, custom_match_umq_node* elem, int i) +{ +#if CUSTOM_MATCH_DEBUG_VERBOSE + printf("custom_match_umq_find_remove_hold %x %x %x\n", prev, elem, i); +#endif + ((short*)(&(elem->keys)))[i] = ~0; + elem->value[i] = 0; + if(i == elem->start || i == elem->end) + { + while((elem->start <= elem->end) && (!(elem->value[elem->start]))) elem->start++; + while((elem->start <= elem->end) && (!(elem->value[elem->end]))) elem->end--; + if(elem->start > elem->end) + { + if(prev) + { + prev->next = elem->next; + } + else + { + list->head = elem->next; + } + if(!elem->next) + { + list->tail = prev; + } + elem->next = list->pool; + list->pool = elem; + } + } + list->size--; +} + +static inline void custom_match_umq_append(custom_match_umq* list, int tag, int source, void* payload) +{ + int16_t key = source ^ tag; +#if CUSTOM_MATCH_DEBUG_VERBOSE + ((int8_t*)&key)[3] = (int8_t) tag; // MGFD TODO verify this set higer order bits... + mca_pml_ob1_recv_frag_t *req = (mca_pml_ob1_recv_frag_t *)payload; + printf("custom_match_umq_append list: %x key: %x payload: %x tag: %d src: %d\n", list, key, payload, req->hdr.hdr_match.hdr_tag, req->hdr.hdr_match.hdr_src); +#endif + int i; + custom_match_umq_node* elem; + list->size++; + if((!list->tail) || list->tail->end == 31) + { + if(list->pool) + { +#if CUSTOM_MATCH_DEBUG_VERBOSE + printf("Grab an element from the pool\n"); +#endif + elem = list->pool; + list->pool = list->pool->next; + } + else + { +#if CUSTOM_MATCH_DEBUG_VERBOSE + printf("Make a new element\n"); +#endif + elem = _mm_malloc(sizeof(custom_match_umq_node),64); + } + elem->keys = _mm512_set1_epi16(~0); // TODO: we may only have to do this type of initialization for freshly malloc'd entries. + elem->next = 0; + elem->start = 0; + elem->end = -1; // we don't have an element yet + for(i = 0; i < 32; i++) elem->value[i] = 0; + if(list->tail) + { +#if CUSTOM_MATCH_DEBUG_VERBOSE + printf("Append to list of elems\n"); +#endif + list->tail->next = elem; + list->tail = elem; + } + else + { +#if CUSTOM_MATCH_DEBUG_VERBOSE + printf("New Elem is only Elem\n"); +#endif + list->head = elem; + list->tail = elem; + } + } + + elem = list->tail; + elem->end++; + ((short*)(&(elem->keys)))[elem->end] = key; + elem->value[elem->end] = payload; +#if CUSTOM_MATCH_DEBUG_VERBOSE + custom_match_umq_dump(list); +#endif +} + +static inline custom_match_umq* custom_match_umq_init() +{ +#if CUSTOM_MATCH_DEBUG_VERBOSE + printf("custom_match_umq_init\n"); +#endif + custom_match_umq* list = _mm_malloc(sizeof(custom_match_umq),64); + list->head = 0; + list->tail = 0; + list->pool = 0; + list->size = 0; + return list; +} + +static inline void custom_match_umq_destroy(custom_match_umq* list) +{ +#if CUSTOM_MATCH_DEBUG_VERBOSE + printf("custom_match_umq_destroy\n"); +#endif + custom_match_umq_node* elem; + while(list->head) + { + elem = list->head; + list->head = list->head->next; + _mm_free(elem); + } + while(list->pool) + { + elem = list->pool; + list->pool = list->pool->next; + _mm_free(elem); + } + _mm_free(list); +} + +static inline int custom_match_umq_size(custom_match_umq* list) +{ + return list->size; +} + +static inline void custom_match_umq_dump(custom_match_umq* list) +{ + char cpeer[64], ctag[64]; + + custom_match_umq_node* elem; + int i = 0; + int j = 0; + printf("Elements in the list:\n"); + for(elem = list->head; elem; elem = elem->next) + { + printf("This is the %d linked list element\n", ++i); + for(j = 0; j < 32; j++) + { + if(elem->value[j]) + { + mca_pml_ob1_recv_frag_t *req = (mca_pml_ob1_recv_frag_t *)elem->value[j]; + printf("%x %x %x\n", elem->value[j], req->hdr.hdr_match.hdr_tag, req->hdr.hdr_match.hdr_src); + if( OMPI_ANY_SOURCE == req->hdr.hdr_match.hdr_src ) snprintf(cpeer, 64, "%s", "ANY_SOURCE"); + else snprintf(cpeer, 64, "%d", req->hdr.hdr_match.hdr_src); + if( OMPI_ANY_TAG == req->hdr.hdr_match.hdr_tag ) snprintf(ctag, 64, "%s", "ANY_TAG"); + else snprintf(ctag, 64, "%d", req->hdr.hdr_match.hdr_tag); + // opal_output(0, "peer %s tag %s",// addr %p count %lu datatype %s [%p] [%s %s] req_seq %" PRIu64, + // /*(void*) req,*/ cpeer, ctag, + //(void*) req->req_addr, req->req_count, + //(0 != req->req_count ? req->req_datatype->name : "N/A"), + //(void*) req->req_datatype, + //(req->req_pml_complete ? "pml_complete" : ""), + //(req->req_free_called ? "freed" : ""), + //req->req_sequence); + // ); + + } + } + } +} + +#endif diff --git a/ompi/mca/pml/ob1/custommatch/pml_ob1_custom_match_fuzzy512-word.h b/ompi/mca/pml/ob1/custommatch/pml_ob1_custom_match_fuzzy512-word.h new file mode 100644 index 0000000000..35ff477f2a --- /dev/null +++ b/ompi/mca/pml/ob1/custommatch/pml_ob1_custom_match_fuzzy512-word.h @@ -0,0 +1,607 @@ +/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ +/* + * Copyright (c) 2018 Los Alamos National Security, LLC. All rights + * reserved. + * Copyright (c) 2018 Sandia National Laboratories. All rights reserved. + * + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#ifndef PML_OB1_CUSTOM_MATCH_FUZZY512_SHORT_H +#define PML_OB1_CUSTOM_MATCH_FUZZY512_SHORT_H + +#include + +#include "ompi/mca/pml/ob1/pml_ob1_recvfrag.h" +#include "ompi/mca/pml/ob1/pml_ob1_recvreq.h" + +typedef struct custom_match_prq_node +{ + __m512i keys; + __m512i mask; + struct custom_match_prq_node* next; + int start, end; + void* value[16]; +} custom_match_prq_node; + +typedef struct custom_match_prq +{ + custom_match_prq_node* head; + custom_match_prq_node* tail; + custom_match_prq_node* pool; + int size; +} custom_match_prq; + +static inline int custom_match_prq_cancel(custom_match_prq* list, void* req) +{ +#if CUSTOM_MATCH_DEBUG_VERBOSE + printf("custom_match_prq_cancel - list: %x req: %x\n", list, req); +#endif + custom_match_prq_node* prev = 0; + custom_match_prq_node* elem = list->head; + int i; + while(elem) + { + for(i = elem->start; i <= elem->end; i++) + { + if(elem->value[i] == req) + { +#if CUSTOM_MATCH_DEBUG_VERBOSE + printf("Canceled!");// %x %x %x\n", req, req->req_tag, req->req_peer); +#endif + ((int*)(&(elem->keys)))[i] = ~0; + ((int*)(&(elem->mask)))[i] = ~0; + elem->value[i] = 0; + if(i == elem->start || i == elem->end) + { + while((elem->start <= elem->end) && (!(elem->value[elem->start]))) elem->start++; + while((elem->start <= elem->end) && (!(elem->value[elem->end]))) elem->end--; + if(elem->start > elem->end) + { + if(prev) + { + prev->next = elem->next; + } + else + { + list->head = elem->next; + } + if(!elem->next) + { + list->tail = prev; + } + elem->next = list->pool; + list->pool = elem; + } + } + list->size--; + return 1; + } + } + prev = elem; + elem = elem->next; + } + return 0; +} + +static inline void* custom_match_prq_find_verify(custom_match_prq* list, int tag, int peer) +{ +#if CUSTOM_MATCH_DEBUG_VERBOSE + printf("custom_match_prq_find_verify list: %x tag: %x peer: %x\n", list, tag, peer); +#endif + __mmask16 result = 0; + custom_match_prq_node* elem = list->head; + int i; + int32_t key = peer; + ((int8_t*)&key)[3] = (int8_t) tag; // MGFD TODO verify this set higer order bits... + __m512i search = _mm512_set1_epi32(key); + while(elem) + { + result = _mm512_cmpeq_epi32_mask(_mm512_and_epi32(elem->keys, elem->mask), _mm512_and_epi32(search, elem->mask)); + if(result) + { + for(i = elem->start; i <= elem->end; i++) + { + if((0x1 << i & result) && elem->value[i]) + { + mca_pml_base_request_t *req = (mca_pml_base_request_t *)elem->value[i]; + if((req->req_peer == peer || req->req_peer == OMPI_ANY_SOURCE) && (req->req_tag == tag || req->req_tag == OMPI_ANY_TAG)) + { +#if CUSTOM_MATCH_DEBUG_VERBOSE + printf("Found list: %x tag: %x peer: %x\n", list, req->req_tag, req->req_peer); +#endif + return elem->value[i]; + } + } + } + } + elem = elem->next; + } + return 0; +} + +static inline void* custom_match_prq_find_dequeue_verify(custom_match_prq* list, int tag, int peer) +{ +#if CUSTOM_MATCH_DEBUG_VERBOSE + printf("custom_match_prq_find_dequeue_verify list: %x:%d tag: %x peer: %x\n", list, list->size, tag, peer); +#endif + __mmask16 result = 0; + custom_match_prq_node* prev = 0; + custom_match_prq_node* elem = list->head; + int i; + int32_t key = peer; + ((int8_t*)&key)[3] = (int8_t) tag; // MGFD TODO verify this set higer order bits... + __m512i search = _mm512_set1_epi32(key); + while(elem) + { +#if CUSTOM_MATCH_DEBUG_VERBOSE + for(int iter = elem->start; iter <= elem->end; iter++) + { + printf("Search = %x, Element Key = %x, Element mask = %x", ((int32_t*) &search)[iter], ((int32_t*) &elem->keys)[iter], ((int32_t*) &elem->mask)[iter]); + } +#endif + result = _mm512_cmpeq_epi32_mask(_mm512_and_epi32(elem->keys, elem->mask), _mm512_and_epi32(search, elem->mask)); + if(result) + { + for(i = elem->start; i <= elem->end; i++) + { + mca_pml_base_request_t *req = (mca_pml_base_request_t *)elem->value[i]; + if((0x1 << i & result) && req && ((req->req_peer == peer || req->req_peer == OMPI_ANY_SOURCE) && (req->req_tag == tag || req->req_tag == OMPI_ANY_TAG))) + { + void* payload = elem->value[i]; + ((int*)(&(elem->keys)))[i] = ~0; + ((int*)(&(elem->mask)))[i] = ~0; + elem->value[i] = 0; + if(i == elem->start || i == elem->end) + { + while((elem->start <= elem->end) && (!(elem->value[elem->start]))) elem->start++; + while((elem->start <= elem->end) && (!(elem->value[elem->end]))) elem->end--; + if(elem->start > elem->end) + { + if(prev) + { + prev->next = elem->next; + } + else + { + list->head = elem->next; + } + if(!elem->next) + { + list->tail = prev; + } + elem->next = list->pool; + list->pool = elem; + } + } + list->size--; +#if CUSTOM_MATCH_DEBUG_VERBOSE + printf("Found list: %x tag: %x peer: %x\n", list, req->req_tag, req->req_peer); +#endif + return payload; + } + } + } + prev = elem; + elem = elem->next; + } + return 0; +} + + +static inline void custom_match_prq_append(custom_match_prq* list, void* payload, int tag, int source) +{ + int32_t key, mask; + if(source == OMPI_ANY_SOURCE) + { + key = source; + mask = 0; + } + else + { + key = source; + mask = ~0; + } + if(tag == OMPI_ANY_TAG) + { + ((int8_t*)&key)[3] = (int8_t)tag; + ((int8_t*)&mask)[3] = (int8_t)0; + } + else + { + ((int8_t*)&key)[3] = (int8_t)tag; + ((int8_t*)&mask)[3] = (int8_t)~0; + } +#if CUSTOM_MATCH_DEBUG_VERBOSE + mca_pml_base_request_t *req = (mca_pml_base_request_t *)payload; + printf("custom_match_prq_append list: %x key: %x mask: %x tag: %x peer: %x\n", list, key, mask, req->req_tag, req->req_peer); +#endif + int i; + custom_match_prq_node* elem; + if((!list->tail) || list->tail->end == 15) + { + if(list->pool) + { + elem = list->pool; + list->pool = list->pool->next; + } + else + { + elem = _mm_malloc(sizeof(custom_match_prq_node),64); + } + elem->keys = _mm512_set1_epi32(~0); + elem->mask = _mm512_set1_epi32(~0); + elem->next = 0; + elem->start = 0; + elem->end = -1; // we don't have an element yet + for(i = 0; i < 16; i++) elem->value[i] = 0; + if(list->tail) + { + list->tail->next = elem; + list->tail = elem; + } + else + { + list->head = elem; + list->tail = elem; + } + } + + elem = list->tail; + elem->end++; + ((int*)(&(elem->keys)))[elem->end] = key; + ((int*)(&(elem->mask)))[elem->end] = mask; + elem->value[elem->end] = payload; + list->size++; +#if CUSTOM_MATCH_DEBUG_VERBOSE + printf("Exiting custom_match_prq_append\n"); +#endif +} + +static inline int custom_match_prq_size(custom_match_prq* list) +{ + return list->size; +} + +static inline custom_match_prq* custom_match_prq_init() +{ +#if CUSTOM_MATCH_DEBUG_VERBOSE + printf("custom_match_prq_init\n"); +#endif + custom_match_prq* list = _mm_malloc(sizeof(custom_match_prq),64); + list->head = 0; + list->tail = 0; + list->pool = 0; + list->size = 0; + return list; +} + +static inline void custom_match_prq_destroy(custom_match_prq* list) +{ +#if CUSTOM_MATCH_DEBUG_VERBOSE + printf("custom_match_prq_destroy\n"); +#endif + custom_match_prq_node* elem; + while(list->head) + { + elem = list->head; + list->head = list->head->next; + _mm_free(elem); + } + while(list->pool) + { + elem = list->pool; + list->pool = list->pool->next; + _mm_free(elem); + } + _mm_free(list); +} + +static inline void custom_match_print(custom_match_prq* list) +{ + custom_match_prq_node* elem; + int i = 0; + int j = 0; + printf("Elements in the list:\n"); + for(elem = list->head; elem; elem = elem->next) + { + printf("This is the %d linked list element\n", ++i); + for(j = 0; j < 16; j++) + { + printf("%d:%d The key is %d, the mask is %d, the value is %lu\n", i, j, ((int*)(&(elem->keys)))[j], ((int*)(&(elem->mask)))[j], (uintptr_t) elem->value[j]); + } + i++; + } +} + +static inline void custom_match_prq_dump(custom_match_prq* list) +{ + char cpeer[64], ctag[64]; + + custom_match_prq_node* elem; + int i = 0; + int j = 0; + printf("Elements in the list:\n"); + for(elem = list->head; elem; elem = elem->next) + { + printf("This is the %d linked list element\n", ++i); + for(j = 0; j < 16; j++) + { + if(elem->value[j]) + { + mca_pml_base_request_t *req = (mca_pml_base_request_t *)elem->value[j]; + if( OMPI_ANY_SOURCE == req->req_peer ) snprintf(cpeer, 64, "%s", "ANY_SOURCE"); + else snprintf(cpeer, 64, "%d", req->req_peer); + if( OMPI_ANY_TAG == req->req_tag ) snprintf(ctag, 64, "%s", "ANY_TAG"); + else snprintf(ctag, 64, "%d", req->req_tag); + opal_output(0, "req %p peer %s tag %s addr %p count %lu datatype %s [%p] [%s %s] req_seq %" PRIu64, + (void*) req, cpeer, ctag, + (void*) req->req_addr, req->req_count, + (0 != req->req_count ? req->req_datatype->name : "N/A"), + (void*) req->req_datatype, + (req->req_pml_complete ? "pml_complete" : ""), + (req->req_free_called ? "freed" : ""), + req->req_sequence); + + } + } + } +} + + +// UMQ below. + +typedef struct custom_match_umq_node +{ + __m512i keys; + struct custom_match_umq_node* next; + int start, end; + void* value[16]; +} custom_match_umq_node; + +typedef struct custom_match_umq +{ + custom_match_umq_node* head; + custom_match_umq_node* tail; + custom_match_umq_node* pool; + int size; +} custom_match_umq; + +static inline void custom_match_umq_dump(custom_match_umq* list); + +static inline void* custom_match_umq_find_verify_hold(custom_match_umq* list, int tag, int peer, custom_match_umq_node** hold_prev, custom_match_umq_node** hold_elem, int* hold_index) +{ +#if CUSTOM_MATCH_DEBUG_VERBOSE + printf("custom_match_umq_find_verify_hold list: %x:%d tag: %x peer: %x\n", list, list->size, tag, peer); + custom_match_umq_dump(list); +#endif + __mmask16 result = 0; + custom_match_umq_node* prev = 0; + custom_match_umq_node* elem = list->head; + int i; + int32_t key = peer; + ((int8_t*)&key)[3] = (int8_t) tag; // MGFD TODO verify this set higer order bits... + __m512i search = _mm512_set1_epi32(key); + + int32_t mask = ~0; + if(peer == OMPI_ANY_SOURCE) + { + mask = 0; + } + else + { + mask = ~0; + } + if(tag == OMPI_ANY_TAG) + { + ((int8_t*)&mask)[3] = (int8_t)0; + } + else + { + ((int8_t*)&mask)[3] = (int8_t)~0; + } + __m512i msearch = _mm512_set1_epi32(mask); + search = _mm512_and_epi32(search, msearch); + + while(elem) + { + result = _mm512_cmpeq_epi32_mask(_mm512_and_epi32(elem->keys,msearch), search); + if(result) + { + for(i = elem->start; i <= elem->end; i++) + { + if((0x1 << i & result) && elem->value[i]) + { + mca_pml_ob1_recv_frag_t *req = (mca_pml_ob1_recv_frag_t *)elem->value[i]; + if((req->hdr.hdr_match.hdr_src == peer || peer == OMPI_ANY_SOURCE) && (req->hdr.hdr_match.hdr_tag == tag || tag == OMPI_ANY_TAG)) + { +#if CUSTOM_MATCH_DEBUG_VERBOSE + printf("Found list: %x tag: %x peer: %x\n", list, req->hdr.hdr_match.hdr_tag, req->hdr.hdr_match.hdr_src); +#endif + *hold_prev = prev; + *hold_elem = elem; + *hold_index = i; + return elem->value[i]; + } + } + } + } + prev = elem; + elem = elem->next; + } + return 0; +} + + +static inline void custom_match_umq_remove_hold(custom_match_umq* list, custom_match_umq_node* prev, custom_match_umq_node* elem, int i) +{ +#if CUSTOM_MATCH_DEBUG_VERBOSE + printf("custom_match_umq_find_remove_hold %x %x %x\n", prev, elem, i); +#endif + ((int*)(&(elem->keys)))[i] = ~0; + elem->value[i] = 0; + if(i == elem->start || i == elem->end) + { + while((elem->start <= elem->end) && (!(elem->value[elem->start]))) elem->start++; + while((elem->start <= elem->end) && (!(elem->value[elem->end]))) elem->end--; + if(elem->start > elem->end) + { + if(prev) + { + prev->next = elem->next; + } + else + { + list->head = elem->next; + } + if(!elem->next) + { + list->tail = prev; + } + elem->next = list->pool; + list->pool = elem; + } + } + list->size--; +} + +static inline void custom_match_umq_append(custom_match_umq* list, int tag, int source, void* payload) +{ + int32_t key = source; + ((int8_t*)&key)[3] = (int8_t) tag; // MGFD TODO verify this set higer order bits... +#if CUSTOM_MATCH_DEBUG_VERBOSE + mca_pml_ob1_recv_frag_t *req = (mca_pml_ob1_recv_frag_t *)payload; + printf("custom_match_umq_append list: %x key: %x payload: %x tag: %d src: %d\n", list, key, payload, req->hdr.hdr_match.hdr_tag, req->hdr.hdr_match.hdr_src); +#endif + int i; + custom_match_umq_node* elem; + list->size++; + if((!list->tail) || list->tail->end == 15) + { + if(list->pool) + { +#if CUSTOM_MATCH_DEBUG_VERBOSE + printf("Grab an element from the pool\n"); +#endif + elem = list->pool; + list->pool = list->pool->next; + } + else + { +#if CUSTOM_MATCH_DEBUG_VERBOSE + printf("Make a new element\n"); +#endif + elem = _mm_malloc(sizeof(custom_match_umq_node),64); + } + elem->keys = _mm512_set1_epi32(~0); // TODO: we only have to do this type of initialization for freshly malloc'd entries. + elem->next = 0; + elem->start = 0; + elem->end = -1; // we don't have an element yet + for(i = 0; i < 16; i++) elem->value[i] = 0; + if(list->tail) + { +#if CUSTOM_MATCH_DEBUG_VERBOSE + printf("Append to list of elems\n"); +#endif + list->tail->next = elem; + list->tail = elem; + } + else + { +#if CUSTOM_MATCH_DEBUG_VERBOSE + printf("New Elem is only Elem\n"); +#endif + list->head = elem; + list->tail = elem; + } + } + + elem = list->tail; + elem->end++; + ((int*)(&(elem->keys)))[elem->end] = key; + elem->value[elem->end] = payload; +#if CUSTOM_MATCH_DEBUG_VERBOSE + custom_match_umq_dump(list); +#endif +} + +static inline custom_match_umq* custom_match_umq_init() +{ +#if CUSTOM_MATCH_DEBUG_VERBOSE + printf("custom_match_umq_init\n"); +#endif + custom_match_umq* list = _mm_malloc(sizeof(custom_match_umq),64); + list->head = 0; + list->tail = 0; + list->pool = 0; + list->size = 0; + return list; +} + +static inline void custom_match_umq_destroy(custom_match_umq* list) +{ +#if CUSTOM_MATCH_DEBUG_VERBOSE + printf("custom_match_umq_destroy\n"); +#endif + custom_match_umq_node* elem; + while(list->head) + { + elem = list->head; + list->head = list->head->next; + _mm_free(elem); + } + while(list->pool) + { + elem = list->pool; + list->pool = list->pool->next; + _mm_free(elem); + } + _mm_free(list); +} + +static inline int custom_match_umq_size(custom_match_umq* list) +{ + return list->size; +} + +static inline void custom_match_umq_dump(custom_match_umq* list) +{ + char cpeer[64], ctag[64]; + + custom_match_umq_node* elem; + int i = 0; + int j = 0; + printf("Elements in the list:\n"); + for(elem = list->head; elem; elem = elem->next) + { + printf("This is the %d linked list element\n", ++i); + for(j = 0; j < 16; j++) + { + if(elem->value[j]) + { + mca_pml_ob1_recv_frag_t *req = (mca_pml_ob1_recv_frag_t *)elem->value[j]; + printf("%lx %x %x\n", (uintptr_t) elem->value[j], req->hdr.hdr_match.hdr_tag, req->hdr.hdr_match.hdr_src); + if( OMPI_ANY_SOURCE == req->hdr.hdr_match.hdr_src ) snprintf(cpeer, 64, "%s", "ANY_SOURCE"); + else snprintf(cpeer, 64, "%d", req->hdr.hdr_match.hdr_src); + if( OMPI_ANY_TAG == req->hdr.hdr_match.hdr_tag ) snprintf(ctag, 64, "%s", "ANY_TAG"); + else snprintf(ctag, 64, "%d", req->hdr.hdr_match.hdr_tag); + // opal_output(0, "peer %s tag %s",// addr %p count %lu datatype %s [%p] [%s %s] req_seq %" PRIu64, + // /*(void*) req,*/ cpeer, ctag, + //(void*) req->req_addr, req->req_count, + //(0 != req->req_count ? req->req_datatype->name : "N/A"), + //(void*) req->req_datatype, + //(req->req_pml_complete ? "pml_complete" : ""), + //(req->req_free_called ? "freed" : ""), + //req->req_sequence); + // ); + + } + } + } +} + +#endif diff --git a/ompi/mca/pml/ob1/custommatch/pml_ob1_custom_match_linkedlist.h b/ompi/mca/pml/ob1/custommatch/pml_ob1_custom_match_linkedlist.h new file mode 100644 index 0000000000..042959c9c3 --- /dev/null +++ b/ompi/mca/pml/ob1/custommatch/pml_ob1_custom_match_linkedlist.h @@ -0,0 +1,526 @@ +/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ +/* + * Copyright (c) 2018 Los Alamos National Security, LLC. All rights + * reserved. + * Copyright (c) 2018 Sandia National Laboratories. All rights reserved. + * + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#ifndef PML_OB1_CUSTOM_MATCH_LINKEDLIST_H +#define PML_OB1_CUSTOM_MATCH_LINKEDLIST_H + +#include "../pml_ob1_recvreq.h" +#include "../pml_ob1_recvfrag.h" + +typedef struct custom_match_prq_node +{ + int tag; + int tmask; + int src; + int smask; + struct custom_match_prq_node* next; + void* value; +} custom_match_prq_node; + +typedef struct custom_match_prq +{ + custom_match_prq_node* head; + custom_match_prq_node* tail; + custom_match_prq_node* pool; + int size; +} custom_match_prq; + +static inline int custom_match_prq_cancel(custom_match_prq* list, void* req) +{ +#if CUSTOM_MATCH_DEBUG_VERBOSE + printf("custom_match_prq_cancel - list: %x req: %x\n", list, req); +#endif + custom_match_prq_node* prev = 0; + custom_match_prq_node* elem = list->head; + int i; + while(elem) + { + if(elem->value == req) + { + // printf("Canceled!");// %x %x %x\n", req, req->req_tag, req->req_peer); + elem->tag = ~0; + elem->tmask = ~0; + elem->src = ~0; + elem->smask = ~0; + elem->value = 0; + if(prev) + { + prev->next = elem->next; + } + else + { + list->head = elem->next; + } + if(!elem->next) + { + list->tail = prev; + } + elem->next = list->pool; + list->pool = elem; + list->size--; + return 1; + } + prev = elem; + elem = elem->next; + } + return 0; +} + +static inline void* custom_match_prq_find_verify(custom_match_prq* list, int tag, int peer) +{ +#if CUSTOM_MATCH_DEBUG_VERBOSE + printf("custom_match_prq_find_verify list: %x tag: %x peer: %x\n", list, tag, peer); +#endif + custom_match_prq_node* elem = list->head; + int result; + + while(elem) + { + result = ((elem->tag & elem->tmask) == (tag & elem->tmask)) && + ((elem->src & elem->smask) == (peer & elem->smask)); + if(result) + { + return elem->value; + } + elem = elem->next; + } + return 0; +} + +static inline void* custom_match_prq_find_dequeue_verify(custom_match_prq* list, int tag, int peer) +{ +#if CUSTOM_MATCH_DEBUG_VERBOSE + printf("custom_match_prq_find_dequeue_verify list: %x:%d tag: %x peer: %x\n", list, list->size, tag, peer); +#endif + custom_match_prq_node* prev = 0; + custom_match_prq_node* elem = list->head; + int result; + + while(elem) + { + result = ((elem->tag & elem->tmask) == (tag & elem->tmask)) && + ((elem->src & elem->smask) == (peer & elem->smask)); + if(result) + { + void* payload = elem->value; + elem->tag = ~0; + elem->tmask = ~0; + elem->src = ~0; + elem->smask = ~0; + elem->value = 0; + if(prev) + { + prev->next = elem->next; + } + else + { + list->head = elem->next; + } + if(!elem->next) + { + list->tail = prev; + } + elem->next = list->pool; + list->pool = elem; +#if CUSTOM_MATCH_DEBUG_VERBOSE + printf("%x == %x added to the pool\n", elem, list->pool); +#endif + list->size--; + mca_pml_base_request_t *req = (mca_pml_base_request_t *)payload; +#if CUSTOM_MATCH_DEBUG_VERBOSE + printf("Found list: %x tag: %x peer: %x\n", list, req->req_tag, req->req_peer); +#endif + return payload; + } + prev = elem; + elem = elem->next; + } + return 0; +} + + +static inline void custom_match_prq_append(custom_match_prq* list, void* payload, int tag, int source) +{ + int32_t mask_tag, mask_src; + if(source == OMPI_ANY_SOURCE) + { + mask_src = 0; + } + else + { + mask_src = ~0; + } + if(tag == OMPI_ANY_TAG) + { + mask_tag = 0; + } + else + { + mask_tag = ~0; + } + mca_pml_base_request_t *req = (mca_pml_base_request_t *)payload; +#if CUSTOM_MATCH_DEBUG_VERBOSE + printf("custom_match_prq_append list: %x tag: %x soruce: %x tag: %x peer: %x\n", list, tag, source, req->req_tag, req->req_peer); +#endif + int i; + custom_match_prq_node* elem; +#if CUSTOM_MATCH_DEBUG_VERBOSE + printf("%x next elem in the pool\n", list->pool); +#endif + if(list->pool) + { + elem = list->pool; + list->pool = list->pool->next; + } + else + { + elem = malloc(sizeof(custom_match_prq_node)); + } + elem->next = 0; + if(list->tail) + { + list->tail->next = elem; + list->tail = elem; + } + else + { + list->head = elem; + list->tail = elem; + } + + elem = list->tail; + elem->tag = tag; + elem->tmask = mask_tag; + elem->src = source; + elem->smask = mask_src; + elem->value = payload; + list->size++; +#if CUSTOM_MATCH_DEBUG_VERBOSE + printf("Exiting custom_match_prq_append\n"); +#endif +} + +static inline int custom_match_prq_size(custom_match_prq* list) +{ + return list->size; +} + +static inline custom_match_prq* custom_match_prq_init() +{ +#if CUSTOM_MATCH_DEBUG_VERBOSE + printf("custom_match_prq_init\n"); +#endif + custom_match_prq* list = malloc(sizeof(custom_match_prq)); + list->head = 0; + list->tail = 0; + list->pool = 0; + list->size = 0; + return list; +} + +static inline void custom_match_prq_destroy(custom_match_prq* list) +{ +#if CUSTOM_MATCH_DEBUG_VERBOSE + printf("custom_match_prq_destroy\n"); +#endif + custom_match_prq_node* elem; + int i = 0; + int j = 0; + while(list->head) + { + elem = list->head; + list->head = list->head->next; + free(elem); + i++; + } + while(list->pool) + { + elem = list->pool; + list->pool = list->pool->next; + free(elem); + j++; + } + free(list); +#if CUSTOM_MATCH_DEBUG_VERBOSE + printf("Number of prq elements destroyed = %d %d\n", i, j); +#endif +} + +static inline void custom_match_print(custom_match_prq* list) +{ + custom_match_prq_node* elem; + int i = 0; + int j = 0; + printf("Elements in the list (this is currenly only partialy implemented):\n"); + for(elem = list->head; elem; elem = elem->next) + { + printf("This is the %d linked list element\n", ++i); + printf("%d The key is %d, the mask is %d, the value is %ld\n", i, elem->tag, elem->tmask, elem->value); + i++; + } +} + +static inline void custom_match_prq_dump(custom_match_prq* list) +{ + opal_list_item_t* item; + char cpeer[64], ctag[64]; + + custom_match_prq_node* elem; + int i = 0; + int j = 0; + printf("Elements in the list:\n"); + for(elem = list->head; elem; elem = elem->next) + { + printf("This is the %d linked list element\n", ++i); + if(elem->value) + { + mca_pml_base_request_t *req = (mca_pml_base_request_t *)elem->value; + if( OMPI_ANY_SOURCE == req->req_peer ) snprintf(cpeer, 64, "%s", "ANY_SOURCE"); + else snprintf(cpeer, 64, "%d", req->req_peer); + if( OMPI_ANY_TAG == req->req_tag ) snprintf(ctag, 64, "%s", "ANY_TAG"); + else snprintf(ctag, 64, "%d", req->req_tag); + opal_output(0, "req %p peer %s tag %s addr %p count %lu datatype %s [%p] [%s %s] req_seq %" PRIu64, + (void*) req, cpeer, ctag, + (void*) req->req_addr, req->req_count, + (0 != req->req_count ? req->req_datatype->name : "N/A"), + (void*) req->req_datatype, + (req->req_pml_complete ? "pml_complete" : ""), + (req->req_free_called ? "freed" : ""), + req->req_sequence); + + } + } +} + + +// UMQ below. + +typedef struct custom_match_umq_node +{ + int tag; + int src; + struct custom_match_umq_node* next; + void* value; +} custom_match_umq_node; + +typedef struct custom_match_umq +{ + custom_match_umq_node* head; + custom_match_umq_node* tail; + custom_match_umq_node* pool; + int size; +} custom_match_umq; + +static inline void custom_match_umq_dump(custom_match_umq* list); + +static inline void* custom_match_umq_find_verify_hold(custom_match_umq* list, int tag, int peer, custom_match_umq_node** hold_prev, custom_match_umq_node** hold_elem, int* hold_index) +{ +#if CUSTOM_MATCH_DEBUG_VERBOSE + printf("custom_match_umq_find_verify_hold list: %x:%d tag: %x peer: %x\n", list, list->size, tag, peer); + custom_match_umq_dump(list); +#endif + custom_match_umq_node* prev = 0; + custom_match_umq_node* elem = list->head; + int result; + + int tmask = ~0; + int smask = ~0; + if(peer == OMPI_ANY_SOURCE) + { + smask = 0; + } + + if(tag == OMPI_ANY_TAG) + { + tmask = 0; + } + + tag = tag & tmask; + peer = peer & smask; + + while(elem) + { + result = ((elem->tag & tmask) == tag) && + ((elem->src & smask) == peer); + if(result) + { +#if CUSTOM_MATCH_DEBUG_VERBOSE + printf("Found list: %x tag: %x peer: %x\n", list, tag, peer); +#endif + *hold_prev = prev; + *hold_elem = elem; + *hold_index = 0; + return elem->value; + } + prev = elem; + elem = elem->next; + } + return 0; +} + + +static inline void custom_match_umq_remove_hold(custom_match_umq* list, custom_match_umq_node* prev, custom_match_umq_node* elem, int i) +{ +#if CUSTOM_MATCH_DEBUG_VERBOSE + printf("custom_match_umq_find_remove_hold %x %x %x\n", prev, elem, i); +#endif + elem->tag = ~0; + elem->src = ~0; + elem->value = 0; + if(prev) + { + prev->next = elem->next; + } + else + { + list->head = elem->next; + } + if(!elem->next) + { + list->tail = prev; + } + elem->next = list->pool; + list->pool = elem; + list->size--; +} + +static inline void custom_match_umq_append(custom_match_umq* list, int tag, int source, void* payload) +{ +#if CUSTOM_MATCH_DEBUG_VERBOSE + printf("custom_match_umq_append list: %x payload: %x tag: %d src: %d\n", list, payload, tag, source); +#endif + int i; + custom_match_umq_node* elem; + list->size++; + if(list->pool) + { +#if CUSTOM_MATCH_DEBUG_VERBOSE + printf("Grab an element from the pool\n"); +#endif + elem = list->pool; + list->pool = list->pool->next; + } + else + { +#if CUSTOM_MATCH_DEBUG_VERBOSE + printf("Make a new element\n"); +#endif + elem = malloc(sizeof(custom_match_umq_node)); + } + elem->next = 0; + if(list->tail) + { +#if CUSTOM_MATCH_DEBUG_VERBOSE + printf("Append to list of elems\n"); +#endif + list->tail->next = elem; + list->tail = elem; + } + else + { +#if CUSTOM_MATCH_DEBUG_VERBOSE + printf("New Elem is only Elem\n"); +#endif + list->head = elem; + list->tail = elem; + } + + elem = list->tail; + elem->tag = tag; + elem->src = source; + elem->value = payload; +#if CUSTOM_MATCH_DEBUG_VERBOSE + custom_match_umq_dump(list); +#endif +} + +static inline custom_match_umq* custom_match_umq_init() +{ +#if CUSTOM_MATCH_DEBUG_VERBOSE + printf("custom_match_umq_init\n"); +#endif + custom_match_umq* list = malloc(sizeof(custom_match_umq)); + list->head = 0; + list->tail = 0; + list->pool = 0; + list->size = 0; + return list; +} + +static inline void custom_match_umq_destroy(custom_match_umq* list) +{ +#if CUSTOM_MATCH_DEBUG_VERBOSE + printf("custom_match_umq_destroy\n"); +#endif + custom_match_umq_node* elem; + int i = 0; + int j = 0; + while(list->head) + { + elem = list->head; + list->head = list->head->next; + free(elem); + i++; + } + while(list->pool) + { + elem = list->pool; + list->pool = list->pool->next; + free(elem); + j++; + } + free(list); +#if CUSTOM_MATCH_DEBUG_VERBOSE + printf("Number of umq elements destroyed = %d %d\n", i, j); +#endif +} + +static inline int custom_match_umq_size(custom_match_umq* list) +{ + return list->size; +} + +static inline void custom_match_umq_dump(custom_match_umq* list) +{ + char cpeer[64], ctag[64]; + + custom_match_umq_node* elem; + int i = 0; + int j = 0; + printf("Elements in the list:\n"); + for(elem = list->head; elem; elem = elem->next) + { + printf("This is the %d linked list element\n", ++i); + if(elem->value) + { + mca_pml_ob1_recv_frag_t *req = (mca_pml_ob1_recv_frag_t *)elem->value; + printf("%x %x %x\n", elem->value, req->hdr.hdr_match.hdr_tag, req->hdr.hdr_match.hdr_src); + if( OMPI_ANY_SOURCE == req->hdr.hdr_match.hdr_src ) snprintf(cpeer, 64, "%s", "ANY_SOURCE"); + else snprintf(cpeer, 64, "%d", req->hdr.hdr_match.hdr_src); + if( OMPI_ANY_TAG == req->hdr.hdr_match.hdr_tag ) snprintf(ctag, 64, "%s", "ANY_TAG"); + else snprintf(ctag, 64, "%d", req->hdr.hdr_match.hdr_tag); + // opal_output(0, "peer %s tag %s",// addr %p count %lu datatype %s [%p] [%s %s] req_seq %" PRIu64, + // /*(void*) req,*/ cpeer, ctag, + //(void*) req->req_addr, req->req_count, + //(0 != req->req_count ? req->req_datatype->name : "N/A"), + //(void*) req->req_datatype, + //(req->req_pml_complete ? "pml_complete" : ""), + //(req->req_free_called ? "freed" : ""), + //req->req_sequence); + // ); + + } + } +} + +#endif diff --git a/ompi/mca/pml/ob1/custommatch/pml_ob1_custom_match_vectors.h b/ompi/mca/pml/ob1/custommatch/pml_ob1_custom_match_vectors.h new file mode 100644 index 0000000000..bde8e881c3 --- /dev/null +++ b/ompi/mca/pml/ob1/custommatch/pml_ob1_custom_match_vectors.h @@ -0,0 +1,597 @@ +/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ +/* + * Copyright (c) 2018 Los Alamos National Security, LLC. All rights + * reserved. + * Copyright (c) 2018 Sandia National Laboratories. All rights reserved. + * + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#ifndef PML_OB1_CUSTOM_MATCH_VECTORS_H +#define PML_OB1_CUSTOM_MATCH_VECTORS_H + +#include + +#include "../pml_ob1_recvreq.h" +#include "../pml_ob1_recvfrag.h" + +typedef struct custom_match_prq_node +{ + __m512i tags; + __m512i tmask; + __m512i srcs; + __m512i smask; + struct custom_match_prq_node* next; + int start, end; + void* value[16]; +} custom_match_prq_node; + +typedef struct custom_match_prq +{ + custom_match_prq_node* head; + custom_match_prq_node* tail; + custom_match_prq_node* pool; + int size; +} custom_match_prq; + +static inline int custom_match_prq_cancel(custom_match_prq* list, void* req) +{ +#if CUSTOM_MATCH_DEBUG_VERBOSE + printf("custom_match_prq_cancel - list: %p req: %p\n", (void *) list, req); +#endif + custom_match_prq_node* prev = 0; + custom_match_prq_node* elem = list->head; + int i; + while(elem) + { + for(i = elem->start; i <= elem->end; i++) + { + if(elem->value[i] == req) + { +#if CUSTOM_MATCH_DEBUG_VERBOSE + printf("Canceled!");// %x %x %x\n", req, req->req_tag, req->req_peer); +#endif + ((int*)(&(elem->tags)))[i] = ~0; + ((int*)(&(elem->tmask)))[i] = ~0; + ((int*)(&(elem->srcs)))[i] = ~0; + ((int*)(&(elem->smask)))[i] = ~0; + elem->value[i] = 0; + if(i == elem->start || i == elem->end) + { + while((elem->start <= elem->end) && (!(elem->value[elem->start]))) elem->start++; + while((elem->start <= elem->end) && (!(elem->value[elem->end]))) elem->end--; + if(elem->start > elem->end) + { + if(prev) + { + prev->next = elem->next; + } + else + { + list->head = elem->next; + } + if(!elem->next) + { + list->tail = prev; + } + elem->next = list->pool; + list->pool = elem; + } + } + list->size--; + return 1; + } + } + prev = elem; + elem = elem->next; + } + return 0; +} + +static inline void* custom_match_prq_find_verify(custom_match_prq* list, int tag, int peer) +{ +#if CUSTOM_MATCH_DEBUG_VERBOSE + printf("custom_match_prq_find_verify list: %p tag: %x peer: %x\n", (void *) list, tag, peer); +#endif + __mmask16 result = 0; + custom_match_prq_node* elem = list->head; + int i; + __m512i tsearch = _mm512_set1_epi32(tag); + __m512i ssearch = _mm512_set1_epi32(peer); + + while(elem) + { + result = _mm512_cmpeq_epi32_mask(_mm512_and_epi32(elem->tags, elem->tmask), _mm512_and_epi32(tsearch, elem->tmask)) & + _mm512_cmpeq_epi32_mask(_mm512_and_epi32(elem->srcs, elem->smask), _mm512_and_epi32(ssearch, elem->smask)); + if(result) + { + for(i = elem->start; i <= elem->end; i++) + { + if((0x1 << i & result) && elem->value[i]) + { +#if CUSTOM_MATCH_DEBUG_VERBOSE + mca_pml_base_request_t *req = (mca_pml_base_request_t *)elem->value[i]; + printf("Found list: %p tag: %x peer: %x\n", (void *) list, req->req_tag, req->req_peer); +#endif + return elem->value[i]; + } + } + } + elem = elem->next; + } + return 0; +} + +static inline void* custom_match_prq_find_dequeue_verify(custom_match_prq* list, int tag, int peer) +{ +#if CUSTOM_MATCH_DEBUG_VERBOSE + printf("custom_match_prq_find_dequeue_verify list: %p:%d tag: %x peer: %x\n", (void *) list, list->size, tag, peer); +#endif + __mmask16 result = 0; + custom_match_prq_node* prev = 0; + custom_match_prq_node* elem = list->head; + int i; + __m512i tsearch = _mm512_set1_epi32(tag); + __m512i ssearch = _mm512_set1_epi32(peer); + while(elem) + { +#if CUSTOM_MATCH_DEBUG_VERBOSE + for(int iter = elem->start; iter <= elem->end; iter++) + { + //printf("Search = %x, Element Key = %x, Element mask = %x", ((int32_t*) &search)[iter], ((int32_t*) &elem->keys)[iter], ((int32_t*) &elem->mask)[iter]); + } +#endif + result = _mm512_cmpeq_epi32_mask(_mm512_and_epi32(elem->tags, elem->tmask), _mm512_and_epi32(tsearch, elem->tmask)) & + _mm512_cmpeq_epi32_mask(_mm512_and_epi32(elem->srcs, elem->smask), _mm512_and_epi32(ssearch, elem->smask)); + if(result) + { + for(i = elem->start; i <= elem->end; i++) + { + mca_pml_base_request_t *req = (mca_pml_base_request_t *)elem->value[i]; + if((0x1 << i & result) && req && ((req->req_peer == peer || req->req_peer == OMPI_ANY_SOURCE) && (req->req_tag == tag || req->req_tag == OMPI_ANY_TAG))) + { + void* payload = elem->value[i]; + ((int*)(&(elem->tags)))[i] = ~0; + ((int*)(&(elem->tmask)))[i] = ~0; + ((int*)(&(elem->srcs)))[i] = ~0; + ((int*)(&(elem->smask)))[i] = ~0; + elem->value[i] = 0; + if(i == elem->start || i == elem->end) + { + while((elem->start <= elem->end) && (!(elem->value[elem->start]))) elem->start++; + while((elem->start <= elem->end) && (!(elem->value[elem->end]))) elem->end--; + if(elem->start > elem->end) + { + if(prev) + { + prev->next = elem->next; + } + else + { + list->head = elem->next; + } + if(!elem->next) + { + list->tail = prev; + } + elem->next = list->pool; + list->pool = elem; + } + } + list->size--; +#if CUSTOM_MATCH_DEBUG_VERBOSE + printf("Found list: %p tag: %x peer: %x\n", (void *) list, req->req_tag, req->req_peer); +#endif + return payload; + } + } + } + prev = elem; + elem = elem->next; + } + return 0; +} + + +static inline void custom_match_prq_append(custom_match_prq* list, void* payload, int tag, int source) +{ + int32_t mask_tag, mask_src; + if(source == OMPI_ANY_SOURCE) + { + mask_src = 0; + } + else + { + mask_src = ~0; + } + if(tag == OMPI_ANY_TAG) + { + mask_tag = 0; + } + else + { + mask_tag = ~0; + } +#if CUSTOM_MATCH_DEBUG_VERBOSE + mca_pml_base_request_t *req = (mca_pml_base_request_t *)payload; + printf("custom_match_prq_append list: %p mask_src: %x mask_tag: %x tag: %x peer: %x\n", (void *) list, + mask_src, mask_tag, req->req_tag, req->req_peer); +#endif + int i; + custom_match_prq_node* elem; + if((!list->tail) || list->tail->end == 15) + { + if(list->pool) + { + elem = list->pool; + list->pool = list->pool->next; + } + else + { + elem = _mm_malloc(sizeof(custom_match_prq_node),64); + //if(!elem) + //{ + // printf("Error: Couldn't create memory\n"); + //} + } + elem->tags = _mm512_set1_epi32(~0); // TODO: we only have to do this type of initialization for freshly malloc'd entries. + elem->tmask = _mm512_set1_epi32(~0); + elem->srcs = _mm512_set1_epi32(~0); + elem->smask = _mm512_set1_epi32(~0); + elem->next = 0; + elem->start = 0; + elem->end = -1; // we don't have an element yet + for(i = 0; i < 16; i++) elem->value[i] = 0; + if(list->tail) + { + list->tail->next = elem; + list->tail = elem; + } + else + { + list->head = elem; + list->tail = elem; + } + } + + elem = list->tail; + elem->end++; + ((int*)(&(elem->tags)))[elem->end] = tag; + ((int*)(&(elem->tmask)))[elem->end] = mask_tag; + ((int*)(&(elem->srcs)))[elem->end] = source; + ((int*)(&(elem->smask)))[elem->end] = mask_src; + elem->value[elem->end] = payload; + list->size++; +#if CUSTOM_MATCH_DEBUG_VERBOSE + printf("Exiting custom_match_prq_append\n"); +#endif +} + +static inline int custom_match_prq_size(custom_match_prq* list) +{ + return list->size; +} + +static inline custom_match_prq* custom_match_prq_init() +{ +#if CUSTOM_MATCH_DEBUG_VERBOSE + printf("custom_match_prq_init\n"); +#endif + custom_match_prq* list = _mm_malloc(sizeof(custom_match_prq),64); + list->head = 0; + list->tail = 0; + list->pool = 0; + list->size = 0; + return list; +} + +static inline void custom_match_prq_destroy(custom_match_prq* list) +{ +#if CUSTOM_MATCH_DEBUG_VERBOSE + printf("custom_match_prq_destroy\n"); +#endif + custom_match_prq_node* elem; + while(list->head) + { + elem = list->head; + list->head = list->head->next; + _mm_free(elem); + } + while(list->pool) + { + elem = list->pool; + list->pool = list->pool->next; + _mm_free(elem); + } + _mm_free(list); +} + +static inline void custom_match_print(custom_match_prq* list) +{ + custom_match_prq_node* elem; + int i = 0; + int j = 0; + printf("Elements in the list (this is currenly only partialy implemented):\n"); + for(elem = list->head; elem; elem = elem->next) + { + printf("This is the %d linked list element\n", ++i); + for(j = 0; j < 16; j++) + { + printf("%d:%d The key is %d, the mask is %d, the value is %lu\n", i, j, ((int*)(&(elem->tags)))[j], + ((int*)(&(elem->tmask)))[j], (uintptr_t) elem->value[j]); + } + i++; + } +} + +static inline void custom_match_prq_dump(custom_match_prq* list) +{ + char cpeer[64], ctag[64]; + + custom_match_prq_node* elem; + int i = 0; + int j = 0; + printf("Elements in the list:\n"); + for(elem = list->head; elem; elem = elem->next) + { + printf("This is the %d linked list element\n", ++i); + for(j = 0; j < 16; j++) + { + if(elem->value[j]) + { + mca_pml_base_request_t *req = (mca_pml_base_request_t *)elem->value[j]; + if( OMPI_ANY_SOURCE == req->req_peer ) snprintf(cpeer, 64, "%s", "ANY_SOURCE"); + else snprintf(cpeer, 64, "%d", req->req_peer); + if( OMPI_ANY_TAG == req->req_tag ) snprintf(ctag, 64, "%s", "ANY_TAG"); + else snprintf(ctag, 64, "%d", req->req_tag); + opal_output(0, "req %p peer %s tag %s addr %p count %lu datatype %s [%p] [%s %s] req_seq %" PRIu64, + (void*) req, cpeer, ctag, + (void*) req->req_addr, req->req_count, + (0 != req->req_count ? req->req_datatype->name : "N/A"), + (void*) req->req_datatype, + (req->req_pml_complete ? "pml_complete" : ""), + (req->req_free_called ? "freed" : ""), + req->req_sequence); + + } + } + } +} + + +// UMQ below. + +typedef struct custom_match_umq_node +{ + __m512i tags; + __m512i srcs; + struct custom_match_umq_node* next; + int start, end; + void* value[16]; +} custom_match_umq_node; + +typedef struct custom_match_umq +{ + custom_match_umq_node* head; + custom_match_umq_node* tail; + custom_match_umq_node* pool; + int size; +} custom_match_umq; + +static inline void custom_match_umq_dump(custom_match_umq* list); + +static inline void* custom_match_umq_find_verify_hold(custom_match_umq* list, int tag, int peer, custom_match_umq_node** hold_prev, custom_match_umq_node** hold_elem, int* hold_index) +{ +#if CUSTOM_MATCH_DEBUG_VERBOSE + printf("custom_match_umq_find_verify_hold list: %p:%d tag: %x peer: %x\n", (void *) list, list->size, tag, peer); + custom_match_umq_dump(list); +#endif + __mmask16 result = 0; + custom_match_umq_node* prev = 0; + custom_match_umq_node* elem = list->head; + int i; + __m512i tsearch = _mm512_set1_epi32(tag); + __m512i ssearch = _mm512_set1_epi32(peer); + + int tmask = ~0; + int smask = ~0; + if(peer == OMPI_ANY_SOURCE) + { + smask = 0; + } + + if(tag == OMPI_ANY_TAG) + { + tmask = 0; + } + + __m512i tmasks = _mm512_set1_epi32(tmask); + __m512i smasks = _mm512_set1_epi32(smask); + + tsearch = _mm512_and_epi32(tsearch, tmasks); + ssearch = _mm512_and_epi32(ssearch, smasks); + + while(elem) + { + result = _mm512_cmpeq_epi32_mask(_mm512_and_epi32(elem->tags,tmasks), tsearch) & + _mm512_cmpeq_epi32_mask(_mm512_and_epi32(elem->srcs,smasks), ssearch); + if(result) + { + for(i = elem->start; i <= elem->end; i++) + { + if((0x1 << i & result) && elem->value[i]) + { + *hold_prev = prev; + *hold_elem = elem; + *hold_index = i; + return elem->value[i]; + } + } + } + prev = elem; + elem = elem->next; + } + return 0; +} + + +static inline void custom_match_umq_remove_hold(custom_match_umq* list, custom_match_umq_node* prev, custom_match_umq_node* elem, int i) +{ +#if CUSTOM_MATCH_DEBUG_VERBOSE + printf("custom_match_umq_find_remove_hold %p %p %x\n", (void *) prev, (void *) elem, i); +#endif + ((int*)(&(elem->tags)))[i] = ~0; + ((int*)(&(elem->srcs)))[i] = ~0; + elem->value[i] = 0; + if(i == elem->start || i == elem->end) + { + while((elem->start <= elem->end) && (!(elem->value[elem->start]))) elem->start++; + while((elem->start <= elem->end) && (!(elem->value[elem->end]))) elem->end--; + if(elem->start > elem->end) + { + if(prev) + { + prev->next = elem->next; + } + else + { + list->head = elem->next; + } + if(!elem->next) + { + list->tail = prev; + } + elem->next = list->pool; + list->pool = elem; + } + } + list->size--; +} + +static inline void custom_match_umq_append(custom_match_umq* list, int tag, int source, void* payload) +{ +#if CUSTOM_MATCH_DEBUG_VERBOSE + int32_t key = source; + ((int8_t*)&key)[3] = (int8_t) tag; // MGFD TODO verify this set higer order bits... +#endif +#if CUSTOM_MATCH_DEBUG_VERBOSE + mca_pml_ob1_recv_frag_t *req = (mca_pml_ob1_recv_frag_t *)payload; + printf("custom_match_umq_append list: %p payload: %p tag: %d src: %d\n", (void *) list, payload, + req->hdr.hdr_match.hdr_tag, req->hdr.hdr_match.hdr_src); +#endif + int i; + custom_match_umq_node* elem; + list->size++; + if((!list->tail) || list->tail->end == 15) + { + if(list->pool) + { + elem = list->pool; + list->pool = list->pool->next; + } + else + { + elem = _mm_malloc(sizeof(custom_match_umq_node),64); + } + elem->tags = _mm512_set1_epi32(~0); // TODO: we only have to do this type of initialization for freshly malloc'd entries. + elem->srcs = _mm512_set1_epi32(~0); + elem->next = 0; + elem->start = 0; + elem->end = -1; // we don't have an element yet + for(i = 0; i < 16; i++) elem->value[i] = 0; + if(list->tail) + { + list->tail->next = elem; + list->tail = elem; + } + else + { + list->head = elem; + list->tail = elem; + } + } + + elem = list->tail; + elem->end++; + ((int*)(&(elem->tags)))[elem->end] = tag; + ((int*)(&(elem->srcs)))[elem->end] = source; + elem->value[elem->end] = payload; +#if CUSTOM_MATCH_DEBUG_VERBOSE + custom_match_umq_dump(list); +#endif +} + +static inline custom_match_umq* custom_match_umq_init() +{ +#if CUSTOM_MATCH_DEBUG_VERBOSE + printf("custom_match_umq_init\n"); +#endif + custom_match_umq* list = _mm_malloc(sizeof(custom_match_umq),64); + list->head = 0; + list->tail = 0; + list->pool = 0; + list->size = 0; + return list; +} + +static inline void custom_match_umq_destroy(custom_match_umq* list) +{ +#if CUSTOM_MATCH_DEBUG_VERBOSE + printf("custom_match_umq_destroy\n"); +#endif + custom_match_umq_node* elem; + while(list->head) + { + elem = list->head; + list->head = list->head->next; + _mm_free(elem); + } + while(list->pool) + { + elem = list->pool; + list->pool = list->pool->next; + _mm_free(elem); + } + _mm_free(list); +} + +static inline int custom_match_umq_size(custom_match_umq* list) +{ + return list->size; +} + +static inline void custom_match_umq_dump(custom_match_umq* list) +{ + char cpeer[64], ctag[64]; + + //printf("Elements in the list:\n"); + for (custom_match_umq_node *elem = list->head; elem; elem = elem->next) { + //printf("This is the %d linked list element\n", ++i); + for (int j = 0; j < 16; j++) { + if (elem->value[j]) { + mca_pml_ob1_recv_frag_t *req = (mca_pml_ob1_recv_frag_t *)elem->value[j]; + //printf("%x %x %x\n", elem->value[j], req->hdr.hdr_match.hdr_tag, req->hdr.hdr_match.hdr_src); + if( OMPI_ANY_SOURCE == req->hdr.hdr_match.hdr_src ) snprintf(cpeer, 64, "%s", "ANY_SOURCE"); + else snprintf(cpeer, 64, "%d", req->hdr.hdr_match.hdr_src); + if( OMPI_ANY_TAG == req->hdr.hdr_match.hdr_tag ) snprintf(ctag, 64, "%s", "ANY_TAG"); + else snprintf(ctag, 64, "%d", req->hdr.hdr_match.hdr_tag); + // opal_output(0, "peer %s tag %s",// addr %p count %lu datatype %s [%p] [%s %s] req_seq %" PRIu64, + // /*(void*) req,*/ cpeer, ctag, + //(void*) req->req_addr, req->req_count, + //(0 != req->req_count ? req->req_datatype->name : "N/A"), + //(void*) req->req_datatype, + //(req->req_pml_complete ? "pml_complete" : ""), + //(req->req_free_called ? "freed" : ""), + //req->req_sequence); + // ); + + } + } + } +} + +#endif diff --git a/ompi/mca/pml/ob1/custommatch/vectors.h b/ompi/mca/pml/ob1/custommatch/vectors.h deleted file mode 100644 index 473ab12da7..0000000000 --- a/ompi/mca/pml/ob1/custommatch/vectors.h +++ /dev/null @@ -1,601 +0,0 @@ -/* - * Copyright (c) 2018 Sandia National Laboratories. All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#include - - -#ifndef __CUSTOM_MATCH -#define __CUSTOM_MATCH - -#include "../pml_ob1_recvreq.h" -#include "../pml_ob1_recvfrag.h" - -#define CUSTOM_MATCH_DEBUG 0 -#define CUSTOM_MATCH_DEBUG_VERBOSE 0 - -typedef struct custom_match_prq_node -{ - __m512i tags; - __m512i tmask; - __m512i srcs; - __m512i smask; - struct custom_match_prq_node* next; - int start, end; - void* value[16]; -} custom_match_prq_node; - -typedef struct custom_match_prq -{ - custom_match_prq_node* head; - custom_match_prq_node* tail; - custom_match_prq_node* pool; - int size; -} custom_match_prq; - -static inline int custom_match_prq_cancel(custom_match_prq* list, void* req) -{ -#if CUSTOM_MATCH_DEBUG_VERBOSE - printf("custom_match_prq_cancel - list: %x req: %x\n", list, req); -#endif - custom_match_prq_node* prev = 0; - custom_match_prq_node* elem = list->head; - int i; - while(elem) - { - for(i = elem->start; i <= elem->end; i++) - { - if(elem->value[i] == req) - { -#if CUSTOM_MATCH_DEBUG_VERBOSE - printf("Canceled!");// %x %x %x\n", req, req->req_tag, req->req_peer); -#endif - void* payload = elem->value[i]; - ((int*)(&(elem->tags)))[i] = ~0; - ((int*)(&(elem->tmask)))[i] = ~0; - ((int*)(&(elem->srcs)))[i] = ~0; - ((int*)(&(elem->smask)))[i] = ~0; - elem->value[i] = 0; - if(i == elem->start || i == elem->end) - { - while((elem->start <= elem->end) && (!(elem->value[elem->start]))) elem->start++; - while((elem->start <= elem->end) && (!(elem->value[elem->end]))) elem->end--; - if(elem->start > elem->end) - { - if(prev) - { - prev->next = elem->next; - } - else - { - list->head = elem->next; - } - if(!elem->next) - { - list->tail = prev; - } - elem->next = list->pool; - list->pool = elem; - } - } - list->size--; - return 1; - } - } - prev = elem; - elem = elem->next; - } - return 0; -} - -static inline void* custom_match_prq_find_verify(custom_match_prq* list, int tag, int peer) -{ -#if CUSTOM_MATCH_DEBUG_VERBOSE - printf("custom_match_prq_find_verify list: %x tag: %x peer: %x\n", list, tag, peer); -#endif - __mmask16 result = 0; - custom_match_prq_node* elem = list->head; - int i; - __m512i tsearch = _mm512_set1_epi32(tag); - __m512i ssearch = _mm512_set1_epi32(peer); - - while(elem) - { - result = _mm512_cmpeq_epi32_mask(_mm512_and_epi32(elem->tags, elem->tmask), _mm512_and_epi32(tsearch, elem->tmask)) & - _mm512_cmpeq_epi32_mask(_mm512_and_epi32(elem->srcs, elem->smask), _mm512_and_epi32(ssearch, elem->smask)); - if(result) - { - for(i = elem->start; i <= elem->end; i++) - { - if((0x1 << i & result) && elem->value[i]) - { -#if CUSTOM_MATCH_DEBUG_VERBOSE - printf("Found list: %x tag: %x peer: %x\n", list, req->req_tag, req->req_peer); -#endif - return elem->value[i]; - } - } - } - elem = elem->next; - } - return 0; -} - -static inline void* custom_match_prq_find_dequeue_verify(custom_match_prq* list, int tag, int peer) -{ -#if CUSTOM_MATCH_DEBUG_VERBOSE - printf("custom_match_prq_find_dequeue_verify list: %x:%d tag: %x peer: %x\n", list, list->size, tag, peer); -#endif - __mmask16 result = 0; - custom_match_prq_node* prev = 0; - custom_match_prq_node* elem = list->head; - int i; - __m512i tsearch = _mm512_set1_epi32(tag); - __m512i ssearch = _mm512_set1_epi32(peer); - while(elem) - { -#if CUSTOM_MATCH_DEBUG_VERBOSE - for(int iter = elem->start; iter <= elem->end; iter++) - { - //printf("Search = %x, Element Key = %x, Element mask = %x", ((int32_t*) &search)[iter], ((int32_t*) &elem->keys)[iter], ((int32_t*) &elem->mask)[iter]); - } -#endif - result = _mm512_cmpeq_epi32_mask(_mm512_and_epi32(elem->tags, elem->tmask), _mm512_and_epi32(tsearch, elem->tmask)) & - _mm512_cmpeq_epi32_mask(_mm512_and_epi32(elem->srcs, elem->smask), _mm512_and_epi32(ssearch, elem->smask)); - if(result) - { - for(i = elem->start; i <= elem->end; i++) - { - mca_pml_base_request_t *req = (mca_pml_base_request_t *)elem->value[i]; - if((0x1 << i & result) && req && ((req->req_peer == peer || req->req_peer == OMPI_ANY_SOURCE) && (req->req_tag == tag || req->req_tag == OMPI_ANY_TAG))) - { - void* payload = elem->value[i]; - ((int*)(&(elem->tags)))[i] = ~0; - ((int*)(&(elem->tmask)))[i] = ~0; - ((int*)(&(elem->srcs)))[i] = ~0; - ((int*)(&(elem->smask)))[i] = ~0; - elem->value[i] = 0; - if(i == elem->start || i == elem->end) - { - while((elem->start <= elem->end) && (!(elem->value[elem->start]))) elem->start++; - while((elem->start <= elem->end) && (!(elem->value[elem->end]))) elem->end--; - if(elem->start > elem->end) - { - if(prev) - { - prev->next = elem->next; - } - else - { - list->head = elem->next; - } - if(!elem->next) - { - list->tail = prev; - } - elem->next = list->pool; - list->pool = elem; - } - } - list->size--; -#if CUSTOM_MATCH_DEBUG_VERBOSE - printf("Found list: %x tag: %x peer: %x\n", list, req->req_tag, req->req_peer); -#endif - return payload; - } - } - } - prev = elem; - elem = elem->next; - } - return 0; -} - - -static inline void custom_match_prq_append(custom_match_prq* list, void* payload, int tag, int source) -{ - int32_t mask_tag, mask_src; - if(source == OMPI_ANY_SOURCE) - { - mask_src = 0; - } - else - { - mask_src = ~0; - } - if(tag == OMPI_ANY_TAG) - { - mask_tag = 0; - } - else - { - mask_tag = ~0; - } - mca_pml_base_request_t *req = (mca_pml_base_request_t *)payload; -#if CUSTOM_MATCH_DEBUG_VERBOSE - printf("custom_match_prq_append list: %x key: %x mask: %x tag: %x peer: %x\n", list, key, mask, req->req_tag, req->req_peer); -#endif - int i; - custom_match_prq_node* elem; - if((!list->tail) || list->tail->end == 15) - { - if(list->pool) - { - elem = list->pool; - list->pool = list->pool->next; - } - else - { - elem = _mm_malloc(sizeof(custom_match_prq_node),64); - //if(!elem) - //{ - // printf("Error: Couldn't create memory\n"); - //} - } - elem->tags = _mm512_set1_epi32(~0); // TODO: we only have to do this type of initialization for freshly malloc'd entries. - elem->tmask = _mm512_set1_epi32(~0); - elem->srcs = _mm512_set1_epi32(~0); - elem->smask = _mm512_set1_epi32(~0); - elem->next = 0; - elem->start = 0; - elem->end = -1; // we don't have an element yet - for(i = 0; i < 16; i++) elem->value[i] = 0; - if(list->tail) - { - list->tail->next = elem; - list->tail = elem; - } - else - { - list->head = elem; - list->tail = elem; - } - } - - elem = list->tail; - elem->end++; - ((int*)(&(elem->tags)))[elem->end] = tag; - ((int*)(&(elem->tmask)))[elem->end] = mask_tag; - ((int*)(&(elem->srcs)))[elem->end] = source; - ((int*)(&(elem->smask)))[elem->end] = mask_src; - elem->value[elem->end] = payload; - list->size++; -#if CUSTOM_MATCH_DEBUG_VERBOSE - printf("Exiting custom_match_prq_append\n"); -#endif -} - -static inline int custom_match_prq_size(custom_match_prq* list) -{ - return list->size; -} - -static inline custom_match_prq* custom_match_prq_init() -{ -#if CUSTOM_MATCH_DEBUG_VERBOSE - printf("custom_match_prq_init\n"); -#endif - custom_match_prq* list = _mm_malloc(sizeof(custom_match_prq),64); - list->head = 0; - list->tail = 0; - list->pool = 0; - list->size = 0; - return list; -} - -static inline void custom_match_prq_destroy(custom_match_prq* list) -{ -#if CUSTOM_MATCH_DEBUG_VERBOSE - printf("custom_match_prq_destroy\n"); -#endif - custom_match_prq_node* elem; - while(list->head) - { - elem = list->head; - list->head = list->head->next; - _mm_free(elem); - } - while(list->pool) - { - elem = list->pool; - list->pool = list->pool->next; - _mm_free(elem); - } - _mm_free(list); -} - -static inline void custom_match_print(custom_match_prq* list) -{ - custom_match_prq_node* elem; - int i = 0; - int j = 0; - printf("Elements in the list (this is currenly only partialy implemented):\n"); - for(elem = list->head; elem; elem = elem->next) - { - printf("This is the %d linked list element\n", ++i); - for(j = 0; j < 16; j++) - { - printf("%d:%d The key is %d, the mask is %d, the value is %ld\n", i, j, ((int*)(&(elem->tags)))[j], ((int*)(&(elem->tmask)))[j], elem->value[j]); - } - i++; - } -} - -static inline void custom_match_prq_dump(custom_match_prq* list) -{ - opal_list_item_t* item; - char cpeer[64], ctag[64]; - - custom_match_prq_node* elem; - int i = 0; - int j = 0; - printf("Elements in the list:\n"); - for(elem = list->head; elem; elem = elem->next) - { - printf("This is the %d linked list element\n", ++i); - for(j = 0; j < 16; j++) - { - if(elem->value[j]) - { - mca_pml_base_request_t *req = (mca_pml_base_request_t *)elem->value[j]; - if( OMPI_ANY_SOURCE == req->req_peer ) snprintf(cpeer, 64, "%s", "ANY_SOURCE"); - else snprintf(cpeer, 64, "%d", req->req_peer); - if( OMPI_ANY_TAG == req->req_tag ) snprintf(ctag, 64, "%s", "ANY_TAG"); - else snprintf(ctag, 64, "%d", req->req_tag); - opal_output(0, "req %p peer %s tag %s addr %p count %lu datatype %s [%p] [%s %s] req_seq %" PRIu64, - (void*) req, cpeer, ctag, - (void*) req->req_addr, req->req_count, - (0 != req->req_count ? req->req_datatype->name : "N/A"), - (void*) req->req_datatype, - (req->req_pml_complete ? "pml_complete" : ""), - (req->req_free_called ? "freed" : ""), - req->req_sequence); - - } - } - } -} - - -// UMQ below. - -typedef struct custom_match_umq_node -{ - __m512i tags; - __m512i srcs; - struct custom_match_umq_node* next; - int start, end; - void* value[16]; -} custom_match_umq_node; - -typedef struct custom_match_umq -{ - custom_match_umq_node* head; - custom_match_umq_node* tail; - custom_match_umq_node* pool; - int size; -} custom_match_umq; - -static inline void custom_match_umq_dump(custom_match_umq* list); - -static inline void* custom_match_umq_find_verify_hold(custom_match_umq* list, int tag, int peer, custom_match_umq_node** hold_prev, custom_match_umq_node** hold_elem, int* hold_index) -{ -#if CUSTOM_MATCH_DEBUG_VERBOSE - printf("custom_match_umq_find_verify_hold list: %x:%d tag: %x peer: %x\n", list, list->size, tag, peer); - custom_match_umq_dump(list); -#endif - __mmask16 result = 0; - custom_match_umq_node* prev = 0; - custom_match_umq_node* elem = list->head; - int i; - __m512i tsearch = _mm512_set1_epi32(tag); - __m512i ssearch = _mm512_set1_epi32(peer); - - int tmask = ~0; - int smask = ~0; - if(peer == OMPI_ANY_SOURCE) - { - smask = 0; - } - - if(tag == OMPI_ANY_TAG) - { - tmask = 0; - } - - __m512i tmasks = _mm512_set1_epi32(tmask); - __m512i smasks = _mm512_set1_epi32(smask); - - tsearch = _mm512_and_epi32(tsearch, tmasks); - ssearch = _mm512_and_epi32(ssearch, smasks); - - while(elem) - { - result = _mm512_cmpeq_epi32_mask(_mm512_and_epi32(elem->tags,tmasks), tsearch) & - _mm512_cmpeq_epi32_mask(_mm512_and_epi32(elem->srcs,smasks), ssearch); - if(result) - { - for(i = elem->start; i <= elem->end; i++) - { - if((0x1 << i & result) && elem->value[i]) - { - *hold_prev = prev; - *hold_elem = elem; - *hold_index = i; - return elem->value[i]; - } - } - } - prev = elem; - elem = elem->next; - } - return 0; -} - - -static inline void custom_match_umq_remove_hold(custom_match_umq* list, custom_match_umq_node* prev, custom_match_umq_node* elem, int i) -{ -#if CUSTOM_MATCH_DEBUG_VERBOSE - printf("custom_match_umq_find_remove_hold %x %x %x\n", prev, elem, i); -#endif - ((int*)(&(elem->tags)))[i] = ~0; - ((int*)(&(elem->srcs)))[i] = ~0; - elem->value[i] = 0; - if(i == elem->start || i == elem->end) - { - while((elem->start <= elem->end) && (!(elem->value[elem->start]))) elem->start++; - while((elem->start <= elem->end) && (!(elem->value[elem->end]))) elem->end--; - if(elem->start > elem->end) - { - if(prev) - { - prev->next = elem->next; - } - else - { - list->head = elem->next; - } - if(!elem->next) - { - list->tail = prev; - } - elem->next = list->pool; - list->pool = elem; - } - } - list->size--; -} - -static inline void custom_match_umq_append(custom_match_umq* list, int tag, int source, void* payload) -{ -#if CUSTOM_MATCH_DEBUG_VERBOSE - int32_t key = source; - ((int8_t*)&key)[3] = (int8_t) tag; // MGFD TODO verify this set higer order bits... -#endif - mca_pml_ob1_recv_frag_t *req = (mca_pml_ob1_recv_frag_t *)payload; -#if CUSTOM_MATCH_DEBUG_VERBOSE - printf("custom_match_umq_append list: %x payload: %x tag: %d src: %d\n", list, payload, req->hdr.hdr_match.hdr_tag, req->hdr.hdr_match.hdr_src); -#endif - int i; - custom_match_umq_node* elem; - list->size++; - if((!list->tail) || list->tail->end == 15) - { - if(list->pool) - { - elem = list->pool; - list->pool = list->pool->next; - } - else - { - elem = _mm_malloc(sizeof(custom_match_umq_node),64); - } - elem->tags = _mm512_set1_epi32(~0); // TODO: we only have to do this type of initialization for freshly malloc'd entries. - elem->srcs = _mm512_set1_epi32(~0); - elem->next = 0; - elem->start = 0; - elem->end = -1; // we don't have an element yet - for(i = 0; i < 16; i++) elem->value[i] = 0; - if(list->tail) - { - list->tail->next = elem; - list->tail = elem; - } - else - { - list->head = elem; - list->tail = elem; - } - } - - elem = list->tail; - elem->end++; - ((int*)(&(elem->tags)))[elem->end] = tag; - ((int*)(&(elem->srcs)))[elem->end] = source; - elem->value[elem->end] = payload; -#if CUSTOM_MATCH_DEBUG_VERBOSE - custom_match_umq_dump(list); -#endif -} - -static inline custom_match_umq* custom_match_umq_init() -{ -#if CUSTOM_MATCH_DEBUG_VERBOSE - printf("custom_match_umq_init\n"); -#endif - custom_match_umq* list = _mm_malloc(sizeof(custom_match_umq),64); - list->head = 0; - list->tail = 0; - list->pool = 0; - list->size = 0; - return list; -} - -static inline void custom_match_umq_destroy(custom_match_umq* list) -{ -#if CUSTOM_MATCH_DEBUG_VERBOSE - printf("custom_match_umq_destroy\n"); -#endif - custom_match_umq_node* elem; - while(list->head) - { - elem = list->head; - list->head = list->head->next; - _mm_free(elem); - } - while(list->pool) - { - elem = list->pool; - list->pool = list->pool->next; - _mm_free(elem); - } - _mm_free(list); -} - -static inline int custom_match_umq_size(custom_match_umq* list) -{ - return list->size; -} - -static inline void custom_match_umq_dump(custom_match_umq* list) -{ - char cpeer[64], ctag[64]; - - custom_match_umq_node* elem; - int i = 0; - int j = 0; - //printf("Elements in the list:\n"); - for(elem = list->head; elem; elem = elem->next) - { - //printf("This is the %d linked list element\n", ++i); - for(j = 0; j < 16; j++) - { - if(elem->value[j]) - { - mca_pml_ob1_recv_frag_t *req = (mca_pml_ob1_recv_frag_t *)elem->value[j]; - //printf("%x %x %x\n", elem->value[j], req->hdr.hdr_match.hdr_tag, req->hdr.hdr_match.hdr_src); - if( OMPI_ANY_SOURCE == req->hdr.hdr_match.hdr_src ) snprintf(cpeer, 64, "%s", "ANY_SOURCE"); - else snprintf(cpeer, 64, "%d", req->hdr.hdr_match.hdr_src); - if( OMPI_ANY_TAG == req->hdr.hdr_match.hdr_tag ) snprintf(ctag, 64, "%s", "ANY_TAG"); - else snprintf(ctag, 64, "%d", req->hdr.hdr_match.hdr_tag); - // opal_output(0, "peer %s tag %s",// addr %p count %lu datatype %s [%p] [%s %s] req_seq %" PRIu64, - // /*(void*) req,*/ cpeer, ctag, - //(void*) req->req_addr, req->req_count, - //(0 != req->req_count ? req->req_datatype->name : "N/A"), - //(void*) req->req_datatype, - //(req->req_pml_complete ? "pml_complete" : ""), - //(req->req_free_called ? "freed" : ""), - //req->req_sequence); - // ); - - } - } - } -} - -#endif diff --git a/ompi/mca/pml/ob1/pml_ob1.c b/ompi/mca/pml/ob1/pml_ob1.c index a665ccf9f3..54ef5a951a 100644 --- a/ompi/mca/pml/ob1/pml_ob1.c +++ b/ompi/mca/pml/ob1/pml_ob1.c @@ -261,7 +261,11 @@ int mca_pml_ob1_add_comm(ompi_communicator_t* comm) add_fragment_to_unexpected: /* We're now expecting the next sequence number. */ pml_proc->expected_sequence++; +#if !MCA_PML_OB1_CUSTOM_MATCH opal_list_append( &pml_proc->unexpected_frags, (opal_list_item_t*)frag ); +#else + custom_match_umq_append(pml_comm->umq, hdr->hdr_tag, hdr->hdr_src, frag); +#endif PERUSE_TRACE_MSG_EVENT(PERUSE_COMM_MSG_INSERT_IN_UNEX_Q, comm, hdr->hdr_src, hdr->hdr_tag, PERUSE_RECV); /* And now the ugly part. As some fragments can be inserted in the cant_match list, @@ -533,6 +537,7 @@ static void mca_pml_ob1_dump_hdr(mca_pml_ob1_hdr_t* hdr) header); } +#if !MCA_PML_OB1_CUSTOM_MATCH static void mca_pml_ob1_dump_frag_list(opal_list_t* queue, bool is_req) { opal_list_item_t* item; @@ -565,6 +570,7 @@ static void mca_pml_ob1_dump_frag_list(opal_list_t* queue, bool is_req) } } } +#endif void mca_pml_ob1_dump_cant_match(mca_pml_ob1_recv_frag_t* queue) { @@ -603,7 +609,7 @@ int mca_pml_ob1_dump(struct ompi_communicator_t* comm, int verbose) #if MCA_PML_OB1_CUSTOM_MATCH opal_output(0, "expected receives\n"); - custom_match_prq_dump(pml_comm->pro); + custom_match_prq_dump(pml_comm->prq); opal_output(0, "unexpected frag\n"); custom_match_umq_dump(pml_comm->umq); #endif diff --git a/ompi/mca/pml/ob1/pml_ob1_comm.h b/ompi/mca/pml/ob1/pml_ob1_comm.h index 56086af012..fc5f6fa4c1 100644 --- a/ompi/mca/pml/ob1/pml_ob1_comm.h +++ b/ompi/mca/pml/ob1/pml_ob1_comm.h @@ -10,7 +10,7 @@ * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. - * Copyright (c) 2015 Los Alamos National Security, LLC. All rights + * Copyright (c) 2015-2018 Los Alamos National Security, LLC. All rights * reserved. * Copyright (c) 2018 Sandia National Laboratories * All rights reserved. @@ -31,30 +31,11 @@ #include "ompi/proc/proc.h" #include "ompi/communicator/communicator.h" -#define MCA_PML_OB1_CUSTOM_MATCH 1/** TODO: move this to config parameter */ -#define MCA_PML_OB1_CUSTOM_MATCH_ARRAYS 0 -#define MCA_PML_OB1_CUSTOM_MATCH_FUZZY_BYTE 0 -#define MCA_PML_OB1_CUSTOM_MATCH_FUZZY_SHORT 0 -#define MCA_PML_OB1_CUSTOM_MATCH_FUZZY_WORD 0 -#define MCA_PML_OB1_CUSTOM_MATCH_VECTOR 0 +/* NTH: at some point we need to untangle the headers. this declaration is needed + * for headers included by the custom match code. */ +typedef struct mca_pml_ob1_comm_proc_t mca_pml_ob1_comm_proc_t; -#if MCA_PML_OB1_CUSTOM_MATCH - -#if MCA_PML_OB1_CUSTOM_MATCH_ARRAYS -#include "ompi/mca/pml/ob1/custommatch/arrays.h" -#elif MCA_PML_OB1_CUSTOM_MATCH_FUZZY_BYTE -#include "ompi/mca/pml/ob1/custommatch/fuzzy512-byte.h" -#elif MCA_PML_OB1_CUSTOM_MATCH_FUZZY_SHORT -#include "ompi/mca/pml/ob1/custommatch/fuzzy512-short.h" -#elif MCA_PML_OB1_CUSTOM_MATCH_FUZZY_WORD -#include "ompi/mca/pml/ob1/custommatch/fuzzy512-word.h" -#elif MCA_PML_OB1_CUSTOM_MATCH_VECTOR -#include "ompi/mca/pml/ob1/custommatch/vectors.h" -#else -#include "ompi/mca/pml/ob1/custommatch/linkedlist.h" //Default Custom Match is single linked list -#endif - -#endif +#include "custommatch/pml_ob1_custom_match.h" BEGIN_C_DECLS @@ -74,7 +55,6 @@ struct mca_pml_ob1_comm_proc_t { opal_list_t unexpected_frags; /**< unexpected fragment queues */ #endif }; -typedef struct mca_pml_ob1_comm_proc_t mca_pml_ob1_comm_proc_t; OBJ_CLASS_DECLARATION(mca_pml_ob1_comm_proc_t); diff --git a/ompi/mca/pml/ob1/pml_ob1_recvfrag.c b/ompi/mca/pml/ob1/pml_ob1_recvfrag.c index 8cdfe23b16..d0471eb5a0 100644 --- a/ompi/mca/pml/ob1/pml_ob1_recvfrag.c +++ b/ompi/mca/pml/ob1/pml_ob1_recvfrag.c @@ -728,13 +728,11 @@ static mca_pml_ob1_recv_request_t *match_incomming( mca_pml_ob1_match_hdr_t *hdr, mca_pml_ob1_comm_t *comm, mca_pml_ob1_comm_proc_t *proc) { +#if !MCA_PML_OB1_CUSTOM_MATCH mca_pml_ob1_recv_request_t *specific_recv, *wild_recv; mca_pml_sequence_t wild_recv_seq, specific_recv_seq; int tag = hdr->hdr_tag; -#if MCA_PML_OB1_CUSTOM_MATCH - return custom_match_prq_find_dequeue_verify(comm->prq, hdr->hdr_tag, hdr->hdr_src); -#else specific_recv = get_posted_recv(&proc->specific_receives); wild_recv = get_posted_recv(&comm->wild_receives); @@ -773,11 +771,12 @@ static mca_pml_ob1_recv_request_t *match_incomming( } return NULL; +#else + return custom_match_prq_find_dequeue_verify(comm->prq, hdr->hdr_tag, hdr->hdr_src); #endif } - -#if MCA_PML_OB1_CUSTOM_MATCH +#if !MCA_PML_OB1_CUSTOM_MATCH static mca_pml_ob1_recv_request_t *match_incomming_no_any_source ( mca_pml_ob1_match_hdr_t *hdr, mca_pml_ob1_comm_t *comm, mca_pml_ob1_comm_proc_t *proc) @@ -816,7 +815,7 @@ match_one(mca_btl_base_module_t *btl, mca_pml_ob1_comm_t *comm = (mca_pml_ob1_comm_t *)comm_ptr->c_pml_comm; do { -#if !MCA_PML_OB1_CUSTOM_MATCH +#if MCA_PML_OB1_CUSTOM_MATCH match = match_incomming(hdr, comm, proc); #else if (!OMPI_COMM_CHECK_ASSERT_NO_ANY_SOURCE (comm_ptr)) { diff --git a/ompi/mca/pml/ob1/pml_ob1_recvreq.c b/ompi/mca/pml/ob1/pml_ob1_recvreq.c index 49c9e89dc8..ea5fa6849d 100644 --- a/ompi/mca/pml/ob1/pml_ob1_recvreq.c +++ b/ompi/mca/pml/ob1/pml_ob1_recvreq.c @@ -1108,22 +1108,13 @@ recv_req_match_specific_proc( const mca_pml_ob1_recv_request_t *req, } #if !MCA_PML_OB1_CUSTOM_MATCH - opal_list_t* unexpected_frags = &proc->unexpected_frags; -#endif - - mca_pml_ob1_recv_frag_t* frag; int tag = req->req_recv.req_base.req_tag; + opal_list_t* unexpected_frags = &proc->unexpected_frags; + mca_pml_ob1_recv_frag_t* frag; - -#if MCA_PML_OB1_CUSTOM_MATCH - return custom_match_umq_find_verify_hold(req->req_recv.req_base.req_comm->c_pml_comm->umq, - req->req_recv.req_base.req_tag, - req->req_recv.req_base.req_peer, - hold_prev, hold_elem, hold_index); -#endif - - if(opal_list_get_size(unexpected_frags) == 0) + if(opal_list_get_size(unexpected_frags) == 0) { return NULL; + } if( OMPI_ANY_TAG == tag ) { OPAL_LIST_FOREACH(frag, unexpected_frags, mca_pml_ob1_recv_frag_t) { @@ -1137,6 +1128,12 @@ recv_req_match_specific_proc( const mca_pml_ob1_recv_request_t *req, } } return NULL; +#else + return custom_match_umq_find_verify_hold(req->req_recv.req_base.req_comm->c_pml_comm->umq, + req->req_recv.req_base.req_tag, + req->req_recv.req_base.req_peer, + hold_prev, hold_elem, hold_index); +#endif } /* @@ -1146,7 +1143,7 @@ recv_req_match_specific_proc( const mca_pml_ob1_recv_request_t *req, #if MCA_PML_OB1_CUSTOM_MATCH static mca_pml_ob1_recv_frag_t* recv_req_match_wild( mca_pml_ob1_recv_request_t* req, - mca_pml_ob1_comm_proc_t **p + mca_pml_ob1_comm_proc_t **p, custom_match_umq_node** hold_prev, custom_match_umq_node** hold_elem, int* hold_index) @@ -1158,27 +1155,23 @@ recv_req_match_wild( mca_pml_ob1_recv_request_t* req, { mca_pml_ob1_comm_t* comm = req->req_recv.req_base.req_comm->c_pml_comm; mca_pml_ob1_comm_proc_t **procp = comm->procs; - size_t i; - #if MCA_PML_OB1_CUSTOM_MATCH mca_pml_ob1_recv_frag_t* frag; - frag = custom_match_umq_find_verify_hold(req->req_recv.req_base.req_comm->c_pml_comm->umq, - req->req_recv.req_base.req_tag, - req->req_recv.req_base.req_peer, - hold_prev, hold_elem, hold_index); + frag = custom_match_umq_find_verify_hold (comm->umq, req->req_recv.req_base.req_tag, + req->req_recv.req_base.req_peer, + hold_prev, hold_elem, hold_index); - *p = NULL; - if(frag) - { - *p = procp[frag->hdr.hdr_match.hdr_src]; - req->req_recv.req_base.req_proc = procp[frag->hdr.hdr_match.hdr_src]->ompi_proc; - prepare_recv_req_converter(req); + if (frag) { + *p = procp[frag->hdr.hdr_match.hdr_src]; + req->req_recv.req_base.req_proc = procp[frag->hdr.hdr_match.hdr_src]->ompi_proc; + prepare_recv_req_converter(req); + } else { + *p = NULL; } - return frag; -#endif - + return frag; +#else /* * Loop over all the outstanding messages to find one that matches. @@ -1188,7 +1181,7 @@ recv_req_match_wild( mca_pml_ob1_recv_request_t* req, * * In order to avoid starvation do this in a round-robin fashion. */ - for (i = comm->last_probed + 1; i < comm->num_procs; i++) { + for (size_t i = comm->last_probed + 1; i < comm->num_procs; i++) { mca_pml_ob1_recv_frag_t* frag; /* loop over messages from the current proc */ @@ -1200,7 +1193,7 @@ recv_req_match_wild( mca_pml_ob1_recv_request_t* req, return frag; /* match found */ } } - for (i = 0; i <= comm->last_probed; i++) { + for (size_t i = 0; i <= comm->last_probed; i++) { mca_pml_ob1_recv_frag_t* frag; /* loop over messages from the current proc */ @@ -1215,6 +1208,7 @@ recv_req_match_wild( mca_pml_ob1_recv_request_t* req, *p = NULL; return NULL; +#endif } @@ -1224,8 +1218,14 @@ void mca_pml_ob1_recv_req_start(mca_pml_ob1_recv_request_t *req) mca_pml_ob1_comm_t *ob1_comm = comm->c_pml_comm; mca_pml_ob1_comm_proc_t* proc; mca_pml_ob1_recv_frag_t* frag; - opal_list_t *queue; mca_pml_ob1_hdr_t* hdr; +#if MCA_PML_OB1_CUSTOM_MATCH + custom_match_umq_node* hold_prev; + custom_match_umq_node* hold_elem; + int hold_index; +#else + opal_list_t *queue; +#endif /* init/re-init the request */ req->req_lock = 0; @@ -1237,12 +1237,6 @@ void mca_pml_ob1_recv_req_start(mca_pml_ob1_recv_request_t *req) req->req_pending = false; req->req_ack_sent = false; -#if MCA_PML_OB1_CUSTOM_MATCH - custom_match_umq_node* hold_prev; - custom_match_umq_node* hold_elem; - custom_match_hold_index; -#endif - MCA_PML_BASE_RECV_START(&req->req_recv); OB1_MATCHING_LOCK(&ob1_comm->matching_lock); @@ -1262,8 +1256,8 @@ void mca_pml_ob1_recv_req_start(mca_pml_ob1_recv_request_t *req) frag = recv_req_match_wild(req, &proc, &hold_prev, &hold_elem, &hold_index); #else frag = recv_req_match_wild(req, &proc); -#endif queue = &ob1_comm->wild_receives; +#endif #if !OPAL_ENABLE_HETEROGENEOUS_SUPPORT /* As we are in a homogeneous environment we know that all remote * architectures are exactly the same as the local one. Therefore, @@ -1282,8 +1276,8 @@ void mca_pml_ob1_recv_req_start(mca_pml_ob1_recv_request_t *req) frag = recv_req_match_specific_proc(req, proc, &hold_prev, &hold_elem, &hold_index); #else frag = recv_req_match_specific_proc(req, proc); -#endif queue = &proc->specific_receives; +#endif /* wildcard recv will be prepared on match */ prepare_recv_req_converter(req); } @@ -1296,9 +1290,9 @@ void mca_pml_ob1_recv_req_start(mca_pml_ob1_recv_request_t *req) if(OPAL_LIKELY(req->req_recv.req_base.req_type != MCA_PML_REQUEST_IPROBE && req->req_recv.req_base.req_type != MCA_PML_REQUEST_IMPROBE)) #if MCA_PML_OB1_CUSTOM_MATCH - custom_match_prq_append(ob1_comm->prq, req, - req->req_recv.req_base.req_tag, - req->req_recv.req_base.req_peer); + custom_match_prq_append(ob1_comm->prq, req, + req->req_recv.req_base.req_tag, + req->req_recv.req_base.req_peer); #else append_recv_req_to_queue(queue, req); #endif