Nearly a complete rewrite. Not the logic is more clean, and the overall execution time a lot smaller. And I think

I remove all (hmmm almost) bugs. This commit was SVN r1089.
2004-04-27 18:04:17 +00:00 · 2004-04-27 18:04:17 +00:00 · 1fb66840e1
--- a/src/datatype/fake_stack.c
+++ b/src/datatype/fake_stack.c
@ -1,281 +1,283 @@
-/* -*- Mode: C; c-basic-offset:4 ; -*- */
-
-#include "lam_config.h"
-
-#include "datatype.h"
-#include "datatype_internal.h"
-
-#ifdef HAVE_ALLOCA_H
-#include <alloca.h>
-#endif
-#include <stdlib.h>
-
-static inline long GET_LOOP_DISP( dt_elem_desc_t* _pElem )
-{
-   while( _pElem->type == DT_LOOP ) ++_pElem;
-   return _pElem->disp;
-}
-
-int lam_create_stack_with_pos( lam_convertor_t* pConvertor,
-                               int starting_point, int* sizes );
-int lam_create_stack_with_pos_general( lam_convertor_t* pConvertor,
-                                       int starting_point, int* sizes );
-
-int lam_create_stack_with_pos_general( lam_convertor_t* pConvertor,
-                                       int starting_point, int* sizes )
-{
-    dt_stack_t* pStack;   /* pointer to the position on the stack */
-    int pos_desc;         /* actual position in the description of the derived datatype */
-    int type, lastLength = 0;
-    long totalDisp;
-    lam_datatype_t* pData = pConvertor->pDesc;
-    int* remoteLength;
-    int loop_length;
-    int resting_place = starting_point;
-    dt_elem_desc_t* pElems;
-
-    if( starting_point == 0 ) {
-        
-        pConvertor->stack_pos = 1;
-        pConvertor->pStack[0].index = 0;
-        pConvertor->pStack[0].count = pConvertor->count;
-        pConvertor->pStack[0].disp  = 0;
-        /* first here we should select which data representation will be used for
-         * this operation: normal one or the optimized version ? */
-        if( pData->opt_desc.used > 0 ) {
-            pElems = pData->opt_desc.desc;
-            pConvertor->pStack[0].end_loop = pData->opt_desc.used;
-        } else {
-            pElems = pData->desc.desc;
-            pConvertor->pStack[0].end_loop = pData->desc.used;
-        }
-        pConvertor->pStack[1].index = 0;
-        pConvertor->pStack[1].count = pElems->count;
-        pConvertor->pStack[1].disp = pElems->disp;
-        pConvertor->pStack[1].end_loop = pConvertor->pStack[0].end_loop;
-        return 0;
-    }
-    /* if the convertor continue from the last position
-     * there is nothing to do.
-     */
-    if( pConvertor->bConverted == starting_point ) return 0;
-
-    remoteLength = (int*)alloca( sizeof(int) * pConvertor->pDesc->btypes[DT_LOOP] );
-    pStack = pConvertor->pStack;
-    pStack->count = pConvertor->count;
-    pStack->index = -1;
-    pStack->end_loop = pData->desc.used - 1;
-    pStack->disp = 0;
-    pos_desc  = 0;
-    remoteLength[0] = 0;  /* initial value set to ZERO */
-    pConvertor->stack_pos = 0;
-    pElems = &(pData->desc.desc[pos_desc]);
-
-  next_loop:
-    totalDisp = pStack->disp;
-    loop_length = remoteLength[pConvertor->stack_pos];
-    while( pConvertor->stack_pos >= 0 ) {
-        if( pElems->type == DT_END_LOOP ) { /* end of the current loop */
-            /* now we know the length of the loop. We can compute
-             * if the the starting_position will happend in one of the
-             * iterations of this loop.
-             */
-            remoteLength[pConvertor->stack_pos] = loop_length;
-            if( (loop_length * pStack->count) > resting_place ) {
-                /* OK here we stop in this loop. First save the loop
-                 * on the stack, then save the position of the last 
-                 * data */
-                int cnt = resting_place / loop_length;
-                pStack->count -= cnt;
-                resting_place -= cnt * loop_length;
-                pStack->disp += cnt * pElems->extent;
-                pConvertor->bConverted += (cnt * loop_length);
-                goto next_loop;
-            }
-            /* Not in this loop. Cleanup the stack and advance to the
-             * next data description.
-             */
-            pConvertor->stack_pos--;
-            pStack--;
-            pos_desc++;
-            pElems++;
-            goto next_loop;
-        }
-        if( pElems->type == DT_LOOP ) {
-            remoteLength[pConvertor->stack_pos + 1] = 0;
-            totalDisp = pElems->disp;
-            PUSH_STACK( pStack, pConvertor->stack_pos, pos_desc,
-                        pData->desc.desc[pos_desc].count,
-                        totalDisp, pos_desc + pElems->disp );
-            pos_desc++;
-            pElems++;
-            loop_length = 0;  /* starting a new loop */
-            goto next_loop;
-        }
-        /* now here we have a basic datatype */
-        type = pElems->type;
-        lastLength = pElems->count * basicDatatypes[type].size;
-        if( resting_place > lastLength ) {
-            resting_place -= lastLength;
-            loop_length += lastLength;
-        } else {
-            int cnt = resting_place / basicDatatypes[type].size;
-            resting_place -= cnt * basicDatatypes[type].size;
-            PUSH_STACK( pStack, pConvertor->stack_pos, pos_desc, 
-                        pElems->count - cnt,
-                        totalDisp + pElems->disp + cnt * pElems->extent,
-                        pos_desc );
-            pConvertor->bConverted += (starting_point - resting_place);
-            return 0;
-        }
-        pos_desc++;  /* advance to the next data */
-        pElems++;
-    }
-
-    return 0;
-}
-
-/* This function works for homogeneous architectures. As we keep
- * trace of the size inside the loop in the END_LOOP element
- * we can easily jump directly where we need. It works only
- * because we can split a basic data in the middle if we
- * have a optimized representation.
- */
-int lam_create_stack_with_pos( lam_convertor_t* pConvertor,
-                               int starting_point, int* sizes )
-{
-    dt_stack_t* pStack;   /* pointer to the position on the stack */
-    int pos_desc;         /* actual position in the description of the derived datatype */
-    int type, lastLength = 0;
-    long totalDisp;
-    lam_datatype_t* pData = pConvertor->pDesc;
-    int* remoteLength;
-    int loop_length;
-    int resting_place = starting_point;
-    dt_elem_desc_t* pElems;
-
-    if( starting_point == 0 ) {
-        pConvertor->stack_pos = 1;
-        pConvertor->pStack[0].index = 0;
-        pConvertor->pStack[0].count = pConvertor->count;
-        pConvertor->pStack[0].disp  = 0;
-        /* first here we should select which data representation will be used for
-         * this operation: normal one or the optimized version ? */
-        if( pData->opt_desc.used > 0 ) {
-            pElems = pData->opt_desc.desc;
-            pConvertor->pStack[0].end_loop = pData->opt_desc.used;
-        } else {
-            pElems = pData->desc.desc;
-            pConvertor->pStack[0].end_loop = pData->desc.used;
-        }
-        pConvertor->pStack[1].index = 0;
-        pConvertor->pStack[1].count = pElems->count;
-        pConvertor->pStack[1].disp = pElems->disp;
-        pConvertor->pStack[1].end_loop = pConvertor->pStack[0].end_loop;
-	pConvertor->converted = 0;
-	pConvertor->bConverted = 0;
-        return 0;
-    }
-    /* if the convertor continue from the last position
-     * there is nothing to do.
-     */
-    if( pConvertor->bConverted == starting_point ) return 0;
-    if( pConvertor->flags & DT_FLAG_CONTIGUOUS ) {
-        int cnt;
-
-        cnt = starting_point / pData->size;
-        pConvertor->stack_pos = 1;
-        pConvertor->pStack[0].index = 0;
-        pConvertor->pStack[0].count = pConvertor->count - cnt;
-        pConvertor->pStack[0].disp  = 0;
-        /* first here we should select which data representation will be used for
-         * this operation: normal one or the optimized version ? */
-        if( pData->opt_desc.used > 0 ) {
-            pElems = pData->opt_desc.desc;
-            pConvertor->pStack[0].end_loop = pData->opt_desc.used;
-        } else {
-            pElems = pData->desc.desc;
-            pConvertor->pStack[0].end_loop = pData->desc.used;
-        }
-        cnt = starting_point - cnt * pData->size;
-        pConvertor->pStack[1].index = 0;
-        pConvertor->pStack[1].count = pElems->count - cnt;
-        pConvertor->pStack[1].disp = pElems->disp + cnt;
-        pConvertor->pStack[1].end_loop = pConvertor->pStack[0].end_loop;
-	pConvertor->bConverted = starting_point;
-        return 0;
-    }
-    remoteLength = (int*)alloca( sizeof(int) * pConvertor->pDesc->btypes[DT_LOOP] );
-    pStack = pConvertor->pStack;
-    pStack->count = pConvertor->count;
-    pStack->index = -1;
-    pStack->end_loop = pData->desc.used - 1;
-    pStack->disp = 0;
-    pos_desc  = 0;
-    remoteLength[0] = 0;  /* initial value set to ZERO */
-    pConvertor->stack_pos = 0;
-    pElems = &(pData->desc.desc[pos_desc]);
-
-  next_loop:
-    totalDisp = pStack->disp;
-    loop_length = remoteLength[pConvertor->stack_pos];
-    while( pos_desc <= pStack->end_loop )  {
-        if( pElems->type == DT_END_LOOP ) { /* end of the current loop */
-            /* now we know the length of the loop. We can compute
-             * if the the starting_position will happend in one of the
-             * iterations of this loop.
-             */
-            remoteLength[pConvertor->stack_pos] = loop_length;
-            if( (loop_length * pStack->count) > resting_place ) {
-                /* OK here we stop in this loop. First save the loop
-                 * on the stack, then save the position of the last 
-                 * data */
-                int cnt = resting_place / loop_length;
-                pStack->count -= cnt;
-                resting_place -= cnt * loop_length;
-                pStack->disp += cnt * pElems->extent;
-                pConvertor->bConverted += (cnt * loop_length);
-                goto next_loop;
-            }
-            /* Not in this loop. Cleanup the stack and advance to the
-             * next data description.
-             */
-            pConvertor->stack_pos--;
-            pStack--;
-            pos_desc++;
-            pElems++;
-            goto next_loop;
-        }
-        if( pElems->type == DT_LOOP ) {
-            remoteLength[pConvertor->stack_pos + 1] = 0;
-            totalDisp = pElems->disp;
-            PUSH_STACK( pStack, pConvertor->stack_pos, pos_desc,
-                        pData->desc.desc[pos_desc].count,
-                        totalDisp, pos_desc + pElems->disp );
-            pos_desc++;
-            pElems++;
-            loop_length = 0;  /* starting a new loop */
-            goto next_loop;
-        }
-        /* now here we have a basic datatype */
-        type = pElems->type;
-        lastLength = pElems->count * basicDatatypes[type].size;
-        if( resting_place > lastLength ) {
-            resting_place -= lastLength;
-            loop_length += lastLength;
-        } else {
-            int cnt = resting_place / basicDatatypes[type].size;
-            resting_place -= cnt * basicDatatypes[type].size;
-            PUSH_STACK( pStack, pConvertor->stack_pos, pos_desc, 
-                        pElems->count - cnt,
-                        totalDisp + pElems->disp + cnt * pElems->extent,
-                        pos_desc );
-            pConvertor->bConverted += (starting_point - resting_place);
-            return 0;
-        }
-        pos_desc++;  /* advance to the next data */
-        pElems++;
-    }
-
-    return 0;
-}
+/* -*- Mode: C; c-basic-offset:4 ; -*- */
+
+#include "lam_config.h"
+
+#include "datatype.h"
+#include "datatype_internal.h"
+
+#ifdef HAVE_ALLOCA_H
+#include <alloca.h>
+#endif
+#include <stdlib.h>
+
+static inline long GET_LOOP_DISP( dt_elem_desc_t* _pElem )
+{
+   while( _pElem->type == DT_LOOP ) ++_pElem;
+   return _pElem->disp;
+}
+
+int lam_create_stack_with_pos( lam_convertor_t* pConvertor,
+                               int starting_point, int* sizes );
+int lam_create_stack_with_pos_general( lam_convertor_t* pConvertor,
+                                       int starting_point, int* sizes );
+
+int lam_create_stack_with_pos_general( lam_convertor_t* pConvertor,
+                                       int starting_point, int* sizes )
+{
+    dt_stack_t* pStack;   /* pointer to the position on the stack */
+    int pos_desc;         /* actual position in the description of the derived datatype */
+    int type, lastLength = 0;
+    long totalDisp;
+    lam_datatype_t* pData = pConvertor->pDesc;
+    int* remoteLength;
+    int loop_length;
+    int resting_place = starting_point;
+    dt_elem_desc_t* pElems;
+
+    if( starting_point == 0 ) {
+        
+        pConvertor->stack_pos = 1;
+        pConvertor->pStack[0].index = -1;
+        pConvertor->pStack[0].count = pConvertor->count;
+        pConvertor->pStack[0].disp  = 0;
+        /* first here we should select which data representation will be used for
+         * this operation: normal one or the optimized version ? */
+        if( pData->opt_desc.used > 0 ) {
+            pElems = pData->opt_desc.desc;
+            pConvertor->pStack[0].end_loop = pData->opt_desc.used;
+        } else {
+            pElems = pData->desc.desc;
+            pConvertor->pStack[0].end_loop = pData->desc.used;
+        }
+        pConvertor->pStack[1].index = 0;
+        pConvertor->pStack[1].count = pElems->count;
+        pConvertor->pStack[1].disp = pElems->disp;
+        pConvertor->pStack[1].end_loop = pConvertor->pStack[0].end_loop;
+        return 0;
+    }
+    /* if the convertor continue from the last position
+     * there is nothing to do.
+     */
+    if( pConvertor->bConverted == starting_point ) return 0;
+
+    remoteLength = (int*)alloca( sizeof(int) * pConvertor->pDesc->btypes[DT_LOOP] );
+    pStack = pConvertor->pStack;
+    pStack->count = pConvertor->count;
+    pStack->index = -1;
+    pStack->end_loop = pData->desc.used;
+    pStack->disp = 0;
+    pos_desc  = 0;
+    remoteLength[0] = 0;  /* initial value set to ZERO */
+    pConvertor->stack_pos = 0;
+    pElems = &(pData->desc.desc[pos_desc]);
+
+  next_loop:
+    totalDisp = pStack->disp;
+    loop_length = remoteLength[pConvertor->stack_pos];
+    while( pos_desc < pStack->end_loop ) {
+        if( pElems->type == DT_END_LOOP ) { /* end of the current loop */
+            /* now we know the length of the loop. We can compute
+             * if the the starting_position will happend in one of the
+             * iterations of this loop.
+             */
+            remoteLength[pConvertor->stack_pos] = loop_length;
+            if( (loop_length * pStack->count) > resting_place ) {
+                /* OK here we stop in this loop. First save the loop
+                 * on the stack, then save the position of the last 
+                 * data */
+                int cnt = resting_place / loop_length;
+                pStack->count -= cnt;
+                resting_place -= cnt * loop_length;
+                pStack->disp += cnt * pElems->extent;
+                pConvertor->bConverted += (cnt * loop_length);
+                goto next_loop;
+            }
+            /* Not in this loop. Cleanup the stack and advance to the
+             * next data description.
+             */
+            pConvertor->stack_pos--;
+            pStack--;
+            pos_desc++;
+            pElems++;
+            goto next_loop;
+        }
+        if( pElems->type == DT_LOOP ) {
+            remoteLength[pConvertor->stack_pos + 1] = 0;
+            totalDisp = pElems->disp;
+            PUSH_STACK( pStack, pConvertor->stack_pos, pos_desc,
+                        pData->desc.desc[pos_desc].count,
+                        totalDisp, pos_desc + pElems->disp );
+            pos_desc++;
+            pElems++;
+            loop_length = 0;  /* starting a new loop */
+            goto next_loop;
+        }
+        /* now here we have a basic datatype */
+        type = pElems->type;
+        lastLength = pElems->count * basicDatatypes[type].size;
+        if( resting_place > lastLength ) {
+            resting_place -= lastLength;
+            loop_length += lastLength;
+        } else {
+            int cnt = resting_place / basicDatatypes[type].size;
+            resting_place -= cnt * basicDatatypes[type].size;
+            PUSH_STACK( pStack, pConvertor->stack_pos, pos_desc, 
+                        pElems->count - cnt,
+                        totalDisp + pElems->disp + cnt * pElems->extent,
+                        pos_desc );
+            pConvertor->bConverted += (starting_point - resting_place);
+            return 0;
+        }
+        pos_desc++;  /* advance to the next data */
+        pElems++;
+    }
+
+    return 0;
+}
+
+/* This function works for homogeneous architectures. As we keep
+ * trace of the size inside the loop in the END_LOOP element
+ * we can easily jump directly where we need. It works only
+ * because we can split a basic data in the middle if we
+ * have a optimized representation.
+ */
+int lam_create_stack_with_pos( lam_convertor_t* pConvertor,
+                               int starting_point, int* sizes )
+{
+    dt_stack_t* pStack;   /* pointer to the position on the stack */
+    int pos_desc;         /* actual position in the description of the derived datatype */
+    int type, lastLength = 0;
+    long totalDisp;
+    lam_datatype_t* pData = pConvertor->pDesc;
+    int* remoteLength;
+    int loop_length;
+    int resting_place = starting_point;
+    dt_elem_desc_t* pElems;
+
+    if( starting_point == 0 ) {
+        pConvertor->stack_pos = 1;
+        pConvertor->pStack[0].index = -1;
+        pConvertor->pStack[0].count = pConvertor->count;
+        pConvertor->pStack[0].disp  = 0;
+        /* first here we should select which data representation will be used for
+         * this operation: normal one or the optimized version ? */
+        if( pData->opt_desc.used > 0 ) {
+            pElems = pData->opt_desc.desc;
+            pConvertor->pStack[0].end_loop = pData->opt_desc.used;
+        } else {
+            pElems = pData->desc.desc;
+            pConvertor->pStack[0].end_loop = pData->desc.used;
+        }
+        pConvertor->pStack[1].index = 0;
+        pConvertor->pStack[1].count = pElems->count;
+        if( pElems->flags & DT_FLAG_DATA )
+            pConvertor->pStack[1].count *= basicDatatypes[pElems->type].size;
+        pConvertor->pStack[1].disp = pElems->disp;
+        pConvertor->pStack[1].end_loop = pConvertor->pStack[0].end_loop;
+	pConvertor->converted = 0;
+	pConvertor->bConverted = 0;
+        return 0;
+    }
+    /* if the convertor continue from the last position
+     * there is nothing to do.
+     */
+    if( pConvertor->bConverted == starting_point ) return 0;
+    if( pConvertor->flags & DT_FLAG_CONTIGUOUS ) {
+        int cnt;
+
+        cnt = starting_point / pData->size;
+        pConvertor->stack_pos = 1;
+        pConvertor->pStack[0].index = 0;
+        pConvertor->pStack[0].count = pConvertor->count - cnt;
+        pConvertor->pStack[0].disp  = 0;
+        /* first here we should select which data representation will be used for
+         * this operation: normal one or the optimized version ? */
+        if( pData->opt_desc.used > 0 ) {
+            pElems = pData->opt_desc.desc;
+            pConvertor->pStack[0].end_loop = pData->opt_desc.used;
+        } else {
+            pElems = pData->desc.desc;
+            pConvertor->pStack[0].end_loop = pData->desc.used;
+        }
+        cnt = starting_point - cnt * pData->size;
+        pConvertor->pStack[1].index = 0;
+        pConvertor->pStack[1].count = pElems->count - cnt;
+        pConvertor->pStack[1].disp = pElems->disp + cnt;
+        pConvertor->pStack[1].end_loop = pConvertor->pStack[0].end_loop;
+	pConvertor->bConverted = starting_point;
+        return 0;
+    }
+    remoteLength = (int*)alloca( sizeof(int) * pConvertor->pDesc->btypes[DT_LOOP] );
+    pStack = pConvertor->pStack;
+    pStack->count = pConvertor->count;
+    pStack->index = -1;
+    pStack->end_loop = pData->desc.used;
+    pStack->disp = 0;
+    pos_desc  = 0;
+    remoteLength[0] = 0;  /* initial value set to ZERO */
+    pConvertor->stack_pos = 0;
+    pElems = &(pData->desc.desc[pos_desc]);
+
+  next_loop:
+    totalDisp = pStack->disp;
+    loop_length = remoteLength[pConvertor->stack_pos];
+    while( pos_desc < pStack->end_loop ) {
+        if( pElems->type == DT_END_LOOP ) { /* end of the current loop */
+            /* now we know the length of the loop. We can compute
+             * if the the starting_position will happend in one of the
+             * iterations of this loop.
+             */
+            remoteLength[pConvertor->stack_pos] = loop_length;
+            if( (loop_length * pStack->count) > resting_place ) {
+                /* OK here we stop in this loop. First save the loop
+                 * on the stack, then save the position of the last 
+                 * data */
+                int cnt = resting_place / loop_length;
+                pStack->count -= cnt;
+                resting_place -= cnt * loop_length;
+                pStack->disp += cnt * pElems->extent;
+                pConvertor->bConverted += (cnt * loop_length);
+                goto next_loop;
+            }
+            /* Not in this loop. Cleanup the stack and advance to the
+             * next data description.
+             */
+            pConvertor->stack_pos--;
+            pStack--;
+            pos_desc++;
+            pElems++;
+            goto next_loop;
+        }
+        if( pElems->type == DT_LOOP ) {
+            remoteLength[pConvertor->stack_pos + 1] = 0;
+            totalDisp = pElems->disp;
+            PUSH_STACK( pStack, pConvertor->stack_pos, pos_desc,
+                        pData->desc.desc[pos_desc].count,
+                        totalDisp, pos_desc + pElems->disp );
+            pos_desc++;
+            pElems++;
+            loop_length = 0;  /* starting a new loop */
+            goto next_loop;
+        }
+        /* now here we have a basic datatype */
+        type = pElems->type;
+        lastLength = pElems->count * basicDatatypes[type].size;
+        if( resting_place > lastLength ) {
+            resting_place -= lastLength;
+            loop_length += lastLength;
+        } else {
+            int cnt = resting_place / basicDatatypes[type].size;
+            resting_place -= cnt * basicDatatypes[type].size;
+            PUSH_STACK( pStack, pConvertor->stack_pos, pos_desc, 
+                        pElems->count - cnt,
+                        totalDisp + pElems->disp + cnt * pElems->extent,
+                        pos_desc );
+            pConvertor->bConverted += (starting_point - resting_place);
+            return 0;
+        }
+        pos_desc++;  /* advance to the next data */
+        pElems++;
+    }
+
+    return 0;
+}