1
1
openmpi/opal/mca/patcher/base/patcher_base_patch.c
Nathan Hjelm a9bc692d99 opal/patcher: fix xlc support
The xlc compiler seems to behave in a different way that gcc when it
comes the inline asm. There were two problems with the code with xlc:

 - The TOC read in mca_patcher_base_patch_hook used the syntax
   register unsigned long toc asm("r2") to read $r2 (the TOC
   pointer). With gcc this seems to behave as expected but with xlc
   the result in toc is not the same as $r2. I updated the code to use
   asm volatile ("std 2, %0" : "=m" (toc)) to load the TOC pointer.

 - The OPAL_PATCHER_BEGIN macro is meant to be the first thing in a
   hook. On PPC64 it loads the correct TOC pointer (thanks to
   mca_patcher_base_patch_hook) and saves the old one. The
   OPAL_PATCHER_END macro restores the TOC pointer. Because we *need*
   the TOC to be correct before it is accessed in the hook the
   OPAL_PATCHER_BEGIN macro MUST come first. We did this and all was
   well with gcc. With xlc on the other hand there was a TOC access
   before the assembly inserted by OPAL_PATCHER_BEGIN. To fix this
   quickly I broke each hook into a pair of function with the
   OPAL_PATCHER_* macros on the top level functions. This works around
   the issue but is not a clean way to fix this. In the future we
   should 1) either update overwrite to not need this, or 2) figure
   out why xlc is not inserting the asm before the first TOC read.

This fixes open-mpi/ompi#1854

Signed-off-by: Nathan Hjelm <hjelmn@lanl.gov>
2016-08-26 14:43:03 -06:00

192 строки
6.2 KiB
C

/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
/*
* Copyright (c) 2016 Los Alamos National Security, LLC. All rights
* reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#include "opal_config.h"
#include "opal/mca/patcher/patcher.h"
#include "opal/mca/patcher/base/base.h"
#include "opal/util/sys_limits.h"
#include "opal/prefetch.h"
#include <sys/mman.h>
static void mca_patcher_base_patch_construct (mca_patcher_base_patch_t *patch)
{
patch->patch_symbol = NULL;
patch->patch_data_size = 0;
}
static void mca_patcher_base_patch_destruct (mca_patcher_base_patch_t *patch)
{
free (patch->patch_symbol);
}
OBJ_CLASS_INSTANCE(mca_patcher_base_patch_t, opal_list_item_t,
mca_patcher_base_patch_construct,
mca_patcher_base_patch_destruct);
#if defined(__PPC__)
// PowerPC instructions used in patching
// Reference: "PowerPC User Instruction Set Architecture"
static unsigned int addis(unsigned int RT, unsigned int RS, unsigned int UI) {
return (15<<26) + (RT<<21) + (RS<<16) + (UI&0xffff);
}
static unsigned int ori(unsigned int RT, unsigned int RS, unsigned int UI) {
return (24<<26) + (RS<<21) + (RT<<16) + (UI&0xffff);
}
static unsigned int oris(unsigned int RT, unsigned int RS, unsigned int UI) {
return (25<<26) + (RS<<21) + (RT<<16) + (UI&0xffff);
}
static unsigned int mtspr(unsigned int SPR, unsigned int RS) {
return (31<<26) + (RS<<21) + ((SPR&0x1f)<<16) + ((SPR>>5)<<11) + (467<<1);
}
static unsigned int bcctr(unsigned int BO, unsigned int BI, unsigned int BH) {
return (19<<26) + (BO<<21) + (BI<<16) + (BH<<11) + (528<<1);
}
static unsigned int rldicr(unsigned int RT, unsigned int RS, unsigned int SH, unsigned int MB)
{
return (30<<26) + (RS<<21) + (RT<<16) + ((SH&0x1f)<<11) + ((SH>>5)<<1)
+ ((MB&0x1f)<<6) + ((MB>>5)<<5) + (1<<2);
}
static int PatchLoadImm (uintptr_t addr, unsigned int reg, size_t value)
{
#if defined(__PPC64__)
*(unsigned int *) (addr + 0) = addis ( reg, 0, (value >> 48));
*(unsigned int *) (addr + 4) = ori ( reg, reg, (value >> 32));
*(unsigned int *) (addr + 8) = rldicr( reg, reg, 32, 31);
*(unsigned int *) (addr +12) = oris ( reg, reg, (value >> 16));
*(unsigned int *) (addr +16) = ori ( reg, reg, (value >> 0));
return 20;
#else
*(unsigned int *) (addr + 0) = addis ( reg, 0, (value >> 16));
*(unsigned int *) (addr + 4) = ori ( reg, reg, (value >> 0));
return 8;
#endif
}
#endif
static void flush_and_invalidate_cache (unsigned long a)
{
#if OPAL_ASSEMBLY_ARCH == OPAL_IA32
static int have_clflush = -1;
if (OPAL_UNLIKELY(-1 == have_clflush)) {
int32_t cpuid1, cpuid2, tmp;
const int32_t level = 1;
/* cpuid clobbers ebx but it must be restored for -fPIC so save
* then restore ebx */
__asm__ volatile ("xchgl %%ebx, %2\n"
"cpuid\n"
"xchgl %%ebx, %2\n":
"=a" (cpuid1), "=d" (cpuid2), "=r" (tmp) :
"a" (level) :
"ecx");
/* clflush is in edx bit 19 */
have_clflush = !!(cpuid2 & (1 << 19));
}
if (have_clflush) {
/* does not work with AMD processors */
__asm__ volatile("mfence;clflush %0;mfence" : :"m" (*(char*)a));
}
#elif OPAL_ASSEMBLY_ARCH == OPAL_AMD64
__asm__ volatile("mfence;clflush %0;mfence" : :"m" (*(char*)a));
#elif OPAL_ASSEMBLY_ARCH == OPAL_IA64
__asm__ volatile ("fc %0;; sync.i;; srlz.i;;" : : "r"(a) : "memory");
#endif
}
// modify protection of memory range
static void ModifyMemoryProtection (uintptr_t addr, size_t length, int prot)
{
long page_size = opal_getpagesize ();
uintptr_t base = (addr & ~(page_size-1));
uintptr_t bound = ((addr + length + page_size-1) & ~(page_size-1));
length = bound - base;
#if defined(__PPC__)
/* NTH: is a loop necessary here? */
do {
if (mprotect((void *)base, page_size, prot))
perror("MemHook: mprotect failed");
base += page_size;
} while (base < bound);
#else
if (mprotect((void *) base, length, prot)) {
perror("MemHook: mprotect failed");
}
#endif
}
static inline void apply_patch (unsigned char *patch_data, uintptr_t address, size_t data_size)
{
ModifyMemoryProtection (address, data_size, PROT_EXEC|PROT_READ|PROT_WRITE);
memcpy ((void *) address, patch_data, data_size);
for (size_t i = 0 ; i < data_size ; i += 16) {
flush_and_invalidate_cache (address + i);
}
ModifyMemoryProtection (address, data_size, PROT_EXEC|PROT_READ);
}
static void mca_base_patcher_patch_unapply_binary (mca_patcher_base_patch_t *patch)
{
apply_patch (patch->patch_orig_data, patch->patch_orig, patch->patch_data_size);
}
void mca_base_patcher_patch_apply_binary (mca_patcher_base_patch_t *patch)
{
memcpy (patch->patch_orig_data, (void *) patch->patch_orig, patch->patch_data_size);
apply_patch (patch->patch_data, patch->patch_orig, patch->patch_data_size);
patch->patch_restore = mca_base_patcher_patch_unapply_binary;
}
int mca_patcher_base_patch_hook (mca_patcher_base_module_t *module, uintptr_t hook_addr)
{
#if (OPAL_ASSEMBLY_ARCH == OPAL_POWERPC64)
mca_patcher_base_patch_t *hook_patch;
const unsigned int nop = 0x60000000;
hook_patch = OBJ_NEW(mca_patcher_base_patch_t);
if (OPAL_UNLIKELY(NULL == hook_patch)) {
return OPAL_ERR_OUT_OF_RESOURCE;
}
// locate reserved code space in hook function
for (unsigned int *nop_addr = (unsigned int *)hook_addr ; ; nop_addr++) {
if (nop_addr[0] == nop && nop_addr[1] == nop && nop_addr[2] == nop
&& nop_addr[3] == nop && nop_addr[4] == nop) {
hook_patch->patch_orig = (uintptr_t) nop_addr;
break;
}
}
// generate code to restore TOC
unsigned long toc;
asm volatile ("std 2, %0" : "=m" (toc));
hook_patch->patch_data_size = PatchLoadImm((uintptr_t)hook_patch->patch_data, 2, toc);
/* put the hook patch on the patch list so it will be undone on finalize */
opal_list_append (&module->patch_list, &hook_patch->super);
mca_base_patcher_patch_apply_binary (hook_patch);
#endif
return OPAL_SUCCESS;
}