Fix some call convention bugs on PPC32

Upstream PR: https://github.com/boostorg/context/pull/215

--- libs/context/build/Jamfile.v2.orig
+++ libs/context/build/Jamfile.v2
@@ -331,6 +331,7 @@
    : asm/make_ppc32_sysv_macho_gas.S
      asm/jump_ppc32_sysv_macho_gas.S
      asm/ontop_ppc32_sysv_macho_gas.S
+     asm/tail_ppc32_sysv_elf_gas.cpp
    : <abi>sysv
      <address-model>32
      <architecture>power
--- libs/context/src/asm/make_ppc32_sysv_macho_gas.S.orig
+++ libs/context/src/asm/make_ppc32_sysv_macho_gas.S
@@ -85,11 +85,12 @@
     clrrwi  r3, r3, 4
 
     ; reserve space for context-data on context-stack
-    ; including 64 byte of linkage + parameter area (R1  16 == 0)
+    ; including 64 byte of linkage + parameter area (R1 % 16 == 0)
     subi  r3, r3, 336
 
     ; third arg of make_fcontext() == address of context-function
-    stw  r5, 240(r3)
+    ; store as trampoline's R31
+    stw  r5, 224(r3)
 
     ; set back-chain to zero
     li   r0, 0
@@ -106,12 +107,15 @@
     ; load LR
     mflr  r0
     ; jump to label 1
-    bl  l1
-l1:
+    bcl  20, 31, L1
+L1:
     ; load LR into R4
     mflr  r4
+    ; compute abs address of trampoline, use as PC
+    addi  r5, r4, lo16(Ltrampoline - L1)
+    stw  r5, 240(r3)
     ; compute abs address of label finish
-    addi  r4, r4, lo16((finish - .) + 4)
+    addi  r4, r4, lo16(Lfinish - L1)
     ; restore LR
     mtlr  r0
     ; save address of finish as return-address for context-function
@@ -123,15 +127,28 @@ l1:
 
     blr  ; return pointer to context-data
 
-finish:
-    ; save return address into R0
-    mflr  r0
-    ; save return address on stack, set up stack frame
-    stw  r0, 4(r1)
-    ; allocate stack space, R1  16 == 0
-    stwu  r1, -16(r1)
+Ltrampoline:
+    ; We get R31 = context-function, R3 = address of transfer_t,
+    ; but we need to pass R3:R4 = transfer_t.
+    mtctr  r31
+    lwz  r4, 4(r3)
+    lwz  r3, 0(r3)
+    bctr
 
+Lfinish:
+    ; load address of _exit into CTR
+    bcl  20, 31, L2
+L2:
+    mflr  r4
+    addis  r4, r4, ha16(Lexitp - L2)
+    lwz  r4, lo16(Lexitp - L2)(r4)
+    mtctr  r4
     ; exit code is zero
     li  r3, 0
     ; exit application
-    bl  _exit
+    bctr
+
+.const_data
+.align 2
+Lexitp:
+    .long  __exit
--- libs/context/src/asm/ontop_ppc32_sysv_macho_gas.S.orig
+++ libs/context/src/asm/ontop_ppc32_sysv_macho_gas.S
@@ -177,7 +177,7 @@
     lwz  r29, 216(r1)  ; restore R29
     lwz  r30, 220(r1)  ; restore R30
     lwz  r31, 224(r1)  ; restore R31
-    lwz  r4,  228(r1)  ; restore hidden
+    lwz  r3,  228(r1)  ; restore hidden
 
     ; restore CR
     lwz   r0, 232(r1)
@@ -190,12 +190,12 @@
     ; adjust stack
     addi  r1, r1, 244
 
-    ; return transfer_t 
-    stw  r7, 0(r4)
-    stw  r5, 4(r4)
-
-    ; restore CTR
-    mtctr r6
-
-    ; jump to ontop-function
-    bctr
+    ; Need to pass ontop_fcontext_tail(
+    ;   hidden R3, 
+    ;   R4 = ignore,
+    ;   R5 = data,
+    ;   R6 = ontop-function,
+    ;   R7 = fcontext_t
+    ; )
+    ; All of these registers are correctly set at this point
+    b _ontop_fcontext_tail
