13 files changed, 1180 insertions, 231 deletions
diff --git a/libffi/src/ffitest.c b/libffi/src/ffitest.c
index da528317bb3..a05b746865d 100644
--- a/libffi/src/ffitest.c
+++ b/libffi/src/ffitest.c
@@ -1,5 +1,5 @@
 /* -----------------------------------------------------------------------
-   ffitest.c - Copyright (c) 1996, 1997, 1998, 2002  Red Hat, Inc.
+   ffitest.c - Copyright (c) 1996, 1997, 1998, 2002, 2003  Red Hat, Inc.
 
    Permission is hereby granted, free of charge, to any person obtaining
    a copy of this software and associated documentation files (the
@@ -288,10 +288,121 @@ static test_structure_9 struct9 (test_structure_9 ts)
 static void
 closure_test_fn(ffi_cif* cif,void* resp,void** args, void* userdata)
 {
-  *(ffi_arg*)resp = *(int*)args[0] + (int)(*(float*)args[1]) + (int)(long)userdata;
+  *(ffi_arg*)resp =	/* result: all 16 arguments and userdata, each truncated to int, summed  */
+    (int)*(unsigned long long *)args[0] + (int)(*(int *)args[1]) +
+    (int)(*(unsigned long long *)args[2]) + (int)*(int *)args[3] +
+    (int)(*(signed short *)args[4]) +
+    (int)(*(unsigned long long *)args[5]) +
+    (int)*(int *)args[6] + (int)(*(int *)args[7]) +
+    (int)(*(double *)args[8]) + (int)*(int *)args[9] +
+    (int)(*(int *)args[10]) + (int)(*(float *)args[11]) +
+    (int)*(int *)args[12] + (int)(*(int *)args[13]) +
+    (int)(*(int *)args[14]) +  *(int *)args[15] + (int)(long)userdata;
+  /* Echo every argument, userdata and the sum; *resp was stored as an ffi_arg, so read it back as one.  */
+    	printf("%d %d %d %d %d %d %d %d %d %d %d %d %d %d %d %d %d: %d\n",
+	       (int)*(unsigned long long *)args[0], (int)(*(int *)args[1]),
+	       (int)(*(unsigned long long *)args[2]),
+	       (int)*(int *)args[3], (int)(*(signed short *)args[4]),
+	       (int)(*(unsigned long long *)args[5]),
+	       (int)*(int *)args[6], (int)(*(int *)args[7]),
+	       (int)(*(double *)args[8]), (int)*(int *)args[9],
+	       (int)(*(int *)args[10]), (int)(*(float *)args[11]),
+	       (int)*(int *)args[12], (int)(*(int *)args[13]),
+	       (int)(*(int *)args[14]),*(int *)args[15],
+	       (int)(long)userdata, (int)*(ffi_arg *)resp);
 }
 
-typedef int (*closure_test_type)(int, float);
+typedef int (*closure_test_type)(unsigned long long, int, unsigned long long, 
+				 int, signed short, unsigned long long, int, 
+				 int, double, int, int, float, int, int, 
+				 int, int);
+
+static void closure_test_fn1(ffi_cif* cif,void* resp,void** args,
+			     void* userdata)
+ {
+    *(ffi_arg*)resp =	/* result: all 16 arguments and userdata, each truncated to int, summed  */
+      (int)*(float *)args[0] +(int)(*(float *)args[1]) +
+      (int)(*(float *)args[2]) + (int)*(float *)args[3] +
+      (int)(*(signed short *)args[4]) + (int)(*(float *)args[5]) +
+      (int)*(float *)args[6] + (int)(*(int *)args[7]) +
+      (int)(*(double*)args[8]) + (int)*(int *)args[9] +
+      (int)(*(int *)args[10]) + (int)(*(float *)args[11]) +
+      (int)*(int *)args[12] + (int)(*(int *)args[13]) +
+      (int)(*(int *)args[14]) + *(int *)args[15] + (int)(long)userdata;
+    /* Echo every argument, userdata and the sum; *resp was stored as an ffi_arg, so read it back as one.  */
+    printf("%d %d %d %d %d %d %d %d %d %d %d %d %d %d %d %d %d: %d\n",
+	   (int)*(float *)args[0], (int)(*(float *)args[1]),
+	   (int)(*(float *)args[2]), (int)*(float *)args[3],
+	   (int)(*(signed short *)args[4]), (int)(*(float *)args[5]),
+	   (int)*(float *)args[6], (int)(*(int *)args[7]),
+	   (int)(*(double *)args[8]), (int)*(int *)args[9],
+	   (int)(*(int *)args[10]), (int)(*(float *)args[11]),
+	   (int)*(int *)args[12], (int)(*(int *)args[13]),
+	   (int)(*(int *)args[14]), *(int *)args[15],
+	   (int)(long)userdata, (int)*(ffi_arg *)resp);
+}
+
+typedef int (*closure_test_type1)(float, float, float, float, signed short, 
+				  float, float, int, double, int, int, float,
+				  int, int, int, int);
+
+static void closure_test_fn2(ffi_cif* cif,void* resp,void** args,
+			     void* userdata)
+ {
+    *(ffi_arg*)resp =	/* result: all 16 arguments and userdata, each truncated to int, summed  */
+      (int)*(double *)args[0] +(int)(*(double *)args[1]) +
+      (int)(*(double *)args[2]) + (int)*(double *)args[3] +
+      (int)(*(signed short *)args[4]) + (int)(*(double *)args[5]) +
+      (int)*(double *)args[6] + (int)(*(int *)args[7]) +
+      (int)(*(double *)args[8]) + (int)*(int *)args[9] +
+      (int)(*(int *)args[10]) + (int)(*(float *)args[11]) +
+      (int)*(int *)args[12] + (int)(*(float *)args[13]) +
+      (int)(*(int *)args[14]) + *(int *)args[15] + (int)(long)userdata;
+    /* Echo every argument, userdata and the sum; *resp was stored as an ffi_arg, so read it back as one.  */
+    printf("%d %d %d %d %d %d %d %d %d %d %d %d %d %d %d %d %d: %d\n",
+	   (int)*(double *)args[0], (int)(*(double *)args[1]),
+	   (int)(*(double *)args[2]), (int)*(double *)args[3],
+	   (int)(*(signed short *)args[4]), (int)(*(double *)args[5]),
+	   (int)*(double *)args[6], (int)(*(int *)args[7]),
+	   (int)(*(double*)args[8]), (int)*(int *)args[9],
+	   (int)(*(int *)args[10]), (int)(*(float *)args[11]),
+	   (int)*(int *)args[12], (int)(*(float *)args[13]),
+	   (int)(*(int *)args[14]), *(int *)args[15], (int)(long)userdata,
+	   (int)*(ffi_arg *)resp);
+ }
+
+typedef int (*closure_test_type2)(double, double, double, double, signed short,
+				  double, double, int, double, int, int, float,
+				  int, float, int, int);
+
+static void closure_test_fn3(ffi_cif* cif,void* resp,void** args,
+			     void* userdata)
+ {
+    *(ffi_arg*)resp =	/* result: all 16 arguments and userdata, each truncated to int, summed  */
+      (int)*(float *)args[0] +(int)(*(float *)args[1]) +
+      (int)(*(float *)args[2]) + (int)*(float *)args[3] +
+      (int)(*(float *)args[4]) + (int)(*(float *)args[5]) +
+      (int)*(float *)args[6] + (int)(*(float *)args[7]) +
+      (int)(*(double *)args[8]) + (int)*(int *)args[9] +
+      (int)(*(float *)args[10]) + (int)(*(float *)args[11]) +
+      (int)*(int *)args[12] + (int)(*(float *)args[13]) +
+      (int)(*(float *)args[14]) +  *(int *)args[15] + (int)(long)userdata;
+    /* Echo every argument, userdata and the sum; *resp was stored as an ffi_arg, so read it back as one.  */
+    printf("%d %d %d %d %d %d %d %d %d %d %d %d %d %d %d %d %d: %d\n",
+	   (int)*(float *)args[0], (int)(*(float *)args[1]),
+	   (int)(*(float *)args[2]), (int)*(float *)args[3],
+	   (int)(*(float *)args[4]), (int)(*(float *)args[5]),
+	   (int)*(float *)args[6], (int)(*(float *)args[7]),
+	   (int)(*(double *)args[8]), (int)*(int *)args[9],
+	   (int)(*(float *)args[10]), (int)(*(float *)args[11]),
+	   (int)*(int *)args[12], (int)(*(float *)args[13]),
+	   (int)(*(float *)args[14]), *(int *)args[15], (int)(long)userdata,
+	   (int)*(ffi_arg *)resp);
+ }
+
+typedef int (*closure_test_type3)(float, float, float, float, float, float,
+				  float, float, double, int, float, float, int,
+				  float, float, int);
 #endif
 
 int main(/*@unused@*/ int argc, /*@unused@*/ char *argv[])
@@ -315,6 +426,14 @@ int main(/*@unused@*/ int argc, /*@unused@*/ char *argv[])
   ffi_arg rint;
   long long rlonglong;
 
+# if FFI_CLOSURES
+  /* The closure must not be an automatic variable on
+     platforms (Solaris) that forbid stack execution by default. */
+  static ffi_closure cl;
+#endif
+
+  ffi_type * cl_arg_types[17];
+
   ffi_type ts1_type;
   ffi_type ts2_type;
   ffi_type ts3_type;
@@ -1044,22 +1163,137 @@ int main(/*@unused@*/ int argc, /*@unused@*/ char *argv[])
 # if FFI_CLOSURES
   /* A simple closure test */
     {
-      ffi_closure cl;
-      ffi_type * cl_arg_types[3];
+      (void) puts("\nEnter FFI_CLOSURES\n");
+
+      cl_arg_types[0] = &ffi_type_uint64;
+      cl_arg_types[1] = &ffi_type_uint;
+      cl_arg_types[2] = &ffi_type_uint64;
+      cl_arg_types[3] = &ffi_type_uint;
+      cl_arg_types[4] = &ffi_type_sshort;
+      cl_arg_types[5] = &ffi_type_uint64;
+      cl_arg_types[6] = &ffi_type_uint;
+      cl_arg_types[7] = &ffi_type_uint;
+      cl_arg_types[8] = &ffi_type_double;
+      cl_arg_types[9] = &ffi_type_uint;
+      cl_arg_types[10] = &ffi_type_uint;
+      cl_arg_types[11] = &ffi_type_float;
+      cl_arg_types[12] = &ffi_type_uint;
+      cl_arg_types[13] = &ffi_type_uint;
+      cl_arg_types[14] = &ffi_type_uint;
+      cl_arg_types[15] = &ffi_type_uint;
+      cl_arg_types[16] = NULL;   
+
+      /* Initialize the cif */
+      CHECK(ffi_prep_cif(&cif, FFI_DEFAULT_ABI, 16,
+			 &ffi_type_sint, cl_arg_types) == FFI_OK);
+
+      CHECK(ffi_prep_closure(&cl, &cif, closure_test_fn,
+			     (void *) 3 /* userdata */) == FFI_OK);
+      
+      CHECK((*((closure_test_type)(&cl)))
+	    (1LL, 2, 3LL, 4, 127, 429LL, 7, 8, 9.5, 10, 11, 12, 13, 
+	     19, 21, 1) == 680);
+    }
+
+    {
 
-      cl_arg_types[0] = &ffi_type_sint;
+      cl_arg_types[0] = &ffi_type_float;
       cl_arg_types[1] = &ffi_type_float;
-      cl_arg_types[2] = NULL;
+      cl_arg_types[2] = &ffi_type_float;
+      cl_arg_types[3] = &ffi_type_float;
+      cl_arg_types[4] = &ffi_type_sshort;
+      cl_arg_types[5] = &ffi_type_float;
+      cl_arg_types[6] = &ffi_type_float;
+      cl_arg_types[7] = &ffi_type_uint;
+      cl_arg_types[8] = &ffi_type_double;
+      cl_arg_types[9] = &ffi_type_uint;
+      cl_arg_types[10] = &ffi_type_uint;
+      cl_arg_types[11] = &ffi_type_float;
+      cl_arg_types[12] = &ffi_type_uint;
+      cl_arg_types[13] = &ffi_type_uint;
+      cl_arg_types[14] = &ffi_type_uint;
+      cl_arg_types[15] = &ffi_type_uint;
+      cl_arg_types[16] = NULL;
       
       /* Initialize the cif */
-      CHECK(ffi_prep_cif(&cif, FFI_DEFAULT_ABI, 2, 
-	    	         &ffi_type_sint, cl_arg_types) == FFI_OK);
+      CHECK(ffi_prep_cif(&cif, FFI_DEFAULT_ABI, 16,
+			 &ffi_type_sint, cl_arg_types) == FFI_OK);
 
-      CHECK(ffi_prep_closure(&cl, &cif, closure_test_fn,
-			     (void *) 3 /* userdata */)
-	    == FFI_OK);
-      CHECK((*((closure_test_type)(&cl)))(1, 2.0) == 6);
+      CHECK(ffi_prep_closure(&cl, &cif, closure_test_fn1,
+			     (void *) 3 /* userdata */)  == FFI_OK);
+      
+      CHECK((*((closure_test_type1)(&cl)))
+	    (1.1, 2.2, 3.3, 4.4, 127, 5.5, 6.6, 8, 9, 10, 11, 12.0, 13,
+	     19, 21, 1) == 255);
+    }
+
+    {
+
+      cl_arg_types[0] = &ffi_type_double;
+      cl_arg_types[1] = &ffi_type_double;
+      cl_arg_types[2] = &ffi_type_double;
+      cl_arg_types[3] = &ffi_type_double;
+      cl_arg_types[4] = &ffi_type_sshort;
+      cl_arg_types[5] = &ffi_type_double;
+      cl_arg_types[6] = &ffi_type_double;
+      cl_arg_types[7] = &ffi_type_uint;
+      cl_arg_types[8] = &ffi_type_double;
+      cl_arg_types[9] = &ffi_type_uint;
+      cl_arg_types[10] = &ffi_type_uint;
+      cl_arg_types[11] = &ffi_type_float;
+      cl_arg_types[12] = &ffi_type_uint;
+      cl_arg_types[13] = &ffi_type_float;
+      cl_arg_types[14] = &ffi_type_uint;
+      cl_arg_types[15] = &ffi_type_uint;
+      cl_arg_types[16] = NULL;
+      
+      /* Initialize the cif */
+      CHECK(ffi_prep_cif(&cif, FFI_DEFAULT_ABI, 16,
+			 &ffi_type_sint, cl_arg_types) == FFI_OK);
+
+      CHECK(ffi_prep_closure(&cl, &cif, closure_test_fn2,
+			     (void *) 3 /* userdata */) == FFI_OK);
+
+      CHECK((*((closure_test_type2)(&cl)))
+	    (1, 2, 3, 4, 127, 5, 6, 8, 9, 10, 11, 12.0, 13,
+	     19.0, 21, 1) == 255);
+
+    }
+
+    {
+
+      cl_arg_types[0] = &ffi_type_float;
+      cl_arg_types[1] = &ffi_type_float;
+      cl_arg_types[2] = &ffi_type_float;
+      cl_arg_types[3] = &ffi_type_float;
+      cl_arg_types[4] = &ffi_type_float;
+      cl_arg_types[5] = &ffi_type_float;
+      cl_arg_types[6] = &ffi_type_float;
+      cl_arg_types[7] = &ffi_type_float;
+      cl_arg_types[8] = &ffi_type_double;
+      cl_arg_types[9] = &ffi_type_uint;
+      cl_arg_types[10] = &ffi_type_float;
+      cl_arg_types[11] = &ffi_type_float;
+      cl_arg_types[12] = &ffi_type_uint;
+      cl_arg_types[13] = &ffi_type_float;
+      cl_arg_types[14] = &ffi_type_float;
+      cl_arg_types[15] = &ffi_type_uint;
+      cl_arg_types[16] = NULL;
+      
+      /* Initialize the cif */
+      CHECK(ffi_prep_cif(&cif, FFI_DEFAULT_ABI, 16,
+			 &ffi_type_sint, cl_arg_types) == FFI_OK);
+
+      CHECK(ffi_prep_closure(&cl, &cif, closure_test_fn3,
+			     (void *) 3 /* userdata */)  == FFI_OK);
+      
+      CHECK((*((closure_test_type3)(&cl)))
+	    (1.1, 2.2, 3.3, 4.4, 5.5, 6.6, 7.7, 8.8, 9, 10, 11.11, 12.0, 13,
+	     19.19, 21.21, 1) == 135);
     }
+
+    (void) puts("\nFinished FFI_CLOSURES\n");
+
 # endif
 
   /* If we arrived here, all is good */
diff --git a/libffi/src/powerpc/aix_closure.S b/libffi/src/powerpc/aix_closure.S
index 7fa96f1fc95..d0d50ca56bd 100644
--- a/libffi/src/powerpc/aix_closure.S
+++ b/libffi/src/powerpc/aix_closure.S
@@ -1,6 +1,6 @@
 /* -----------------------------------------------------------------------
-   aix_closures.S - Copyright (c) 2002 Free Software Foundation, Inc.
-   based on darwin_closures.S 
+   aix_closure.S - Copyright (c) 2002 2003 Free Software Foundation, Inc.
+   based on darwin_closure.S 
   
    PowerPC Assembly glue.
 
@@ -106,24 +106,25 @@ ffi_closure_ASM:
 	/* 24 Bytes (Linkage Area) */
 	/* 32 Bytes (params) */
 	/* 104 Bytes (13*8 from FPR) */ 
-	/* 4 Bytes (result)
-	/* 164 Bytes */
+	/* 8 Bytes (result) */
+	/* 168 Bytes */
 	
-	stwu r1,-164(r1)	/* skip over caller save area */
+	stwu r1,-176(r1)	/* skip over caller save area
+				keep stack aligned to 16  */
 
 /* we want to build up an area for the parameters passed */
 /* in registers (both floating point and integer) */
 	
-	/* we store gpr 3 to gpr 10 (aligned to 4) */
-	/* in the parents outgoing area		   */
-	stw   r3, 188(r1)
-	stw   r4, 192(r1)
-	stw   r5, 196(r1) 
-	stw   r6, 200(r1)
-	stw   r7, 204(r1)
-	stw   r8, 208(r1) 
-	stw   r9, 212(r1)
-	stw   r10, 216(r1)
+	/* we store gpr 3 to gpr 10 (aligned to 4)
+	in the parent's outgoing area  */
+	stw   r3, 200(r1)
+	stw   r4, 204(r1)
+	stw   r5, 208(r1) 
+	stw   r6, 212(r1)
+	stw   r7, 216(r1)
+	stw   r8, 220(r1) 
+	stw   r9, 224(r1)
+	stw   r10, 228(r1)
 
 	/* next save fpr 1 to fpr 13 (aligned to 8) */
 	stfd  f1, 56(r1)
@@ -148,14 +149,14 @@ ffi_closure_ASM:
 	addi r4,r1,160
 	
 	/* now load up the pointer to the saved gpr registers */
-	addi r5,r1,188
+	addi r5,r1,200
 
 	/* now load up the pointer to the saved fpr registers */
 	addi r6,r1,56
 
 	/* now load up the pointer to the outgoing parameter  */
 	/* stack in the previous frame */
-	addi r7,r1,220
+	addi r7,r1,232
 	
 	/* make the call */
 	bl .ffi_closure_helper_DARWIN
@@ -243,7 +244,7 @@ L..58:
 /* case void / done	 */
 L..44:
 	
-	addi r1,r1,164		/* restore stack pointer */
+	addi r1,r1,176		/* restore stack pointer */
 	lwz r0,8(r1)		/* get return address */
 	mtlr r0			/* reset link register */
 	blr
diff --git a/libffi/src/powerpc/darwin.S b/libffi/src/powerpc/darwin.S
index 9da89bde287..108d2acff72 100644
--- a/libffi/src/powerpc/darwin.S
+++ b/libffi/src/powerpc/darwin.S
@@ -3,8 +3,6 @@
    
    PowerPC Assembly glue.
 
-   $Id: darwin.S,v 1.1 2001/10/09 05:32:15 bryce Exp $
-
    Permission is hereby granted, free of charge, to any person obtaining
    a copy of this software and associated documentation files (the
    ``Software''), to deal in the Software without restriction, including
@@ -39,30 +37,34 @@
 .text
 	.align 2
 _ffi_call_DARWIN:
-	mr      r12,r8 // We only need r12 until the call, so it doesn't have to be saved...
+LFB0:	
+	mr      r12,r8		/* We only need r12 until the call,
+				so it doesn't have to be saved...  */
+LFB1:	
 	/* Save the old stack pointer as AP.  */
 	mr	r8,r1
-
+LCFI0:	
 	/* Allocate the stack space we need.  */
 	stwux	r1,r1,r4	
 	
 	/* Save registers we use.  */
 	mflr	r9
 
-	stw	r28,-16(r8)
+	stw	r28,-16(r8)	
 	stw	r29,-12(r8)
 	stw	r30, -8(r8)
 	stw	r31, -4(r8)
-	
-	stw	r9,  8(r8)
+
+	stw	r9,  8(r8)	
 	stw	r2, 20(r1)
+LCFI1:	
 
 	/* Save arguments over call...  */
-	mr	r31,r5	/* flags, */
-	mr	r30,r6	/* rvalue, */
-	mr	r29,r7	/* function address, */
-	mr	r28,r8	/* our AP. */
-		
+	mr	r31,r5	/* flags,  */
+	mr	r30,r6	/* rvalue,  */
+	mr	r29,r7	/* function address,  */
+	mr	r28,r8	/* our AP.  */
+LCFI2:		
 	/* Call ffi_prep_args.  */
 	mr	r4,r1
 	li	r9,0
@@ -145,7 +147,8 @@ L(fp_return_value):
 L(float_return_value):
 	stfs	f1,0(r30)
 	b	L(done_return_value)
-//END(_ffi_call_DARWIN)
+LFE1:	
+/* END(_ffi_call_DARWIN)  */
 
 /* Provide a null definition of _ffi_call_AIX.  */
 .text
@@ -155,5 +158,61 @@ L(float_return_value):
 	.align 2
 _ffi_call_AIX:
 	blr
-//END(_ffi_call_AIX)
-
+/* END(_ffi_call_AIX)  */
+
+.data
+.section __TEXT,__eh_frame
+Lframe1:
+	.set	L$set$0,LECIE1-LSCIE1
+	.long	L$set$0	; Length of Common Information Entry
+LSCIE1:
+	.long	0x0	; CIE Identifier Tag
+	.byte	0x1	; CIE Version
+	.ascii	"zR\0"	; CIE Augmentation
+	.byte	0x1	; uleb128 0x1; CIE Code Alignment Factor
+	.byte	0x7c	; sleb128 -4; CIE Data Alignment Factor
+	.byte	0x41	; CIE RA Column
+	.byte   0x1     ; uleb128 0x1; Augmentation size
+	.byte   0x10    ; FDE Encoding (pcrel)
+	.byte	0xc	; DW_CFA_def_cfa
+	.byte	0x1	; uleb128 0x1
+	.byte	0x0	; uleb128 0x0
+	.align	2
+LECIE1:
+LSFDE1:
+	.set	L$set$1,LEFDE1-LASFDE1
+	.long	L$set$1	; FDE Length
+LASFDE1:
+	.set	L$set$2,LASFDE1-Lframe1
+	.long	L$set$2	; FDE CIE offset
+	.long	LFB0-.	; FDE initial location
+	.set	L$set$3,LFE1-LFB0
+	.long	L$set$3	; FDE address range
+	.byte   0x0     ; uleb128 0x0; Augmentation size
+	.byte	0x4	; DW_CFA_advance_loc4
+	.set	L$set$4,LCFI0-LFB1
+	.long	L$set$4
+	.byte	0xd	; DW_CFA_def_cfa_register
+	.byte	0x08	; uleb128 0x08 
+	.byte	0x4	; DW_CFA_advance_loc4
+	.set	L$set$5,LCFI1-LCFI0
+	.long	L$set$5
+	.byte   0x11    ; DW_CFA_offset_extended_sf
+	.byte	0x41	; uleb128 0x41
+	.byte   0x7e    ; sleb128 -2
+	.byte	0x9f	; DW_CFA_offset, column 0x1f 
+	.byte	0x1	; uleb128 0x1 
+	.byte	0x9e	; DW_CFA_offset, column 0x1e
+	.byte	0x2	; uleb128 0x2
+	.byte	0x9d	; DW_CFA_offset, column 0x1d 
+	.byte	0x3	; uleb128 0x3 
+	.byte	0x9c	; DW_CFA_offset, column 0x1c 
+	.byte	0x4	; uleb128 0x4
+	.byte	0x4	; DW_CFA_advance_loc4 
+	.set	L$set$6,LCFI2-LCFI1
+	.long	L$set$6
+	.byte	0xd	; DW_CFA_def_cfa_register 
+	.byte	0x1c	; uleb128 0x1c 
+	.align 2
+LEFDE1:
+	
diff --git a/libffi/src/powerpc/darwin_closure.S b/libffi/src/powerpc/darwin_closure.S
index 9e54d2231b9..9ae17d8cce8 100644
--- a/libffi/src/powerpc/darwin_closure.S
+++ b/libffi/src/powerpc/darwin_closure.S
@@ -1,6 +1,6 @@
 /* -----------------------------------------------------------------------
-   darwin_closures.S - Copyright (c) 2002 Free Software Foundation, Inc.
-   based on ppc_closures.S
+   darwin_closure.S - Copyright (c) 2002 2003 Free Software Foundation, 
+   Inc. based on ppc_closure.S
  
    PowerPC Assembly glue.
 
@@ -37,31 +37,32 @@
 .text
 	.align 2
 _ffi_closure_ASM:
+LFB1:		
+	mflr r0			/* extract return address  */
+	stw r0, 8(r1)		/* save the return address  */
+LCFI0:	
+	/* 24 Bytes (Linkage Area)
+	   32 Bytes (outgoing parameter area, always reserved)
+	   104 Bytes (13*8 from FPR)	
+	   8 Bytes (result)
+	   168 Bytes  */
 	
-	mflr r0			/* extract return address */
-	stw r0, 8(r1)		/* save the return address */
+	stwu r1,-176(r1)	/* skip over caller save area
+				keep stack aligned to 16  */
+LCFI1:		
+	/* we want to build up an area for the parameters passed
+	in registers (both floating point and integer)  */
 	
-	/* 24 Bytes (Linkage Area) */
-	/* 32 Bytes (outgoing parameter area, always reserved) */
-	/* 104 Bytes (13*8 from FPR) */ 
-	/* 4 Bytes (result)
-	/* 164 Bytes */
-	
-	stwu r1,-164(r1)	/* skip over caller save area */
-	
-/* we want to build up an area for the parameters passed */
-/* in registers (both floating point and integer) */
-	
-	/* we store gpr 3 to gpr 10 (aligned to 4) */
-	/* in the parents outgoing area		   */
-	stw   r3, 188(r1)
-	stw   r4, 192(r1)
-	stw   r5, 196(r1) 
-	stw   r6, 200(r1)
-	stw   r7, 204(r1)
-	stw   r8, 208(r1) 
-	stw   r9, 212(r1)
-	stw   r10, 216(r1)
+	/* we store gpr 3 to gpr 10 (aligned to 4)
+	in the parent's outgoing area  */
+	stw   r3, 200(r1)
+	stw   r4, 204(r1)
+	stw   r5, 208(r1) 
+	stw   r6, 212(r1)
+	stw   r7, 216(r1)
+	stw   r8, 220(r1) 
+	stw   r9, 224(r1)
+	stw   r10, 228(r1)
 
 	/* we save fpr 1 to fpr 13 (aligned to 8) */
 	stfd  f1, 56(r1)
@@ -85,15 +86,15 @@ _ffi_closure_ASM:
 	/* now load up the pointer to the result storage */
 	addi r4,r1,160
 	
-	/* now load up the pointer to the saved gpr registers */
-	addi r5,r1,188
+	/* now load up the pointer to the saved gpr registers  */
+	addi r5,r1,200
 
 	/* now load up the pointer to the saved fpr registers */
 	addi r6,r1,56
 
-	/* now load up the pointer to the outgoing parameter  */
-	/* stack in the previous frame */
-	addi r7,r1,220
+	/* now load up the pointer to the outgoing parameter
+	stack in the previous frame  */
+	addi r7,r1,232
 	
 	/* make the call */
 	bl L(_ffi_closure_helper_DARWIN)
@@ -111,7 +112,9 @@ _ffi_closure_ASM:
 	lwzx r3,r4,r3		/* get the contents of that table value */
 	add r3,r3,r4		/* add contents of table to table address */
 	mtctr r3
-	bctr			/* jump to it */
+	bctr			/* jump to it  */
+LFE1:
+	.align	2
 
 .L60:
 	.long .L44-.L60    /* FFI_TYPE_VOID */
@@ -180,10 +183,54 @@ _ffi_closure_ASM:
 /* case void / done	 */
 .L44:
 	
-	addi r1,r1,164		/* restore stack pointer */
-	lwz r0,8(r1)		/* get return address */
-	mtlr r0			/* reset link register */
+	addi r1,r1,176		/* restore stack pointer  */
+	lwz r0,8(r1)		/* get return address  */
+	mtlr r0			/* reset link register  */
 	blr
 	
-/* END(ffi_closure_ASM) */
+/* END(ffi_closure_ASM)  */
+
+.data
+.section __TEXT,__eh_frame
+Lframe1:
+	.set	L$set$0,LECIE1-LSCIE1
+	.long	L$set$0	; Length of Common Information Entry
+LSCIE1:
+	.long	0x0	; CIE Identifier Tag
+	.byte	0x1	; CIE Version
+	.ascii	"zR\0"	; CIE Augmentation
+	.byte	0x1	; uleb128 0x1; CIE Code Alignment Factor
+	.byte	0x7c	; sleb128 -4; CIE Data Alignment Factor
+	.byte	0x41	; CIE RA Column
+	.byte	0x1	; uleb128 0x1; Augmentation size
+	.byte	0x10	; FDE Encoding (pcrel)
+	.byte	0xc	; DW_CFA_def_cfa
+	.byte	0x1	; uleb128 0x1
+	.byte	0x0	; uleb128 0x0
+	.align	2
+LECIE1:
+LSFDE1:
+	.set	L$set$1,LEFDE1-LASFDE1
+	.long	L$set$1	; FDE Length
+
+LASFDE1:
+	.set	L$set$2,LASFDE1-Lframe1
+	.long	L$set$2	; FDE CIE offset
+	.long	LFB1-.	; FDE initial location
+	.set	L$set$3,LFE1-LFB1
+	.long	L$set$3	; FDE address range
+	.byte   0x0     ; uleb128 0x0; Augmentation size
+	.byte	0x4	; DW_CFA_advance_loc4
+	.set	L$set$3,LCFI1-LCFI0
+	.long	L$set$3
+	.byte	0xe	; DW_CFA_def_cfa_offset
+ 	.byte	176,1	; uleb128 176
+	.byte	0x4	; DW_CFA_advance_loc4
+	.set	L$set$4,LCFI0-LFB1
+	.long	L$set$4
+	.byte   0x11    ; DW_CFA_offset_extended_sf
+	.byte	0x41	; uleb128 0x41
+	.byte   0x7e    ; sleb128 -2
+	.align	2
+LEFDE1:
 
diff --git a/libffi/src/powerpc/ffi.c b/libffi/src/powerpc/ffi.c
index c93aec0ed87..ea1a14e8f65 100644
--- a/libffi/src/powerpc/ffi.c
+++ b/libffi/src/powerpc/ffi.c
@@ -137,11 +137,20 @@ void ffi_prep_args(extended_cif *ecif, unsigned *const stack)
       switch ((*ptr)->type)
 	{
 	case FFI_TYPE_FLOAT:
-	case FFI_TYPE_DOUBLE:
-	  if ((*ptr)->type == FFI_TYPE_FLOAT)
-	    double_tmp = *(float *)*p_argv;
+	  double_tmp = *(float *)*p_argv;
+	  if (fparg_count >= NUM_FPR_ARG_REGISTERS)
+	    {
+	      *(float *)next_arg = (float)double_tmp;
+	      next_arg += 1;
+	    }
 	  else
-	    double_tmp = *(double *)*p_argv;
+	    *fpr_base++ = double_tmp;
+	  fparg_count++;
+	  FFI_ASSERT(flags & FLAG_FP_ARGUMENTS);
+	  break;
+
+	case FFI_TYPE_DOUBLE:
+	  double_tmp = *(double *)*p_argv;
 
 	  if (fparg_count >= NUM_FPR_ARG_REGISTERS)
 	    {
@@ -320,6 +329,10 @@ ffi_status ffi_prep_cif_machdep(ffi_cif *cif)
       switch ((*ptr)->type)
 	{
 	case FFI_TYPE_FLOAT:
+	  fparg_count++;
+	  /* floating singles are not 8-aligned on stack */
+	  break;
+
 	case FFI_TYPE_DOUBLE:
 	  fparg_count++;
 	  /* If this FP arg is going on the stack, it must be
@@ -612,20 +625,15 @@ ffi_closure_helper_SYSV (ffi_closure* closure, void * rvalue,
 	case FFI_TYPE_FLOAT:
 	    /* unfortunately float values are stored as doubles
              * in the ffi_closure_SYSV code (since we don't check
-             * the type in that routine).  This is also true
-             * of floats passed on the outgoing parameter stack.
-             * Also, on the outgoing stack all values are aligned
-             * to 8
-             *
-             * Don't you just love the simplicity of this ABI!
+             * the type in that routine).
              */
 
           /* there are 8 64bit floating point registers */
 
           if (nf < 8) {
-	     temp = *(double*)pfr;
+             temp = *(double*)pfr;
              *(float*)pfr = (float)temp;
-	     avalue[i] = pfr;
+             avalue[i] = pfr;
              nf++;
              pfr+=2;
           } else {
@@ -634,12 +642,9 @@ ffi_closure_helper_SYSV (ffi_closure* closure, void * rvalue,
              * parameter stack.  This is probably a really
              * naughty thing to do but...
              */
-	     if (((long)pst) & 4) pst++;
-	     temp = *(double*)pst;
-             *(float*)pst = (float)temp;
 	     avalue[i] = pst;
              nf++;
-             pst+=2;
+             pst+=1;
           }
 	  break;
 
diff --git a/libffi/src/powerpc/ffi_darwin.c b/libffi/src/powerpc/ffi_darwin.c
index d9182ab741d..3f705275c5f 100644
--- a/libffi/src/powerpc/ffi_darwin.c
+++ b/libffi/src/powerpc/ffi_darwin.c
@@ -6,8 +6,6 @@
    Darwin ABI support (c) 2001 John Hornkvist
    AIX ABI support (c) 2002 Free Software Foundation, Inc.
 
-   $Id: ffi_darwin.c,v 1.4 2002/03/07 18:24:42 dje Exp $
-
    Permission is hereby granted, free of charge, to any person obtaining
    a copy of this software and associated documentation files (the
    ``Software''), to deal in the Software without restriction, including
@@ -412,38 +410,40 @@ typedef struct aix_fd_struct {
                   +---------------------------------------+ 20
                   | saved TOC pointer 4                   | 
                   +---------------------------------------+ 24
-                  | always reserved 8*4=32  (revious GPRs)| 
+                  | always reserved 8*4=32 (previous GPRs)| 
                   | according to the linkage convention   |
-                  | from AIX			          |
+                  | from AIX                              |
                   +---------------------------------------+ 56
-                  | our FPR area 13*8=104   		  |
-                  | f1				   	  |
-                  | .	       				  |
-                  | f13    	        		  | 
+                  | our FPR area 13*8=104                 |
+                  | f1                                    |
+                  | .                                     |
+                  | f13                                   | 
                   +---------------------------------------+ 160
-                  | result area 4                         | 
-SP current -->    +---------------------------------------+ 164 <- parent frame
-                  | back chain to caller 4                | 
+                  | result area 8                         |
                   +---------------------------------------+ 168
+                  | alignment to the next multiple of 16  |
+SP current -->    +---------------------------------------+ 176 <- parent frame
+                  | back chain to caller 4                | 
+                  +---------------------------------------+ 180
                   | saved CR 4                            | 
-                  +---------------------------------------+ 172
+                  +---------------------------------------+ 184
                   | saved LR 4                            | 
-                  +---------------------------------------+ 176
+                  +---------------------------------------+ 188
                   | reserved for compilers 4              | 
-                  +---------------------------------------+ 180
+                  +---------------------------------------+ 192
                   | reserved for binders 4                | 
-                  +---------------------------------------+ 184
+                  +---------------------------------------+ 196
                   | saved TOC pointer 4                   | 
-                  +---------------------------------------+ 188
+                  +---------------------------------------+ 200
                   | always reserved 8*4=32  we store our  |
-                  | GPRs here	        		  |
-                  | r3		       			  |
-                  | .	        			  |
-                  | r10      				  |
-                  +---------------------------------------+ 220
-                  | PST area, overflow part	          | 
+                  | GPRs here                             |
+                  | r3                                    |
+                  | .                                     |
+                  | r10                                   |
+                  +---------------------------------------+ 232
+                  | PST area, overflow part               | 
                   +---------------------------------------+ xxx
-                  | ????				  | 
+                  | ????                                  | 
                   +---------------------------------------+ xxx
 
 */
diff --git a/libffi/src/powerpc/ppc_closure.S b/libffi/src/powerpc/ppc_closure.S
index a9ea9c7ee52..e402fb5cda3 100644
--- a/libffi/src/powerpc/ppc_closure.S
+++ b/libffi/src/powerpc/ppc_closure.S
@@ -1,14 +1,13 @@
 #define LIBFFI_ASM
 #include <powerpc/asm.h>
 
-.globl	ffi_closure_helper_SYSV
+        .file   "ppc_closure.S"
 
 ENTRY(ffi_closure_SYSV)
 .LFB1:
 	stwu %r1,-144(%r1)
 .LCFI0:
 	mflr %r0
-	stw %r31,140(%r1)
 .LCFI1:
 	stw %r0,148(%r1)
 
@@ -63,87 +62,136 @@ ENTRY(ffi_closure_SYSV)
         # look up the proper starting point in table 
 	# by using return type as offset
 	addi %r5,%r1,112   # get pointer to results area
-	addis %r4,0,.L60@ha  # get address of jump table
-	addi %r4,%r4,.L60@l
-	slwi %r3,%r3,2         # now multiply return type by 4
-	lwzx %r3,%r4,%r3         # get the contents of that table value
-	add %r3,%r3,%r4          # add contents of table to table address
+	bl .Lget_ret_type0_addr # get pointer to .Lret_type0 into LR
+	mflr %r4           # move to r4
+	slwi %r3,%r3,4     # now multiply return type by 16
+	add %r3,%r3,%r4    # add scaled return type to address of .Lret_type0
 	mtctr %r3
 	bctr               # jump to it
 .LFE1:
-	.align 2
-.L60:
-	.long .L44-.L60    # FFI_TYPE_VOID
-	.long .L50-.L60    # FFI_TYPE_INT
-	.long .L47-.L60    # FFI_TYPE_FLOAT
-	.long .L46-.L60    # FFI_TYPE_DOUBLE
-	.long .L46-.L60    # FFI_TYPE_LONGDOUBLE
-	.long .L56-.L60    # FFI_TYPE_UINT8
-	.long .L55-.L60    # FFI_TYPE_SINT8
-	.long .L58-.L60    # FFI_TYPE_UINT16
-	.long .L57-.L60    # FFI_TYPE_SINT16
-	.long .L50-.L60    # FFI_TYPE_UINT32
-	.long .L50-.L60    # FFI_TYPE_SINT32
-	.long .L48-.L60    # FFI_TYPE_UINT64
-	.long .L48-.L60    # FFI_TYPE_SINT64
-	.long .L44-.L60    # FFI_TYPE_STRUCT
-	.long .L50-.L60    # FFI_TYPE_POINTER
-
-
-# case double
-.L46:   
-        lfd %f1,0(%r5)
-	b .L44
 
-# case float
-.L47:
+# Each of the ret_typeX code fragments has to be exactly 16 bytes long
+# (4 instructions). For cache effectiveness we align to a 16 byte boundary
+# first.
+	.align 4
+
+	nop
+	nop
+	nop
+.Lget_ret_type0_addr:
+	blrl
+
+# case FFI_TYPE_VOID
+.Lret_type0:
+	b .Lfinish
+	nop
+	nop
+	nop
+
+# case FFI_TYPE_INT
+.Lret_type1:
+	lwz %r3,0(%r5)
+	b .Lfinish
+	nop
+	nop
+
+# case FFI_TYPE_FLOAT
+.Lret_type2:
 	lfs %f1,0(%r5)
-	b .L44
-	
-# case long long
-.L48:
+	b .Lfinish
+	nop
+	nop
+
+# case FFI_TYPE_DOUBLE
+.Lret_type3:
+        lfd %f1,0(%r5)
+	b .Lfinish
+	nop
+	nop
+
+# case FFI_TYPE_LONGDOUBLE
+.Lret_type4:
+        lfd %f1,0(%r5)
+	b .Lfinish
+	nop
+	nop
+
+# case FFI_TYPE_UINT8
+.Lret_type5:
+        lbz %r3,3(%r5)
+	b .Lfinish
+	nop
+	nop
+
+# case FFI_TYPE_SINT8
+.Lret_type6:
+	lbz %r3,3(%r5)
+	extsb %r3,%r3
+	b .Lfinish
+	nop
+
+# case FFI_TYPE_UINT16
+.Lret_type7:
+	lhz %r3,2(%r5)
+	b .Lfinish
+	nop
+	nop
+
+# case FFI_TYPE_SINT16
+.Lret_type8:
+	lha %r3,2(%r5)
+	b .Lfinish
+	nop
+	nop
+
+# case FFI_TYPE_UINT32
+.Lret_type9:
+	lwz %r3,0(%r5)
+	b .Lfinish
+	nop
+	nop
+
+# case FFI_TYPE_SINT32
+.Lret_type10:
+	lwz %r3,0(%r5)
+	b .Lfinish
+	nop
+	nop
+
+# case FFI_TYPE_UINT64
+.Lret_type11:
 	lwz %r3,0(%r5)
 	lwz %r4,4(%r5)
-	b .L44
-	
-# case default / int32 / pointer
-.L50:
+	b .Lfinish
+	nop
+
+# case FFI_TYPE_SINT64
+.Lret_type12:
 	lwz %r3,0(%r5)
-	b .L44
-	
-# case signed int8	
-.L55:
-	addi %r5,%r5,3
-	lbz %r3,0(%r5)
-	extsb %r3,%r3
-	b .L44
-
-# case unsigned int8	
-.L56:
-	addi %r5,%r5,3
-        lbz %r3,0(%r5)
-	b .L44
-
-# case signed int16
-.L57:
-	addi %r5,%r5,2
-	lhz %r3,0(%r5)
-	extsh %r3,%r3
-	b .L44
-
-#case unsigned int16
-.L58:	
-	addi %r5,%r5,2
-	lhz %r3,0(%r5)
-
-# case void / done	
-.L44:
+	lwz %r4,4(%r5)
+	b .Lfinish
+	nop
+
+# case FFI_TYPE_STRUCT
+.Lret_type13:
+	b .Lfinish
+	nop
+	nop
+	nop
+
+# case FFI_TYPE_POINTER
+.Lret_type14:
+	lwz %r3,0(%r5)
+	b .Lfinish
+	nop
+	nop
+
+# case done	
+.Lfinish:
 	
-	lwz %r11,0(%r1)
-	lwz %r0,4(%r11)
+	lwz %r0,148(%r1)
 	mtlr %r0
-	lwz %r31,-4(%r11)
-	mr %r1,%r11
+	addi %r1,%r1,144
 	blr
 END(ffi_closure_SYSV)
 
diff --git a/libffi/src/sparc/ffi.c b/libffi/src/sparc/ffi.c
index dc975356b4b..573fc84ee5a 100644
--- a/libffi/src/sparc/ffi.c
+++ b/libffi/src/sparc/ffi.c
@@ -1,5 +1,5 @@
 /* -----------------------------------------------------------------------
-   ffi.c - Copyright (c) 1996 Cygnus Solutions
+   ffi.c - Copyright (c) 1996, 2003 Cygnus Solutions
    
    Sparc Foreign Function Interface 
 
@@ -28,6 +28,12 @@
 
 #include <stdlib.h>
 
+#ifdef SPARC64
+extern void ffi_closure_v9(void);
+#else
+extern void ffi_closure_v8(void);
+#endif
+
 /* ffi_prep_args is called by the assembly routine once stack space
    has been allocated for the function's arguments */
 
@@ -409,3 +415,101 @@ void ffi_call(ffi_cif *cif, void (*fn)(), void *rvalue, void **avalue)
     }
 
 }
+
+ffi_status
+ffi_prep_closure (ffi_closure* closure,
+		  ffi_cif* cif,
+		  void (*fun)(ffi_cif*, void*, void**, void*),
+		  void *user_data)
+{
+  unsigned int *tramp = (unsigned int *) &closure->tramp[0];
+  unsigned long fn;	/* address of the assembly entry point */
+  unsigned long ctx = (unsigned long) closure;	/* closure address */
+  /* Emit the trampoline code, then fill in the closure's data fields.  */
+#ifdef SPARC64
+  /* Trampoline address is equal to the closure address.  We take advantage
+     of that to reduce the trampoline size by 8 bytes. */
+  FFI_ASSERT (cif->abi == FFI_V9);
+  fn = (unsigned long) ffi_closure_v9;
+  tramp[0] = 0x83414000;	/* rd	%pc, %g1	*/
+  tramp[1] = 0xca586010;	/* ldx	[%g1+16], %g5	*/
+  tramp[2] = 0x81c14000;	/* jmp	%g5		*/
+  tramp[3] = 0x01000000;	/* nop			*/
+  *((unsigned long *) &tramp[4]) = fn;	/* byte offset 16: read by the ldx */
+#else
+  FFI_ASSERT (cif->abi == FFI_V8);
+  fn = (unsigned long) ffi_closure_v8;
+  tramp[0] = 0x03000000 | fn >> 10;	/* sethi %hi(fn), %g1	*/
+  tramp[1] = 0x05000000 | ctx >> 10;	/* sethi %hi(ctx), %g2	*/
+  tramp[2] = 0x81c06000 | (fn & 0x3ff);	/* jmp   %g1+%lo(fn)	*/
+  tramp[3] = 0x8410a000 | (ctx & 0x3ff);/* or    %g2, %lo(ctx)	*/
+#endif
+
+  closure->cif = cif;
+  closure->fun = fun;
+  closure->user_data = user_data;
+
+  /* Flush the Icache.  FIXME: alignment isn't certain, assume 8 bytes */
+#ifdef SPARC64
+  asm volatile ("flush	%0" : : "r" (closure) : "memory");
+  asm volatile ("flush	%0" : : "r" (((char *) closure) + 8) : "memory");
+#else
+  asm volatile ("iflush	%0" : : "r" (closure) : "memory");
+  asm volatile ("iflush	%0" : : "r" (((char *) closure) + 8) : "memory");
+#endif
+
+  return FFI_OK;
+}
+
+int
+ffi_closure_sparc_inner(ffi_closure *closure,
+  void *rvalue, unsigned long *gpr, double *fpr)
+{
+  ffi_cif *cif;
+  void **avalue;
+  ffi_type **arg_types;
+  int i, avn, argn;
+  /* GPR/FPR point at the register images saved by the assembly stub.  */
+  cif = closure->cif;
+  avalue = alloca(cif->nargs * sizeof(void *));
+
+  argn = 0;
+
+  /* Copy the caller's structure return address so that the closure
+     returns the data directly to the caller.  */
+  if (cif->flags == FFI_TYPE_STRUCT)
+    {
+      rvalue = (void *) gpr[0];	/* NOTE(review): on v8 gpr[0] holds %i0; confirm */
+      argn = 1;
+    }
+
+  i = 0;
+  avn = cif->nargs;
+  arg_types = cif->arg_types;
+  
+  /* Grab the addresses of the arguments from the stack frame.  */
+  while (i < avn)
+    {
+      /* Args are right-justified in their slots (big-endian assumed).  FIXME */
+      argn += ALIGN(arg_types[i]->size, SIZEOF_ARG) / SIZEOF_ARG;
+
+#ifdef SPARC64
+      if (i < 6 && (arg_types[i]->type == FFI_TYPE_FLOAT
+		 || arg_types[i]->type == FFI_TYPE_DOUBLE
+#if FFI_TYPE_LONGDOUBLE != FFI_TYPE_DOUBLE
+		 || arg_types[i]->type == FFI_TYPE_LONGDOUBLE
+#endif
+		))
+        avalue[i] = ((char *) &fpr[argn]) - arg_types[i]->size;
+      else
+#endif
+        avalue[i] = ((char *) &gpr[argn]) - arg_types[i]->size;
+      i++;
+    }
+
+  /* Invoke the closure.  */
+  (closure->fun) (cif, rvalue, avalue, closure->user_data);
+
+  /* Tell ffi_closure_v8/ffi_closure_v9 how to perform return type promotions.  */
+  return cif->rtype->type;
+}
diff --git a/libffi/src/sparc/v8.S b/libffi/src/sparc/v8.S
index d94fe03d2aa..53374de3855 100644
--- a/libffi/src/sparc/v8.S
+++ b/libffi/src/sparc/v8.S
@@ -1,5 +1,5 @@
 /* -----------------------------------------------------------------------
-   v8.S - Copyright (c) 1996, 1997 Cygnus Solutions
+   v8.S - Copyright (c) 1996, 1997, 2003 Cygnus Solutions
    
    Sparc Foreign Function Interface 
 
@@ -94,6 +94,74 @@ longlong:
 .ffi_call_V8_end:
 	.size	ffi_call_V8,.ffi_call_V8_end-ffi_call_V8
 
+
+#define	STACKFRAME	104	/* 16*4 register window +
+				   1*4 struct return +	
+				   6*4 args backing store +
+				   3*4 locals */
+
+/* ffi_closure_v8(...)
+
+   Receives the closure argument in %g2.   */
+
+	.text
+	.align 8
+	.globl ffi_closure_v8
+
+ffi_closure_v8:
+#ifdef HAVE_AS_REGISTER_PSEUDO_OP
+		.register	%g2, #scratch
+#endif
+.LLFB2:
+	save	%sp, -STACKFRAME, %sp
+.LLCFI1:
+
+	! Store all of the potential argument registers in va_list format.
+	st	%i0, [%fp+68+0]
+	st	%i1, [%fp+68+4]
+	st	%i2, [%fp+68+8]
+	st	%i3, [%fp+68+12]
+	st	%i4, [%fp+68+16]
+	st	%i5, [%fp+68+20]
+
+	! Call ffi_closure_sparc_inner to do the bulk of the work.
+	mov	%g2, %o0		! arg 0: closure, received in %g2
+	add	%fp, -8, %o1		! arg 1: 8-byte return value buffer
+	add	%fp,  68, %o2		! arg 2: the %i0-%i5 images stored above
+	call	ffi_closure_sparc_inner
+	 mov	0, %o3			! arg 3 (delay slot): null FP register area
+
+	! Load up the return value in the proper type.
+	cmp	%o0, FFI_TYPE_VOID
+	be	done1			! not annulled: the next cmp runs in the delay slot
+
+	cmp	%o0, FFI_TYPE_FLOAT
+	be,a	done1
+	 ld	[%fp-8], %f0
+
+	cmp	%o0, FFI_TYPE_DOUBLE
+	be,a	done1
+	 ldd	[%fp-8], %f0
+
+	cmp	%o0, FFI_TYPE_SINT64
+	be,a	integer
+	 ld	[%fp-4], %i1		! second word of the 64-bit result
+
+	cmp	%o0, FFI_TYPE_UINT64
+	be,a	integer
+	 ld	[%fp-4], %i1		! second word of the 64-bit result
+
+integer:
+	ld	[%fp-8], %i0		! first word of the result buffer
+
+done1:
+	ret
+	 restore
+.LLFE2:
+
+.ffi_closure_v8_end:
+	.size	ffi_closure_v8,.ffi_closure_v8_end-ffi_closure_v8
+
 #ifdef SPARC64
 #define WS 8
 #define nword	xword
@@ -148,3 +216,26 @@ longlong:
 	.byte	0x1f	! uleb128 0x1f
 	.align	WS
 .LLEFDE1:
+.LLSFDE2:
+	.uaword	.LLEFDE2-.LLASFDE2	! FDE Length
+.LLASFDE2:
+	.uaword	.LLASFDE2-.LLframe1	! FDE CIE offset
+#ifdef HAVE_AS_SPARC_UA_PCREL
+	.uaword	%r_disp32(.LLFB2)
+	.uaword	.LLFE2-.LLFB2	! FDE address range
+#else
+	.align	WS
+	.nword	.LLFB2
+	.uanword .LLFE2-.LLFB2	! FDE address range
+#endif
+	.byte	0x0	! uleb128 0x0; Augmentation size
+	.byte	0x4	! DW_CFA_advance_loc4
+	.uaword	.LLCFI1-.LLFB2
+	.byte	0xd	! DW_CFA_def_cfa_register
+	.byte	0x1e	! uleb128 0x1e
+	.byte	0x2d	! DW_CFA_GNU_window_save
+	.byte	0x9	! DW_CFA_register
+	.byte	0xf	! uleb128 0xf
+	.byte	0x1f	! uleb128 0x1f
+	.align	WS
+.LLEFDE2:
diff --git a/libffi/src/sparc/v9.S b/libffi/src/sparc/v9.S
index bd358c0d84d..8dc9c90f661 100644
--- a/libffi/src/sparc/v9.S
+++ b/libffi/src/sparc/v9.S
@@ -1,5 +1,5 @@
 /* -----------------------------------------------------------------------
-   v9.S - Copyright (c) 2000 Cygnus Solutions
+   v9.S - Copyright (c) 2000, 2003 Cygnus Solutions
    
    Sparc 64bit Foreign Function Interface 
 
@@ -99,7 +99,7 @@ _ffi_call_V9:
 	cmp	%i3, FFI_TYPE_STRUCT
 	be,pn	%icc, dostruct
 
-	 cmp	%i3, FFI_TYPE_LONGDOUBLE
+	cmp	%i3, FFI_TYPE_LONGDOUBLE
 	bne,pt	%icc, done
 	 nop
 	std	%f0, [%i4+0]
@@ -125,6 +125,88 @@ dostruct:
 .ffi_call_V9_end:
 	.size	ffi_call_V9,.ffi_call_V9_end-ffi_call_V9
 
+
+#define	STACKFRAME	 240	/* 16*8 register window +
+				   6*8 args backing store +
+				   8*8 locals */
+#define	FP		%fp+STACK_BIAS
+
+/* ffi_closure_v9(...)
+
+   Receives the closure argument in %g1.   */
+
+	.text
+	.align 8
+	.globl ffi_closure_v9
+
+ffi_closure_v9:
+.LLFB2:
+	save	%sp, -STACKFRAME, %sp
+.LLCFI1:
+
+	! Store all of the potential argument registers in va_list format.
+	stx	%i0, [FP+128+0]
+	stx	%i1, [FP+128+8]
+	stx	%i2, [FP+128+16]
+	stx	%i3, [FP+128+24]
+	stx	%i4, [FP+128+32]
+	stx	%i5, [FP+128+40]
+
+	! Store possible floating point argument registers too.
+	std	%f0, [FP-48]
+	std	%f2, [FP-40]
+	std	%f4, [FP-32]
+	std	%f6, [FP-24]
+	std	%f8, [FP-16]
+	std	%f10, [FP-8]
+
+	! Call ffi_closure_sparc_inner to do the bulk of the work.
+	mov	%g1, %o0			! arg 0: closure, received in %g1
+	add	%fp, STACK_BIAS-64, %o1		! arg 1: return value buffer at FP-64
+	add	%fp, STACK_BIAS+128, %o2	! arg 2: the %i0-%i5 images stored above
+	call	ffi_closure_sparc_inner
+	 add	%fp, STACK_BIAS-48, %o3		! arg 3 (delay slot): the %f0-%f10 images
+
+	! Load up the return value in the proper type.
+	cmp	%o0, FFI_TYPE_VOID
+	be,pn	%icc, done1			! not annulled: the next cmp runs in the delay slot
+
+	cmp	%o0, FFI_TYPE_FLOAT
+	be,a,pn	%icc, done1
+	 ld	[FP-64], %f0
+
+	cmp	%o0, FFI_TYPE_DOUBLE
+	be,a,pn	%icc, done1
+	 ldd	[FP-64], %f0
+
+	cmp	%o0, FFI_TYPE_LONGDOUBLE
+	be,a,pn	%icc, longdouble1
+	 ldd	[FP-64], %f0			! first half; second half loaded at longdouble1
+
+	cmp	%o0, FFI_TYPE_STRUCT
+	be,pn	%icc, struct1
+
+	! FFI_TYPE_UINT64 | FFI_TYPE_SINT64 | FFI_TYPE_POINTER
+	ldx	[FP-64], %i0
+
+done1:
+	ret
+	 restore
+
+struct1:
+	ldx [FP-56], %i2			! NOTE(review): role of %i2 here is not evident; confirm
+	ret
+	 restore
+
+longdouble1:
+	ldd	[FP-56], %f2			! second 8 bytes of the result buffer
+	ret
+	 restore
+.LLFE2:
+
+.ffi_closure_v9_end:
+	.size	ffi_closure_v9,.ffi_closure_v9_end-ffi_closure_v9
+
 	.section	".eh_frame",#alloc,#write
 .LLframe1:
 	.uaword	.LLECIE1-.LLSCIE1	! Length of Common Information Entry
@@ -169,5 +251,27 @@ dostruct:
 	.byte	0x1f	! uleb128 0x1f
 	.align 8
 .LLEFDE1:
-
+.LLSFDE2:
+	.uaword	.LLEFDE2-.LLASFDE2	! FDE Length
+.LLASFDE2:
+	.uaword	.LLASFDE2-.LLframe1	! FDE CIE offset
+#ifdef HAVE_AS_SPARC_UA_PCREL
+	.uaword	%r_disp32(.LLFB2)
+	.uaword	.LLFE2-.LLFB2		! FDE address range
+#else
+	.align 8
+	.xword	.LLFB2
+	.uaxword	.LLFE2-.LLFB2	! FDE address range
+#endif
+	.byte	0x0	! uleb128 0x0; Augmentation size
+	.byte	0x4	! DW_CFA_advance_loc4
+	.uaword	.LLCFI1-.LLFB2
+	.byte	0xd	! DW_CFA_def_cfa_register
+	.byte	0x1e	! uleb128 0x1e
+	.byte	0x2d	! DW_CFA_GNU_window_save
+	.byte	0x9	! DW_CFA_register
+	.byte	0xf	! uleb128 0xf
+	.byte	0x1f	! uleb128 0x1f
+	.align 8
+.LLEFDE2:
 #endif
diff --git a/libffi/src/x86/ffi.c b/libffi/src/x86/ffi.c
index 68135f97c35..bd0874f771a 100644
--- a/libffi/src/x86/ffi.c
+++ b/libffi/src/x86/ffi.c
@@ -214,35 +214,29 @@ void ffi_call(/*@dependent@*/ ffi_cif *cif,
 
 static void ffi_prep_incoming_args_SYSV (char *stack, void **ret,
 					 void** args, ffi_cif* cif);
-static void ffi_closure_SYSV ();
-static void ffi_closure_raw_SYSV ();
+static void ffi_closure_SYSV (ffi_closure *)
+     __attribute__ ((regparm(1)));
+static void ffi_closure_raw_SYSV (ffi_raw_closure *)
+     __attribute__ ((regparm(1)));
 
-/* This function is jumped to by the trampoline, on entry, %ecx (a
- * caller-save register) holds the address of the closure.  
- * Clearly, this requires __GNUC__, so perhaps we should translate this
- * into an assembly file if this is to be distributed with ffi.
- */
+/* This function is jumped to by the trampoline */
 
 static void
-ffi_closure_SYSV ()
+ffi_closure_SYSV (closure)
+     ffi_closure *closure;
 {
   // this is our return value storage
   long double    res;
 
   // our various things...
-  void          *args;
   ffi_cif       *cif;
   void         **arg_area;
-  ffi_closure   *closure;
   unsigned short rtype;
   void          *resp = (void*)&res;
+  void *args = __builtin_dwarf_cfa ();
 
-  /* grab the trampoline context pointer */
-  asm ("movl %%ecx,%0" : "=r" (closure));
-  
   cif         = closure->cif;
   arg_area    = (void**) alloca (cif->nargs * sizeof (void*));  
-  asm ("leal 8(%%ebp),%0" : "=q" (args));  
 
   /* this call will initialize ARG_AREA, such that each
    * element in that array points to the corresponding 
@@ -330,11 +324,11 @@ ffi_prep_incoming_args_SYSV(char *stack, void **rvalue,
 ({ unsigned char *__tramp = (unsigned char*)(TRAMP); \
    unsigned int  __fun = (unsigned int)(FUN); \
    unsigned int  __ctx = (unsigned int)(CTX); \
-   unsigned int  __dis = __fun - ((unsigned int) __tramp + 10); \
-   *(unsigned char*) &__tramp[0] = 0xb9; \
-   *(unsigned int*)  &__tramp[1] = __ctx; \
-   *(unsigned char*) &__tramp[5] = 0xe9; \
-   *(unsigned int*)  &__tramp[6] = __dis; \
+   unsigned int  __dis = __fun - ((unsigned int) __tramp + FFI_TRAMPOLINE_SIZE); \
+   *(unsigned char*) &__tramp[0] = 0xb8; \
+   *(unsigned int*)  &__tramp[1] = __ctx; /* movl __ctx, %eax */ \
+   *(unsigned char *)  &__tramp[5] = 0xe9; \
+   *(unsigned int*)  &__tramp[6] = __dis; /* jmp __fun  */ \
  })
 
 
@@ -364,30 +358,23 @@ ffi_prep_closure (ffi_closure* closure,
 #if !FFI_NO_RAW_API
 
 static void
-ffi_closure_raw_SYSV ()
+ffi_closure_raw_SYSV (closure)
+     ffi_raw_closure *closure;
 {
   // this is our return value storage
   long double    res;
 
   // our various things...
-  void            *args;
   ffi_raw         *raw_args;
   ffi_cif         *cif;
-  ffi_raw_closure *closure;
   unsigned short   rtype;
   void            *resp = (void*)&res;
 
-  /* grab the trampoline context pointer */
-  asm ("movl %%ecx,%0" : "=r" (closure));
-
-  /* take the argument pointer */
-  asm ("leal 8(%%ebp),%0" : "=q" (args));  
-
   /* get the cif */
   cif = closure->cif;
 
   /* the SYSV/X86 abi matches the RAW API exactly, well.. almost */
-  raw_args = (ffi_raw*) args;
+  raw_args = (ffi_raw*) __builtin_dwarf_cfa ();
 
   (closure->fun) (cif, resp, raw_args, closure->user_data);
 
diff --git a/libffi/src/x86/ffi64.c b/libffi/src/x86/ffi64.c
index f278a924eda..9427a37c8b5 100644
--- a/libffi/src/x86/ffi64.c
+++ b/libffi/src/x86/ffi64.c
@@ -27,6 +27,7 @@
 #include <ffi_common.h>
 
 #include <stdlib.h>
+#include <stdarg.h>
 
 /* ffi_prep_args is called by the assembly routine once stack space
    has been allocated for the function's arguments */
@@ -285,7 +286,8 @@ ffi_prep_args (stackLayout *stack, extended_cif *ecif)
   /* First check if the return value should be passed in memory. If so,
      pass the pointer as the first argument.  */
   gprcount = ssecount = 0;
-  if (examine_argument (ecif->cif->rtype, 1, &g, &s) == 0)
+  if (ecif->cif->rtype->type != FFI_TYPE_VOID 
+      && examine_argument (ecif->cif->rtype, 1, &g, &s) == 0)
     (void *)stack->gpr[gprcount++] = ecif->rvalue;
 
   for (i=ecif->cif->nargs, p_arg=ecif->cif->arg_types, p_argv = ecif->avalue;
@@ -389,8 +391,8 @@ ffi_prep_cif_machdep (ffi_cif *cif)
 
   /* If the return value should be passed in memory, pass the pointer
      as the first argument. The actual memory isn't allocated here.  */
-
-  if (examine_argument (cif->rtype, 1, &g, &s) == 0)
+  if (cif->rtype->type != FFI_TYPE_VOID 
+      && examine_argument (cif->rtype, 1, &g, &s) == 0)
     gprcount = 1;
 
   /* Go over all arguments and determine the way they should be passed.
@@ -570,4 +572,135 @@ void ffi_call(/*@dependent@*/ ffi_cif *cif,
     }
 }
 
+extern void ffi_closure_UNIX64(void);
+
+ffi_status
+ffi_prep_closure (ffi_closure* closure,
+		  ffi_cif* cif,
+		  void (*fun)(ffi_cif*, void*, void**, void*),
+		  void *user_data)
+{
+  volatile unsigned short *tramp;
+  /* Write the trampoline as 16-bit words, then fill in the closure.  */
+  /* FFI_ASSERT (cif->abi == FFI_OSF);  NOTE(review): disabled ABI check; confirm the intended ABI constant */
+
+  tramp = (volatile unsigned short *) &closure->tramp[0];
+  tramp[0] = 0xbb49;		/* mov <code>, %r11	*/
+  tramp[5] = 0xba49;		/* mov <data>, %r10	*/
+  tramp[10] = 0xff49;		/* jmp *%r11	*/
+  tramp[11] = 0x00e3;		/* last byte of the jmp, then a zero byte */
+  *(void * volatile *) &tramp[1] = ffi_closure_UNIX64;	/* <code> immediate */
+  *(void * volatile *) &tramp[6] = closure;		/* <data> immediate */
+
+  closure->cif = cif;
+  closure->fun = fun;
+  closure->user_data = user_data;
+
+  return FFI_OK;
+}
+
+/* Called from ffi_closure_UNIX64.  Builds the argument pointer array by
+   walking the va_list L, calls the user's closure function with RP as the
+   result buffer, and returns the ffi type code of the return value.  */
+int
+ffi_closure_UNIX64_inner(ffi_closure *closure, va_list l, void *rp)
+{
+  ffi_cif *cif;
+  void **avalue;
+  ffi_type **arg_types;
+  long i, avn;
+
+  cif = closure->cif;
+  avalue = alloca(cif->nargs * sizeof(void *));
+
+  i = 0;
+  avn = cif->nargs;
+  arg_types = cif->arg_types;
+  
+  /* Point avalue[i] at each argument: register save area or stack.  */
+  while (i < avn)
+    {
+      switch (arg_types[i]->type)
+	{
+	case FFI_TYPE_SINT8:
+	case FFI_TYPE_UINT8:
+	case FFI_TYPE_SINT16:
+	case FFI_TYPE_UINT16:
+	case FFI_TYPE_SINT32:
+	case FFI_TYPE_UINT32:
+	case FFI_TYPE_SINT64:
+	case FFI_TYPE_UINT64:
+	case FFI_TYPE_POINTER:
+	  {
+	    if (l->gp_offset > 48-8)	/* all six 8-byte GPR slots consumed */
+	      {
+		avalue[i] = l->overflow_arg_area;
+		l->overflow_arg_area = (char *)l->overflow_arg_area + 8;
+	      }
+	    else
+	      {
+		avalue[i] = (char *)l->reg_save_area + l->gp_offset;
+		l->gp_offset += 8;
+	      }
+	  }
+	  break;
+
+	case FFI_TYPE_STRUCT:
+	  /* FIXME: struct arguments are not supported yet.  */
+	  FFI_ASSERT(0);
+	  break;
+
+	case FFI_TYPE_DOUBLE:
+	  {
+	    if (l->fp_offset > 176-16)	/* all eight 16-byte XMM slots consumed */
+	      {
+		avalue[i] = l->overflow_arg_area;
+		l->overflow_arg_area = (char *)l->overflow_arg_area + 8;
+	      }
+	    else
+	      {
+		avalue[i] = (char *)l->reg_save_area + l->fp_offset;
+		l->fp_offset += 16;
+	      }
+	  }
+#if DEBUG_FFI
+	  fprintf (stderr, "double arg %ld = %g\n", i, *(double *)avalue[i]);
+#endif
+	  break;
+	  
+	case FFI_TYPE_FLOAT:
+	  {
+	    if (l->fp_offset > 176-16)	/* all eight 16-byte XMM slots consumed */
+	      {
+		avalue[i] = l->overflow_arg_area;
+		l->overflow_arg_area = (char *)l->overflow_arg_area + 8;
+	      }
+	    else
+	      {
+		avalue[i] = (char *)l->reg_save_area + l->fp_offset;
+		l->fp_offset += 16;
+	      }
+	  }
+#if DEBUG_FFI
+	  fprintf (stderr, "float arg %ld = %g\n", i, *(float *)avalue[i]);
+#endif
+	  break;
+	  
+	default:
+	  FFI_ASSERT(0);
+	}
+
+      i++;
+    }
+
+  /* Invoke the closure.  */
+  (closure->fun) (cif, rp, avalue, closure->user_data);
+
+  /* FIXME: Structs not supported.  */
+  FFI_ASSERT(cif->rtype->type != FFI_TYPE_STRUCT);
+
+  /* Tell ffi_closure_UNIX64 how to perform return type promotions.  */
+
+  return cif->rtype->type;
+}
 #endif /* ifndef __x86_64__ */
diff --git a/libffi/src/x86/unix64.S b/libffi/src/x86/unix64.S
index 2e64b4195bf..f0cd3c9c0c9 100644
--- a/libffi/src/x86/unix64.S
+++ b/libffi/src/x86/unix64.S
@@ -162,5 +162,141 @@ sse2floatfloat:
 	movaps	(%rdi), %xmm0
 	movq	%xmm0, (%rsi)
 	ret
-	
+
+	.align	2
+.globl ffi_closure_UNIX64
+        .type	ffi_closure_UNIX64,@function
+
+ffi_closure_UNIX64:
+.LFB2:
+        pushq   %rbp
+.LCFI10:
+        movq    %rsp, %rbp
+.LCFI11:
+        subq    $240, %rsp
+.LCFI12:
+	movq	%rdi, -176(%rbp)	/* save the six integer argument registers */
+        movq    %rsi, -168(%rbp)
+        movq    %rdx, -160(%rbp)
+        movq    %rcx, -152(%rbp)
+        movq    %r8, -144(%rbp)
+        movq    %r9, -136(%rbp)
+        /* FIXME: We can avoid all this stashing of XMM registers by
+	   (in ffi_prep_closure) computing the number of
+	   floating-point args and moving it into %rax before calling
+	   this function.  Once this is done, uncomment the next few
+	   lines and only the essential XMM registers will be written
+	   to memory.  This is a significant saving.  */
+/*         movzbl  %al, %eax  */
+/*         movq    %rax, %rdx */
+/*         leaq    0(,%rdx,4), %rax */
+/*         leaq    2f(%rip), %rdx */
+/*         subq    %rax, %rdx */
+        leaq    -1(%rbp), %rax
+/*         jmp     *%rdx */
+        movaps  %xmm7, -15(%rax)	/* i.e. -16(%rbp) */
+        movaps  %xmm6, -31(%rax)
+        movaps  %xmm5, -47(%rax)
+        movaps  %xmm4, -63(%rax)
+        movaps  %xmm3, -79(%rax)
+        movaps  %xmm2, -95(%rax)
+        movaps  %xmm1, -111(%rax)
+        movaps  %xmm0, -127(%rax)	/* i.e. -128(%rbp) = -176(%rbp) + 48 */
+2:
+        movl    %edi, -180(%rbp)
+        movl    $0, -224(%rbp)		/* va_list at -224(%rbp): gp_offset = 0 */
+        movl    $48, -220(%rbp)		/* fp_offset = 48, just past the GPR images */
+        leaq    16(%rbp), %rax
+        movq    %rax, -216(%rbp)	/* overflow_arg_area = caller's stack arguments */
+        leaq    -176(%rbp), %rdx
+        movq    %rdx, -208(%rbp)	/* reg_save_area = register images saved above */
+        leaq    -224(%rbp), %rsi	/* arg 2: the va_list */
+	movq	%r10, %rdi		/* arg 1: closure, placed in %r10 by the trampoline */
+	movq	%rsp, %rdx		/* arg 3: result buffer at the bottom of the frame */
+        call    ffi_closure_UNIX64_inner@PLT
+
+	cmpl	$FFI_TYPE_FLOAT, %eax
+	je	1f
+	cmpl	$FFI_TYPE_DOUBLE, %eax
+	je	2f
+	cmpl	$FFI_TYPE_LONGDOUBLE, %eax	/* NOTE(review): long double shares the XMM path; confirm */
+	je	3f
+	cmpl	$FFI_TYPE_STRUCT, %eax
+	je	4f
+	popq	%rax			/* integer result: first 8 bytes of the result buffer */
+        leave
+        ret
+1:
+2:
+3:	
+	movaps	-240(%rbp), %xmm0	/* FP result: read from the same result buffer */
+        leave
+        ret
+4:
+	leave
+	ret
+.LFE2:	
+		
+        .section        .eh_frame,"a",@progbits
+.Lframe0:
+        .long   .LECIE1-.LSCIE1
+.LSCIE1:
+        .long   0x0
+        .byte   0x1
+        .string "zR"
+        .uleb128 0x1
+        .sleb128 -8
+        .byte   0x10
+        .uleb128 0x1
+        .byte   0x1b
+        .byte   0xc
+        .uleb128 0x7
+        .uleb128 0x8
+        .byte   0x90
+        .uleb128 0x1
+        .align 8
+.LECIE1:
+.LSFDE1:
+	.long	.LEFDE1-.LASFDE1
+.LASFDE1:
+        .long   .LASFDE1-.Lframe0
+
+        .long   .LFB1-.
+        .long   .LFE1-.LFB1
+        .uleb128 0x0
+        .byte   0x4		# DW_CFA_advance_loc4
+        .long   .LCFI0-.LFB1
+        .byte   0xe		# DW_CFA_def_cfa_offset
+        .uleb128 0x10
+        .byte   0x86		# DW_CFA_offset: r6 at cfa-16
+        .uleb128 0x2
+        .byte   0x4		# DW_CFA_advance_loc4
+        .long   .LCFI1-.LCFI0
+        .byte   0x86		# DW_CFA_offset: r6 at cfa-16
+        .uleb128 0x2
+        .byte   0xd		# DW_CFA_def_cfa_reg: r6
+        .uleb128 0x6
+	.align 8
+.LEFDE1:
+.LSFDE3:
+        .long   .LEFDE3-.LASFDE3        # FDE Length
+.LASFDE3:
+        .long   .LASFDE3-.Lframe0       # FDE CIE offset
+
+        .long   .LFB2-. # FDE initial location
+        .long   .LFE2-.LFB2     # FDE address range
+        .uleb128 0x0    # Augmentation size
+        .byte   0x4     # DW_CFA_advance_loc4
+        .long   .LCFI10-.LFB2
+        .byte   0xe     # DW_CFA_def_cfa_offset
+        .uleb128 0x10
+        .byte   0x86    # DW_CFA_offset, column 0x6
+        .uleb128 0x2
+        .byte   0x4     # DW_CFA_advance_loc4
+        .long   .LCFI11-.LCFI10
+        .byte   0xd     # DW_CFA_def_cfa_register
+        .uleb128 0x6
+        .align 8
+.LEFDE3:
+
 #endif /* __x86_64__  */