Add ppc64le patches for glibc 2.17 from CentOS git

messense 2021-05-13 11:35:09 +08:00
parent f9716e8b90
commit 798904409c
50 changed files with 28768 additions and 0 deletions


@@ -0,0 +1,31 @@
From 920e759ea4f48ca9c8b4dba6dfe5c88d27033121 Mon Sep 17 00:00:00 2001
From: Adhemerval Zanella <azanella@linux.vnet.ibm.com>
Date: Mon, 17 Jun 2013 15:50:53 -0500
Subject: [PATCH 36/42] PowerPC: Reserve TCB space for EBB framework
This patch reserves four pointers to be used in the future Event-Based
Branch framework for PowerPC.
(cherry picked from commit e55a9b256d53c7fc5145e3e4d338d3741b23e232)
---
nptl/sysdeps/powerpc/tls.h | 5 +++++
2 files changed, 10 insertions(+)
diff --git glibc-2.17-c758a686/nptl/sysdeps/powerpc/tls.h glibc-2.17-c758a686/nptl/sysdeps/powerpc/tls.h
index 4c09eec..611c773 100644
--- glibc-2.17-c758a686/nptl/sysdeps/powerpc/tls.h
+++ glibc-2.17-c758a686/nptl/sysdeps/powerpc/tls.h
@@ -61,6 +61,11 @@ typedef union dtv
are private. */
typedef struct
{
+ /* Reservation for the Event-Based Branching ABI. */
+ uintptr_t ebb_handler;
+ uintptr_t ebb_ctx_pointer;
+ uintptr_t ebb_reserved1;
+ uintptr_t ebb_reserved2;
uintptr_t pointer_guard;
uintptr_t stack_guard;
dtv_t *dtv;
--
1.7.11.7
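The reservation works because of where the new members sit. A minimal sketch, not glibc code, assuming the usual PowerPC TLS layout in which the thread pointer points just past the end of tcbhead_t: members are addressed at fixed negative offsets from the thread pointer, so growing the struct at the front leaves every existing offset unchanged.

/* --- sketch, not part of the patch --- */
#include <stddef.h>
#include <stdint.h>

typedef struct
{
  /* New reservation from the patch above.  */
  uintptr_t ebb_handler;
  uintptr_t ebb_ctx_pointer;
  uintptr_t ebb_reserved1;
  uintptr_t ebb_reserved2;
  /* Pre-existing members.  */
  uintptr_t pointer_guard;
  uintptr_t stack_guard;
  void *dtv;                          /* stand-in for dtv_t * */
} tcb_sketch;

/* Distance of a member from the end of the struct (the thread pointer in
   this sketch); adding members at the front does not change it.  */
#define OFFSET_FROM_TP(member) \
  ((long) (sizeof (tcb_sketch) - offsetof (tcb_sketch, member)))

int
main (void)
{
  /* stack_guard stays two pointer-sized slots below the thread pointer
     no matter how many reserved slots precede it.  */
  return OFFSET_FROM_TP (stack_guard) == (long) (2 * sizeof (void *)) ? 0 : 1;
}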


@@ -0,0 +1,269 @@
From c00f26c0eaba5a9680aac0f98de4b6e385a8cb82 Mon Sep 17 00:00:00 2001
From: Adhemerval Zanella <azanella@linux.vnet.ibm.com>
Date: Fri, 8 Mar 2013 11:07:15 -0300
Subject: [PATCH 18/42] PowerPC: unify math_ldbl.h implementations
This patch removes redundant definitions from the PowerPC specific
math_ldbl.h, using the definitions from ieee754 math_ldbl.h.
(backported from commit edf66e57fc2bac083ecc9756a5fe47f9041ed3bb)
---
sysdeps/ieee754/ldbl-128ibm/math_ldbl.h | 10 +-
sysdeps/powerpc/Implies | 1 +
sysdeps/powerpc/fpu/math_ldbl.h | 171 ++------------------------------
sysdeps/unix/sysv/linux/powerpc/Implies | 4 -
5 files changed, 34 insertions(+), 168 deletions(-)
delete mode 100644 sysdeps/unix/sysv/linux/powerpc/Implies
diff --git glibc-2.17-c758a686/sysdeps/ieee754/ldbl-128ibm/math_ldbl.h glibc-2.17-c758a686/sysdeps/ieee754/ldbl-128ibm/math_ldbl.h
index be9ac71..1cce1fc 100644
--- glibc-2.17-c758a686/sysdeps/ieee754/ldbl-128ibm/math_ldbl.h
+++ glibc-2.17-c758a686/sysdeps/ieee754/ldbl-128ibm/math_ldbl.h
@@ -125,7 +125,7 @@ ldbl_insert_mantissa (int sign, int exp, int64_t hi64, u_int64_t lo64)
/* Handy utility functions to pack/unpack/cononicalize and find the nearbyint
of long double implemented as double double. */
static inline long double
-ldbl_pack (double a, double aa)
+default_ldbl_pack (double a, double aa)
{
union ibm_extended_long_double u;
u.dd[0] = a;
@@ -134,7 +134,7 @@ ldbl_pack (double a, double aa)
}
static inline void
-ldbl_unpack (long double l, double *a, double *aa)
+default_ldbl_unpack (long double l, double *a, double *aa)
{
union ibm_extended_long_double u;
u.d = l;
@@ -142,6 +142,12 @@ ldbl_unpack (long double l, double *a, double *aa)
*aa = u.dd[1];
}
+#ifndef ldbl_pack
+# define ldbl_pack default_ldbl_pack
+#endif
+#ifndef ldbl_unpack
+# define ldbl_unpack default_ldbl_unpack
+#endif
/* Convert a finite long double to canonical form.
Does not handle +/-Inf properly. */
diff --git glibc-2.17-c758a686/sysdeps/powerpc/Implies glibc-2.17-c758a686/sysdeps/powerpc/Implies
index 7ccf9a7..78dba95 100644
--- glibc-2.17-c758a686/sysdeps/powerpc/Implies
+++ glibc-2.17-c758a686/sysdeps/powerpc/Implies
@@ -1,4 +1,5 @@
# On PowerPC we use the IBM extended long double format.
ieee754/ldbl-128ibm
+ieee754/ldbl-opt
ieee754/dbl-64
ieee754/flt-32
diff --git glibc-2.17-c758a686/sysdeps/powerpc/fpu/math_ldbl.h glibc-2.17-c758a686/sysdeps/powerpc/fpu/math_ldbl.h
index 6cd6d0b..36378c0 100644
--- glibc-2.17-c758a686/sysdeps/powerpc/fpu/math_ldbl.h
+++ glibc-2.17-c758a686/sysdeps/powerpc/fpu/math_ldbl.h
@@ -2,132 +2,12 @@
#error "Never use <math_ldbl.h> directly; include <math_private.h> instead."
#endif
-#include <sysdeps/ieee754/ldbl-128/math_ldbl.h>
-#include <ieee754.h>
-
-static inline void
-ldbl_extract_mantissa (int64_t *hi64, u_int64_t *lo64, int *exp, long double x)
-{
- /* We have 105 bits of mantissa plus one implicit digit. Since
- 106 bits are representable we use the first implicit digit for
- the number before the decimal point and the second implicit bit
- as bit 53 of the mantissa. */
- unsigned long long hi, lo;
- int ediff;
- union ibm_extended_long_double eldbl;
- eldbl.d = x;
- *exp = eldbl.ieee.exponent - IBM_EXTENDED_LONG_DOUBLE_BIAS;
-
- lo = ((long long)eldbl.ieee.mantissa2 << 32) | eldbl.ieee.mantissa3;
- hi = ((long long)eldbl.ieee.mantissa0 << 32) | eldbl.ieee.mantissa1;
- /* If the lower double is not a denomal or zero then set the hidden
- 53rd bit. */
- if (eldbl.ieee.exponent2 > 0x001)
- {
- lo |= (1ULL << 52);
- lo = lo << 7; /* pre-shift lo to match ieee854. */
- /* The lower double is normalized separately from the upper. We
- may need to adjust the lower manitissa to reflect this. */
- ediff = eldbl.ieee.exponent - eldbl.ieee.exponent2;
- if (ediff > 53)
- lo = lo >> (ediff-53);
- }
- hi |= (1ULL << 52);
-
- if ((eldbl.ieee.negative != eldbl.ieee.negative2)
- && ((eldbl.ieee.exponent2 != 0) && (lo != 0LL)))
- {
- hi--;
- lo = (1ULL << 60) - lo;
- if (hi < (1ULL << 52))
- {
- /* we have a borrow from the hidden bit, so shift left 1. */
- hi = (hi << 1) | (lo >> 59);
- lo = 0xfffffffffffffffLL & (lo << 1);
- *exp = *exp - 1;
- }
- }
- *lo64 = (hi << 60) | lo;
- *hi64 = hi >> 4;
-}
-
-static inline long double
-ldbl_insert_mantissa (int sign, int exp, int64_t hi64, u_int64_t lo64)
-{
- union ibm_extended_long_double u;
- unsigned long hidden2, lzcount;
- unsigned long long hi, lo;
-
- u.ieee.negative = sign;
- u.ieee.negative2 = sign;
- u.ieee.exponent = exp + IBM_EXTENDED_LONG_DOUBLE_BIAS;
- u.ieee.exponent2 = exp-53 + IBM_EXTENDED_LONG_DOUBLE_BIAS;
- /* Expect 113 bits (112 bits + hidden) right justified in two longs.
- The low order 53 bits (52 + hidden) go into the lower double */
- lo = (lo64 >> 7)& ((1ULL << 53) - 1);
- hidden2 = (lo64 >> 59) & 1ULL;
- /* The high order 53 bits (52 + hidden) go into the upper double */
- hi = (lo64 >> 60) & ((1ULL << 11) - 1);
- hi |= (hi64 << 4);
-
- if (lo != 0LL)
- {
- /* hidden2 bit of low double controls rounding of the high double.
- If hidden2 is '1' then round up hi and adjust lo (2nd mantissa)
- plus change the sign of the low double to compensate. */
- if (hidden2)
- {
- hi++;
- u.ieee.negative2 = !sign;
- lo = (1ULL << 53) - lo;
- }
- /* The hidden bit of the lo mantissa is zero so we need to
- normalize the it for the low double. Shift it left until the
- hidden bit is '1' then adjust the 2nd exponent accordingly. */
-
- if (sizeof (lo) == sizeof (long))
- lzcount = __builtin_clzl (lo);
- else if ((lo >> 32) != 0)
- lzcount = __builtin_clzl ((long) (lo >> 32));
- else
- lzcount = __builtin_clzl ((long) lo) + 32;
- lzcount = lzcount - 11;
- if (lzcount > 0)
- {
- int expnt2 = u.ieee.exponent2 - lzcount;
- if (expnt2 >= 1)
- {
- /* Not denormal. Normalize and set low exponent. */
- lo = lo << lzcount;
- u.ieee.exponent2 = expnt2;
- }
- else
- {
- /* Is denormal. */
- lo = lo << (lzcount + expnt2);
- u.ieee.exponent2 = 0;
- }
- }
- }
- else
- {
- u.ieee.negative2 = 0;
- u.ieee.exponent2 = 0;
- }
-
- u.ieee.mantissa3 = lo & ((1ULL << 32) - 1);
- u.ieee.mantissa2 = (lo >> 32) & ((1ULL << 20) - 1);
- u.ieee.mantissa1 = hi & ((1ULL << 32) - 1);
- u.ieee.mantissa0 = (hi >> 32) & ((1ULL << 20) - 1);
- return u.d;
-}
-
-/* gcc generates disgusting code to pack and unpack long doubles.
- This tells gcc that pack/unpack is really a nop. We use fr1/fr2
- because those are the regs used to pass/return a single
- long double arg. */
+/* GCC does not optimize the default ldbl_pack code to not spill register
+ in the stack. The following optimization tells gcc that pack/unpack
+ is really a nop. We use fr1/fr2 because those are the regs used to
+ pass/return a single long double arg. */
static inline long double
-ldbl_pack (double a, double aa)
+ldbl_pack_ppc (double a, double aa)
{
register long double x __asm__ ("fr1");
register double xh __asm__ ("fr1");
@@ -139,7 +19,7 @@ ldbl_pack (double a, double aa)
}
static inline void
-ldbl_unpack (long double l, double *a, double *aa)
+ldbl_unpack_ppc (long double l, double *a, double *aa)
{
register long double x __asm__ ("fr1");
register double xh __asm__ ("fr1");
@@ -150,40 +30,7 @@ ldbl_unpack (long double l, double *a, double *aa)
*aa = xl;
}
+#define ldbl_pack ldbl_pack_ppc
+#define ldbl_unpack ldbl_unpack_ppc
-/* Convert a finite long double to canonical form.
- Does not handle +/-Inf properly. */
-static inline void
-ldbl_canonicalize (double *a, double *aa)
-{
- double xh, xl;
-
- xh = *a + *aa;
- xl = (*a - xh) + *aa;
- *a = xh;
- *aa = xl;
-}
-
-/* Simple inline nearbyint (double) function .
- Only works in the default rounding mode
- but is useful in long double rounding functions. */
-static inline double
-ldbl_nearbyint (double a)
-{
- double two52 = 0x10000000000000LL;
-
- if (__builtin_expect ((__builtin_fabs (a) < two52), 1))
- {
- if (__builtin_expect ((a > 0.0), 1))
- {
- a += two52;
- a -= two52;
- }
- else if (__builtin_expect ((a < 0.0), 1))
- {
- a = two52 - a;
- a = -(a - two52);
- }
- }
- return a;
-}
+#include <sysdeps/ieee754/ldbl-128ibm/math_ldbl.h>
diff --git glibc-2.17-c758a686/sysdeps/unix/sysv/linux/powerpc/Implies glibc-2.17-c758a686/sysdeps/unix/sysv/linux/powerpc/Implies
deleted file mode 100644
index ff27cdb..0000000
--- glibc-2.17-c758a686/sysdeps/unix/sysv/linux/powerpc/Implies
+++ /dev/null
@@ -1,4 +0,0 @@
-# Make sure these routines come before ldbl-opt.
-ieee754/ldbl-128ibm
-# These supply the ABI compatibility for when long double was double.
-ieee754/ldbl-opt
--
1.7.11.7
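What makes the unification possible is the #ifndef fallback added to the generic ldbl-128ibm header: an architecture header can define its own pack/unpack macro before the generic header is read, otherwise the default_ldbl_* versions are installed. A minimal sketch of that override pattern with illustrative names, collapsed into one file instead of two headers:

/* --- sketch, not part of the patch --- */
#include <stdio.h>

/* "Architecture" side: define the optimized routine and the macro first,
   as sysdeps/powerpc/fpu/math_ldbl.h now does before including the
   generic header.  */
static inline double
pack_ppc (double hi, double lo)       /* stands in for ldbl_pack_ppc */
{
  return hi + lo;                     /* the real one uses register asm */
}
#define pack pack_ppc

/* "Generic" side: provide a default and install it only when no
   override exists, mirroring the new #ifndef ldbl_pack hunk.  */
static inline double
default_pack (double hi, double lo)
{
  return hi + lo;
}
#ifndef pack
# define pack default_pack
#endif

int
main (void)
{
  printf ("pack (1.5, 0.25) = %g\n", pack (1.5, 0.25));  /* uses pack_ppc */
  return 0;
}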


@@ -0,0 +1,53 @@
Combination of the following two commits:
From 45045c44fabde9152ab1a0b4ed06419a3621f535 Mon Sep 17 00:00:00 2001
From: Adhemerval Zanella <azanella@linux.vnet.ibm.com>
Date: Thu, 21 Mar 2013 14:15:45 -0300
Subject: [PATCH 20/42] PowerPC: fix sqrtl ABI issue
This patch fixes a sqrtl ABI issue when building for powerpc64.
(cherry picked from commit b5784d95bb94eda59b08aca735406908e209f638)
From dad835a11f370afd2dae4bac554fa64fac5a8c6e Mon Sep 17 00:00:00 2001
From: Adhemerval Zanella <azanella@linux.vnet.ibm.com>
Date: Tue, 26 Mar 2013 10:01:57 -0300
Subject: [PATCH 21/42] PowerPC: fix libm ABI issue for llroundl
(cherry picked from commit fce14d4e9c6e08ad8c825fe88d8cbdac5c739565)
diff -pruN glibc-2.17-c758a686/sysdeps/powerpc/fpu/s_llround.c glibc-2.17-c758a686/sysdeps/powerpc/fpu/s_llround.c
--- glibc-2.17-c758a686/sysdeps/powerpc/fpu/s_llround.c 2012-12-25 08:32:13.000000000 +0530
+++ glibc-2.17-c758a686/sysdeps/powerpc/fpu/s_llround.c 2013-08-06 17:45:56.719534470 +0530
@@ -17,6 +17,7 @@
<http://www.gnu.org/licenses/>. */
#include <math.h>
+#include <math_ldbl_opt.h>
/* I think that what this routine is supposed to do is round a value
to the nearest integer, with values exactly on the boundary rounded
@@ -47,3 +48,6 @@ weak_alias (__llround, llround)
strong_alias (__llround, __llroundl)
weak_alias (__llround, llroundl)
#endif
+#if LONG_DOUBLE_COMPAT (libm, GLIBC_2_1)
+compat_symbol (libm, __llround, llroundl, GLIBC_2_1);
+#endif
diff -pruN glibc-2.17-c758a686/sysdeps/powerpc/fpu/w_sqrt.c glibc-2.17-c758a686/sysdeps/powerpc/fpu/w_sqrt.c
--- glibc-2.17-c758a686/sysdeps/powerpc/fpu/w_sqrt.c 2012-12-25 08:32:13.000000000 +0530
+++ glibc-2.17-c758a686/sysdeps/powerpc/fpu/w_sqrt.c 2013-08-06 17:45:53.459534613 +0530
@@ -19,6 +19,7 @@
#include <math.h>
#include <math_private.h>
#include <fenv_libc.h>
+#include <math_ldbl_opt.h>
double
__sqrt (double x) /* wrapper sqrt */
@@ -42,3 +43,6 @@ weak_alias (__sqrt, sqrt)
#ifdef NO_LONG_DOUBLE
strong_alias (__sqrt, __sqrtl) weak_alias (__sqrt, sqrtl)
#endif
+#if LONG_DOUBLE_COMPAT(libm, GLIBC_2_0)
+compat_symbol (libm, __sqrt, sqrtl, GLIBC_2_0);
+#endif


@@ -0,0 +1,13 @@
diff -urN glibc-2.17-c758a686/sysdeps/ieee754/ldbl-128ibm/math_ldbl.h glibc-2.17-c758a686/sysdeps/ieee754/ldbl-128ibm/math_ldbl.h
--- glibc-2.17-c758a686/sysdeps/ieee754/ldbl-128ibm/math_ldbl.h 2014-05-27 19:58:07.000000000 -0500
+++ glibc-2.17-c758a686/sysdeps/ieee754/ldbl-128ibm/math_ldbl.h 2014-05-27 19:59:00.000000000 -0500
@@ -190,6 +190,9 @@
# define ldbl_unpack default_ldbl_unpack
#endif
+/* Extract high double. */
+#define ldbl_high(x) ((double) x)
+
/* Convert a finite long double to canonical form.
Does not handle +/-Inf properly. */
static inline void
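ldbl_high can be a plain cast because an IBM double-double value is the exact sum of two doubles, and rounding that sum to double recovers the high half. A short sketch; the behaviour described holds only on an ldbl-128ibm target such as powerpc, elsewhere it still compiles but the low part comes out as zero or rounding noise:

/* --- sketch, not part of the patch --- */
#include <stdio.h>

int
main (void)
{
  long double x = 1.0L + 0x1p-70L;   /* hi = 1.0, lo = 2^-70 on ldbl-128ibm */
  double hi = (double) x;            /* exactly what ldbl_high (x) does */
  double lo = (double) (x - hi);     /* what remains for the low double */
  printf ("hi = %a  lo = %a\n", hi, lo);
  return 0;
}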


@@ -0,0 +1,83 @@
# commit 1695c7737655241e1773bdddc93e82c22d8d1584
# Author: Adhemerval Zanella <azanella@linux.vnet.ibm.com>
# Date: Tue Feb 4 09:48:47 2014 -0200
#
# abilist-pattern configurability
#
# This patch creates implicit rules to match the abifiles if
# abilist-pattern is defined in the architecture Makefile. This allows
# machine specific Makefiles to define different abifiles names
# (for instance *-le.abilist for powerpc64le).
#
diff -urN glibc-2.17-c758a686/Makerules glibc-2.17-c758a686/Makerules
--- glibc-2.17-c758a686/Makerules 2014-06-02 15:29:42.000000000 +0000
+++ glibc-2.17-c758a686/Makerules 2014-06-02 15:25:21.000000000 +0000
@@ -1152,6 +1152,14 @@
LC_ALL=C $(OBJDUMP) --dynamic-syms $< > $@T
mv -f $@T $@
+# A sysdeps/.../Makefile can set abilist-pattern to something like
+# %-foo.abilist to look for libc-foo.abilist instead of libc.abilist.
+# This makes sense if multiple ABIs can be most cleanly supported by a
+# configuration without using separate sysdeps directories for each.
+ifdef abilist-pattern
+vpath $(abilist-pattern) $(+sysdep_dirs)
+endif
+
vpath %.abilist $(+sysdep_dirs)
# The .PRECIOUS rule prevents the files built by an implicit rule whose
@@ -1161,18 +1169,42 @@
.PRECIOUS: %.symlist
generated += $(extra-libs:=.symlist)
+ifdef abilist-pattern
+check-abi-%: $(common-objpfx)config.make $(abilist-pattern) $(objpfx)%.symlist
+ $(check-abi-pattern)
+check-abi-%: $(common-objpfx)config.make $(abilist-pattern) \
+ $(common-objpfx)%.symlist
+ $(check-abi-pattern)
+endif
check-abi-%: $(common-objpfx)config.make %.abilist $(objpfx)%.symlist
$(check-abi)
check-abi-%: $(common-objpfx)config.make %.abilist $(common-objpfx)%.symlist
$(check-abi)
+define check-abi-pattern
+ diff -p -U 0 $(filter $(abilist-pattern),$^) $(filter %.symlist,$^)
+endef
define check-abi
diff -p -U 0 $(filter %.abilist,$^) $(filter %.symlist,$^)
endef
+ifdef abilist-pattern
+update-abi-%: $(objpfx)%.symlist $(abilist-pattern)
+ $(update-abi-pattern)
+update-abi-%: $(common-objpfx)%.symlist $(abilist-pattern)
+ $(update-abi-pattern)
+endif
update-abi-%: $(objpfx)%.symlist %.abilist
$(update-abi)
update-abi-%: $(common-objpfx)%.symlist %.abilist
$(update-abi)
+define update-abi-pattern
+@if cmp -s $^ 2> /dev/null; \
+ then \
+ echo '+++ $(filter $(abilist-pattern),$^) is unchanged'; \
+ else cp -f $^; \
+ echo '*** Now check $(filter $(abilist-pattern),$^) changes for correctness ***'; \
+ fi
+endef
define update-abi
@if cmp -s $^ 2> /dev/null; \
then \
diff -urN glibc-2.17-c758a686/sysdeps/powerpc/Makefile glibc-2.17-c758a686/sysdeps/powerpc/Makefile
--- glibc-2.17-c758a686/sysdeps/powerpc/Makefile 2014-06-02 15:29:42.000000000 +0000
+++ glibc-2.17-c758a686/sysdeps/powerpc/Makefile 2014-06-02 15:25:21.000000000 +0000
@@ -27,3 +27,7 @@
sysdep_headers += sys/platform/ppc.h
tests += test-gettimebase
endif
+
+ifneq (,$(filter %le,$(config-machine)))
+abilist-pattern = %-le.abilist
+endif

File diff suppressed because it is too large

File diff suppressed because it is too large


@@ -0,0 +1,676 @@
# commit 9605ca6c085a749f29b6866a3e00bce1ba1a2698
# Author: Alan Modra <amodra@gmail.com>
# Date: Sat Aug 17 18:12:56 2013 +0930
#
# IBM long double mechanical changes to support little-endian
# http://sourceware.org/ml/libc-alpha/2013-07/msg00001.html
#
# This patch starts the process of supporting powerpc64 little-endian
# long double in glibc. IBM long double is an array of two ieee
# doubles, so making union ibm_extended_long_double reflect this fact is
# the correct way to access fields of the doubles.
#
# * sysdeps/ieee754/ldbl-128ibm/ieee754.h
# (union ibm_extended_long_double): Define as an array of ieee754_double.
# (IBM_EXTENDED_LONG_DOUBLE_BIAS): Delete.
# * sysdeps/ieee754/ldbl-128ibm/printf_fphex.c: Update all references
# to ibm_extended_long_double and IBM_EXTENDED_LONG_DOUBLE_BIAS.
# * sysdeps/ieee754/ldbl-128ibm/e_exp10l.c: Likewise.
# * sysdeps/ieee754/ldbl-128ibm/e_expl.c: Likewise.
# * sysdeps/ieee754/ldbl-128ibm/ldbl2mpn.c: Likewise.
# * sysdeps/ieee754/ldbl-128ibm/math_ldbl.h: Likewise.
# * sysdeps/ieee754/ldbl-128ibm/mpn2ldbl.c: Likewise.
# * sysdeps/ieee754/ldbl-128ibm/s_nearbyintl.c: Likewise.
# * sysdeps/ieee754/ldbl-128ibm/strtold_l.c: Likewise.
# * sysdeps/ieee754/ldbl-128ibm/x2y2m1l.c: Likewise.
#
diff -urN glibc-2.17-c758a686/sysdeps/ieee754/ldbl-128ibm/e_exp10l.c glibc-2.17-c758a686/sysdeps/ieee754/ldbl-128ibm/e_exp10l.c
--- glibc-2.17-c758a686/sysdeps/ieee754/ldbl-128ibm/e_exp10l.c 2014-05-26 21:08:01.000000000 -0500
+++ glibc-2.17-c758a686/sysdeps/ieee754/ldbl-128ibm/e_exp10l.c 2014-05-26 21:08:10.000000000 -0500
@@ -36,9 +36,9 @@
else if (arg > LDBL_MAX_10_EXP + 1)
return LDBL_MAX * LDBL_MAX;
- u.d = arg;
- arg_high = u.dd[0];
- arg_low = u.dd[1];
+ u.ld = arg;
+ arg_high = u.d[0].d;
+ arg_low = u.d[1].d;
exp_high = arg_high * log10_high;
exp_low = arg_high * log10_low + arg_low * M_LN10l;
return __ieee754_expl (exp_high) * __ieee754_expl (exp_low);
diff -urN glibc-2.17-c758a686/sysdeps/ieee754/ldbl-128ibm/e_expl.c glibc-2.17-c758a686/sysdeps/ieee754/ldbl-128ibm/e_expl.c
--- glibc-2.17-c758a686/sysdeps/ieee754/ldbl-128ibm/e_expl.c 2014-05-26 21:08:01.000000000 -0500
+++ glibc-2.17-c758a686/sysdeps/ieee754/ldbl-128ibm/e_expl.c 2014-05-26 21:08:10.000000000 -0500
@@ -162,39 +162,39 @@
x = x + xl;
/* Compute ex2 = 2^n_0 e^(argtable[tval1]) e^(argtable[tval2]). */
- ex2_u.d = __expl_table[T_EXPL_RES1 + tval1]
- * __expl_table[T_EXPL_RES2 + tval2];
+ ex2_u.ld = (__expl_table[T_EXPL_RES1 + tval1]
+ * __expl_table[T_EXPL_RES2 + tval2]);
n_i = (int)n;
/* 'unsafe' is 1 iff n_1 != 0. */
unsafe = fabsl(n_i) >= -LDBL_MIN_EXP - 1;
- ex2_u.ieee.exponent += n_i >> unsafe;
+ ex2_u.d[0].ieee.exponent += n_i >> unsafe;
/* Fortunately, there are no subnormal lowpart doubles in
__expl_table, only normal values and zeros.
But after scaling it can be subnormal. */
- exponent2 = ex2_u.ieee.exponent2 + (n_i >> unsafe);
- if (ex2_u.ieee.exponent2 == 0)
- /* assert ((ex2_u.ieee.mantissa2|ex2_u.ieee.mantissa3) == 0) */;
+ exponent2 = ex2_u.d[1].ieee.exponent + (n_i >> unsafe);
+ if (ex2_u.d[1].ieee.exponent == 0)
+ /* assert ((ex2_u.d[1].ieee.mantissa0|ex2_u.d[1].ieee.mantissa1) == 0) */;
else if (exponent2 > 0)
- ex2_u.ieee.exponent2 = exponent2;
+ ex2_u.d[1].ieee.exponent = exponent2;
else if (exponent2 <= -54)
{
- ex2_u.ieee.exponent2 = 0;
- ex2_u.ieee.mantissa2 = 0;
- ex2_u.ieee.mantissa3 = 0;
+ ex2_u.d[1].ieee.exponent = 0;
+ ex2_u.d[1].ieee.mantissa0 = 0;
+ ex2_u.d[1].ieee.mantissa1 = 0;
}
else
{
static const double
two54 = 1.80143985094819840000e+16, /* 4350000000000000 */
twom54 = 5.55111512312578270212e-17; /* 3C90000000000000 */
- ex2_u.dd[1] *= two54;
- ex2_u.ieee.exponent2 += n_i >> unsafe;
- ex2_u.dd[1] *= twom54;
+ ex2_u.d[1].d *= two54;
+ ex2_u.d[1].ieee.exponent += n_i >> unsafe;
+ ex2_u.d[1].d *= twom54;
}
/* Compute scale = 2^n_1. */
- scale_u.d = 1.0L;
- scale_u.ieee.exponent += n_i - (n_i >> unsafe);
+ scale_u.ld = 1.0L;
+ scale_u.d[0].ieee.exponent += n_i - (n_i >> unsafe);
/* Approximate e^x2 - 1, using a seventh-degree polynomial,
with maximum error in [-2^-16-2^-53,2^-16+2^-53]
@@ -204,7 +204,7 @@
/* Return result. */
fesetenv (&oldenv);
- result = x22 * ex2_u.d + ex2_u.d;
+ result = x22 * ex2_u.ld + ex2_u.ld;
/* Now we can test whether the result is ultimate or if we are unsure.
In the later case we should probably call a mpn based routine to give
@@ -238,7 +238,7 @@
if (!unsafe)
return result;
else
- return result * scale_u.d;
+ return result * scale_u.ld;
}
/* Exceptional cases: */
else if (isless (x, himark))
diff -urN glibc-2.17-c758a686/sysdeps/ieee754/ldbl-128ibm/ieee754.h glibc-2.17-c758a686/sysdeps/ieee754/ldbl-128ibm/ieee754.h
--- glibc-2.17-c758a686/sysdeps/ieee754/ldbl-128ibm/ieee754.h 2014-05-26 21:08:01.000000000 -0500
+++ glibc-2.17-c758a686/sysdeps/ieee754/ldbl-128ibm/ieee754.h 2014-05-26 21:08:10.000000000 -0500
@@ -180,29 +180,9 @@
union ibm_extended_long_double
{
- long double d;
- double dd[2];
-
- /* This is the IBM extended format long double. */
- struct
- { /* Big endian. There is no other. */
-
- unsigned int negative:1;
- unsigned int exponent:11;
- /* Together Mantissa0-3 comprise the mantissa. */
- unsigned int mantissa0:20;
- unsigned int mantissa1:32;
-
- unsigned int negative2:1;
- unsigned int exponent2:11;
- /* There is an implied 1 here? */
- /* Together these comprise the mantissa. */
- unsigned int mantissa2:20;
- unsigned int mantissa3:32;
- } ieee;
- };
-
-#define IBM_EXTENDED_LONG_DOUBLE_BIAS 0x3ff /* Added to exponent. */
+ long double ld;
+ union ieee754_double d[2];
+ };
__END_DECLS
diff -urN glibc-2.17-c758a686/sysdeps/ieee754/ldbl-128ibm/ldbl2mpn.c glibc-2.17-c758a686/sysdeps/ieee754/ldbl-128ibm/ldbl2mpn.c
--- glibc-2.17-c758a686/sysdeps/ieee754/ldbl-128ibm/ldbl2mpn.c 2014-05-26 21:08:01.000000000 -0500
+++ glibc-2.17-c758a686/sysdeps/ieee754/ldbl-128ibm/ldbl2mpn.c 2014-05-26 21:08:10.000000000 -0500
@@ -36,22 +36,22 @@
union ibm_extended_long_double u;
unsigned long long hi, lo;
int ediff;
- u.d = value;
+ u.ld = value;
- *is_neg = u.ieee.negative;
- *expt = (int) u.ieee.exponent - IBM_EXTENDED_LONG_DOUBLE_BIAS;
+ *is_neg = u.d[0].ieee.negative;
+ *expt = (int) u.d[0].ieee.exponent - IEEE754_DOUBLE_BIAS;
- lo = ((long long) u.ieee.mantissa2 << 32) | u.ieee.mantissa3;
- hi = ((long long) u.ieee.mantissa0 << 32) | u.ieee.mantissa1;
+ lo = ((long long) u.d[1].ieee.mantissa0 << 32) | u.d[1].ieee.mantissa1;
+ hi = ((long long) u.d[0].ieee.mantissa0 << 32) | u.d[0].ieee.mantissa1;
/* If the lower double is not a denomal or zero then set the hidden
53rd bit. */
- if (u.ieee.exponent2 > 0)
+ if (u.d[1].ieee.exponent > 0)
{
lo |= 1LL << 52;
/* The lower double is normalized separately from the upper. We may
need to adjust the lower manitissa to reflect this. */
- ediff = u.ieee.exponent - u.ieee.exponent2;
+ ediff = u.d[0].ieee.exponent - u.d[1].ieee.exponent;
if (ediff > 53)
lo = lo >> (ediff-53);
}
@@ -59,8 +59,8 @@
difference between the long double and the rounded high double
value. This is indicated by a differnce between the signs of the
high and low doubles. */
- if ((u.ieee.negative != u.ieee.negative2)
- && ((u.ieee.exponent2 != 0) && (lo != 0L)))
+ if ((u.d[0].ieee.negative != u.d[1].ieee.negative)
+ && ((u.d[1].ieee.exponent != 0) && (lo != 0L)))
{
lo = (1ULL << 53) - lo;
if (hi == 0LL)
@@ -92,7 +92,7 @@
#define NUM_LEADING_ZEROS (BITS_PER_MP_LIMB \
- (LDBL_MANT_DIG - ((N - 1) * BITS_PER_MP_LIMB)))
- if (u.ieee.exponent == 0)
+ if (u.d[0].ieee.exponent == 0)
{
/* A biased exponent of zero is a special case.
Either it is a zero or it is a denormal number. */
diff -urN glibc-2.17-c758a686/sysdeps/ieee754/ldbl-128ibm/math_ldbl.h glibc-2.17-c758a686/sysdeps/ieee754/ldbl-128ibm/math_ldbl.h
--- glibc-2.17-c758a686/sysdeps/ieee754/ldbl-128ibm/math_ldbl.h 2014-05-26 21:08:01.000000000 -0500
+++ glibc-2.17-c758a686/sysdeps/ieee754/ldbl-128ibm/math_ldbl.h 2014-05-26 21:08:10.000000000 -0500
@@ -14,28 +14,28 @@
as bit 53 of the mantissa. */
uint64_t hi, lo;
int ediff;
- union ibm_extended_long_double eldbl;
- eldbl.d = x;
- *exp = eldbl.ieee.exponent - IBM_EXTENDED_LONG_DOUBLE_BIAS;
+ union ibm_extended_long_double u;
+ u.ld = x;
+ *exp = u.d[0].ieee.exponent - IEEE754_DOUBLE_BIAS;
- lo = ((int64_t)eldbl.ieee.mantissa2 << 32) | eldbl.ieee.mantissa3;
- hi = ((int64_t)eldbl.ieee.mantissa0 << 32) | eldbl.ieee.mantissa1;
+ lo = ((uint64_t)u.d[1].ieee.mantissa0 << 32) | u.d[1].ieee.mantissa1;
+ hi = ((uint64_t)u.d[0].ieee.mantissa0 << 32) | u.d[0].ieee.mantissa1;
/* If the lower double is not a denomal or zero then set the hidden
53rd bit. */
- if (eldbl.ieee.exponent2 > 0x001)
+ if (u.d[1].ieee.exponent > 0x001)
{
lo |= (1ULL << 52);
lo = lo << 7; /* pre-shift lo to match ieee854. */
/* The lower double is normalized separately from the upper. We
may need to adjust the lower manitissa to reflect this. */
- ediff = eldbl.ieee.exponent - eldbl.ieee.exponent2;
+ ediff = u.d[0].ieee.exponent - u.d[1].ieee.exponent;
if (ediff > 53)
lo = lo >> (ediff-53);
hi |= (1ULL << 52);
}
- if ((eldbl.ieee.negative != eldbl.ieee.negative2)
- && ((eldbl.ieee.exponent2 != 0) && (lo != 0LL)))
+ if ((u.d[0].ieee.negative != u.d[1].ieee.negative)
+ && ((u.d[1].ieee.exponent != 0) && (lo != 0LL)))
{
hi--;
lo = (1ULL << 60) - lo;
@@ -58,10 +58,10 @@
unsigned long hidden2, lzcount;
unsigned long long hi, lo;
- u.ieee.negative = sign;
- u.ieee.negative2 = sign;
- u.ieee.exponent = exp + IBM_EXTENDED_LONG_DOUBLE_BIAS;
- u.ieee.exponent2 = exp-53 + IBM_EXTENDED_LONG_DOUBLE_BIAS;
+ u.d[0].ieee.negative = sign;
+ u.d[1].ieee.negative = sign;
+ u.d[0].ieee.exponent = exp + IEEE754_DOUBLE_BIAS;
+ u.d[1].ieee.exponent = exp-53 + IEEE754_DOUBLE_BIAS;
/* Expect 113 bits (112 bits + hidden) right justified in two longs.
The low order 53 bits (52 + hidden) go into the lower double */
lo = (lo64 >> 7)& ((1ULL << 53) - 1);
@@ -78,7 +78,7 @@
if (hidden2)
{
hi++;
- u.ieee.negative2 = !sign;
+ u.d[1].ieee.negative = !sign;
lo = (1ULL << 53) - lo;
}
/* The hidden bit of the lo mantissa is zero so we need to
@@ -94,32 +94,32 @@
lzcount = lzcount - 11;
if (lzcount > 0)
{
- int expnt2 = u.ieee.exponent2 - lzcount;
+ int expnt2 = u.d[1].ieee.exponent - lzcount;
if (expnt2 >= 1)
{
/* Not denormal. Normalize and set low exponent. */
lo = lo << lzcount;
- u.ieee.exponent2 = expnt2;
+ u.d[1].ieee.exponent = expnt2;
}
else
{
/* Is denormal. */
lo = lo << (lzcount + expnt2);
- u.ieee.exponent2 = 0;
+ u.d[1].ieee.exponent = 0;
}
}
}
else
{
- u.ieee.negative2 = 0;
- u.ieee.exponent2 = 0;
+ u.d[1].ieee.negative = 0;
+ u.d[1].ieee.exponent = 0;
}
- u.ieee.mantissa3 = lo & ((1ULL << 32) - 1);
- u.ieee.mantissa2 = (lo >> 32) & ((1ULL << 20) - 1);
- u.ieee.mantissa1 = hi & ((1ULL << 32) - 1);
- u.ieee.mantissa0 = (hi >> 32) & ((1ULL << 20) - 1);
- return u.d;
+ u.d[1].ieee.mantissa1 = lo & ((1ULL << 32) - 1);
+ u.d[1].ieee.mantissa0 = (lo >> 32) & ((1ULL << 20) - 1);
+ u.d[0].ieee.mantissa1 = hi & ((1ULL << 32) - 1);
+ u.d[0].ieee.mantissa0 = (hi >> 32) & ((1ULL << 20) - 1);
+ return u.ld;
}
/* Handy utility functions to pack/unpack/cononicalize and find the nearbyint
@@ -128,18 +128,18 @@
default_ldbl_pack (double a, double aa)
{
union ibm_extended_long_double u;
- u.dd[0] = a;
- u.dd[1] = aa;
- return u.d;
+ u.d[0].d = a;
+ u.d[1].d = aa;
+ return u.ld;
}
static inline void
default_ldbl_unpack (long double l, double *a, double *aa)
{
union ibm_extended_long_double u;
- u.d = l;
- *a = u.dd[0];
- *aa = u.dd[1];
+ u.ld = l;
+ *a = u.d[0].d;
+ *aa = u.d[1].d;
}
#ifndef ldbl_pack
diff -urN glibc-2.17-c758a686/sysdeps/ieee754/ldbl-128ibm/mpn2ldbl.c glibc-2.17-c758a686/sysdeps/ieee754/ldbl-128ibm/mpn2ldbl.c
--- glibc-2.17-c758a686/sysdeps/ieee754/ldbl-128ibm/mpn2ldbl.c 2014-05-26 21:08:01.000000000 -0500
+++ glibc-2.17-c758a686/sysdeps/ieee754/ldbl-128ibm/mpn2ldbl.c 2014-05-26 21:08:10.000000000 -0500
@@ -34,11 +34,11 @@
unsigned long long hi, lo;
int exponent2;
- u.ieee.negative = sign;
- u.ieee.negative2 = sign;
- u.ieee.exponent = expt + IBM_EXTENDED_LONG_DOUBLE_BIAS;
- u.ieee.exponent2 = 0;
- exponent2 = expt - 53 + IBM_EXTENDED_LONG_DOUBLE_BIAS;
+ u.d[0].ieee.negative = sign;
+ u.d[1].ieee.negative = sign;
+ u.d[0].ieee.exponent = expt + IEEE754_DOUBLE_BIAS;
+ u.d[1].ieee.exponent = 0;
+ exponent2 = expt - 53 + IEEE754_DOUBLE_BIAS;
#if BITS_PER_MP_LIMB == 32
/* The low order 53 bits (52 + hidden) go into the lower double */
@@ -74,15 +74,15 @@
else
lzcount = lzcount + 42;
- if (lzcount > u.ieee.exponent)
+ if (lzcount > u.d[0].ieee.exponent)
{
- lzcount = u.ieee.exponent;
- u.ieee.exponent = 0;
+ lzcount = u.d[0].ieee.exponent;
+ u.d[0].ieee.exponent = 0;
exponent2 -= lzcount;
}
else
{
- u.ieee.exponent -= (lzcount - 1);
+ u.d[0].ieee.exponent -= (lzcount - 1);
exponent2 -= (lzcount - 1);
}
@@ -112,9 +112,9 @@
{
if ((hi & (1LL << 53)) != 0)
hi -= 1LL << 52;
- u.ieee.exponent++;
+ u.d[0].ieee.exponent++;
}
- u.ieee.negative2 = !sign;
+ u.d[1].ieee.negative = !sign;
lo = (1LL << 53) - lo;
}
@@ -135,17 +135,17 @@
exponent2 = exponent2 - lzcount;
}
if (exponent2 > 0)
- u.ieee.exponent2 = exponent2;
+ u.d[1].ieee.exponent = exponent2;
else
lo >>= 1 - exponent2;
}
else
- u.ieee.negative2 = 0;
+ u.d[1].ieee.negative = 0;
- u.ieee.mantissa3 = lo & 0xffffffffLL;
- u.ieee.mantissa2 = (lo >> 32) & 0xfffff;
- u.ieee.mantissa1 = hi & 0xffffffffLL;
- u.ieee.mantissa0 = (hi >> 32) & ((1LL << (LDBL_MANT_DIG - 86)) - 1);
+ u.d[1].ieee.mantissa1 = lo & 0xffffffffLL;
+ u.d[1].ieee.mantissa0 = (lo >> 32) & 0xfffff;
+ u.d[0].ieee.mantissa1 = hi & 0xffffffffLL;
+ u.d[0].ieee.mantissa0 = (hi >> 32) & ((1LL << (LDBL_MANT_DIG - 86)) - 1);
- return u.d;
+ return u.ld;
}
diff -urN glibc-2.17-c758a686/sysdeps/ieee754/ldbl-128ibm/printf_fphex.c glibc-2.17-c758a686/sysdeps/ieee754/ldbl-128ibm/printf_fphex.c
--- glibc-2.17-c758a686/sysdeps/ieee754/ldbl-128ibm/printf_fphex.c 2014-05-26 21:08:01.000000000 -0500
+++ glibc-2.17-c758a686/sysdeps/ieee754/ldbl-128ibm/printf_fphex.c 2014-05-26 21:08:10.000000000 -0500
@@ -27,31 +27,31 @@
unsigned long long int num0, num1; \
unsigned long long hi, lo; \
int ediff; \
- union ibm_extended_long_double eldbl; \
- eldbl.d = fpnum.ldbl.d; \
+ union ibm_extended_long_double u; \
+ u.ld = fpnum.ldbl.d; \
\
assert (sizeof (long double) == 16); \
\
- lo = ((long long)eldbl.ieee.mantissa2 << 32) | eldbl.ieee.mantissa3; \
- hi = ((long long)eldbl.ieee.mantissa0 << 32) | eldbl.ieee.mantissa1; \
+ lo = ((long long)u.d[1].ieee.mantissa0 << 32) | u.d[1].ieee.mantissa1; \
+ hi = ((long long)u.d[0].ieee.mantissa0 << 32) | u.d[0].ieee.mantissa1; \
lo <<= 7; /* pre-shift lo to match ieee854. */ \
/* If the lower double is not a denomal or zero then set the hidden \
53rd bit. */ \
- if (eldbl.ieee.exponent2 != 0) \
+ if (u.d[1].ieee.exponent != 0) \
lo |= (1ULL << (52 + 7)); \
else \
lo <<= 1; \
/* The lower double is normalized separately from the upper. We \
may need to adjust the lower manitissa to reflect this. */ \
- ediff = eldbl.ieee.exponent - eldbl.ieee.exponent2; \
+ ediff = u.d[0].ieee.exponent - u.d[1].ieee.exponent; \
if (ediff > 53 + 63) \
lo = 0; \
else if (ediff > 53) \
lo = lo >> (ediff - 53); \
- else if (eldbl.ieee.exponent2 == 0 && ediff < 53) \
+ else if (u.d[1].ieee.exponent == 0 && ediff < 53) \
lo = lo << (53 - ediff); \
- if (eldbl.ieee.negative != eldbl.ieee.negative2 \
- && (eldbl.ieee.exponent2 != 0 || lo != 0L)) \
+ if (u.d[0].ieee.negative != u.d[1].ieee.negative \
+ && (u.d[1].ieee.exponent != 0 || lo != 0L)) \
{ \
lo = (1ULL << 60) - lo; \
if (hi == 0L) \
@@ -59,7 +59,7 @@
/* we have a borrow from the hidden bit, so shift left 1. */ \
hi = 0xffffffffffffeLL | (lo >> 59); \
lo = 0xfffffffffffffffLL & (lo << 1); \
- eldbl.ieee.exponent--; \
+ u.d[0].ieee.exponent--; \
} \
else \
hi--; \
@@ -110,9 +110,9 @@
*--wnumstr = L'0'; \
} \
\
- leading = eldbl.ieee.exponent == 0 ? '0' : '1'; \
+ leading = u.d[0].ieee.exponent == 0 ? '0' : '1'; \
\
- exponent = eldbl.ieee.exponent; \
+ exponent = u.d[0].ieee.exponent; \
\
if (exponent == 0) \
{ \
@@ -122,18 +122,18 @@
{ \
/* This is a denormalized number. */ \
expnegative = 1; \
- exponent = IBM_EXTENDED_LONG_DOUBLE_BIAS - 1; \
+ exponent = IEEE754_DOUBLE_BIAS - 1; \
} \
} \
- else if (exponent >= IBM_EXTENDED_LONG_DOUBLE_BIAS) \
+ else if (exponent >= IEEE754_DOUBLE_BIAS) \
{ \
expnegative = 0; \
- exponent -= IBM_EXTENDED_LONG_DOUBLE_BIAS; \
+ exponent -= IEEE754_DOUBLE_BIAS; \
} \
else \
{ \
expnegative = 1; \
- exponent = -(exponent - IBM_EXTENDED_LONG_DOUBLE_BIAS); \
+ exponent = -(exponent - IEEE754_DOUBLE_BIAS); \
} \
} while (0)
diff -urN glibc-2.17-c758a686/sysdeps/ieee754/ldbl-128ibm/s_nearbyintl.c glibc-2.17-c758a686/sysdeps/ieee754/ldbl-128ibm/s_nearbyintl.c
--- glibc-2.17-c758a686/sysdeps/ieee754/ldbl-128ibm/s_nearbyintl.c 2014-05-26 21:08:01.000000000 -0500
+++ glibc-2.17-c758a686/sysdeps/ieee754/ldbl-128ibm/s_nearbyintl.c 2014-05-26 21:08:10.000000000 -0500
@@ -33,11 +33,11 @@
fenv_t env;
static const long double TWO52 = 4503599627370496.0L;
union ibm_extended_long_double u;
- u.d = x;
+ u.ld = x;
- if (fabs (u.dd[0]) < TWO52)
+ if (fabs (u.d[0].d) < TWO52)
{
- double high = u.dd[0];
+ double high = u.d[0].d;
feholdexcept (&env);
if (high > 0.0)
{
@@ -51,11 +51,11 @@
high += TWO52;
if (high == 0.0) high = -0.0;
}
- u.dd[0] = high;
- u.dd[1] = 0.0;
+ u.d[0].d = high;
+ u.d[1].d = 0.0;
fesetenv (&env);
}
- else if (fabs (u.dd[1]) < TWO52 && u.dd[1] != 0.0)
+ else if (fabs (u.d[1].d) < TWO52 && u.d[1].d != 0.0)
{
double high, low, tau;
/* In this case we have to round the low double and handle any
@@ -64,55 +64,55 @@
may already be rounded and the low double may have the
opposite sign to compensate. */
feholdexcept (&env);
- if (u.dd[0] > 0.0)
+ if (u.d[0].d > 0.0)
{
- if (u.dd[1] > 0.0)
+ if (u.d[1].d > 0.0)
{
/* If the high/low doubles are the same sign then simply
round the low double. */
- high = u.dd[0];
- low = u.dd[1];
+ high = u.d[0].d;
+ low = u.d[1].d;
}
- else if (u.dd[1] < 0.0)
+ else if (u.d[1].d < 0.0)
{
/* Else the high double is pre rounded and we need to
adjust for that. */
- tau = __nextafter (u.dd[0], 0.0);
- tau = (u.dd[0] - tau) * 2.0;
- high = u.dd[0] - tau;
- low = u.dd[1] + tau;
+ tau = __nextafter (u.d[0].d, 0.0);
+ tau = (u.d[0].d - tau) * 2.0;
+ high = u.d[0].d - tau;
+ low = u.d[1].d + tau;
}
low += TWO52;
low -= TWO52;
}
- else if (u.dd[0] < 0.0)
+ else if (u.d[0].d < 0.0)
{
- if (u.dd[1] < 0.0)
+ if (u.d[1].d < 0.0)
{
/* If the high/low doubles are the same sign then simply
round the low double. */
- high = u.dd[0];
- low = u.dd[1];
+ high = u.d[0].d;
+ low = u.d[1].d;
}
- else if (u.dd[1] > 0.0)
+ else if (u.d[1].d > 0.0)
{
/* Else the high double is pre rounded and we need to
adjust for that. */
- tau = __nextafter (u.dd[0], 0.0);
- tau = (u.dd[0] - tau) * 2.0;
- high = u.dd[0] - tau;
- low = u.dd[1] + tau;
+ tau = __nextafter (u.d[0].d, 0.0);
+ tau = (u.d[0].d - tau) * 2.0;
+ high = u.d[0].d - tau;
+ low = u.d[1].d + tau;
}
low = TWO52 - low;
low = -(low - TWO52);
}
- u.dd[0] = high + low;
- u.dd[1] = high - u.dd[0] + low;
+ u.d[0].d = high + low;
+ u.d[1].d = high - u.d[0].d + low;
fesetenv (&env);
}
- return u.d;
+ return u.ld;
}
long_double_symbol (libm, __nearbyintl, nearbyintl);
diff -urN glibc-2.17-c758a686/sysdeps/ieee754/ldbl-128ibm/strtold_l.c glibc-2.17-c758a686/sysdeps/ieee754/ldbl-128ibm/strtold_l.c
--- glibc-2.17-c758a686/sysdeps/ieee754/ldbl-128ibm/strtold_l.c 2014-05-26 21:08:01.000000000 -0500
+++ glibc-2.17-c758a686/sysdeps/ieee754/ldbl-128ibm/strtold_l.c 2014-05-26 21:12:01.000000000 -0500
@@ -43,12 +43,11 @@
#define FLOAT_HUGE_VAL HUGE_VALL
# define SET_MANTISSA(flt, mant) \
do { union ibm_extended_long_double u; \
- u.d = (flt); \
- if ((mant & 0xfffffffffffffULL) == 0) \
- mant = 0x8000000000000ULL; \
- u.ieee.mantissa0 = ((mant) >> 32) & 0xfffff; \
- u.ieee.mantissa1 = (mant) & 0xffffffff; \
- (flt) = u.d; \
+ u.ld = (flt); \
+ u.d[0].ieee_nan.mantissa0 = (mant) >> 32; \
+ u.d[0].ieee_nan.mantissa1 = (mant); \
+ if ((u.d[0].ieee.mantissa0 | u.d[0].ieee.mantissa1) != 0) \
+ (flt) = u.ld; \
} while (0)
#include <strtod_l.c>
diff -urN glibc-2.17-c758a686/sysdeps/ieee754/ldbl-128ibm/x2y2m1l.c glibc-2.17-c758a686/sysdeps/ieee754/ldbl-128ibm/x2y2m1l.c
--- glibc-2.17-c758a686/sysdeps/ieee754/ldbl-128ibm/x2y2m1l.c 2014-05-26 21:08:01.000000000 -0500
+++ glibc-2.17-c758a686/sysdeps/ieee754/ldbl-128ibm/x2y2m1l.c 2014-05-26 21:08:10.000000000 -0500
@@ -89,23 +89,23 @@
double vals[12];
SET_RESTORE_ROUND (FE_TONEAREST);
union ibm_extended_long_double xu, yu;
- xu.d = x;
- yu.d = y;
- if (fabs (xu.dd[1]) < 0x1p-500)
- xu.dd[1] = 0.0;
- if (fabs (yu.dd[1]) < 0x1p-500)
- yu.dd[1] = 0.0;
- mul_split (&vals[1], &vals[0], xu.dd[0], xu.dd[0]);
- mul_split (&vals[3], &vals[2], xu.dd[0], xu.dd[1]);
+ xu.ld = x;
+ yu.ld = y;
+ if (fabs (xu.d[1].d) < 0x1p-500)
+ xu.d[1].d = 0.0;
+ if (fabs (yu.d[1].d) < 0x1p-500)
+ yu.d[1].d = 0.0;
+ mul_split (&vals[1], &vals[0], xu.d[0].d, xu.d[0].d);
+ mul_split (&vals[3], &vals[2], xu.d[0].d, xu.d[1].d);
vals[2] *= 2.0;
vals[3] *= 2.0;
- mul_split (&vals[5], &vals[4], xu.dd[1], xu.dd[1]);
- mul_split (&vals[7], &vals[6], yu.dd[0], yu.dd[0]);
- mul_split (&vals[9], &vals[8], yu.dd[0], yu.dd[1]);
+ mul_split (&vals[5], &vals[4], xu.d[1].d, xu.d[1].d);
+ mul_split (&vals[7], &vals[6], yu.d[0].d, yu.d[0].d);
+ mul_split (&vals[9], &vals[8], yu.d[0].d, yu.d[1].d);
vals[8] *= 2.0;
vals[9] *= 2.0;
- mul_split (&vals[11], &vals[10], yu.dd[1], yu.dd[1]);
- if (xu.dd[0] >= 0.75)
+ mul_split (&vals[11], &vals[10], yu.d[1].d, yu.d[1].d);
+ if (xu.d[0].d >= 0.75)
vals[1] -= 1.0;
else
{
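The header comment above is the crux of the whole series: once the union is two ieee754_double entries, every field access goes through a type whose byte order glibc already handles, which is what makes the later little-endian work tractable. A minimal sketch of the new shape, assuming glibc's <ieee754.h> and an ldbl-128ibm target, with d[0] the high double and d[1] the low one:

/* --- sketch, not part of the patch --- */
#include <ieee754.h>
#include <stdio.h>

union ibm_ext_sketch          /* mirrors the patched ibm_extended_long_double */
{
  long double ld;
  union ieee754_double d[2];  /* d[0] = high double, d[1] = low double */
};

int
main (void)
{
  union ibm_ext_sketch u;
  u.ld = -2.5L;
  printf ("sign = %u  biased exponent = %u  low double = %a\n",
          (unsigned int) u.d[0].ieee.negative,
          (unsigned int) u.d[0].ieee.exponent,
          u.d[1].d);
  return 0;
}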


@@ -0,0 +1,486 @@
# commit 4cf69995e26e16005d4e3843ad4d18c75cf21a04
# Author: Alan Modra <amodra@gmail.com>
# Date: Sat Aug 17 18:19:44 2013 +0930
#
# Fix for [BZ #15680] IBM long double inaccuracy
# http://sourceware.org/ml/libc-alpha/2013-06/msg00919.html
#
# I discovered a number of places where denormals and other corner cases
# were being handled wrongly.
#
# - printf_fphex.c: Testing for the low double exponent being zero is
# unnecessary. If the difference in exponents is less than 53 then the
# high double exponent must be nearing the low end of its range, and the
# low double exponent hit rock bottom.
#
# - ldbl2mpn.c: A denormal (ie. exponent of zero) value is treated as
# if the exponent was one, so shift mantissa left by one. Code handling
# normalisation of the low double mantissa lacked a test for shift count
# greater than bits in type being shifted, and lacked anything to handle
# the case where the difference in exponents is less than 53 as in
# printf_fphex.c.
#
# - math_ldbl.h (ldbl_extract_mantissa): Same as above, but worse, with
# code testing for exponent > 1 for some reason, probably a typo for >= 1.
#
# - math_ldbl.h (ldbl_insert_mantissa): Round the high double as per
# mpn2ldbl.c (hi is odd or explicit mantissas non-zero) so that the
# number we return won't change when applying ldbl_canonicalize().
# Add missing overflow checks and normalisation of high mantissa.
# Correct misleading comment: "The hidden bit of the lo mantissa is
# zero" is not always true as can be seen from the code rounding the hi
# mantissa. Also by inspection, lzcount can never be less than zero so
# remove that test. Lastly, masking bitfields to their widths can be
# left to the compiler.
#
# - mpn2ldbl.c: The overflow checks here on rounding of high double were
# just plain wrong. Incrementing the exponent must be accompanied by a
# shift right of the mantissa to keep the value unchanged. Above notes
# for ldbl_insert_mantissa are also relevant.
#
# [BZ #15680]
# * sysdeps/ieee754/ldbl-128ibm/e_rem_pio2l.c: Comment fix.
# * sysdeps/ieee754/ldbl-128ibm/printf_fphex.c
# (PRINT_FPHEX_LONG_DOUBLE): Tidy code by moving -53 into ediff
# calculation. Remove unnecessary test for denormal exponent.
# * sysdeps/ieee754/ldbl-128ibm/ldbl2mpn.c (__mpn_extract_long_double):
# Correct handling of denormals. Avoid undefined shift behaviour.
# Correct normalisation of low mantissa when low double is denormal.
# * sysdeps/ieee754/ldbl-128ibm/math_ldbl.h
# (ldbl_extract_mantissa): Likewise. Comment. Use uint64_t* for hi64.
# (ldbl_insert_mantissa): Make both hi64 and lo64 parms uint64_t.
# Correct normalisation of low mantissa. Test for overflow of high
# mantissa and normalise.
# (ldbl_nearbyint): Use more readable constant for two52.
# * sysdeps/ieee754/ldbl-128ibm/mpn2ldbl.c
# (__mpn_construct_long_double): Fix test for overflow of high
# mantissa and correct normalisation. Avoid undefined shift.
#
diff -urN glibc-2.17-c758a686/sysdeps/ieee754/ldbl-128ibm/e_rem_pio2l.c glibc-2.17-c758a686/sysdeps/ieee754/ldbl-128ibm/e_rem_pio2l.c
--- glibc-2.17-c758a686/sysdeps/ieee754/ldbl-128ibm/e_rem_pio2l.c 2014-05-27 19:13:56.000000000 -0500
+++ glibc-2.17-c758a686/sysdeps/ieee754/ldbl-128ibm/e_rem_pio2l.c 2014-05-27 19:14:45.000000000 -0500
@@ -243,7 +243,7 @@
We split the 113 bits of the mantissa into 5 24bit integers
stored in a double array. */
/* Make the IBM extended format 105 bit mantissa look like the ieee854 112
- bit mantissa so the next operatation will give the correct result. */
+ bit mantissa so the next operation will give the correct result. */
ldbl_extract_mantissa (&ixd, &lxd, &exp, x);
exp = exp - 23;
/* This is faster than doing this in floating point, because we
diff -urN glibc-2.17-c758a686/sysdeps/ieee754/ldbl-128ibm/ldbl2mpn.c glibc-2.17-c758a686/sysdeps/ieee754/ldbl-128ibm/ldbl2mpn.c
--- glibc-2.17-c758a686/sysdeps/ieee754/ldbl-128ibm/ldbl2mpn.c 2014-05-27 19:13:56.000000000 -0500
+++ glibc-2.17-c758a686/sysdeps/ieee754/ldbl-128ibm/ldbl2mpn.c 2014-05-27 19:14:45.000000000 -0500
@@ -36,6 +36,7 @@
union ibm_extended_long_double u;
unsigned long long hi, lo;
int ediff;
+
u.ld = value;
*is_neg = u.d[0].ieee.negative;
@@ -43,27 +44,36 @@
lo = ((long long) u.d[1].ieee.mantissa0 << 32) | u.d[1].ieee.mantissa1;
hi = ((long long) u.d[0].ieee.mantissa0 << 32) | u.d[0].ieee.mantissa1;
- /* If the lower double is not a denomal or zero then set the hidden
+
+ /* If the lower double is not a denormal or zero then set the hidden
53rd bit. */
- if (u.d[1].ieee.exponent > 0)
- {
- lo |= 1LL << 52;
+ if (u.d[1].ieee.exponent != 0)
+ lo |= 1ULL << 52;
+ else
+ lo = lo << 1;
- /* The lower double is normalized separately from the upper. We may
- need to adjust the lower manitissa to reflect this. */
- ediff = u.d[0].ieee.exponent - u.d[1].ieee.exponent;
- if (ediff > 53)
- lo = lo >> (ediff-53);
+ /* The lower double is normalized separately from the upper. We may
+ need to adjust the lower manitissa to reflect this. */
+ ediff = u.d[0].ieee.exponent - u.d[1].ieee.exponent - 53;
+ if (ediff > 0)
+ {
+ if (ediff < 64)
+ lo = lo >> ediff;
+ else
+ lo = 0;
}
+ else if (ediff < 0)
+ lo = lo << -ediff;
+
/* The high double may be rounded and the low double reflects the
difference between the long double and the rounded high double
value. This is indicated by a differnce between the signs of the
high and low doubles. */
- if ((u.d[0].ieee.negative != u.d[1].ieee.negative)
- && ((u.d[1].ieee.exponent != 0) && (lo != 0L)))
+ if (u.d[0].ieee.negative != u.d[1].ieee.negative
+ && lo != 0)
{
lo = (1ULL << 53) - lo;
- if (hi == 0LL)
+ if (hi == 0)
{
/* we have a borrow from the hidden bit, so shift left 1. */
hi = 0x0ffffffffffffeLL | (lo >> 51);
diff -urN glibc-2.17-c758a686/sysdeps/ieee754/ldbl-128ibm/math_ldbl.h glibc-2.17-c758a686/sysdeps/ieee754/ldbl-128ibm/math_ldbl.h
--- glibc-2.17-c758a686/sysdeps/ieee754/ldbl-128ibm/math_ldbl.h 2014-05-27 19:13:56.000000000 -0500
+++ glibc-2.17-c758a686/sysdeps/ieee754/ldbl-128ibm/math_ldbl.h 2014-05-27 19:51:13.000000000 -0500
@@ -13,77 +13,118 @@
the number before the decimal point and the second implicit bit
as bit 53 of the mantissa. */
uint64_t hi, lo;
- int ediff;
union ibm_extended_long_double u;
+
u.ld = x;
*exp = u.d[0].ieee.exponent - IEEE754_DOUBLE_BIAS;
lo = ((uint64_t)u.d[1].ieee.mantissa0 << 32) | u.d[1].ieee.mantissa1;
hi = ((uint64_t)u.d[0].ieee.mantissa0 << 32) | u.d[0].ieee.mantissa1;
- /* If the lower double is not a denomal or zero then set the hidden
- 53rd bit. */
- if (u.d[1].ieee.exponent > 0x001)
- {
- lo |= (1ULL << 52);
- lo = lo << 7; /* pre-shift lo to match ieee854. */
- /* The lower double is normalized separately from the upper. We
- may need to adjust the lower manitissa to reflect this. */
- ediff = u.d[0].ieee.exponent - u.d[1].ieee.exponent;
- if (ediff > 53)
- lo = lo >> (ediff-53);
- hi |= (1ULL << 52);
- }
- if ((u.d[0].ieee.negative != u.d[1].ieee.negative)
- && ((u.d[1].ieee.exponent != 0) && (lo != 0LL)))
+ if (u.d[0].ieee.exponent != 0)
{
- hi--;
- lo = (1ULL << 60) - lo;
- if (hi < (1ULL << 52))
+ int ediff;
+
+ /* If not a denormal or zero then we have an implicit 53rd bit. */
+ hi |= (uint64_t) 1 << 52;
+
+ if (u.d[1].ieee.exponent != 0)
+ lo |= (uint64_t) 1 << 52;
+ else
+ /* A denormal is to be interpreted as having a biased exponent
+ of 1. */
+ lo = lo << 1;
+
+ /* We are going to shift 4 bits out of hi later, because we only
+ want 48 bits in *hi64. That means we want 60 bits in lo, but
+ we currently only have 53. Shift the value up. */
+ lo = lo << 7;
+
+ /* The lower double is normalized separately from the upper.
+ We may need to adjust the lower mantissa to reflect this.
+ The difference between the exponents can be larger than 53
+ when the low double is much less than 1ULP of the upper
+ (in which case there are significant bits, all 0's or all
+ 1's, between the two significands). The difference between
+ the exponents can be less than 53 when the upper double
+ exponent is nearing its minimum value (in which case the low
+ double is denormal ie. has an exponent of zero). */
+ ediff = u.d[0].ieee.exponent - u.d[1].ieee.exponent - 53;
+ if (ediff > 0)
{
- /* we have a borrow from the hidden bit, so shift left 1. */
- hi = (hi << 1) | (lo >> 59);
- lo = 0xfffffffffffffffLL & (lo << 1);
- *exp = *exp - 1;
+ if (ediff < 64)
+ lo = lo >> ediff;
+ else
+ lo = 0;
+ }
+ else if (ediff < 0)
+ lo = lo << -ediff;
+
+ if (u.d[0].ieee.negative != u.d[1].ieee.negative
+ && lo != 0)
+ {
+ hi--;
+ lo = ((uint64_t) 1 << 60) - lo;
+ if (hi < (uint64_t) 1 << 52)
+ {
+ /* We have a borrow from the hidden bit, so shift left 1. */
+ hi = (hi << 1) | (lo >> 59);
+ lo = (((uint64_t) 1 << 60) - 1) & (lo << 1);
+ *exp = *exp - 1;
+ }
}
}
+ else
+ /* If the larger magnitude double is denormal then the smaller
+ one must be zero. */
+ hi = hi << 1;
+
*lo64 = (hi << 60) | lo;
*hi64 = hi >> 4;
}
static inline long double
-ldbl_insert_mantissa (int sign, int exp, int64_t hi64, u_int64_t lo64)
+ldbl_insert_mantissa (int sign, int exp, int64_t hi64, uint64_t lo64)
{
union ibm_extended_long_double u;
- unsigned long hidden2, lzcount;
- unsigned long long hi, lo;
+ int expnt2;
+ uint64_t hi, lo;
u.d[0].ieee.negative = sign;
u.d[1].ieee.negative = sign;
u.d[0].ieee.exponent = exp + IEEE754_DOUBLE_BIAS;
- u.d[1].ieee.exponent = exp-53 + IEEE754_DOUBLE_BIAS;
+ u.d[1].ieee.exponent = 0;
+ expnt2 = exp - 53 + IEEE754_DOUBLE_BIAS;
+
/* Expect 113 bits (112 bits + hidden) right justified in two longs.
The low order 53 bits (52 + hidden) go into the lower double */
- lo = (lo64 >> 7)& ((1ULL << 53) - 1);
- hidden2 = (lo64 >> 59) & 1ULL;
+ lo = (lo64 >> 7) & (((uint64_t) 1 << 53) - 1);
/* The high order 53 bits (52 + hidden) go into the upper double */
- hi = (lo64 >> 60) & ((1ULL << 11) - 1);
- hi |= (hi64 << 4);
+ hi = lo64 >> 60;
+ hi |= hi64 << 4;
- if (lo != 0LL)
+ if (lo != 0)
{
- /* hidden2 bit of low double controls rounding of the high double.
- If hidden2 is '1' then round up hi and adjust lo (2nd mantissa)
+ int lzcount;
+
+ /* hidden bit of low double controls rounding of the high double.
+ If hidden is '1' and either the explicit mantissa is non-zero
+ or hi is odd, then round up hi and adjust lo (2nd mantissa)
plus change the sign of the low double to compensate. */
- if (hidden2)
+ if ((lo & ((uint64_t) 1 << 52)) != 0
+ && ((hi & 1) != 0 || (lo & (((uint64_t) 1 << 52) - 1)) != 0))
{
hi++;
+ if ((hi & ((uint64_t) 1 << 53)) != 0)
+ {
+ hi = hi >> 1;
+ u.d[0].ieee.exponent++;
+ }
u.d[1].ieee.negative = !sign;
- lo = (1ULL << 53) - lo;
+ lo = ((uint64_t) 1 << 53) - lo;
}
- /* The hidden bit of the lo mantissa is zero so we need to
- normalize the it for the low double. Shift it left until the
- hidden bit is '1' then adjust the 2nd exponent accordingly. */
+ /* Normalize the low double. Shift the mantissa left until
+ the hidden bit is '1' and adjust the exponent accordingly. */
if (sizeof (lo) == sizeof (long))
lzcount = __builtin_clzl (lo);
@@ -91,34 +132,30 @@
lzcount = __builtin_clzl ((long) (lo >> 32));
else
lzcount = __builtin_clzl ((long) lo) + 32;
- lzcount = lzcount - 11;
- if (lzcount > 0)
+ lzcount = lzcount - (64 - 53);
+ lo <<= lzcount;
+ expnt2 -= lzcount;
+
+ if (expnt2 >= 1)
+ /* Not denormal. */
+ u.d[1].ieee.exponent = expnt2;
+ else
{
- int expnt2 = u.d[1].ieee.exponent - lzcount;
- if (expnt2 >= 1)
- {
- /* Not denormal. Normalize and set low exponent. */
- lo = lo << lzcount;
- u.d[1].ieee.exponent = expnt2;
- }
+ /* Is denormal. Note that biased exponent of 0 is treated
+ as if it was 1, hence the extra shift. */
+ if (expnt2 > -53)
+ lo >>= 1 - expnt2;
else
- {
- /* Is denormal. */
- lo = lo << (lzcount + expnt2);
- u.d[1].ieee.exponent = 0;
- }
+ lo = 0;
}
}
else
- {
- u.d[1].ieee.negative = 0;
- u.d[1].ieee.exponent = 0;
- }
+ u.d[1].ieee.negative = 0;
- u.d[1].ieee.mantissa1 = lo & ((1ULL << 32) - 1);
- u.d[1].ieee.mantissa0 = (lo >> 32) & ((1ULL << 20) - 1);
- u.d[0].ieee.mantissa1 = hi & ((1ULL << 32) - 1);
- u.d[0].ieee.mantissa0 = (hi >> 32) & ((1ULL << 20) - 1);
+ u.d[1].ieee.mantissa1 = lo;
+ u.d[1].ieee.mantissa0 = lo >> 32;
+ u.d[0].ieee.mantissa1 = hi;
+ u.d[0].ieee.mantissa0 = hi >> 32;
return u.ld;
}
@@ -133,6 +170,10 @@
return u.ld;
}
+/* To suit our callers we return *hi64 and *lo64 as if they came from
+ an ieee854 112 bit mantissa, that is, 48 bits in *hi64 (plus one
+ implicit bit) and 64 bits in *lo64. */
+
static inline void
default_ldbl_unpack (long double l, double *a, double *aa)
{
@@ -162,13 +203,13 @@
*aa = xl;
}
-/* Simple inline nearbyint (double) function .
+/* Simple inline nearbyint (double) function.
Only works in the default rounding mode
but is useful in long double rounding functions. */
static inline double
ldbl_nearbyint (double a)
{
- double two52 = 0x10000000000000LL;
+ double two52 = 0x1p52;
if (__builtin_expect ((__builtin_fabs (a) < two52), 1))
{
diff -urN glibc-2.17-c758a686/sysdeps/ieee754/ldbl-128ibm/mpn2ldbl.c glibc-2.17-c758a686/sysdeps/ieee754/ldbl-128ibm/mpn2ldbl.c
--- glibc-2.17-c758a686/sysdeps/ieee754/ldbl-128ibm/mpn2ldbl.c 2014-05-27 19:13:56.000000000 -0500
+++ glibc-2.17-c758a686/sysdeps/ieee754/ldbl-128ibm/mpn2ldbl.c 2014-05-27 19:14:45.000000000 -0500
@@ -70,9 +70,9 @@
else
lzcount = __builtin_clzl ((long) val) + 32;
if (hi)
- lzcount = lzcount - 11;
+ lzcount = lzcount - (64 - 53);
else
- lzcount = lzcount + 42;
+ lzcount = lzcount + 53 - (64 - 53);
if (lzcount > u.d[0].ieee.exponent)
{
@@ -98,29 +98,27 @@
}
}
- if (lo != 0L)
+ if (lo != 0)
{
- /* hidden2 bit of low double controls rounding of the high double.
- If hidden2 is '1' and either the explicit mantissa is non-zero
+ /* hidden bit of low double controls rounding of the high double.
+ If hidden is '1' and either the explicit mantissa is non-zero
or hi is odd, then round up hi and adjust lo (2nd mantissa)
plus change the sign of the low double to compensate. */
if ((lo & (1LL << 52)) != 0
- && ((hi & 1) != 0 || (lo & ((1LL << 52) - 1))))
+ && ((hi & 1) != 0 || (lo & ((1LL << 52) - 1)) != 0))
{
hi++;
- if ((hi & ((1LL << 52) - 1)) == 0)
+ if ((hi & (1LL << 53)) != 0)
{
- if ((hi & (1LL << 53)) != 0)
- hi -= 1LL << 52;
+ hi >>= 1;
u.d[0].ieee.exponent++;
}
u.d[1].ieee.negative = !sign;
lo = (1LL << 53) - lo;
}
- /* The hidden bit of the lo mantissa is zero so we need to normalize
- it for the low double. Shift it left until the hidden bit is '1'
- then adjust the 2nd exponent accordingly. */
+ /* Normalize the low double. Shift the mantissa left until
+ the hidden bit is '1' and adjust the exponent accordingly. */
if (sizeof (lo) == sizeof (long))
lzcount = __builtin_clzl (lo);
@@ -128,24 +126,24 @@
lzcount = __builtin_clzl ((long) (lo >> 32));
else
lzcount = __builtin_clzl ((long) lo) + 32;
- lzcount = lzcount - 11;
- if (lzcount > 0)
- {
- lo = lo << lzcount;
- exponent2 = exponent2 - lzcount;
- }
+ lzcount = lzcount - (64 - 53);
+ lo <<= lzcount;
+ exponent2 -= lzcount;
+
if (exponent2 > 0)
u.d[1].ieee.exponent = exponent2;
- else
+ else if (exponent2 > -53)
lo >>= 1 - exponent2;
+ else
+ lo = 0;
}
else
u.d[1].ieee.negative = 0;
- u.d[1].ieee.mantissa1 = lo & 0xffffffffLL;
- u.d[1].ieee.mantissa0 = (lo >> 32) & 0xfffff;
- u.d[0].ieee.mantissa1 = hi & 0xffffffffLL;
- u.d[0].ieee.mantissa0 = (hi >> 32) & ((1LL << (LDBL_MANT_DIG - 86)) - 1);
+ u.d[1].ieee.mantissa1 = lo;
+ u.d[1].ieee.mantissa0 = lo >> 32;
+ u.d[0].ieee.mantissa1 = hi;
+ u.d[0].ieee.mantissa0 = hi >> 32;
return u.ld;
}
diff -urN glibc-2.17-c758a686/sysdeps/ieee754/ldbl-128ibm/printf_fphex.c glibc-2.17-c758a686/sysdeps/ieee754/ldbl-128ibm/printf_fphex.c
--- glibc-2.17-c758a686/sysdeps/ieee754/ldbl-128ibm/printf_fphex.c 2014-05-27 19:13:56.000000000 -0500
+++ glibc-2.17-c758a686/sysdeps/ieee754/ldbl-128ibm/printf_fphex.c 2014-05-27 19:14:45.000000000 -0500
@@ -43,15 +43,15 @@
lo <<= 1; \
/* The lower double is normalized separately from the upper. We \
may need to adjust the lower manitissa to reflect this. */ \
- ediff = u.d[0].ieee.exponent - u.d[1].ieee.exponent; \
- if (ediff > 53 + 63) \
+ ediff = u.d[0].ieee.exponent - u.d[1].ieee.exponent - 53; \
+ if (ediff > 63) \
lo = 0; \
- else if (ediff > 53) \
- lo = lo >> (ediff - 53); \
- else if (u.d[1].ieee.exponent == 0 && ediff < 53) \
- lo = lo << (53 - ediff); \
+ else if (ediff > 0) \
+ lo = lo >> ediff; \
+ else if (ediff < 0) \
+ lo = lo << -ediff; \
if (u.d[0].ieee.negative != u.d[1].ieee.negative \
- && (u.d[1].ieee.exponent != 0 || lo != 0L)) \
+ && lo != 0) \
{ \
lo = (1ULL << 60) - lo; \
if (hi == 0L) \
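A recurring fix in this patch is the shift of the low mantissa: the exponent gap between the two halves can exceed 63, and shifting a 64-bit value by 64 or more is undefined in C, so the new code clamps the count instead of shifting blindly. A stand-alone sketch of that adjustment, using the same arithmetic as the ediff hunks above but outside glibc:

/* --- sketch, not part of the patch --- */
#include <stdint.h>
#include <stdio.h>

/* Align the low mantissa with the high one given the two biased
   exponents, never shifting a 64-bit value by 64 or more.  */
static uint64_t
align_low_mantissa (uint64_t lo, int exp_hi, int exp_lo)
{
  int ediff = exp_hi - exp_lo - 53;      /* nominal gap between the halves */

  if (ediff > 0)
    return ediff < 64 ? lo >> ediff : 0; /* huge gap: low part vanishes */
  else if (ediff < 0)
    return lo << -ediff;                 /* low double was denormal */
  return lo;
}

int
main (void)
{
  /* Gap of 147 bits: the low mantissa contributes nothing.  */
  printf ("%llu\n",
          (unsigned long long) align_low_mantissa (1ULL << 52, 1200, 1000));
  return 0;
}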


@@ -0,0 +1,652 @@
# commit 1b6adf888de14675bc3207578dcb7132ed5f8ecc
# Author: Alan Modra <amodra@gmail.com>
# Date: Sat Aug 17 18:21:58 2013 +0930
#
# PowerPC floating point little-endian [1 of 15]
# http://sourceware.org/ml/libc-alpha/2013-08/msg00081.html
#
# This is the first of a series of patches to ban ieee854_long_double
# and the ieee854_long_double macros when using IBM long double. union
# ieee854_long_double just isn't correct for IBM long double, especially
# when little-endian, and pretending it is OK has allowed a number of
# bugs to remain undetected in sysdeps/ieee754/ldbl-128ibm/.
#
# This changes the few places in generic code that use it.
#
# * stdio-common/printf_size.c (__printf_size): Don't use
# union ieee854_long_double in fpnum union.
# * stdio-common/printf_fphex.c (__printf_fphex): Likewise. Use
# signbit macro to retrieve sign from long double.
# * stdio-common/printf_fp.c (___printf_fp): Use signbit macro to
# retrieve sign from long double.
# * sysdeps/ieee754/ldbl-128ibm/printf_fphex.c: Adjust for fpnum change.
# * sysdeps/ieee754/ldbl-128/printf_fphex.c: Likewise.
# * sysdeps/ieee754/ldbl-96/printf_fphex.c: Likewise.
# * sysdeps/x86_64/fpu/printf_fphex.c: Likewise.
# * math/test-misc.c (main): Don't use union ieee854_long_double.
# ports/
# * sysdeps/ia64/fpu/printf_fphex.c: Adjust for fpnum change.
#
diff -urN glibc-2.17-c758a686/math/test-misc.c glibc-2.17-c758a686/math/test-misc.c
--- glibc-2.17-c758a686/math/test-misc.c 2014-05-27 19:53:22.000000000 -0500
+++ glibc-2.17-c758a686/math/test-misc.c 2014-05-27 19:53:45.000000000 -0500
@@ -721,300 +721,161 @@
#ifndef NO_LONG_DOUBLE
{
- union ieee854_long_double v1;
- union ieee854_long_double v2;
- long double ld;
+ long double v1, v2;
- v1.d = ld = LDBL_MIN;
- if (fpclassify (ld) != FP_NORMAL)
+ v1 = LDBL_MIN;
+ if (fpclassify (v1) != FP_NORMAL)
{
- printf ("fpclassify (LDBL_MIN) failed: %d\n", fpclassify (ld));
+ printf ("fpclassify (LDBL_MIN) failed: %d (%La)\n",
+ fpclassify (v1), v1);
result = 1;
}
- ld = nextafterl (ld, LDBL_MIN / 2.0);
- if (fpclassify (ld) != FP_SUBNORMAL)
+ v2 = nextafterl (v1, LDBL_MIN / 2.0);
+ if (fpclassify (v2) != FP_SUBNORMAL)
{
printf ("fpclassify (LDBL_MIN-epsilon) failed: %d (%La)\n",
- fpclassify (ld), ld);
+ fpclassify (v2), v2);
result = 1;
}
- v2.d = ld = nextafterl (ld, LDBL_MIN);
- if (fpclassify (ld) != FP_NORMAL)
+ v2 = nextafterl (v2, LDBL_MIN);
+ if (fpclassify (v2) != FP_NORMAL)
{
printf ("fpclassify (LDBL_MIN-epsilon+epsilon) failed: %d (%La)\n",
- fpclassify (ld), ld);
+ fpclassify (v2), v2);
result = 1;
}
- if (v1.ieee.mantissa0 != v2.ieee.mantissa0)
+ if (v1 != v2)
{
- printf ("LDBL_MIN: mantissa0 differs: %8x vs %8x\n",
- v1.ieee.mantissa0, v2.ieee.mantissa0);
- result = 1;
- }
- if (v1.ieee.mantissa1 != v2.ieee.mantissa1)
- {
- printf ("LDBL_MIN: mantissa1 differs: %8x vs %8x\n",
- v1.ieee.mantissa1, v2.ieee.mantissa1);
- result = 1;
- }
- if (v1.ieee.exponent != v2.ieee.exponent)
- {
- printf ("LDBL_MIN: exponent differs: %4x vs %4x\n",
- v1.ieee.exponent, v2.ieee.exponent);
- result = 1;
- }
- if (v1.ieee.negative != v2.ieee.negative)
- {
- printf ("LDBL_MIN: negative differs: %d vs %d\n",
- v1.ieee.negative, v2.ieee.negative);
+ printf ("LDBL_MIN-epsilon+epsilon != LDBL_MIN: %La vs %La\n", v2, v1);
result = 1;
}
- v1.d = ld = -LDBL_MIN;
- if (fpclassify (ld) != FP_NORMAL)
+ v1 = -LDBL_MIN;
+ if (fpclassify (v1) != FP_NORMAL)
{
- printf ("fpclassify (-LDBL_MIN) failed: %d\n", fpclassify (ld));
+ printf ("fpclassify (-LDBL_MIN) failed: %d (%La)\n",
+ fpclassify (v1), v1);
result = 1;
}
- ld = nextafterl (ld, -LDBL_MIN / 2.0);
- if (fpclassify (ld) != FP_SUBNORMAL)
+ v2 = nextafterl (v1, -LDBL_MIN / 2.0);
+ if (fpclassify (v2) != FP_SUBNORMAL)
{
printf ("fpclassify (-LDBL_MIN-epsilon) failed: %d (%La)\n",
- fpclassify (ld), ld);
+ fpclassify (v2), v2);
result = 1;
}
- v2.d = ld = nextafterl (ld, -LDBL_MIN);
- if (fpclassify (ld) != FP_NORMAL)
+ v2 = nextafterl (v2, -LDBL_MIN);
+ if (fpclassify (v2) != FP_NORMAL)
{
printf ("fpclassify (-LDBL_MIN-epsilon+epsilon) failed: %d (%La)\n",
- fpclassify (ld), ld);
+ fpclassify (v2), v2);
result = 1;
}
- if (v1.ieee.mantissa0 != v2.ieee.mantissa0)
+ if (v1 != v2)
{
- printf ("-LDBL_MIN: mantissa0 differs: %8x vs %8x\n",
- v1.ieee.mantissa0, v2.ieee.mantissa0);
- result = 1;
- }
- if (v1.ieee.mantissa1 != v2.ieee.mantissa1)
- {
- printf ("-LDBL_MIN: mantissa1 differs: %8x vs %8x\n",
- v1.ieee.mantissa1, v2.ieee.mantissa1);
- result = 1;
- }
- if (v1.ieee.exponent != v2.ieee.exponent)
- {
- printf ("-LDBL_MIN: exponent differs: %4x vs %4x\n",
- v1.ieee.exponent, v2.ieee.exponent);
- result = 1;
- }
- if (v1.ieee.negative != v2.ieee.negative)
- {
- printf ("-LDBL_MIN: negative differs: %d vs %d\n",
- v1.ieee.negative, v2.ieee.negative);
+ printf ("-LDBL_MIN-epsilon+epsilon != -LDBL_MIN: %La vs %La\n", v2, v1);
result = 1;
}
- ld = LDBL_MAX;
- if (fpclassify (ld) != FP_NORMAL)
+ v1 = LDBL_MAX;
+ if (fpclassify (v1) != FP_NORMAL)
{
- printf ("fpclassify (LDBL_MAX) failed: %d\n", fpclassify (ld));
+ printf ("fpclassify (LDBL_MAX) failed: %d (%La)\n",
+ fpclassify (v1), v1);
result = 1;
}
- ld = nextafterl (ld, INFINITY);
- if (fpclassify (ld) != FP_INFINITE)
+ v2 = nextafterl (v1, INFINITY);
+ if (fpclassify (v2) != FP_INFINITE)
{
- printf ("fpclassify (LDBL_MAX+epsilon) failed: %d\n", fpclassify (ld));
+ printf ("fpclassify (LDBL_MAX+epsilon) failed: %d (%La)\n",
+ fpclassify (v2), v2);
result = 1;
}
- ld = -LDBL_MAX;
- if (fpclassify (ld) != FP_NORMAL)
+ v1 = -LDBL_MAX;
+ if (fpclassify (v1) != FP_NORMAL)
{
- printf ("fpclassify (-LDBL_MAX) failed: %d\n", fpclassify (ld));
+ printf ("fpclassify (-LDBL_MAX) failed: %d (%La)\n",
+ fpclassify (v1), v1);
result = 1;
}
- ld = nextafterl (ld, -INFINITY);
- if (fpclassify (ld) != FP_INFINITE)
+ v2 = nextafterl (v1, -INFINITY);
+ if (fpclassify (v2) != FP_INFINITE)
{
- printf ("fpclassify (-LDBL_MAX-epsilon) failed: %d\n",
- fpclassify (ld));
+ printf ("fpclassify (-LDBL_MAX-epsilon) failed: %d (%La)\n",
+ fpclassify (v2), v2);
result = 1;
}
- v1.d = ld = 0.0625;
- ld = nextafterl (ld, 0.0);
- v2.d = ld = nextafterl (ld, 1.0);
+ v1 = 0.0625;
+ v2 = nextafterl (v1, 0.0);
+ v2 = nextafterl (v2, 1.0);
- if (v1.ieee.mantissa0 != v2.ieee.mantissa0)
- {
- printf ("0.0625L down: mantissa0 differs: %8x vs %8x\n",
- v1.ieee.mantissa0, v2.ieee.mantissa0);
- result = 1;
- }
- if (v1.ieee.mantissa1 != v2.ieee.mantissa1)
- {
- printf ("0.0625L down: mantissa1 differs: %8x vs %8x\n",
- v1.ieee.mantissa1, v2.ieee.mantissa1);
- result = 1;
- }
- if (v1.ieee.exponent != v2.ieee.exponent)
- {
- printf ("0.0625L down: exponent differs: %4x vs %4x\n",
- v1.ieee.exponent, v2.ieee.exponent);
- result = 1;
- }
- if (v1.ieee.negative != v2.ieee.negative)
+ if (v1 != v2)
{
- printf ("0.0625L down: negative differs: %d vs %d\n",
- v1.ieee.negative, v2.ieee.negative);
+ printf ("0.0625L-epsilon+epsilon != 0.0625L: %La vs %La\n", v2, v1);
result = 1;
}
- v1.d = ld = 0.0625;
- ld = nextafterl (ld, 1.0);
- v2.d = ld = nextafterl (ld, 0.0);
+ v1 = 0.0625;
+ v2 = nextafterl (v1, 1.0);
+ v2 = nextafterl (v2, 0.0);
- if (v1.ieee.mantissa0 != v2.ieee.mantissa0)
- {
- printf ("0.0625L up: mantissa0 differs: %8x vs %8x\n",
- v1.ieee.mantissa0, v2.ieee.mantissa0);
- result = 1;
- }
- if (v1.ieee.mantissa1 != v2.ieee.mantissa1)
- {
- printf ("0.0625L up: mantissa1 differs: %8x vs %8x\n",
- v1.ieee.mantissa1, v2.ieee.mantissa1);
- result = 1;
- }
- if (v1.ieee.exponent != v2.ieee.exponent)
+ if (v1 != v2)
{
- printf ("0.0625L up: exponent differs: %4x vs %4x\n",
- v1.ieee.exponent, v2.ieee.exponent);
- result = 1;
- }
- if (v1.ieee.negative != v2.ieee.negative)
- {
- printf ("0.0625L up: negative differs: %d vs %d\n",
- v1.ieee.negative, v2.ieee.negative);
+ printf ("0.0625L+epsilon-epsilon != 0.0625L: %La vs %La\n", v2, v1);
result = 1;
}
- v1.d = ld = -0.0625;
- ld = nextafterl (ld, 0.0);
- v2.d = ld = nextafterl (ld, -1.0);
+ v1 = -0.0625;
+ v2 = nextafterl (v1, 0.0);
+ v2 = nextafterl (v2, -1.0);
- if (v1.ieee.mantissa0 != v2.ieee.mantissa0)
- {
- printf ("-0.0625L up: mantissa0 differs: %8x vs %8x\n",
- v1.ieee.mantissa0, v2.ieee.mantissa0);
- result = 1;
- }
- if (v1.ieee.mantissa1 != v2.ieee.mantissa1)
- {
- printf ("-0.0625L up: mantissa1 differs: %8x vs %8x\n",
- v1.ieee.mantissa1, v2.ieee.mantissa1);
- result = 1;
- }
- if (v1.ieee.exponent != v2.ieee.exponent)
+ if (v1 != v2)
{
- printf ("-0.0625L up: exponent differs: %4x vs %4x\n",
- v1.ieee.exponent, v2.ieee.exponent);
- result = 1;
- }
- if (v1.ieee.negative != v2.ieee.negative)
- {
- printf ("-0.0625L up: negative differs: %d vs %d\n",
- v1.ieee.negative, v2.ieee.negative);
+ printf ("-0.0625L+epsilon-epsilon != -0.0625L: %La vs %La\n", v2, v1);
result = 1;
}
- v1.d = ld = -0.0625;
- ld = nextafterl (ld, -1.0);
- v2.d = ld = nextafterl (ld, 0.0);
+ v1 = -0.0625;
+ v2 = nextafterl (v1, -1.0);
+ v2 = nextafterl (v2, 0.0);
- if (v1.ieee.mantissa0 != v2.ieee.mantissa0)
- {
- printf ("-0.0625L down: mantissa0 differs: %8x vs %8x\n",
- v1.ieee.mantissa0, v2.ieee.mantissa0);
- result = 1;
- }
- if (v1.ieee.mantissa1 != v2.ieee.mantissa1)
+ if (v1 != v2)
{
- printf ("-0.0625L down: mantissa1 differs: %8x vs %8x\n",
- v1.ieee.mantissa1, v2.ieee.mantissa1);
- result = 1;
- }
- if (v1.ieee.exponent != v2.ieee.exponent)
- {
- printf ("-0.0625L down: exponent differs: %4x vs %4x\n",
- v1.ieee.exponent, v2.ieee.exponent);
- result = 1;
- }
- if (v1.ieee.negative != v2.ieee.negative)
- {
- printf ("-0.0625L down: negative differs: %d vs %d\n",
- v1.ieee.negative, v2.ieee.negative);
+ printf ("-0.0625L-epsilon+epsilon != -0.0625L: %La vs %La\n", v2, v1);
result = 1;
}
- v1.d = ld = 0.0;
- ld = nextafterl (ld, 1.0);
- v2.d = nextafterl (ld, -1.0);
+ v1 = 0.0;
+ v2 = nextafterl (v1, 1.0);
+ v2 = nextafterl (v2, -1.0);
- if (v1.ieee.mantissa0 != v2.ieee.mantissa0)
- {
- printf ("0.0L up: mantissa0 differs: %8x vs %8x\n",
- v1.ieee.mantissa0, v2.ieee.mantissa0);
- result = 1;
- }
- if (v1.ieee.mantissa1 != v2.ieee.mantissa1)
- {
- printf ("0.0L up: mantissa1 differs: %8x vs %8x\n",
- v1.ieee.mantissa1, v2.ieee.mantissa1);
- result = 1;
- }
- if (v1.ieee.exponent != v2.ieee.exponent)
+ if (v1 != v2)
{
- printf ("0.0L up: exponent differs: %4x vs %4x\n",
- v1.ieee.exponent, v2.ieee.exponent);
+ printf ("0.0+epsilon-epsilon != 0.0L: %La vs %La\n", v2, v1);
result = 1;
}
- if (0 != v2.ieee.negative)
+ if (signbit (v2))
{
- printf ("0.0L up: negative differs: 0 vs %d\n",
- v2.ieee.negative);
+ printf ("0.0+epsilon-epsilon is negative\n");
result = 1;
}
- v1.d = ld = 0.0;
- ld = nextafterl (ld, -1.0);
- v2.d = nextafterl (ld, 1.0);
+ v1 = 0.0;
+ v2 = nextafterl (v1, -1.0);
+ v2 = nextafterl (v2, 1.0);
- if (v1.ieee.mantissa0 != v2.ieee.mantissa0)
- {
- printf ("0.0L down: mantissa0 differs: %8x vs %8x\n",
- v1.ieee.mantissa0, v2.ieee.mantissa0);
- result = 1;
- }
- if (v1.ieee.mantissa1 != v2.ieee.mantissa1)
- {
- printf ("0.0L down: mantissa1 differs: %8x vs %8x\n",
- v1.ieee.mantissa1, v2.ieee.mantissa1);
- result = 1;
- }
- if (v1.ieee.exponent != v2.ieee.exponent)
+ if (v1 != v2)
{
- printf ("0.0L down: exponent differs: %4x vs %4x\n",
- v1.ieee.exponent, v2.ieee.exponent);
+ printf ("0.0-epsilon+epsilon != 0.0L: %La vs %La\n", v2, v1);
result = 1;
}
- if (1 != v2.ieee.negative)
+ if (!signbit (v2))
{
- printf ("0.0L down: negative differs: 1 vs %d\n",
- v2.ieee.negative);
+ printf ("0.0-epsilon+epsilon is positive\n");
result = 1;
}
diff -urN glibc-2.17-c758a686/ports/sysdeps/ia64/fpu/printf_fphex.c glibc-2.17-c758a686/ports/sysdeps/ia64/fpu/printf_fphex.c
--- glibc-2.17-c758a686/ports/sysdeps/ia64/fpu/printf_fphex.c 2014-05-27 19:53:21.000000000 -0500
+++ glibc-2.17-c758a686/ports/sysdeps/ia64/fpu/printf_fphex.c 2014-05-27 19:53:45.000000000 -0500
@@ -25,9 +25,11 @@
/* The "strange" 80 bit format on ia64 has an explicit \
leading digit in the 64 bit mantissa. */ \
unsigned long long int num; \
+ union ieee854_long_double u; \
+ u.d = fpnum.ldbl; \
\
- num = (((unsigned long long int) fpnum.ldbl.ieee.mantissa0) << 32 \
- | fpnum.ldbl.ieee.mantissa1); \
+ num = (((unsigned long long int) u.ieee.mantissa0) << 32 \
+ | u.ieee.mantissa1); \
\
zero_mantissa = num == 0; \
\
@@ -49,8 +51,8 @@
\
/* We have 3 bits from the mantissa in the leading nibble. \
Therefore we are here using `IEEE854_LONG_DOUBLE_BIAS + 3'. */ \
- exponent = fpnum.ldbl.ieee.exponent; \
- \
+ exponent = u.ieee.exponent; \
+ \
if (exponent == 0) \
{ \
if (zero_mantissa) \
diff -urN glibc-2.17-c758a686/stdio-common/printf_fp.c glibc-2.17-c758a686/stdio-common/printf_fp.c
--- glibc-2.17-c758a686/stdio-common/printf_fp.c 2014-05-27 19:53:22.000000000 -0500
+++ glibc-2.17-c758a686/stdio-common/printf_fp.c 2014-05-27 19:53:45.000000000 -0500
@@ -335,8 +335,7 @@
int res;
if (__isnanl (fpnum.ldbl))
{
- union ieee854_long_double u = { .d = fpnum.ldbl };
- is_neg = u.ieee.negative != 0;
+ is_neg = signbit (fpnum.ldbl);
if (isupper (info->spec))
{
special = "NAN";
diff -urN glibc-2.17-c758a686/stdio-common/printf_fphex.c glibc-2.17-c758a686/stdio-common/printf_fphex.c
--- glibc-2.17-c758a686/stdio-common/printf_fphex.c 2014-05-27 19:53:22.000000000 -0500
+++ glibc-2.17-c758a686/stdio-common/printf_fphex.c 2014-05-27 19:53:45.000000000 -0500
@@ -93,7 +93,7 @@
union
{
union ieee754_double dbl;
- union ieee854_long_double ldbl;
+ long double ldbl;
}
fpnum;
@@ -162,12 +162,11 @@
#ifndef __NO_LONG_DOUBLE_MATH
if (info->is_long_double && sizeof (long double) > sizeof (double))
{
- fpnum.ldbl.d = *(const long double *) args[0];
+ fpnum.ldbl = *(const long double *) args[0];
/* Check for special values: not a number or infinity. */
- if (__isnanl (fpnum.ldbl.d))
+ if (__isnanl (fpnum.ldbl))
{
- negative = fpnum.ldbl.ieee.negative != 0;
if (isupper (info->spec))
{
special = "NAN";
@@ -181,8 +180,7 @@
}
else
{
- int res = __isinfl (fpnum.ldbl.d);
- if (res)
+ if (__isinfl (fpnum.ldbl))
{
if (isupper (info->spec))
{
@@ -194,11 +192,9 @@
special = "inf";
wspecial = L"inf";
}
- negative = res < 0;
}
- else
- negative = signbit (fpnum.ldbl.d);
}
+ negative = signbit (fpnum.ldbl);
}
else
#endif /* no long double */
diff -urN glibc-2.17-c758a686/stdio-common/printf_size.c glibc-2.17-c758a686/stdio-common/printf_size.c
--- glibc-2.17-c758a686/stdio-common/printf_size.c 2014-05-27 19:53:22.000000000 -0500
+++ glibc-2.17-c758a686/stdio-common/printf_size.c 2014-05-27 19:53:45.000000000 -0500
@@ -103,7 +103,7 @@
union
{
union ieee754_double dbl;
- union ieee854_long_double ldbl;
+ long double ldbl;
}
fpnum;
const void *ptr = &fpnum;
@@ -123,25 +123,25 @@
#ifndef __NO_LONG_DOUBLE_MATH
if (info->is_long_double && sizeof (long double) > sizeof (double))
{
- fpnum.ldbl.d = *(const long double *) args[0];
+ fpnum.ldbl = *(const long double *) args[0];
/* Check for special values: not a number or infinity. */
- if (__isnanl (fpnum.ldbl.d))
+ if (__isnanl (fpnum.ldbl))
{
special = "nan";
wspecial = L"nan";
// fpnum_sign = 0; Already zero
}
- else if ((res = __isinfl (fpnum.ldbl.d)))
+ else if ((res = __isinfl (fpnum.ldbl)))
{
fpnum_sign = res;
special = "inf";
wspecial = L"inf";
}
else
- while (fpnum.ldbl.d >= divisor && tag[1] != '\0')
+ while (fpnum.ldbl >= divisor && tag[1] != '\0')
{
- fpnum.ldbl.d /= divisor;
+ fpnum.ldbl /= divisor;
++tag;
}
}
diff -urN glibc-2.17-c758a686/sysdeps/ieee754/ldbl-128/printf_fphex.c glibc-2.17-c758a686/sysdeps/ieee754/ldbl-128/printf_fphex.c
--- glibc-2.17-c758a686/sysdeps/ieee754/ldbl-128/printf_fphex.c 2014-05-27 19:53:20.000000000 -0500
+++ glibc-2.17-c758a686/sysdeps/ieee754/ldbl-128/printf_fphex.c 2014-05-27 19:53:45.000000000 -0500
@@ -24,13 +24,15 @@
digits we use only the implicit digits for the number before \
the decimal point. */ \
unsigned long long int num0, num1; \
+ union ieee854_long_double u; \
+ u.d = fpnum.ldbl; \
\
assert (sizeof (long double) == 16); \
\
- num0 = (((unsigned long long int) fpnum.ldbl.ieee.mantissa0) << 32 \
- | fpnum.ldbl.ieee.mantissa1); \
- num1 = (((unsigned long long int) fpnum.ldbl.ieee.mantissa2) << 32 \
- | fpnum.ldbl.ieee.mantissa3); \
+ num0 = (((unsigned long long int) u.ieee.mantissa0) << 32 \
+ | u.ieee.mantissa1); \
+ num1 = (((unsigned long long int) u.ieee.mantissa2) << 32 \
+ | u.ieee.mantissa3); \
\
zero_mantissa = (num0|num1) == 0; \
\
@@ -75,9 +77,9 @@
*--wnumstr = L'0'; \
} \
\
- leading = fpnum.ldbl.ieee.exponent == 0 ? '0' : '1'; \
+ leading = u.ieee.exponent == 0 ? '0' : '1'; \
\
- exponent = fpnum.ldbl.ieee.exponent; \
+ exponent = u.ieee.exponent; \
\
if (exponent == 0) \
{ \
diff -urN glibc-2.17-c758a686/sysdeps/ieee754/ldbl-128ibm/printf_fphex.c glibc-2.17-c758a686/sysdeps/ieee754/ldbl-128ibm/printf_fphex.c
--- glibc-2.17-c758a686/sysdeps/ieee754/ldbl-128ibm/printf_fphex.c 2014-05-27 19:53:20.000000000 -0500
+++ glibc-2.17-c758a686/sysdeps/ieee754/ldbl-128ibm/printf_fphex.c 2014-05-27 19:53:45.000000000 -0500
@@ -28,14 +28,14 @@
unsigned long long hi, lo; \
int ediff; \
union ibm_extended_long_double u; \
- u.ld = fpnum.ldbl.d; \
+ u.ld = fpnum.ldbl; \
\
assert (sizeof (long double) == 16); \
\
lo = ((long long)u.d[1].ieee.mantissa0 << 32) | u.d[1].ieee.mantissa1; \
hi = ((long long)u.d[0].ieee.mantissa0 << 32) | u.d[0].ieee.mantissa1; \
lo <<= 7; /* pre-shift lo to match ieee854. */ \
- /* If the lower double is not a denomal or zero then set the hidden \
+ /* If the lower double is not a denormal or zero then set the hidden \
53rd bit. */ \
if (u.d[1].ieee.exponent != 0) \
lo |= (1ULL << (52 + 7)); \
diff -urN glibc-2.17-c758a686/sysdeps/ieee754/ldbl-96/printf_fphex.c glibc-2.17-c758a686/sysdeps/ieee754/ldbl-96/printf_fphex.c
--- glibc-2.17-c758a686/sysdeps/ieee754/ldbl-96/printf_fphex.c 2014-05-27 19:53:20.000000000 -0500
+++ glibc-2.17-c758a686/sysdeps/ieee754/ldbl-96/printf_fphex.c 2014-05-27 19:53:45.000000000 -0500
@@ -25,11 +25,13 @@
/* The "strange" 80 bit format on ix86 and m68k has an explicit \
leading digit in the 64 bit mantissa. */ \
unsigned long long int num; \
+ union ieee854_long_double u; \
+ u.d = fpnum.ldbl; \
\
assert (sizeof (long double) == 12); \
\
- num = (((unsigned long long int) fpnum.ldbl.ieee.mantissa0) << 32 \
- | fpnum.ldbl.ieee.mantissa1); \
+ num = (((unsigned long long int) u.ieee.mantissa0) << 32 \
+ | u.ieee.mantissa1); \
\
zero_mantissa = num == 0; \
\
@@ -62,7 +64,7 @@
\
/* We have 3 bits from the mantissa in the leading nibble. \
Therefore we are here using `IEEE854_LONG_DOUBLE_BIAS + 3'. */ \
- exponent = fpnum.ldbl.ieee.exponent; \
+ exponent = u.ieee.exponent; \
\
if (exponent == 0) \
{ \
diff -urN glibc-2.17-c758a686/sysdeps/x86_64/fpu/printf_fphex.c glibc-2.17-c758a686/sysdeps/x86_64/fpu/printf_fphex.c
--- glibc-2.17-c758a686/sysdeps/x86_64/fpu/printf_fphex.c 2014-05-27 19:53:20.000000000 -0500
+++ glibc-2.17-c758a686/sysdeps/x86_64/fpu/printf_fphex.c 2014-05-27 19:53:45.000000000 -0500
@@ -25,10 +25,11 @@
/* The "strange" 80 bit format on ix86 and m68k has an explicit \
leading digit in the 64 bit mantissa. */ \
unsigned long long int num; \
+ union ieee854_long_double u; \
+ u.d = fpnum.ldbl; \
\
- \
- num = (((unsigned long long int) fpnum.ldbl.ieee.mantissa0) << 32 \
- | fpnum.ldbl.ieee.mantissa1); \
+ num = (((unsigned long long int) u.ieee.mantissa0) << 32 \
+ | u.ieee.mantissa1); \
\
zero_mantissa = num == 0; \
\
@@ -61,7 +62,7 @@
\
/* We have 3 bits from the mantissa in the leading nibble. \
Therefore we are here using `IEEE854_LONG_DOUBLE_BIAS + 3'. */ \
- exponent = fpnum.ldbl.ieee.exponent; \
+ exponent = u.ieee.exponent; \
\
if (exponent == 0) \
{ \

View File

@ -0,0 +1,651 @@
# commit 4ebd120cd983c8d2ac7a234884b3ac6805d82973
# Author: Alan Modra <amodra@gmail.com>
# Date: Sat Aug 17 18:24:05 2013 +0930
#
# PowerPC floating point little-endian [2 of 15]
# http://sourceware.org/ml/libc-alpha/2013-08/msg00082.html
#
# This patch replaces occurrences of GET_LDOUBLE_* and SET_LDOUBLE_*
# macros, and union ieee854_long_double_shape_type in ldbl-128ibm/,
# and a stray one in the 32-bit fpu support. These files have no
# significant changes apart from rewriting the long double bit access.
#
# * sysdeps/ieee754/ldbl-128ibm/math_ldbl.h (ldbl_high): Define.
# * sysdeps/ieee754/ldbl-128ibm/e_acoshl.c (__ieee754_acoshl): Rewrite
# all uses of ieee854 long double macros and unions.
# * sysdeps/ieee754/ldbl-128ibm/e_acosl.c (__ieee754_acosl): Likewise.
# * sysdeps/ieee754/ldbl-128ibm/e_asinl.c (__ieee754_asinl): Likewise.
# * sysdeps/ieee754/ldbl-128ibm/e_atanhl.c (__ieee754_atanhl): Likewise.
# * sysdeps/ieee754/ldbl-128ibm/e_coshl.c (__ieee754_coshl): Likewise.
# * sysdeps/ieee754/ldbl-128ibm/e_log2l.c (__ieee754_log2l): Likewise.
# * sysdeps/ieee754/ldbl-128ibm/e_rem_pio2l.c (__ieee754_rem_pio2l):
# Likewise.
# * sysdeps/ieee754/ldbl-128ibm/e_sinhl.c (__ieee754_sinhl): Likewise.
# * sysdeps/ieee754/ldbl-128ibm/k_cosl.c (__kernel_cosl): Likewise.
# * sysdeps/ieee754/ldbl-128ibm/k_sincosl.c (__kernel_sincosl): Likewise.
# * sysdeps/ieee754/ldbl-128ibm/k_sinl.c (__kernel_sinl): Likewise.
# * sysdeps/ieee754/ldbl-128ibm/s_asinhl.c (__asinhl): Likewise.
# * sysdeps/ieee754/ldbl-128ibm/s_atanl.c (__atanl): Likewise.
# Simplify sign and nan test too.
# * sysdeps/ieee754/ldbl-128ibm/s_cosl.c (__cosl): Likewise.
# * sysdeps/ieee754/ldbl-128ibm/s_fabsl.c (__fabsl): Likewise.
# * sysdeps/ieee754/ldbl-128ibm/s_finitel.c (___finitel): Likewise.
# * sysdeps/ieee754/ldbl-128ibm/s_fpclassifyl.c (___fpclassifyl):
# Likewise.
# * sysdeps/ieee754/ldbl-128ibm/s_isnanl.c (___isnanl): Likewise.
# * sysdeps/ieee754/ldbl-128ibm/s_issignalingl.c (__issignalingl):
# Likewise.
# * sysdeps/ieee754/ldbl-128ibm/s_logbl.c (__logbl): Likewise.
# * sysdeps/ieee754/ldbl-128ibm/s_signbitl.c (___signbitl): Likewise.
# * sysdeps/ieee754/ldbl-128ibm/s_sincosl.c (__sincosl): Likewise.
# * sysdeps/ieee754/ldbl-128ibm/s_sinl.c (__sinl): Likewise.
# * sysdeps/ieee754/ldbl-128ibm/s_tanl.c (__tanl): Likewise.
# * sysdeps/powerpc/powerpc32/power7/fpu/s_logbl.c (__logbl): Likewise.
#
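
The mechanical replacement made throughout these files is to take the high component double and read its 64-bit image, instead of overlaying the whole long double with ieee854 bitfields or GET_LDOUBLE_* macros. The sketch below mimics that pattern with portable stand-ins; the names ldbl_high and extract_words64 here are illustrative substitutes for glibc's internal macros, and an IBM double-double long double is assumed:

#include <stdint.h>
#include <stdio.h>
#include <string.h>

/* Rough stand-ins for glibc's internal helpers.  */
static inline double
ldbl_high (long double x)
{
  double hi;
  memcpy (&hi, &x, sizeof hi);   /* The high double is stored first.  */
  return hi;
}

static inline uint64_t
extract_words64 (double d)
{
  uint64_t u;
  memcpy (&u, &d, sizeof u);
  return u;
}

int
main (void)
{
  long double x = -3.75L;
  uint64_t hx = extract_words64 (ldbl_high (x));

  /* Sign, exponent and top mantissa bits of x and |x|, as the
     rewritten functions test them.  */
  printf ("high word = 0x%016llx, |x| bits = 0x%016llx\n",
          (unsigned long long) hx,
          (unsigned long long) (hx & 0x7fffffffffffffffULL));
  return 0;
}
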
diff -urN glibc-2.17-c758a686/sysdeps/ieee754/ldbl-128ibm/e_acoshl.c glibc-2.17-c758a686/sysdeps/ieee754/ldbl-128ibm/e_acoshl.c
--- glibc-2.17-c758a686/sysdeps/ieee754/ldbl-128ibm/e_acoshl.c 2014-05-27 19:58:07.000000000 -0500
+++ glibc-2.17-c758a686/sysdeps/ieee754/ldbl-128ibm/e_acoshl.c 2014-05-27 19:59:00.000000000 -0500
@@ -36,8 +36,12 @@
{
long double t;
int64_t hx;
- u_int64_t lx;
- GET_LDOUBLE_WORDS64(hx,lx,x);
+ uint64_t lx;
+ double xhi, xlo;
+
+ ldbl_unpack (x, &xhi, &xlo);
+ EXTRACT_WORDS64 (hx, xhi);
+ EXTRACT_WORDS64 (lx, xlo);
if(hx<0x3ff0000000000000LL) { /* x < 1 */
return (x-x)/(x-x);
} else if(hx >=0x41b0000000000000LL) { /* x > 2**28 */
diff -urN glibc-2.17-c758a686/sysdeps/ieee754/ldbl-128ibm/e_acosl.c glibc-2.17-c758a686/sysdeps/ieee754/ldbl-128ibm/e_acosl.c
--- glibc-2.17-c758a686/sysdeps/ieee754/ldbl-128ibm/e_acosl.c 2014-05-27 19:58:07.000000000 -0500
+++ glibc-2.17-c758a686/sysdeps/ieee754/ldbl-128ibm/e_acosl.c 2014-05-27 19:59:00.000000000 -0500
@@ -151,26 +151,25 @@
long double
__ieee754_acosl (long double x)
{
- long double z, r, w, p, q, s, t, f2;
- ieee854_long_double_shape_type u;
+ long double a, z, r, w, p, q, s, t, f2;
- u.value = __builtin_fabsl (x);
- if (u.value == 1.0L)
+ a = __builtin_fabsl (x);
+ if (a == 1.0L)
{
if (x > 0.0L)
return 0.0; /* acos(1) = 0 */
else
return (2.0 * pio2_hi) + (2.0 * pio2_lo); /* acos(-1)= pi */
}
- else if (u.value > 1.0L)
+ else if (a > 1.0L)
{
return (x - x) / (x - x); /* acos(|x| > 1) is NaN */
}
- if (u.value < 0.5L)
+ if (a < 0.5L)
{
- if (u.value < 6.938893903907228e-18L) /* |x| < 2**-57 */
+ if (a < 6.938893903907228e-18L) /* |x| < 2**-57 */
return pio2_hi + pio2_lo;
- if (u.value < 0.4375L)
+ if (a < 0.4375L)
{
/* Arcsine of x. */
z = x * x;
@@ -199,7 +198,7 @@
return z;
}
/* .4375 <= |x| < .5 */
- t = u.value - 0.4375L;
+ t = a - 0.4375L;
p = ((((((((((P10 * t
+ P9) * t
+ P8) * t
@@ -230,9 +229,9 @@
r = acosr4375 + r;
return r;
}
- else if (u.value < 0.625L)
+ else if (a < 0.625L)
{
- t = u.value - 0.5625L;
+ t = a - 0.5625L;
p = ((((((((((rS10 * t
+ rS9) * t
+ rS8) * t
@@ -264,7 +263,9 @@
}
else
{ /* |x| >= .625 */
- z = (one - u.value) * 0.5;
+ double shi, slo;
+
+ z = (one - a) * 0.5;
s = __ieee754_sqrtl (z);
/* Compute an extended precision square root from
the Newton iteration s -> 0.5 * (s + z / s).
@@ -273,12 +274,11 @@
Express s = f1 + f2 where f1 * f1 is exactly representable.
w = (z - s^2)/2s = (z - f1^2 - 2 f1 f2 - f2^2)/2s .
s + w has extended precision. */
- u.value = s;
- u.parts32.w2 = 0;
- u.parts32.w3 = 0;
- f2 = s - u.value;
- w = z - u.value * u.value;
- w = w - 2.0 * u.value * f2;
+ ldbl_unpack (s, &shi, &slo);
+ a = shi;
+ f2 = slo;
+ w = z - a * a;
+ w = w - 2.0 * a * f2;
w = w - f2 * f2;
w = w / (2.0 * s);
/* Arcsine of s. */
diff -urN glibc-2.17-c758a686/sysdeps/ieee754/ldbl-128ibm/e_asinl.c glibc-2.17-c758a686/sysdeps/ieee754/ldbl-128ibm/e_asinl.c
--- glibc-2.17-c758a686/sysdeps/ieee754/ldbl-128ibm/e_asinl.c 2014-05-27 19:58:07.000000000 -0500
+++ glibc-2.17-c758a686/sysdeps/ieee754/ldbl-128ibm/e_asinl.c 2014-05-27 19:59:00.000000000 -0500
@@ -131,19 +131,18 @@
long double
__ieee754_asinl (long double x)
{
- long double t, w, p, q, c, r, s;
+ long double a, t, w, p, q, c, r, s;
int flag;
- ieee854_long_double_shape_type u;
flag = 0;
- u.value = __builtin_fabsl (x);
- if (u.value == 1.0L) /* |x|>= 1 */
+ a = __builtin_fabsl (x);
+ if (a == 1.0L) /* |x|>= 1 */
return x * pio2_hi + x * pio2_lo; /* asin(1)=+-pi/2 with inexact */
- else if (u.value >= 1.0L)
+ else if (a >= 1.0L)
return (x - x) / (x - x); /* asin(|x|>1) is NaN */
- else if (u.value < 0.5L)
+ else if (a < 0.5L)
{
- if (u.value < 6.938893903907228e-18L) /* |x| < 2**-57 */
+ if (a < 6.938893903907228e-18L) /* |x| < 2**-57 */
{
if (huge + x > one)
return x; /* return x with inexact if x!=0 */
@@ -155,9 +154,9 @@
flag = 1;
}
}
- else if (u.value < 0.625L)
+ else if (a < 0.625L)
{
- t = u.value - 0.5625;
+ t = a - 0.5625;
p = ((((((((((rS10 * t
+ rS9) * t
+ rS8) * t
@@ -190,7 +189,7 @@
else
{
/* 1 > |x| >= 0.625 */
- w = one - u.value;
+ w = one - a;
t = w * 0.5;
}
@@ -223,17 +222,14 @@
}
s = __ieee754_sqrtl (t);
- if (u.value > 0.975L)
+ if (a > 0.975L)
{
w = p / q;
t = pio2_hi - (2.0 * (s + s * w) - pio2_lo);
}
else
{
- u.value = s;
- u.parts32.w3 = 0;
- u.parts32.w2 = 0;
- w = u.value;
+ w = ldbl_high (s);
c = (t - w * w) / (s + w);
r = p / q;
p = 2.0 * s * r - (pio2_lo - 2.0 * c);
diff -urN glibc-2.17-c758a686/sysdeps/ieee754/ldbl-128ibm/e_atanhl.c glibc-2.17-c758a686/sysdeps/ieee754/ldbl-128ibm/e_atanhl.c
--- glibc-2.17-c758a686/sysdeps/ieee754/ldbl-128ibm/e_atanhl.c 2014-05-27 19:58:07.000000000 -0500
+++ glibc-2.17-c758a686/sysdeps/ieee754/ldbl-128ibm/e_atanhl.c 2014-05-27 19:59:00.000000000 -0500
@@ -40,8 +40,10 @@
{
long double t;
int64_t hx,ix;
- u_int64_t lx __attribute__ ((unused));
- GET_LDOUBLE_WORDS64(hx,lx,x);
+ double xhi;
+
+ xhi = ldbl_high (x);
+ EXTRACT_WORDS64 (hx, xhi);
ix = hx&0x7fffffffffffffffLL;
if (ix >= 0x3ff0000000000000LL) { /* |x|>=1 */
if (ix > 0x3ff0000000000000LL)
diff -urN glibc-2.17-c758a686/sysdeps/ieee754/ldbl-128ibm/e_coshl.c glibc-2.17-c758a686/sysdeps/ieee754/ldbl-128ibm/e_coshl.c
--- glibc-2.17-c758a686/sysdeps/ieee754/ldbl-128ibm/e_coshl.c 2014-05-27 19:58:07.000000000 -0500
+++ glibc-2.17-c758a686/sysdeps/ieee754/ldbl-128ibm/e_coshl.c 2014-05-27 19:59:00.000000000 -0500
@@ -41,9 +41,11 @@
{
long double t,w;
int64_t ix;
+ double xhi;
/* High word of |x|. */
- GET_LDOUBLE_MSW64(ix,x);
+ xhi = ldbl_high (x);
+ EXTRACT_WORDS64 (ix, xhi);
ix &= 0x7fffffffffffffffLL;
/* x is INF or NaN */
diff -urN glibc-2.17-c758a686/sysdeps/ieee754/ldbl-128ibm/e_log2l.c glibc-2.17-c758a686/sysdeps/ieee754/ldbl-128ibm/e_log2l.c
--- glibc-2.17-c758a686/sysdeps/ieee754/ldbl-128ibm/e_log2l.c 2014-05-27 19:58:07.000000000 -0500
+++ glibc-2.17-c758a686/sysdeps/ieee754/ldbl-128ibm/e_log2l.c 2014-05-27 19:59:00.000000000 -0500
@@ -177,11 +177,13 @@
long double z;
long double y;
int e;
- int64_t hx, lx;
+ int64_t hx;
+ double xhi;
/* Test for domain */
- GET_LDOUBLE_WORDS64 (hx, lx, x);
- if (((hx & 0x7fffffffffffffffLL) | (lx & 0x7fffffffffffffffLL)) == 0)
+ xhi = ldbl_high (x);
+ EXTRACT_WORDS64 (hx, xhi);
+ if ((hx & 0x7fffffffffffffffLL) == 0)
return (-1.0L / (x - x));
if (hx < 0)
return (x - x) / (x - x);
diff -urN glibc-2.17-c758a686/sysdeps/ieee754/ldbl-128ibm/e_rem_pio2l.c glibc-2.17-c758a686/sysdeps/ieee754/ldbl-128ibm/e_rem_pio2l.c
--- glibc-2.17-c758a686/sysdeps/ieee754/ldbl-128ibm/e_rem_pio2l.c 2014-05-27 19:58:07.000000000 -0500
+++ glibc-2.17-c758a686/sysdeps/ieee754/ldbl-128ibm/e_rem_pio2l.c 2014-05-27 19:59:00.000000000 -0500
@@ -200,10 +200,11 @@
double tx[8];
int exp;
int64_t n, ix, hx, ixd;
- u_int64_t lx __attribute__ ((unused));
u_int64_t lxd;
+ double xhi;
- GET_LDOUBLE_WORDS64 (hx, lx, x);
+ xhi = ldbl_high (x);
+ EXTRACT_WORDS64 (hx, xhi);
ix = hx & 0x7fffffffffffffffLL;
if (ix <= 0x3fe921fb54442d10LL) /* x in <-pi/4, pi/4> */
{
diff -urN glibc-2.17-c758a686/sysdeps/ieee754/ldbl-128ibm/e_sinhl.c glibc-2.17-c758a686/sysdeps/ieee754/ldbl-128ibm/e_sinhl.c
--- glibc-2.17-c758a686/sysdeps/ieee754/ldbl-128ibm/e_sinhl.c 2014-05-27 19:58:07.000000000 -0500
+++ glibc-2.17-c758a686/sysdeps/ieee754/ldbl-128ibm/e_sinhl.c 2014-05-27 19:59:00.000000000 -0500
@@ -38,9 +38,11 @@
{
long double t,w,h;
int64_t ix,jx;
+ double xhi;
/* High word of |x|. */
- GET_LDOUBLE_MSW64(jx,x);
+ xhi = ldbl_high (x);
+ EXTRACT_WORDS64 (jx, xhi);
ix = jx&0x7fffffffffffffffLL;
/* x is INF or NaN */
diff -urN glibc-2.17-c758a686/sysdeps/ieee754/ldbl-128ibm/k_cosl.c glibc-2.17-c758a686/sysdeps/ieee754/ldbl-128ibm/k_cosl.c
--- glibc-2.17-c758a686/sysdeps/ieee754/ldbl-128ibm/k_cosl.c 2014-05-27 19:58:07.000000000 -0500
+++ glibc-2.17-c758a686/sysdeps/ieee754/ldbl-128ibm/k_cosl.c 2014-05-27 19:59:00.000000000 -0500
@@ -81,8 +81,11 @@
{
long double h, l, z, sin_l, cos_l_m1;
int64_t ix;
- u_int32_t tix, hix, index;
- GET_LDOUBLE_MSW64 (ix, x);
+ uint32_t tix, hix, index;
+ double xhi, hhi;
+
+ xhi = ldbl_high (x);
+ EXTRACT_WORDS64 (ix, xhi);
tix = ((u_int64_t)ix) >> 32;
tix &= ~0x80000000; /* tix = |x|'s high 32 bits */
if (tix < 0x3fc30000) /* |x| < 0.1484375 */
@@ -136,7 +139,8 @@
case 2: index = (hix - 0x3fc30000) >> 14; break;
}
*/
- SET_LDOUBLE_WORDS64(h, ((u_int64_t)hix) << 32, 0);
+ INSERT_WORDS64 (hhi, ((uint64_t)hix) << 32);
+ h = hhi;
l = y - (h - x);
z = l * l;
sin_l = l*(ONE+z*(SSIN1+z*(SSIN2+z*(SSIN3+z*(SSIN4+z*SSIN5)))));
diff -urN glibc-2.17-c758a686/sysdeps/ieee754/ldbl-128ibm/k_sincosl.c glibc-2.17-c758a686/sysdeps/ieee754/ldbl-128ibm/k_sincosl.c
--- glibc-2.17-c758a686/sysdeps/ieee754/ldbl-128ibm/k_sincosl.c 2014-05-27 19:58:07.000000000 -0500
+++ glibc-2.17-c758a686/sysdeps/ieee754/ldbl-128ibm/k_sincosl.c 2014-05-27 19:59:00.000000000 -0500
@@ -100,9 +100,12 @@
{
long double h, l, z, sin_l, cos_l_m1;
int64_t ix;
- u_int32_t tix, hix, index;
- GET_LDOUBLE_MSW64 (ix, x);
- tix = ((u_int64_t)ix) >> 32;
+ uint32_t tix, hix, index;
+ double xhi, hhi;
+
+ xhi = ldbl_high (x);
+ EXTRACT_WORDS64 (ix, xhi);
+ tix = ((uint64_t)ix) >> 32;
tix &= ~0x80000000; /* tix = |x|'s high 32 bits */
if (tix < 0x3fc30000) /* |x| < 0.1484375 */
{
@@ -164,7 +167,8 @@
case 2: index = (hix - 0x3fc30000) >> 14; break;
}
*/
- SET_LDOUBLE_WORDS64(h, ((u_int64_t)hix) << 32, 0);
+ INSERT_WORDS64 (hhi, ((uint64_t)hix) << 32);
+ h = hhi;
if (iy)
l = y - (h - x);
else
diff -urN glibc-2.17-c758a686/sysdeps/ieee754/ldbl-128ibm/k_sinl.c glibc-2.17-c758a686/sysdeps/ieee754/ldbl-128ibm/k_sinl.c
--- glibc-2.17-c758a686/sysdeps/ieee754/ldbl-128ibm/k_sinl.c 2014-05-27 19:58:07.000000000 -0500
+++ glibc-2.17-c758a686/sysdeps/ieee754/ldbl-128ibm/k_sinl.c 2014-05-27 19:59:00.000000000 -0500
@@ -82,7 +82,10 @@
long double h, l, z, sin_l, cos_l_m1;
int64_t ix;
u_int32_t tix, hix, index;
- GET_LDOUBLE_MSW64 (ix, x);
+ double xhi, hhi;
+
+ xhi = ldbl_high (x);
+ EXTRACT_WORDS64 (ix, xhi);
tix = ((u_int64_t)ix) >> 32;
tix &= ~0x80000000; /* tix = |x|'s high 32 bits */
if (tix < 0x3fc30000) /* |x| < 0.1484375 */
@@ -132,7 +135,8 @@
case 2: index = (hix - 0x3fc30000) >> 14; break;
}
*/
- SET_LDOUBLE_WORDS64(h, ((u_int64_t)hix) << 32, 0);
+ INSERT_WORDS64 (hhi, ((uint64_t)hix) << 32);
+ h = hhi;
if (iy)
l = (ix < 0 ? -y : y) - (h - x);
else
diff -urN glibc-2.17-c758a686/sysdeps/ieee754/ldbl-128ibm/s_asinhl.c glibc-2.17-c758a686/sysdeps/ieee754/ldbl-128ibm/s_asinhl.c
--- glibc-2.17-c758a686/sysdeps/ieee754/ldbl-128ibm/s_asinhl.c 2014-05-27 19:58:07.000000000 -0500
+++ glibc-2.17-c758a686/sysdeps/ieee754/ldbl-128ibm/s_asinhl.c 2014-05-27 19:59:00.000000000 -0500
@@ -38,7 +38,10 @@
{
long double t,w;
int64_t hx,ix;
- GET_LDOUBLE_MSW64(hx,x);
+ double xhi;
+
+ xhi = ldbl_high (x);
+ EXTRACT_WORDS64 (hx, xhi);
ix = hx&0x7fffffffffffffffLL;
if(ix>=0x7ff0000000000000LL) return x+x; /* x is inf or NaN */
if(ix< 0x3e20000000000000LL) { /* |x|<2**-29 */
diff -urN glibc-2.17-c758a686/sysdeps/ieee754/ldbl-128ibm/s_atanl.c glibc-2.17-c758a686/sysdeps/ieee754/ldbl-128ibm/s_atanl.c
--- glibc-2.17-c758a686/sysdeps/ieee754/ldbl-128ibm/s_atanl.c 2014-05-27 19:58:07.000000000 -0500
+++ glibc-2.17-c758a686/sysdeps/ieee754/ldbl-128ibm/s_atanl.c 2014-05-27 19:59:00.000000000 -0500
@@ -173,23 +173,20 @@
long double
__atanl (long double x)
{
- int k, sign;
+ int32_t k, sign, lx;
long double t, u, p, q;
- ieee854_long_double_shape_type s;
+ double xhi;
- s.value = x;
- k = s.parts32.w0;
- if (k & 0x80000000)
- sign = 1;
- else
- sign = 0;
+ xhi = ldbl_high (x);
+ EXTRACT_WORDS (k, lx, xhi);
+ sign = k & 0x80000000;
/* Check for IEEE special cases. */
k &= 0x7fffffff;
if (k >= 0x7ff00000)
{
/* NaN. */
- if ((k & 0xfffff) | s.parts32.w1 )
+ if (((k - 0x7ff00000) | lx) != 0)
return (x + x);
/* Infinity. */
diff -urN glibc-2.17-c758a686/sysdeps/ieee754/ldbl-128ibm/s_cosl.c glibc-2.17-c758a686/sysdeps/ieee754/ldbl-128ibm/s_cosl.c
--- glibc-2.17-c758a686/sysdeps/ieee754/ldbl-128ibm/s_cosl.c 2014-05-27 19:58:07.000000000 -0500
+++ glibc-2.17-c758a686/sysdeps/ieee754/ldbl-128ibm/s_cosl.c 2014-05-27 19:59:00.000000000 -0500
@@ -53,9 +53,11 @@
{
long double y[2],z=0.0L;
int64_t n, ix;
+ double xhi;
/* High word of x. */
- GET_LDOUBLE_MSW64(ix,x);
+ xhi = ldbl_high (x);
+ EXTRACT_WORDS64 (ix, xhi);
/* |x| ~< pi/4 */
ix &= 0x7fffffffffffffffLL;
diff -urN glibc-2.17-c758a686/sysdeps/ieee754/ldbl-128ibm/s_fabsl.c glibc-2.17-c758a686/sysdeps/ieee754/ldbl-128ibm/s_fabsl.c
--- glibc-2.17-c758a686/sysdeps/ieee754/ldbl-128ibm/s_fabsl.c 2014-05-27 19:58:07.000000000 -0500
+++ glibc-2.17-c758a686/sysdeps/ieee754/ldbl-128ibm/s_fabsl.c 2014-05-27 19:59:00.000000000 -0500
@@ -29,10 +29,16 @@
long double __fabsl(long double x)
{
u_int64_t hx, lx;
- GET_LDOUBLE_WORDS64(hx,lx,x);
+ double xhi, xlo;
+
+ ldbl_unpack (x, &xhi, &xlo);
+ EXTRACT_WORDS64 (hx, xhi);
+ EXTRACT_WORDS64 (lx, xlo);
lx = lx ^ ( hx & 0x8000000000000000LL );
hx = hx & 0x7fffffffffffffffLL;
- SET_LDOUBLE_WORDS64(x,hx,lx);
+ INSERT_WORDS64 (xhi, hx);
+ INSERT_WORDS64 (xlo, lx);
+ x = ldbl_pack (xhi, xlo);
return x;
}
long_double_symbol (libm, __fabsl, fabsl);
diff -urN glibc-2.17-c758a686/sysdeps/ieee754/ldbl-128ibm/s_finitel.c glibc-2.17-c758a686/sysdeps/ieee754/ldbl-128ibm/s_finitel.c
--- glibc-2.17-c758a686/sysdeps/ieee754/ldbl-128ibm/s_finitel.c 2014-05-27 19:58:07.000000000 -0500
+++ glibc-2.17-c758a686/sysdeps/ieee754/ldbl-128ibm/s_finitel.c 2014-05-27 19:59:00.000000000 -0500
@@ -29,10 +29,14 @@
int
___finitel (long double x)
{
- int64_t hx;
- GET_LDOUBLE_MSW64(hx,x);
- return (int)((u_int64_t)((hx&0x7fffffffffffffffLL)
- -0x7ff0000000000000LL)>>63);
+ uint64_t hx;
+ double xhi;
+
+ xhi = ldbl_high (x);
+ EXTRACT_WORDS64 (hx, xhi);
+ hx &= 0x7fffffffffffffffLL;
+ hx -= 0x7ff0000000000000LL;
+ return hx >> 63;
}
hidden_ver (___finitel, __finitel)
weak_alias (___finitel, ____finitel)
diff -urN glibc-2.17-c758a686/sysdeps/ieee754/ldbl-128ibm/s_fpclassifyl.c glibc-2.17-c758a686/sysdeps/ieee754/ldbl-128ibm/s_fpclassifyl.c
--- glibc-2.17-c758a686/sysdeps/ieee754/ldbl-128ibm/s_fpclassifyl.c 2014-05-27 19:58:07.000000000 -0500
+++ glibc-2.17-c758a686/sysdeps/ieee754/ldbl-128ibm/s_fpclassifyl.c 2014-05-27 19:59:00.000000000 -0500
@@ -46,8 +46,10 @@
{
u_int64_t hx, lx;
int retval = FP_NORMAL;
+ double xhi, xlo;
- GET_LDOUBLE_WORDS64 (hx, lx, x);
+ ldbl_unpack (x, &xhi, &xlo);
+ EXTRACT_WORDS64 (hx, xhi);
if ((hx & 0x7ff0000000000000ULL) == 0x7ff0000000000000ULL) {
/* +/-NaN or +/-Inf */
if (hx & 0x000fffffffffffffULL) {
@@ -65,6 +67,7 @@
retval = FP_NORMAL;
} else {
if ((hx & 0x7ff0000000000000ULL) == 0x0360000000000000ULL) {
+ EXTRACT_WORDS64 (lx, xlo);
if ((lx & 0x7fffffffffffffff) /* lower is non-zero */
&& ((lx^hx) & 0x8000000000000000ULL)) { /* and sign differs */
/* +/- denormal */
diff -urN glibc-2.17-c758a686/sysdeps/ieee754/ldbl-128ibm/s_isnanl.c glibc-2.17-c758a686/sysdeps/ieee754/ldbl-128ibm/s_isnanl.c
--- glibc-2.17-c758a686/sysdeps/ieee754/ldbl-128ibm/s_isnanl.c 2014-05-27 19:58:07.000000000 -0500
+++ glibc-2.17-c758a686/sysdeps/ieee754/ldbl-128ibm/s_isnanl.c 2014-05-27 19:59:00.000000000 -0500
@@ -29,12 +29,14 @@
int
___isnanl (long double x)
{
- int64_t hx;
- int64_t lx __attribute__ ((unused));
- GET_LDOUBLE_WORDS64(hx,lx,x);
- hx &= 0x7fffffffffffffffLL;
- hx = 0x7ff0000000000000LL - hx;
- return (int)((u_int64_t)hx>>63);
+ uint64_t hx;
+ double xhi;
+
+ xhi = ldbl_high (x);
+ EXTRACT_WORDS64 (hx, xhi);
+ hx &= 0x7fffffffffffffffLL;
+ hx = 0x7ff0000000000000LL - hx;
+ return (int) (hx >> 63);
}
hidden_ver (___isnanl, __isnanl)
#ifndef IS_IN_libm
diff -urN glibc-2.17-c758a686/sysdeps/ieee754/ldbl-128ibm/s_logbl.c glibc-2.17-c758a686/sysdeps/ieee754/ldbl-128ibm/s_logbl.c
--- glibc-2.17-c758a686/sysdeps/ieee754/ldbl-128ibm/s_logbl.c 2014-05-27 19:58:07.000000000 -0500
+++ glibc-2.17-c758a686/sysdeps/ieee754/ldbl-128ibm/s_logbl.c 2014-05-27 19:59:19.000000000 -0500
@@ -27,9 +27,10 @@
__logbl (long double x)
{
int64_t hx, rhx;
- int64_t lx __attribute__ ((unused));
+ double xhi;
- GET_LDOUBLE_WORDS64 (hx, lx, x);
+ xhi = ldbl_high (x);
+ EXTRACT_WORDS64 (hx, xhi);
hx &= 0x7fffffffffffffffLL; /* high |x| */
if (hx == 0)
return -1.0 / fabs (x);
diff -urN glibc-2.17-c758a686/sysdeps/ieee754/ldbl-128ibm/s_signbitl.c glibc-2.17-c758a686/sysdeps/ieee754/ldbl-128ibm/s_signbitl.c
--- glibc-2.17-c758a686/sysdeps/ieee754/ldbl-128ibm/s_signbitl.c 2014-05-27 19:58:07.000000000 -0500
+++ glibc-2.17-c758a686/sysdeps/ieee754/ldbl-128ibm/s_signbitl.c 2014-05-27 19:59:19.000000000 -0500
@@ -25,8 +25,10 @@
___signbitl (long double x)
{
int64_t e;
+ double xhi;
- GET_LDOUBLE_MSW64 (e, x);
+ xhi = ldbl_high (x);
+ EXTRACT_WORDS64 (e, xhi);
return e < 0;
}
#ifdef IS_IN_libm
diff -urN glibc-2.17-c758a686/sysdeps/ieee754/ldbl-128ibm/s_sincosl.c glibc-2.17-c758a686/sysdeps/ieee754/ldbl-128ibm/s_sincosl.c
--- glibc-2.17-c758a686/sysdeps/ieee754/ldbl-128ibm/s_sincosl.c 2014-05-27 19:58:07.000000000 -0500
+++ glibc-2.17-c758a686/sysdeps/ieee754/ldbl-128ibm/s_sincosl.c 2014-05-27 19:59:19.000000000 -0500
@@ -27,9 +27,11 @@
__sincosl (long double x, long double *sinx, long double *cosx)
{
int64_t ix;
+ double xhi;
/* High word of x. */
- GET_LDOUBLE_MSW64 (ix, x);
+ xhi = ldbl_high (x);
+ EXTRACT_WORDS64 (ix, xhi);
/* |x| ~< pi/4 */
ix &= 0x7fffffffffffffffLL;
diff -urN glibc-2.17-c758a686/sysdeps/ieee754/ldbl-128ibm/s_sinl.c glibc-2.17-c758a686/sysdeps/ieee754/ldbl-128ibm/s_sinl.c
--- glibc-2.17-c758a686/sysdeps/ieee754/ldbl-128ibm/s_sinl.c 2014-05-27 19:58:07.000000000 -0500
+++ glibc-2.17-c758a686/sysdeps/ieee754/ldbl-128ibm/s_sinl.c 2014-05-27 19:59:19.000000000 -0500
@@ -53,9 +53,11 @@
{
long double y[2],z=0.0L;
int64_t n, ix;
+ double xhi;
/* High word of x. */
- GET_LDOUBLE_MSW64(ix,x);
+ xhi = ldbl_high (x);
+ EXTRACT_WORDS64 (ix, xhi);
/* |x| ~< pi/4 */
ix &= 0x7fffffffffffffffLL;
diff -urN glibc-2.17-c758a686/sysdeps/ieee754/ldbl-128ibm/s_tanl.c glibc-2.17-c758a686/sysdeps/ieee754/ldbl-128ibm/s_tanl.c
--- glibc-2.17-c758a686/sysdeps/ieee754/ldbl-128ibm/s_tanl.c 2014-05-27 19:58:07.000000000 -0500
+++ glibc-2.17-c758a686/sysdeps/ieee754/ldbl-128ibm/s_tanl.c 2014-05-27 19:59:19.000000000 -0500
@@ -53,9 +53,11 @@
{
long double y[2],z=0.0L;
int64_t n, ix;
+ double xhi;
/* High word of x. */
- GET_LDOUBLE_MSW64(ix,x);
+ xhi = ldbl_high (x);
+ EXTRACT_WORDS64 (ix, xhi);
/* |x| ~< pi/4 */
ix &= 0x7fffffffffffffffLL;
diff -urN glibc-2.17-c758a686/sysdeps/powerpc/powerpc32/power7/fpu/s_logbl.c glibc-2.17-c758a686/sysdeps/powerpc/powerpc32/power7/fpu/s_logbl.c
--- glibc-2.17-c758a686/sysdeps/powerpc/powerpc32/power7/fpu/s_logbl.c 2014-05-27 19:58:07.000000000 -0500
+++ glibc-2.17-c758a686/sysdeps/powerpc/powerpc32/power7/fpu/s_logbl.c 2014-05-27 19:59:19.000000000 -0500
@@ -35,14 +35,14 @@
long double
__logbl (long double x)
{
- double xh, xl;
+ double xh;
double ret;
if (__builtin_expect (x == 0.0L, 0))
/* Raise FE_DIVBYZERO and return -HUGE_VAL[LF]. */
return -1.0L / __builtin_fabsl (x);
- ldbl_unpack (x, &xh, &xl);
+ xh = ldbl_high (x);
/* ret = x & 0x7ff0000000000000; */
asm (
"xxland %x0,%x1,%x2\n"
@@ -58,9 +58,9 @@
{
/* POSIX specifies that denormal number is treated as
though it were normalized. */
- int64_t lx, hx;
+ int64_t hx;
- GET_LDOUBLE_WORDS64 (hx, lx, x);
+ EXTRACT_WORDS64 (hx, xh);
return (long double) (-1023 - (__builtin_clzll (hx) - 12));
}
/* Test to avoid logb_downward (0.0) == -0.0. */


File diff suppressed because it is too large

View File

@ -0,0 +1,567 @@
# commit 650ef4bd7976e36831cba22d838b567d3b5f6e8f
# Author: Alan Modra <amodra@gmail.com>
# Date: Sat Aug 17 18:25:51 2013 +0930
#
# PowerPC floating point little-endian [4 of 15]
# http://sourceware.org/ml/libc-alpha/2013-08/msg00084.html
#
# Another batch of ieee854 macros and union replacement. These four
# files also have bugs fixed with this patch. The fact that the two
# doubles in an IBM long double may have different signs means that
# negation and absolute value operations can't just twiddle one sign bit
# as you can with ieee864 style extended double. fmodl, remainderl,
# erfl and erfcl all had errors of this type. erfl also returned +1 for
# large magnitude negative input where it should return -1. The hypotl
# error is innocuous since the value adjusted twice is only used as a
# flag. The e_hypotl.c tests for large "a" and small "b" are mutually
# exclusive because we've already exited when x/y > 2**120. That allows
# some further small simplifications.
#
# [BZ #15734], [BZ #15735]
# * sysdeps/ieee754/ldbl-128ibm/e_fmodl.c (__ieee754_fmodl): Rewrite
# all uses of ieee875 long double macros and unions. Simplify test
# for 0.0L. Correct |x|<|y| and |x|=|y| test. Use
# ldbl_extract_mantissa value for ix,iy exponents. Properly
# normalize after ldbl_extract_mantissa, and don't add hidden bit
# already handled. Don't treat low word of ieee854 mantissa like
# low word of IBM long double and mask off bit when testing for
# zero.
# * sysdeps/ieee754/ldbl-128ibm/e_hypotl.c (__ieee754_hypotl): Rewrite
# all uses of ieee875 long double macros and unions. Simplify tests
# for 0.0L and inf. Correct double adjustment of k. Delete dead code
# adjusting ha,hb. Simplify code setting kld. Delete two600 and
# two1022, instead use their values. Recognise that tests for large
# "a" and small "b" are mutually exclusive. Rename vars. Comment.
# * sysdeps/ieee754/ldbl-128ibm/e_remainderl.c (__ieee754_remainderl):
# Rewrite all uses of ieee875 long double macros and unions. Simplify
# test for 0.0L and nan. Correct negation.
# * sysdeps/ieee754/ldbl-128ibm/s_erfl.c (__erfl): Rewrite all uses of
# ieee875 long double macros and unions. Correct output for large
# magnitude x. Correct absolute value calculation.
# (__erfcl): Likewise.
# * math/libm-test.inc: Add tests for errors discovered in IBM long
# double versions of fmodl, remainderl, erfl and erfcl.
#
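
The sign bug the log above describes can be seen directly: because the two component doubles may carry different signs, negating an IBM long double by flipping only the high double's sign bit changes the value's magnitude. An editorial sketch (assuming an IBM double-double long double with the high double stored first), not code from the patch:

#include <stdio.h>
#include <string.h>

int
main (void)
{
  /* High and low halves have opposite signs: x = 1.0 + (-0x1p-60).  */
  long double x = 1.0L - 0x1p-60L;
  double h[2];

  memcpy (h, &x, sizeof h);

  /* Wrong: twiddling only the high double's sign gives -1.0 - 0x1p-60,
     not -x.  */
  double wrong[2] = { -h[0], h[1] };
  long double bad;
  memcpy (&bad, wrong, sizeof bad);

  /* Right: negate both component doubles, which is what the fixed
     remainderl and fabsl now effectively do.  */
  double both[2] = { -h[0], -h[1] };
  long double good;
  memcpy (&good, both, sizeof good);

  printf ("-x == bad?  %d\n", -x == bad);
  printf ("-x == good? %d\n", -x == good);
  return 0;
}
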
diff -urN glibc-2.17-c758a686/math/libm-test.inc glibc-2.17-c758a686/math/libm-test.inc
--- glibc-2.17-c758a686/math/libm-test.inc 2014-05-27 20:02:29.000000000 -0500
+++ glibc-2.17-c758a686/math/libm-test.inc 2014-05-27 20:09:59.000000000 -0500
@@ -4040,6 +4040,10 @@
TEST_f_f (erf, 2.0L, 0.995322265018952734162069256367252929L);
TEST_f_f (erf, 4.125L, 0.999999994576599200434933994687765914L);
TEST_f_f (erf, 27.0L, 1.0L);
+#if defined TEST_LDOUBLE && LDBL_MANT_DIG >= 54
+ /* The input is not exactly representable as a double. */
+ TEST_f_f (erf, -0x1.fffffffffffff8p-2L, -0.5204998778130465132916303345518417673509L);
+#endif
END (erf);
}
@@ -4071,6 +4075,10 @@
TEST_f_f (erfc, 0x1.ffa002p+2L, 1.233585992097580296336099501489175967033e-29L);
TEST_f_f (erfc, 0x1.ffffc8p+2L, 1.122671365033056305522366683719541099329e-29L);
#ifdef TEST_LDOUBLE
+# if LDBL_MANT_DIG >= 54
+ /* The input is not exactly representable as a double. */
+ TEST_f_f (erfc, -0x1.fffffffffffff8p-2L, 1.52049987781304651329163033455184176735L);
+# endif
/* The result can only be represented in long double. */
# if LDBL_MIN_10_EXP < -319
TEST_f_f (erfc, 27.0L, 0.523704892378925568501606768284954709e-318L);
@@ -5634,6 +5642,13 @@
#if defined TEST_LDOUBLE && LDBL_MIN_EXP <= -16381
TEST_ff_f (fmod, 0x0.fffffffffffffffep-16382L, 0x1p-16445L, plus_zero);
#endif
+#if defined TEST_LDOUBLE && LDBL_MANT_DIG >= 56
+ TEST_ff_f (fmod, -0x1.00000000000004p+0L, 0x1.fffffffffffff8p-1L, -0x1p-53L);
+ TEST_ff_f (fmod, 0x1.fffffffffffffap-1L, 0x1.fffffffffffff8p-1L, 0x1p-56L);
+ TEST_ff_f (fmod, -0x1.fffffffffffffap-1L, 0x1.fffffffffffff8p-1L, -0x1p-56L);
+ TEST_ff_f (fmod, 0x1.fffffffffffffap-1L, -0x1.fffffffffffff8p-1L, 0x1p-56L);
+ TEST_ff_f (fmod, -0x1.fffffffffffffap-1L, -0x1.fffffffffffff8p-1L, -0x1p-56L);
+#endif
END (fmod);
}
@@ -8642,6 +8657,9 @@
TEST_ff_f (remainder, -1.625, -1.0, 0.375);
TEST_ff_f (remainder, 5.0, 2.0, 1.0);
TEST_ff_f (remainder, 3.0, 2.0, -1.0);
+#if defined TEST_LDOUBLE && LDBL_MANT_DIG >= 56
+ TEST_ff_f (remainder, -0x1.80000000000002p1L, 2.0, 0x1.fffffffffffff8p-1L);
+#endif
END (remainder);
}
diff -urN glibc-2.17-c758a686/sysdeps/ieee754/ldbl-128ibm/e_fmodl.c glibc-2.17-c758a686/sysdeps/ieee754/ldbl-128ibm/e_fmodl.c
--- glibc-2.17-c758a686/sysdeps/ieee754/ldbl-128ibm/e_fmodl.c 2014-05-27 20:02:27.000000000 -0500
+++ glibc-2.17-c758a686/sysdeps/ieee754/ldbl-128ibm/e_fmodl.c 2014-05-27 20:04:08.000000000 -0500
@@ -27,76 +27,83 @@
long double
__ieee754_fmodl (long double x, long double y)
{
- int64_t n,hx,hy,hz,ix,iy,sx, i;
- u_int64_t lx,ly,lz;
- int temp;
-
- GET_LDOUBLE_WORDS64(hx,lx,x);
- GET_LDOUBLE_WORDS64(hy,ly,y);
+ int64_t hx, hy, hz, sx, sy;
+ uint64_t lx, ly, lz;
+ int n, ix, iy;
+ double xhi, xlo, yhi, ylo;
+
+ ldbl_unpack (x, &xhi, &xlo);
+ EXTRACT_WORDS64 (hx, xhi);
+ EXTRACT_WORDS64 (lx, xlo);
+ ldbl_unpack (y, &yhi, &ylo);
+ EXTRACT_WORDS64 (hy, yhi);
+ EXTRACT_WORDS64 (ly, ylo);
sx = hx&0x8000000000000000ULL; /* sign of x */
- hx ^=sx; /* |x| */
- hy &= 0x7fffffffffffffffLL; /* |y| */
+ hx ^= sx; /* |x| */
+ sy = hy&0x8000000000000000ULL; /* sign of y */
+ hy ^= sy; /* |y| */
/* purge off exception values */
- if(__builtin_expect((hy|(ly&0x7fffffffffffffff))==0 ||
+ if(__builtin_expect(hy==0 ||
(hx>=0x7ff0000000000000LL)|| /* y=0,or x not finite */
(hy>0x7ff0000000000000LL),0)) /* or y is NaN */
return (x*y)/(x*y);
- if(__builtin_expect(hx<=hy,0)) {
- if((hx<hy)||(lx<ly)) return x; /* |x|<|y| return x */
- if(lx==ly)
- return Zero[(u_int64_t)sx>>63]; /* |x|=|y| return x*0*/
+ if (__builtin_expect (hx <= hy, 0))
+ {
+ /* If |x| < |y| return x. */
+ if (hx < hy)
+ return x;
+ /* At this point the absolute value of the high doubles of
+ x and y must be equal. */
+ /* If the low double of y is the same sign as the high
+ double of y (ie. the low double increases |y|)... */
+ if (((ly ^ sy) & 0x8000000000000000LL) == 0
+ /* ... then a different sign low double to high double
+ for x or same sign but lower magnitude... */
+ && (int64_t) (lx ^ sx) < (int64_t) (ly ^ sy))
+ /* ... means |x| < |y|. */
+ return x;
+ /* If the low double of x differs in sign to the high
+ double of x (ie. the low double decreases |x|)... */
+ if (((lx ^ sx) & 0x8000000000000000LL) != 0
+ /* ... then a different sign low double to high double
+ for y with lower magnitude (we've already caught
+ the same sign for y case above)... */
+ && (int64_t) (lx ^ sx) > (int64_t) (ly ^ sy))
+ /* ... means |x| < |y|. */
+ return x;
+ /* If |x| == |y| return x*0. */
+ if ((lx ^ sx) == (ly ^ sy))
+ return Zero[(uint64_t) sx >> 63];
}
- /* determine ix = ilogb(x) */
- if(__builtin_expect(hx<0x0010000000000000LL,0)) { /* subnormal x */
- if(hx==0) {
- for (ix = -1043, i=lx; i>0; i<<=1) ix -=1;
- } else {
- for (ix = -1022, i=(hx<<11); i>0; i<<=1) ix -=1;
- }
- } else ix = (hx>>52)-0x3ff;
-
- /* determine iy = ilogb(y) */
- if(__builtin_expect(hy<0x0010000000000000LL,0)) { /* subnormal y */
- if(hy==0) {
- for (iy = -1043, i=ly; i>0; i<<=1) iy -=1;
- } else {
- for (iy = -1022, i=(hy<<11); i>0; i<<=1) iy -=1;
- }
- } else iy = (hy>>52)-0x3ff;
-
/* Make the IBM extended format 105 bit mantissa look like the ieee854 112
bit mantissa so the following operations will give the correct
result. */
- ldbl_extract_mantissa(&hx, &lx, &temp, x);
- ldbl_extract_mantissa(&hy, &ly, &temp, y);
+ ldbl_extract_mantissa(&hx, &lx, &ix, x);
+ ldbl_extract_mantissa(&hy, &ly, &iy, y);
- /* set up {hx,lx}, {hy,ly} and align y to x */
- if(__builtin_expect(ix >= -1022, 1))
- hx = 0x0001000000000000LL|(0x0000ffffffffffffLL&hx);
- else { /* subnormal x, shift x to normal */
- n = -1022-ix;
- if(n<=63) {
- hx = (hx<<n)|(lx>>(64-n));
- lx <<= n;
- } else {
- hx = lx<<(n-64);
- lx = 0;
- }
- }
- if(__builtin_expect(iy >= -1022, 1))
- hy = 0x0001000000000000LL|(0x0000ffffffffffffLL&hy);
- else { /* subnormal y, shift y to normal */
- n = -1022-iy;
- if(n<=63) {
- hy = (hy<<n)|(ly>>(64-n));
- ly <<= n;
- } else {
- hy = ly<<(n-64);
- ly = 0;
- }
- }
+ if (__builtin_expect (ix == -IEEE754_DOUBLE_BIAS, 0))
+ {
+ /* subnormal x, shift x to normal. */
+ while ((hx & (1LL << 48)) == 0)
+ {
+ hx = (hx << 1) | (lx >> 63);
+ lx = lx << 1;
+ ix -= 1;
+ }
+ }
+
+ if (__builtin_expect (iy == -IEEE754_DOUBLE_BIAS, 0))
+ {
+ /* subnormal y, shift y to normal. */
+ while ((hy & (1LL << 48)) == 0)
+ {
+ hy = (hy << 1) | (ly >> 63);
+ ly = ly << 1;
+ iy -= 1;
+ }
+ }
/* fix point fmod */
n = ix - iy;
@@ -104,7 +111,7 @@
hz=hx-hy;lz=lx-ly; if(lx<ly) hz -= 1;
if(hz<0){hx = hx+hx+(lx>>63); lx = lx+lx;}
else {
- if((hz|(lz&0x7fffffffffffffff))==0) /* return sign(x)*0 */
+ if((hz|lz)==0) /* return sign(x)*0 */
return Zero[(u_int64_t)sx>>63];
hx = hz+hz+(lz>>63); lx = lz+lz;
}
@@ -113,7 +120,7 @@
if(hz>=0) {hx=hz;lx=lz;}
/* convert back to floating value and restore the sign */
- if((hx|(lx&0x7fffffffffffffff))==0) /* return sign(x)*0 */
+ if((hx|lx)==0) /* return sign(x)*0 */
return Zero[(u_int64_t)sx>>63];
while(hx<0x0001000000000000LL) { /* normalize x */
hx = hx+hx+(lx>>63); lx = lx+lx;
diff -urN glibc-2.17-c758a686/sysdeps/ieee754/ldbl-128ibm/e_hypotl.c glibc-2.17-c758a686/sysdeps/ieee754/ldbl-128ibm/e_hypotl.c
--- glibc-2.17-c758a686/sysdeps/ieee754/ldbl-128ibm/e_hypotl.c 2014-05-27 20:02:27.000000000 -0500
+++ glibc-2.17-c758a686/sysdeps/ieee754/ldbl-128ibm/e_hypotl.c 2014-05-27 20:04:08.000000000 -0500
@@ -45,76 +45,84 @@
#include <math.h>
#include <math_private.h>
-static const long double two600 = 0x1.0p+600L;
-static const long double two1022 = 0x1.0p+1022L;
-
long double
__ieee754_hypotl(long double x, long double y)
{
- long double a,b,t1,t2,y1,y2,w,kld;
+ long double a,b,a1,a2,b1,b2,w,kld;
int64_t j,k,ha,hb;
+ double xhi, yhi, hi, lo;
- GET_LDOUBLE_MSW64(ha,x);
+ xhi = ldbl_high (x);
+ EXTRACT_WORDS64 (ha, xhi);
+ yhi = ldbl_high (y);
+ EXTRACT_WORDS64 (hb, yhi);
ha &= 0x7fffffffffffffffLL;
- GET_LDOUBLE_MSW64(hb,y);
hb &= 0x7fffffffffffffffLL;
if(hb > ha) {a=y;b=x;j=ha; ha=hb;hb=j;} else {a=x;b=y;}
a = fabsl(a); /* a <- |a| */
b = fabsl(b); /* b <- |b| */
- if((ha-hb)>0x780000000000000LL) {return a+b;} /* x/y > 2**120 */
+ if((ha-hb)>0x0780000000000000LL) {return a+b;} /* x/y > 2**120 */
k=0;
kld = 1.0L;
if(ha > 0x5f30000000000000LL) { /* a>2**500 */
if(ha >= 0x7ff0000000000000LL) { /* Inf or NaN */
- u_int64_t low;
w = a+b; /* for sNaN */
- GET_LDOUBLE_LSW64(low,a);
- if(((ha&0xfffffffffffffLL)|(low&0x7fffffffffffffffLL))==0)
+ if(ha == 0x7ff0000000000000LL)
w = a;
- GET_LDOUBLE_LSW64(low,b);
- if(((hb^0x7ff0000000000000LL)|(low&0x7fffffffffffffffLL))==0)
+ if(hb == 0x7ff0000000000000LL)
w = b;
return w;
}
/* scale a and b by 2**-600 */
- ha -= 0x2580000000000000LL; hb -= 0x2580000000000000LL; k += 600;
- a /= two600;
- b /= two600;
- k += 600;
- kld = two600;
+ a *= 0x1p-600L;
+ b *= 0x1p-600L;
+ k = 600;
+ kld = 0x1p+600L;
}
- if(hb < 0x23d0000000000000LL) { /* b < 2**-450 */
+ else if(hb < 0x23d0000000000000LL) { /* b < 2**-450 */
if(hb <= 0x000fffffffffffffLL) { /* subnormal b or 0 */
- u_int64_t low;
- GET_LDOUBLE_LSW64(low,b);
- if((hb|(low&0x7fffffffffffffffLL))==0) return a;
- t1=two1022; /* t1=2^1022 */
- b *= t1;
- a *= t1;
- k -= 1022;
- kld = kld / two1022;
+ if(hb==0) return a;
+ a *= 0x1p+1022L;
+ b *= 0x1p+1022L;
+ k = -1022;
+ kld = 0x1p-1022L;
} else { /* scale a and b by 2^600 */
- ha += 0x2580000000000000LL; /* a *= 2^600 */
- hb += 0x2580000000000000LL; /* b *= 2^600 */
- k -= 600;
- a *= two600;
- b *= two600;
- kld = kld / two600;
+ a *= 0x1p+600L;
+ b *= 0x1p+600L;
+ k = -600;
+ kld = 0x1p-600L;
}
}
/* medium size a and b */
w = a-b;
if (w>b) {
- SET_LDOUBLE_WORDS64(t1,ha,0);
- t2 = a-t1;
- w = __ieee754_sqrtl(t1*t1-(b*(-b)-t2*(a+t1)));
+ ldbl_unpack (a, &hi, &lo);
+ a1 = hi;
+ a2 = lo;
+ /* a*a + b*b
+ = (a1+a2)*a + b*b
+ = a1*a + a2*a + b*b
+ = a1*(a1+a2) + a2*a + b*b
+ = a1*a1 + a1*a2 + a2*a + b*b
+ = a1*a1 + a2*(a+a1) + b*b */
+ w = __ieee754_sqrtl(a1*a1-(b*(-b)-a2*(a+a1)));
} else {
a = a+a;
- SET_LDOUBLE_WORDS64(y1,hb,0);
- y2 = b - y1;
- SET_LDOUBLE_WORDS64(t1,ha+0x0010000000000000LL,0);
- t2 = a - t1;
- w = __ieee754_sqrtl(t1*y1-(w*(-w)-(t1*y2+t2*b)));
+ ldbl_unpack (b, &hi, &lo);
+ b1 = hi;
+ b2 = lo;
+ ldbl_unpack (a, &hi, &lo);
+ a1 = hi;
+ a2 = lo;
+ /* a*a + b*b
+ = a*a + (a-b)*(a-b) - (a-b)*(a-b) + b*b
+ = a*a + w*w - (a*a - 2*a*b + b*b) + b*b
+ = w*w + 2*a*b
+ = w*w + (a1+a2)*b
+ = w*w + a1*b + a2*b
+ = w*w + a1*(b1+b2) + a2*b
+ = w*w + a1*b1 + a1*b2 + a2*b */
+ w = __ieee754_sqrtl(a1*b1-(w*(-w)-(a1*b2+a2*b)));
}
if(k!=0)
return w*kld;
diff -urN glibc-2.17-c758a686/sysdeps/ieee754/ldbl-128ibm/e_remainderl.c glibc-2.17-c758a686/sysdeps/ieee754/ldbl-128ibm/e_remainderl.c
--- glibc-2.17-c758a686/sysdeps/ieee754/ldbl-128ibm/e_remainderl.c 2014-05-27 20:02:27.000000000 -0500
+++ glibc-2.17-c758a686/sysdeps/ieee754/ldbl-128ibm/e_remainderl.c 2014-05-27 20:04:08.000000000 -0500
@@ -33,18 +33,22 @@
int64_t hx,hp;
u_int64_t sx,lx,lp;
long double p_half;
+ double xhi, xlo, phi, plo;
- GET_LDOUBLE_WORDS64(hx,lx,x);
- GET_LDOUBLE_WORDS64(hp,lp,p);
+ ldbl_unpack (x, &xhi, &xlo);
+ EXTRACT_WORDS64 (hx, xhi);
+ EXTRACT_WORDS64 (lx, xlo);
+ ldbl_unpack (p, &phi, &plo);
+ EXTRACT_WORDS64 (hp, phi);
+ EXTRACT_WORDS64 (lp, plo);
sx = hx&0x8000000000000000ULL;
hp &= 0x7fffffffffffffffLL;
hx &= 0x7fffffffffffffffLL;
/* purge off exception values */
- if((hp|(lp&0x7fffffffffffffff))==0) return (x*p)/(x*p); /* p = 0 */
+ if(hp==0) return (x*p)/(x*p); /* p = 0 */
if((hx>=0x7ff0000000000000LL)|| /* x not finite */
- ((hp>=0x7ff0000000000000LL)&& /* p is NaN */
- (((hp-0x7ff0000000000000LL)|lp)!=0)))
+ (hp>0x7ff0000000000000LL)) /* p is NaN */
return (x*p)/(x*p);
@@ -64,8 +68,8 @@
if(x>=p_half) x -= p;
}
}
- GET_LDOUBLE_MSW64(hx,x);
- SET_LDOUBLE_MSW64(x,hx^sx);
+ if (sx)
+ x = -x;
return x;
}
strong_alias (__ieee754_remainderl, __remainderl_finite)
diff -urN glibc-2.17-c758a686/sysdeps/ieee754/ldbl-128ibm/s_erfl.c glibc-2.17-c758a686/sysdeps/ieee754/ldbl-128ibm/s_erfl.c
--- glibc-2.17-c758a686/sysdeps/ieee754/ldbl-128ibm/s_erfl.c 2014-05-27 20:02:27.000000000 -0500
+++ glibc-2.17-c758a686/sysdeps/ieee754/ldbl-128ibm/s_erfl.c 2014-05-27 20:04:08.000000000 -0500
@@ -760,16 +760,16 @@
__erfl (long double x)
{
long double a, y, z;
- int32_t i, ix, sign;
- ieee854_long_double_shape_type u;
+ int32_t i, ix, hx;
+ double xhi;
- u.value = x;
- sign = u.parts32.w0;
- ix = sign & 0x7fffffff;
+ xhi = ldbl_high (x);
+ GET_HIGH_WORD (hx, xhi);
+ ix = hx & 0x7fffffff;
if (ix >= 0x7ff00000)
{ /* erf(nan)=nan */
- i = ((sign & 0xfff00000) >> 31) << 1;
+ i = ((uint32_t) hx >> 31) << 1;
return (long double) (1 - i) + one / x; /* erf(+-inf)=+-1 */
}
@@ -778,7 +778,7 @@
if (ix >= 0x4039A0DE)
{
/* __erfcl (x) underflows if x > 25.6283 */
- if (sign)
+ if ((hx & 0x80000000) == 0)
return one-tiny;
else
return tiny-one;
@@ -789,8 +789,9 @@
return (one - y);
}
}
- u.parts32.w0 = ix;
- a = u.value;
+ a = x;
+ if ((hx & 0x80000000) != 0)
+ a = -a;
z = x * x;
if (ix < 0x3fec0000) /* a < 0.875 */
{
@@ -814,7 +815,7 @@
y = erf_const + neval (a, TN2, NTN2) / deval (a, TD2, NTD2);
}
- if (sign & 0x80000000) /* x < 0 */
+ if (hx & 0x80000000) /* x < 0 */
y = -y;
return( y );
}
@@ -824,18 +825,18 @@
__erfcl (long double x)
{
long double y, z, p, r;
- int32_t i, ix, sign;
- ieee854_long_double_shape_type u;
-
- u.value = x;
- sign = u.parts32.w0;
- ix = sign & 0x7fffffff;
- u.parts32.w0 = ix;
+ int32_t i, ix;
+ uint32_t hx;
+ double xhi;
+
+ xhi = ldbl_high (x);
+ GET_HIGH_WORD (hx, xhi);
+ ix = hx & 0x7fffffff;
if (ix >= 0x7ff00000)
{ /* erfc(nan)=nan */
/* erfc(+-inf)=0,2 */
- return (long double) (((u_int32_t) sign >> 31) << 1) + one / x;
+ return (long double) ((hx >> 31) << 1) + one / x;
}
if (ix < 0x3fd00000) /* |x| <1/4 */
@@ -846,7 +847,8 @@
}
if (ix < 0x3ff40000) /* 1.25 */
{
- x = u.value;
+ if ((hx & 0x80000000) != 0)
+ x = -x;
i = 8.0 * x;
switch (i)
{
@@ -891,7 +893,7 @@
y += C20a;
break;
}
- if (sign & 0x80000000)
+ if (hx & 0x80000000)
y = 2.0L - y;
return y;
}
@@ -899,10 +901,11 @@
if (ix < 0x405ac000)
{
/* x < -9 */
- if ((ix >= 0x40220000) && (sign & 0x80000000))
+ if (hx >= 0xc0220000)
return two - tiny;
- x = fabsl (x);
+ if ((hx & 0x80000000) != 0)
+ x = -x;
z = one / (x * x);
i = 8.0 / x;
switch (i)
@@ -933,21 +936,17 @@
p = neval (z, RNr8, NRNr8) / deval (z, RDr8, NRDr8);
break;
}
- u.value = x;
- u.parts32.w3 = 0;
- u.parts32.w2 = 0;
- u.parts32.w1 &= 0xf8000000;
- z = u.value;
+ z = (float) x;
r = __ieee754_expl (-z * z - 0.5625) *
__ieee754_expl ((z - x) * (z + x) + p);
- if ((sign & 0x80000000) == 0)
+ if ((hx & 0x80000000) == 0)
return r / x;
else
return two - r / x;
}
else
{
- if ((sign & 0x80000000) == 0)
+ if ((hx & 0x80000000) == 0)
return tiny * tiny;
else
return two - tiny;

View File

@ -0,0 +1,91 @@
# commit 32c301dfc9b786453e59b61fe4a821a89e1a206b
# Author: Alan Modra <amodra@gmail.com>
# Date: Sat Aug 17 18:26:39 2013 +0930
#
# PowerPC floating point little-endian [5 of 15]
# http://sourceware.org/ml/libc-alpha/2013-08/msg00085.html
#
# Rid ourselves of ieee854.
#
# * sysdeps/ieee754/ldbl-128ibm/ieee754.h (union ieee854_long_double):
# Delete.
# (IEEE854_LONG_DOUBLE_BIAS): Delete.
# * sysdeps/ieee754/ldbl-128ibm/math_ldbl.h: Don't include ieee854
# version of math_ldbl.h.
#
diff -urN glibc-2.17-c758a686/sysdeps/ieee754/ldbl-128ibm/ieee754.h glibc-2.17-c758a686/sysdeps/ieee754/ldbl-128ibm/ieee754.h
--- glibc-2.17-c758a686/sysdeps/ieee754/ldbl-128ibm/ieee754.h 2014-05-27 22:10:43.000000000 -0500
+++ glibc-2.17-c758a686/sysdeps/ieee754/ldbl-128ibm/ieee754.h 2014-05-27 22:11:10.000000000 -0500
@@ -112,61 +112,6 @@
#define IEEE754_DOUBLE_BIAS 0x3ff /* Added to exponent. */
-union ieee854_long_double
- {
- long double d;
-
- /* This is the IEEE 854 quad-precision format. */
- struct
- {
-#if __BYTE_ORDER == __BIG_ENDIAN
- unsigned int negative:1;
- unsigned int exponent:15;
- /* Together these comprise the mantissa. */
- unsigned int mantissa0:16;
- unsigned int mantissa1:32;
- unsigned int mantissa2:32;
- unsigned int mantissa3:32;
-#endif /* Big endian. */
-#if __BYTE_ORDER == __LITTLE_ENDIAN
- /* Together these comprise the mantissa. */
- unsigned int mantissa3:32;
- unsigned int mantissa2:32;
- unsigned int mantissa1:32;
- unsigned int mantissa0:16;
- unsigned int exponent:15;
- unsigned int negative:1;
-#endif /* Little endian. */
- } ieee;
-
- /* This format makes it easier to see if a NaN is a signalling NaN. */
- struct
- {
-#if __BYTE_ORDER == __BIG_ENDIAN
- unsigned int negative:1;
- unsigned int exponent:15;
- unsigned int quiet_nan:1;
- /* Together these comprise the mantissa. */
- unsigned int mantissa0:15;
- unsigned int mantissa1:32;
- unsigned int mantissa2:32;
- unsigned int mantissa3:32;
-#else
- /* Together these comprise the mantissa. */
- unsigned int mantissa3:32;
- unsigned int mantissa2:32;
- unsigned int mantissa1:32;
- unsigned int mantissa0:15;
- unsigned int quiet_nan:1;
- unsigned int exponent:15;
- unsigned int negative:1;
-#endif
- } ieee_nan;
- };
-
-#define IEEE854_LONG_DOUBLE_BIAS 0x3fff /* Added to exponent. */
-
-
/* IBM extended format for long double.
Each long double is made up of two IEEE doubles. The value of the
diff -urN glibc-2.17-c758a686/sysdeps/ieee754/ldbl-128ibm/math_ldbl.h glibc-2.17-c758a686/sysdeps/ieee754/ldbl-128ibm/math_ldbl.h
--- glibc-2.17-c758a686/sysdeps/ieee754/ldbl-128ibm/math_ldbl.h 2014-05-27 22:10:43.000000000 -0500
+++ glibc-2.17-c758a686/sysdeps/ieee754/ldbl-128ibm/math_ldbl.h 2014-05-27 22:11:10.000000000 -0500
@@ -2,7 +2,6 @@
#error "Never use <math_ldbl.h> directly; include <math_private.h> instead."
#endif
-#include <sysdeps/ieee754/ldbl-128/math_ldbl.h>
#include <ieee754.h>
static inline void

View File

@ -0,0 +1,113 @@
# commit 62a728aeff93507ce5975f245a5f1d2046fb4503
# Author: Alan Modra <amodra@gmail.com>
# Date: Sat Aug 17 18:27:19 2013 +0930
#
# PowerPC floating point little-endian [6 of 15]
# http://sourceware.org/ml/libc-alpha/2013-07/msg00197.html
#
# A rewrite to make this code correct for little-endian.
#
# * sysdeps/ieee754/ldbl-128ibm/e_sqrtl.c (mynumber): Replace
# union 32-bit int array member with 64-bit int array.
# (t515, tm256): Double rather than long double.
# (__ieee754_sqrtl): Rewrite using 64-bit arithmetic.
#
diff -urN glibc-2.17-c758a686/sysdeps/ieee754/ldbl-128ibm/e_sqrtl.c glibc-2.17-c758a686/sysdeps/ieee754/ldbl-128ibm/e_sqrtl.c
--- glibc-2.17-c758a686/sysdeps/ieee754/ldbl-128ibm/e_sqrtl.c 2014-05-27 22:20:12.000000000 -0500
+++ glibc-2.17-c758a686/sysdeps/ieee754/ldbl-128ibm/e_sqrtl.c 2014-05-27 22:21:39.000000000 -0500
@@ -34,15 +34,13 @@
#include <math_private.h>
-typedef unsigned int int4;
-typedef union {int4 i[4]; long double x; double d[2]; } mynumber;
+typedef union {int64_t i[2]; long double x; double d[2]; } mynumber;
-static const mynumber
- t512 = {{0x5ff00000, 0x00000000, 0x00000000, 0x00000000 }}, /* 2^512 */
- tm256 = {{0x2ff00000, 0x00000000, 0x00000000, 0x00000000 }}; /* 2^-256 */
static const double
-two54 = 1.80143985094819840000e+16, /* 0x4350000000000000 */
-twom54 = 5.55111512312578270212e-17; /* 0x3C90000000000000 */
+ t512 = 0x1p512,
+ tm256 = 0x1p-256,
+ two54 = 0x1p54, /* 0x4350000000000000 */
+ twom54 = 0x1p-54; /* 0x3C90000000000000 */
/*********************************************************************/
/* An ultimate sqrt routine. Given an IEEE double machine number x */
@@ -54,56 +52,53 @@
static const long double big = 134217728.0, big1 = 134217729.0;
long double t,s,i;
mynumber a,c;
- int4 k, l, m;
- int n;
+ uint64_t k, l;
+ int64_t m, n;
double d;
a.x=x;
- k=a.i[0] & 0x7fffffff;
+ k=a.i[0] & INT64_C(0x7fffffffffffffff);
/*----------------- 2^-1022 <= | x |< 2^1024 -----------------*/
- if (k>0x000fffff && k<0x7ff00000) {
+ if (k>INT64_C(0x000fffff00000000) && k<INT64_C(0x7ff0000000000000)) {
if (x < 0) return (big1-big1)/(big-big);
- l = (k&0x001fffff)|0x3fe00000;
- if (((a.i[2] & 0x7fffffff) | a.i[3]) != 0) {
- n = (int) ((l - k) * 2) >> 21;
- m = (a.i[2] >> 20) & 0x7ff;
+ l = (k&INT64_C(0x001fffffffffffff))|INT64_C(0x3fe0000000000000);
+ if ((a.i[1] & INT64_C(0x7fffffffffffffff)) != 0) {
+ n = (int64_t) ((l - k) * 2) >> 53;
+ m = (a.i[1] >> 52) & 0x7ff;
if (m == 0) {
a.d[1] *= two54;
- m = ((a.i[2] >> 20) & 0x7ff) - 54;
+ m = ((a.i[1] >> 52) & 0x7ff) - 54;
}
m += n;
- if ((int) m > 0)
- a.i[2] = (a.i[2] & 0x800fffff) | (m << 20);
- else if ((int) m <= -54) {
- a.i[2] &= 0x80000000;
- a.i[3] = 0;
+ if (m > 0)
+ a.i[1] = (a.i[1] & INT64_C(0x800fffffffffffff)) | (m << 52);
+ else if (m <= -54) {
+ a.i[1] &= INT64_C(0x8000000000000000);
} else {
m += 54;
- a.i[2] = (a.i[2] & 0x800fffff) | (m << 20);
+ a.i[1] = (a.i[1] & INT64_C(0x800fffffffffffff)) | (m << 52);
a.d[1] *= twom54;
}
}
a.i[0] = l;
s = a.x;
d = __ieee754_sqrt (a.d[0]);
- c.i[0] = 0x20000000+((k&0x7fe00000)>>1);
+ c.i[0] = INT64_C(0x2000000000000000)+((k&INT64_C(0x7fe0000000000000))>>1);
c.i[1] = 0;
- c.i[2] = 0;
- c.i[3] = 0;
i = d;
t = 0.5L * (i + s / i);
i = 0.5L * (t + s / t);
return c.x * i;
}
else {
- if (k>=0x7ff00000) {
- if (a.i[0] == 0xfff00000 && a.i[1] == 0)
+ if (k>=INT64_C(0x7ff0000000000000)) {
+ if (a.i[0] == INT64_C(0xfff0000000000000))
return (big1-big1)/(big-big); /* sqrt (-Inf) = NaN. */
return x; /* sqrt (NaN) = NaN, sqrt (+Inf) = +Inf. */
}
if (x == 0) return x;
if (x < 0) return (big1-big1)/(big-big);
- return tm256.x*__ieee754_sqrtl(x*t512.x);
+ return tm256*__ieee754_sqrtl(x*t512);
}
}
strong_alias (__ieee754_sqrtl, __sqrtl_finite)

View File

@ -0,0 +1,75 @@
# commit 2ca85d2bbbaa60b9c83bf1f57a2801c84e0a3625
# Author: Anton Blanchard <anton@au1.ibm.com>
# Date: Sat Aug 17 18:28:06 2013 +0930
#
# PowerPC floating point little-endian [7 of 15]
# http://sourceware.org/ml/libc-alpha/2013-08/msg00086.html
#
# * sysdeps/powerpc/bits/mathinline.h (__signbitf): Use builtin.
# (__signbit): Likewise. Correct for little-endian.
# (__signbitl): Call __signbit.
# (lrint): Correct for little-endian.
# (lrintf): Call lrint.
#
diff -urN glibc-2.17-c758a686/sysdeps/powerpc/fpu/bits/mathinline.h glibc-2.17-c758a686/sysdeps/powerpc/fpu/bits/mathinline.h
--- glibc-2.17-c758a686/sysdeps/powerpc/fpu/bits/mathinline.h 2014-05-27 22:28:12.000000000 -0500
+++ glibc-2.17-c758a686/sysdeps/powerpc/fpu/bits/mathinline.h 2014-05-27 22:28:37.000000000 -0500
@@ -62,21 +62,28 @@
__MATH_INLINE int
__NTH (__signbitf (float __x))
{
+#if __GNUC_PREREQ (4, 0)
+ return __builtin_signbitf (__x);
+#else
__extension__ union { float __f; int __i; } __u = { __f: __x };
return __u.__i < 0;
+#endif
}
__MATH_INLINE int
__NTH (__signbit (double __x))
{
- __extension__ union { double __d; int __i[2]; } __u = { __d: __x };
- return __u.__i[0] < 0;
+#if __GNUC_PREREQ (4, 0)
+ return __builtin_signbit (__x);
+#else
+ __extension__ union { double __d; long long __i; } __u = { __d: __x };
+ return __u.__i < 0;
+#endif
}
# ifdef __LONG_DOUBLE_128__
__MATH_INLINE int
__NTH (__signbitl (long double __x))
{
- __extension__ union { long double __d; int __i[4]; } __u = { __d: __x };
- return __u.__i[0] < 0;
+ return __signbit ((double) __x);
}
# endif
# endif
@@ -93,22 +100,17 @@
{
union {
double __d;
- int __ll[2];
+ long long __ll;
} __u;
__asm__ ("fctiw %0,%1" : "=f"(__u.__d) : "f"(__x));
- return __u.__ll[1];
+ return __u.__ll;
}
__MATH_INLINE long int lrintf (float __x) __THROW;
__MATH_INLINE long int
__NTH (lrintf (float __x))
{
- union {
- double __d;
- int __ll[2];
- } __u;
- __asm__ ("fctiw %0,%1" : "=f"(__u.__d) : "f"(__x));
- return __u.__ll[1];
+ return lrint ((double) __x);
}
# endif

View File

@ -0,0 +1,283 @@
# commit 4a28b3ca4bc52d9a3ac0d9edb53d3de510e1b77c
# Author: Anton Blanchard <anton@au1.ibm.com>
# Date: Sat Aug 17 18:28:55 2013 +0930
#
# PowerPC floating point little-endian [8 of 15]
# http://sourceware.org/ml/libc-alpha/2013-07/msg00199.html
#
# Corrects floating-point environment code for little-endian.
#
# * sysdeps/powerpc/fpu/fenv_libc.h (fenv_union_t): Replace int
# array with long long.
# * sysdeps/powerpc/fpu/e_sqrt.c (__slow_ieee754_sqrt): Adjust.
# * sysdeps/powerpc/fpu/e_sqrtf.c (__slow_ieee754_sqrtf): Adjust.
# * sysdeps/powerpc/fpu/fclrexcpt.c (__feclearexcept): Adjust.
# * sysdeps/powerpc/fpu/fedisblxcpt.c (fedisableexcept): Adjust.
# * sysdeps/powerpc/fpu/feenablxcpt.c (feenableexcept): Adjust.
# * sysdeps/powerpc/fpu/fegetexcept.c (__fegetexcept): Adjust.
# * sysdeps/powerpc/fpu/feholdexcpt.c (feholdexcept): Adjust.
# * sysdeps/powerpc/fpu/fesetenv.c (__fesetenv): Adjust.
# * sysdeps/powerpc/fpu/feupdateenv.c (__feupdateenv): Adjust.
# * sysdeps/powerpc/fpu/fgetexcptflg.c (__fegetexceptflag): Adjust.
# * sysdeps/powerpc/fpu/fraiseexcpt.c (__feraiseexcept): Adjust.
# * sysdeps/powerpc/fpu/fsetexcptflg.c (__fesetexceptflag): Adjust.
# * sysdeps/powerpc/fpu/ftestexcept.c (fetestexcept): Adjust.
#
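# [Editor's note, not part of the upstream patch]  The reason the int[2]
# member loses is that mffs leaves the FPSCR in the low-order 32 bits of
# the 64-bit FP register image, so with a two-word union the FPSCR word
# is l[1] on big-endian but l[0] on little-endian, whereas masking a
# single 64-bit value works either way.  A stand-alone C sketch of that
# point (0xfff8000020000000 is just an arbitrary stand-in for the
# register image; strip the leading "# " to compile it):
#
#   #include <stdint.h>
#   #include <stdio.h>
#   #include <string.h>
#
#   int main (void)
#   {
#     uint64_t image = UINT64_C (0xfff8000020000000); /* FPSCR bits = low word */
#     double d;
#     memcpy (&d, &image, sizeof d);
#
#     uint32_t w[2];
#     uint64_t ll;
#     memcpy (w, &d, sizeof w);    /* which index holds the FPSCR word is endian-dependent */
#     memcpy (&ll, &d, sizeof ll); /* masking the 64-bit value is not */
#
#     printf ("w[0]=%08x w[1]=%08x  low 32 bits of ll=%08x\n",
#             w[0], w[1], (uint32_t) ll);
#     return 0;
#   }
#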
diff -urN glibc-2.17-c758a686/sysdeps/powerpc/fpu/e_sqrt.c glibc-2.17-c758a686/sysdeps/powerpc/fpu/e_sqrt.c
--- glibc-2.17-c758a686/sysdeps/powerpc/fpu/e_sqrt.c 2014-05-27 22:31:42.000000000 -0500
+++ glibc-2.17-c758a686/sysdeps/powerpc/fpu/e_sqrt.c 2014-05-27 22:31:43.000000000 -0500
@@ -145,7 +145,7 @@
feraiseexcept (FE_INVALID_SQRT);
fenv_union_t u = { .fenv = fegetenv_register () };
- if ((u.l[1] & FE_INVALID) == 0)
+ if ((u.l & FE_INVALID) == 0)
#endif
feraiseexcept (FE_INVALID);
x = a_nan.value;
diff -urN glibc-2.17-c758a686/sysdeps/powerpc/fpu/e_sqrtf.c glibc-2.17-c758a686/sysdeps/powerpc/fpu/e_sqrtf.c
--- glibc-2.17-c758a686/sysdeps/powerpc/fpu/e_sqrtf.c 2014-05-27 22:31:42.000000000 -0500
+++ glibc-2.17-c758a686/sysdeps/powerpc/fpu/e_sqrtf.c 2014-05-27 22:31:43.000000000 -0500
@@ -121,7 +121,7 @@
feraiseexcept (FE_INVALID_SQRT);
fenv_union_t u = { .fenv = fegetenv_register () };
- if ((u.l[1] & FE_INVALID) == 0)
+ if ((u.l & FE_INVALID) == 0)
#endif
feraiseexcept (FE_INVALID);
x = a_nan.value;
diff -urN glibc-2.17-c758a686/sysdeps/powerpc/fpu/fclrexcpt.c glibc-2.17-c758a686/sysdeps/powerpc/fpu/fclrexcpt.c
--- glibc-2.17-c758a686/sysdeps/powerpc/fpu/fclrexcpt.c 2014-05-27 22:31:42.000000000 -0500
+++ glibc-2.17-c758a686/sysdeps/powerpc/fpu/fclrexcpt.c 2014-05-27 22:31:43.000000000 -0500
@@ -28,8 +28,8 @@
u.fenv = fegetenv_register ();
/* Clear the relevant bits. */
- u.l[1] = u.l[1] & ~((-(excepts >> (31 - FPSCR_VX) & 1) & FE_ALL_INVALID)
- | (excepts & FPSCR_STICKY_BITS));
+ u.l = u.l & ~((-(excepts >> (31 - FPSCR_VX) & 1) & FE_ALL_INVALID)
+ | (excepts & FPSCR_STICKY_BITS));
/* Put the new state in effect. */
fesetenv_register (u.fenv);
diff -urN glibc-2.17-c758a686/sysdeps/powerpc/fpu/fedisblxcpt.c glibc-2.17-c758a686/sysdeps/powerpc/fpu/fedisblxcpt.c
--- glibc-2.17-c758a686/sysdeps/powerpc/fpu/fedisblxcpt.c 2014-05-27 22:31:42.000000000 -0500
+++ glibc-2.17-c758a686/sysdeps/powerpc/fpu/fedisblxcpt.c 2014-05-27 22:31:43.000000000 -0500
@@ -32,15 +32,15 @@
fe.fenv = fegetenv_register ();
if (excepts & FE_INEXACT)
- fe.l[1] &= ~(1 << (31 - FPSCR_XE));
+ fe.l &= ~(1 << (31 - FPSCR_XE));
if (excepts & FE_DIVBYZERO)
- fe.l[1] &= ~(1 << (31 - FPSCR_ZE));
+ fe.l &= ~(1 << (31 - FPSCR_ZE));
if (excepts & FE_UNDERFLOW)
- fe.l[1] &= ~(1 << (31 - FPSCR_UE));
+ fe.l &= ~(1 << (31 - FPSCR_UE));
if (excepts & FE_OVERFLOW)
- fe.l[1] &= ~(1 << (31 - FPSCR_OE));
+ fe.l &= ~(1 << (31 - FPSCR_OE));
if (excepts & FE_INVALID)
- fe.l[1] &= ~(1 << (31 - FPSCR_VE));
+ fe.l &= ~(1 << (31 - FPSCR_VE));
fesetenv_register (fe.fenv);
new = __fegetexcept ();
diff -urN glibc-2.17-c758a686/sysdeps/powerpc/fpu/feenablxcpt.c glibc-2.17-c758a686/sysdeps/powerpc/fpu/feenablxcpt.c
--- glibc-2.17-c758a686/sysdeps/powerpc/fpu/feenablxcpt.c 2014-05-27 22:31:42.000000000 -0500
+++ glibc-2.17-c758a686/sysdeps/powerpc/fpu/feenablxcpt.c 2014-05-27 22:31:43.000000000 -0500
@@ -32,15 +32,15 @@
fe.fenv = fegetenv_register ();
if (excepts & FE_INEXACT)
- fe.l[1] |= (1 << (31 - FPSCR_XE));
+ fe.l |= (1 << (31 - FPSCR_XE));
if (excepts & FE_DIVBYZERO)
- fe.l[1] |= (1 << (31 - FPSCR_ZE));
+ fe.l |= (1 << (31 - FPSCR_ZE));
if (excepts & FE_UNDERFLOW)
- fe.l[1] |= (1 << (31 - FPSCR_UE));
+ fe.l |= (1 << (31 - FPSCR_UE));
if (excepts & FE_OVERFLOW)
- fe.l[1] |= (1 << (31 - FPSCR_OE));
+ fe.l |= (1 << (31 - FPSCR_OE));
if (excepts & FE_INVALID)
- fe.l[1] |= (1 << (31 - FPSCR_VE));
+ fe.l |= (1 << (31 - FPSCR_VE));
fesetenv_register (fe.fenv);
new = __fegetexcept ();
diff -urN glibc-2.17-c758a686/sysdeps/powerpc/fpu/fegetexcept.c glibc-2.17-c758a686/sysdeps/powerpc/fpu/fegetexcept.c
--- glibc-2.17-c758a686/sysdeps/powerpc/fpu/fegetexcept.c 2014-05-27 22:31:42.000000000 -0500
+++ glibc-2.17-c758a686/sysdeps/powerpc/fpu/fegetexcept.c 2014-05-27 22:31:43.000000000 -0500
@@ -27,15 +27,15 @@
fe.fenv = fegetenv_register ();
- if (fe.l[1] & (1 << (31 - FPSCR_XE)))
+ if (fe.l & (1 << (31 - FPSCR_XE)))
result |= FE_INEXACT;
- if (fe.l[1] & (1 << (31 - FPSCR_ZE)))
+ if (fe.l & (1 << (31 - FPSCR_ZE)))
result |= FE_DIVBYZERO;
- if (fe.l[1] & (1 << (31 - FPSCR_UE)))
+ if (fe.l & (1 << (31 - FPSCR_UE)))
result |= FE_UNDERFLOW;
- if (fe.l[1] & (1 << (31 - FPSCR_OE)))
+ if (fe.l & (1 << (31 - FPSCR_OE)))
result |= FE_OVERFLOW;
- if (fe.l[1] & (1 << (31 - FPSCR_VE)))
+ if (fe.l & (1 << (31 - FPSCR_VE)))
result |= FE_INVALID;
return result;
diff -urN glibc-2.17-c758a686/sysdeps/powerpc/fpu/feholdexcpt.c glibc-2.17-c758a686/sysdeps/powerpc/fpu/feholdexcpt.c
--- glibc-2.17-c758a686/sysdeps/powerpc/fpu/feholdexcpt.c 2014-05-27 22:31:42.000000000 -0500
+++ glibc-2.17-c758a686/sysdeps/powerpc/fpu/feholdexcpt.c 2014-05-27 22:33:09.000000000 -0500
@@ -30,13 +30,12 @@
/* Clear everything except for the rounding modes and non-IEEE arithmetic
flag. */
- new.l[1] = old.l[1] & 7;
- new.l[0] = old.l[0];
+ new.l = old.l & 0xffffffff00000007LL;
/* If the old env had any eabled exceptions, then mask SIGFPE in the
MSR FE0/FE1 bits. This may allow the FPU to run faster because it
always takes the default action and can not generate SIGFPE. */
- if ((old.l[1] & _FPU_MASK_ALL) != 0)
+ if ((old.l & _FPU_MASK_ALL) != 0)
(void)__fe_mask_env ();
/* Put the new state in effect. */
diff -urN glibc-2.17-c758a686/sysdeps/powerpc/fpu/fenv_libc.h glibc-2.17-c758a686/sysdeps/powerpc/fpu/fenv_libc.h
--- glibc-2.17-c758a686/sysdeps/powerpc/fpu/fenv_libc.h 2014-05-27 22:31:42.000000000 -0500
+++ glibc-2.17-c758a686/sysdeps/powerpc/fpu/fenv_libc.h 2014-05-27 22:31:43.000000000 -0500
@@ -69,7 +69,7 @@
typedef union
{
fenv_t fenv;
- unsigned int l[2];
+ unsigned long long l;
} fenv_union_t;
diff -urN glibc-2.17-c758a686/sysdeps/powerpc/fpu/fesetenv.c glibc-2.17-c758a686/sysdeps/powerpc/fpu/fesetenv.c
--- glibc-2.17-c758a686/sysdeps/powerpc/fpu/fesetenv.c 2014-05-27 22:31:42.000000000 -0500
+++ glibc-2.17-c758a686/sysdeps/powerpc/fpu/fesetenv.c 2014-05-27 22:35:18.000000000 -0500
@@ -36,14 +36,14 @@
exceptions, then unmask SIGFPE in the MSR FE0/FE1 bits. This will put the
hardware into "precise mode" and may cause the FPU to run slower on some
hardware. */
- if ((old.l[1] & _FPU_MASK_ALL) == 0 && (new.l[1] & _FPU_MASK_ALL) != 0)
+ if ((old.l & _FPU_MASK_ALL) == 0 && (new.l & _FPU_MASK_ALL) != 0)
(void)__fe_nomask_env ();
/* If the old env had any enabled exceptions and the new env has no enabled
exceptions, then mask SIGFPE in the MSR FE0/FE1 bits. This may allow the
FPU to run faster because it always takes the default action and can not
generate SIGFPE. */
- if ((old.l[1] & _FPU_MASK_ALL) != 0 && (new.l[1] & _FPU_MASK_ALL) == 0)
+ if ((old.l & _FPU_MASK_ALL) != 0 && (new.l & _FPU_MASK_ALL) == 0)
(void)__fe_mask_env ();
fesetenv_register (*envp);
diff -urN glibc-2.17-c758a686/sysdeps/powerpc/fpu/feupdateenv.c glibc-2.17-c758a686/sysdeps/powerpc/fpu/feupdateenv.c
--- glibc-2.17-c758a686/sysdeps/powerpc/fpu/feupdateenv.c 2014-05-27 22:31:42.000000000 -0500
+++ glibc-2.17-c758a686/sysdeps/powerpc/fpu/feupdateenv.c 2014-05-27 22:34:23.000000000 -0500
@@ -36,20 +36,20 @@
/* Restore rounding mode and exception enable from *envp and merge
exceptions. Leave fraction rounded/inexact and FP result/CC bits
unchanged. */
- new.l[1] = (old.l[1] & 0x1FFFFF00) | (new.l[1] & 0x1FF80FFF);
+ new.l = (old.l & 0xffffffff1fffff00LL) | (new.l & 0x1ff80fff);
/* If the old env has no eabled exceptions and the new env has any enabled
exceptions, then unmask SIGFPE in the MSR FE0/FE1 bits. This will put
the hardware into "precise mode" and may cause the FPU to run slower on
some hardware. */
- if ((old.l[1] & _FPU_MASK_ALL) == 0 && (new.l[1] & _FPU_MASK_ALL) != 0)
+ if ((old.l & _FPU_MASK_ALL) == 0 && (new.l & _FPU_MASK_ALL) != 0)
(void)__fe_nomask_env ();
/* If the old env had any eabled exceptions and the new env has no enabled
exceptions, then mask SIGFPE in the MSR FE0/FE1 bits. This may allow the
FPU to run faster because it always takes the default action and can not
generate SIGFPE. */
- if ((old.l[1] & _FPU_MASK_ALL) != 0 && (new.l[1] & _FPU_MASK_ALL) == 0)
+ if ((old.l & _FPU_MASK_ALL) != 0 && (new.l & _FPU_MASK_ALL) == 0)
(void)__fe_mask_env ();
/* Atomically enable and raise (if appropriate) exceptions set in `new'. */
diff -urN glibc-2.17-c758a686/sysdeps/powerpc/fpu/fgetexcptflg.c glibc-2.17-c758a686/sysdeps/powerpc/fpu/fgetexcptflg.c
--- glibc-2.17-c758a686/sysdeps/powerpc/fpu/fgetexcptflg.c 2014-05-27 22:31:42.000000000 -0500
+++ glibc-2.17-c758a686/sysdeps/powerpc/fpu/fgetexcptflg.c 2014-05-27 22:31:43.000000000 -0500
@@ -28,7 +28,7 @@
u.fenv = fegetenv_register ();
/* Return (all of) it. */
- *flagp = u.l[1] & excepts & FE_ALL_EXCEPT;
+ *flagp = u.l & excepts & FE_ALL_EXCEPT;
/* Success. */
return 0;
diff -urN glibc-2.17-c758a686/sysdeps/powerpc/fpu/fraiseexcpt.c glibc-2.17-c758a686/sysdeps/powerpc/fpu/fraiseexcpt.c
--- glibc-2.17-c758a686/sysdeps/powerpc/fpu/fraiseexcpt.c 2014-05-27 22:31:42.000000000 -0500
+++ glibc-2.17-c758a686/sysdeps/powerpc/fpu/fraiseexcpt.c 2014-05-27 22:31:43.000000000 -0500
@@ -34,11 +34,11 @@
u.fenv = fegetenv_register ();
/* Add the exceptions */
- u.l[1] = (u.l[1]
- | (excepts & FPSCR_STICKY_BITS)
- /* Turn FE_INVALID into FE_INVALID_SOFTWARE. */
- | (excepts >> ((31 - FPSCR_VX) - (31 - FPSCR_VXSOFT))
- & FE_INVALID_SOFTWARE));
+ u.l = (u.l
+ | (excepts & FPSCR_STICKY_BITS)
+ /* Turn FE_INVALID into FE_INVALID_SOFTWARE. */
+ | (excepts >> ((31 - FPSCR_VX) - (31 - FPSCR_VXSOFT))
+ & FE_INVALID_SOFTWARE));
/* Store the new status word (along with the rest of the environment),
triggering any appropriate exceptions. */
@@ -50,7 +50,7 @@
don't have FE_INVALID_SOFTWARE implemented. Detect this
case and raise FE_INVALID_SNAN instead. */
u.fenv = fegetenv_register ();
- if ((u.l[1] & FE_INVALID) == 0)
+ if ((u.l & FE_INVALID) == 0)
set_fpscr_bit (FPSCR_VXSNAN);
}
diff -urN glibc-2.17-c758a686/sysdeps/powerpc/fpu/fsetexcptflg.c glibc-2.17-c758a686/sysdeps/powerpc/fpu/fsetexcptflg.c
--- glibc-2.17-c758a686/sysdeps/powerpc/fpu/fsetexcptflg.c 2014-05-27 22:31:42.000000000 -0500
+++ glibc-2.17-c758a686/sysdeps/powerpc/fpu/fsetexcptflg.c 2014-05-27 22:31:43.000000000 -0500
@@ -32,10 +32,10 @@
flag = *flagp & excepts;
/* Replace the exception status */
- u.l[1] = ((u.l[1] & ~(FPSCR_STICKY_BITS & excepts))
- | (flag & FPSCR_STICKY_BITS)
- | (flag >> ((31 - FPSCR_VX) - (31 - FPSCR_VXSOFT))
- & FE_INVALID_SOFTWARE));
+ u.l = ((u.l & ~(FPSCR_STICKY_BITS & excepts))
+ | (flag & FPSCR_STICKY_BITS)
+ | (flag >> ((31 - FPSCR_VX) - (31 - FPSCR_VXSOFT))
+ & FE_INVALID_SOFTWARE));
/* Store the new status word (along with the rest of the environment).
This may cause floating-point exceptions if the restored state
diff -urN glibc-2.17-c758a686/sysdeps/powerpc/fpu/ftestexcept.c glibc-2.17-c758a686/sysdeps/powerpc/fpu/ftestexcept.c
--- glibc-2.17-c758a686/sysdeps/powerpc/fpu/ftestexcept.c 2014-05-27 22:31:42.000000000 -0500
+++ glibc-2.17-c758a686/sysdeps/powerpc/fpu/ftestexcept.c 2014-05-27 22:31:43.000000000 -0500
@@ -28,6 +28,6 @@
/* The FE_INVALID bit is dealt with correctly by the hardware, so we can
just: */
- return u.l[1] & excepts;
+ return u.l & excepts;
}
libm_hidden_def (fetestexcept)

View File

@ -0,0 +1,120 @@
# commit 603e84104cdc709c8e7dcbac54b9a585bf8dff78
# Author: Alan Modra <amodra@gmail.com>
# Date: Sat Aug 17 18:29:43 2013 +0930
#
# PowerPC floating point little-endian [9 of 15]
# http://sourceware.org/ml/libc-alpha/2013-07/msg00200.html
#
# This works around the fact that vsx is disabled in current
# little-endian gcc. Also, float constants take 4 bytes in memory
# vs. 16 bytes for vector constants, and we don't need to write one lot
# of masks for double (register format) and another for float (mem
# format).
#
# * sysdeps/powerpc/fpu/s_float_bitwise.h (__float_and_test28): Don't
# use vector int constants.
# (__float_and_test24, __float_and8, __float_get_exp): Likewise.
#
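# [Editor's note, not part of the upstream patch]  Independent of the
# vector-vs-scalar constant question, what these helpers compute is just
# a bitwise AND on the float's representation.  A stand-alone C sketch of
# the __float_and8 case (mask 0x7f800000 keeps only the exponent field;
# strip the leading "# " to compile it):
#
#   #include <stdint.h>
#   #include <stdio.h>
#   #include <string.h>
#
#   int main (void)
#   {
#     float num = -13.625f;
#     uint32_t bits, masked;
#     memcpy (&bits, &num, sizeof bits);
#     masked = bits & 0x7f800000;      /* what __float_and8 computes */
#
#     float out;
#     memcpy (&out, &masked, sizeof out);
#     printf ("%g -> %g (2^%d)\n", num, out, (int) (masked >> 23) - 127);
#     return 0;                        /* prints: -13.625 -> 8 (2^3) */
#   }
#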
diff -urN glibc-2.17-c758a686/sysdeps/powerpc/fpu/s_float_bitwise.h glibc-2.17-c758a686/sysdeps/powerpc/fpu/s_float_bitwise.h
--- glibc-2.17-c758a686/sysdeps/powerpc/fpu/s_float_bitwise.h 2014-05-27 22:37:18.000000000 -0500
+++ glibc-2.17-c758a686/sysdeps/powerpc/fpu/s_float_bitwise.h 2014-05-27 22:37:20.000000000 -0500
@@ -23,18 +23,19 @@
#include <math_private.h>
/* Returns (int)(num & 0x7FFFFFF0 == value) */
-static inline
-int __float_and_test28 (float num, float value)
+static inline int
+__float_and_test28 (float num, float value)
{
float ret;
#ifdef _ARCH_PWR7
- vector int mask = (vector int) {
- 0x7ffffffe, 0x00000000, 0x00000000, 0x0000000
- };
+ union {
+ int i;
+ float f;
+ } mask = { .i = 0x7ffffff0 };
__asm__ (
- /* the 'f' constrain is use on mask because we just need
+ /* the 'f' constraint is used on mask because we just need
* to compare floats, not full vector */
- "xxland %x0,%x1,%x2" : "=f" (ret) : "f" (num), "f" (mask)
+ "xxland %x0,%x1,%x2" : "=f" (ret) : "f" (num), "f" (mask.f)
);
#else
int32_t inum;
@@ -46,16 +47,17 @@
}
/* Returns (int)(num & 0x7FFFFF00 == value) */
-static inline
-int __float_and_test24 (float num, float value)
+static inline int
+__float_and_test24 (float num, float value)
{
float ret;
#ifdef _ARCH_PWR7
- vector int mask = (vector int) {
- 0x7fffffe0, 0x00000000, 0x00000000, 0x0000000
- };
+ union {
+ int i;
+ float f;
+ } mask = { .i = 0x7fffff00 };
__asm__ (
- "xxland %x0,%x1,%x2" : "=f" (ret) : "f" (num), "f" (mask)
+ "xxland %x0,%x1,%x2" : "=f" (ret) : "f" (num), "f" (mask.f)
);
#else
int32_t inum;
@@ -67,16 +69,17 @@
}
/* Returns (float)(num & 0x7F800000) */
-static inline
-float __float_and8 (float num)
+static inline float
+__float_and8 (float num)
{
float ret;
#ifdef _ARCH_PWR7
- vector int mask = (vector int) {
- 0x7ff00000, 0x00000000, 0x00000000, 0x00000000
- };
+ union {
+ int i;
+ float f;
+ } mask = { .i = 0x7f800000 };
__asm__ (
- "xxland %x0,%x1,%x2" : "=f" (ret) : "f" (num), "f" (mask)
+ "xxland %x0,%x1,%x2" : "=f" (ret) : "f" (num), "f" (mask.f)
);
#else
int32_t inum;
@@ -88,17 +91,18 @@
}
/* Returns ((int32_t)(num & 0x7F800000) >> 23) */
-static inline
-int32_t __float_get_exp (float num)
+static inline int32_t
+__float_get_exp (float num)
{
int32_t inum;
#ifdef _ARCH_PWR7
float ret;
- vector int mask = (vector int) {
- 0x7ff00000, 0x00000000, 0x00000000, 0x00000000
- };
+ union {
+ int i;
+ float f;
+ } mask = { .i = 0x7f800000 };
__asm__ (
- "xxland %x0,%x1,%x2" : "=f" (ret) : "f" (num), "f" (mask)
+ "xxland %x0,%x1,%x2" : "=f" (ret) : "f" (num), "f" (mask.f)
);
GET_FLOAT_WORD(inum, ret);
#else

View File

@ -0,0 +1,119 @@
# commit da13146da10360436941e843834c90a9aef5fd7a
# Author: Alan Modra <amodra@gmail.com>
# Date: Sat Aug 17 18:30:23 2013 +0930
#
# PowerPC floating point little-endian [10 of 15]
# http://sourceware.org/ml/libc-alpha/2013-07/msg00201.html
#
# These two functions oddly test x+1>0 when a double x is >= 0.0, and
# similarly when x is negative. I don't see the point of that since the
# test should always be true. I also don't see any need to convert x+1
# to integer rather than simply using xr+1. Note that the standard
# allows these functions to return any value when the input is outside
# the range of long long, but it's not too hard to prevent xr+1
# overflowing so that's what I've done.
#
# (With rounding mode FE_UPWARD, x+1 can be a lot more than what you
# might naively expect, but perhaps that situation was covered by the
# x - xrf < 1.0 test.)
#
# * sysdeps/powerpc/fpu/s_llround.c (__llround): Rewrite.
# * sysdeps/powerpc/fpu/s_llroundf.c (__llroundf): Rewrite.
#
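# [Editor's note, not part of the upstream patch]  A stand-alone sketch of
# the rewritten rounding logic.  The increment is written as
# xr += (long long) ((unsigned long long) xr + 1) > 0 so that when xr is
# already LLONG_MAX the wrapped sum is negative, the comparison yields 0,
# and the result stays clipped; the decrement guards LLONG_MIN the same
# way.  Like the glibc code, this relies on two's-complement wraparound
# of the unsigned-to-signed conversion.  Strip the leading "# " to
# compile it:
#
#   #include <stdio.h>
#
#   static long long
#   llround_sketch (double x)
#   {
#     long long xr = (long long) x;   /* truncate toward zero */
#     double xrf = (double) xr;
#
#     if (x >= 0.0)
#       {
#         if (x - xrf >= 0.5)
#           xr += (long long) ((unsigned long long) xr + 1) > 0;
#       }
#     else
#       {
#         if (xrf - x >= 0.5)
#           xr -= (long long) ((unsigned long long) xr - 1) < 0;
#       }
#     return xr;
#   }
#
#   int main (void)
#   {
#     /* Prints 3 -3 0: halfway cases round away from zero.  */
#     printf ("%lld %lld %lld\n", llround_sketch (2.5),
#             llround_sketch (-2.5), llround_sketch (0.3));
#     return 0;
#   }
#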
diff -urN glibc-2.17-c758a686/sysdeps/powerpc/fpu/s_llround.c glibc-2.17-c758a686/sysdeps/powerpc/fpu/s_llround.c
--- glibc-2.17-c758a686/sysdeps/powerpc/fpu/s_llround.c 2014-05-27 22:38:55.000000000 -0500
+++ glibc-2.17-c758a686/sysdeps/powerpc/fpu/s_llround.c 2014-05-27 22:38:58.000000000 -0500
@@ -19,29 +19,28 @@
#include <math.h>
#include <math_ldbl_opt.h>
-/* I think that what this routine is supposed to do is round a value
- to the nearest integer, with values exactly on the boundary rounded
- away from zero. */
-/* This routine relies on (long long)x, when x is out of range of a long long,
- clipping to MAX_LLONG or MIN_LLONG. */
+/* Round to the nearest integer, with values exactly on a 0.5 boundary
+ rounded away from zero, regardless of the current rounding mode.
+ If (long long)x, when x is out of range of a long long, clips at
+ LLONG_MAX or LLONG_MIN, then this implementation also clips. */
long long int
__llround (double x)
{
- double xrf;
- long long int xr;
- xr = (long long int) x;
- xrf = (double) xr;
+ long long xr = (long long) x;
+ double xrf = (double) xr;
+
if (x >= 0.0)
- if (x - xrf >= 0.5 && x - xrf < 1.0 && x+1 > 0)
- return x+1;
- else
- return x;
+ {
+ if (x - xrf >= 0.5)
+ xr += (long long) ((unsigned long long) xr + 1) > 0;
+ }
else
- if (xrf - x >= 0.5 && xrf - x < 1.0 && x-1 < 0)
- return x-1;
- else
- return x;
+ {
+ if (xrf - x >= 0.5)
+ xr -= (long long) ((unsigned long long) xr - 1) < 0;
+ }
+ return xr;
}
weak_alias (__llround, llround)
#ifdef NO_LONG_DOUBLE
diff -urN glibc-2.17-c758a686/sysdeps/powerpc/fpu/s_llroundf.c glibc-2.17-c758a686/sysdeps/powerpc/fpu/s_llroundf.c
--- glibc-2.17-c758a686/sysdeps/powerpc/fpu/s_llroundf.c 2014-05-27 22:38:55.000000000 -0500
+++ glibc-2.17-c758a686/sysdeps/powerpc/fpu/s_llroundf.c 2014-05-27 22:38:58.000000000 -0500
@@ -18,28 +18,27 @@
#include <math.h>
-/* I think that what this routine is supposed to do is round a value
- to the nearest integer, with values exactly on the boundary rounded
- away from zero. */
-/* This routine relies on (long long)x, when x is out of range of a long long,
- clipping to MAX_LLONG or MIN_LLONG. */
+/* Round to the nearest integer, with values exactly on a 0.5 boundary
+ rounded away from zero, regardless of the current rounding mode.
+ If (long long)x, when x is out of range of a long long, clips at
+ LLONG_MAX or LLONG_MIN, then this implementation also clips. */
long long int
__llroundf (float x)
{
- float xrf;
- long long int xr;
- xr = (long long int) x;
- xrf = (float) xr;
+ long long xr = (long long) x;
+ float xrf = (float) xr;
+
if (x >= 0.0)
- if (x - xrf >= 0.5 && x - xrf < 1.0 && x+1 > 0)
- return x+1;
- else
- return x;
+ {
+ if (x - xrf >= 0.5)
+ xr += (long long) ((unsigned long long) xr + 1) > 0;
+ }
else
- if (xrf - x >= 0.5 && xrf - x < 1.0 && x-1 < 0)
- return x-1;
- else
- return x;
+ {
+ if (xrf - x >= 0.5)
+ xr -= (long long) ((unsigned long long) xr - 1) < 0;
+ }
+ return xr;
}
weak_alias (__llroundf, llroundf)

View File

@ -0,0 +1,163 @@
# commit 9c008155b7d5d1bd81d909497850a2ece28aec50
# Author: Alan Modra <amodra@gmail.com>
# Date: Sat Aug 17 18:31:05 2013 +0930
#
# PowerPC floating point little-endian [11 of 15]
# http://sourceware.org/ml/libc-alpha/2013-07/msg00202.html
#
# Another little-endian fix.
#
# * sysdeps/powerpc/fpu_control.h (_FPU_GETCW): Rewrite using
# 64-bit int/double union.
# (_FPU_SETCW): Likewise.
# * sysdeps/powerpc/fpu/tst-setcontext-fpscr.c (_GET_DI_FPSCR): Likewise.
# (_SET_DI_FPSCR, _GET_SI_FPSCR, _SET_SI_FPSCR): Likewise.
#
diff -urN glibc-2.17-c758a686/sysdeps/powerpc/fpu/fpu_control.h glibc-2.17-c758a686/sysdeps/powerpc/fpu/fpu_control.h
--- glibc-2.17-c758a686/sysdeps/powerpc/fpu/fpu_control.h 2014-05-27 22:40:18.000000000 -0500
+++ glibc-2.17-c758a686/sysdeps/powerpc/fpu/fpu_control.h 2014-05-27 22:43:40.000000000 -0500
@@ -45,22 +45,26 @@
#define _FPU_IEEE 0x000000f0
/* Type of the control word. */
-typedef unsigned int fpu_control_t __attribute__ ((__mode__ (__SI__)));
+typedef unsigned int fpu_control_t;
/* Macros for accessing the hardware control word. */
-#define _FPU_GETCW(__cw) ( { \
- union { double d; fpu_control_t cw[2]; } \
- tmp __attribute__ ((__aligned__(8))); \
- __asm__ ("mffs 0; stfd%U0 0,%0" : "=m" (tmp.d) : : "fr0"); \
- (__cw)=tmp.cw[1]; \
- tmp.cw[1]; } )
-#define _FPU_SETCW(__cw) { \
- union { double d; fpu_control_t cw[2]; } \
- tmp __attribute__ ((__aligned__(8))); \
- tmp.cw[0] = 0xFFF80000; /* More-or-less arbitrary; this is a QNaN. */ \
- tmp.cw[1] = __cw; \
- __asm__ ("lfd%U0 0,%0; mtfsf 255,0" : : "m" (tmp.d) : "fr0"); \
-}
+#define _FPU_GETCW(cw) \
+ ({union { double __d; unsigned long long __ll; } __u; \
+ register double __fr; \
+ __asm__ ("mffs %0" : "=f" (__fr)); \
+ __u.__d = __fr; \
+ (cw) = (fpu_control_t) __u.__ll; \
+ (fpu_control_t) __u.__ll; \
+ })
+
+#define _FPU_SETCW(cw) \
+ { union { double __d; unsigned long long __ll; } __u; \
+ register double __fr; \
+ __u.__ll = 0xfff80000LL << 32; /* This is a QNaN. */ \
+ __u.__ll |= (cw) & 0xffffffffLL; \
+ __fr = __u.__d; \
+ __asm__ ("mtfsf 255,%0" : : "f" (__fr)); \
+ }
/* Default control word set at startup. */
extern fpu_control_t __fpu_control;
diff -urN glibc-2.17-c758a686/sysdeps/powerpc/fpu/tst-setcontext-fpscr.c glibc-2.17-c758a686/sysdeps/powerpc/fpu/tst-setcontext-fpscr.c
--- glibc-2.17-c758a686/sysdeps/powerpc/fpu/tst-setcontext-fpscr.c 2014-05-27 22:40:18.000000000 -0500
+++ glibc-2.17-c758a686/sysdeps/powerpc/fpu/tst-setcontext-fpscr.c 2014-05-27 22:40:21.000000000 -0500
@@ -83,7 +83,7 @@
return 0;
}
-typedef unsigned long long di_fpscr_t __attribute__ ((__mode__ (__DI__)));
+typedef unsigned int di_fpscr_t __attribute__ ((__mode__ (__DI__)));
typedef unsigned int si_fpscr_t __attribute__ ((__mode__ (__SI__)));
#define _FPSCR_RESERVED 0xfffffff8ffffff04ULL
@@ -95,50 +95,51 @@
#define _FPSCR_TEST1_RN 0x0000000000000002ULL
/* Macros for accessing the hardware control word on Power6[x]. */
-# define _GET_DI_FPSCR(__fpscr) ({ \
- union { double d; \
- di_fpscr_t fpscr; } \
- tmp __attribute__ ((__aligned__(8))); \
- __asm__ ("mffs 0; stfd%U0 0,%0" : "=m" (tmp.d) : : "fr0"); \
- (__fpscr)=tmp.fpscr; \
- tmp.fpscr; })
+#define _GET_DI_FPSCR(__fpscr) \
+ ({union { double d; di_fpscr_t fpscr; } u; \
+ register double fr; \
+ __asm__ ("mffs %0" : "=f" (fr)); \
+ u.d = fr; \
+ (__fpscr) = u.fpscr; \
+ u.fpscr; \
+ })
-/* We make sure to zero fp0 after we use it in order to prevent stale data
+/* We make sure to zero fp after we use it in order to prevent stale data
in an fp register from making a test-case pass erroneously. */
-# define _SET_DI_FPSCR(__fpscr) { \
- union { double d; di_fpscr_t fpscr; } \
- tmp __attribute__ ((__aligned__(8))); \
- tmp.fpscr = __fpscr; \
- /* Set the entire 64-bit FPSCR. */ \
- __asm__ ("lfd%U0 0,%0; " \
- ".machine push; " \
- ".machine \"power6\"; " \
- "mtfsf 255,0,1,0; " \
- ".machine pop" : : "m" (tmp.d) : "fr0"); \
- tmp.d = 0; \
- __asm__("lfd%U0 0,%0" : : "m" (tmp.d) : "fr0"); \
-}
-
-# define _GET_SI_FPSCR(__fpscr) ({ \
- union { double d; \
- si_fpscr_t cw[2]; } \
- tmp __attribute__ ((__aligned__(8))); \
- __asm__ ("mffs 0; stfd%U0 0,%0" : "=m" (tmp.d) : : "fr0"); \
- (__fpscr)=tmp.cw[1]; \
- tmp.cw[0]; })
+# define _SET_DI_FPSCR(__fpscr) \
+ { union { double d; di_fpscr_t fpscr; } u; \
+ register double fr; \
+ u.fpscr = __fpscr; \
+ fr = u.d; \
+ /* Set the entire 64-bit FPSCR. */ \
+ __asm__ (".machine push; " \
+ ".machine \"power6\"; " \
+ "mtfsf 255,%0,1,0; " \
+ ".machine pop" : : "f" (fr)); \
+ fr = 0.0; \
+ }
+
+# define _GET_SI_FPSCR(__fpscr) \
+ ({union { double d; di_fpscr_t fpscr; } u; \
+ register double fr; \
+ __asm__ ("mffs %0" : "=f" (fr)); \
+ u.d = fr; \
+ (__fpscr) = (si_fpscr_t) u.fpscr; \
+ (si_fpscr_t) u.fpscr; \
+ })
-/* We make sure to zero fp0 after we use it in order to prevent stale data
+/* We make sure to zero fp after we use it in order to prevent stale data
in an fp register from making a test-case pass erroneously. */
-# define _SET_SI_FPSCR(__fpscr) { \
- union { double d; si_fpscr_t fpscr[2]; } \
- tmp __attribute__ ((__aligned__(8))); \
- /* More-or-less arbitrary; this is a QNaN. */ \
- tmp.fpscr[0] = 0xFFF80000; \
- tmp.fpscr[1] = __fpscr; \
- __asm__ ("lfd%U0 0,%0; mtfsf 255,0" : : "m" (tmp.d) : "fr0"); \
- tmp.d = 0; \
- __asm__("lfd%U0 0,%0" : : "m" (tmp.d) : "fr0"); \
-}
+# define _SET_SI_FPSCR(__fpscr) \
+ { union { double d; di_fpscr_t fpscr; } u; \
+ register double fr; \
+ /* More-or-less arbitrary; this is a QNaN. */ \
+ u.fpscr = 0xfff80000ULL << 32; \
+ u.fpscr |= __fpscr & 0xffffffffULL; \
+ fr = u.d; \
+ __asm__ ("mtfsf 255,%0" : : "f" (fr)); \
+ fr = 0.0; \
+ }
void prime_special_regs(int which)
{

View File

@ -0,0 +1,312 @@
# commit 7b88401f3b25325b1381798a0eccb3efe7751fec
# Author: Alan Modra <amodra@gmail.com>
# Date: Sat Aug 17 18:31:45 2013 +0930
#
# PowerPC floating point little-endian [12 of 15]
# http://sourceware.org/ml/libc-alpha/2013-08/msg00087.html
#
# Fixes for little-endian in 32-bit assembly.
#
# * sysdeps/powerpc/sysdep.h (LOWORD, HIWORD, HISHORT): Define.
# * sysdeps/powerpc/powerpc32/fpu/s_copysign.S: Load little-endian
# words of double from correct stack offsets.
# * sysdeps/powerpc/powerpc32/fpu/s_copysignl.S: Likewise.
# * sysdeps/powerpc/powerpc32/fpu/s_lrint.S: Likewise.
# * sysdeps/powerpc/powerpc32/fpu/s_lround.S: Likewise.
# * sysdeps/powerpc/powerpc32/power4/fpu/s_llrint.S: Likewise.
# * sysdeps/powerpc/powerpc32/power4/fpu/s_llrintf.S: Likewise.
# * sysdeps/powerpc/powerpc32/power5+/fpu/s_llround.S: Likewise.
# * sysdeps/powerpc/powerpc32/power5+/fpu/s_lround.S: Likewise.
# * sysdeps/powerpc/powerpc32/power5/fpu/s_isnan.S: Likewise.
# * sysdeps/powerpc/powerpc32/power6/fpu/s_isnan.S: Likewise.
# * sysdeps/powerpc/powerpc32/power6/fpu/s_llrint.S: Likewise.
# * sysdeps/powerpc/powerpc32/power6/fpu/s_llrintf.S: Likewise.
# * sysdeps/powerpc/powerpc32/power6/fpu/s_llround.S: Likewise.
# * sysdeps/powerpc/powerpc32/power7/fpu/s_finite.S: Likewise.
# * sysdeps/powerpc/powerpc32/power7/fpu/s_isinf.S: Likewise.
# * sysdeps/powerpc/powerpc32/power7/fpu/s_isnan.S: Likewise.
# * sysdeps/powerpc/powerpc64/power7/fpu/s_finite.S: Use HISHORT.
# * sysdeps/powerpc/powerpc64/power7/fpu/s_isinf.S: Likewise.
#
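# [Editor's note, not part of the upstream patch]  The new sysdep.h macros
# simply name the byte offsets, within the in-memory image of a 64-bit
# double, of its high and low 32-bit halves, which swap with endianness;
# hence the 8+HIWORD / 8+LOWORD stack offsets in the .S hunks below.  A
# stand-alone C illustration (strip the leading "# " to compile it):
#
#   #include <stdio.h>
#   #include <string.h>
#
#   int main (void)
#   {
#     double d = -0.0;            /* the sign bit lives in the high word */
#     unsigned int w[2];
#     memcpy (w, &d, sizeof w);
#     /* Big-endian prints 80000000 00000000 (HIWORD == 0);
#        little-endian prints 00000000 80000000 (HIWORD == 4).  */
#     printf ("offset 0: %08x   offset 4: %08x\n", w[0], w[1]);
#     return 0;
#   }
#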
diff -urN glibc-2.17-c758a686/sysdeps/powerpc/powerpc32/fpu/s_copysign.S glibc-2.17-c758a686/sysdeps/powerpc/powerpc32/fpu/s_copysign.S
--- glibc-2.17-c758a686/sysdeps/powerpc/powerpc32/fpu/s_copysign.S 2014-05-27 22:45:44.000000000 -0500
+++ glibc-2.17-c758a686/sysdeps/powerpc/powerpc32/fpu/s_copysign.S 2014-05-27 22:45:46.000000000 -0500
@@ -29,7 +29,7 @@
stwu r1,-16(r1)
cfi_adjust_cfa_offset (16)
stfd fp2,8(r1)
- lwz r3,8(r1)
+ lwz r3,8+HIWORD(r1)
cmpwi r3,0
addi r1,r1,16
cfi_adjust_cfa_offset (-16)
diff -urN glibc-2.17-c758a686/sysdeps/powerpc/powerpc32/fpu/s_copysignl.S glibc-2.17-c758a686/sysdeps/powerpc/powerpc32/fpu/s_copysignl.S
--- glibc-2.17-c758a686/sysdeps/powerpc/powerpc32/fpu/s_copysignl.S 2014-05-27 22:45:44.000000000 -0500
+++ glibc-2.17-c758a686/sysdeps/powerpc/powerpc32/fpu/s_copysignl.S 2014-05-27 22:45:46.000000000 -0500
@@ -30,7 +30,7 @@
fmr fp0,fp1
fabs fp1,fp1
fcmpu cr7,fp0,fp1
- lwz r3,8(r1)
+ lwz r3,8+HIWORD(r1)
cmpwi cr6,r3,0
addi r1,r1,16
cfi_adjust_cfa_offset (-16)
diff -urN glibc-2.17-c758a686/sysdeps/powerpc/powerpc32/fpu/s_lrint.S glibc-2.17-c758a686/sysdeps/powerpc/powerpc32/fpu/s_lrint.S
--- glibc-2.17-c758a686/sysdeps/powerpc/powerpc32/fpu/s_lrint.S 2014-05-27 22:45:44.000000000 -0500
+++ glibc-2.17-c758a686/sysdeps/powerpc/powerpc32/fpu/s_lrint.S 2014-05-27 22:45:46.000000000 -0500
@@ -24,10 +24,10 @@
stwu r1,-16(r1)
fctiw fp13,fp1
stfd fp13,8(r1)
- nop /* Insure the following load is in a different dispatch group */
+ nop /* Ensure the following load is in a different dispatch group */
nop /* to avoid pipe stall on POWER4&5. */
nop
- lwz r3,12(r1)
+ lwz r3,8+LOWORD(r1)
addi r1,r1,16
blr
END (__lrint)
diff -urN glibc-2.17-c758a686/sysdeps/powerpc/powerpc32/fpu/s_lround.S glibc-2.17-c758a686/sysdeps/powerpc/powerpc32/fpu/s_lround.S
--- glibc-2.17-c758a686/sysdeps/powerpc/powerpc32/fpu/s_lround.S 2014-05-27 22:45:44.000000000 -0500
+++ glibc-2.17-c758a686/sysdeps/powerpc/powerpc32/fpu/s_lround.S 2014-05-27 22:45:46.000000000 -0500
@@ -67,7 +67,7 @@
nop /* Ensure the following load is in a different dispatch */
nop /* group to avoid pipe stall on POWER4&5. */
nop
- lwz r3,12(r1) /* Load return as integer. */
+ lwz r3,8+LOWORD(r1) /* Load return as integer. */
.Lout:
addi r1,r1,16
blr
diff -urN glibc-2.17-c758a686/sysdeps/powerpc/powerpc32/power4/fpu/s_llrint.S glibc-2.17-c758a686/sysdeps/powerpc/powerpc32/power4/fpu/s_llrint.S
--- glibc-2.17-c758a686/sysdeps/powerpc/powerpc32/power4/fpu/s_llrint.S 2014-05-27 22:45:44.000000000 -0500
+++ glibc-2.17-c758a686/sysdeps/powerpc/powerpc32/power4/fpu/s_llrint.S 2014-05-27 22:48:09.000000000 -0500
@@ -29,8 +29,8 @@
nop /* Insure the following load is in a different dispatch group */
nop /* to avoid pipe stall on POWER4&5. */
nop
- lwz r3,8(r1)
- lwz r4,12(r1)
+ lwz r3,8+HIWORD(r1)
+ lwz r4,8+LOWORD(r1)
addi r1,r1,16
blr
END (__llrint)
diff -urN glibc-2.17-c758a686/sysdeps/powerpc/powerpc32/power4/fpu/s_llrintf.S glibc-2.17-c758a686/sysdeps/powerpc/powerpc32/power4/fpu/s_llrintf.S
--- glibc-2.17-c758a686/sysdeps/powerpc/powerpc32/power4/fpu/s_llrintf.S 2014-05-27 22:45:44.000000000 -0500
+++ glibc-2.17-c758a686/sysdeps/powerpc/powerpc32/power4/fpu/s_llrintf.S 2014-05-27 22:48:44.000000000 -0500
@@ -28,8 +28,8 @@
nop /* Insure the following load is in a different dispatch group */
nop /* to avoid pipe stall on POWER4&5. */
nop
- lwz r3,8(r1)
- lwz r4,12(r1)
+ lwz r3,8+HIWORD(r1)
+ lwz r4,8+LOWORD(r1)
addi r1,r1,16
blr
END (__llrintf)
diff -urN glibc-2.17-c758a686/sysdeps/powerpc/powerpc32/power5/fpu/s_isnan.S glibc-2.17-c758a686/sysdeps/powerpc/powerpc32/power5/fpu/s_isnan.S
--- glibc-2.17-c758a686/sysdeps/powerpc/powerpc32/power5/fpu/s_isnan.S 2014-05-27 22:45:44.000000000 -0500
+++ glibc-2.17-c758a686/sysdeps/powerpc/powerpc32/power5/fpu/s_isnan.S 2014-05-27 22:45:46.000000000 -0500
@@ -27,8 +27,8 @@
ori r1,r1,0
stfd fp1,24(r1) /* copy FPR to GPR */
ori r1,r1,0
- lwz r4,24(r1)
- lwz r5,28(r1)
+ lwz r4,24+HIWORD(r1)
+ lwz r5,24+LOWORD(r1)
lis r0,0x7ff0 /* const long r0 0x7ff00000 00000000 */
clrlwi r4,r4,1 /* x = fabs(x) */
cmpw cr7,r4,r0 /* if (fabs(x) =< inf) */
diff -urN glibc-2.17-c758a686/sysdeps/powerpc/powerpc32/power5+/fpu/s_llround.S glibc-2.17-c758a686/sysdeps/powerpc/powerpc32/power5+/fpu/s_llround.S
--- glibc-2.17-c758a686/sysdeps/powerpc/powerpc32/power5+/fpu/s_llround.S 2014-05-27 22:45:44.000000000 -0500
+++ glibc-2.17-c758a686/sysdeps/powerpc/powerpc32/power5+/fpu/s_llround.S 2014-05-27 22:45:46.000000000 -0500
@@ -39,8 +39,8 @@
nop /* Ensure the following load is in a different dispatch */
nop /* group to avoid pipe stall on POWER4&5. */
nop
- lwz r4,12(r1)
- lwz r3,8(r1)
+ lwz r3,8+HIWORD(r1)
+ lwz r4,8+LOWORD(r1)
addi r1,r1,16
blr
END (__llround)
diff -urN glibc-2.17-c758a686/sysdeps/powerpc/powerpc32/power5+/fpu/s_lround.S glibc-2.17-c758a686/sysdeps/powerpc/powerpc32/power5+/fpu/s_lround.S
--- glibc-2.17-c758a686/sysdeps/powerpc/powerpc32/power5+/fpu/s_lround.S 2014-05-27 22:45:44.000000000 -0500
+++ glibc-2.17-c758a686/sysdeps/powerpc/powerpc32/power5+/fpu/s_lround.S 2014-05-27 22:45:46.000000000 -0500
@@ -38,7 +38,7 @@
nop /* Ensure the following load is in a different dispatch */
nop /* group to avoid pipe stall on POWER4&5. */
nop
- lwz r3,12(r1)
+ lwz r3,8+LOWORD(r1)
addi r1,r1,16
blr
END (__lround)
diff -urN glibc-2.17-c758a686/sysdeps/powerpc/powerpc32/power6/fpu/s_isnan.S glibc-2.17-c758a686/sysdeps/powerpc/powerpc32/power6/fpu/s_isnan.S
--- glibc-2.17-c758a686/sysdeps/powerpc/powerpc32/power6/fpu/s_isnan.S 2014-05-27 22:45:44.000000000 -0500
+++ glibc-2.17-c758a686/sysdeps/powerpc/powerpc32/power6/fpu/s_isnan.S 2014-05-27 22:45:46.000000000 -0500
@@ -27,8 +27,8 @@
ori r1,r1,0
stfd fp1,24(r1) /* copy FPR to GPR */
ori r1,r1,0
- lwz r4,24(r1)
- lwz r5,28(r1)
+ lwz r4,24+HIWORD(r1)
+ lwz r5,24+LOWORD(r1)
lis r0,0x7ff0 /* const long r0 0x7ff00000 00000000 */
clrlwi r4,r4,1 /* x = fabs(x) */
cmpw cr7,r4,r0 /* if (fabs(x) =< inf) */
diff -urN glibc-2.17-c758a686/sysdeps/powerpc/powerpc32/power6/fpu/s_llrint.S glibc-2.17-c758a686/sysdeps/powerpc/powerpc32/power6/fpu/s_llrint.S
--- glibc-2.17-c758a686/sysdeps/powerpc/powerpc32/power6/fpu/s_llrint.S 2014-05-27 22:45:44.000000000 -0500
+++ glibc-2.17-c758a686/sysdeps/powerpc/powerpc32/power6/fpu/s_llrint.S 2014-05-27 22:46:52.000000000 -0500
@@ -29,8 +29,8 @@
/* Insure the following load is in a different dispatch group by
inserting "group ending nop". */
ori r1,r1,0
- lwz r3,8(r1)
- lwz r4,12(r1)
+ lwz r3,8+HIWORD(r1)
+ lwz r4,8+LOWORD(r1)
addi r1,r1,16
blr
END (__llrint)
diff -urN glibc-2.17-c758a686/sysdeps/powerpc/powerpc32/power6/fpu/s_llrintf.S glibc-2.17-c758a686/sysdeps/powerpc/powerpc32/power6/fpu/s_llrintf.S
--- glibc-2.17-c758a686/sysdeps/powerpc/powerpc32/power6/fpu/s_llrintf.S 2014-05-27 22:45:44.000000000 -0500
+++ glibc-2.17-c758a686/sysdeps/powerpc/powerpc32/power6/fpu/s_llrintf.S 2014-05-27 22:47:29.000000000 -0500
@@ -28,8 +28,8 @@
/* Insure the following load is in a different dispatch group by
inserting "group ending nop". */
ori r1,r1,0
- lwz r3,8(r1)
- lwz r4,12(r1)
+ lwz r3,8+HIWORD(r1)
+ lwz r4,8+LOWORD(r1)
addi r1,r1,16
blr
END (__llrintf)
diff -urN glibc-2.17-c758a686/sysdeps/powerpc/powerpc32/power6/fpu/s_llround.S glibc-2.17-c758a686/sysdeps/powerpc/powerpc32/power6/fpu/s_llround.S
--- glibc-2.17-c758a686/sysdeps/powerpc/powerpc32/power6/fpu/s_llround.S 2014-05-27 22:45:44.000000000 -0500
+++ glibc-2.17-c758a686/sysdeps/powerpc/powerpc32/power6/fpu/s_llround.S 2014-05-27 22:45:46.000000000 -0500
@@ -39,8 +39,8 @@
/* Insure the following load is in a different dispatch group by
inserting "group ending nop". */
ori r1,r1,0
- lwz r4,12(r1)
- lwz r3,8(r1)
+ lwz r3,8+HIWORD(r1)
+ lwz r4,8+LOWORD(r1)
addi r1,r1,16
blr
END (__llround)
diff -urN glibc-2.17-c758a686/sysdeps/powerpc/powerpc32/power7/fpu/s_finite.S glibc-2.17-c758a686/sysdeps/powerpc/powerpc32/power7/fpu/s_finite.S
--- glibc-2.17-c758a686/sysdeps/powerpc/powerpc32/power7/fpu/s_finite.S 2014-05-27 22:45:44.000000000 -0500
+++ glibc-2.17-c758a686/sysdeps/powerpc/powerpc32/power7/fpu/s_finite.S 2014-05-27 22:45:46.000000000 -0500
@@ -54,9 +54,8 @@
stfd fp1,8(r1) /* Transfer FP to GPR's. */
ori 2,2,0 /* Force a new dispatch group. */
- lhz r0,8(r1) /* Fetch the upper portion of the high word of
- the FP value (where the exponent and sign bits
- are). */
+ lhz r0,8+HISHORT(r1) /* Fetch the upper 16 bits of the FP value
+ (biased exponent and sign bit). */
clrlwi r0,r0,17 /* r0 = abs(r0). */
addi r1,r1,16 /* Reset the stack pointer. */
cmpwi cr7,r0,0x7ff0 /* r4 == 0x7ff0?. */
diff -urN glibc-2.17-c758a686/sysdeps/powerpc/powerpc32/power7/fpu/s_isinf.S glibc-2.17-c758a686/sysdeps/powerpc/powerpc32/power7/fpu/s_isinf.S
--- glibc-2.17-c758a686/sysdeps/powerpc/powerpc32/power7/fpu/s_isinf.S 2014-05-27 22:45:44.000000000 -0500
+++ glibc-2.17-c758a686/sysdeps/powerpc/powerpc32/power7/fpu/s_isinf.S 2014-05-27 22:45:46.000000000 -0500
@@ -48,14 +48,13 @@
li r3,0
bflr 29 /* If not INF, return. */
- /* Either we have -INF/+INF or a denormal. */
+ /* Either we have +INF or -INF. */
stwu r1,-16(r1) /* Allocate stack space. */
stfd fp1,8(r1) /* Transfer FP to GPR's. */
ori 2,2,0 /* Force a new dispatch group. */
- lhz r4,8(r1) /* Fetch the upper portion of the high word of
- the FP value (where the exponent and sign bits
- are). */
+ lhz r4,8+HISHORT(r1) /* Fetch the upper 16 bits of the FP value
+ (biased exponent and sign bit). */
addi r1,r1,16 /* Reset the stack pointer. */
cmpwi cr7,r4,0x7ff0 /* r4 == 0x7ff0? */
li r3,1
diff -urN glibc-2.17-c758a686/sysdeps/powerpc/powerpc32/power7/fpu/s_isnan.S glibc-2.17-c758a686/sysdeps/powerpc/powerpc32/power7/fpu/s_isnan.S
--- glibc-2.17-c758a686/sysdeps/powerpc/powerpc32/power7/fpu/s_isnan.S 2014-05-27 22:45:44.000000000 -0500
+++ glibc-2.17-c758a686/sysdeps/powerpc/powerpc32/power7/fpu/s_isnan.S 2014-05-27 22:45:46.000000000 -0500
@@ -53,8 +53,8 @@
stwu r1,-16(r1) /* Allocate stack space. */
stfd fp1,8(r1) /* Transfer FP to GPR's. */
ori 2,2,0 /* Force a new dispatch group. */
- lwz r4,8(r1) /* Load the upper half of the FP value. */
- lwz r5,12(r1) /* Load the lower half of the FP value. */
+ lwz r4,8+HIWORD(r1) /* Load the upper half of the FP value. */
+ lwz r5,8+LOWORD(r1) /* Load the lower half of the FP value. */
addi r1,r1,16 /* Reset the stack pointer. */
lis r0,0x7ff0 /* Load the upper portion for an INF/NaN. */
clrlwi r4,r4,1 /* r4 = abs(r4). */
diff -urN glibc-2.17-c758a686/sysdeps/powerpc/powerpc64/power7/fpu/s_finite.S glibc-2.17-c758a686/sysdeps/powerpc/powerpc64/power7/fpu/s_finite.S
--- glibc-2.17-c758a686/sysdeps/powerpc/powerpc64/power7/fpu/s_finite.S 2014-05-27 22:45:44.000000000 -0500
+++ glibc-2.17-c758a686/sysdeps/powerpc/powerpc64/power7/fpu/s_finite.S 2014-05-27 22:45:46.000000000 -0500
@@ -39,10 +39,8 @@
stfd fp1,-16(r1) /* Transfer FP to GPR's. */
ori 2,2,0 /* Force a new dispatch group. */
-
- lhz r4,-16(r1) /* Fetch the upper portion of the high word of
- the FP value (where the exponent and sign bits
- are). */
+ lhz r4,-16+HISHORT(r1) /* Fetch the upper 16 bits of the FP value
+ (biased exponent and sign bit). */
clrlwi r4,r4,17 /* r4 = abs(r4). */
cmpwi cr7,r4,0x7ff0 /* r4 == 0x7ff0? */
bltlr cr7 /* LT means finite, other non-finite. */
diff -urN glibc-2.17-c758a686/sysdeps/powerpc/powerpc64/power7/fpu/s_isinf.S glibc-2.17-c758a686/sysdeps/powerpc/powerpc64/power7/fpu/s_isinf.S
--- glibc-2.17-c758a686/sysdeps/powerpc/powerpc64/power7/fpu/s_isinf.S 2014-05-27 22:45:44.000000000 -0500
+++ glibc-2.17-c758a686/sysdeps/powerpc/powerpc64/power7/fpu/s_isinf.S 2014-05-27 22:45:46.000000000 -0500
@@ -38,9 +38,8 @@
stfd fp1,-16(r1) /* Transfer FP to GPR's. */
ori 2,2,0 /* Force a new dispatch group. */
- lhz r4,-16(r1) /* Fetch the upper portion of the high word of
- the FP value (where the exponent and sign bits
- are). */
+ lhz r4,-16+HISHORT(r1) /* Fetch the upper 16 bits of the FP value
+ (biased exponent and sign bit). */
cmpwi cr7,r4,0x7ff0 /* r4 == 0x7ff0? */
li r3,1
beqlr cr7 /* EQ means INF, otherwise -INF. */
diff -urN glibc-2.17-c758a686/sysdeps/powerpc/sysdep.h glibc-2.17-c758a686/sysdeps/powerpc/sysdep.h
--- glibc-2.17-c758a686/sysdeps/powerpc/sysdep.h 2014-05-27 22:45:44.000000000 -0500
+++ glibc-2.17-c758a686/sysdeps/powerpc/sysdep.h 2014-05-27 22:45:46.000000000 -0500
@@ -144,6 +144,21 @@
#define VRSAVE 256
+/* The 32-bit words of a 64-bit dword are at these offsets in memory. */
+#if defined __LITTLE_ENDIAN__ || defined _LITTLE_ENDIAN
+# define LOWORD 0
+# define HIWORD 4
+#else
+# define LOWORD 4
+# define HIWORD 0
+#endif
+
+/* The high 16-bit word of a 64-bit dword is at this offset in memory. */
+#if defined __LITTLE_ENDIAN__ || defined _LITTLE_ENDIAN
+# define HISHORT 6
+#else
+# define HISHORT 0
+#endif
/* This seems to always be the case on PPC. */
#define ALIGNARG(log2) log2

View File

@ -0,0 +1,81 @@
# commit 6a31fe7f9cce72b69fce8fe499a2c6ad492c2311
# Author: Alan Modra <amodra@gmail.com>
# Date: Sat Aug 17 18:32:18 2013 +0930
#
# PowerPC floating point little-endian [13 of 15]
# http://sourceware.org/ml/libc-alpha/2013-08/msg00088.html
#
# * sysdeps/powerpc/powerpc32/fpu/s_roundf.S: Increase alignment of
# constants to usual value for .cst8 section, and remove redundant
# high address load.
# * sysdeps/powerpc/powerpc32/power4/fpu/s_llround.S: Use float
# constant for 0x1p52. Load little-endian words of double from
# correct stack offsets.
#
diff -urN glibc-2.17-c758a686/sysdeps/powerpc/powerpc32/fpu/s_roundf.S glibc-2.17-c758a686/sysdeps/powerpc/powerpc32/fpu/s_roundf.S
--- glibc-2.17-c758a686/sysdeps/powerpc/powerpc32/fpu/s_roundf.S 2014-05-27 22:50:13.000000000 -0500
+++ glibc-2.17-c758a686/sysdeps/powerpc/powerpc32/fpu/s_roundf.S 2014-05-27 22:50:13.000000000 -0500
@@ -19,7 +19,7 @@
#include <sysdep.h>
.section .rodata.cst8,"aM",@progbits,8
- .align 2
+ .align 3
.LC0: /* 2**23 */
.long 0x4b000000
.LC1: /* 0.5 */
@@ -60,7 +60,6 @@
#ifdef SHARED
lfs fp10,.LC1-.LC0(r9)
#else
- lis r9,.LC1@ha
lfs fp10,.LC1@l(r9)
#endif
ble- cr6,.L4
diff -urN glibc-2.17-c758a686/sysdeps/powerpc/powerpc32/power4/fpu/s_llround.S glibc-2.17-c758a686/sysdeps/powerpc/powerpc32/power4/fpu/s_llround.S
--- glibc-2.17-c758a686/sysdeps/powerpc/powerpc32/power4/fpu/s_llround.S 2014-05-27 22:50:13.000000000 -0500
+++ glibc-2.17-c758a686/sysdeps/powerpc/powerpc32/power4/fpu/s_llround.S 2014-05-27 22:50:13.000000000 -0500
@@ -19,12 +19,10 @@
#include <sysdep.h>
#include <math_ldbl_opt.h>
- .section .rodata.cst12,"aM",@progbits,12
+ .section .rodata.cst8,"aM",@progbits,8
.align 3
- .LC0: /* 0x1.0000000000000p+52 == 2^52 */
- .long 0x43300000
- .long 0x00000000
- .long 0x3f000000 /* Use this for 0.5 */
+ .LC0: .long (52+127)<<23 /* 0x1p+52 */
+ .long (-1+127)<<23 /* 0.5 */
.section ".text"
@@ -57,12 +55,12 @@
addi r9,r9,.LC0-got_label@l
mtlr r11
cfi_same_value (lr)
- lfd fp9,0(r9)
- lfs fp10,8(r9)
+ lfs fp9,0(r9)
+ lfs fp10,4(r9)
#else
lis r9,.LC0@ha
- lfd fp9,.LC0@l(r9) /* Load 2^52 into fpr9. */
- lfs fp10,.LC0@l+8(r9) /* Load 0.5 into fpr10. */
+ lfs fp9,.LC0@l(r9) /* Load 2^52 into fpr9. */
+ lfs fp10,.LC0@l+4(r9) /* Load 0.5 into fpr10. */
#endif
fabs fp2,fp1 /* Get the absolute value of x. */
fsub fp12,fp10,fp10 /* Compute 0.0 into fpr12. */
@@ -80,8 +78,8 @@
nop
nop
nop
- lwz r4,12(r1) /* Load return as integer. */
- lwz r3,8(r1)
+ lwz r3,8+HIWORD(r1) /* Load return as integer. */
+ lwz r4,8+LOWORD(r1)
.Lout:
addi r1,r1,16
blr

View File

@ -0,0 +1,110 @@
# commit 76a66d510a3737674563133a420f4fd22da42c1b
# Author: Anton Blanchard <anton@au1.ibm.com>
# Date: Sat Aug 17 18:33:02 2013 +0930
#
# PowerPC floating point little-endian [14 of 15]
# http://sourceware.org/ml/libc-alpha/2013-07/msg00205.html
#
# These all wrongly specified float constants in a 64-bit word.
#
# * sysdeps/powerpc/powerpc64/fpu/s_ceilf.S: Correct float constants
# for little-endian.
# * sysdeps/powerpc/powerpc64/fpu/s_floorf.S: Likewise.
# * sysdeps/powerpc/powerpc64/fpu/s_nearbyintf.S: Likewise.
# * sysdeps/powerpc/powerpc64/fpu/s_rintf.S: Likewise.
# * sysdeps/powerpc/powerpc64/fpu/s_roundf.S: Likewise.
# * sysdeps/powerpc/powerpc64/fpu/s_truncf.S: Likewise.
#
diff -urN glibc-2.17-c758a686/sysdeps/powerpc/powerpc64/fpu/s_ceilf.S glibc-2.17-c758a686/sysdeps/powerpc/powerpc64/fpu/s_ceilf.S
--- glibc-2.17-c758a686/sysdeps/powerpc/powerpc64/fpu/s_ceilf.S 2014-05-27 22:52:12.000000000 -0500
+++ glibc-2.17-c758a686/sysdeps/powerpc/powerpc64/fpu/s_ceilf.S 2014-05-27 22:52:18.000000000 -0500
@@ -19,8 +19,10 @@
#include <sysdep.h>
.section ".toc","aw"
+ .p2align 3
.LC0: /* 2**23 */
- .tc FD_4b000000_0[TC],0x4b00000000000000
+ .long 0x4b000000
+ .long 0x0
.section ".text"
EALIGN (__ceilf, 4, 0)
diff -urN glibc-2.17-c758a686/sysdeps/powerpc/powerpc64/fpu/s_floorf.S glibc-2.17-c758a686/sysdeps/powerpc/powerpc64/fpu/s_floorf.S
--- glibc-2.17-c758a686/sysdeps/powerpc/powerpc64/fpu/s_floorf.S 2014-05-27 22:52:12.000000000 -0500
+++ glibc-2.17-c758a686/sysdeps/powerpc/powerpc64/fpu/s_floorf.S 2014-05-27 22:52:18.000000000 -0500
@@ -19,8 +19,10 @@
#include <sysdep.h>
.section ".toc","aw"
+ .p2align 3
.LC0: /* 2**23 */
- .tc FD_4b000000_0[TC],0x4b00000000000000
+ .long 0x4b000000
+ .long 0x0
.section ".text"
EALIGN (__floorf, 4, 0)
diff -urN glibc-2.17-c758a686/sysdeps/powerpc/powerpc64/fpu/s_nearbyintf.S glibc-2.17-c758a686/sysdeps/powerpc/powerpc64/fpu/s_nearbyintf.S
--- glibc-2.17-c758a686/sysdeps/powerpc/powerpc64/fpu/s_nearbyintf.S 2014-05-27 22:52:12.000000000 -0500
+++ glibc-2.17-c758a686/sysdeps/powerpc/powerpc64/fpu/s_nearbyintf.S 2014-05-27 22:52:18.000000000 -0500
@@ -26,8 +26,10 @@
/* float [fp1] nearbyintf(float [fp1]) */
.section ".toc","aw"
+ .p2align 3
.LC0: /* 2**23 */
- .tc FD_4b000000_0[TC],0x4b00000000000000
+ .long 0x4b000000
+ .long 0x0
.section ".text"
EALIGN (__nearbyintf, 4, 0)
diff -urN glibc-2.17-c758a686/sysdeps/powerpc/powerpc64/fpu/s_rintf.S glibc-2.17-c758a686/sysdeps/powerpc/powerpc64/fpu/s_rintf.S
--- glibc-2.17-c758a686/sysdeps/powerpc/powerpc64/fpu/s_rintf.S 2014-05-27 22:52:12.000000000 -0500
+++ glibc-2.17-c758a686/sysdeps/powerpc/powerpc64/fpu/s_rintf.S 2014-05-27 22:52:18.000000000 -0500
@@ -19,8 +19,10 @@
#include <sysdep.h>
.section ".toc","aw"
+ .p2align 3
.LC0: /* 2**23 */
- .tc FD_4b000000_0[TC],0x4b00000000000000
+ .long 0x4b000000
+ .long 0x0
.section ".text"
EALIGN (__rintf, 4, 0)
diff -urN glibc-2.17-c758a686/sysdeps/powerpc/powerpc64/fpu/s_roundf.S glibc-2.17-c758a686/sysdeps/powerpc/powerpc64/fpu/s_roundf.S
--- glibc-2.17-c758a686/sysdeps/powerpc/powerpc64/fpu/s_roundf.S 2014-05-27 22:52:12.000000000 -0500
+++ glibc-2.17-c758a686/sysdeps/powerpc/powerpc64/fpu/s_roundf.S 2014-05-27 22:52:18.000000000 -0500
@@ -19,10 +19,12 @@
#include <sysdep.h>
.section ".toc","aw"
+ .p2align 3
.LC0: /* 2**23 */
- .tc FD_4b000000_0[TC],0x4b00000000000000
+ .long 0x4b000000
.LC1: /* 0.5 */
- .tc FD_3f000000_0[TC],0x3f00000000000000
+ .long 0x3f000000
+
.section ".text"
/* float [fp1] roundf (float x [fp1])
diff -urN glibc-2.17-c758a686/sysdeps/powerpc/powerpc64/fpu/s_truncf.S glibc-2.17-c758a686/sysdeps/powerpc/powerpc64/fpu/s_truncf.S
--- glibc-2.17-c758a686/sysdeps/powerpc/powerpc64/fpu/s_truncf.S 2014-05-27 22:52:12.000000000 -0500
+++ glibc-2.17-c758a686/sysdeps/powerpc/powerpc64/fpu/s_truncf.S 2014-05-27 22:52:18.000000000 -0500
@@ -19,8 +19,10 @@
#include <sysdep.h>
.section ".toc","aw"
+ .p2align 3
.LC0: /* 2**23 */
- .tc FD_4b000000_0[TC],0x4b00000000000000
+ .long 0x4b000000
+ .long 0x0
.section ".text"
/* float [fp1] truncf (float x [fp1])

View File

@ -0,0 +1,43 @@
# commit fef13a78ea30d4c26d6bab48d731ebe864ee31b0
# Author: Alan Modra <amodra@gmail.com>
# Date: Sat Aug 17 18:33:45 2013 +0930
#
# PowerPC floating point little-endian [15 of 15]
# http://sourceware.org/ml/libc-alpha/2013-07/msg00206.html
#
# The union loses when little-endian.
#
# * sysdeps/powerpc/powerpc32/power4/hp-timing.h (HP_TIMING_NOW):
# Don't use a union to pack hi/low value.
#
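# [Editor's note, not part of the upstream patch]  The old union assumed
# ii[0] is the high half of ll, which only holds on big-endian; composing
# the value arithmetically, as the patched HP_TIMING_NOW does, is
# endian-neutral.  A stand-alone C sketch (strip the leading "# " to
# compile it):
#
#   #include <stdio.h>
#
#   int main (void)
#   {
#     unsigned int hi = 1, lo = 2;  /* stand-ins for the two timebase halves */
#
#     /* Endian-dependent: ii[0] is the high half only on big-endian.  */
#     union { unsigned long long ll; unsigned int ii[2]; } u = { .ii = { hi, lo } };
#
#     /* Endian-neutral: what the patched macro computes.  */
#     unsigned long long v = ((unsigned long long) hi << 32) | lo;
#
#     printf ("union: %016llx   shift/or: %016llx\n", u.ll, v);
#     return 0;
#   }
#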
diff -urN glibc-2.17-c758a686/sysdeps/powerpc/powerpc32/power4/hp-timing.h glibc-2.17-c758a686/sysdeps/powerpc/powerpc32/power4/hp-timing.h
--- glibc-2.17-c758a686/sysdeps/powerpc/powerpc32/power4/hp-timing.h 2014-05-27 22:53:37.000000000 -0500
+++ glibc-2.17-c758a686/sysdeps/powerpc/powerpc32/power4/hp-timing.h 2014-05-27 22:53:39.000000000 -0500
@@ -87,18 +87,15 @@
#define HP_TIMING_NOW(Var) \
do { \
- union { long long ll; long ii[2]; } _var; \
- long tmp; \
- __asm__ __volatile__ ( \
- "1: mfspr %0,269;" \
- " mfspr %1,268;" \
- " mfspr %2,269;" \
- " cmpw %0,%2;" \
- " bne 1b;" \
- : "=r" (_var.ii[0]), "=r" (_var.ii[1]) , "=r" (tmp) \
- : : "cr0" \
- ); \
- Var = _var.ll; \
+ unsigned int hi, lo, tmp; \
+ __asm__ __volatile__ ("1: mfspr %0,269;" \
+ " mfspr %1,268;" \
+ " mfspr %2,269;" \
+ " cmpw %0,%2;" \
+ " bne 1b;" \
+ : "=&r" (hi), "=&r" (lo), "=&r" (tmp) \
+ : : "cr0"); \
+ Var = ((hp_timing_t) hi << 32) | lo; \
} while (0)

View File

@ -0,0 +1,294 @@
# commit be1e5d311342e08ae1f8013342df27b7ded2c156
# Author: Anton Blanchard <anton@au1.ibm.com>
# Date: Sat Aug 17 18:34:40 2013 +0930
#
# PowerPC LE setjmp/longjmp
# http://sourceware.org/ml/libc-alpha/2013-08/msg00089.html
#
# Little-endian fixes for setjmp/longjmp. When writing these I noticed
# the setjmp code corrupts the non volatile VMX registers when using an
# unaligned buffer. Anton fixed this, and also simplified it quite a
# bit.
#
# The current code uses boilerplate for the case where we want to store
# 16 bytes to an unaligned address. For that we have to do a
# read/modify/write of two aligned 16 byte quantities. In our case we
# are storing a bunch of back-to-back data (consecutive VMX registers), and
# and only the start and end of the region need the read/modify/write.
#
# [BZ #15723]
# * sysdeps/powerpc/jmpbuf-offsets.h: Comment fix.
# * sysdeps/powerpc/powerpc32/fpu/__longjmp-common.S: Correct
# _dl_hwcap access for little-endian.
# * sysdeps/powerpc/powerpc32/fpu/setjmp-common.S: Likewise. Don't
# destroy vmx regs when saving unaligned.
# * sysdeps/powerpc/powerpc64/__longjmp-common.S: Correct CR load.
# * sysdeps/powerpc/powerpc64/setjmp-common.S: Likewise CR save. Don't
# destroy vmx regs when saving unaligned.
#
diff -urN glibc-2.17-c758a686/sysdeps/powerpc/jmpbuf-offsets.h glibc-2.17-c758a686/sysdeps/powerpc/jmpbuf-offsets.h
--- glibc-2.17-c758a686/sysdeps/powerpc/jmpbuf-offsets.h 2014-05-27 22:55:23.000000000 -0500
+++ glibc-2.17-c758a686/sysdeps/powerpc/jmpbuf-offsets.h 2014-05-27 22:55:27.000000000 -0500
@@ -21,12 +21,10 @@
#define JB_LR 2 /* The address we will return to */
#if __WORDSIZE == 64
# define JB_GPRS 3 /* GPRs 14 through 31 are saved, 18*2 words total. */
-# define JB_CR 21 /* Condition code registers with the VRSAVE at */
- /* offset 172 (low half of the double word. */
+# define JB_CR 21 /* Shared dword with VRSAVE. CR word at offset 172. */
# define JB_FPRS 22 /* FPRs 14 through 31 are saved, 18*2 words total. */
# define JB_SIZE (64 * 8) /* As per PPC64-VMX ABI. */
-# define JB_VRSAVE 21 /* VRSAVE shares a double word with the CR at offset */
- /* 168 (high half of the double word). */
+# define JB_VRSAVE 21 /* Shared dword with CR. VRSAVE word at offset 168. */
# define JB_VRS 40 /* VRs 20 through 31 are saved, 12*4 words total. */
#else
# define JB_GPRS 3 /* GPRs 14 through 31 are saved, 18 in total. */
diff -urN glibc-2.17-c758a686/sysdeps/powerpc/powerpc32/fpu/__longjmp-common.S glibc-2.17-c758a686/sysdeps/powerpc/powerpc32/fpu/__longjmp-common.S
--- glibc-2.17-c758a686/sysdeps/powerpc/powerpc32/fpu/__longjmp-common.S 2014-05-27 22:55:23.000000000 -0500
+++ glibc-2.17-c758a686/sysdeps/powerpc/powerpc32/fpu/__longjmp-common.S 2014-05-27 22:55:27.000000000 -0500
@@ -46,16 +46,16 @@
# endif
mtlr r6
cfi_same_value (lr)
- lwz r5,RTLD_GLOBAL_RO_DL_HWCAP_OFFSET+4(r5)
+ lwz r5,RTLD_GLOBAL_RO_DL_HWCAP_OFFSET+LOWORD(r5)
# else
lwz r5,_dl_hwcap@got(r5)
mtlr r6
cfi_same_value (lr)
- lwz r5,4(r5)
+ lwz r5,LOWORD(r5)
# endif
# else
- lis r5,(_dl_hwcap+4)@ha
- lwz r5,(_dl_hwcap+4)@l(r5)
+ lis r5,(_dl_hwcap+LOWORD)@ha
+ lwz r5,(_dl_hwcap+LOWORD)@l(r5)
# endif
andis. r5,r5,(PPC_FEATURE_HAS_ALTIVEC >> 16)
beq L(no_vmx)
diff -urN glibc-2.17-c758a686/sysdeps/powerpc/powerpc32/fpu/setjmp-common.S glibc-2.17-c758a686/sysdeps/powerpc/powerpc32/fpu/setjmp-common.S
--- glibc-2.17-c758a686/sysdeps/powerpc/powerpc32/fpu/setjmp-common.S 2014-05-27 22:55:23.000000000 -0500
+++ glibc-2.17-c758a686/sysdeps/powerpc/powerpc32/fpu/setjmp-common.S 2014-05-27 22:55:27.000000000 -0500
@@ -97,14 +97,14 @@
# else
lwz r5,_rtld_global_ro@got(r5)
# endif
- lwz r5,RTLD_GLOBAL_RO_DL_HWCAP_OFFSET+4(r5)
+ lwz r5,RTLD_GLOBAL_RO_DL_HWCAP_OFFSET+LOWORD(r5)
# else
lwz r5,_dl_hwcap@got(r5)
- lwz r5,4(r5)
+ lwz r5,LOWORD(r5)
# endif
# else
- lis r6,(_dl_hwcap+4)@ha
- lwz r5,(_dl_hwcap+4)@l(r6)
+ lis r6,(_dl_hwcap+LOWORD)@ha
+ lwz r5,(_dl_hwcap+LOWORD)@l(r6)
# endif
andis. r5,r5,(PPC_FEATURE_HAS_ALTIVEC >> 16)
beq L(no_vmx)
@@ -114,44 +114,43 @@
stw r0,((JB_VRSAVE)*4)(3)
addi r6,r5,16
beq+ L(aligned_save_vmx)
+
lvsr v0,0,r5
- vspltisb v1,-1 /* set v1 to all 1's */
- vspltisb v2,0 /* set v2 to all 0's */
- vperm v3,v2,v1,v0 /* v3 contains shift mask with num all 1 bytes on left = misalignment */
-
-
- /* Special case for v20 we need to preserve what is in save area below v20 before obliterating it */
- lvx v5,0,r5
- vperm v20,v20,v20,v0
- vsel v5,v5,v20,v3
- vsel v20,v20,v2,v3
- stvx v5,0,r5
-
-#define save_2vmx_partial(savevr,prev_savevr,hivr,shiftvr,maskvr,savegpr,addgpr) \
- addi addgpr,addgpr,32; \
- vperm savevr,savevr,savevr,shiftvr; \
- vsel hivr,prev_savevr,savevr,maskvr; \
- stvx hivr,0,savegpr;
-
- save_2vmx_partial(v21,v20,v5,v0,v3,r6,r5)
- save_2vmx_partial(v22,v21,v5,v0,v3,r5,r6)
- save_2vmx_partial(v23,v22,v5,v0,v3,r6,r5)
- save_2vmx_partial(v24,v23,v5,v0,v3,r5,r6)
- save_2vmx_partial(v25,v24,v5,v0,v3,r6,r5)
- save_2vmx_partial(v26,v25,v5,v0,v3,r5,r6)
- save_2vmx_partial(v27,v26,v5,v0,v3,r6,r5)
- save_2vmx_partial(v28,v27,v5,v0,v3,r5,r6)
- save_2vmx_partial(v29,v28,v5,v0,v3,r6,r5)
- save_2vmx_partial(v30,v29,v5,v0,v3,r5,r6)
-
- /* Special case for r31 we need to preserve what is in save area above v31 before obliterating it */
- addi r5,r5,32
- vperm v31,v31,v31,v0
- lvx v4,0,r5
- vsel v5,v30,v31,v3
- stvx v5,0,r6
- vsel v4,v31,v4,v3
- stvx v4,0,r5
+ lvsl v1,0,r5
+ addi r6,r5,-16
+
+# define save_misaligned_vmx(savevr,prevvr,shiftvr,tmpvr,savegpr,addgpr) \
+ addi addgpr,addgpr,32; \
+ vperm tmpvr,prevvr,savevr,shiftvr; \
+ stvx tmpvr,0,savegpr
+
+ /*
+ * We have to be careful not to corrupt the data below v20 and
+ * above v31. To keep things simple we just rotate both ends in
+ * the opposite direction to our main permute so we can use
+ * the common macro.
+ */
+
+ /* load and rotate data below v20 */
+ lvx v2,0,r5
+ vperm v2,v2,v2,v1
+ save_misaligned_vmx(v20,v2,v0,v3,r5,r6)
+ save_misaligned_vmx(v21,v20,v0,v3,r6,r5)
+ save_misaligned_vmx(v22,v21,v0,v3,r5,r6)
+ save_misaligned_vmx(v23,v22,v0,v3,r6,r5)
+ save_misaligned_vmx(v24,v23,v0,v3,r5,r6)
+ save_misaligned_vmx(v25,v24,v0,v3,r6,r5)
+ save_misaligned_vmx(v26,v25,v0,v3,r5,r6)
+ save_misaligned_vmx(v27,v26,v0,v3,r6,r5)
+ save_misaligned_vmx(v28,v27,v0,v3,r5,r6)
+ save_misaligned_vmx(v29,v28,v0,v3,r6,r5)
+ save_misaligned_vmx(v30,v29,v0,v3,r5,r6)
+ save_misaligned_vmx(v31,v30,v0,v3,r6,r5)
+ /* load and rotate data above v31 */
+ lvx v2,0,r6
+ vperm v2,v2,v2,v1
+ save_misaligned_vmx(v2,v31,v0,v3,r5,r6)
+
b L(no_vmx)
L(aligned_save_vmx):
diff -urN glibc-2.17-c758a686/sysdeps/powerpc/powerpc64/__longjmp-common.S glibc-2.17-c758a686/sysdeps/powerpc/powerpc64/__longjmp-common.S
--- glibc-2.17-c758a686/sysdeps/powerpc/powerpc64/__longjmp-common.S 2014-05-27 22:55:23.000000000 -0500
+++ glibc-2.17-c758a686/sysdeps/powerpc/powerpc64/__longjmp-common.S 2014-05-27 22:55:27.000000000 -0500
@@ -60,7 +60,7 @@
beq L(no_vmx)
la r5,((JB_VRS)*8)(3)
andi. r6,r5,0xf
- lwz r0,((JB_VRSAVE)*8)(3)
+ lwz r0,((JB_VRSAVE)*8)(3) /* 32-bit VRSAVE. */
mtspr VRSAVE,r0
beq+ L(aligned_restore_vmx)
addi r6,r5,16
@@ -156,7 +156,7 @@
lfd fp21,((JB_FPRS+7)*8)(r3)
ld r22,((JB_GPRS+8)*8)(r3)
lfd fp22,((JB_FPRS+8)*8)(r3)
- ld r0,(JB_CR*8)(r3)
+ lwz r0,((JB_CR*8)+4)(r3) /* 32-bit CR. */
ld r23,((JB_GPRS+9)*8)(r3)
lfd fp23,((JB_FPRS+9)*8)(r3)
ld r24,((JB_GPRS+10)*8)(r3)
diff -urN glibc-2.17-c758a686/sysdeps/powerpc/powerpc64/setjmp-common.S glibc-2.17-c758a686/sysdeps/powerpc/powerpc64/setjmp-common.S
--- glibc-2.17-c758a686/sysdeps/powerpc/powerpc64/setjmp-common.S 2014-05-27 22:55:23.000000000 -0500
+++ glibc-2.17-c758a686/sysdeps/powerpc/powerpc64/setjmp-common.S 2014-05-27 22:55:27.000000000 -0500
@@ -98,7 +98,7 @@
mfcr r0
std r16,((JB_GPRS+2)*8)(3)
stfd fp16,((JB_FPRS+2)*8)(3)
- std r0,(JB_CR*8)(3)
+ stw r0,((JB_CR*8)+4)(3) /* 32-bit CR. */
std r17,((JB_GPRS+3)*8)(3)
stfd fp17,((JB_FPRS+3)*8)(3)
std r18,((JB_GPRS+4)*8)(3)
@@ -142,50 +142,46 @@
la r5,((JB_VRS)*8)(3)
andi. r6,r5,0xf
mfspr r0,VRSAVE
- stw r0,((JB_VRSAVE)*8)(3)
+ stw r0,((JB_VRSAVE)*8)(3) /* 32-bit VRSAVE. */
addi r6,r5,16
beq+ L(aligned_save_vmx)
+
lvsr v0,0,r5
- vspltisb v1,-1 /* set v1 to all 1's */
- vspltisb v2,0 /* set v2 to all 0's */
- vperm v3,v2,v1,v0 /* v3 contains shift mask with num all 1 bytes
- on left = misalignment */
-
-
- /* Special case for v20 we need to preserve what is in save area
- below v20 before obliterating it */
- lvx v5,0,r5
- vperm v20,v20,v20,v0
- vsel v5,v5,v20,v3
- vsel v20,v20,v2,v3
- stvx v5,0,r5
-
-# define save_2vmx_partial(savevr,prev_savevr,hivr,shiftvr,maskvr,savegpr,addgpr) \
- addi addgpr,addgpr,32; \
- vperm savevr,savevr,savevr,shiftvr; \
- vsel hivr,prev_savevr,savevr,maskvr; \
- stvx hivr,0,savegpr;
-
- save_2vmx_partial(v21,v20,v5,v0,v3,r6,r5)
- save_2vmx_partial(v22,v21,v5,v0,v3,r5,r6)
- save_2vmx_partial(v23,v22,v5,v0,v3,r6,r5)
- save_2vmx_partial(v24,v23,v5,v0,v3,r5,r6)
- save_2vmx_partial(v25,v24,v5,v0,v3,r6,r5)
- save_2vmx_partial(v26,v25,v5,v0,v3,r5,r6)
- save_2vmx_partial(v27,v26,v5,v0,v3,r6,r5)
- save_2vmx_partial(v28,v27,v5,v0,v3,r5,r6)
- save_2vmx_partial(v29,v28,v5,v0,v3,r6,r5)
- save_2vmx_partial(v30,v29,v5,v0,v3,r5,r6)
-
- /* Special case for r31 we need to preserve what is in save area
- above v31 before obliterating it */
- addi r5,r5,32
- vperm v31,v31,v31,v0
- lvx v4,0,r5
- vsel v5,v30,v31,v3
- stvx v5,0,r6
- vsel v4,v31,v4,v3
- stvx v4,0,r5
+ lvsl v1,0,r5
+ addi r6,r5,-16
+
+# define save_misaligned_vmx(savevr,prevvr,shiftvr,tmpvr,savegpr,addgpr) \
+ addi addgpr,addgpr,32; \
+ vperm tmpvr,prevvr,savevr,shiftvr; \
+ stvx tmpvr,0,savegpr
+
+ /*
+ * We have to be careful not to corrupt the data below v20 and
+ * above v31. To keep things simple we just rotate both ends in
+ * the opposite direction to our main permute so we can use
+ * the common macro.
+ */
+
+ /* load and rotate data below v20 */
+ lvx v2,0,r5
+ vperm v2,v2,v2,v1
+ save_misaligned_vmx(v20,v2,v0,v3,r5,r6)
+ save_misaligned_vmx(v21,v20,v0,v3,r6,r5)
+ save_misaligned_vmx(v22,v21,v0,v3,r5,r6)
+ save_misaligned_vmx(v23,v22,v0,v3,r6,r5)
+ save_misaligned_vmx(v24,v23,v0,v3,r5,r6)
+ save_misaligned_vmx(v25,v24,v0,v3,r6,r5)
+ save_misaligned_vmx(v26,v25,v0,v3,r5,r6)
+ save_misaligned_vmx(v27,v26,v0,v3,r6,r5)
+ save_misaligned_vmx(v28,v27,v0,v3,r5,r6)
+ save_misaligned_vmx(v29,v28,v0,v3,r6,r5)
+ save_misaligned_vmx(v30,v29,v0,v3,r5,r6)
+ save_misaligned_vmx(v31,v30,v0,v3,r6,r5)
+ /* load and rotate data above v31 */
+ lvx v2,0,r6
+ vperm v2,v2,v2,v1
+ save_misaligned_vmx(v2,v31,v0,v3,r5,r6)
+
b L(no_vmx)
L(aligned_save_vmx):

View File

@@ -0,0 +1,228 @@
# commit 9b874b2f1eb2550e39d3e9c38772e64a767e9de2
# Author: Alan Modra <amodra@gmail.com>
# Date: Sat Aug 17 18:35:40 2013 +0930
#
# PowerPC ugly symbol versioning
# http://sourceware.org/ml/libc-alpha/2013-08/msg00090.html
#
# This patch fixes symbol versioning in setjmp/longjmp. The existing
# code uses raw versions, which results in wrong symbol versioning when
# you want to build glibc with a base version of 2.19 for LE.
#
# Note that merging the 64-bit and 32-bit versions in novmx-longjmp.c
# and pt-longjmp.c doesn't result in GLIBC_2.0 versions for 64-bit, due
# to the base version in shlib-versions.
#
# * sysdeps/powerpc/longjmp.c: Use proper symbol versioning macros.
# * sysdeps/powerpc/novmx-longjmp.c: Likewise.
# * sysdeps/powerpc/powerpc32/bsd-_setjmp.S: Likewise.
# * sysdeps/powerpc/powerpc32/bsd-setjmp.S: Likewise.
# * sysdeps/powerpc/powerpc32/fpu/__longjmp.S: Likewise.
# * sysdeps/powerpc/powerpc32/fpu/setjmp.S: Likewise.
# * sysdeps/powerpc/powerpc32/mcount.c: Likewise.
# * sysdeps/powerpc/powerpc32/setjmp.S: Likewise.
# * sysdeps/powerpc/powerpc64/setjmp.S: Likewise.
# * nptl/sysdeps/unix/sysv/linux/powerpc/pt-longjmp.c: Likewise.
#
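# The macro pattern being switched to looks like this in C (a minimal,
# hypothetical example using glibc's <shlib-compat.h>; foo, __foo_new and
# __foo_old are illustrative names, not symbols from this patch):
#
#   #include <shlib-compat.h>
#
#   int __foo_new (int x) { return x + 1; }   /* current implementation */
#   int __foo_old (int x) { return x; }       /* legacy implementation  */
#
#   /* foo resolves to __foo_new for binaries linked against 2.3.4+.  */
#   versioned_symbol (libc, __foo_new, foo, GLIBC_2_3_4);
#
#   #if SHLIB_COMPAT (libc, GLIBC_2_0, GLIBC_2_3_4)
#   /* Old binaries that reference foo@GLIBC_2.0 keep working.  */
#   compat_symbol (libc, __foo_old, foo, GLIBC_2_0);
#   #endif
#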
diff -urN glibc-2.17-c758a686/nptl/sysdeps/unix/sysv/linux/powerpc/pt-longjmp.c glibc-2.17-c758a686/nptl/sysdeps/unix/sysv/linux/powerpc/pt-longjmp.c
--- glibc-2.17-c758a686/nptl/sysdeps/unix/sysv/linux/powerpc/pt-longjmp.c 2014-05-27 23:22:12.000000000 -0500
+++ glibc-2.17-c758a686/nptl/sysdeps/unix/sysv/linux/powerpc/pt-longjmp.c 2014-05-27 23:23:44.000000000 -0500
@@ -41,13 +41,8 @@
__novmx__libc_longjmp (env, val);
}
-# if __WORDSIZE == 64
-symbol_version (__novmx_longjmp,longjmp,GLIBC_2.3);
-symbol_version (__novmx_siglongjmp,siglongjmp,GLIBC_2.3);
-# else
-symbol_version (__novmx_longjmp,longjmp,GLIBC_2.0);
-symbol_version (__novmx_siglongjmp,siglongjmp,GLIBC_2.0);
-# endif
+compat_symbol (libpthread, __novmx_longjmp, longjmp, GLIBC_2_0);
+compat_symbol (libpthread, __novmx_siglongjmp, siglongjmp, GLIBC_2_0);
#endif /* defined SHARED && SHLIB_COMPAT (libc, GLIBC_2_0, GLIBC_2_3_4)) */
void
@@ -62,5 +57,5 @@
__libc_siglongjmp (env, val);
}
-versioned_symbol (libc, __vmx_longjmp, longjmp, GLIBC_2_3_4);
-versioned_symbol (libc, __vmx_siglongjmp, siglongjmp, GLIBC_2_3_4);
+versioned_symbol (libpthread, __vmx_longjmp, longjmp, GLIBC_2_3_4);
+versioned_symbol (libpthread, __vmx_siglongjmp, siglongjmp, GLIBC_2_3_4);
diff -urN glibc-2.17-c758a686/sysdeps/powerpc/longjmp.c glibc-2.17-c758a686/sysdeps/powerpc/longjmp.c
--- glibc-2.17-c758a686/sysdeps/powerpc/longjmp.c 2014-05-27 23:22:10.000000000 -0500
+++ glibc-2.17-c758a686/sysdeps/powerpc/longjmp.c 2014-05-27 23:22:12.000000000 -0500
@@ -56,6 +56,6 @@
default_symbol_version (__vmx__libc_longjmp, __libc_longjmp, GLIBC_PRIVATE);
default_symbol_version (__vmx__libc_siglongjmp, __libc_siglongjmp, GLIBC_PRIVATE);
-default_symbol_version (__vmx_longjmp, _longjmp, GLIBC_2.3.4);
-default_symbol_version (__vmxlongjmp, longjmp, GLIBC_2.3.4);
-default_symbol_version (__vmxsiglongjmp, siglongjmp, GLIBC_2.3.4);
+versioned_symbol (libc, __vmx_longjmp, _longjmp, GLIBC_2_3_4);
+versioned_symbol (libc, __vmxlongjmp, longjmp, GLIBC_2_3_4);
+versioned_symbol (libc, __vmxsiglongjmp, siglongjmp, GLIBC_2_3_4);
diff -urN glibc-2.17-c758a686/sysdeps/powerpc/novmx-longjmp.c glibc-2.17-c758a686/sysdeps/powerpc/novmx-longjmp.c
--- glibc-2.17-c758a686/sysdeps/powerpc/novmx-longjmp.c 2014-05-27 23:22:10.000000000 -0500
+++ glibc-2.17-c758a686/sysdeps/powerpc/novmx-longjmp.c 2014-05-27 23:22:12.000000000 -0500
@@ -51,13 +51,7 @@
weak_alias (__novmx__libc_siglongjmp, __novmxlongjmp)
weak_alias (__novmx__libc_siglongjmp, __novmxsiglongjmp)
-# if __WORDSIZE == 64
-symbol_version (__novmx_longjmp,_longjmp,GLIBC_2.3);
-symbol_version (__novmxlongjmp,longjmp,GLIBC_2.3);
-symbol_version (__novmxsiglongjmp,siglongjmp,GLIBC_2.3);
-# else
-symbol_version (__novmx_longjmp,_longjmp,GLIBC_2.0);
-symbol_version (__novmxlongjmp,longjmp,GLIBC_2.0);
-symbol_version (__novmxsiglongjmp,siglongjmp,GLIBC_2.0);
-# endif
+compat_symbol (libc, __novmx_longjmp, _longjmp, GLIBC_2_0);
+compat_symbol (libc, __novmxlongjmp, longjmp, GLIBC_2_0);
+compat_symbol (libc, __novmxsiglongjmp, siglongjmp, GLIBC_2_0);
#endif /* defined SHARED && SHLIB_COMPAT (libc, GLIBC_2_0, GLIBC_2_3_4)) */
diff -urN glibc-2.17-c758a686/sysdeps/powerpc/powerpc32/bsd-_setjmp.S glibc-2.17-c758a686/sysdeps/powerpc/powerpc32/bsd-_setjmp.S
--- glibc-2.17-c758a686/sysdeps/powerpc/powerpc32/bsd-_setjmp.S 2014-05-27 23:22:10.000000000 -0500
+++ glibc-2.17-c758a686/sysdeps/powerpc/powerpc32/bsd-_setjmp.S 2014-05-27 23:22:12.000000000 -0500
@@ -32,7 +32,7 @@
/* Build a versioned object for libc. */
# if defined SHARED && SHLIB_COMPAT (libc, GLIBC_2_0, GLIBC_2_3_4)
-symbol_version (__novmx_setjmp,_setjmp,GLIBC_2.0);
+compat_symbol (libc, __novmx_setjmp, _setjmp, GLIBC_2_0);
ENTRY (BP_SYM (__novmx_setjmp))
li r4,0 /* Set second argument to 0. */
@@ -41,7 +41,7 @@
libc_hidden_def (__novmx_setjmp)
# endif /* defined SHARED && SHLIB_COMPAT (libc, GLIBC_2_0, GLIBC_2_3_4) */
-default_symbol_version (__vmx_setjmp,_setjmp,GLIBC_2.3.4)
+versioned_symbol (libc, __vmx_setjmp, _setjmp, GLIBC_2_3_4)
/* __GI__setjmp prototype is needed for ntpl i.e. _setjmp is defined
as a libc_hidden_proto & is used in sysdeps/generic/libc-start.c
if HAVE_CLEANUP_JMP_BUF is defined */
diff -urN glibc-2.17-c758a686/sysdeps/powerpc/powerpc32/bsd-setjmp.S glibc-2.17-c758a686/sysdeps/powerpc/powerpc32/bsd-setjmp.S
--- glibc-2.17-c758a686/sysdeps/powerpc/powerpc32/bsd-setjmp.S 2014-05-27 23:22:10.000000000 -0500
+++ glibc-2.17-c758a686/sysdeps/powerpc/powerpc32/bsd-setjmp.S 2014-05-27 23:22:12.000000000 -0500
@@ -27,7 +27,7 @@
b __novmx__sigsetjmp@local
END (__novmxsetjmp)
strong_alias (__novmxsetjmp, __novmx__setjmp)
-symbol_version (__novmxsetjmp, setjmp, GLIBC_2.0)
+compat_symbol (libc, __novmxsetjmp, setjmp, GLIBC_2_0)
#endif /* defined SHARED && SHLIB_COMPAT (libc, GLIBC_2_0, GLIBC_2_3_4) ) */
@@ -37,4 +37,4 @@
END (__vmxsetjmp)
strong_alias (__vmxsetjmp, __vmx__setjmp)
strong_alias (__vmx__setjmp, __setjmp)
-default_symbol_version (__vmxsetjmp,setjmp,GLIBC_2.3.4)
+versioned_symbol (libc, __vmxsetjmp, setjmp, GLIBC_2_3_4)
diff -urN glibc-2.17-c758a686/sysdeps/powerpc/powerpc32/fpu/__longjmp.S glibc-2.17-c758a686/sysdeps/powerpc/powerpc32/fpu/__longjmp.S
--- glibc-2.17-c758a686/sysdeps/powerpc/powerpc32/fpu/__longjmp.S 2014-05-27 23:22:10.000000000 -0500
+++ glibc-2.17-c758a686/sysdeps/powerpc/powerpc32/fpu/__longjmp.S 2014-05-27 23:22:12.000000000 -0500
@@ -26,14 +26,14 @@
#else /* !NOT_IN_libc */
/* Build a versioned object for libc. */
-default_symbol_version (__vmx__longjmp,__longjmp,GLIBC_2.3.4);
+versioned_symbol (libc, __vmx__longjmp, __longjmp, GLIBC_2_3_4);
# define __longjmp __vmx__longjmp
# include "__longjmp-common.S"
# if defined SHARED && SHLIB_COMPAT (libc, GLIBC_2_0, GLIBC_2_3_4)
# define __NO_VMX__
# undef JB_SIZE
-symbol_version (__novmx__longjmp,__longjmp,GLIBC_2.0);
+compat_symbol (libc, __novmx__longjmp, __longjmp, GLIBC_2_0);
# undef __longjmp
# define __longjmp __novmx__longjmp
# include "__longjmp-common.S"
diff -urN glibc-2.17-c758a686/sysdeps/powerpc/powerpc32/fpu/setjmp.S glibc-2.17-c758a686/sysdeps/powerpc/powerpc32/fpu/setjmp.S
--- glibc-2.17-c758a686/sysdeps/powerpc/powerpc32/fpu/setjmp.S 2014-05-27 23:22:10.000000000 -0500
+++ glibc-2.17-c758a686/sysdeps/powerpc/powerpc32/fpu/setjmp.S 2014-05-27 23:22:12.000000000 -0500
@@ -26,7 +26,7 @@
#else /* !NOT_IN_libc */
/* Build a versioned object for libc. */
-default_symbol_version (__vmx__sigsetjmp,__sigsetjmp,GLIBC_2.3.4)
+versioned_symbol (libc, __vmx__sigsetjmp, __sigsetjmp, GLIBC_2_3_4)
# define __sigsetjmp __vmx__sigsetjmp
# define __sigjmp_save __vmx__sigjmp_save
# include "setjmp-common.S"
@@ -36,7 +36,7 @@
# undef __sigsetjmp
# undef __sigjmp_save
# undef JB_SIZE
-symbol_version (__novmx__sigsetjmp,__sigsetjmp,GLIBC_2.0)
+compat_symbol (libc, __novmx__sigsetjmp, __sigsetjmp, GLIBC_2_0)
# define __sigsetjmp __novmx__sigsetjmp
# define __sigjmp_save __novmx__sigjmp_save
# include "setjmp-common.S"
diff -urN glibc-2.17-c758a686/sysdeps/powerpc/powerpc32/mcount.c glibc-2.17-c758a686/sysdeps/powerpc/powerpc32/mcount.c
--- glibc-2.17-c758a686/sysdeps/powerpc/powerpc32/mcount.c 2014-05-27 23:22:10.000000000 -0500
+++ glibc-2.17-c758a686/sysdeps/powerpc/powerpc32/mcount.c 2014-05-27 23:22:12.000000000 -0500
@@ -9,7 +9,7 @@
/* __mcount_internal was added in glibc 2.15 with version GLIBC_PRIVATE,
but it should have been put in version GLIBC_2.15. Mark the
GLIBC_PRIVATE version obsolete and add it to GLIBC_2.16 instead. */
-default_symbol_version (___mcount_internal, __mcount_internal, GLIBC_2.16);
+versioned_symbol (libc, ___mcount_internal, __mcount_internal, GLIBC_2_16);
#if SHLIB_COMPAT (libc, GLIBC_2_15, GLIBC_2_16)
strong_alias (___mcount_internal, ___mcount_internal_private);
diff -urN glibc-2.17-c758a686/sysdeps/powerpc/powerpc32/setjmp.S glibc-2.17-c758a686/sysdeps/powerpc/powerpc32/setjmp.S
--- glibc-2.17-c758a686/sysdeps/powerpc/powerpc32/setjmp.S 2014-05-27 23:22:10.000000000 -0500
+++ glibc-2.17-c758a686/sysdeps/powerpc/powerpc32/setjmp.S 2014-05-27 23:22:12.000000000 -0500
@@ -25,7 +25,7 @@
#else /* !NOT_IN_libc */
/* Build a versioned object for libc. */
-default_symbol_version (__vmx__sigsetjmp,__sigsetjmp,GLIBC_2.3.4)
+versioned_symbol (libc, __vmx__sigsetjmp, __sigsetjmp, GLIBC_2_3_4)
# define __sigsetjmp __vmx__sigsetjmp
# define __sigjmp_save __vmx__sigjmp_save
# include "setjmp-common.S"
@@ -35,7 +35,7 @@
# undef __sigsetjmp
# undef __sigjmp_save
# undef JB_SIZE
-symbol_version (__novmx__sigsetjmp,__sigsetjmp,GLIBC_2.0)
+compat_symbol (libc, __novmx__sigsetjmp, __sigsetjmp, GLIBC_2_0)
# define __sigsetjmp __novmx__sigsetjmp
# define __sigjmp_save __novmx__sigjmp_save
# include "setjmp-common.S"
diff -urN glibc-2.17-c758a686/sysdeps/powerpc/powerpc64/setjmp.S glibc-2.17-c758a686/sysdeps/powerpc/powerpc64/setjmp.S
--- glibc-2.17-c758a686/sysdeps/powerpc/powerpc64/setjmp.S 2014-05-27 23:22:10.000000000 -0500
+++ glibc-2.17-c758a686/sysdeps/powerpc/powerpc64/setjmp.S 2014-05-27 23:22:12.000000000 -0500
@@ -26,9 +26,9 @@
#else /* !NOT_IN_libc */
/* Build a versioned object for libc. */
-default_symbol_version (__vmxsetjmp, setjmp, GLIBC_2.3.4)
-default_symbol_version (__vmx_setjmp,_setjmp,GLIBC_2.3.4)
-default_symbol_version (__vmx__sigsetjmp,__sigsetjmp,GLIBC_2.3.4)
+versioned_symbol (libc, __vmxsetjmp, setjmp, GLIBC_2_3_4)
+versioned_symbol (libc, __vmx_setjmp, _setjmp, GLIBC_2_3_4)
+versioned_symbol (libc, __vmx__sigsetjmp, __sigsetjmp, GLIBC_2_3_4)
# define setjmp __vmxsetjmp
# define _setjmp __vmx_setjmp
# define __sigsetjmp __vmx__sigsetjmp
@@ -44,9 +44,9 @@
# undef __sigjmp_save
# undef JB_SIZE
# define __NO_VMX__
-symbol_version (__novmxsetjmp, setjmp, GLIBC_2.3)
-symbol_version (__novmx_setjmp,_setjmp,GLIBC_2.3);
-symbol_version (__novmx__sigsetjmp,__sigsetjmp,GLIBC_2.3)
+compat_symbol (libc, __novmxsetjmp, setjmp, GLIBC_2_3)
+compat_symbol (libc, __novmx_setjmp,_setjmp, GLIBC_2_3);
+compat_symbol (libc, __novmx__sigsetjmp,__sigsetjmp, GLIBC_2_3)
# define setjmp __novmxsetjmp
# define _setjmp __novmx_setjmp
# define __sigsetjmp __novmx__sigsetjmp

View File

@@ -0,0 +1,102 @@
# commit 02f04a6c7fea2b474b026bbce721d8c658d71fda
# Author: Alan Modra <amodra@gmail.com>
# Date: Sat Aug 17 18:36:11 2013 +0930
#
# PowerPC LE _dl_hwcap access
# http://sourceware.org/ml/libc-alpha/2013-08/msg00091.html
#
# More LE support, correcting word accesses to _dl_hwcap.
#
# * sysdeps/unix/sysv/linux/powerpc/powerpc32/getcontext-common.S: Use
# HIWORD/LOWORD.
# * sysdeps/unix/sysv/linux/powerpc/powerpc32/setcontext-common.S: Ditto.
# * sysdeps/unix/sysv/linux/powerpc/powerpc32/swapcontext-common.S: Ditto.
#
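# The HIWORD/LOWORD idea in C terms (an illustrative sketch; the real offsets
# come from the PowerPC sysdep headers this series patches): which 32-bit half
# of the 64-bit _dl_hwcap sits at the lower address depends on endianness.
#
#   #include <stdint.h>
#
#   #ifdef __LITTLE_ENDIAN__
#   # define LOWORD 0   /* low 32 bits live at the lower address */
#   # define HIWORD 4
#   #else
#   # define LOWORD 4   /* big-endian: low 32 bits at the higher address */
#   # define HIWORD 0
#   #endif
#
#   static inline uint32_t low_word (const uint64_t *p)
#   {
#     return *(const uint32_t *) ((const char *) p + LOWORD);
#   }
#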
diff -urN glibc-2.17-c758a686/sysdeps/unix/sysv/linux/powerpc/powerpc32/getcontext-common.S glibc-2.17-c758a686/sysdeps/unix/sysv/linux/powerpc/powerpc32/getcontext-common.S
--- glibc-2.17-c758a686/sysdeps/unix/sysv/linux/powerpc/powerpc32/getcontext-common.S 2014-05-27 23:25:35.000000000 -0500
+++ glibc-2.17-c758a686/sysdeps/unix/sysv/linux/powerpc/powerpc32/getcontext-common.S 2014-05-27 23:25:38.000000000 -0500
@@ -151,15 +151,15 @@
# ifdef SHARED
lwz r7,_rtld_global_ro@got(r7)
mtlr r8
- lwz r7,RTLD_GLOBAL_RO_DL_HWCAP_OFFSET+4(r7)
+ lwz r7,RTLD_GLOBAL_RO_DL_HWCAP_OFFSET+LOWORD(r7)
# else
lwz r7,_dl_hwcap@got(r7)
mtlr r8
- lwz r7,4(r7)
+ lwz r7,LOWORD(r7)
# endif
# else
- lis r7,(_dl_hwcap+4)@ha
- lwz r7,(_dl_hwcap+4)@l(r7)
+ lis r7,(_dl_hwcap+LOWORD)@ha
+ lwz r7,(_dl_hwcap+LOWORD)@l(r7)
# endif
andis. r7,r7,(PPC_FEATURE_HAS_ALTIVEC >> 16)
diff -urN glibc-2.17-c758a686/sysdeps/unix/sysv/linux/powerpc/powerpc32/setcontext-common.S glibc-2.17-c758a686/sysdeps/unix/sysv/linux/powerpc/powerpc32/setcontext-common.S
--- glibc-2.17-c758a686/sysdeps/unix/sysv/linux/powerpc/powerpc32/setcontext-common.S 2014-05-27 23:25:35.000000000 -0500
+++ glibc-2.17-c758a686/sysdeps/unix/sysv/linux/powerpc/powerpc32/setcontext-common.S 2014-05-27 23:25:38.000000000 -0500
@@ -79,15 +79,15 @@
# ifdef SHARED
lwz r7,_rtld_global_ro@got(r7)
mtlr r8
- lwz r7,RTLD_GLOBAL_RO_DL_HWCAP_OFFSET+4(r7)
+ lwz r7,RTLD_GLOBAL_RO_DL_HWCAP_OFFSET+LOWORD(r7)
# else
lwz r7,_dl_hwcap@got(r7)
mtlr r8
- lwz r7,4(r7)
+ lwz r7,LOWORD(r7)
# endif
#else
- lis r7,(_dl_hwcap+4)@ha
- lwz r7,(_dl_hwcap+4)@l(r7)
+ lis r7,(_dl_hwcap+LOWORD)@ha
+ lwz r7,(_dl_hwcap+LOWORD)@l(r7)
#endif
#ifdef __CONTEXT_ENABLE_FPRS
diff -urN glibc-2.17-c758a686/sysdeps/unix/sysv/linux/powerpc/powerpc32/swapcontext-common.S glibc-2.17-c758a686/sysdeps/unix/sysv/linux/powerpc/powerpc32/swapcontext-common.S
--- glibc-2.17-c758a686/sysdeps/unix/sysv/linux/powerpc/powerpc32/swapcontext-common.S 2014-05-27 23:25:35.000000000 -0500
+++ glibc-2.17-c758a686/sysdeps/unix/sysv/linux/powerpc/powerpc32/swapcontext-common.S 2014-05-27 23:25:38.000000000 -0500
@@ -152,15 +152,15 @@
# ifdef SHARED
lwz r7,_rtld_global_ro@got(r7)
mtlr r8
- lwz r7,RTLD_GLOBAL_RO_DL_HWCAP_OFFSET+4(r7)
+ lwz r7,RTLD_GLOBAL_RO_DL_HWCAP_OFFSET+LOWORD(r7)
# else
lwz r7,_dl_hwcap@got(r7)
mtlr r8
- lwz r7,4(r7)
+ lwz r7,LOWORD(r7)
# endif
# else
- lis r7,(_dl_hwcap+4)@ha
- lwz r7,(_dl_hwcap+4)@l(r7)
+ lis r7,(_dl_hwcap+LOWORD)@ha
+ lwz r7,(_dl_hwcap+LOWORD)@l(r7)
# endif
# ifdef __CONTEXT_ENABLE_VRS
@@ -308,14 +308,14 @@
mtlr r8
# ifdef SHARED
lwz r7,_rtld_global_ro@got(r7)
- lwz r7,RTLD_GLOBAL_RO_DL_HWCAP_OFFSET+4(r7)
+ lwz r7,RTLD_GLOBAL_RO_DL_HWCAP_OFFSET+LOWORD(r7)
# else
lwz r7,_dl_hwcap@got(r7)
- lwz r7,4(r7)
+ lwz r7,LOWORD(r7)
# endif
# else
- lis r7,(_dl_hwcap+4)@ha
- lwz r7,(_dl_hwcap+4)@l(r7)
+ lis r7,(_dl_hwcap+LOWORD)@ha
+ lwz r7,(_dl_hwcap+LOWORD)@l(r7)
# endif
andis. r7,r7,(PPC_FEATURE_HAS_ALTIVEC >> 16)
la r10,(_UC_VREGS)(r31)

View File

@@ -0,0 +1,55 @@
# commit 0b2c2ace3601d5d59cf89130b16840e7f132f7a6
# Author: Alan Modra <amodra@gmail.com>
# Date: Sat Aug 17 18:36:45 2013 +0930
#
# PowerPC makecontext
# http://sourceware.org/ml/libc-alpha/2013-08/msg00092.html
#
# Use conditional form of branch and link to avoid destroying the cpu
# link stack used to predict blr return addresses.
#
# * sysdeps/unix/sysv/linux/powerpc/powerpc32/makecontext.S: Use
# conditional form of branch and link when obtaining pc.
# * sysdeps/unix/sysv/linux/powerpc/powerpc64/makecontext.S: Likewise.
#
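# A minimal sketch of the trick (GCC inline asm assumed; the helper name is
# illustrative): "bcl 20,31,label" is an always-taken branch-and-link that
# implementations are expected not to push onto the link stack, so it can be
# used to fetch the current PC without unbalancing blr prediction.
#
#   static inline void *current_pc (void)
#   {
#     void *pc;
#     __asm__ ("bcl 20,31,1f\n"   /* link-stack-friendly branch and link */
#              "1: mflr %0"       /* LR now holds the address of label 1 */
#              : "=r" (pc) : : "lr");
#     return pc;
#   }
#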
diff -urN glibc-2.17-c758a686/sysdeps/unix/sysv/linux/powerpc/powerpc32/makecontext.S glibc-2.17-c758a686/sysdeps/unix/sysv/linux/powerpc/powerpc32/makecontext.S
--- glibc-2.17-c758a686/sysdeps/unix/sysv/linux/powerpc/powerpc32/makecontext.S 2014-05-28 12:25:49.000000000 -0500
+++ glibc-2.17-c758a686/sysdeps/unix/sysv/linux/powerpc/powerpc32/makecontext.S 2014-05-28 12:25:51.000000000 -0500
@@ -47,7 +47,9 @@
#ifdef PIC
mflr r0
cfi_register(lr,r0)
- bl 1f
+ /* Use this conditional form of branch and link to avoid destroying
+ the cpu link stack used to predict blr return addresses. */
+ bcl 20,31,1f
1: mflr r6
addi r6,r6,L(exitcode)-1b
mtlr r0
@@ -136,7 +138,9 @@
#ifdef PIC
mflr r0
cfi_register(lr,r0)
- bl 1f
+ /* Use this conditional form of branch and link to avoid destroying
+ the cpu link stack used to predict blr return addresses. */
+ bcl 20,31,1f
1: mflr r6
addi r6,r6,L(novec_exitcode)-1b
mtlr r0
diff -urN glibc-2.17-c758a686/sysdeps/unix/sysv/linux/powerpc/powerpc64/makecontext.S glibc-2.17-c758a686/sysdeps/unix/sysv/linux/powerpc/powerpc64/makecontext.S
--- glibc-2.17-c758a686/sysdeps/unix/sysv/linux/powerpc/powerpc64/makecontext.S 2014-05-28 12:25:49.000000000 -0500
+++ glibc-2.17-c758a686/sysdeps/unix/sysv/linux/powerpc/powerpc64/makecontext.S 2014-05-28 12:25:51.000000000 -0500
@@ -124,8 +124,10 @@
/* If the target function returns we need to do some cleanup. We use a
code trick to get the address of our cleanup function into the link
- register. Do not add any code between here and L(exitcode). */
- bl L(gotexitcodeaddr);
+ register. Do not add any code between here and L(exitcode).
+ Use this conditional form of branch and link to avoid destroying
+ the cpu link stack used to predict blr return addresses. */
+ bcl 20,31,L(gotexitcodeaddr);
/* This is the helper code which gets called if a function which
is registered with 'makecontext' returns. In this case we

View File

@@ -0,0 +1,411 @@
# commit db9b4570c5dc550074140ac1d1677077fba29a26
# Author: Alan Modra <amodra@gmail.com>
# Date: Sat Aug 17 18:40:11 2013 +0930
#
# PowerPC LE strlen
# http://sourceware.org/ml/libc-alpha/2013-08/msg00097.html
#
# This is the first of nine patches adding little-endian support to the
# existing optimised string and memory functions. I did spend some
# time with a power7 simulator looking at cycle by cycle behaviour for
# memchr, but most of these patches have not been run on cpu simulators
# to check that we are going as fast as possible. I'm sure PowerPC can
# do better. However, the little-endian support mostly leaves main
# loops unchanged, so I'm banking on previous authors having done a
# good job on big-endian. As with most code you stare at long enough,
# I found some improvements for big-endian too.
#
# Little-endian support for strlen. Like most of the string functions,
# I leave the main word or multiple-word loops substantially unchanged,
# just needing to modify the tail.
#
# Removing the branch in the power7 functions is just a tidy. .align
# produces a branch anyway. Modifying regs in the non-power7 functions
# is to suit the new little-endian tail.
#
# * sysdeps/powerpc/powerpc64/power7/strlen.S (strlen): Add little-endian
# support. Don't branch over align.
# * sysdeps/powerpc/powerpc32/power7/strlen.S: Likewise.
# * sysdeps/powerpc/powerpc64/strlen.S (strlen): Add little-endian support.
# Rearrange tmp reg use to suit. Comment.
# * sysdeps/powerpc/powerpc32/strlen.S: Likewise.
#
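# The tail change amounts to this in C (illustrative helper, GCC builtins
# assumed): the cmpb result has 0xff in each byte that held a null, and the
# "first" match is at the most significant end on big-endian but at the least
# significant end on little-endian, hence the trailing-zero mask and popcount.
#
#   #include <stdint.h>
#
#   /* Byte offset of the first null within the doubleword; mask != 0.  */
#   static inline unsigned first_nul_byte (uint64_t mask, int little_endian)
#   {
#     if (little_endian)
#       {
#         uint64_t t = (mask - 1) & ~mask;        /* ones below the first match */
#         return __builtin_popcountll (t) >> 3;   /* trailing zero bits -> bytes */
#       }
#     return __builtin_clzll (mask) >> 3;         /* leading zero bits -> bytes  */
#   }
#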
diff -urN glibc-2.17-c758a686/sysdeps/powerpc/powerpc32/power7/strlen.S glibc-2.17-c758a686/sysdeps/powerpc/powerpc32/power7/strlen.S
--- glibc-2.17-c758a686/sysdeps/powerpc/powerpc32/power7/strlen.S 2014-05-28 12:28:44.000000000 -0500
+++ glibc-2.17-c758a686/sysdeps/powerpc/powerpc32/power7/strlen.S 2014-05-28 12:28:45.000000000 -0500
@@ -31,7 +31,11 @@
li r0,0 /* Word with null chars to use with cmpb. */
li r5,-1 /* MASK = 0xffffffffffffffff. */
lwz r12,0(r4) /* Load word from memory. */
+#ifdef __LITTLE_ENDIAN__
+ slw r5,r5,r6
+#else
srw r5,r5,r6 /* MASK = MASK >> padding. */
+#endif
orc r9,r12,r5 /* Mask bits that are not part of the string. */
cmpb r10,r9,r0 /* Check for null bytes in WORD1. */
cmpwi cr7,r10,0 /* If r10 == 0, no null's have been found. */
@@ -49,9 +53,6 @@
cmpb r10,r12,r0
cmpwi cr7,r10,0
bne cr7,L(done)
- b L(loop) /* We branch here (rather than falling through)
- to skip the nops due to heavy alignment
- of the loop below. */
/* Main loop to look for the end of the string. Since it's a
small loop (< 8 instructions), align it to 32-bytes. */
@@ -88,9 +89,15 @@
0xff in the same position as the null byte in the original
word from the string. Use that to calculate the length. */
L(done):
- cntlzw r0,r10 /* Count leading zeroes before the match. */
+#ifdef __LITTLE_ENDIAN__
+ addi r9, r10, -1 /* Form a mask from trailing zeros. */
+ andc r9, r9, r10
+ popcntw r0, r9 /* Count the bits in the mask. */
+#else
+ cntlzw r0,r10 /* Count leading zeros before the match. */
+#endif
subf r5,r3,r4
- srwi r0,r0,3 /* Convert leading zeroes to bytes. */
+ srwi r0,r0,3 /* Convert leading zeros to bytes. */
add r3,r5,r0 /* Compute final length. */
blr
END (BP_SYM (strlen))
diff -urN glibc-2.17-c758a686/sysdeps/powerpc/powerpc32/strlen.S glibc-2.17-c758a686/sysdeps/powerpc/powerpc32/strlen.S
--- glibc-2.17-c758a686/sysdeps/powerpc/powerpc32/strlen.S 2014-05-28 12:28:44.000000000 -0500
+++ glibc-2.17-c758a686/sysdeps/powerpc/powerpc32/strlen.S 2014-05-28 12:32:24.000000000 -0500
@@ -31,7 +31,12 @@
1 is subtracted you get a value in the range 0x00-0x7f, none of which
have their high bit set. The expression here is
(x + 0xfefefeff) & ~(x | 0x7f7f7f7f), which gives 0x00000000 when
- there were no 0x00 bytes in the word.
+ there were no 0x00 bytes in the word. You get 0x80 in bytes that
+ match, but possibly false 0x80 matches in the next more significant
+ byte to a true match due to carries. For little-endian this is
+ of no consequence since the least significant match is the one
+ we're interested in, but big-endian needs method 2 to find which
+ byte matches.
2) Given a word 'x', we can test to see _which_ byte was zero by
calculating ~(((x & 0x7f7f7f7f) + 0x7f7f7f7f) | x | 0x7f7f7f7f).
@@ -74,7 +79,7 @@
ENTRY (BP_SYM (strlen))
-#define rTMP1 r0
+#define rTMP4 r0
#define rRTN r3 /* incoming STR arg, outgoing result */
#define rSTR r4 /* current string position */
#define rPADN r5 /* number of padding bits we prepend to the
@@ -84,9 +89,9 @@
#define rWORD1 r8 /* current string word */
#define rWORD2 r9 /* next string word */
#define rMASK r9 /* mask for first string word */
-#define rTMP2 r10
-#define rTMP3 r11
-#define rTMP4 r12
+#define rTMP1 r10
+#define rTMP2 r11
+#define rTMP3 r12
CHECK_BOUNDS_LOW (rRTN, rTMP1, rTMP2)
@@ -96,15 +101,20 @@
lwz rWORD1, 0(rSTR)
li rMASK, -1
addi r7F7F, r7F7F, 0x7f7f
-/* That's the setup done, now do the first pair of words.
- We make an exception and use method (2) on the first two words, to reduce
- overhead. */
+/* We use method (2) on the first two words, because rFEFE isn't
+ required which reduces setup overhead. Also gives a faster return
+ for small strings on big-endian due to needing to recalculate with
+ method (2) anyway. */
+#ifdef __LITTLE_ENDIAN__
+ slw rMASK, rMASK, rPADN
+#else
srw rMASK, rMASK, rPADN
+#endif
and rTMP1, r7F7F, rWORD1
or rTMP2, r7F7F, rWORD1
add rTMP1, rTMP1, r7F7F
- nor rTMP1, rTMP2, rTMP1
- and. rWORD1, rTMP1, rMASK
+ nor rTMP3, rTMP2, rTMP1
+ and. rTMP3, rTMP3, rMASK
mtcrf 0x01, rRTN
bne L(done0)
lis rFEFE, -0x101
@@ -113,11 +123,12 @@
bt 29, L(loop)
/* Handle second word of pair. */
+/* Perhaps use method (1) here for little-endian, saving one instruction? */
lwzu rWORD1, 4(rSTR)
and rTMP1, r7F7F, rWORD1
or rTMP2, r7F7F, rWORD1
add rTMP1, rTMP1, r7F7F
- nor. rWORD1, rTMP2, rTMP1
+ nor. rTMP3, rTMP2, rTMP1
bne L(done0)
/* The loop. */
@@ -131,29 +142,53 @@
add rTMP3, rFEFE, rWORD2
nor rTMP4, r7F7F, rWORD2
bne L(done1)
- and. rTMP1, rTMP3, rTMP4
+ and. rTMP3, rTMP3, rTMP4
beq L(loop)
+#ifndef __LITTLE_ENDIAN__
and rTMP1, r7F7F, rWORD2
add rTMP1, rTMP1, r7F7F
- andc rWORD1, rTMP4, rTMP1
+ andc rTMP3, rTMP4, rTMP1
b L(done0)
L(done1):
and rTMP1, r7F7F, rWORD1
subi rSTR, rSTR, 4
add rTMP1, rTMP1, r7F7F
- andc rWORD1, rTMP2, rTMP1
+ andc rTMP3, rTMP2, rTMP1
/* When we get to here, rSTR points to the first word in the string that
- contains a zero byte, and the most significant set bit in rWORD1 is in that
- byte. */
+ contains a zero byte, and rTMP3 has 0x80 for bytes that are zero,
+ and 0x00 otherwise. */
L(done0):
- cntlzw rTMP3, rWORD1
+ cntlzw rTMP3, rTMP3
subf rTMP1, rRTN, rSTR
srwi rTMP3, rTMP3, 3
add rRTN, rTMP1, rTMP3
/* GKM FIXME: check high bound. */
blr
+#else
+
+L(done0):
+ addi rTMP1, rTMP3, -1 /* Form a mask from trailing zeros. */
+ andc rTMP1, rTMP1, rTMP3
+ cntlzw rTMP1, rTMP1 /* Count bits not in the mask. */
+ subf rTMP3, rRTN, rSTR
+ subfic rTMP1, rTMP1, 32-7
+ srwi rTMP1, rTMP1, 3
+ add rRTN, rTMP1, rTMP3
+ blr
+
+L(done1):
+ addi rTMP3, rTMP1, -1
+ andc rTMP3, rTMP3, rTMP1
+ cntlzw rTMP3, rTMP3
+ subf rTMP1, rRTN, rSTR
+ subfic rTMP3, rTMP3, 32-7-32
+ srawi rTMP3, rTMP3, 3
+ add rRTN, rTMP1, rTMP3
+ blr
+#endif
+
END (BP_SYM (strlen))
libc_hidden_builtin_def (strlen)
diff -urN glibc-2.17-c758a686/sysdeps/powerpc/powerpc64/power7/strlen.S glibc-2.17-c758a686/sysdeps/powerpc/powerpc64/power7/strlen.S
--- glibc-2.17-c758a686/sysdeps/powerpc/powerpc64/power7/strlen.S 2014-05-28 12:28:44.000000000 -0500
+++ glibc-2.17-c758a686/sysdeps/powerpc/powerpc64/power7/strlen.S 2014-05-28 12:28:45.000000000 -0500
@@ -32,7 +32,11 @@
with cmpb. */
li r5,-1 /* MASK = 0xffffffffffffffff. */
ld r12,0(r4) /* Load doubleword from memory. */
+#ifdef __LITTLE_ENDIAN__
+ sld r5,r5,r6
+#else
srd r5,r5,r6 /* MASK = MASK >> padding. */
+#endif
orc r9,r12,r5 /* Mask bits that are not part of the string. */
cmpb r10,r9,r0 /* Check for null bytes in DWORD1. */
cmpdi cr7,r10,0 /* If r10 == 0, no null's have been found. */
@@ -50,9 +54,6 @@
cmpb r10,r12,r0
cmpdi cr7,r10,0
bne cr7,L(done)
- b L(loop) /* We branch here (rather than falling through)
- to skip the nops due to heavy alignment
- of the loop below. */
/* Main loop to look for the end of the string. Since it's a
small loop (< 8 instructions), align it to 32-bytes. */
@@ -89,9 +90,15 @@
0xff in the same position as the null byte in the original
doubleword from the string. Use that to calculate the length. */
L(done):
- cntlzd r0,r10 /* Count leading zeroes before the match. */
+#ifdef __LITTLE_ENDIAN__
+ addi r9, r10, -1 /* Form a mask from trailing zeros. */
+ andc r9, r9, r10
+ popcntd r0, r9 /* Count the bits in the mask. */
+#else
+ cntlzd r0,r10 /* Count leading zeros before the match. */
+#endif
subf r5,r3,r4
- srdi r0,r0,3 /* Convert leading zeroes to bytes. */
+ srdi r0,r0,3 /* Convert leading/trailing zeros to bytes. */
add r3,r5,r0 /* Compute final length. */
blr
END (BP_SYM (strlen))
diff -urN glibc-2.17-c758a686/sysdeps/powerpc/powerpc64/strlen.S glibc-2.17-c758a686/sysdeps/powerpc/powerpc64/strlen.S
--- glibc-2.17-c758a686/sysdeps/powerpc/powerpc64/strlen.S 2014-05-28 12:28:44.000000000 -0500
+++ glibc-2.17-c758a686/sysdeps/powerpc/powerpc64/strlen.S 2014-05-28 12:38:17.000000000 -0500
@@ -31,7 +31,12 @@
1 is subtracted you get a value in the range 0x00-0x7f, none of which
have their high bit set. The expression here is
(x + 0xfefefeff) & ~(x | 0x7f7f7f7f), which gives 0x00000000 when
- there were no 0x00 bytes in the word.
+ there were no 0x00 bytes in the word. You get 0x80 in bytes that
+ match, but possibly false 0x80 matches in the next more significant
+ byte to a true match due to carries. For little-endian this is
+ of no consequence since the least significant match is the one
+ we're interested in, but big-endian needs method 2 to find which
+ byte matches.
2) Given a word 'x', we can test to see _which_ byte was zero by
calculating ~(((x & 0x7f7f7f7f) + 0x7f7f7f7f) | x | 0x7f7f7f7f).
@@ -64,7 +69,7 @@
Answer:
1) Added a Data Cache Block Touch early to prefetch the first 128
byte cache line. Adding dcbt instructions to the loop would not be
- effective since most strings will be shorter than the cache line.*/
+ effective since most strings will be shorter than the cache line. */
/* Some notes on register usage: Under the SVR4 ABI, we can use registers
0 and 3 through 12 (so long as we don't call any procedures) without
@@ -80,7 +85,7 @@
ENTRY (BP_SYM (strlen))
CALL_MCOUNT 1
-#define rTMP1 r0
+#define rTMP4 r0
#define rRTN r3 /* incoming STR arg, outgoing result */
#define rSTR r4 /* current string position */
#define rPADN r5 /* number of padding bits we prepend to the
@@ -90,9 +95,9 @@
#define rWORD1 r8 /* current string doubleword */
#define rWORD2 r9 /* next string doubleword */
#define rMASK r9 /* mask for first string doubleword */
-#define rTMP2 r10
-#define rTMP3 r11
-#define rTMP4 r12
+#define rTMP1 r10
+#define rTMP2 r11
+#define rTMP3 r12
/* Note: The Bounded pointer support in this code is broken. This code
was inherited from PPC32 and that support was never completed.
@@ -109,30 +114,36 @@
addi r7F7F, r7F7F, 0x7f7f
li rMASK, -1
insrdi r7F7F, r7F7F, 32, 0
-/* That's the setup done, now do the first pair of doublewords.
- We make an exception and use method (2) on the first two doublewords,
- to reduce overhead. */
- srd rMASK, rMASK, rPADN
+/* We use method (2) on the first two doublewords, because rFEFE isn't
+ required which reduces setup overhead. Also gives a faster return
+ for small strings on big-endian due to needing to recalculate with
+ method (2) anyway. */
+#ifdef __LITTLE_ENDIAN__
+ sld rMASK, rMASK, rPADN
+#else
+ srd rMASK, rMASK, rPADN
+#endif
and rTMP1, r7F7F, rWORD1
or rTMP2, r7F7F, rWORD1
lis rFEFE, -0x101
add rTMP1, rTMP1, r7F7F
addi rFEFE, rFEFE, -0x101
- nor rTMP1, rTMP2, rTMP1
- and. rWORD1, rTMP1, rMASK
+ nor rTMP3, rTMP2, rTMP1
+ and. rTMP3, rTMP3, rMASK
mtcrf 0x01, rRTN
bne L(done0)
- sldi rTMP1, rFEFE, 32
- add rFEFE, rFEFE, rTMP1
+ sldi rTMP1, rFEFE, 32
+ add rFEFE, rFEFE, rTMP1
/* Are we now aligned to a doubleword boundary? */
bt 28, L(loop)
/* Handle second doubleword of pair. */
+/* Perhaps use method (1) here for little-endian, saving one instruction? */
ldu rWORD1, 8(rSTR)
and rTMP1, r7F7F, rWORD1
or rTMP2, r7F7F, rWORD1
add rTMP1, rTMP1, r7F7F
- nor. rWORD1, rTMP2, rTMP1
+ nor. rTMP3, rTMP2, rTMP1
bne L(done0)
/* The loop. */
@@ -146,29 +157,53 @@
add rTMP3, rFEFE, rWORD2
nor rTMP4, r7F7F, rWORD2
bne L(done1)
- and. rTMP1, rTMP3, rTMP4
+ and. rTMP3, rTMP3, rTMP4
beq L(loop)
+#ifndef __LITTLE_ENDIAN__
and rTMP1, r7F7F, rWORD2
add rTMP1, rTMP1, r7F7F
- andc rWORD1, rTMP4, rTMP1
+ andc rTMP3, rTMP4, rTMP1
b L(done0)
L(done1):
and rTMP1, r7F7F, rWORD1
subi rSTR, rSTR, 8
add rTMP1, rTMP1, r7F7F
- andc rWORD1, rTMP2, rTMP1
+ andc rTMP3, rTMP2, rTMP1
/* When we get to here, rSTR points to the first doubleword in the string that
- contains a zero byte, and the most significant set bit in rWORD1 is in that
- byte. */
+ contains a zero byte, and rTMP3 has 0x80 for bytes that are zero, and 0x00
+ otherwise. */
L(done0):
- cntlzd rTMP3, rWORD1
+ cntlzd rTMP3, rTMP3
subf rTMP1, rRTN, rSTR
srdi rTMP3, rTMP3, 3
add rRTN, rTMP1, rTMP3
/* GKM FIXME: check high bound. */
blr
+#else
+
+L(done0):
+ addi rTMP1, rTMP3, -1 /* Form a mask from trailing zeros. */
+ andc rTMP1, rTMP1, rTMP3
+ cntlzd rTMP1, rTMP1 /* Count bits not in the mask. */
+ subf rTMP3, rRTN, rSTR
+ subfic rTMP1, rTMP1, 64-7
+ srdi rTMP1, rTMP1, 3
+ add rRTN, rTMP1, rTMP3
+ blr
+
+L(done1):
+ addi rTMP3, rTMP1, -1
+ andc rTMP3, rTMP3, rTMP1
+ cntlzd rTMP3, rTMP3
+ subf rTMP1, rRTN, rSTR
+ subfic rTMP3, rTMP3, 64-7-64
+ sradi rTMP3, rTMP3, 3
+ add rRTN, rTMP1, rTMP3
+ blr
+#endif
+
END (BP_SYM (strlen))
libc_hidden_builtin_def (strlen)

View File

@@ -0,0 +1,379 @@
# commit 33ee81de05e83ce12f32a491270bb4c1611399c7
# Author: Alan Modra <amodra@gmail.com>
# Date: Sat Aug 17 18:40:48 2013 +0930
#
# PowerPC LE strnlen
# http://sourceware.org/ml/libc-alpha/2013-08/msg00098.html
#
# The existing strnlen code has a number of defects, so this patch is more
# than just adding little-endian support. The changes here are similar to
# those for memchr.
#
# * sysdeps/powerpc/powerpc64/power7/strnlen.S (strnlen): Add
# little-endian support. Remove unnecessary "are we done" tests.
# Handle "s" wrapping around zero and extremely large "size".
# Correct main loop count. Handle single left-over word from main
# loop inline rather than by using small_loop. Correct comments.
# Delete "zero" tail, use "end_max" instead.
# * sysdeps/powerpc/powerpc32/power7/strnlen.S: Likewise.
#
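# The corrected loop count can be restated in C (an illustrative helper that
# mirrors the comment the patch adds to the 32-bit version): the loop reads
# two words per iteration starting at p + 4, and must cover the word holding
# the last byte of the range, i.e. (s + size - 1) & ~3.
#
#   #include <stdint.h>
#   #include <stddef.h>
#
#   static inline size_t loop_iterations (uintptr_t p, uintptr_t s, size_t size)
#   {
#     uintptr_t last_word = (s + size - 1) & ~(uintptr_t) 3;
#     return (last_word - p) / 8;   /* 2 words, 8 bytes, per iteration */
#   }
#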
diff -urN glibc-2.17-c758a686/sysdeps/powerpc/powerpc32/power7/strnlen.S glibc-2.17-c758a686/sysdeps/powerpc/powerpc32/power7/strnlen.S
--- glibc-2.17-c758a686/sysdeps/powerpc/powerpc32/power7/strnlen.S 2014-05-28 12:40:17.000000000 -0500
+++ glibc-2.17-c758a686/sysdeps/powerpc/powerpc32/power7/strnlen.S 2014-05-28 12:44:52.000000000 -0500
@@ -30,51 +30,47 @@
add r7,r3,r4 /* Calculate the last acceptable address. */
cmplwi r4,16
li r0,0 /* Word with null chars. */
+ addi r7,r7,-1
ble L(small_range)
- cmplw cr7,r3,r7 /* Is the address equal or less than r3? If
- it's equal or less, it means size is either 0
- or a negative number. */
- ble cr7,L(proceed)
-
- li r7,-1 /* Make r11 the biggest if r4 <= 0. */
-L(proceed):
rlwinm r6,r3,3,27,28 /* Calculate padding. */
lwz r12,0(r8) /* Load word from memory. */
cmpb r10,r12,r0 /* Check for null bytes in DWORD1. */
+#ifdef __LITTLE_ENDIAN__
+ srw r10,r10,r6
+ slw r10,r10,r6
+#else
slw r10,r10,r6
srw r10,r10,r6
+#endif
cmplwi cr7,r10,0 /* If r10 == 0, no null's have been found. */
bne cr7,L(done)
- /* Are we done already? */
- addi r9,r8,4
- cmplw cr6,r9,r7
- bge cr6,L(end_max)
-
+ clrrwi r7,r7,2 /* Address of last word. */
mtcrf 0x01,r8
/* Are we now aligned to a doubleword boundary? If so, skip to
the main loop. Otherwise, go through the alignment code. */
bt 29,L(loop_setup)
- /* Handle DWORD2 of pair. */
+ /* Handle WORD2 of pair. */
lwzu r12,4(r8)
cmpb r10,r12,r0
cmplwi cr7,r10,0
bne cr7,L(done)
- /* Are we done already? */
- addi r9,r8,4
- cmplw cr6,r9,r7
- bge cr6,L(end_max)
-
L(loop_setup):
- sub r5,r7,r9
+ /* The last word we want to read in the loop below is the one
+ containing the last byte of the string, ie. the word at
+ (s + size - 1) & ~3, or r7. The first word read is at
+ r8 + 4, we read 2 * cnt words, so the last word read will
+ be at r8 + 4 + 8 * cnt - 4. Solving for cnt gives
+ cnt = (r7 - r8) / 8 */
+ sub r5,r7,r8
srwi r6,r5,3 /* Number of loop iterations. */
mtctr r6 /* Setup the counter. */
- b L(loop)
- /* Main loop to look for the null byte backwards in the string. Since
+
+ /* Main loop to look for the null byte in the string. Since
it's a small loop (< 8 instructions), align it to 32-bytes. */
.p2align 5
L(loop):
@@ -90,15 +86,18 @@
cmplwi cr7,r5,0
bne cr7,L(found)
bdnz L(loop)
- /* We're here because the counter reached 0, and that means we
- didn't have any matches for null in the whole range. Just return
- the original size. */
- addi r9,r8,4
- cmplw cr6,r9,r7
- blt cr6,L(loop_small)
+
+ /* We may have one more word to read. */
+ cmplw cr6,r8,r7
+ beq cr6,L(end_max)
+
+ lwzu r12,4(r8)
+ cmpb r10,r12,r0
+ cmplwi cr6,r10,0
+ bne cr6,L(done)
L(end_max):
- sub r3,r7,r3
+ mr r3,r4
blr
/* OK, one (or both) of the words contains a null byte. Check
@@ -123,49 +122,56 @@
We need to make sure the null char is *before* the end of the
range. */
L(done):
- cntlzw r0,r10 /* Count leading zeroes before the match. */
- srwi r0,r0,3 /* Convert leading zeroes to bytes. */
- add r9,r8,r0
- sub r6,r9,r3 /* Length until the match. */
- cmplw r9,r7
- bgt L(end_max)
- mr r3,r6
- blr
-
- .align 4
-L(zero):
- li r3,0
+#ifdef __LITTLE_ENDIAN__
+ addi r0,r10,-1
+ andc r0,r0,r10
+ popcntw r0,r0
+#else
+ cntlzw r0,r10 /* Count leading zeros before the match. */
+#endif
+ sub r3,r8,r3
+ srwi r0,r0,3 /* Convert leading/trailing zeros to bytes. */
+ add r3,r3,r0 /* Length until the match. */
+ cmplw r3,r4
+ blelr
+ mr r3,r4
blr
-/* Deals with size <= 32. */
+/* Deals with size <= 16. */
.align 4
L(small_range):
cmplwi r4,0
- beq L(zero)
+ beq L(end_max)
+
+ clrrwi r7,r7,2 /* Address of last word. */
rlwinm r6,r3,3,27,28 /* Calculate padding. */
lwz r12,0(r8) /* Load word from memory. */
cmpb r10,r12,r0 /* Check for null bytes in WORD1. */
+#ifdef __LITTLE_ENDIAN__
+ srw r10,r10,r6
+ slw r10,r10,r6
+#else
slw r10,r10,r6
srw r10,r10,r6
+#endif
cmplwi cr7,r10,0
bne cr7,L(done)
- addi r9,r8,4
- cmplw r9,r7
- bge L(end_max)
- b L(loop_small)
+ cmplw r8,r7
+ beq L(end_max)
.p2align 5
L(loop_small):
lwzu r12,4(r8)
cmpb r10,r12,r0
- addi r9,r8,4
cmplwi cr6,r10,0
bne cr6,L(done)
- cmplw r9,r7
- bge L(end_max)
- b L(loop_small)
+ cmplw r8,r7
+ bne L(loop_small)
+ mr r3,r4
+ blr
+
END (BP_SYM (__strnlen))
weak_alias (BP_SYM (__strnlen), BP_SYM(strnlen))
libc_hidden_builtin_def (strnlen)
diff -urN glibc-2.17-c758a686/sysdeps/powerpc/powerpc64/power7/strnlen.S glibc-2.17-c758a686/sysdeps/powerpc/powerpc64/power7/strnlen.S
--- glibc-2.17-c758a686/sysdeps/powerpc/powerpc64/power7/strnlen.S 2014-05-28 12:40:17.000000000 -0500
+++ glibc-2.17-c758a686/sysdeps/powerpc/powerpc64/power7/strnlen.S 2014-05-28 13:24:41.000000000 -0500
@@ -26,33 +26,29 @@
ENTRY (BP_SYM (__strnlen))
CALL_MCOUNT 2
dcbt 0,r3
- clrrdi r8,r3,3
+ clrrdi r8,r3,3
add r7,r3,r4 /* Calculate the last acceptable address. */
cmpldi r4,32
li r0,0 /* Doubleword with null chars. */
+ addi r7,r7,-1
+
/* If we have less than 33 bytes to search, skip to a faster code. */
ble L(small_range)
- cmpld cr7,r3,r7 /* Is the address equal or less than r3? If
- it's equal or less, it means size is either 0
- or a negative number. */
- ble cr7,L(proceed)
-
- li r7,-1 /* Make r11 the biggest if r4 <= 0. */
-L(proceed):
rlwinm r6,r3,3,26,28 /* Calculate padding. */
ld r12,0(r8) /* Load doubleword from memory. */
cmpb r10,r12,r0 /* Check for null bytes in DWORD1. */
+#ifdef __LITTLE_ENDIAN__
+ srd r10,r10,r6
+ sld r10,r10,r6
+#else
sld r10,r10,r6
srd r10,r10,r6
+#endif
cmpldi cr7,r10,0 /* If r10 == 0, no null's have been found. */
bne cr7,L(done)
- /* Are we done already? */
- addi r9,r8,8
- cmpld cr6,r9,r7
- bge cr6,L(end_max)
-
+ clrrdi r7,r7,3 /* Address of last doubleword. */
mtcrf 0x01,r8
/* Are we now aligned to a quadword boundary? If so, skip to
the main loop. Otherwise, go through the alignment code. */
@@ -65,17 +61,18 @@
cmpldi cr7,r10,0
bne cr7,L(done)
- /* Are we done already? */
- addi r9,r8,8
- cmpld cr6,r9,r7
- bge cr6,L(end_max)
-
L(loop_setup):
- sub r5,r7,r9
+ /* The last dword we want to read in the loop below is the one
+ containing the last byte of the string, ie. the dword at
+ (s + size - 1) & ~7, or r7. The first dword read is at
+ r8 + 8, we read 2 * cnt dwords, so the last dword read will
+ be at r8 + 8 + 16 * cnt - 8. Solving for cnt gives
+ cnt = (r7 - r8) / 16 */
+ sub r5,r7,r8
srdi r6,r5,4 /* Number of loop iterations. */
mtctr r6 /* Setup the counter. */
- b L(loop)
- /* Main loop to look for the null byte backwards in the string. Since
+
+ /* Main loop to look for the null byte in the string. Since
it's a small loop (< 8 instructions), align it to 32-bytes. */
.p2align 5
L(loop):
@@ -91,15 +88,18 @@
cmpldi cr7,r5,0
bne cr7,L(found)
bdnz L(loop)
- /* We're here because the counter reached 0, and that means we
- didn't have any matches for null in the whole range. Just return
- the original size. */
- addi r9,r8,8
- cmpld cr6,r9,r7
- blt cr6,L(loop_small)
+
+ /* We may have one more dword to read. */
+ cmpld cr6,r8,r7
+ beq cr6,L(end_max)
+
+ ldu r12,8(r8)
+ cmpb r10,r12,r0
+ cmpldi cr6,r10,0
+ bne cr6,L(done)
L(end_max):
- sub r3,r7,r3
+ mr r3,r4
blr
/* OK, one (or both) of the doublewords contains a null byte. Check
@@ -121,52 +121,59 @@
/* r10 has the output of the cmpb instruction, that is, it contains
0xff in the same position as the null byte in the original
doubleword from the string. Use that to calculate the length.
- We need to make sure the null char is *before* the start of the
- range (since we're going backwards). */
+ We need to make sure the null char is *before* the end of the
+ range. */
L(done):
- cntlzd r0,r10 /* Count leading zeroes before the match. */
- srdi r0,r0,3 /* Convert leading zeroes to bytes. */
- add r9,r8,r0
- sub r6,r9,r3 /* Length until the match. */
- cmpld r9,r7
- bgt L(end_max)
- mr r3,r6
- blr
-
- .align 4
-L(zero):
- li r3,0
+#ifdef __LITTLE_ENDIAN__
+ addi r0,r10,-1
+ andc r0,r0,r10
+ popcntd r0,r0
+#else
+ cntlzd r0,r10 /* Count leading zeros before the match. */
+#endif
+ sub r3,r8,r3
+ srdi r0,r0,3 /* Convert leading/trailing zeros to bytes. */
+ add r3,r3,r0 /* Length until the match. */
+ cmpld r3,r4
+ blelr
+ mr r3,r4
blr
/* Deals with size <= 32. */
.align 4
L(small_range):
cmpldi r4,0
- beq L(zero)
+ beq L(end_max)
+
+ clrrdi r7,r7,3 /* Address of last doubleword. */
rlwinm r6,r3,3,26,28 /* Calculate padding. */
- ld r12,0(r8) /* Load word from memory. */
+ ld r12,0(r8) /* Load doubleword from memory. */
cmpb r10,r12,r0 /* Check for null bytes in DWORD1. */
+#ifdef __LITTLE_ENDIAN__
+ srd r10,r10,r6
+ sld r10,r10,r6
+#else
sld r10,r10,r6
srd r10,r10,r6
+#endif
cmpldi cr7,r10,0
bne cr7,L(done)
- addi r9,r8,8
- cmpld r9,r7
- bge L(end_max)
- b L(loop_small)
+ cmpld r8,r7
+ beq L(end_max)
.p2align 5
L(loop_small):
ldu r12,8(r8)
cmpb r10,r12,r0
- addi r9,r8,8
cmpldi cr6,r10,0
bne cr6,L(done)
- cmpld r9,r7
- bge L(end_max)
- b L(loop_small)
+ cmpld r8,r7
+ bne L(loop_small)
+ mr r3,r4
+ blr
+
END (BP_SYM (__strnlen))
weak_alias (BP_SYM (__strnlen), BP_SYM(strnlen))
libc_hidden_builtin_def (strnlen)

View File

@@ -0,0 +1,861 @@
# commit 8a7413f9b036da83ffde491a37d9d2340bc321a7
# Author: Alan Modra <amodra@gmail.com>
# Date: Sat Aug 17 18:41:17 2013 +0930
#
# PowerPC LE strcmp and strncmp
# http://sourceware.org/ml/libc-alpha/2013-08/msg00099.html
#
# More little-endian support. I leave the main strcmp loops unchanged
# (well, except for renumbering rTMP to something other than r0, since
# it's needed in an addi insn) and modify the tail for little-endian.
#
# I noticed some of the big-endian tail code was a little untidy, so I have
# cleaned that up too.
#
# * sysdeps/powerpc/powerpc64/strcmp.S (rTMP2): Define as r0.
# (rTMP): Define as r11.
# (strcmp): Add little-endian support. Optimise tail.
# * sysdeps/powerpc/powerpc32/strcmp.S: Similarly.
# * sysdeps/powerpc/powerpc64/strncmp.S: Likewise.
# * sysdeps/powerpc/powerpc32/strncmp.S: Likewise.
# * sysdeps/powerpc/powerpc64/power4/strncmp.S: Likewise.
# * sysdeps/powerpc/powerpc32/power4/strncmp.S: Likewise.
# * sysdeps/powerpc/powerpc64/power7/strncmp.S: Likewise.
# * sysdeps/powerpc/powerpc32/power7/strncmp.S: Likewise.
#
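# The little-endian tail boils down to: mask off the bytes past the
# terminating null ("gunk"), byte-reverse both words so lower-addressed bytes
# become more significant again, and take the sign of an unsigned compare.
# A rough C rendering (hypothetical helper, GCC builtin assumed; the real code
# does the byte reverse with rlwinm/rlwimi):
#
#   #include <stdint.h>
#
#   /* keep_mask covers the bytes up to and including the first null of s1;
#      everything past it is "gunk" and must not influence the result.  */
#   static inline int word_cmp_tail_le (uint32_t w1, uint32_t w2,
#                                       uint32_t keep_mask)
#   {
#     uint32_t a = __builtin_bswap32 (w1 & keep_mask);
#     uint32_t b = __builtin_bswap32 (w2 & keep_mask);
#     return (a > b) - (a < b);
#   }
#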
diff -urN glibc-2.17-c758a686/sysdeps/powerpc/powerpc32/power4/strncmp.S glibc-2.17-c758a686/sysdeps/powerpc/powerpc32/power4/strncmp.S
--- glibc-2.17-c758a686/sysdeps/powerpc/powerpc32/power4/strncmp.S 2014-05-28 13:26:59.000000000 -0500
+++ glibc-2.17-c758a686/sysdeps/powerpc/powerpc32/power4/strncmp.S 2014-05-28 13:27:02.000000000 -0500
@@ -26,7 +26,7 @@
EALIGN (BP_SYM(strncmp), 4, 0)
-#define rTMP r0
+#define rTMP2 r0
#define rRTN r3
#define rSTR1 r3 /* first string arg */
#define rSTR2 r4 /* second string arg */
@@ -42,6 +42,7 @@
#define r7F7F r9 /* constant 0x7f7f7f7f */
#define rNEG r10 /* ~(word in s1 | 0x7f7f7f7f) */
#define rBITDIF r11 /* bits that differ in s1 & s2 words */
+#define rTMP r12
dcbt 0,rSTR1
or rTMP, rSTR2, rSTR1
@@ -80,12 +81,45 @@
we don't compare two strings as different because of gunk beyond
the end of the strings... */
+#ifdef __LITTLE_ENDIAN__
+L(endstring):
+ slwi rTMP, rTMP, 1
+ addi rTMP2, rTMP, -1
+ andc rTMP2, rTMP2, rTMP
+ and rWORD2, rWORD2, rTMP2 /* Mask off gunk. */
+ and rWORD1, rWORD1, rTMP2
+ rlwinm rTMP2, rWORD2, 8, 0xffffffff /* Byte reverse word. */
+ rlwinm rTMP, rWORD1, 8, 0xffffffff
+ rldimi rTMP2, rWORD2, 24, 32
+ rldimi rTMP, rWORD1, 24, 32
+ rlwimi rTMP2, rWORD2, 24, 16, 23
+ rlwimi rTMP, rWORD1, 24, 16, 23
+ xor. rBITDIF, rTMP, rTMP2
+ sub rRTN, rTMP, rTMP2
+ bgelr+
+ ori rRTN, rTMP2, 1
+ blr
+
+L(different):
+ lwz rWORD1, -4(rSTR1)
+ rlwinm rTMP2, rWORD2, 8, 0xffffffff /* Byte reverse word. */
+ rlwinm rTMP, rWORD1, 8, 0xffffffff
+ rldimi rTMP2, rWORD2, 24, 32
+ rldimi rTMP, rWORD1, 24, 32
+ rlwimi rTMP2, rWORD2, 24, 16, 23
+ rlwimi rTMP, rWORD1, 24, 16, 23
+ xor. rBITDIF, rTMP, rTMP2
+ sub rRTN, rTMP, rTMP2
+ bgelr+
+ ori rRTN, rTMP2, 1
+ blr
+
+#else
L(endstring):
and rTMP, r7F7F, rWORD1
beq cr1, L(equal)
add rTMP, rTMP, r7F7F
xor. rBITDIF, rWORD1, rWORD2
-
andc rNEG, rNEG, rTMP
blt- L(highbit)
cntlzw rBITDIF, rBITDIF
@@ -93,28 +127,20 @@
addi rNEG, rNEG, 7
cmpw cr1, rNEG, rBITDIF
sub rRTN, rWORD1, rWORD2
- blt- cr1, L(equal)
- srawi rRTN, rRTN, 31
- ori rRTN, rRTN, 1
- blr
+ bgelr+ cr1
L(equal):
li rRTN, 0
blr
L(different):
- lwzu rWORD1, -4(rSTR1)
+ lwz rWORD1, -4(rSTR1)
xor. rBITDIF, rWORD1, rWORD2
sub rRTN, rWORD1, rWORD2
- blt- L(highbit)
- srawi rRTN, rRTN, 31
- ori rRTN, rRTN, 1
- blr
+ bgelr+
L(highbit):
- srwi rWORD2, rWORD2, 24
- srwi rWORD1, rWORD1, 24
- sub rRTN, rWORD1, rWORD2
+ ori rRTN, rWORD2, 1
blr
-
+#endif
/* Oh well. In this case, we just do a byte-by-byte comparison. */
.align 4
diff -urN glibc-2.17-c758a686/sysdeps/powerpc/powerpc32/power7/strncmp.S glibc-2.17-c758a686/sysdeps/powerpc/powerpc32/power7/strncmp.S
--- glibc-2.17-c758a686/sysdeps/powerpc/powerpc32/power7/strncmp.S 2014-05-28 13:26:59.000000000 -0500
+++ glibc-2.17-c758a686/sysdeps/powerpc/powerpc32/power7/strncmp.S 2014-05-28 13:27:02.000000000 -0500
@@ -28,7 +28,7 @@
EALIGN (BP_SYM(strncmp),5,0)
-#define rTMP r0
+#define rTMP2 r0
#define rRTN r3
#define rSTR1 r3 /* first string arg */
#define rSTR2 r4 /* second string arg */
@@ -44,6 +44,7 @@
#define r7F7F r9 /* constant 0x7f7f7f7f */
#define rNEG r10 /* ~(word in s1 | 0x7f7f7f7f) */
#define rBITDIF r11 /* bits that differ in s1 & s2 words */
+#define rTMP r12
dcbt 0,rSTR1
nop
@@ -83,13 +84,45 @@
/* OK. We've hit the end of the string. We need to be careful that
we don't compare two strings as different because of gunk beyond
the end of the strings... */
+#ifdef __LITTLE_ENDIAN__
+L(endstring):
+ slwi rTMP, rTMP, 1
+ addi rTMP2, rTMP, -1
+ andc rTMP2, rTMP2, rTMP
+ and rWORD2, rWORD2, rTMP2 /* Mask off gunk. */
+ and rWORD1, rWORD1, rTMP2
+ rlwinm rTMP2, rWORD2, 8, 0xffffffff /* Byte reverse word. */
+ rlwinm rTMP, rWORD1, 8, 0xffffffff
+ rldimi rTMP2, rWORD2, 24, 32
+ rldimi rTMP, rWORD1, 24, 32
+ rlwimi rTMP2, rWORD2, 24, 16, 23
+ rlwimi rTMP, rWORD1, 24, 16, 23
+ xor. rBITDIF, rTMP, rTMP2
+ sub rRTN, rTMP, rTMP2
+ bgelr
+ ori rRTN, rTMP2, 1
+ blr
+
+L(different):
+ lwz rWORD1, -4(rSTR1)
+ rlwinm rTMP2, rWORD2, 8, 0xffffffff /* Byte reverse word. */
+ rlwinm rTMP, rWORD1, 8, 0xffffffff
+ rldimi rTMP2, rWORD2, 24, 32
+ rldimi rTMP, rWORD1, 24, 32
+ rlwimi rTMP2, rWORD2, 24, 16, 23
+ rlwimi rTMP, rWORD1, 24, 16, 23
+ xor. rBITDIF, rTMP, rTMP2
+ sub rRTN, rTMP, rTMP2
+ bgelr
+ ori rRTN, rTMP2, 1
+ blr
+#else
L(endstring):
and rTMP,r7F7F,rWORD1
beq cr1,L(equal)
add rTMP,rTMP,r7F7F
xor. rBITDIF,rWORD1,rWORD2
-
andc rNEG,rNEG,rTMP
blt L(highbit)
cntlzw rBITDIF,rBITDIF
@@ -97,28 +130,20 @@
addi rNEG,rNEG,7
cmpw cr1,rNEG,rBITDIF
sub rRTN,rWORD1,rWORD2
- blt cr1,L(equal)
- srawi rRTN,rRTN,31
- ori rRTN,rRTN,1
- blr
+ bgelr cr1
L(equal):
li rRTN,0
blr
L(different):
- lwzu rWORD1,-4(rSTR1)
+ lwz rWORD1,-4(rSTR1)
xor. rBITDIF,rWORD1,rWORD2
sub rRTN,rWORD1,rWORD2
- blt L(highbit)
- srawi rRTN,rRTN,31
- ori rRTN,rRTN,1
- blr
+ bgelr
L(highbit):
- srwi rWORD2,rWORD2,24
- srwi rWORD1,rWORD1,24
- sub rRTN,rWORD1,rWORD2
+ ori rRTN, rWORD2, 1
blr
-
+#endif
/* Oh well. In this case, we just do a byte-by-byte comparison. */
.align 4
diff -urN glibc-2.17-c758a686/sysdeps/powerpc/powerpc32/strcmp.S glibc-2.17-c758a686/sysdeps/powerpc/powerpc32/strcmp.S
--- glibc-2.17-c758a686/sysdeps/powerpc/powerpc32/strcmp.S 2014-05-28 13:26:59.000000000 -0500
+++ glibc-2.17-c758a686/sysdeps/powerpc/powerpc32/strcmp.S 2014-05-28 13:27:02.000000000 -0500
@@ -26,7 +26,7 @@
EALIGN (BP_SYM (strcmp), 4, 0)
-#define rTMP r0
+#define rTMP2 r0
#define rRTN r3
#define rSTR1 r3 /* first string arg */
#define rSTR2 r4 /* second string arg */
@@ -40,6 +40,7 @@
#define r7F7F r8 /* constant 0x7f7f7f7f */
#define rNEG r9 /* ~(word in s1 | 0x7f7f7f7f) */
#define rBITDIF r10 /* bits that differ in s1 & s2 words */
+#define rTMP r11
CHECK_BOUNDS_LOW (rSTR1, rTMP, rHIGH1)
CHECK_BOUNDS_LOW (rSTR2, rTMP, rHIGH2)
@@ -64,10 +65,45 @@
and. rTMP, rTMP, rNEG
cmpw cr1, rWORD1, rWORD2
beq+ L(g0)
-L(endstring):
+
/* OK. We've hit the end of the string. We need to be careful that
we don't compare two strings as different because of gunk beyond
the end of the strings... */
+#ifdef __LITTLE_ENDIAN__
+L(endstring):
+ addi rTMP2, rTMP, -1
+ andc rTMP2, rTMP2, rTMP
+ rlwimi rTMP2, rTMP2, 1, 0, 30
+ and rWORD2, rWORD2, rTMP2 /* Mask off gunk. */
+ and rWORD1, rWORD1, rTMP2
+ rlwinm rTMP2, rWORD2, 8, 0xffffffff /* Byte reverse word. */
+ rlwinm rTMP, rWORD1, 8, 0xffffffff
+ rlwimi rTMP2, rWORD2, 24, 0, 7
+ rlwimi rTMP, rWORD1, 24, 0, 7
+ rlwimi rTMP2, rWORD2, 24, 16, 23
+ rlwimi rTMP, rWORD1, 24, 16, 23
+ xor. rBITDIF, rTMP, rTMP2
+ sub rRTN, rTMP, rTMP2
+ bgelr+
+ ori rRTN, rTMP2, 1
+ blr
+
+L(different):
+ lwz rWORD1, -4(rSTR1)
+ rlwinm rTMP2, rWORD2, 8, 0xffffffff /* Byte reverse word. */
+ rlwinm rTMP, rWORD1, 8, 0xffffffff
+ rlwimi rTMP2, rWORD2, 24, 0, 7
+ rlwimi rTMP, rWORD1, 24, 0, 7
+ rlwimi rTMP2, rWORD2, 24, 16, 23
+ rlwimi rTMP, rWORD1, 24, 16, 23
+ xor. rBITDIF, rTMP, rTMP2
+ sub rRTN, rTMP, rTMP2
+ bgelr+
+ ori rRTN, rTMP2, 1
+ blr
+
+#else
+L(endstring):
and rTMP, r7F7F, rWORD1
beq cr1, L(equal)
add rTMP, rTMP, r7F7F
@@ -94,7 +130,7 @@
ori rRTN, rWORD2, 1
/* GKM FIXME: check high bounds. */
blr
-
+#endif
/* Oh well. In this case, we just do a byte-by-byte comparison. */
.align 4
diff -urN glibc-2.17-c758a686/sysdeps/powerpc/powerpc32/strncmp.S glibc-2.17-c758a686/sysdeps/powerpc/powerpc32/strncmp.S
--- glibc-2.17-c758a686/sysdeps/powerpc/powerpc32/strncmp.S 2014-05-28 13:26:59.000000000 -0500
+++ glibc-2.17-c758a686/sysdeps/powerpc/powerpc32/strncmp.S 2014-05-28 13:27:02.000000000 -0500
@@ -26,7 +26,7 @@
EALIGN (BP_SYM(strncmp), 4, 0)
-#define rTMP r0
+#define rTMP2 r0
#define rRTN r3
#define rSTR1 r3 /* first string arg */
#define rSTR2 r4 /* second string arg */
@@ -40,6 +40,7 @@
#define r7F7F r9 /* constant 0x7f7f7f7f */
#define rNEG r10 /* ~(word in s1 | 0x7f7f7f7f) */
#define rBITDIF r11 /* bits that differ in s1 & s2 words */
+#define rTMP r12
dcbt 0,rSTR1
or rTMP, rSTR2, rSTR1
@@ -78,12 +79,45 @@
we don't compare two strings as different because of gunk beyond
the end of the strings... */
+#ifdef __LITTLE_ENDIAN__
+L(endstring):
+ slwi rTMP, rTMP, 1
+ addi rTMP2, rTMP, -1
+ andc rTMP2, rTMP2, rTMP
+ and rWORD2, rWORD2, rTMP2 /* Mask off gunk. */
+ and rWORD1, rWORD1, rTMP2
+ rlwinm rTMP2, rWORD2, 8, 0xffffffff /* Byte reverse word. */
+ rlwinm rTMP, rWORD1, 8, 0xffffffff
+ rlwimi rTMP2, rWORD2, 24, 0, 7
+ rlwimi rTMP, rWORD1, 24, 0, 7
+ rlwimi rTMP2, rWORD2, 24, 16, 23
+ rlwimi rTMP, rWORD1, 24, 16, 23
+ xor. rBITDIF, rTMP, rTMP2
+ sub rRTN, rTMP, rTMP2
+ bgelr+
+ ori rRTN, rTMP2, 1
+ blr
+
+L(different):
+ lwz rWORD1, -4(rSTR1)
+ rlwinm rTMP2, rWORD2, 8, 0xffffffff /* Byte reverse word. */
+ rlwinm rTMP, rWORD1, 8, 0xffffffff
+ rlwimi rTMP2, rWORD2, 24, 0, 7
+ rlwimi rTMP, rWORD1, 24, 0, 7
+ rlwimi rTMP2, rWORD2, 24, 16, 23
+ rlwimi rTMP, rWORD1, 24, 16, 23
+ xor. rBITDIF, rTMP, rTMP2
+ sub rRTN, rTMP, rTMP2
+ bgelr+
+ ori rRTN, rTMP2, 1
+ blr
+
+#else
L(endstring):
and rTMP, r7F7F, rWORD1
beq cr1, L(equal)
add rTMP, rTMP, r7F7F
xor. rBITDIF, rWORD1, rWORD2
-
andc rNEG, rNEG, rTMP
blt- L(highbit)
cntlzw rBITDIF, rBITDIF
@@ -91,28 +125,20 @@
addi rNEG, rNEG, 7
cmpw cr1, rNEG, rBITDIF
sub rRTN, rWORD1, rWORD2
- blt- cr1, L(equal)
- srawi rRTN, rRTN, 31
- ori rRTN, rRTN, 1
- blr
+ bgelr+ cr1
L(equal):
li rRTN, 0
blr
L(different):
- lwzu rWORD1, -4(rSTR1)
+ lwz rWORD1, -4(rSTR1)
xor. rBITDIF, rWORD1, rWORD2
sub rRTN, rWORD1, rWORD2
- blt- L(highbit)
- srawi rRTN, rRTN, 31
- ori rRTN, rRTN, 1
- blr
+ bgelr+
L(highbit):
- srwi rWORD2, rWORD2, 24
- srwi rWORD1, rWORD1, 24
- sub rRTN, rWORD1, rWORD2
+ ori rRTN, rWORD2, 1
blr
-
+#endif
/* Oh well. In this case, we just do a byte-by-byte comparison. */
.align 4
diff -urN glibc-2.17-c758a686/sysdeps/powerpc/powerpc64/power4/strncmp.S glibc-2.17-c758a686/sysdeps/powerpc/powerpc64/power4/strncmp.S
--- glibc-2.17-c758a686/sysdeps/powerpc/powerpc64/power4/strncmp.S 2014-05-28 13:26:59.000000000 -0500
+++ glibc-2.17-c758a686/sysdeps/powerpc/powerpc64/power4/strncmp.S 2014-05-28 13:27:02.000000000 -0500
@@ -27,7 +27,7 @@
EALIGN (BP_SYM(strncmp), 4, 0)
CALL_MCOUNT 3
-#define rTMP r0
+#define rTMP2 r0
#define rRTN r3
#define rSTR1 r3 /* first string arg */
#define rSTR2 r4 /* second string arg */
@@ -43,6 +43,7 @@
#define r7F7F r9 /* constant 0x7f7f7f7f7f7f7f7f */
#define rNEG r10 /* ~(word in s1 | 0x7f7f7f7f7f7f7f7f) */
#define rBITDIF r11 /* bits that differ in s1 & s2 words */
+#define rTMP r12
dcbt 0,rSTR1
or rTMP, rSTR2, rSTR1
@@ -84,12 +85,59 @@
we don't compare two strings as different because of gunk beyond
the end of the strings... */
+#ifdef __LITTLE_ENDIAN__
+L(endstring):
+ addi rTMP2, rTMP, -1
+ beq cr1, L(equal)
+ andc rTMP2, rTMP2, rTMP
+ rldimi rTMP2, rTMP2, 1, 0
+ and rWORD2, rWORD2, rTMP2 /* Mask off gunk. */
+ and rWORD1, rWORD1, rTMP2
+ cmpd cr1, rWORD1, rWORD2
+ beq cr1, L(equal)
+ xor rBITDIF, rWORD1, rWORD2 /* rBITDIF has bits that differ. */
+ neg rNEG, rBITDIF
+ and rNEG, rNEG, rBITDIF /* rNEG has LS bit that differs. */
+ cntlzd rNEG, rNEG /* bitcount of the bit. */
+ andi. rNEG, rNEG, 56 /* bitcount to LS byte that differs. */
+ sld rWORD1, rWORD1, rNEG /* shift left to clear MS bytes. */
+ sld rWORD2, rWORD2, rNEG
+ xor. rBITDIF, rWORD1, rWORD2
+ sub rRTN, rWORD1, rWORD2
+ blt- L(highbit)
+ sradi rRTN, rRTN, 63 /* must return an int. */
+ ori rRTN, rRTN, 1
+ blr
+L(equal):
+ li rRTN, 0
+ blr
+
+L(different):
+ ld rWORD1, -8(rSTR1)
+ xor rBITDIF, rWORD1, rWORD2 /* rBITDIF has bits that differ. */
+ neg rNEG, rBITDIF
+ and rNEG, rNEG, rBITDIF /* rNEG has LS bit that differs. */
+ cntlzd rNEG, rNEG /* bitcount of the bit. */
+ andi. rNEG, rNEG, 56 /* bitcount to LS byte that differs. */
+ sld rWORD1, rWORD1, rNEG /* shift left to clear MS bytes. */
+ sld rWORD2, rWORD2, rNEG
+ xor. rBITDIF, rWORD1, rWORD2
+ sub rRTN, rWORD1, rWORD2
+ blt- L(highbit)
+ sradi rRTN, rRTN, 63
+ ori rRTN, rRTN, 1
+ blr
+L(highbit):
+ sradi rRTN, rWORD2, 63
+ ori rRTN, rRTN, 1
+ blr
+
+#else
L(endstring):
and rTMP, r7F7F, rWORD1
beq cr1, L(equal)
add rTMP, rTMP, r7F7F
xor. rBITDIF, rWORD1, rWORD2
-
andc rNEG, rNEG, rTMP
blt- L(highbit)
cntlzd rBITDIF, rBITDIF
@@ -98,7 +146,7 @@
cmpd cr1, rNEG, rBITDIF
sub rRTN, rWORD1, rWORD2
blt- cr1, L(equal)
- sradi rRTN, rRTN, 63
+ sradi rRTN, rRTN, 63 /* must return an int. */
ori rRTN, rRTN, 1
blr
L(equal):
@@ -106,7 +154,7 @@
blr
L(different):
- ldu rWORD1, -8(rSTR1)
+ ld rWORD1, -8(rSTR1)
xor. rBITDIF, rWORD1, rWORD2
sub rRTN, rWORD1, rWORD2
blt- L(highbit)
@@ -114,11 +162,10 @@
ori rRTN, rRTN, 1
blr
L(highbit):
- srdi rWORD2, rWORD2, 56
- srdi rWORD1, rWORD1, 56
- sub rRTN, rWORD1, rWORD2
+ sradi rRTN, rWORD2, 63
+ ori rRTN, rRTN, 1
blr
-
+#endif
/* Oh well. In this case, we just do a byte-by-byte comparison. */
.align 4
diff -urN glibc-2.17-c758a686/sysdeps/powerpc/powerpc64/power7/strncmp.S glibc-2.17-c758a686/sysdeps/powerpc/powerpc64/power7/strncmp.S
--- glibc-2.17-c758a686/sysdeps/powerpc/powerpc64/power7/strncmp.S 2014-05-28 13:26:59.000000000 -0500
+++ glibc-2.17-c758a686/sysdeps/powerpc/powerpc64/power7/strncmp.S 2014-05-28 13:27:02.000000000 -0500
@@ -29,7 +29,7 @@
EALIGN (BP_SYM(strncmp),5,0)
CALL_MCOUNT 3
-#define rTMP r0
+#define rTMP2 r0
#define rRTN r3
#define rSTR1 r3 /* first string arg */
#define rSTR2 r4 /* second string arg */
@@ -45,6 +45,7 @@
#define r7F7F r9 /* constant 0x7f7f7f7f7f7f7f7f */
#define rNEG r10 /* ~(word in s1 | 0x7f7f7f7f7f7f7f7f) */
#define rBITDIF r11 /* bits that differ in s1 & s2 words */
+#define rTMP r12
dcbt 0,rSTR1
nop
@@ -88,12 +89,57 @@
we don't compare two strings as different because of gunk beyond
the end of the strings... */
+#ifdef __LITTLE_ENDIAN__
+L(endstring):
+ addi rTMP2, rTMP, -1
+ beq cr1, L(equal)
+ andc rTMP2, rTMP2, rTMP
+ rldimi rTMP2, rTMP2, 1, 0
+ and rWORD2, rWORD2, rTMP2 /* Mask off gunk. */
+ and rWORD1, rWORD1, rTMP2
+ cmpd cr1, rWORD1, rWORD2
+ beq cr1, L(equal)
+ cmpb rBITDIF, rWORD1, rWORD2 /* 0xff on equal bytes. */
+ addi rNEG, rBITDIF, 1
+ orc rNEG, rNEG, rBITDIF /* 0's below LS differing byte. */
+ sldi rNEG, rNEG, 8 /* 1's above LS differing byte. */
+ andc rWORD1, rWORD1, rNEG /* mask off MS bytes. */
+ andc rWORD2, rWORD2, rNEG
+ xor. rBITDIF, rWORD1, rWORD2
+ sub rRTN, rWORD1, rWORD2
+ blt L(highbit)
+ sradi rRTN, rRTN, 63 /* must return an int. */
+ ori rRTN, rRTN, 1
+ blr
+L(equal):
+ li rRTN, 0
+ blr
+
+L(different):
+ ld rWORD1, -8(rSTR1)
+ cmpb rBITDIF, rWORD1, rWORD2 /* 0xff on equal bytes. */
+ addi rNEG, rBITDIF, 1
+ orc rNEG, rNEG, rBITDIF /* 0's below LS differing byte. */
+ sldi rNEG, rNEG, 8 /* 1's above LS differing byte. */
+ andc rWORD1, rWORD1, rNEG /* mask off MS bytes. */
+ andc rWORD2, rWORD2, rNEG
+ xor. rBITDIF, rWORD1, rWORD2
+ sub rRTN, rWORD1, rWORD2
+ blt L(highbit)
+ sradi rRTN, rRTN, 63
+ ori rRTN, rRTN, 1
+ blr
+L(highbit):
+ sradi rRTN, rWORD2, 63
+ ori rRTN, rRTN, 1
+ blr
+
+#else
L(endstring):
and rTMP,r7F7F,rWORD1
beq cr1,L(equal)
add rTMP,rTMP,r7F7F
xor. rBITDIF,rWORD1,rWORD2
-
andc rNEG,rNEG,rTMP
blt L(highbit)
cntlzd rBITDIF,rBITDIF
@@ -102,7 +148,7 @@
cmpd cr1,rNEG,rBITDIF
sub rRTN,rWORD1,rWORD2
blt cr1,L(equal)
- sradi rRTN,rRTN,63
+ sradi rRTN,rRTN,63 /* must return an int. */
ori rRTN,rRTN,1
blr
L(equal):
@@ -110,7 +156,7 @@
blr
L(different):
- ldu rWORD1,-8(rSTR1)
+ ld rWORD1,-8(rSTR1)
xor. rBITDIF,rWORD1,rWORD2
sub rRTN,rWORD1,rWORD2
blt L(highbit)
@@ -118,11 +164,10 @@
ori rRTN,rRTN,1
blr
L(highbit):
- srdi rWORD2,rWORD2,56
- srdi rWORD1,rWORD1,56
- sub rRTN,rWORD1,rWORD2
+ sradi rRTN,rWORD2,63
+ ori rRTN,rRTN,1
blr
-
+#endif
/* Oh well. In this case, we just do a byte-by-byte comparison. */
.align 4
diff -urN glibc-2.17-c758a686/sysdeps/powerpc/powerpc64/strcmp.S glibc-2.17-c758a686/sysdeps/powerpc/powerpc64/strcmp.S
--- glibc-2.17-c758a686/sysdeps/powerpc/powerpc64/strcmp.S 2014-05-28 13:26:59.000000000 -0500
+++ glibc-2.17-c758a686/sysdeps/powerpc/powerpc64/strcmp.S 2014-05-28 13:37:15.000000000 -0500
@@ -27,7 +27,7 @@
EALIGN (BP_SYM(strcmp), 4, 0)
CALL_MCOUNT 2
-#define rTMP r0
+#define rTMP2 r0
#define rRTN r3
#define rSTR1 r3 /* first string arg */
#define rSTR2 r4 /* second string arg */
@@ -46,6 +46,7 @@
#define r7F7F r8 /* constant 0x7f7f7f7f7f7f7f7f */
#define rNEG r9 /* ~(word in s1 | 0x7f7f7f7f7f7f7f7f) */
#define rBITDIF r10 /* bits that differ in s1 & s2 words */
+#define rTMP r11
CHECK_BOUNDS_LOW (rSTR1, rTMP, rHIGH1)
CHECK_BOUNDS_LOW (rSTR2, rTMP, rHIGH2)
@@ -72,19 +73,66 @@
ldu rWORD2, 8(rSTR2)
L(g1): add rTMP, rFEFE, rWORD1
nor rNEG, r7F7F, rWORD1
-
and. rTMP, rTMP, rNEG
cmpd cr1, rWORD1, rWORD2
beq+ L(g0)
-L(endstring):
+
/* OK. We've hit the end of the string. We need to be careful that
we don't compare two strings as different because of gunk beyond
the end of the strings... */
+#ifdef __LITTLE_ENDIAN__
+L(endstring):
+ addi rTMP2, rTMP, -1
+ beq cr1, L(equal)
+ andc rTMP2, rTMP2, rTMP
+ rldimi rTMP2, rTMP2, 1, 0
+ and rWORD2, rWORD2, rTMP2 /* Mask off gunk. */
+ and rWORD1, rWORD1, rTMP2
+ cmpd cr1, rWORD1, rWORD2
+ beq cr1, L(equal)
+ xor rBITDIF, rWORD1, rWORD2 /* rBITDIF has bits that differ. */
+ neg rNEG, rBITDIF
+ and rNEG, rNEG, rBITDIF /* rNEG has LS bit that differs. */
+ cntlzd rNEG, rNEG /* bitcount of the bit. */
+ andi. rNEG, rNEG, 56 /* bitcount to LS byte that differs. */
+ sld rWORD1, rWORD1, rNEG /* shift left to clear MS bytes. */
+ sld rWORD2, rWORD2, rNEG
+ xor. rBITDIF, rWORD1, rWORD2
+ sub rRTN, rWORD1, rWORD2
+ blt- L(highbit)
+ sradi rRTN, rRTN, 63 /* must return an int. */
+ ori rRTN, rRTN, 1
+ blr
+L(equal):
+ li rRTN, 0
+ blr
+
+L(different):
+ ld rWORD1, -8(rSTR1)
+ xor rBITDIF, rWORD1, rWORD2 /* rBITDIF has bits that differ. */
+ neg rNEG, rBITDIF
+ and rNEG, rNEG, rBITDIF /* rNEG has LS bit that differs. */
+ cntlzd rNEG, rNEG /* bitcount of the bit. */
+ andi. rNEG, rNEG, 56 /* bitcount to LS byte that differs. */
+ sld rWORD1, rWORD1, rNEG /* shift left to clear MS bytes. */
+ sld rWORD2, rWORD2, rNEG
+ xor. rBITDIF, rWORD1, rWORD2
+ sub rRTN, rWORD1, rWORD2
+ blt- L(highbit)
+ sradi rRTN, rRTN, 63
+ ori rRTN, rRTN, 1
+ blr
+L(highbit):
+ sradi rRTN, rWORD2, 63
+ ori rRTN, rRTN, 1
+ blr
+
+#else
+L(endstring):
and rTMP, r7F7F, rWORD1
beq cr1, L(equal)
add rTMP, rTMP, r7F7F
xor. rBITDIF, rWORD1, rWORD2
-
andc rNEG, rNEG, rTMP
blt- L(highbit)
cntlzd rBITDIF, rBITDIF
@@ -93,7 +141,7 @@
cmpd cr1, rNEG, rBITDIF
sub rRTN, rWORD1, rWORD2
blt- cr1, L(equal)
- sradi rRTN, rRTN, 63
+ sradi rRTN, rRTN, 63 /* must return an int. */
ori rRTN, rRTN, 1
blr
L(equal):
@@ -110,12 +158,11 @@
ori rRTN, rRTN, 1
blr
L(highbit):
- srdi rWORD2, rWORD2, 56
- srdi rWORD1, rWORD1, 56
- sub rRTN, rWORD1, rWORD2
+ sradi rRTN, rWORD2, 63
+ ori rRTN, rRTN, 1
/* GKM FIXME: check high bounds. */
blr
-
+#endif
/* Oh well. In this case, we just do a byte-by-byte comparison. */
.align 4
diff -urN glibc-2.17-c758a686/sysdeps/powerpc/powerpc64/strncmp.S glibc-2.17-c758a686/sysdeps/powerpc/powerpc64/strncmp.S
--- glibc-2.17-c758a686/sysdeps/powerpc/powerpc64/strncmp.S 2014-05-28 13:26:59.000000000 -0500
+++ glibc-2.17-c758a686/sysdeps/powerpc/powerpc64/strncmp.S 2014-05-28 13:38:31.000000000 -0500
@@ -27,7 +27,7 @@
EALIGN (BP_SYM(strncmp), 4, 0)
CALL_MCOUNT 3
-#define rTMP r0
+#define rTMP2 r0
#define rRTN r3
#define rSTR1 r3 /* first string arg */
#define rSTR2 r4 /* second string arg */
@@ -41,6 +41,7 @@
#define r7F7F r9 /* constant 0x7f7f7f7f7f7f7f7f */
#define rNEG r10 /* ~(word in s1 | 0x7f7f7f7f7f7f7f7f) */
#define rBITDIF r11 /* bits that differ in s1 & s2 words */
+#define rTMP r12
dcbt 0,rSTR1
or rTMP, rSTR2, rSTR1
@@ -81,13 +82,60 @@
/* OK. We've hit the end of the string. We need to be careful that
we don't compare two strings as different because of gunk beyond
the end of the strings... */
-
+
+#ifdef __LITTLE_ENDIAN__
+L(endstring):
+ addi rTMP2, rTMP, -1
+ beq cr1, L(equal)
+ andc rTMP2, rTMP2, rTMP
+ rldimi rTMP2, rTMP2, 1, 0
+ and rWORD2, rWORD2, rTMP2 /* Mask off gunk. */
+ and rWORD1, rWORD1, rTMP2
+ cmpd cr1, rWORD1, rWORD2
+ beq cr1, L(equal)
+ xor rBITDIF, rWORD1, rWORD2 /* rBITDIF has bits that differ. */
+ neg rNEG, rBITDIF
+ and rNEG, rNEG, rBITDIF /* rNEG has LS bit that differs. */
+ cntlzd rNEG, rNEG /* bitcount of the bit. */
+ andi. rNEG, rNEG, 56 /* bitcount to LS byte that differs. */
+ sld rWORD1, rWORD1, rNEG /* shift left to clear MS bytes. */
+ sld rWORD2, rWORD2, rNEG
+ xor. rBITDIF, rWORD1, rWORD2
+ sub rRTN, rWORD1, rWORD2
+ blt- L(highbit)
+ sradi rRTN, rRTN, 63 /* must return an int. */
+ ori rRTN, rRTN, 1
+ blr
+L(equal):
+ li rRTN, 0
+ blr
+
+L(different):
+ ld rWORD1, -8(rSTR1)
+ xor rBITDIF, rWORD1, rWORD2 /* rBITDIF has bits that differ. */
+ neg rNEG, rBITDIF
+ and rNEG, rNEG, rBITDIF /* rNEG has LS bit that differs. */
+ cntlzd rNEG, rNEG /* bitcount of the bit. */
+ andi. rNEG, rNEG, 56 /* bitcount to LS byte that differs. */
+ sld rWORD1, rWORD1, rNEG /* shift left to clear MS bytes. */
+ sld rWORD2, rWORD2, rNEG
+ xor. rBITDIF, rWORD1, rWORD2
+ sub rRTN, rWORD1, rWORD2
+ blt- L(highbit)
+ sradi rRTN, rRTN, 63
+ ori rRTN, rRTN, 1
+ blr
+L(highbit):
+ sradi rRTN, rWORD2, 63
+ ori rRTN, rRTN, 1
+ blr
+
+#else
L(endstring):
and rTMP, r7F7F, rWORD1
beq cr1, L(equal)
add rTMP, rTMP, r7F7F
xor. rBITDIF, rWORD1, rWORD2
-
andc rNEG, rNEG, rTMP
blt- L(highbit)
cntlzd rBITDIF, rBITDIF
@@ -96,7 +144,7 @@
cmpd cr1, rNEG, rBITDIF
sub rRTN, rWORD1, rWORD2
blt- cr1, L(equal)
- sradi rRTN, rRTN, 63
+ sradi rRTN, rRTN, 63 /* must return an int. */
ori rRTN, rRTN, 1
blr
L(equal):
@@ -104,7 +152,7 @@
blr
L(different):
- ldu rWORD1, -8(rSTR1)
+ ld rWORD1, -8(rSTR1)
xor. rBITDIF, rWORD1, rWORD2
sub rRTN, rWORD1, rWORD2
blt- L(highbit)
@@ -112,11 +160,10 @@
ori rRTN, rRTN, 1
blr
L(highbit):
- srdi rWORD2, rWORD2, 56
- srdi rWORD1, rWORD1, 56
- sub rRTN, rWORD1, rWORD2
+ sradi rRTN, rWORD2, 63
+ ori rRTN, rRTN, 1
blr
-
+#endif
/* Oh well. In this case, we just do a byte-by-byte comparison. */
.align 4


@ -0,0 +1,167 @@
# commit 43b84013714c46e6dcae4a5564c5527777ad5e08
# Author: Alan Modra <amodra@gmail.com>
# Date: Sat Aug 17 18:45:31 2013 +0930
#
# PowerPC LE strcpy
# http://sourceware.org/ml/libc-alpha/2013-08/msg00100.html
#
# The strcpy changes for little-endian are quite straight-forward, just
# a matter of rotating the last word differently.
#
# I'll note that the powerpc64 version of stpcpy is just begging to be
# converted to use 64-bit loads and stores..
#
# * sysdeps/powerpc/powerpc64/strcpy.S: Add little-endian support:
# * sysdeps/powerpc/powerpc32/strcpy.S: Likewise.
# * sysdeps/powerpc/powerpc64/stpcpy.S: Likewise.
# * sysdeps/powerpc/powerpc32/stpcpy.S: Likewise.
#
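An illustrative C sketch of the point above (not part of the upstream patch; the
function name is made up): the word-at-a-time copy ends with a word that holds the
terminating NUL, and the tail has to store that word's bytes in memory order.  On
little-endian the first byte in memory is the least significant one, on big-endian
the most significant one, which is all the __LITTLE_ENDIAN__ branches in the diff
below change.

    #include <stdint.h>
    #include <stddef.h>

    /* Store the bytes of the final 32-bit chunk w (known to contain the
       terminating NUL) into dst in memory order, stopping after the NUL.
       The real code does this with rlwinm./stb(u) pairs instead of a loop. */
    static size_t
    store_last_word (unsigned char *dst, uint32_t w)
    {
      for (size_t i = 0; i < 4; i++)
        {
    #ifdef __LITTLE_ENDIAN__
          unsigned char b = (w >> (8 * i)) & 0xff;        /* low byte first */
    #else
          unsigned char b = (w >> (8 * (3 - i))) & 0xff;  /* high byte first */
    #endif
          dst[i] = b;
          if (b == 0)
            return i;
        }
      return 4;
    }
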
diff -urN glibc-2.17-c758a686/sysdeps/powerpc/powerpc32/stpcpy.S glibc-2.17-c758a686/sysdeps/powerpc/powerpc32/stpcpy.S
--- glibc-2.17-c758a686/sysdeps/powerpc/powerpc32/stpcpy.S 2014-05-28 13:40:01.000000000 -0500
+++ glibc-2.17-c758a686/sysdeps/powerpc/powerpc32/stpcpy.S 2014-05-28 13:40:01.000000000 -0500
@@ -74,7 +74,22 @@
mr rALT, rWORD
/* We've hit the end of the string. Do the rest byte-by-byte. */
-L(g1): rlwinm. rTMP, rALT, 8, 24, 31
+L(g1):
+#ifdef __LITTLE_ENDIAN__
+ rlwinm. rTMP, rALT, 0, 24, 31
+ stbu rALT, 4(rDEST)
+ beqlr-
+ rlwinm. rTMP, rALT, 24, 24, 31
+ stbu rTMP, 1(rDEST)
+ beqlr-
+ rlwinm. rTMP, rALT, 16, 24, 31
+ stbu rTMP, 1(rDEST)
+ beqlr-
+ rlwinm rTMP, rALT, 8, 24, 31
+ stbu rTMP, 1(rDEST)
+ blr
+#else
+ rlwinm. rTMP, rALT, 8, 24, 31
stbu rTMP, 4(rDEST)
beqlr-
rlwinm. rTMP, rALT, 16, 24, 31
@@ -87,6 +102,7 @@
CHECK_BOUNDS_HIGH (rDEST, rHIGH, twlgt)
STORE_RETURN_VALUE (rDEST)
blr
+#endif
/* Oh well. In this case, we just do a byte-by-byte copy. */
.align 4
diff -urN glibc-2.17-c758a686/sysdeps/powerpc/powerpc32/strcpy.S glibc-2.17-c758a686/sysdeps/powerpc/powerpc32/strcpy.S
--- glibc-2.17-c758a686/sysdeps/powerpc/powerpc32/strcpy.S 2014-05-28 13:40:01.000000000 -0500
+++ glibc-2.17-c758a686/sysdeps/powerpc/powerpc32/strcpy.S 2014-05-28 13:40:01.000000000 -0500
@@ -78,7 +78,22 @@
mr rALT, rWORD
/* We've hit the end of the string. Do the rest byte-by-byte. */
-L(g1): rlwinm. rTMP, rALT, 8, 24, 31
+L(g1):
+#ifdef __LITTLE_ENDIAN__
+ rlwinm. rTMP, rALT, 0, 24, 31
+ stb rALT, 4(rDEST)
+ beqlr-
+ rlwinm. rTMP, rALT, 24, 24, 31
+ stb rTMP, 5(rDEST)
+ beqlr-
+ rlwinm. rTMP, rALT, 16, 24, 31
+ stb rTMP, 6(rDEST)
+ beqlr-
+ rlwinm rTMP, rALT, 8, 24, 31
+ stb rTMP, 7(rDEST)
+ blr
+#else
+ rlwinm. rTMP, rALT, 8, 24, 31
stb rTMP, 4(rDEST)
beqlr-
rlwinm. rTMP, rALT, 16, 24, 31
@@ -90,6 +105,7 @@
stb rALT, 7(rDEST)
/* GKM FIXME: check high bound. */
blr
+#endif
/* Oh well. In this case, we just do a byte-by-byte copy. */
.align 4
diff -urN glibc-2.17-c758a686/sysdeps/powerpc/powerpc64/stpcpy.S glibc-2.17-c758a686/sysdeps/powerpc/powerpc64/stpcpy.S
--- glibc-2.17-c758a686/sysdeps/powerpc/powerpc64/stpcpy.S 2014-05-28 13:40:01.000000000 -0500
+++ glibc-2.17-c758a686/sysdeps/powerpc/powerpc64/stpcpy.S 2014-05-28 13:40:01.000000000 -0500
@@ -75,7 +75,22 @@
mr rALT, rWORD
/* We've hit the end of the string. Do the rest byte-by-byte. */
-L(g1): rlwinm. rTMP, rALT, 8, 24, 31
+L(g1):
+#ifdef __LITTLE_ENDIAN__
+ rlwinm. rTMP, rALT, 0, 24, 31
+ stbu rALT, 4(rDEST)
+ beqlr-
+ rlwinm. rTMP, rALT, 24, 24, 31
+ stbu rTMP, 1(rDEST)
+ beqlr-
+ rlwinm. rTMP, rALT, 16, 24, 31
+ stbu rTMP, 1(rDEST)
+ beqlr-
+ rlwinm rTMP, rALT, 8, 24, 31
+ stbu rTMP, 1(rDEST)
+ blr
+#else
+ rlwinm. rTMP, rALT, 8, 24, 31
stbu rTMP, 4(rDEST)
beqlr-
rlwinm. rTMP, rALT, 16, 24, 31
@@ -88,6 +103,7 @@
CHECK_BOUNDS_HIGH (rDEST, rHIGH, twlgt)
STORE_RETURN_VALUE (rDEST)
blr
+#endif
/* Oh well. In this case, we just do a byte-by-byte copy. */
.align 4
diff -urN glibc-2.17-c758a686/sysdeps/powerpc/powerpc64/strcpy.S glibc-2.17-c758a686/sysdeps/powerpc/powerpc64/strcpy.S
--- glibc-2.17-c758a686/sysdeps/powerpc/powerpc64/strcpy.S 2014-05-28 13:40:01.000000000 -0500
+++ glibc-2.17-c758a686/sysdeps/powerpc/powerpc64/strcpy.S 2014-05-28 13:40:01.000000000 -0500
@@ -90,6 +90,32 @@
mr rALT, rWORD
/* We've hit the end of the string. Do the rest byte-by-byte. */
L(g1):
+#ifdef __LITTLE_ENDIAN__
+ extrdi. rTMP, rALT, 8, 56
+ stb rALT, 8(rDEST)
+ beqlr-
+ extrdi. rTMP, rALT, 8, 48
+ stb rTMP, 9(rDEST)
+ beqlr-
+ extrdi. rTMP, rALT, 8, 40
+ stb rTMP, 10(rDEST)
+ beqlr-
+ extrdi. rTMP, rALT, 8, 32
+ stb rTMP, 11(rDEST)
+ beqlr-
+ extrdi. rTMP, rALT, 8, 24
+ stb rTMP, 12(rDEST)
+ beqlr-
+ extrdi. rTMP, rALT, 8, 16
+ stb rTMP, 13(rDEST)
+ beqlr-
+ extrdi. rTMP, rALT, 8, 8
+ stb rTMP, 14(rDEST)
+ beqlr-
+ extrdi rTMP, rALT, 8, 0
+ stb rTMP, 15(rDEST)
+ blr
+#else
extrdi. rTMP, rALT, 8, 0
stb rTMP, 8(rDEST)
beqlr-
@@ -114,6 +140,7 @@
stb rALT, 15(rDEST)
/* GKM FIXME: check high bound. */
blr
+#endif
/* Oh well. In this case, we just do a byte-by-byte copy. */
.align 4


@ -0,0 +1,642 @@
# commit 664318c3eb07032e2bfcf47cb2aa3c89280c19e7
# Author: Alan Modra <amodra@gmail.com>
# Date: Sat Aug 17 18:46:05 2013 +0930
#
# PowerPC LE strchr
# http://sourceware.org/ml/libc-alpha/2013-08/msg00101.html
#
# Adds little-endian support to optimised strchr assembly. I've also
# tweaked the big-endian code a little. In power7/strchr.S there's a
# check in the tail of the function that we didn't match 0 before
# finding a c match, done by comparing leading zero counts. It's just
# as valid, and quicker, to compare the raw output from cmpb.
#
# Another little tweak is to use rldimi/insrdi in place of rlwimi for
# the power7 strchr functions. Since rlwimi is cracked, it is a few
# cycles slower. rldimi can be used on the 32-bit power7 functions
# too.
#
# * sysdeps/powerpc/powerpc64/power7/strchr.S (strchr): Add little-endian
# support. Correct typos, formatting. Optimize tail. Use insrdi
# rather than rlwimi.
# * sysdeps/powerpc/powerpc32/power7/strchr.S: Likewise.
# * sysdeps/powerpc/powerpc64/power7/strchrnul.S (__strchrnul): Add
# little-endian support. Correct typos.
# * sysdeps/powerpc/powerpc32/power7/strchrnul.S: Likewise. Use insrdi
# rather than rlwimi.
# * sysdeps/powerpc/powerpc64/strchr.S (rTMP4, rTMP5): Define. Use
# in loop and entry code to keep "and." results.
# (strchr): Add little-endian support. Comment. Move cntlzd
# earlier in tail.
# * sysdeps/powerpc/powerpc32/strchr.S: Likewise.
#
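A hedged C sketch of the tail computation described above (not from the patch; the
function name is made up): cmpb leaves 0xff in every byte that matched, and the byte
index of the first match in memory is a leading-zero count on big-endian but a
trailing-zero count on little-endian.  The addi/andc/popcnt sequences in the
little-endian paths compute that trailing-zero count, and comparing the two masks
(raw on big-endian, the below-lowest-bit form on little-endian) decides whether the
c match or the nul match comes first.

    #include <stdint.h>

    /* Byte index (0..3) of the first matching byte in memory, given a
       cmpb-style mask m (0xff in each matching byte); assumes m != 0. */
    static unsigned int
    first_match_index (uint32_t m)
    {
    #ifdef __LITTLE_ENDIAN__
      uint32_t below = (m - 1) & ~m;          /* ones below the lowest set bit */
      return __builtin_popcount (below) / 8;  /* trailing zero bits -> bytes */
    #else
      return __builtin_clz (m) / 8;           /* leading zero bits -> bytes */
    #endif
    }
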
Index: glibc-2.17-c758a686/sysdeps/powerpc/powerpc32/power7/strchr.S
===================================================================
--- glibc-2.17-c758a686/sysdeps/powerpc/powerpc32/power7/strchr.S.orig
+++ glibc-2.17-c758a686/sysdeps/powerpc/powerpc32/power7/strchr.S
@@ -37,8 +37,8 @@ ENTRY (BP_SYM(strchr))
beq cr7,L(null_match)
/* Replicate byte to word. */
- rlwimi r4,r4,8,16,23
- rlwimi r4,r4,16,0,15
+ insrdi r4,r4,8,48
+ insrdi r4,r4,16,32
/* Now r4 has a word of c bytes and r0 has
a word of null bytes. */
@@ -48,11 +48,17 @@ ENTRY (BP_SYM(strchr))
/* Move the words left and right to discard the bits that are
not part of the string and to bring them back as zeros. */
-
+#ifdef __LITTLE_ENDIAN__
+ srw r10,r10,r6
+ srw r11,r11,r6
+ slw r10,r10,r6
+ slw r11,r11,r6
+#else
slw r10,r10,r6
slw r11,r11,r6
srw r10,r10,r6
srw r11,r11,r6
+#endif
or r5,r10,r11 /* OR the results to speed things up. */
cmpwi cr7,r5,0 /* If r5 == 0, no c or null bytes
have been found. */
@@ -67,7 +73,7 @@ ENTRY (BP_SYM(strchr))
/* Handle WORD2 of pair. */
lwzu r12,4(r8)
- cmpb r10,r12,r4
+ cmpb r10,r12,r4
cmpb r11,r12,r0
or r5,r10,r11
cmpwi cr7,r5,0
@@ -102,22 +108,31 @@ L(loop):
bne cr6,L(done)
/* The c/null byte must be in the second word. Adjust the address
- again and move the result of cmpb to r10 so we can calculate the
- pointer. */
+ again and move the result of cmpb to r10/r11 so we can calculate
+ the pointer. */
mr r10,r6
mr r11,r7
addi r8,r8,4
- /* r5 has the output of the cmpb instruction, that is, it contains
+ /* r10/r11 have the output of the cmpb instructions, that is,
0xff in the same position as the c/null byte in the original
word from the string. Use that to calculate the pointer. */
L(done):
- cntlzw r4,r10 /* Count leading zeroes before c matches. */
- cntlzw r0,r11 /* Count leading zeroes before null matches. */
- cmplw cr7,r4,r0
+#ifdef __LITTLE_ENDIAN__
+ addi r3,r10,-1
+ andc r3,r3,r10
+ popcntw r0,r3
+ addi r4,r11,-1
+ andc r4,r4,r11
+ cmplw cr7,r3,r4
+ bgt cr7,L(no_match)
+#else
+ cntlzw r0,r10 /* Count leading zeros before c matches. */
+ cmplw cr7,r11,r10
bgt cr7,L(no_match)
- srwi r0,r4,3 /* Convert leading zeroes to bytes. */
+#endif
+ srwi r0,r0,3 /* Convert leading zeros to bytes. */
add r3,r8,r0 /* Return address of the matching c byte
or null in case c was not found. */
blr
@@ -135,10 +150,14 @@ L(null_match):
cmpb r5,r12,r0 /* Compare each byte against null bytes. */
/* Move the words left and right to discard the bits that are
- not part of the string and to bring them back as zeros. */
-
+ not part of the string and bring them back as zeros. */
+#ifdef __LITTLE_ENDIAN__
+ srw r5,r5,r6
+ slw r5,r5,r6
+#else
slw r5,r5,r6
srw r5,r5,r6
+#endif
cmpwi cr7,r5,0 /* If r10 == 0, no c or null bytes
have been found. */
bne cr7,L(done_null)
@@ -193,7 +212,13 @@ L(loop_null):
0xff in the same position as the null byte in the original
word from the string. Use that to calculate the pointer. */
L(done_null):
+#ifdef __LITTLE_ENDIAN__
+ addi r0,r5,-1
+ andc r0,r0,r5
+ popcntw r0,r0
+#else
cntlzw r0,r5 /* Count leading zeros before the match. */
+#endif
srwi r0,r0,3 /* Convert leading zeros to bytes. */
add r3,r8,r0 /* Return address of the matching null byte. */
blr
Index: glibc-2.17-c758a686/sysdeps/powerpc/powerpc32/power7/strchrnul.S
===================================================================
--- glibc-2.17-c758a686/sysdeps/powerpc/powerpc32/power7/strchrnul.S.orig
+++ glibc-2.17-c758a686/sysdeps/powerpc/powerpc32/power7/strchrnul.S
@@ -29,8 +29,8 @@ ENTRY (BP_SYM(__strchrnul))
clrrwi r8,r3,2 /* Align the address to word boundary. */
/* Replicate byte to word. */
- rlwimi r4,r4,8,16,23
- rlwimi r4,r4,16,0,15
+ insrdi r4,r4,8,48
+ insrdi r4,r4,16,32
rlwinm r6,r3,3,27,28 /* Calculate padding. */
lwz r12,0(r8) /* Load word from memory. */
@@ -45,10 +45,17 @@ ENTRY (BP_SYM(__strchrnul))
/* Move the words left and right to discard the bits that are
not part of the string and bring them back as zeros. */
+#ifdef __LITTLE_ENDIAN__
+ srw r10,r10,r6
+ srw r9,r9,r6
+ slw r10,r10,r6
+ slw r9,r9,r6
+#else
slw r10,r10,r6
slw r9,r9,r6
srw r10,r10,r6
srw r9,r9,r6
+#endif
or r5,r9,r10 /* OR the results to speed things up. */
cmpwi cr7,r5,0 /* If r5 == 0, no c or null bytes
have been found. */
@@ -56,7 +63,7 @@ ENTRY (BP_SYM(__strchrnul))
mtcrf 0x01,r8
- /* Are we now aligned to a quadword boundary? If so, skip to
+ /* Are we now aligned to a doubleword boundary? If so, skip to
the main loop. Otherwise, go through the alignment code. */
bt 29,L(loop)
@@ -78,7 +85,7 @@ L(loop):
single register for speed. This is an attempt
to speed up the null-checking process for bigger strings. */
lwz r12,4(r8)
- lwzu r11,8(r8)
+ lwzu r11,8(r8)
cmpb r10,r12,r0
cmpb r9,r12,r4
cmpb r6,r11,r0
@@ -97,9 +104,9 @@ L(loop):
addi r8,r8,-4
bne cr6,L(done)
- /* The c/null byte must be in the second word. Adjust the
- address again and move the result of cmpb to r10 so we can calculate
- the pointer. */
+ /* The c/null byte must be in the second word. Adjust the address
+ again and move the result of cmpb to r5 so we can calculate the
+ pointer. */
mr r5,r10
addi r8,r8,4
@@ -107,7 +114,13 @@ L(loop):
0xff in the same position as the c/null byte in the original
word from the string. Use that to calculate the pointer. */
L(done):
+#ifdef __LITTLE_ENDIAN__
+ addi r0,r5,-1
+ andc r0,r0,r5
+ popcntw r0,r0
+#else
cntlzw r0,r5 /* Count leading zeros before the match. */
+#endif
srwi r0,r0,3 /* Convert leading zeros to bytes. */
add r3,r8,r0 /* Return address of matching c/null byte. */
blr
Index: glibc-2.17-c758a686/sysdeps/powerpc/powerpc32/strchr.S
===================================================================
--- glibc-2.17-c758a686/sysdeps/powerpc/powerpc32/strchr.S.orig
+++ glibc-2.17-c758a686/sysdeps/powerpc/powerpc32/strchr.S
@@ -44,6 +44,8 @@ ENTRY (BP_SYM (strchr))
#define rIGN r10 /* number of bits we should ignore in the first word */
#define rMASK r11 /* mask with the bits to ignore set to 0 */
#define rTMP3 r12
+#define rTMP4 rIGN
+#define rTMP5 rMASK
CHECK_BOUNDS_LOW (rSTR, rTMP1, rTMP2)
STORE_RETURN_BOUNDS (rTMP1, rTMP2)
@@ -59,53 +61,74 @@ ENTRY (BP_SYM (strchr))
addi r7F7F, r7F7F, 0x7f7f
/* Test the first (partial?) word. */
lwz rWORD, 0(rSTR)
+#ifdef __LITTLE_ENDIAN__
+ slw rMASK, rMASK, rIGN
+#else
srw rMASK, rMASK, rIGN
+#endif
orc rWORD, rWORD, rMASK
add rTMP1, rFEFE, rWORD
nor rTMP2, r7F7F, rWORD
- and. rTMP1, rTMP1, rTMP2
+ and. rTMP4, rTMP1, rTMP2
xor rTMP3, rCHR, rWORD
orc rTMP3, rTMP3, rMASK
b L(loopentry)
/* The loop. */
-L(loop):lwzu rWORD, 4(rSTR)
- and. rTMP1, rTMP1, rTMP2
+L(loop):
+ lwzu rWORD, 4(rSTR)
+ and. rTMP5, rTMP1, rTMP2
/* Test for 0. */
- add rTMP1, rFEFE, rWORD
- nor rTMP2, r7F7F, rWORD
+ add rTMP1, rFEFE, rWORD /* x - 0x01010101. */
+ nor rTMP2, r7F7F, rWORD /* ~(x | 0x7f7f7f7f) == ~x & 0x80808080. */
bne L(foundit)
- and. rTMP1, rTMP1, rTMP2
+ and. rTMP4, rTMP1, rTMP2 /* (x - 0x01010101) & ~x & 0x80808080. */
/* Start test for the bytes we're looking for. */
xor rTMP3, rCHR, rWORD
L(loopentry):
add rTMP1, rFEFE, rTMP3
nor rTMP2, r7F7F, rTMP3
beq L(loop)
+
/* There is a zero byte in the word, but may also be a matching byte (either
before or after the zero byte). In fact, we may be looking for a
- zero byte, in which case we return a match. We guess that this hasn't
- happened, though. */
-L(missed):
- and. rTMP1, rTMP1, rTMP2
+ zero byte, in which case we return a match. */
+ and. rTMP5, rTMP1, rTMP2
li rRTN, 0
STORE_RETURN_VALUE (rSTR)
beqlr
-/* It did happen. Decide which one was first...
- I'm not sure if this is actually faster than a sequence of
- rotates, compares, and branches (we use it anyway because it's shorter). */
+/* At this point:
+ rTMP5 bytes are 0x80 for each match of c, 0 otherwise.
+ rTMP4 bytes are 0x80 for each match of 0, 0 otherwise.
+ But there may be false matches in the next most significant byte from
+ a true match due to carries. This means we need to recalculate the
+ matches using a longer method for big-endian. */
+#ifdef __LITTLE_ENDIAN__
+ addi rTMP1, rTMP5, -1
+ andc rTMP1, rTMP1, rTMP5
+ cntlzw rCLZB, rTMP1
+ addi rTMP2, rTMP4, -1
+ andc rTMP2, rTMP2, rTMP4
+ cmplw rTMP1, rTMP2
+ bgtlr
+ subfic rCLZB, rCLZB, 32-7
+#else
+/* I think we could reduce this by two instructions by keeping the "nor"
+ results from the loop for reuse here. See strlen.S tail. Similarly
+ one instruction could be pruned from L(foundit). */
and rFEFE, r7F7F, rWORD
- or rMASK, r7F7F, rWORD
+ or rTMP5, r7F7F, rWORD
and rTMP1, r7F7F, rTMP3
- or rIGN, r7F7F, rTMP3
+ or rTMP4, r7F7F, rTMP3
add rFEFE, rFEFE, r7F7F
add rTMP1, rTMP1, r7F7F
- nor rWORD, rMASK, rFEFE
- nor rTMP2, rIGN, rTMP1
+ nor rWORD, rTMP5, rFEFE
+ nor rTMP2, rTMP4, rTMP1
+ cntlzw rCLZB, rTMP2
cmplw rWORD, rTMP2
bgtlr
- cntlzw rCLZB, rTMP2
+#endif
srwi rCLZB, rCLZB, 3
add rRTN, rSTR, rCLZB
CHECK_BOUNDS_HIGH_RTN (rSTR, rTMP2, twlge)
@@ -113,13 +136,21 @@ L(missed):
blr
L(foundit):
+#ifdef __LITTLE_ENDIAN__
+ addi rTMP1, rTMP5, -1
+ andc rTMP1, rTMP1, rTMP5
+ cntlzw rCLZB, rTMP1
+ subfic rCLZB, rCLZB, 32-7-32
+ srawi rCLZB, rCLZB, 3
+#else
and rTMP1, r7F7F, rTMP3
- or rIGN, r7F7F, rTMP3
+ or rTMP4, r7F7F, rTMP3
add rTMP1, rTMP1, r7F7F
- nor rTMP2, rIGN, rTMP1
+ nor rTMP2, rTMP4, rTMP1
cntlzw rCLZB, rTMP2
subi rSTR, rSTR, 4
srwi rCLZB, rCLZB, 3
+#endif
add rRTN, rSTR, rCLZB
CHECK_BOUNDS_HIGH_RTN (rSTR, rTMP2, twlge)
STORE_RETURN_VALUE (rSTR)
Index: glibc-2.17-c758a686/sysdeps/powerpc/powerpc64/power7/strchr.S
===================================================================
--- glibc-2.17-c758a686/sysdeps/powerpc/powerpc64/power7/strchr.S.orig
+++ glibc-2.17-c758a686/sysdeps/powerpc/powerpc64/power7/strchr.S
@@ -37,8 +37,8 @@ ENTRY (BP_SYM(strchr))
beq cr7,L(null_match)
/* Replicate byte to doubleword. */
- rlwimi r4,r4,8,16,23
- rlwimi r4,r4,16,0,15
+ insrdi r4,r4,8,48
+ insrdi r4,r4,16,32
insrdi r4,r4,32,0
/* Now r4 has a doubleword of c bytes and r0 has
@@ -49,11 +49,17 @@ ENTRY (BP_SYM(strchr))
/* Move the doublewords left and right to discard the bits that are
not part of the string and bring them back as zeros. */
-
+#ifdef __LITTLE_ENDIAN__
+ srd r10,r10,r6
+ srd r11,r11,r6
+ sld r10,r10,r6
+ sld r11,r11,r6
+#else
sld r10,r10,r6
sld r11,r11,r6
srd r10,r10,r6
srd r11,r11,r6
+#endif
or r5,r10,r11 /* OR the results to speed things up. */
cmpdi cr7,r5,0 /* If r5 == 0, no c or null bytes
have been found. */
@@ -110,15 +116,24 @@ L(loop):
mr r11,r7
addi r8,r8,8
- /* r5 has the output of the cmpb instruction, that is, it contains
+ /* r10/r11 have the output of the cmpb instructions, that is,
0xff in the same position as the c/null byte in the original
doubleword from the string. Use that to calculate the pointer. */
L(done):
- cntlzd r4,r10 /* Count leading zeroes before c matches. */
- cntlzd r0,r11 /* Count leading zeroes before null matches. */
- cmpld cr7,r4,r0
+#ifdef __LITTLE_ENDIAN__
+ addi r3,r10,-1
+ andc r3,r3,r10
+ popcntd r0,r3
+ addi r4,r11,-1
+ andc r4,r4,r11
+ cmpld cr7,r3,r4
bgt cr7,L(no_match)
- srdi r0,r4,3 /* Convert leading zeroes to bytes. */
+#else
+ cntlzd r0,r10 /* Count leading zeros before c matches. */
+ cmpld cr7,r11,r10
+ bgt cr7,L(no_match)
+#endif
+ srdi r0,r0,3 /* Convert leading zeros to bytes. */
add r3,r8,r0 /* Return address of the matching c byte
or null in case c was not found. */
blr
@@ -137,9 +152,13 @@ L(null_match):
/* Move the doublewords left and right to discard the bits that are
not part of the string and bring them back as zeros. */
-
+#ifdef __LITTLE_ENDIAN__
+ srd r5,r5,r6
+ sld r5,r5,r6
+#else
sld r5,r5,r6
srd r5,r5,r6
+#endif
cmpdi cr7,r5,0 /* If r10 == 0, no c or null bytes
have been found. */
bne cr7,L(done_null)
@@ -194,7 +213,13 @@ L(loop_null):
0xff in the same position as the null byte in the original
doubleword from the string. Use that to calculate the pointer. */
L(done_null):
+#ifdef __LITTLE_ENDIAN__
+ addi r0,r5,-1
+ andc r0,r0,r5
+ popcntd r0,r0
+#else
cntlzd r0,r5 /* Count leading zeros before the match. */
+#endif
srdi r0,r0,3 /* Convert leading zeros to bytes. */
add r3,r8,r0 /* Return address of the matching null byte. */
blr
Index: glibc-2.17-c758a686/sysdeps/powerpc/powerpc64/power7/strchrnul.S
===================================================================
--- glibc-2.17-c758a686/sysdeps/powerpc/powerpc64/power7/strchrnul.S.orig
+++ glibc-2.17-c758a686/sysdeps/powerpc/powerpc64/power7/strchrnul.S
@@ -29,8 +29,8 @@ ENTRY (BP_SYM(__strchrnul))
clrrdi r8,r3,3 /* Align the address to doubleword boundary. */
/* Replicate byte to doubleword. */
- rlwimi r4,r4,8,16,23
- rlwimi r4,r4,16,0,15
+ insrdi r4,r4,8,48
+ insrdi r4,r4,16,32
insrdi r4,r4,32,0
rlwinm r6,r3,3,26,28 /* Calculate padding. */
@@ -46,10 +46,17 @@ ENTRY (BP_SYM(__strchrnul))
/* Move the doublewords left and right to discard the bits that are
not part of the string and to bring them back as zeros. */
+#ifdef __LITTLE_ENDIAN__
+ srd r10,r10,r6
+ srd r9,r9,r6
+ sld r10,r10,r6
+ sld r9,r9,r6
+#else
sld r10,r10,r6
sld r9,r9,r6
srd r10,r10,r6
srd r9,r9,r6
+#endif
or r5,r9,r10 /* OR the results to speed things up. */
cmpdi cr7,r5,0 /* If r5 == 0, no c or null bytes
have been found. */
@@ -99,7 +106,7 @@ L(loop):
bne cr6,L(done)
/* The c/null byte must be in the second doubleword. Adjust the
- address again and move the result of cmpb to r10 so we can calculate
+ address again and move the result of cmpb to r5 so we can calculate
the pointer. */
mr r5,r10
addi r8,r8,8
@@ -108,7 +115,13 @@ L(loop):
0xff in the same position as the c/null byte in the original
doubleword from the string. Use that to calculate the pointer. */
L(done):
+#ifdef __LITTLE_ENDIAN__
+ addi r0,r5,-1
+ andc r0,r0,r5
+ popcntd r0,r0
+#else
cntlzd r0,r5 /* Count leading zeros before the match. */
+#endif
srdi r0,r0,3 /* Convert leading zeros to bytes. */
add r3,r8,r0 /* Return address of matching c/null byte. */
blr
Index: glibc-2.17-c758a686/sysdeps/powerpc/powerpc64/strchr.S
===================================================================
--- glibc-2.17-c758a686/sysdeps/powerpc/powerpc64/strchr.S.orig
+++ glibc-2.17-c758a686/sysdeps/powerpc/powerpc64/strchr.S
@@ -50,14 +50,16 @@ ENTRY (BP_SYM (strchr))
#define rIGN r10 /* number of bits we should ignore in the first word */
#define rMASK r11 /* mask with the bits to ignore set to 0 */
#define rTMP3 r12
+#define rTMP4 rIGN
+#define rTMP5 rMASK
CHECK_BOUNDS_LOW (rSTR, rTMP1, rTMP2)
STORE_RETURN_BOUNDS (rTMP1, rTMP2)
dcbt 0,rRTN
- rlwimi rCHR, rCHR, 8, 16, 23
+ insrdi rCHR, rCHR, 8, 48
li rMASK, -1
- rlwimi rCHR, rCHR, 16, 0, 15
+ insrdi rCHR, rCHR, 16, 32
rlwinm rIGN, rRTN, 3, 26, 28
insrdi rCHR, rCHR, 32, 0
lis rFEFE, -0x101
@@ -70,53 +72,74 @@ ENTRY (BP_SYM (strchr))
add rFEFE, rFEFE, rTMP1
/* Test the first (partial?) word. */
ld rWORD, 0(rSTR)
+#ifdef __LITTLE_ENDIAN__
+ sld rMASK, rMASK, rIGN
+#else
srd rMASK, rMASK, rIGN
+#endif
orc rWORD, rWORD, rMASK
add rTMP1, rFEFE, rWORD
nor rTMP2, r7F7F, rWORD
- and. rTMP1, rTMP1, rTMP2
+ and. rTMP4, rTMP1, rTMP2
xor rTMP3, rCHR, rWORD
orc rTMP3, rTMP3, rMASK
b L(loopentry)
/* The loop. */
-L(loop):ldu rWORD, 8(rSTR)
- and. rTMP1, rTMP1, rTMP2
+L(loop):
+ ldu rWORD, 8(rSTR)
+ and. rTMP5, rTMP1, rTMP2
/* Test for 0. */
- add rTMP1, rFEFE, rWORD
- nor rTMP2, r7F7F, rWORD
+ add rTMP1, rFEFE, rWORD /* x - 0x01010101. */
+ nor rTMP2, r7F7F, rWORD /* ~(x | 0x7f7f7f7f) == ~x & 0x80808080. */
bne L(foundit)
- and. rTMP1, rTMP1, rTMP2
+ and. rTMP4, rTMP1, rTMP2 /* (x - 0x01010101) & ~x & 0x80808080. */
/* Start test for the bytes we're looking for. */
xor rTMP3, rCHR, rWORD
L(loopentry):
add rTMP1, rFEFE, rTMP3
nor rTMP2, r7F7F, rTMP3
beq L(loop)
+
/* There is a zero byte in the word, but may also be a matching byte (either
before or after the zero byte). In fact, we may be looking for a
- zero byte, in which case we return a match. We guess that this hasn't
- happened, though. */
-L(missed):
- and. rTMP1, rTMP1, rTMP2
+ zero byte, in which case we return a match. */
+ and. rTMP5, rTMP1, rTMP2
li rRTN, 0
STORE_RETURN_VALUE (rSTR)
beqlr
-/* It did happen. Decide which one was first...
- I'm not sure if this is actually faster than a sequence of
- rotates, compares, and branches (we use it anyway because it's shorter). */
+/* At this point:
+ rTMP5 bytes are 0x80 for each match of c, 0 otherwise.
+ rTMP4 bytes are 0x80 for each match of 0, 0 otherwise.
+ But there may be false matches in the next most significant byte from
+ a true match due to carries. This means we need to recalculate the
+ matches using a longer method for big-endian. */
+#ifdef __LITTLE_ENDIAN__
+ addi rTMP1, rTMP5, -1
+ andc rTMP1, rTMP1, rTMP5
+ cntlzd rCLZB, rTMP1
+ addi rTMP2, rTMP4, -1
+ andc rTMP2, rTMP2, rTMP4
+ cmpld rTMP1, rTMP2
+ bgtlr
+ subfic rCLZB, rCLZB, 64-7
+#else
+/* I think we could reduce this by two instructions by keeping the "nor"
+ results from the loop for reuse here. See strlen.S tail. Similarly
+ one instruction could be pruned from L(foundit). */
and rFEFE, r7F7F, rWORD
- or rMASK, r7F7F, rWORD
+ or rTMP5, r7F7F, rWORD
and rTMP1, r7F7F, rTMP3
- or rIGN, r7F7F, rTMP3
+ or rTMP4, r7F7F, rTMP3
add rFEFE, rFEFE, r7F7F
add rTMP1, rTMP1, r7F7F
- nor rWORD, rMASK, rFEFE
- nor rTMP2, rIGN, rTMP1
+ nor rWORD, rTMP5, rFEFE
+ nor rTMP2, rTMP4, rTMP1
+ cntlzd rCLZB, rTMP2
cmpld rWORD, rTMP2
bgtlr
- cntlzd rCLZB, rTMP2
+#endif
srdi rCLZB, rCLZB, 3
add rRTN, rSTR, rCLZB
CHECK_BOUNDS_HIGH_RTN (rSTR, rTMP2, tdlge)
@@ -124,13 +147,21 @@ L(missed):
blr
L(foundit):
+#ifdef __LITTLE_ENDIAN__
+ addi rTMP1, rTMP5, -1
+ andc rTMP1, rTMP1, rTMP5
+ cntlzd rCLZB, rTMP1
+ subfic rCLZB, rCLZB, 64-7-64
+ sradi rCLZB, rCLZB, 3
+#else
and rTMP1, r7F7F, rTMP3
- or rIGN, r7F7F, rTMP3
+ or rTMP4, r7F7F, rTMP3
add rTMP1, rTMP1, r7F7F
- nor rTMP2, rIGN, rTMP1
+ nor rTMP2, rTMP4, rTMP1
cntlzd rCLZB, rTMP2
subi rSTR, rSTR, 8
srdi rCLZB, rCLZB, 3
+#endif
add rRTN, rSTR, rCLZB
CHECK_BOUNDS_HIGH_RTN (rSTR, rTMP2, tdlge)
STORE_RETURN_VALUE (rSTR)

File diff suppressed because it is too large.

File diff suppressed because it is too large.


@ -0,0 +1,272 @@
# commit 3be87c77d24c4456ccca4034363b6d1814cd0c84
# Author: Alan Modra <amodra@gmail.com>
# Date: Sat Aug 17 18:47:59 2013 +0930
#
# PowerPC LE memset
# http://sourceware.org/ml/libc-alpha/2013-08/msg00104.html
#
# One of the things I noticed when looking at power7 timing is that rlwimi
# is cracked and the two resulting insns have a register dependency.
# That makes it a little slower than the equivalent rldimi.
#
# * sysdeps/powerpc/powerpc64/memset.S: Replace rlwimi with
# insrdi. Formatting.
# * sysdeps/powerpc/powerpc64/power4/memset.S: Likewise.
# * sysdeps/powerpc/powerpc64/power6/memset.S: Likewise.
# * sysdeps/powerpc/powerpc64/power7/memset.S: Likewise.
# * sysdeps/powerpc/powerpc32/power4/memset.S: Likewise.
# * sysdeps/powerpc/powerpc32/power6/memset.S: Likewise.
# * sysdeps/powerpc/powerpc32/power7/memset.S: Likewise.
#
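The value being stored is unchanged by this patch; only the instructions that build
it are.  As a minimal C sketch (illustrative name), the rCHR replication computes
the following byte -> halfword -> word -> doubleword splat, which rldimi/insrdi
builds without the cracking penalty rlwimi has on POWER7:

    #include <stdint.h>

    /* Replicate the fill byte across a 64-bit doubleword, the value the
       memset loops then store with std/dcbz. */
    static inline uint64_t
    splat_byte (unsigned char c)
    {
      uint64_t v = c;
      v |= v << 8;    /* byte     -> halfword   */
      v |= v << 16;   /* halfword -> word       */
      v |= v << 32;   /* word     -> doubleword */
      return v;
    }
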
diff -urN glibc-2.17-c758a686/sysdeps/powerpc/powerpc32/power4/memset.S glibc-2.17-c758a686/sysdeps/powerpc/powerpc32/power4/memset.S
--- glibc-2.17-c758a686/sysdeps/powerpc/powerpc32/power4/memset.S 2014-05-29 13:07:41.000000000 -0500
+++ glibc-2.17-c758a686/sysdeps/powerpc/powerpc32/power4/memset.S 2014-05-29 13:07:46.000000000 -0500
@@ -52,7 +52,7 @@
/* Align to word boundary. */
cmplwi cr5, rLEN, 31
- rlwimi rCHR, rCHR, 8, 16, 23 /* Replicate byte to halfword. */
+ insrdi rCHR, rCHR, 8, 48 /* Replicate byte to halfword. */
beq+ L(aligned)
mtcrf 0x01, rMEMP0
subfic rALIGN, rALIGN, 4
@@ -67,7 +67,7 @@
/* Handle the case of size < 31. */
L(aligned):
mtcrf 0x01, rLEN
- rlwimi rCHR, rCHR, 16, 0, 15 /* Replicate halfword to word. */
+ insrdi rCHR, rCHR, 16, 32 /* Replicate halfword to word. */
ble cr5, L(medium)
/* Align to 32-byte boundary. */
andi. rALIGN, rMEMP, 0x1C
diff -urN glibc-2.17-c758a686/sysdeps/powerpc/powerpc32/power6/memset.S glibc-2.17-c758a686/sysdeps/powerpc/powerpc32/power6/memset.S
--- glibc-2.17-c758a686/sysdeps/powerpc/powerpc32/power6/memset.S 2014-05-29 13:07:41.000000000 -0500
+++ glibc-2.17-c758a686/sysdeps/powerpc/powerpc32/power6/memset.S 2014-05-29 13:07:46.000000000 -0500
@@ -50,7 +50,7 @@
ble- cr1, L(small)
/* Align to word boundary. */
cmplwi cr5, rLEN, 31
- rlwimi rCHR, rCHR, 8, 16, 23 /* Replicate byte to halfword. */
+ insrdi rCHR, rCHR, 8, 48 /* Replicate byte to halfword. */
beq+ L(aligned)
mtcrf 0x01, rMEMP0
subfic rALIGN, rALIGN, 4
@@ -66,7 +66,7 @@
/* Handle the case of size < 31. */
L(aligned):
mtcrf 0x01, rLEN
- rlwimi rCHR, rCHR, 16, 0, 15 /* Replicate halfword to word. */
+ insrdi rCHR, rCHR, 16, 32 /* Replicate halfword to word. */
ble cr5, L(medium)
/* Align to 32-byte boundary. */
andi. rALIGN, rMEMP, 0x1C
diff -urN glibc-2.17-c758a686/sysdeps/powerpc/powerpc32/power7/memset.S glibc-2.17-c758a686/sysdeps/powerpc/powerpc32/power7/memset.S
--- glibc-2.17-c758a686/sysdeps/powerpc/powerpc32/power7/memset.S 2014-05-29 13:07:41.000000000 -0500
+++ glibc-2.17-c758a686/sysdeps/powerpc/powerpc32/power7/memset.S 2014-05-29 13:07:46.000000000 -0500
@@ -37,8 +37,8 @@
cfi_offset(31,-8)
/* Replicate byte to word. */
- rlwimi 4,4,8,16,23
- rlwimi 4,4,16,0,15
+ insrdi 4,4,8,48
+ insrdi 4,4,16,32
ble cr6,L(small) /* If length <= 8, use short copy code. */
diff -urN glibc-2.17-c758a686/sysdeps/powerpc/powerpc64/memset.S glibc-2.17-c758a686/sysdeps/powerpc/powerpc64/memset.S
--- glibc-2.17-c758a686/sysdeps/powerpc/powerpc64/memset.S 2014-05-29 13:07:41.000000000 -0500
+++ glibc-2.17-c758a686/sysdeps/powerpc/powerpc64/memset.S 2014-05-29 13:07:46.000000000 -0500
@@ -73,14 +73,14 @@
/* Align to doubleword boundary. */
cmpldi cr5, rLEN, 31
- rlwimi rCHR, rCHR, 8, 16, 23 /* Replicate byte to halfword. */
+ insrdi rCHR, rCHR, 8, 48 /* Replicate byte to halfword. */
beq+ L(aligned2)
mtcrf 0x01, rMEMP0
subfic rALIGN, rALIGN, 8
cror 28,30,31 /* Detect odd word aligned. */
add rMEMP, rMEMP, rALIGN
sub rLEN, rLEN, rALIGN
- rlwimi rCHR, rCHR, 16, 0, 15 /* Replicate halfword to word. */
+ insrdi rCHR, rCHR, 16, 32 /* Replicate halfword to word. */
bt 29, L(g4)
/* Process the even word of doubleword. */
bf+ 31, L(g2)
@@ -102,14 +102,14 @@
/* Handle the case of size < 31. */
L(aligned2):
- rlwimi rCHR, rCHR, 16, 0, 15 /* Replicate halfword to word. */
+ insrdi rCHR, rCHR, 16, 32 /* Replicate halfword to word. */
L(aligned):
mtcrf 0x01, rLEN
ble cr5, L(medium)
/* Align to 32-byte boundary. */
andi. rALIGN, rMEMP, 0x18
subfic rALIGN, rALIGN, 0x20
- insrdi rCHR,rCHR,32,0 /* Replicate word to double word. */
+ insrdi rCHR, rCHR, 32, 0 /* Replicate word to double word. */
beq L(caligned)
mtcrf 0x01, rALIGN
add rMEMP, rMEMP, rALIGN
@@ -230,7 +230,7 @@
/* Memset of 0-31 bytes. */
.align 5
L(medium):
- insrdi rCHR,rCHR,32,0 /* Replicate word to double word. */
+ insrdi rCHR, rCHR, 32, 0 /* Replicate word to double word. */
cmpldi cr1, rLEN, 16
L(medium_tail2):
add rMEMP, rMEMP, rLEN
diff -urN glibc-2.17-c758a686/sysdeps/powerpc/powerpc64/power4/memset.S glibc-2.17-c758a686/sysdeps/powerpc/powerpc64/power4/memset.S
--- glibc-2.17-c758a686/sysdeps/powerpc/powerpc64/power4/memset.S 2014-05-29 13:07:41.000000000 -0500
+++ glibc-2.17-c758a686/sysdeps/powerpc/powerpc64/power4/memset.S 2014-05-29 13:07:46.000000000 -0500
@@ -68,14 +68,14 @@
/* Align to doubleword boundary. */
cmpldi cr5, rLEN, 31
- rlwimi rCHR, rCHR, 8, 16, 23 /* Replicate byte to halfword. */
+ insrdi rCHR, rCHR, 8, 48 /* Replicate byte to halfword. */
beq+ L(aligned2)
mtcrf 0x01, rMEMP0
subfic rALIGN, rALIGN, 8
cror 28,30,31 /* Detect odd word aligned. */
add rMEMP, rMEMP, rALIGN
sub rLEN, rLEN, rALIGN
- rlwimi rCHR, rCHR, 16, 0, 15 /* Replicate halfword to word. */
+ insrdi rCHR, rCHR, 16, 32 /* Replicate halfword to word. */
bt 29, L(g4)
/* Process the even word of doubleword. */
bf+ 31, L(g2)
@@ -97,14 +97,14 @@
/* Handle the case of size < 31. */
L(aligned2):
- rlwimi rCHR, rCHR, 16, 0, 15 /* Replicate halfword to word. */
+ insrdi rCHR, rCHR, 16, 32 /* Replicate halfword to word. */
L(aligned):
mtcrf 0x01, rLEN
ble cr5, L(medium)
/* Align to 32-byte boundary. */
andi. rALIGN, rMEMP, 0x18
subfic rALIGN, rALIGN, 0x20
- insrdi rCHR,rCHR,32,0 /* Replicate word to double word. */
+ insrdi rCHR, rCHR, 32, 0 /* Replicate word to double word. */
beq L(caligned)
mtcrf 0x01, rALIGN
add rMEMP, rMEMP, rALIGN
@@ -164,24 +164,24 @@
L(getCacheAligned):
cmpldi cr1,rLEN,32
andi. rTMP,rMEMP,127
- blt cr1,L(handletail32)
- beq L(cacheAligned)
+ blt cr1,L(handletail32)
+ beq L(cacheAligned)
addi rMEMP,rMEMP,32
addi rLEN,rLEN,-32
- std rCHR,-32(rMEMP)
- std rCHR,-24(rMEMP)
- std rCHR,-16(rMEMP)
- std rCHR,-8(rMEMP)
- b L(getCacheAligned)
+ std rCHR,-32(rMEMP)
+ std rCHR,-24(rMEMP)
+ std rCHR,-16(rMEMP)
+ std rCHR,-8(rMEMP)
+ b L(getCacheAligned)
/* Now we are aligned to the cache line and can use dcbz. */
L(cacheAligned):
cmpld cr1,rLEN,rCLS
- blt cr1,L(handletail32)
+ blt cr1,L(handletail32)
dcbz 0,rMEMP
subf rLEN,rCLS,rLEN
- add rMEMP,rMEMP,rCLS
- b L(cacheAligned)
+ add rMEMP,rMEMP,rCLS
+ b L(cacheAligned)
/* We are here because the cache line size was set and was not 32-bytes
and the remainder (rLEN) is less than the actual cache line size.
@@ -218,7 +218,7 @@
/* Memset of 0-31 bytes. */
.align 5
L(medium):
- insrdi rCHR,rCHR,32,0 /* Replicate word to double word. */
+ insrdi rCHR, rCHR, 32, 0 /* Replicate word to double word. */
cmpldi cr1, rLEN, 16
L(medium_tail2):
add rMEMP, rMEMP, rLEN
diff -urN glibc-2.17-c758a686/sysdeps/powerpc/powerpc64/power6/memset.S glibc-2.17-c758a686/sysdeps/powerpc/powerpc64/power6/memset.S
--- glibc-2.17-c758a686/sysdeps/powerpc/powerpc64/power6/memset.S 2014-05-29 13:07:41.000000000 -0500
+++ glibc-2.17-c758a686/sysdeps/powerpc/powerpc64/power6/memset.S 2014-05-29 13:07:46.000000000 -0500
@@ -65,14 +65,14 @@
/* Align to doubleword boundary. */
cmpldi cr5, rLEN, 31
- rlwimi rCHR, rCHR, 8, 16, 23 /* Replicate byte to halfword. */
+ insrdi rCHR, rCHR, 8, 48 /* Replicate byte to halfword. */
beq+ L(aligned2)
mtcrf 0x01, rMEMP0
subfic rALIGN, rALIGN, 8
cror 28,30,31 /* Detect odd word aligned. */
add rMEMP, rMEMP, rALIGN
sub rLEN, rLEN, rALIGN
- rlwimi rCHR, rCHR, 16, 0, 15 /* Replicate halfword to word. */
+ insrdi rCHR, rCHR, 16, 32 /* Replicate halfword to word. */
bt 29, L(g4)
/* Process the even word of doubleword. */
bf+ 31, L(g2)
@@ -94,14 +94,14 @@
/* Handle the case of size < 31. */
L(aligned2):
- rlwimi rCHR, rCHR, 16, 0, 15 /* Replicate halfword to word. */
+ insrdi rCHR, rCHR, 16, 32 /* Replicate halfword to word. */
L(aligned):
mtcrf 0x01, rLEN
ble cr5, L(medium)
/* Align to 32-byte boundary. */
andi. rALIGN, rMEMP, 0x18
subfic rALIGN, rALIGN, 0x20
- insrdi rCHR,rCHR,32,0 /* Replicate word to double word. */
+ insrdi rCHR, rCHR, 32, 0 /* Replicate word to double word. */
beq L(caligned)
mtcrf 0x01, rALIGN
add rMEMP, rMEMP, rALIGN
@@ -362,7 +362,7 @@
/* Memset of 0-31 bytes. */
.align 5
L(medium):
- insrdi rCHR,rCHR,32,0 /* Replicate word to double word. */
+ insrdi rCHR, rCHR, 32, 0 /* Replicate word to double word. */
cmpldi cr1, rLEN, 16
L(medium_tail2):
add rMEMP, rMEMP, rLEN
diff -urN glibc-2.17-c758a686/sysdeps/powerpc/powerpc64/power7/memset.S glibc-2.17-c758a686/sysdeps/powerpc/powerpc64/power7/memset.S
--- glibc-2.17-c758a686/sysdeps/powerpc/powerpc64/power7/memset.S 2014-05-29 13:07:41.000000000 -0500
+++ glibc-2.17-c758a686/sysdeps/powerpc/powerpc64/power7/memset.S 2014-05-29 13:07:46.000000000 -0500
@@ -34,8 +34,8 @@
mr 10,3
/* Replicate byte to word. */
- rlwimi 4,4,8,16,23
- rlwimi 4,4,16,0,15
+ insrdi 4,4,8,48
+ insrdi 4,4,16,32
ble cr6,L(small) /* If length <= 8, use short copy code. */
neg 0,3
@@ -323,7 +323,7 @@
clrldi 0,0,62
beq L(medium_aligned)
- /* Force 4-bytes alignment for SRC. */
+ /* Force 4-bytes alignment for DST. */
mtocrf 0x01,0
subf 5,0,5
1: /* Copy 1 byte. */

File diff suppressed because it is too large.


@ -0,0 +1,68 @@
# commit 8f9ebb08af1368962d9f24c4cfacb55cf8eee560
# Author: Alan Modra <amodra@gmail.com>
# Date: Thu Oct 3 14:03:03 2013 +0930
#
# PowerPC LE configury
# http://sourceware.org/ml/libc-alpha/2013-08/msg00096.html
#
# This adds the basic configury bits for powerpc64le and powerpcle.
#
# * configure.in: Map powerpc64le and powerpcle to base_machine/machine.
# * configure: Regenerate.
# * nptl/shlib-versions: Powerpc*le starts at 2.18.
# * shlib-versions: Likewise.
#
# commit 0ff8246327401ae8779e2697d5c7348611cdbf8a
# Author: Adhemerval Zanella <azanella@linux.vnet.ibm.com>
# Date: Tue Feb 4 09:49:08 2014 -0200
#
# PowerPC: Change powerpc64le start ABI to 2.17.
#
diff -urN glibc-2.17-c758a686/configure glibc-2.17-c758a686/configure
--- glibc-2.17-c758a686/configure 2014-05-26 19:52:31.000000000 -0500
+++ glibc-2.17-c758a686/configure 2014-05-26 19:54:13.000000000 -0500
@@ -4195,8 +4195,8 @@
# base_machine, we don't change it.
test -n "$base_machine" || case "$machine" in
i[34567]86) base_machine=i386 machine=i386/$machine ;;
-powerpc) base_machine=powerpc machine=powerpc/powerpc32 ;;
-powerpc64) base_machine=powerpc machine=powerpc/powerpc64 ;;
+powerpc64*) base_machine=powerpc machine=powerpc/powerpc64 ;;
+powerpc*) base_machine=powerpc machine=powerpc/powerpc32 ;;
s390) base_machine=s390 machine=s390/s390-32 ;;
s390x) base_machine=s390 machine=s390/s390-64 ;;
sh3*) base_machine=sh machine=sh/sh3 ;;
diff -urN glibc-2.17-c758a686/configure.in glibc-2.17-c758a686/configure.in
--- glibc-2.17-c758a686/configure.in 2014-05-26 19:52:30.000000000 -0500
+++ glibc-2.17-c758a686/configure.in 2014-05-26 19:54:45.000000000 -0500
@@ -549,8 +549,8 @@
# base_machine, we don't change it.
test -n "$base_machine" || case "$machine" in
i[34567]86) base_machine=i386 machine=i386/$machine ;;
-powerpc) base_machine=powerpc machine=powerpc/powerpc32 ;;
-powerpc64) base_machine=powerpc machine=powerpc/powerpc64 ;;
+powerpc64*) base_machine=powerpc machine=powerpc/powerpc64 ;;
+powerpc*) base_machine=powerpc machine=powerpc/powerpc32 ;;
s390) base_machine=s390 machine=s390/s390-32 ;;
s390x) base_machine=s390 machine=s390/s390-64 ;;
sh3*) base_machine=sh machine=sh/sh3 ;;
diff -urN glibc-2.17-c758a686/nptl/shlib-versions glibc-2.17-c758a686/nptl/shlib-versions
--- glibc-2.17-c758a686/nptl/shlib-versions 2014-05-26 19:52:31.000000000 -0500
+++ glibc-2.17-c758a686/nptl/shlib-versions 2014-05-26 19:53:31.000000000 -0500
@@ -2,4 +2,5 @@
sh.*-.*-linux.* libpthread=0 GLIBC_2.2
s390x-.*-linux.* libpthread=0 GLIBC_2.2
powerpc64-.*-linux.* libpthread=0 GLIBC_2.3
+powerpc.*le-.*-linux.* libpthread=0 GLIBC_2.17
.*-.*-linux.* libpthread=0
diff -urN glibc-2.17-c758a686/shlib-versions glibc-2.17-c758a686/shlib-versions
--- glibc-2.17-c758a686/shlib-versions 2014-05-26 19:52:31.000000000 -0500
+++ glibc-2.17-c758a686/shlib-versions 2014-05-26 19:53:31.000000000 -0500
@@ -23,6 +23,7 @@
s390x-.*-linux.* DEFAULT GLIBC_2.2
powerpc64-.*-linux.* DEFAULT GLIBC_2.3
+powerpc.*le-.*-linux.* DEFAULT GLIBC_2.17
.*-.*-gnu-gnu.* DEFAULT GLIBC_2.2.6
# Configuration ABI Identifier for ABI data files


@ -0,0 +1,106 @@
# commit 5162e7dd96efcd9b45c1dc1471a964d45278b1e1
# Author: Ulrich Weigand <Ulrich.Weigand@de.ibm.com>
# Date: Wed Dec 4 06:41:52 2013 -0600
#
# PowerPC64: Fix incorrect CFI in *context routines
#
# The context established by "makecontext" has a link register pointing
# back to an error path within the makecontext routine. This is currently
# covered by the CFI FDE for makecontext itself, which is simply wrong
# for the stack frame *inside* the context. When trying to unwind (e.g.
# doing a backtrace) in a routine inside a context created by makecontext,
# this can lead to uninitialized stack slots being accessed, causing the
# unwinder to crash in the worst case.
#
# Similarly, during parts of the "setcontext" routine, when the stack
# pointer has already been switched to point to the new context, the
# address range is still covered by the CFI FDE for setcontext. When
# trying to unwind in that situation (e.g. backtrace from an async
# signal handler for profiling), it is again possible that the unwinder
# crashes.
#
# These are all problems in existing code, but the changes in stack
# frame layout appear to make the "worst case" much more likely in
# the ELFv2 ABI context. This causes regressions e.g. in the libgo
# testsuite on ELFv2.
#
# This patch fixes this by ending the makecontext/setcontext FDEs
# before those problematic parts of the assembler, similar to what
# is already done on other platforms. This fixes the libgo
# regression on ELFv2.
#
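A small usage sketch of the scenario described above (not part of the patch; names
are illustrative): a backtrace taken while running on a stack set up by makecontext
must unwind through the frame whose return address points back into makecontext,
which is where the stale FDE could mislead the unwinder.

    #include <ucontext.h>
    #include <execinfo.h>
    #include <stdio.h>

    static ucontext_t main_ctx, func_ctx;
    static char stack[64 * 1024];

    static void
    in_context (void)
    {
      void *frames[16];
      int n = backtrace (frames, 16);  /* unwinds through the makecontext frame */
      printf ("unwound %d frames inside the context\n", n);
    }                                  /* returning here switches to uc_link */

    int
    main (void)
    {
      getcontext (&func_ctx);
      func_ctx.uc_stack.ss_sp = stack;
      func_ctx.uc_stack.ss_size = sizeof stack;
      func_ctx.uc_link = &main_ctx;
      makecontext (&func_ctx, in_context, 0);
      swapcontext (&main_ctx, &func_ctx);  /* run in_context, then return here */
      return 0;
    }
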
diff -urN glibc-2.17-c758a686/sysdeps/unix/sysv/linux/powerpc/powerpc64/makecontext.S glibc-2.17-c758a686/sysdeps/unix/sysv/linux/powerpc/powerpc64/makecontext.S
--- glibc-2.17-c758a686/sysdeps/unix/sysv/linux/powerpc/powerpc64/makecontext.S 2014-05-29 13:16:16.000000000 -0500
+++ glibc-2.17-c758a686/sysdeps/unix/sysv/linux/powerpc/powerpc64/makecontext.S 2014-05-29 13:16:17.000000000 -0500
@@ -129,6 +129,10 @@
the cpu link stack used to predict blr return addresses. */
bcl 20,31,L(gotexitcodeaddr);
+ /* End FDE now, because while executing on the context's stack
+ the unwind info would be wrong otherwise. */
+ cfi_endproc
+
/* This is the helper code which gets called if a function which
is registered with 'makecontext' returns. In this case we
have to install the context listed in the uc_link element of
@@ -157,6 +161,11 @@
#endif
b L(do_exit)
+ /* Re-establish FDE for the rest of the actual makecontext routine. */
+ cfi_startproc
+ cfi_offset (lr, FRAME_LR_SAVE)
+ cfi_adjust_cfa_offset (128)
+
/* The address of the exit code is in the link register. Store the lr
in the ucontext as LNK so the target function will return to our
exit code. */
diff -urN glibc-2.17-c758a686/sysdeps/unix/sysv/linux/powerpc/powerpc64/setcontext.S glibc-2.17-c758a686/sysdeps/unix/sysv/linux/powerpc/powerpc64/setcontext.S
--- glibc-2.17-c758a686/sysdeps/unix/sysv/linux/powerpc/powerpc64/setcontext.S 2014-05-29 13:16:16.000000000 -0500
+++ glibc-2.17-c758a686/sysdeps/unix/sysv/linux/powerpc/powerpc64/setcontext.S 2014-05-29 13:16:17.000000000 -0500
@@ -129,6 +129,10 @@
lfd fp1,(SIGCONTEXT_FP_REGS+(PT_R1*8))(r31)
lfd fp0,(SIGCONTEXT_FP_REGS+(PT_R0*8))(r31)
+ /* End FDE now, because the unwind info would be wrong while
+ we're reloading registers to switch to the new context. */
+ cfi_endproc
+
ld r0,(SIGCONTEXT_GP_REGS+(PT_LNK*8))(r31)
ld r1,(SIGCONTEXT_GP_REGS+(PT_R1*8))(r31)
mtlr r0
@@ -177,6 +181,11 @@
ld r31,(SIGCONTEXT_GP_REGS+(PT_R31*8))(r31)
bctr
+ /* Re-establish FDE for the rest of the actual setcontext routine. */
+ cfi_startproc
+ cfi_offset (lr, FRAME_LR_SAVE)
+ cfi_adjust_cfa_offset (128)
+
L(nv_error_exit):
ld r0,128+FRAME_LR_SAVE(r1)
addi r1,r1,128
@@ -403,6 +412,10 @@
lfd fp1,(SIGCONTEXT_FP_REGS+(PT_R1*8))(r31)
lfd fp0,(SIGCONTEXT_FP_REGS+(PT_R0*8))(r31)
+ /* End FDE now, because the unwind info would be wrong while
+ we're reloading registers to switch to the new context. */
+ cfi_endproc
+
ld r0,(SIGCONTEXT_GP_REGS+(PT_LNK*8))(r31)
ld r1,(SIGCONTEXT_GP_REGS+(PT_R1*8))(r31)
mtlr r0
@@ -451,6 +464,11 @@
ld r31,(SIGCONTEXT_GP_REGS+(PT_R31*8))(r31)
bctr
+ /* Re-establish FDE for the rest of the actual setcontext routine. */
+ cfi_startproc
+ cfi_offset (lr, FRAME_LR_SAVE)
+ cfi_adjust_cfa_offset (128)
+
L(error_exit):
ld r0,128+FRAME_LR_SAVE(r1)
addi r1,r1,128
View File
@ -0,0 +1,105 @@
# commit 7ec07d9a7b501f1b7d740fda02ba5f39d6d684e5
# Author: Alan Modra <amodra@gmail.com>
# Date: Wed Dec 4 06:44:06 2013 -0600
#
# PowerPC64: Report overflow on @h and @ha relocations
#
# This patch updates glibc in accordance with the binutils patch checked in here:
# https://sourceware.org/ml/binutils/2013-10/msg00372.html
#
# This changes the various R_PPC64_..._HI and _HA relocations to report
# 32-bit overflows. The motivation is that existing uses of @h / @ha
# are to build up 32-bit offsets (for the "medium model" TOC access
# that GCC now defaults to), and we'd really like to see failures at
# link / load time rather than silent truncations.
#
# For those rare cases where a modifier is needed to build up a 64-bit
# constant, new relocations _HIGH / _HIGHA are supported.
#
# The patch also fixes a bug in overflow checking for the R_PPC64_ADDR30
# and R_PPC64_ADDR32 relocations.
#
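# As an editorial aside (not part of the patch), a minimal C sketch of the
# range check the HI/HA cases below apply: the relocated value must still be
# representable as a sign-extended 32-bit quantity, otherwise the high
# halfword computed by @h would silently drop significant bits.  The @ha
# variant uses 0x80008000 instead, to account for the carry produced when
# the low halfword is rounded.
#
#   #include <stdint.h>
#
#   /* Nonzero if the @h halfword of VALUE would truncate it; mirrors the
#      "value + 0x80000000 >= 0x100000000" test used in dl-machine.h.  */
#   static int
#   hi_reloc_overflows (uint64_t value)
#   {
#     return value + 0x80000000ULL >= 0x100000000ULL;
#   }
#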
diff -urN glibc-2.17-c758a686/elf/elf.h glibc-2.17-c758a686/elf/elf.h
--- glibc-2.17-c758a686/elf/elf.h 2014-05-29 13:17:35.000000000 -0500
+++ glibc-2.17-c758a686/elf/elf.h 2014-05-29 13:17:35.000000000 -0500
@@ -2243,6 +2243,17 @@
#define R_PPC64_DTPREL16_HIGHERA 104 /* half16 (sym+add)@dtprel@highera */
#define R_PPC64_DTPREL16_HIGHEST 105 /* half16 (sym+add)@dtprel@highest */
#define R_PPC64_DTPREL16_HIGHESTA 106 /* half16 (sym+add)@dtprel@highesta */
+#define R_PPC64_TLSGD 107 /* none (sym+add)@tlsgd */
+#define R_PPC64_TLSLD 108 /* none (sym+add)@tlsld */
+#define R_PPC64_TOCSAVE 109 /* none */
+
+/* Added when HA and HI relocs were changed to report overflows. */
+#define R_PPC64_ADDR16_HIGH 110
+#define R_PPC64_ADDR16_HIGHA 111
+#define R_PPC64_TPREL16_HIGH 112
+#define R_PPC64_TPREL16_HIGHA 113
+#define R_PPC64_DTPREL16_HIGH 114
+#define R_PPC64_DTPREL16_HIGHA 115
/* GNU extension to support local ifunc. */
#define R_PPC64_JMP_IREL 247
diff -urN glibc-2.17-c758a686/sysdeps/powerpc/powerpc64/dl-machine.h glibc-2.17-c758a686/sysdeps/powerpc/powerpc64/dl-machine.h
--- glibc-2.17-c758a686/sysdeps/powerpc/powerpc64/dl-machine.h 2014-05-29 13:17:34.000000000 -0500
+++ glibc-2.17-c758a686/sysdeps/powerpc/powerpc64/dl-machine.h 2014-05-29 13:17:35.000000000 -0500
@@ -663,11 +663,25 @@
case R_PPC64_TPREL16_HI:
value = elf_machine_tprel (map, sym_map, sym, reloc);
+ if (dont_expect (value + 0x80000000 >= 0x100000000LL))
+ _dl_reloc_overflow (map, "R_PPC64_TPREL16_HI", reloc_addr, refsym);
+ *(Elf64_Half *) reloc_addr = PPC_HI (value);
+ break;
+
+ case R_PPC64_TPREL16_HIGH:
+ value = elf_machine_tprel (map, sym_map, sym, reloc);
*(Elf64_Half *) reloc_addr = PPC_HI (value);
break;
case R_PPC64_TPREL16_HA:
value = elf_machine_tprel (map, sym_map, sym, reloc);
+ if (dont_expect (value + 0x80008000 >= 0x100000000LL))
+ _dl_reloc_overflow (map, "R_PPC64_TPREL16_HA", reloc_addr, refsym);
+ *(Elf64_Half *) reloc_addr = PPC_HA (value);
+ break;
+
+ case R_PPC64_TPREL16_HIGHA:
+ value = elf_machine_tprel (map, sym_map, sym, reloc);
*(Elf64_Half *) reloc_addr = PPC_HA (value);
break;
@@ -703,17 +717,23 @@
break;
case R_PPC64_ADDR16_HI:
+ if (dont_expect (value + 0x80000000 >= 0x100000000LL))
+ _dl_reloc_overflow (map, "R_PPC64_ADDR16_HI", reloc_addr, refsym);
+ case R_PPC64_ADDR16_HIGH:
*(Elf64_Half *) reloc_addr = PPC_HI (value);
break;
case R_PPC64_ADDR16_HA:
+ if (dont_expect (value + 0x80008000 >= 0x100000000LL))
+ _dl_reloc_overflow (map, "R_PPC64_ADDR16_HA", reloc_addr, refsym);
+ case R_PPC64_ADDR16_HIGHA:
*(Elf64_Half *) reloc_addr = PPC_HA (value);
break;
case R_PPC64_ADDR30:
{
Elf64_Addr delta = value - (Elf64_Xword) reloc_addr;
- if (dont_expect ((delta + 0x80000000) >= 0x10000000
+ if (dont_expect ((delta + 0x80000000) >= 0x100000000LL
|| (delta & 3) != 0))
_dl_reloc_overflow (map, "R_PPC64_ADDR30", reloc_addr, refsym);
BIT_INSERT (*(Elf64_Word *) reloc_addr, delta, 0xfffffffc);
@@ -762,7 +782,7 @@
return;
case R_PPC64_ADDR32:
- if (dont_expect ((value + 0x80000000) >= 0x10000000))
+ if (dont_expect ((value + 0x80000000) >= 0x100000000LL))
_dl_reloc_overflow (map, "R_PPC64_ADDR32", reloc_addr, refsym);
*(Elf64_Word *) reloc_addr = value;
return;
View File
@ -0,0 +1,31 @@
# commit b525166bb93b060e1146f0263b76a9c1e7455b06
# Author: Ulrich Weigand <Ulrich.Weigand@de.ibm.com>
# Date: Wed Dec 4 06:45:56 2013 -0600
#
# PowerPC64: Add __private_ss field to TCB header
#
# The TCB header on Intel contains a field __private_ss that is used
# to efficiently implement the -fsplit-stack GCC feature.
#
# In order to prepare for a possible future implementation of that
# feature on powerpc64, we'd like to reserve a similar field in
# the TCB header as well. (It would be good if this went in with
# or before the ELFv2 patches to ensure that this field will always
# be available in the ELFv2 environment.)
#
# The field needs to be added at the front of the tcbhead_t structure
# to avoid changing the ABI; see the recent discussion when adding
# the EBB fields.
#
diff -urN glibc-2.17-c758a686/nptl/sysdeps/powerpc/tls.h glibc-2.17-c758a686/nptl/sysdeps/powerpc/tls.h
--- glibc-2.17-c758a686/nptl/sysdeps/powerpc/tls.h 2014-05-29 13:19:25.000000000 -0500
+++ glibc-2.17-c758a686/nptl/sysdeps/powerpc/tls.h 2014-05-29 13:19:25.000000000 -0500
@@ -61,6 +61,8 @@
are private. */
typedef struct
{
+ /* GCC split stack support. */
+ void *__private_ss;
/* Reservation for the Event-Based Branching ABI. */
uintptr_t ebb_handler;
uintptr_t ebb_ctx_pointer;
View File
@ -0,0 +1,262 @@
# commit d31beafa8e4ca69faa4cf362784796ef17299341
# Author: Ulrich Weigand <Ulrich.Weigand@de.ibm.com>
# Date: Wed Dec 4 06:49:15 2013 -0600
#
# PowerPC64 ELFv2 ABI 1/6: Code refactoring
#
# This is the first patch to support the new ELFv2 ABI in glibc.
#
# As preparation, this patch simply refactors some of the powerpc64 assembler
# code to move all code related to creating function descriptors (.opd section)
# or using function descriptors (function pointer call) into a central place
# in sysdep.h.
#
# Note that most locations creating .opd entries were already using macros
# in sysdep.h; this patch simply extends this to the remaining places.
#
# No relevant change in generated code expected.
#
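# As background (not part of the patch), "using a function descriptor" in
# the original ABI means an indirect call goes through a three-doubleword
# record rather than straight to code; the PPC64_LOAD_FUNCPTR macro added
# below loads these three fields into CTR, r2 and r11 before the bctr.
# A rough C sketch of that record, mirroring the Elf64_FuncDesc layout
# used elsewhere in this series (the type name here is illustrative):
#
#   #include <stdint.h>
#
#   typedef struct
#   {
#     uint64_t fd_func;  /* code entry point, moved to CTR (via r12)    */
#     uint64_t fd_toc;   /* callee's TOC pointer, loaded into r2        */
#     uint64_t fd_aux;   /* static chain / environment, loaded into r11 */
#   } ppc64_elfv1_funcdesc;
#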
diff -urN glibc-2.17-c758a686/sysdeps/powerpc/powerpc64/crti.S glibc-2.17-c758a686/sysdeps/powerpc/powerpc64/crti.S
--- glibc-2.17-c758a686/sysdeps/powerpc/powerpc64/crti.S 2014-05-29 13:56:35.000000000 -0500
+++ glibc-2.17-c758a686/sysdeps/powerpc/powerpc64/crti.S 2014-05-29 13:56:37.000000000 -0500
@@ -60,18 +60,8 @@
.LC0:
.tc PREINIT_FUNCTION[TC], PREINIT_FUNCTION
#endif
- .type BODY_LABEL (_init), @function
- .globl _init
- .section ".opd", "aw"
- .align 3
-_init: OPD_ENT (_init)
-#ifdef HAVE_ASM_GLOBAL_DOT_NAME
- .globl BODY_LABEL (_init)
- .size _init, 24
-#else
- .type _init, @function
-#endif
.section ".init", "ax", @progbits
+ ENTRY_2(_init)
.align ALIGNARG (2)
BODY_LABEL (_init):
mflr 0
@@ -87,18 +77,8 @@
nop
1:
- .type BODY_LABEL (_fini), @function
- .globl _fini
- .section ".opd", "aw"
- .align 3
-_fini: OPD_ENT (_fini)
-#ifdef HAVE_ASM_GLOBAL_DOT_NAME
- .globl BODY_LABEL (_fini)
- .size _fini, 24
-#else
- .type _fini, @function
-#endif
.section ".fini", "ax", @progbits
+ ENTRY_2(_fini)
.align ALIGNARG (2)
BODY_LABEL (_fini):
mflr 0
diff -urN glibc-2.17-c758a686/sysdeps/powerpc/powerpc64/dl-machine.h glibc-2.17-c758a686/sysdeps/powerpc/powerpc64/dl-machine.h
--- glibc-2.17-c758a686/sysdeps/powerpc/powerpc64/dl-machine.h 2014-05-29 13:56:35.000000000 -0500
+++ glibc-2.17-c758a686/sysdeps/powerpc/powerpc64/dl-machine.h 2014-05-29 13:56:37.000000000 -0500
@@ -122,14 +122,7 @@
#define RTLD_START \
asm (".pushsection \".text\"\n" \
" .align 2\n" \
-" .type " BODY_PREFIX "_start,@function\n" \
-" .pushsection \".opd\",\"aw\"\n" \
-" .align 3\n" \
-" .globl _start\n" \
" " ENTRY_2(_start) "\n" \
-"_start:\n" \
-" " OPD_ENT(_start) "\n" \
-" .popsection\n" \
BODY_PREFIX "_start:\n" \
/* We start with the following on the stack, from top: \
argc (4 bytes); \
@@ -154,11 +147,6 @@
".LT__start_name_end:\n" \
" .align 2\n" \
" " END_2(_start) "\n" \
-" .globl _dl_start_user\n" \
-" .pushsection \".opd\",\"aw\"\n" \
-"_dl_start_user:\n" \
-" " OPD_ENT(_dl_start_user) "\n" \
-" .popsection\n" \
" .pushsection \".toc\",\"aw\"\n" \
DL_STARTING_UP_DEF \
".LC__rtld_local:\n" \
@@ -170,7 +158,6 @@
".LC__dl_fini:\n" \
" .tc _dl_fini[TC],_dl_fini\n" \
" .popsection\n" \
-" .type " BODY_PREFIX "_dl_start_user,@function\n" \
" " ENTRY_2(_dl_start_user) "\n" \
/* Now, we do our main work of calling initialisation procedures. \
The ELF ABI doesn't say anything about parameters for these, \
@@ -228,10 +215,7 @@
/* Now, call the start function descriptor at r30... */ \
" .globl ._dl_main_dispatch\n" \
"._dl_main_dispatch:\n" \
-" ld 0,0(30)\n" \
-" ld 2,8(30)\n" \
-" mtctr 0\n" \
-" ld 11,16(30)\n" \
+" " PPC64_LOAD_FUNCPTR(30) "\n" \
" bctr\n" \
".LT__dl_start_user:\n" \
" .long 0\n" \
diff -urN glibc-2.17-c758a686/sysdeps/powerpc/powerpc64/dl-trampoline.S glibc-2.17-c758a686/sysdeps/powerpc/powerpc64/dl-trampoline.S
--- glibc-2.17-c758a686/sysdeps/powerpc/powerpc64/dl-trampoline.S 2014-05-29 13:56:35.000000000 -0500
+++ glibc-2.17-c758a686/sysdeps/powerpc/powerpc64/dl-trampoline.S 2014-05-29 13:56:37.000000000 -0500
@@ -71,12 +71,8 @@
ld r5,INT_PARMS+16(r1)
ld r4,INT_PARMS+8(r1)
mtcrf 0xFF,r0
-/* Load the target address, toc and static chain reg from the function
- descriptor returned by fixup. */
- ld r0,0(r3)
- ld r2,8(r3)
- mtctr r0
- ld r11,16(r3)
+/* Prepare for calling the function returned by fixup. */
+ PPC64_LOAD_FUNCPTR r3
ld r3,INT_PARMS+0(r1)
/* Unwind the stack frame, and jump. */
addi r1,r1,FRAME_SIZE
@@ -322,13 +318,9 @@
ld r5,INT_PARMS+16(r1)
ld r4,INT_PARMS+8(r1)
mtcrf 0xFF,r0
-/* Load the target address, toc and static chain reg from the function
- descriptor returned by fixup. */
- ld r0,0(r3)
- ld r2,8(r3)
- ld r11,16(r3)
+/* Prepare for calling the function returned by fixup. */
+ PPC64_LOAD_FUNCPTR r3
ld r3,INT_PARMS+0(r1)
- mtctr r0
/* Load the floating point registers. */
lfd fp1,FPR_PARMS+0(r1)
lfd fp2,FPR_PARMS+8(r1)
@@ -386,14 +378,10 @@
ld r5,INT_PARMS+16(r1)
ld r4,INT_PARMS+8(r1)
mtcrf 0xFF,r0
-/* Load the target address, toc and static chain reg from the function
- descriptor returned by fixup. */
- ld r0,0(r3)
+/* Prepare for calling the function returned by fixup. */
std r2,40(r1)
- ld r2,8(r3)
- ld r11,16(r3)
+ PPC64_LOAD_FUNCPTR r3
ld r3,INT_PARMS+0(r1)
- mtctr r0
/* Load the floating point registers. */
lfd fp1,FPR_PARMS+0(r1)
lfd fp2,FPR_PARMS+8(r1)
diff -urN glibc-2.17-c758a686/sysdeps/powerpc/powerpc64/sysdep.h glibc-2.17-c758a686/sysdeps/powerpc/powerpc64/sysdep.h
--- glibc-2.17-c758a686/sysdeps/powerpc/powerpc64/sysdep.h 2014-05-29 13:56:35.000000000 -0500
+++ glibc-2.17-c758a686/sysdeps/powerpc/powerpc64/sysdep.h 2014-05-29 13:56:37.000000000 -0500
@@ -74,6 +74,14 @@
#endif
.endm
+/* Macro to prepare for calling via a function pointer. */
+ .macro PPC64_LOAD_FUNCPTR PTR
+ ld r12,0(\PTR)
+ ld r2,8(\PTR)
+ mtctr r12
+ ld r11,16(\PTR)
+ .endm
+
#ifdef USE_PPC64_OVERLAPPING_OPD
# define OPD_ENT(name) .quad BODY_LABEL (name), .TOC.@tocbase
#else
@@ -81,7 +89,6 @@
#endif
#define ENTRY_1(name) \
- .section ".text"; \
.type BODY_LABEL(name),@function; \
.globl name; \
.section ".opd","aw"; \
@@ -110,6 +117,7 @@
#endif
#define ENTRY(name) \
+ .section ".text"; \
ENTRY_2(name) \
.align ALIGNARG(2); \
BODY_LABEL(name): \
@@ -127,6 +135,7 @@
/* EALIGN is like ENTRY, but does alignment to 'words'*4 bytes
past a 2^alignt boundary. */
#define EALIGN(name, alignt, words) \
+ .section ".text"; \
ENTRY_2(name) \
.align ALIGNARG(alignt); \
EALIGN_W_##words; \
@@ -286,24 +295,42 @@
#else /* !__ASSEMBLER__ */
+#define PPC64_LOAD_FUNCPTR(ptr) \
+ "ld 12,0(" #ptr ");\n" \
+ "ld 2,8(" #ptr ");\n" \
+ "mtctr 12;\n" \
+ "ld 11,16(" #ptr ");"
+
#ifdef USE_PPC64_OVERLAPPING_OPD
# define OPD_ENT(name) ".quad " BODY_PREFIX #name ", .TOC.@tocbase;"
#else
# define OPD_ENT(name) ".quad " BODY_PREFIX #name ", .TOC.@tocbase, 0;"
#endif
+#define ENTRY_1(name) \
+ ".type " BODY_PREFIX #name ",@function;\n" \
+ ".globl " #name ";\n" \
+ ".pushsection \".opd\",\"aw\";\n" \
+ ".align 3;\n" \
+#name ":\n" \
+ OPD_ENT (name) "\n" \
+ ".popsection;"
+
#ifdef HAVE_ASM_GLOBAL_DOT_NAME
# define DOT_PREFIX "."
# define BODY_PREFIX "."
# define ENTRY_2(name) \
".globl " BODY_PREFIX #name ";\n" \
+ ENTRY_1(name) "\n" \
".size " #name ", 24;"
# define END_2(name) \
".size " BODY_PREFIX #name ",.-" BODY_PREFIX #name ";"
#else
# define DOT_PREFIX ""
# define BODY_PREFIX ".LY"
-# define ENTRY_2(name) ".type " #name ",@function;"
+# define ENTRY_2(name) \
+ ".type " #name ",@function;\n" \
+ ENTRY_1(name)
# define END_2(name) \
".size " #name ",.-" BODY_PREFIX #name ";\n" \
".size " BODY_PREFIX #name ",.-" BODY_PREFIX #name ";"
diff -urN glibc-2.17-c758a686/sysdeps/unix/sysv/linux/powerpc/powerpc64/clone.S glibc-2.17-c758a686/sysdeps/unix/sysv/linux/powerpc/powerpc64/clone.S
--- glibc-2.17-c758a686/sysdeps/unix/sysv/linux/powerpc/powerpc64/clone.S 2014-05-29 13:56:35.000000000 -0500
+++ glibc-2.17-c758a686/sysdeps/unix/sysv/linux/powerpc/powerpc64/clone.S 2014-05-29 13:56:37.000000000 -0500
@@ -104,9 +104,7 @@
std r2,40(r1)
/* Call procedure. */
- ld r0,0(r30)
- ld r2,8(r30)
- mtctr r0
+ PPC64_LOAD_FUNCPTR r30
mr r3,r31
bctrl
ld r2,40(r1)
View File
@ -0,0 +1,508 @@
# commit 696caf1d002ff059ddd20fd5eaccd76229c14850
# Author: Ulrich Weigand <Ulrich.Weigand@de.ibm.com>
# Date: Wed Dec 4 06:51:11 2013 -0600
#
# PowerPC64 ELFv2 ABI 2/6: Remove function descriptors
#
# This patch adds support for the ELFv2 ABI feature to remove function
# descriptors. See this GCC patch for in-depth discussion:
# http://gcc.gnu.org/ml/gcc-patches/2013-11/msg01141.html
#
# This mostly involves two types of changes: updating assembler source
# files to the new logic, and updating the dynamic loader.
#
# After the refactoring in the previous patch, most of the assembler source
# changes can be handled simply by providing ELFv2 versions of the
# macros in sysdep.h. One somewhat non-obvious change is in __GI__setjmp:
# this used to "fall through" to the immediately following __setjmp ENTRY
# point. This is no longer safe in the ELFv2 since ENTRY defines both
# a global and a local entry point, and you cannot simply fall through
# to a global entry point as it requires r12 to be set up.
#
# Also, makecontext needs to be updated to set up registers according to
# the new ABI for calling into the context's start routine.
#
# The dynamic linker changes mostly consist of removing special code
# to handle function descriptors. We also need to support the new PLT
# and glink format used by the ELFv2 linker, see:
# https://sourceware.org/ml/binutils/2013-10/msg00376.html
#
# In addition, the dynamic linker now verifies that the dynamic libraries
# it loads match its own ABI.
#
# The hack in VDSO_IFUNC_RET to "synthesize" a function descriptor
# for vDSO routines is also no longer necessary for ELFv2.
#
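# As an editorial aside (not part of the patch), a simplified C sketch of
# the ABI compatibility check this patch adds to elf_machine_matches_host:
# the low two bits of e_flags (EF_PPC64_ABI, defined as 3 in the elf.h hunk
# below) carry the ABI level, with 0 meaning "unspecified".
#
#   #include <elf.h>
#
#   /* MY_ABI is 1 for ELFv1 (function descriptors) or 2 for ELFv2.  */
#   static int
#   ppc64_abi_matches (const Elf64_Ehdr *ehdr, unsigned int my_abi)
#   {
#     unsigned int abi = ehdr->e_flags & 3;   /* EF_PPC64_ABI mask */
#     return abi == 0 || abi == my_abi;
#   }
#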
diff -urN glibc-2.17-c758a686/elf/elf.h glibc-2.17-c758a686/elf/elf.h
--- glibc-2.17-c758a686/elf/elf.h 2014-05-29 13:58:25.000000000 -0500
+++ glibc-2.17-c758a686/elf/elf.h 2014-05-29 13:58:25.000000000 -0500
@@ -2263,6 +2263,12 @@
#define R_PPC64_REL16_HI 251 /* half16 (sym+add-.)@h */
#define R_PPC64_REL16_HA 252 /* half16 (sym+add-.)@ha */
+/* e_flags bits specifying ABI.
+ 1 for original function descriptor using ABI,
+ 2 for revised ABI without function descriptors,
+ 0 for unspecified or not using any features affected by the differences. */
+#define EF_PPC64_ABI 3
+
/* PowerPC64 specific values for the Dyn d_tag field. */
#define DT_PPC64_GLINK (DT_LOPROC + 0)
#define DT_PPC64_OPD (DT_LOPROC + 1)
diff -urN glibc-2.17-c758a686/sysdeps/powerpc/powerpc64/crti.S glibc-2.17-c758a686/sysdeps/powerpc/powerpc64/crti.S
--- glibc-2.17-c758a686/sysdeps/powerpc/powerpc64/crti.S 2014-05-29 13:58:25.000000000 -0500
+++ glibc-2.17-c758a686/sysdeps/powerpc/powerpc64/crti.S 2014-05-29 13:58:25.000000000 -0500
@@ -64,6 +64,7 @@
ENTRY_2(_init)
.align ALIGNARG (2)
BODY_LABEL (_init):
+ LOCALENTRY(_init)
mflr 0
std 0, 16(r1)
stdu r1, -112(r1)
@@ -81,6 +82,7 @@
ENTRY_2(_fini)
.align ALIGNARG (2)
BODY_LABEL (_fini):
+ LOCALENTRY(_fini)
mflr 0
std 0, 16(r1)
stdu r1, -112(r1)
diff -urN glibc-2.17-c758a686/sysdeps/powerpc/powerpc64/dl-irel.h glibc-2.17-c758a686/sysdeps/powerpc/powerpc64/dl-irel.h
--- glibc-2.17-c758a686/sysdeps/powerpc/powerpc64/dl-irel.h 2014-05-29 13:58:25.000000000 -0500
+++ glibc-2.17-c758a686/sysdeps/powerpc/powerpc64/dl-irel.h 2014-05-29 13:58:25.000000000 -0500
@@ -50,7 +50,11 @@
{
Elf64_Addr *const reloc_addr = (void *) reloc->r_offset;
Elf64_Addr value = elf_ifunc_invoke(reloc->r_addend);
+#if _CALL_ELF != 2
*(Elf64_FuncDesc *) reloc_addr = *(Elf64_FuncDesc *) value;
+#else
+ *reloc_addr = value;
+#endif
}
else
__libc_fatal ("unexpected reloc type in static binary");
diff -urN glibc-2.17-c758a686/sysdeps/powerpc/powerpc64/dl-machine.h glibc-2.17-c758a686/sysdeps/powerpc/powerpc64/dl-machine.h
--- glibc-2.17-c758a686/sysdeps/powerpc/powerpc64/dl-machine.h 2014-05-29 13:58:25.000000000 -0500
+++ glibc-2.17-c758a686/sysdeps/powerpc/powerpc64/dl-machine.h 2014-05-29 14:05:46.000000000 -0500
@@ -31,6 +31,7 @@
in l_info array. */
#define DT_PPC64(x) (DT_PPC64_##x - DT_LOPROC + DT_NUM)
+#if _CALL_ELF != 2
/* A PowerPC64 function descriptor. The .plt (procedure linkage
table) and .opd (official procedure descriptor) sections are
arrays of these. */
@@ -40,6 +41,7 @@
Elf64_Addr fd_toc;
Elf64_Addr fd_aux;
} Elf64_FuncDesc;
+#endif
#define ELF_MULT_MACHINES_SUPPORTED
@@ -47,6 +49,18 @@
static inline int
elf_machine_matches_host (const Elf64_Ehdr *ehdr)
{
+ /* Verify that the binary matches our ABI version. */
+ if ((ehdr->e_flags & EF_PPC64_ABI) != 0)
+ {
+#if _CALL_ELF != 2
+ if ((ehdr->e_flags & EF_PPC64_ABI) != 1)
+ return 0;
+#else
+ if ((ehdr->e_flags & EF_PPC64_ABI) != 2)
+ return 0;
+#endif
+ }
+
return ehdr->e_machine == EM_PPC64;
}
@@ -124,6 +138,7 @@
" .align 2\n" \
" " ENTRY_2(_start) "\n" \
BODY_PREFIX "_start:\n" \
+" " LOCALENTRY(_start) "\n" \
/* We start with the following on the stack, from top: \
argc (4 bytes); \
arguments for program (terminated by NULL); \
@@ -165,6 +180,7 @@
Changing these is strongly discouraged (not least because argc is \
passed by value!). */ \
BODY_PREFIX "_dl_start_user:\n" \
+" " LOCALENTRY(_dl_start_user) "\n" \
/* the address of _start in r30. */ \
" mr 30,3\n" \
/* &_dl_argc in 29, &_dl_argv in 27, and _dl_loaded in 28. */ \
@@ -256,8 +272,22 @@
relocations behave "normally", ie. always use the real address
like PLT relocations. So always set ELF_RTYPE_CLASS_PLT. */
+#if _CALL_ELF != 2
#define elf_machine_type_class(type) \
(ELF_RTYPE_CLASS_PLT | (((type) == R_PPC64_COPY) * ELF_RTYPE_CLASS_COPY))
+#else
+/* And now that you have read that large comment, you can disregard it
+ all for ELFv2. ELFv2 does need the special SHN_UNDEF treatment. */
+#define IS_PPC64_TLS_RELOC(R) \
+ (((R) >= R_PPC64_TLS && (R) <= R_PPC64_DTPREL16_HIGHESTA) \
+ || ((R) >= R_PPC64_TPREL16_HIGH && (R) <= R_PPC64_DTPREL16_HIGHA))
+
+#define elf_machine_type_class(type) \
+ ((((type) == R_PPC64_JMP_SLOT \
+ || (type) == R_PPC64_ADDR24 \
+ || IS_PPC64_TLS_RELOC (type)) * ELF_RTYPE_CLASS_PLT) \
+ | (((type) == R_PPC64_COPY) * ELF_RTYPE_CLASS_COPY))
+#endif
/* A reloc type used for ld.so cmdline arg lookups to reject PLT entries. */
#define ELF_MACHINE_JMP_SLOT R_PPC64_JMP_SLOT
@@ -266,8 +296,19 @@
#define ELF_MACHINE_NO_REL 1
/* Stuff for the PLT. */
+#if _CALL_ELF != 2
#define PLT_INITIAL_ENTRY_WORDS 3
+#define PLT_ENTRY_WORDS 3
#define GLINK_INITIAL_ENTRY_WORDS 8
+/* The first 32k entries of glink can set an index and branch using two
+ instructions; past that point, glink uses three instructions. */
+#define GLINK_ENTRY_WORDS(I) (((I) < 0x8000)? 2 : 3)
+#else
+#define PLT_INITIAL_ENTRY_WORDS 2
+#define PLT_ENTRY_WORDS 1
+#define GLINK_INITIAL_ENTRY_WORDS 8
+#define GLINK_ENTRY_WORDS(I) 1
+#endif
#define PPC_DCBST(where) asm volatile ("dcbst 0,%0" : : "r"(where) : "memory")
#define PPC_DCBT(where) asm volatile ("dcbt 0,%0" : : "r"(where) : "memory")
@@ -312,17 +353,12 @@
if (lazy)
{
- /* The function descriptor of the appropriate trampline
- routine is used to set the 1st and 2nd doubleword of the
- plt_reserve. */
- Elf64_FuncDesc *resolve_fd;
Elf64_Word glink_offset;
- /* the plt_reserve area is the 1st 3 doublewords of the PLT */
- Elf64_FuncDesc *plt_reserve = (Elf64_FuncDesc *) plt;
Elf64_Word offset;
+ Elf64_Addr dlrr;
- resolve_fd = (Elf64_FuncDesc *) (profile ? _dl_profile_resolve
- : _dl_runtime_resolve);
+ dlrr = (Elf64_Addr) (profile ? _dl_profile_resolve
+ : _dl_runtime_resolve);
if (profile && GLRO(dl_profile) != NULL
&& _dl_name_match_p (GLRO(dl_profile), map))
/* This is the object we are looking for. Say that we really
@@ -330,20 +366,33 @@
GL(dl_profile_map) = map;
+#if _CALL_ELF != 2
/* We need to stuff the address/TOC of _dl_runtime_resolve
into doublewords 0 and 1 of plt_reserve. Then we need to
stuff the map address into doubleword 2 of plt_reserve.
This allows the GLINK0 code to transfer control to the
correct trampoline which will transfer control to fixup
in dl-machine.c. */
- plt_reserve->fd_func = resolve_fd->fd_func;
- plt_reserve->fd_toc = resolve_fd->fd_toc;
- plt_reserve->fd_aux = (Elf64_Addr) map;
+ {
+ /* The plt_reserve area is the 1st 3 doublewords of the PLT. */
+ Elf64_FuncDesc *plt_reserve = (Elf64_FuncDesc *) plt;
+ Elf64_FuncDesc *resolve_fd = (Elf64_FuncDesc *) dlrr;
+ plt_reserve->fd_func = resolve_fd->fd_func;
+ plt_reserve->fd_toc = resolve_fd->fd_toc;
+ plt_reserve->fd_aux = (Elf64_Addr) map;
#ifdef RTLD_BOOTSTRAP
- /* When we're bootstrapping, the opd entry will not have
- been relocated yet. */
- plt_reserve->fd_func += l_addr;
- plt_reserve->fd_toc += l_addr;
+ /* When we're bootstrapping, the opd entry will not have
+ been relocated yet. */
+ plt_reserve->fd_func += l_addr;
+ plt_reserve->fd_toc += l_addr;
+#endif
+ }
+#else
+ /* When we don't have function descriptors, the first doubleword
+ of the PLT holds the address of _dl_runtime_resolve, and the
+ second doubleword holds the map address. */
+ plt[0] = dlrr;
+ plt[1] = (Elf64_Addr) map;
#endif
/* Set up the lazy PLT entries. */
@@ -354,14 +403,8 @@
{
plt[offset] = (Elf64_Xword) &glink[glink_offset];
- offset += 3;
- /* The first 32k entries of glink can set an index and
- branch using two instructions; Past that point,
- glink uses three instructions. */
- if (i < 0x8000)
- glink_offset += 2;
- else
- glink_offset += 3;
+ offset += PLT_ENTRY_WORDS;
+ glink_offset += GLINK_ENTRY_WORDS (i);
}
/* Now, we've modified data. We need to write the changes from
@@ -389,6 +432,7 @@
const Elf64_Rela *reloc,
Elf64_Addr *reloc_addr, Elf64_Addr finaladdr)
{
+#if _CALL_ELF != 2
Elf64_FuncDesc *plt = (Elf64_FuncDesc *) reloc_addr;
Elf64_FuncDesc *rel = (Elf64_FuncDesc *) finaladdr;
Elf64_Addr offset = 0;
@@ -426,6 +470,9 @@
plt->fd_func = rel->fd_func + offset;
PPC_DCBST (&plt->fd_func);
PPC_ISYNC;
+#else
+ *reloc_addr = finaladdr;
+#endif
return finaladdr;
}
@@ -433,6 +480,7 @@
static inline void __attribute__ ((always_inline))
elf_machine_plt_conflict (Elf64_Addr *reloc_addr, Elf64_Addr finaladdr)
{
+#if _CALL_ELF != 2
Elf64_FuncDesc *plt = (Elf64_FuncDesc *) reloc_addr;
Elf64_FuncDesc *rel = (Elf64_FuncDesc *) finaladdr;
@@ -443,6 +491,9 @@
PPC_DCBST (&plt->fd_aux);
PPC_DCBST (&plt->fd_toc);
PPC_SYNC;
+#else
+ *reloc_addr = finaladdr;
+#endif
}
/* Return the final value of a plt relocation. */
@@ -512,6 +563,7 @@
resolve_ifunc (Elf64_Addr value,
const struct link_map *map, const struct link_map *sym_map)
{
+#if _CALL_ELF != 2
#ifndef RESOLVE_CONFLICT_FIND_MAP
/* The function we are calling may not yet have its opd entry relocated. */
Elf64_FuncDesc opd;
@@ -529,6 +581,7 @@
value = (Elf64_Addr) &opd;
}
#endif
+#endif
return ((Elf64_Addr (*) (unsigned long int)) value) (GLRO(dl_hwcap));
}
diff -urN glibc-2.17-c758a686/sysdeps/powerpc/powerpc64/setjmp-common.S glibc-2.17-c758a686/sysdeps/powerpc/powerpc64/setjmp-common.S
--- glibc-2.17-c758a686/sysdeps/powerpc/powerpc64/setjmp-common.S 2014-05-29 13:58:25.000000000 -0500
+++ glibc-2.17-c758a686/sysdeps/powerpc/powerpc64/setjmp-common.S 2014-05-29 14:00:27.000000000 -0500
@@ -55,21 +55,22 @@
that saves r2 since the call won't go via a plt call stub. See
bugz #269. __GI__setjmp is used in csu/libc-start.c when
HAVE_CLEANUP_JMP_BUF is defined. */
-ENTRY (BP_SYM (__GI__setjmp))
+ENTRY (__GI__setjmp)
std r2,40(r1) /* Save the callers TOC in the save area. */
- cfi_endproc
-END_2 (BP_SYM (__GI__setjmp))
-/* Fall thru. */
+ CALL_MCOUNT 1
+ li r4,0 /* Set second argument to 0. */
+ b JUMPTARGET (GLUE(__sigsetjmp,_ent))
+END (__GI__setjmp)
#endif
-ENTRY (BP_SYM (_setjmp))
+ENTRY (_setjmp)
CALL_MCOUNT 1
li r4,0 /* Set second argument to 0. */
b JUMPTARGET (GLUE(__sigsetjmp,_ent))
-END (BP_SYM (_setjmp))
+END (_setjmp)
libc_hidden_def (_setjmp)
-ENTRY (BP_SYM (__sigsetjmp))
+ENTRY (__sigsetjmp)
CALL_MCOUNT 2
JUMPTARGET(GLUE(__sigsetjmp,_ent)):
CHECK_BOUNDS_BOTH_WIDE_LIT (r3, r8, r9, JB_SIZE)
@@ -215,18 +216,18 @@
li r3,0
blr
#elif defined SHARED
- b JUMPTARGET (BP_SYM (__sigjmp_save))
+ b JUMPTARGET (__sigjmp_save)
#else
mflr r0
std r0,16(r1)
stdu r1,-112(r1)
cfi_adjust_cfa_offset(112)
cfi_offset(lr,16)
- bl JUMPTARGET (BP_SYM (__sigjmp_save))
+ bl JUMPTARGET (__sigjmp_save)
nop
ld r0,112+16(r1)
addi r1,r1,112
mtlr r0
blr
#endif
-END (BP_SYM (__sigsetjmp))
+END (__sigsetjmp)
diff -urN glibc-2.17-c758a686/sysdeps/powerpc/powerpc64/sysdep.h glibc-2.17-c758a686/sysdeps/powerpc/powerpc64/sysdep.h
--- glibc-2.17-c758a686/sysdeps/powerpc/powerpc64/sysdep.h 2014-05-29 13:58:25.000000000 -0500
+++ glibc-2.17-c758a686/sysdeps/powerpc/powerpc64/sysdep.h 2014-05-29 13:58:25.000000000 -0500
@@ -74,6 +74,8 @@
#endif
.endm
+#if _CALL_ELF != 2
+
/* Macro to prepare for calling via a function pointer. */
.macro PPC64_LOAD_FUNCPTR PTR
ld r12,0(\PTR)
@@ -115,13 +117,37 @@
.size name,.-BODY_LABEL(name); \
.size BODY_LABEL(name),.-BODY_LABEL(name);
#endif
+#define LOCALENTRY(name)
+
+#else /* _CALL_ELF */
+
+/* Macro to prepare for calling via a function pointer. */
+ .macro PPC64_LOAD_FUNCPTR PTR
+ mr r12,\PTR
+ mtctr r12
+ .endm
+
+#define DOT_LABEL(X) X
+#define BODY_LABEL(X) X
+#define ENTRY_2(name) \
+ .globl name; \
+ .type name,@function;
+#define END_2(name) \
+ .size name,.-name;
+#define LOCALENTRY(name) \
+1: addis r2,r12,.TOC.-1b@ha; \
+ addi r2,r2,.TOC.-1b@l; \
+ .localentry name,.-name;
+
+#endif /* _CALL_ELF */
#define ENTRY(name) \
.section ".text"; \
ENTRY_2(name) \
.align ALIGNARG(2); \
BODY_LABEL(name): \
- cfi_startproc;
+ cfi_startproc; \
+ LOCALENTRY(name)
#define EALIGN_W_0 /* No words to insert. */
#define EALIGN_W_1 nop
@@ -140,7 +166,8 @@
.align ALIGNARG(alignt); \
EALIGN_W_##words; \
BODY_LABEL(name): \
- cfi_startproc;
+ cfi_startproc; \
+ LOCALENTRY(name)
/* Local labels stripped out by the linker. */
#undef L
@@ -295,6 +322,8 @@
#else /* !__ASSEMBLER__ */
+#if _CALL_ELF != 2
+
#define PPC64_LOAD_FUNCPTR(ptr) \
"ld 12,0(" #ptr ");\n" \
"ld 2,8(" #ptr ");\n" \
@@ -335,5 +364,26 @@
".size " #name ",.-" BODY_PREFIX #name ";\n" \
".size " BODY_PREFIX #name ",.-" BODY_PREFIX #name ";"
#endif
+#define LOCALENTRY(name)
+
+#else /* _CALL_ELF */
+
+#define PPC64_LOAD_FUNCPTR(ptr) \
+ "mr 12," #ptr ";\n" \
+ "mtctr 12;"
+
+#define DOT_PREFIX ""
+#define BODY_PREFIX ""
+#define ENTRY_2(name) \
+ ".type " #name ",@function;\n" \
+ ".globl " #name ";"
+#define END_2(name) \
+ ".size " #name ",.-" #name ";"
+#define LOCALENTRY(name) \
+ "1: addis 2,12,.TOC.-1b@ha;\n" \
+ "addi 2,2,.TOC.-1b@l;\n" \
+ ".localentry " #name ",.-" #name ";"
+
+#endif /* _CALL_ELF */
#endif /* __ASSEMBLER__ */
diff -urN glibc-2.17-c758a686/sysdeps/unix/sysv/linux/powerpc/powerpc64/ldsodefs.h glibc-2.17-c758a686/sysdeps/unix/sysv/linux/powerpc/powerpc64/ldsodefs.h
--- glibc-2.17-c758a686/sysdeps/unix/sysv/linux/powerpc/powerpc64/ldsodefs.h 2014-05-29 13:58:24.000000000 -0500
+++ glibc-2.17-c758a686/sysdeps/unix/sysv/linux/powerpc/powerpc64/ldsodefs.h 2014-05-29 13:58:25.000000000 -0500
@@ -23,6 +23,8 @@
/* Now define our stuff. */
+#if _CALL_ELF != 2
+
static __always_inline bool
_dl_ppc64_is_opd_sym (const struct link_map *l, const ElfW(Sym) *sym)
{
@@ -73,4 +75,6 @@
#define DL_ADDR_SYM_MATCH(L, SYM, MATCHSYM, ADDR) \
_dl_ppc64_addr_sym_match (L, SYM, MATCHSYM, ADDR)
+#endif
+
#endif /* ldsodefs.h */
diff -urN glibc-2.17-c758a686/sysdeps/unix/sysv/linux/powerpc/powerpc64/makecontext.S glibc-2.17-c758a686/sysdeps/unix/sysv/linux/powerpc/powerpc64/makecontext.S
--- glibc-2.17-c758a686/sysdeps/unix/sysv/linux/powerpc/powerpc64/makecontext.S 2014-05-29 13:58:24.000000000 -0500
+++ glibc-2.17-c758a686/sysdeps/unix/sysv/linux/powerpc/powerpc64/makecontext.S 2014-05-29 13:58:25.000000000 -0500
@@ -111,6 +111,7 @@
L(noparms):
+#if _CALL_ELF != 2
/* Load the function address and TOC from the function descriptor
and store them in the ucontext as NIP and r2. Store the 3rd
field of the function descriptor into the ucontext as r11 in case
@@ -121,6 +122,12 @@
std r0,(SIGCONTEXT_GP_REGS+(PT_NIP*8))(r3)
std r10,(SIGCONTEXT_GP_REGS+(PT_R2*8))(r3)
std r9,(SIGCONTEXT_GP_REGS+(PT_R11*8))(r3)
+#else
+ /* In the ELFv2 ABI, the function pointer is already the address.
+ Store it as NIP and r12 as required by the ABI. */
+ std r4,(SIGCONTEXT_GP_REGS+(PT_NIP*8))(r3)
+ std r4,(SIGCONTEXT_GP_REGS+(PT_R12*8))(r3)
+#endif
/* If the target function returns we need to do some cleanup. We use a
code trick to get the address of our cleanup function into the link
View File
@ -0,0 +1,159 @@
# commit 122b66defdb9e4ded3ccc5c2b290f0520c6fa3cd
# Author: Ulrich Weigand <Ulrich.Weigand@de.ibm.com>
# Date: Wed Dec 4 06:52:40 2013 -0600
#
# PowerPC64 ELFv2 ABI 3/6: PLT local entry point optimization
#
# This is a follow-on to the previous patch to support the ELFv2 ABI in the
# dynamic loader, split off into its own patch since it is just an optional
# optimization.
#
# In the ELFv2 ABI, most functions define both a global and a local entry
# point; the local entry requires r2 to be already set up by the caller
# to point to the callee's TOC; while the global entry does not require
# the caller to know about the callee's TOC, but it needs to set up r12
# to the callee's entry point address.
#
# Now, when setting up a PLT slot, the dynamic linker will usually need
# to enter the target function's global entry point. However, if the
# linker can prove that the target function is in the same DSO as the
# PLT slot itself, and the whole DSO only uses a single TOC (which the
# linker will let ld.so know via a DT_PPC64_OPT entry), then it is
# possible to actually enter the local entry point address into the
# PLT slot, for a slight improvement in performance.
#
# Note that this uncovered a problem on the first call via _dl_runtime_resolve,
# because that routine neglected to restore the caller's TOC before calling
# the target function for the first time, since it assumed that function
# would always reload its own TOC anyway ...
#
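# As an editorial aside (not part of the patch), a minimal C sketch of how
# the PPC64_LOCAL_ENTRY_OFFSET macro introduced in the elf.h hunk below
# decodes a symbol's st_other field: bits 5-7 encode the distance from the
# global to the local entry point, and the common value 3 means 8 bytes
# (i.e. the two-instruction TOC setup sequence is skipped).
#
#   #include <stdint.h>
#
#   static unsigned int
#   ppc64_local_entry_offset (unsigned char st_other)
#   {
#     unsigned int enc = (st_other >> 5) & 0x7;
#     /* Decoded offsets: 0, 0, 4, 8, 16, 32, 64, 128 bytes.  */
#     return ((1u << enc) >> 2) << 2;
#   }
#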
diff -urN glibc-2.17-c758a686/elf/elf.h glibc-2.17-c758a686/elf/elf.h
--- glibc-2.17-c758a686/elf/elf.h 2014-05-29 14:08:44.000000000 -0500
+++ glibc-2.17-c758a686/elf/elf.h 2014-05-29 14:08:44.000000000 -0500
@@ -2273,8 +2273,19 @@
#define DT_PPC64_GLINK (DT_LOPROC + 0)
#define DT_PPC64_OPD (DT_LOPROC + 1)
#define DT_PPC64_OPDSZ (DT_LOPROC + 2)
+#define DT_PPC64_OPT (DT_LOPROC + 3)
#define DT_PPC64_NUM 3
+/* PowerPC64 specific values for the DT_PPC64_OPT Dyn entry. */
+#define PPC64_OPT_TLS 1
+#define PPC64_OPT_MULTI_TOC 2
+
+/* PowerPC64 specific values for the Elf64_Sym st_other field. */
+#define STO_PPC64_LOCAL_BIT 5
+#define STO_PPC64_LOCAL_MASK (7 << STO_PPC64_LOCAL_BIT)
+#define PPC64_LOCAL_ENTRY_OFFSET(other) \
+ (((1 << (((other) & STO_PPC64_LOCAL_MASK) >> STO_PPC64_LOCAL_BIT)) >> 2) << 2)
+
/* ARM specific declarations */
diff -urN glibc-2.17-c758a686/sysdeps/powerpc/powerpc64/dl-machine.h glibc-2.17-c758a686/sysdeps/powerpc/powerpc64/dl-machine.h
--- glibc-2.17-c758a686/sysdeps/powerpc/powerpc64/dl-machine.h 2014-05-29 14:08:40.000000000 -0500
+++ glibc-2.17-c758a686/sysdeps/powerpc/powerpc64/dl-machine.h 2014-05-29 14:08:44.000000000 -0500
@@ -425,6 +425,42 @@
return lazy;
}
+#if _CALL_ELF == 2
+/* If the PLT entry whose reloc is 'reloc' resolves to a function in
+ the same object, return the target function's local entry point
+ offset if usable. */
+static inline Elf64_Addr __attribute__ ((always_inline))
+ppc64_local_entry_offset (struct link_map *map, lookup_t sym_map,
+ const Elf64_Rela *reloc)
+{
+ const Elf64_Sym *symtab;
+ const Elf64_Sym *sym;
+
+ /* If the target function is in a different object, we cannot
+ use the local entry point. */
+ if (sym_map != map)
+ return 0;
+
+ /* If the linker inserted multiple TOCs, we cannot use the
+ local entry point. */
+ if (map->l_info[DT_PPC64(OPT)]
+ && (map->l_info[DT_PPC64(OPT)]->d_un.d_val & PPC64_OPT_MULTI_TOC))
+ return 0;
+
+ /* Otherwise, we can use the local entry point. Retrieve its offset
+ from the symbol's ELF st_other field. */
+ symtab = (const void *) D_PTR (map, l_info[DT_SYMTAB]);
+ sym = &symtab[ELFW(R_SYM) (reloc->r_info)];
+
+ /* If the target function is an ifunc then the local entry offset is
+ for the resolver, not the final destination. */
+ if (__builtin_expect (ELFW(ST_TYPE) (sym->st_info) == STT_GNU_IFUNC, 0))
+ return 0;
+
+ return PPC64_LOCAL_ENTRY_OFFSET (sym->st_other);
+}
+#endif
+
/* Change the PLT entry whose reloc is 'reloc' to call the actual
routine. */
static inline Elf64_Addr __attribute__ ((always_inline))
@@ -471,6 +507,7 @@
PPC_DCBST (&plt->fd_func);
PPC_ISYNC;
#else
+ finaladdr += ppc64_local_entry_offset (map, sym_map, reloc);
*reloc_addr = finaladdr;
#endif
@@ -478,7 +515,9 @@
}
static inline void __attribute__ ((always_inline))
-elf_machine_plt_conflict (Elf64_Addr *reloc_addr, Elf64_Addr finaladdr)
+elf_machine_plt_conflict (struct link_map *map, lookup_t sym_map,
+ const Elf64_Rela *reloc,
+ Elf64_Addr *reloc_addr, Elf64_Addr finaladdr)
{
#if _CALL_ELF != 2
Elf64_FuncDesc *plt = (Elf64_FuncDesc *) reloc_addr;
@@ -492,6 +531,7 @@
PPC_DCBST (&plt->fd_toc);
PPC_SYNC;
#else
+ finaladdr += ppc64_local_entry_offset (map, sym_map, reloc);
*reloc_addr = finaladdr;
#endif
}
@@ -641,7 +681,7 @@
/* Fall thru */
case R_PPC64_JMP_SLOT:
#ifdef RESOLVE_CONFLICT_FIND_MAP
- elf_machine_plt_conflict (reloc_addr, value);
+ elf_machine_plt_conflict (map, sym_map, reloc, reloc_addr, value);
#else
elf_machine_fixup_plt (map, sym_map, reloc, reloc_addr, value);
#endif
diff -urN glibc-2.17-c758a686/sysdeps/powerpc/powerpc64/dl-trampoline.S glibc-2.17-c758a686/sysdeps/powerpc/powerpc64/dl-trampoline.S
--- glibc-2.17-c758a686/sysdeps/powerpc/powerpc64/dl-trampoline.S 2014-05-29 14:08:40.000000000 -0500
+++ glibc-2.17-c758a686/sysdeps/powerpc/powerpc64/dl-trampoline.S 2014-05-29 14:08:44.000000000 -0500
@@ -74,6 +74,10 @@
/* Prepare for calling the function returned by fixup. */
PPC64_LOAD_FUNCPTR r3
ld r3,INT_PARMS+0(r1)
+#if _CALL_ELF == 2
+/* Restore the caller's TOC in case we jump to a local entry point. */
+ ld r2,FRAME_SIZE+40(r1)
+#endif
/* Unwind the stack frame, and jump. */
addi r1,r1,FRAME_SIZE
bctr
@@ -321,6 +325,10 @@
/* Prepare for calling the function returned by fixup. */
PPC64_LOAD_FUNCPTR r3
ld r3,INT_PARMS+0(r1)
+#if _CALL_ELF == 2
+/* Restore the caller's TOC in case we jump to a local entry point. */
+ ld r2,FRAME_SIZE+40(r1)
+#endif
/* Load the floating point registers. */
lfd fp1,FPR_PARMS+0(r1)
lfd fp2,FPR_PARMS+8(r1)
View File
@ -0,0 +1,764 @@
# commit 8b8a692cfd7d80f1ee7c8b9ab356a259367dd187
# Author: Ulrich Weigand <Ulrich.Weigand@de.ibm.com>
# Date: Wed Dec 4 06:55:03 2013 -0600
#
# PowerPC64 ELFv2 ABI 4/6: Stack frame layout changes
#
# This updates glibc for the changes in the ELFv2 ABI relating to the
# stack frame layout. These are described in more detail here:
# http://gcc.gnu.org/ml/gcc-patches/2013-11/msg01149.html
# http://gcc.gnu.org/ml/gcc-patches/2013-11/msg01146.html
#
# Specifically, the "compiler and linker doublewords" were removed,
# which has the effect that the save slot for the TOC register is
# now at offset 24 rather than 40 from the stack pointer.
#
# In addition, a function may no longer assume that its caller has set
# up a 64-byte register save area for its use.
#
# To address the first change, the patch goes through all assembler
# files and replaces immediate offsets in instructions accessing the
# ABI-defined stack slots with symbolic offsets. Those were already
# defined in ucontext_i.sym and used in some of the context routines,
# but that doesn't really seem like the right place for those defines.
#
# The patch instead defines those symbolic offsets in sysdep.h,
# in two variants for the old and new ABI, and uses them systematically
# in all assembler files, not just the context routines.
#
# The second change only affected a few assembler files that used
# the save area to temporarily store some registers. In those
# cases where this happens within a leaf function, this patch
# changes the code to store those registers to the "red zone"
# below the stack pointer. Otherwise, the functions already allocate
# a stack frame, and the patch changes them to add extra space in
# these frames as temporary space for the ELFv2 ABI.
#
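# As an editorial aside (not part of the patch), the key constants condensed
# from the sysdep.h hunk below, showing the layout difference described
# above:
#
#   #if _CALL_ELF != 2            /* ELFv1 */
#   # define FRAME_MIN_SIZE 112   /* includes compiler/linker dwords and
#                                    an 8-doubleword parameter save area  */
#   # define FRAME_TOC_SAVE  40
#   #else                         /* ELFv2 */
#   # define FRAME_MIN_SIZE  32   /* backchain, CR, LR and TOC save only  */
#   # define FRAME_TOC_SAVE  24
#   #endif
#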
diff -urN glibc-2.17-c758a686/nptl/sysdeps/unix/sysv/linux/powerpc/powerpc64/sysdep-cancel.h glibc-2.17-c758a686/nptl/sysdeps/unix/sysv/linux/powerpc/powerpc64/sysdep-cancel.h
--- glibc-2.17-c758a686/nptl/sysdeps/unix/sysv/linux/powerpc/powerpc64/sysdep-cancel.h 2014-05-29 14:10:00.000000000 -0500
+++ glibc-2.17-c758a686/nptl/sysdeps/unix/sysv/linux/powerpc/powerpc64/sysdep-cancel.h 2014-05-29 14:10:00.000000000 -0500
@@ -31,6 +31,14 @@
# define DASHDASHPFX(str) __##str
# endif
+#if _CALL_ELF == 2
+#define CANCEL_FRAMESIZE (FRAME_MIN_SIZE+16+48)
+#define CANCEL_PARM_SAVE (FRAME_MIN_SIZE+16)
+#else
+#define CANCEL_FRAMESIZE (FRAME_MIN_SIZE+16)
+#define CANCEL_PARM_SAVE (CANCEL_FRAMESIZE+FRAME_PARM_SAVE)
+#endif
+
# undef PSEUDO
# define PSEUDO(name, syscall_name, args) \
.section ".text"; \
@@ -44,52 +52,52 @@
PSEUDO_RET; \
.size DASHDASHPFX(syscall_name##_nocancel),.-DASHDASHPFX(syscall_name##_nocancel); \
.Lpseudo_cancel: \
- stdu 1,-128(1); \
- cfi_adjust_cfa_offset (128); \
+ stdu 1,-CANCEL_FRAMESIZE(1); \
+ cfi_adjust_cfa_offset (CANCEL_FRAMESIZE); \
mflr 9; \
- std 9,128+16(1); \
- cfi_offset (lr, 16); \
+ std 9,CANCEL_FRAMESIZE+FRAME_LR_SAVE(1); \
+ cfi_offset (lr, FRAME_LR_SAVE); \
DOCARGS_##args; /* save syscall args around CENABLE. */ \
CENABLE; \
- std 3,112(1); /* store CENABLE return value (MASK). */ \
+ std 3,FRAME_MIN_SIZE(1); /* store CENABLE return value (MASK). */ \
UNDOCARGS_##args; /* restore syscall args. */ \
DO_CALL (SYS_ify (syscall_name)); \
mfcr 0; /* save CR/R3 around CDISABLE. */ \
- std 3,120(1); \
- std 0,128+8(1); \
- cfi_offset (cr, 8); \
- ld 3,112(1); /* pass MASK to CDISABLE. */ \
+ std 3,FRAME_MIN_SIZE+8(1); \
+ std 0,CANCEL_FRAMESIZE+FRAME_CR_SAVE(1); \
+ cfi_offset (cr, FRAME_CR_SAVE); \
+ ld 3,FRAME_MIN_SIZE(1); /* pass MASK to CDISABLE. */ \
CDISABLE; \
- ld 9,128+16(1); \
- ld 0,128+8(1); /* restore CR/R3. */ \
- ld 3,120(1); \
+ ld 9,CANCEL_FRAMESIZE+FRAME_LR_SAVE(1); \
+ ld 0,CANCEL_FRAMESIZE+FRAME_CR_SAVE(1); /* restore CR/R3. */ \
+ ld 3,FRAME_MIN_SIZE+8(1); \
mtlr 9; \
mtcr 0; \
- addi 1,1,128; \
- cfi_adjust_cfa_offset (-128); \
+ addi 1,1,CANCEL_FRAMESIZE; \
+ cfi_adjust_cfa_offset (-CANCEL_FRAMESIZE); \
cfi_restore (lr); \
cfi_restore (cr)
# define DOCARGS_0
# define UNDOCARGS_0
-# define DOCARGS_1 std 3,128+48(1); DOCARGS_0
-# define UNDOCARGS_1 ld 3,128+48(1); UNDOCARGS_0
+# define DOCARGS_1 std 3,CANCEL_PARM_SAVE(1); DOCARGS_0
+# define UNDOCARGS_1 ld 3,CANCEL_PARM_SAVE(1); UNDOCARGS_0
-# define DOCARGS_2 std 4,128+56(1); DOCARGS_1
-# define UNDOCARGS_2 ld 4,128+56(1); UNDOCARGS_1
+# define DOCARGS_2 std 4,CANCEL_PARM_SAVE+8(1); DOCARGS_1
+# define UNDOCARGS_2 ld 4,CANCEL_PARM_SAVE+8(1); UNDOCARGS_1
-# define DOCARGS_3 std 5,128+64(1); DOCARGS_2
-# define UNDOCARGS_3 ld 5,128+64(1); UNDOCARGS_2
+# define DOCARGS_3 std 5,CANCEL_PARM_SAVE+16(1); DOCARGS_2
+# define UNDOCARGS_3 ld 5,CANCEL_PARM_SAVE+16(1); UNDOCARGS_2
-# define DOCARGS_4 std 6,128+72(1); DOCARGS_3
-# define UNDOCARGS_4 ld 6,128+72(1); UNDOCARGS_3
+# define DOCARGS_4 std 6,CANCEL_PARM_SAVE+24(1); DOCARGS_3
+# define UNDOCARGS_4 ld 6,CANCEL_PARM_SAVE+24(1); UNDOCARGS_3
-# define DOCARGS_5 std 7,128+80(1); DOCARGS_4
-# define UNDOCARGS_5 ld 7,128+80(1); UNDOCARGS_4
+# define DOCARGS_5 std 7,CANCEL_PARM_SAVE+32(1); DOCARGS_4
+# define UNDOCARGS_5 ld 7,CANCEL_PARM_SAVE+32(1); UNDOCARGS_4
-# define DOCARGS_6 std 8,128+88(1); DOCARGS_5
-# define UNDOCARGS_6 ld 8,128+88(1); UNDOCARGS_5
+# define DOCARGS_6 std 8,CANCEL_PARM_SAVE+40(1); DOCARGS_5
+# define UNDOCARGS_6 ld 8,CANCEL_PARM_SAVE+40(1); UNDOCARGS_5
# ifdef IS_IN_libpthread
# ifdef SHARED
diff -urN glibc-2.17-c758a686/sysdeps/powerpc/powerpc64/__longjmp-common.S glibc-2.17-c758a686/sysdeps/powerpc/powerpc64/__longjmp-common.S
--- glibc-2.17-c758a686/sysdeps/powerpc/powerpc64/__longjmp-common.S 2014-05-29 14:09:56.000000000 -0500
+++ glibc-2.17-c758a686/sysdeps/powerpc/powerpc64/__longjmp-common.S 2014-05-29 14:10:00.000000000 -0500
@@ -133,7 +133,7 @@
ld r14,((JB_GPRS+0)*8)(r3)
lfd fp14,((JB_FPRS+0)*8)(r3)
#if defined SHARED && !defined IS_IN_rtld
- std r2,40(r1) /* Restore the callers TOC save area. */
+ std r2,FRAME_TOC_SAVE(r1) /* Restore the callers TOC save area. */
#endif
ld r15,((JB_GPRS+1)*8)(r3)
lfd fp15,((JB_FPRS+1)*8)(r3)
@@ -151,7 +151,7 @@
PTR_DEMANGLE2 (r0, r25)
#endif
mtlr r0
-/* std r2,40(r1) Restore the TOC save area. */
+/* std r2,FRAME_TOC_SAVE(r1) Restore the TOC save area. */
ld r21,((JB_GPRS+7)*8)(r3)
lfd fp21,((JB_FPRS+7)*8)(r3)
ld r22,((JB_GPRS+8)*8)(r3)
diff -urN glibc-2.17-c758a686/sysdeps/powerpc/powerpc64/crti.S glibc-2.17-c758a686/sysdeps/powerpc/powerpc64/crti.S
--- glibc-2.17-c758a686/sysdeps/powerpc/powerpc64/crti.S 2014-05-29 14:09:56.000000000 -0500
+++ glibc-2.17-c758a686/sysdeps/powerpc/powerpc64/crti.S 2014-05-29 14:10:00.000000000 -0500
@@ -66,8 +66,8 @@
BODY_LABEL (_init):
LOCALENTRY(_init)
mflr 0
- std 0, 16(r1)
- stdu r1, -112(r1)
+ std 0, FRAME_LR_SAVE(r1)
+ stdu r1, -FRAME_MIN_SIZE_PARM(r1)
#if PREINIT_FUNCTION_WEAK
addis r9, r2, .LC0@toc@ha
ld r0, .LC0@toc@l(r9)
@@ -84,5 +84,5 @@
BODY_LABEL (_fini):
LOCALENTRY(_fini)
mflr 0
- std 0, 16(r1)
- stdu r1, -112(r1)
+ std 0, FRAME_LR_SAVE(r1)
+ stdu r1, -FRAME_MIN_SIZE_PARM(r1)
diff -urN glibc-2.17-c758a686/sysdeps/powerpc/powerpc64/crtn.S glibc-2.17-c758a686/sysdeps/powerpc/powerpc64/crtn.S
--- glibc-2.17-c758a686/sysdeps/powerpc/powerpc64/crtn.S 2014-05-29 14:09:56.000000000 -0500
+++ glibc-2.17-c758a686/sysdeps/powerpc/powerpc64/crtn.S 2014-05-29 14:10:00.000000000 -0500
@@ -39,13 +39,13 @@
#include <sysdep.h>
.section .init,"ax",@progbits
- addi r1, r1, 112
- ld r0, 16(r1)
+ addi r1, r1, FRAME_MIN_SIZE_PARM
+ ld r0, FRAME_LR_SAVE(r1)
mtlr r0
blr
.section .fini,"ax",@progbits
- addi r1, r1, 112
- ld r0, 16(r1)
+ addi r1, r1, FRAME_MIN_SIZE_PARM
+ ld r0, FRAME_LR_SAVE(r1)
mtlr r0
blr
diff -urN glibc-2.17-c758a686/sysdeps/powerpc/powerpc64/dl-trampoline.S glibc-2.17-c758a686/sysdeps/powerpc/powerpc64/dl-trampoline.S
--- glibc-2.17-c758a686/sysdeps/powerpc/powerpc64/dl-trampoline.S 2014-05-29 14:09:56.000000000 -0500
+++ glibc-2.17-c758a686/sysdeps/powerpc/powerpc64/dl-trampoline.S 2014-05-29 14:10:00.000000000 -0500
@@ -26,13 +26,13 @@
parm1 (r3) and the index (r0) need to be converted to an offset
(index * 24) in parm2 (r4). */
-#define FRAME_SIZE 176
+#define FRAME_SIZE (FRAME_MIN_SIZE+64)
/* We need to save the registers used to pass parameters, ie. r3 thru
r10; Use local var space rather than the parameter save area,
because gcc as of 2010/05 doesn't allocate a proper stack frame for
a function that makes no calls except for __tls_get_addr and we
might be here resolving the __tls_get_addr call. */
-#define INT_PARMS 112
+#define INT_PARMS FRAME_MIN_SIZE
EALIGN(_dl_runtime_resolve, 4, 0)
stdu r1,-FRAME_SIZE(r1)
cfi_adjust_cfa_offset (FRAME_SIZE)
@@ -48,25 +48,25 @@
mflr r0
std r8,INT_PARMS+40(r1)
/* Store the LR in the LR Save area. */
- std r0,FRAME_SIZE+16(r1)
- cfi_offset (lr, 16)
+ std r0,FRAME_SIZE+FRAME_LR_SAVE(r1)
+ cfi_offset (lr, FRAME_LR_SAVE)
mfcr r0
std r9,INT_PARMS+48(r1)
std r10,INT_PARMS+56(r1)
/* I'm almost certain we don't have to save cr... be safe. */
- std r0,FRAME_SIZE+8(r1)
+ std r0,FRAME_SIZE+FRAME_CR_SAVE(r1)
bl JUMPTARGET(_dl_fixup)
#ifndef SHARED
nop
#endif
/* Put the registers back. */
- ld r0,FRAME_SIZE+16(r1)
+ ld r0,FRAME_SIZE+FRAME_LR_SAVE(r1)
ld r10,INT_PARMS+56(r1)
ld r9,INT_PARMS+48(r1)
ld r8,INT_PARMS+40(r1)
ld r7,INT_PARMS+32(r1)
mtlr r0
- ld r0,FRAME_SIZE+8(r1)
+ ld r0,FRAME_SIZE+FRAME_CR_SAVE(r1)
ld r6,INT_PARMS+24(r1)
ld r5,INT_PARMS+16(r1)
ld r4,INT_PARMS+8(r1)
@@ -76,7 +76,7 @@
ld r3,INT_PARMS+0(r1)
#if _CALL_ELF == 2
/* Restore the caller's TOC in case we jump to a local entry point. */
- ld r2,FRAME_SIZE+40(r1)
+ ld r2,FRAME_SIZE+FRAME_TOC_SAVE(r1)
#endif
/* Unwind the stack frame, and jump. */
addi r1,r1,FRAME_SIZE
@@ -86,6 +86,7 @@
#undef INT_PARMS
/* Stack layout:
+ (Note: some of these are not required for the ELFv2 ABI.)
+592 previous backchain
+584 spill_r31
+576 spill_r30
@@ -147,10 +148,11 @@
+64 parm3
+56 parm2
+48 parm1
- * Parameter save area, Allocated by the call, at least 8 double words
- +40 TOC save area
- +32 Reserved for linker
- +24 Reserved for compiler
+ * Parameter save area
+ * (v1 ABI: Allocated by the call, at least 8 double words)
+ +40 v1 ABI: TOC save area
+ +32 v1 ABI: Reserved for linker
+ +24 v1 ABI: Reserved for compiler / v2 ABI: TOC save area
+16 LR save area
+8 CR save area
r1+0 stack back chain
@@ -206,15 +208,15 @@
/* Store the LR in the LR Save area of the previous frame. */
/* XXX Do we have to do this? */
la r8,FRAME_SIZE(r1)
- std r5,FRAME_SIZE+16(r1)
- cfi_offset (lr, 16)
+ std r5,FRAME_SIZE+FRAME_LR_SAVE(r1)
+ cfi_offset (lr, FRAME_LR_SAVE)
std r5,CALLING_LR(r1)
mfcr r0
std r9,INT_PARMS+48(r1)
std r10,INT_PARMS+56(r1)
std r8,CALLING_SP(r1)
/* I'm almost certain we don't have to save cr... be safe. */
- std r0,FRAME_SIZE+8(r1)
+ std r0,FRAME_SIZE+FRAME_CR_SAVE(r1)
ld r12,.LC__dl_hwcap@toc(r2)
#ifdef SHARED
/* Load _rtld_local_ro._dl_hwcap. */
@@ -311,13 +313,13 @@
lvx v12,r11,r10
lvx v13,r11,r9
L(restoreFXR):
- ld r0,FRAME_SIZE+16(r1)
+ ld r0,FRAME_SIZE+FRAME_LR_SAVE(r1)
ld r10,INT_PARMS+56(r1)
ld r9,INT_PARMS+48(r1)
ld r8,INT_PARMS+40(r1)
ld r7,INT_PARMS+32(r1)
mtlr r0
- ld r0,FRAME_SIZE+8(r1)
+ ld r0,FRAME_SIZE+FRAME_CR_SAVE(r1)
ld r6,INT_PARMS+24(r1)
ld r5,INT_PARMS+16(r1)
ld r4,INT_PARMS+8(r1)
@@ -327,7 +329,7 @@
ld r3,INT_PARMS+0(r1)
#if _CALL_ELF == 2
/* Restore the caller's TOC in case we jump to a local entry point. */
- ld r2,FRAME_SIZE+40(r1)
+ ld r2,FRAME_SIZE+FRAME_TOC_SAVE(r1)
#endif
/* Load the floating point registers. */
lfd fp1,FPR_PARMS+0(r1)
@@ -375,19 +377,19 @@
lvx v12,r11,r10
lvx v13,r11,r9
L(restoreFXR2):
- ld r0,FRAME_SIZE+16(r1)
+ ld r0,FRAME_SIZE+FRAME_LR_SAVE(r1)
ld r10,INT_PARMS+56(r1)
ld r9,INT_PARMS+48(r1)
ld r8,INT_PARMS+40(r1)
ld r7,INT_PARMS+32(r1)
mtlr r0
- ld r0,FRAME_SIZE+8(r1)
+ ld r0,FRAME_SIZE+FRAME_CR_SAVE(r1)
ld r6,INT_PARMS+24(r1)
ld r5,INT_PARMS+16(r1)
ld r4,INT_PARMS+8(r1)
mtcrf 0xFF,r0
/* Prepare for calling the function returned by fixup. */
- std r2,40(r1)
+ std r2,FRAME_TOC_SAVE(r1)
PPC64_LOAD_FUNCPTR r3
ld r3,INT_PARMS+0(r1)
/* Load the floating point registers. */
@@ -406,7 +408,7 @@
lfd fp13,FPR_PARMS+96(r1)
/* Call the target function. */
bctrl
- ld r2,40(r1)
+ ld r2,FRAME_TOC_SAVE(r1)
lwz r12,VR_VRSAVE(r1)
/* But return here and store the return values. */
std r3,INT_RTN(r1)
@@ -441,7 +443,7 @@
beq L(pltexitreturn)
lvx v2,0,r10
L(pltexitreturn):
- ld r0,FRAME_SIZE+16(r1)
+ ld r0,FRAME_SIZE+FRAME_LR_SAVE(r1)
ld r31,584(r1)
ld r30,576(r1)
mtlr r0
diff -urN glibc-2.17-c758a686/sysdeps/powerpc/powerpc64/ppc-mcount.S glibc-2.17-c758a686/sysdeps/powerpc/powerpc64/ppc-mcount.S
--- glibc-2.17-c758a686/sysdeps/powerpc/powerpc64/ppc-mcount.S 2014-05-29 14:09:56.000000000 -0500
+++ glibc-2.17-c758a686/sysdeps/powerpc/powerpc64/ppc-mcount.S 2014-05-29 14:10:00.000000000 -0500
@@ -24,16 +24,16 @@
ENTRY(_mcount)
mflr r4
ld r11, 0(r1)
- stdu r1,-112(r1)
- cfi_adjust_cfa_offset (112)
- std r4, 128(r1)
- cfi_offset (lr, 16)
- ld r3, 16(r11)
+ stdu r1,-FRAME_MIN_SIZE(r1)
+ cfi_adjust_cfa_offset (FRAME_MIN_SIZE)
+ std r4, FRAME_MIN_SIZE+FRAME_LR_SAVE(r1)
+ cfi_offset (lr, FRAME_LR_SAVE)
+ ld r3, FRAME_LR_SAVE(r11)
bl JUMPTARGET(__mcount_internal)
nop
- ld r0, 128(r1)
+ ld r0, FRAME_MIN_SIZE+FRAME_LR_SAVE(r1)
mtlr r0
- addi r1,r1,112
+ addi r1,r1,FRAME_MIN_SIZE
blr
END(_mcount)
diff -urN glibc-2.17-c758a686/sysdeps/powerpc/powerpc64/setjmp-common.S glibc-2.17-c758a686/sysdeps/powerpc/powerpc64/setjmp-common.S
--- glibc-2.17-c758a686/sysdeps/powerpc/powerpc64/setjmp-common.S 2014-05-29 14:09:56.000000000 -0500
+++ glibc-2.17-c758a686/sysdeps/powerpc/powerpc64/setjmp-common.S 2014-05-29 14:10:00.000000000 -0500
@@ -56,7 +56,7 @@
bugz #269. __GI__setjmp is used in csu/libc-start.c when
HAVE_CLEANUP_JMP_BUF is defined. */
ENTRY (__GI__setjmp)
- std r2,40(r1) /* Save the callers TOC in the save area. */
+ std r2,FRAME_TOC_SAVE(r1) /* Save the callers TOC in the save area. */
CALL_MCOUNT 1
li r4,0 /* Set second argument to 0. */
b JUMPTARGET (GLUE(__sigsetjmp,_ent))
@@ -83,7 +83,7 @@
#endif
mflr r0
#if defined SHARED && !defined IS_IN_rtld
- ld r5,40(r1) /* Retrieve the callers TOC. */
+ ld r5,FRAME_TOC_SAVE(r1) /* Retrieve the callers TOC. */
std r5,(JB_GPR2*8)(3)
#else
std r2,(JB_GPR2*8)(3)
@@ -219,14 +219,14 @@
b JUMPTARGET (__sigjmp_save)
#else
mflr r0
- std r0,16(r1)
- stdu r1,-112(r1)
- cfi_adjust_cfa_offset(112)
- cfi_offset(lr,16)
+ std r0,FRAME_LR_SAVE(r1)
+ stdu r1,-FRAME_MIN_SIZE(r1)
+ cfi_adjust_cfa_offset(FRAME_MIN_SIZE)
+ cfi_offset(lr,FRAME_LR_SAVE)
bl JUMPTARGET (__sigjmp_save)
nop
- ld r0,112+16(r1)
- addi r1,r1,112
+ ld r0,FRAME_MIN_SIZE+FRAME_LR_SAVE(r1)
+ addi r1,r1,FRAME_MIN_SIZE
mtlr r0
blr
#endif
diff -urN glibc-2.17-c758a686/sysdeps/powerpc/powerpc64/sysdep.h glibc-2.17-c758a686/sysdeps/powerpc/powerpc64/sysdep.h
--- glibc-2.17-c758a686/sysdeps/powerpc/powerpc64/sysdep.h 2014-05-29 14:09:56.000000000 -0500
+++ glibc-2.17-c758a686/sysdeps/powerpc/powerpc64/sysdep.h 2014-05-29 14:10:00.000000000 -0500
@@ -20,25 +20,67 @@
#ifdef __ASSEMBLER__
+/* Stack frame offsets. */
+#if _CALL_ELF != 2
+#define FRAME_MIN_SIZE 112
+#define FRAME_MIN_SIZE_PARM 112
+#define FRAME_BACKCHAIN 0
+#define FRAME_CR_SAVE 8
+#define FRAME_LR_SAVE 16
+#define FRAME_TOC_SAVE 40
+#define FRAME_PARM_SAVE 48
+#define FRAME_PARM1_SAVE 48
+#define FRAME_PARM2_SAVE 56
+#define FRAME_PARM3_SAVE 64
+#define FRAME_PARM4_SAVE 72
+#define FRAME_PARM5_SAVE 80
+#define FRAME_PARM6_SAVE 88
+#define FRAME_PARM7_SAVE 96
+#define FRAME_PARM8_SAVE 104
+#define FRAME_PARM9_SAVE 112
+#else
+#define FRAME_MIN_SIZE 32
+#define FRAME_MIN_SIZE_PARM 96
+#define FRAME_BACKCHAIN 0
+#define FRAME_CR_SAVE 8
+#define FRAME_LR_SAVE 16
+#define FRAME_TOC_SAVE 24
+#define FRAME_PARM_SAVE 32
+#define FRAME_PARM1_SAVE 32
+#define FRAME_PARM2_SAVE 40
+#define FRAME_PARM3_SAVE 48
+#define FRAME_PARM4_SAVE 56
+#define FRAME_PARM5_SAVE 64
+#define FRAME_PARM6_SAVE 72
+#define FRAME_PARM7_SAVE 80
+#define FRAME_PARM8_SAVE 88
+#define FRAME_PARM9_SAVE 96
+#endif
+
/* Support macros for CALL_MCOUNT. */
+#if _CALL_ELF == 2
+#define call_mcount_parm_offset (-64)
+#else
+#define call_mcount_parm_offset FRAME_PARM_SAVE
+#endif
.macro SAVE_ARG NARG
.if \NARG
SAVE_ARG \NARG-1
- std 2+\NARG,40+8*(\NARG)(1)
+ std 2+\NARG,call_mcount_parm_offset-8+8*(\NARG)(1)
.endif
.endm
.macro REST_ARG NARG
.if \NARG
REST_ARG \NARG-1
- ld 2+\NARG,112+40+8*(\NARG)(1)
+ ld 2+\NARG,FRAME_MIN_SIZE_PARM+call_mcount_parm_offset-8+8*(\NARG)(1)
.endif
.endm
.macro CFI_SAVE_ARG NARG
.if \NARG
CFI_SAVE_ARG \NARG-1
- cfi_offset(2+\NARG,40+8*(\NARG))
+ cfi_offset(2+\NARG,call_mcount_parm_offset-8+8*(\NARG))
.endif
.endm
@@ -55,20 +97,20 @@
#ifdef PROF
mflr r0
SAVE_ARG \NARG
- std r0,16(r1)
- stdu r1,-112(r1)
- cfi_adjust_cfa_offset(112)
- cfi_offset(lr,16)
+ std r0,FRAME_LR_SAVE(r1)
+ stdu r1,-FRAME_MIN_SIZE_PARM(r1)
+ cfi_adjust_cfa_offset(FRAME_MIN_SIZE_PARM)
+ cfi_offset(lr,FRAME_LR_SAVE)
CFI_SAVE_ARG \NARG
bl JUMPTARGET (_mcount)
#ifndef SHARED
nop
#endif
- ld r0,128(r1)
+ ld r0,FRAME_MIN_SIZE_PARM+FRAME_LR_SAVE(r1)
REST_ARG \NARG
mtlr r0
- addi r1,r1,112
- cfi_adjust_cfa_offset(-112)
+ addi r1,r1,FRAME_MIN_SIZE_PARM
+ cfi_adjust_cfa_offset(-FRAME_MIN_SIZE_PARM)
cfi_restore(lr)
CFI_REST_ARG \NARG
#endif
@@ -267,15 +309,15 @@
.else; \
.Local_syscall_error: \
mflr 0; \
- std 0,16(1); \
- stdu 1,-112(1); \
- cfi_adjust_cfa_offset(112); \
- cfi_offset(lr,16); \
+ std 0,FRAME_LR_SAVE(1); \
+ stdu 1,-FRAME_MIN_SIZE(1); \
+ cfi_adjust_cfa_offset(FRAME_MIN_SIZE); \
+ cfi_offset(lr,FRAME_LR_SAVE); \
bl JUMPTARGET(__syscall_error); \
nop; \
- ld 0,112+16(1); \
- addi 1,1,112; \
- cfi_adjust_cfa_offset(-112); \
+ ld 0,FRAME_MIN_SIZE+FRAME_LR_SAVE(1); \
+ addi 1,1,FRAME_MIN_SIZE; \
+ cfi_adjust_cfa_offset(-FRAME_MIN_SIZE); \
mtlr 0; \
cfi_restore(lr); \
blr; \
diff -urN glibc-2.17-c758a686/sysdeps/unix/sysv/linux/powerpc/powerpc64/____longjmp_chk.S glibc-2.17-c758a686/sysdeps/unix/sysv/linux/powerpc/powerpc64/____longjmp_chk.S
--- glibc-2.17-c758a686/sysdeps/unix/sysv/linux/powerpc/powerpc64/____longjmp_chk.S 2014-05-29 14:09:56.000000000 -0500
+++ glibc-2.17-c758a686/sysdeps/unix/sysv/linux/powerpc/powerpc64/____longjmp_chk.S 2014-05-29 14:10:00.000000000 -0500
@@ -33,24 +33,24 @@
cmpld reg, r1; \
bge+ .Lok; \
mflr r0; \
- std r0,16(r1); \
+ std r0,FRAME_LR_SAVE(r1); \
mr r31,r3; \
mr r30,r4; \
- stdu r1,-144(r1); \
+ stdu r1,-FRAME_MIN_SIZE-32(r1); \
cfi_remember_state; \
- cfi_adjust_cfa_offset (144); \
- cfi_offset (lr, 16); \
+ cfi_adjust_cfa_offset (FRAME_MIN_SIZE+32); \
+ cfi_offset (lr, FRAME_LR_SAVE); \
li r3,0; \
- addi r4,r1,112; \
+ addi r4,r1,FRAME_MIN_SIZE; \
li r0,__NR_sigaltstack; \
sc; \
/* Without working sigaltstack we cannot perform the test. */ \
bso .Lok2; \
- lwz r0,112+8(r1); \
+ lwz r0,FRAME_MIN_SIZE+8(r1); \
andi. r4,r0,1; \
beq .Lfail; \
- ld r0,112+16(r1); \
- ld r4,112(r1); \
+ ld r0,FRAME_MIN_SIZE+16(r1); \
+ ld r4,FRAME_MIN_SIZE(r1); \
add r4,r4,r0; \
sub r3,r3,reg; \
cmpld r3,r0; \
diff -urN glibc-2.17-c758a686/sysdeps/unix/sysv/linux/powerpc/powerpc64/brk.S glibc-2.17-c758a686/sysdeps/unix/sysv/linux/powerpc/powerpc64/brk.S
--- glibc-2.17-c758a686/sysdeps/unix/sysv/linux/powerpc/powerpc64/brk.S 2014-05-29 14:09:56.000000000 -0500
+++ glibc-2.17-c758a686/sysdeps/unix/sysv/linux/powerpc/powerpc64/brk.S 2014-05-29 14:10:00.000000000 -0500
@@ -31,9 +31,9 @@
CALL_MCOUNT 1
DISCARD_BOUNDS (r3) /* the bounds are meaningless, so toss 'em. */
- std r3,48(r1)
+ std r3,-8(r1)
DO_CALL(SYS_ify(brk))
- ld r6,48(r1)
+ ld r6,-8(r1)
ld r5,.LC__curbrk@toc(r2)
std r3,0(r5)
cmpld r6,r3
diff -urN glibc-2.17-c758a686/sysdeps/unix/sysv/linux/powerpc/powerpc64/clone.S glibc-2.17-c758a686/sysdeps/unix/sysv/linux/powerpc/powerpc64/clone.S
--- glibc-2.17-c758a686/sysdeps/unix/sysv/linux/powerpc/powerpc64/clone.S 2014-05-29 14:09:56.000000000 -0500
+++ glibc-2.17-c758a686/sysdeps/unix/sysv/linux/powerpc/powerpc64/clone.S 2014-05-29 14:10:00.000000000 -0500
@@ -45,22 +45,22 @@
cror cr0*4+eq,cr1*4+eq,cr0*4+eq
beq- cr0,L(badargs)
- /* Save some regs in parm save area. */
+ /* Save some regs in the "red zone". */
#ifdef RESET_PID
- std r29,48(r1)
+ std r29,-24(r1)
#endif
- std r30,56(r1)
- std r31,64(r1)
+ std r30,-16(r1)
+ std r31,-8(r1)
#ifdef RESET_PID
- cfi_offset(r29,48)
+ cfi_offset(r29,-24)
#endif
- cfi_offset(r30,56)
- cfi_offset(r31,64)
+ cfi_offset(r30,-16)
+ cfi_offset(r31,-8)
/* Set up stack frame for child. */
clrrdi r4,r4,4
li r0,0
- stdu r0,-112(r4) /* min stack frame is 112 bytes per ABI */
+ stdu r0,-FRAME_MIN_SIZE_PARM(r4)
/* Save fn, args, stack across syscall. */
mr r30,r3 /* Function in r30. */
@@ -102,12 +102,12 @@
L(oldpid):
#endif
- std r2,40(r1)
+ std r2,FRAME_TOC_SAVE(r1)
/* Call procedure. */
PPC64_LOAD_FUNCPTR r30
mr r3,r31
bctrl
- ld r2,40(r1)
+ ld r2,FRAME_TOC_SAVE(r1)
/* Call _exit with result from procedure. */
#ifdef SHARED
b JUMPTARGET(__GI__exit)
@@ -126,15 +126,15 @@
L(parent):
/* Parent. Restore registers & return. */
#ifdef RESET_PID
- cfi_offset(r29,48)
+ cfi_offset(r29,-24)
#endif
- cfi_offset(r30,56)
- cfi_offset(r31,64)
+ cfi_offset(r30,-16)
+ cfi_offset(r31,-8)
#ifdef RESET_PID
- ld r29,48(r1)
+ ld r29,-24(r1)
#endif
- ld r30,56(r1)
- ld r31,64(r1)
+ ld r30,-16(r1)
+ ld r31,-8(r1)
#ifdef RESET_PID
cfi_restore(r29)
#endif
diff -urN glibc-2.17-c758a686/sysdeps/unix/sysv/linux/powerpc/powerpc64/socket.S glibc-2.17-c758a686/sysdeps/unix/sysv/linux/powerpc/powerpc64/socket.S
--- glibc-2.17-c758a686/sysdeps/unix/sysv/linux/powerpc/powerpc64/socket.S 2014-05-29 14:09:56.000000000 -0500
+++ glibc-2.17-c758a686/sysdeps/unix/sysv/linux/powerpc/powerpc64/socket.S 2014-05-29 14:10:00.000000000 -0500
@@ -46,8 +46,13 @@
# endif
#endif
-#define FRAMESIZE 128
-#define stackblock FRAMESIZE+48 /* offset to parm save area. */
+#if _CALL_ELF == 2
+#define FRAMESIZE (FRAME_MIN_SIZE+16+64)
+#define stackblock (FRAME_MIN_SIZE+16)
+#else
+#define FRAMESIZE (FRAME_MIN_SIZE+16)
+#define stackblock (FRAMESIZE+FRAME_PARM_SAVE) /* offset to parm save area. */
+#endif
.text
ENTRY(__socket)
@@ -98,22 +103,22 @@
.Lsocket_cancel:
cfi_adjust_cfa_offset(FRAMESIZE)
mflr r9
- std r9,FRAMESIZE+16(r1)
- cfi_offset (lr, 16)
+ std r9,FRAMESIZE+FRAME_LR_SAVE(r1)
+ cfi_offset (lr, FRAME_LR_SAVE)
CENABLE
- std r3,120(r1)
+ std r3,FRAME_MIN_SIZE+8(r1)
li r3,P(SOCKOP_,socket)
addi r4,r1,stackblock
DO_CALL(SYS_ify(socketcall))
mfcr r0
- std r3,112(r1)
- std r0,FRAMESIZE+8(r1)
- cfi_offset (cr, 8)
- ld r3,120(r1)
+ std r3,FRAME_MIN_SIZE(r1)
+ std r0,FRAMESIZE+FRAME_CR_SAVE(r1)
+ cfi_offset (cr, FRAME_CR_SAVE)
+ ld r3,FRAME_MIN_SIZE+8(r1)
CDISABLE
- ld r4,FRAMESIZE+16(r1)
- ld r0,FRAMESIZE+8(r1)
- ld r3,112(r1)
+ ld r4,FRAMESIZE+FRAME_LR_SAVE(r1)
+ ld r0,FRAMESIZE+FRAME_CR_SAVE(r1)
+ ld r3,FRAME_MIN_SIZE(r1)
mtlr r4
mtcr r0
addi r1,r1,FRAMESIZE
diff -urN glibc-2.17-c758a686/sysdeps/unix/sysv/linux/powerpc/powerpc64/ucontext_i.sym glibc-2.17-c758a686/sysdeps/unix/sysv/linux/powerpc/powerpc64/ucontext_i.sym
--- glibc-2.17-c758a686/sysdeps/unix/sysv/linux/powerpc/powerpc64/ucontext_i.sym 2014-05-29 14:09:56.000000000 -0500
+++ glibc-2.17-c758a686/sysdeps/unix/sysv/linux/powerpc/powerpc64/ucontext_i.sym 2014-05-29 14:10:00.000000000 -0500
@@ -8,27 +8,6 @@
SIG_SETMASK
--- Offsets of the fields in the powerpc64 ABI stack frame.
--- XXX Do these correspond to some struct?
-
-FRAME_BACKCHAIN 0
-FRAME_CR_SAVE 8
-FRAME_LR_SAVE 16
-FRAME_COMPILER_DW 24
-FRAME_LINKER_DW 32
-FRAME_TOC_SAVE 40
-FRAME_PARM_SAVE 48
-FRAME_PARM1_SAVE 48
-FRAME_PARM2_SAVE 56
-FRAME_PARM3_SAVE 64
-FRAME_PARM4_SAVE 72
-FRAME_PARM5_SAVE 80
-FRAME_PARM6_SAVE 88
-FRAME_PARM7_SAVE 96
-FRAME_PARM8_SAVE 104
-FRAME_PARM9_SAVE 112
-
-
-- Offsets of the fields in the ucontext_t structure.
#define ucontext(member) offsetof (ucontext_t, member)
#define mcontext(member) ucontext (uc_mcontext.member)

View File

@ -0,0 +1,404 @@
# commit 61cd8fe4017c251617dd300818917e61a12ab48e
# Author: Ulrich Weigand <Ulrich.Weigand@de.ibm.com>
# Date: Wed Dec 4 06:59:37 2013 -0600
#
# PowerPC64 ELFv2 ABI 5/6: LD_AUDIT interface changes
#
# The ELFv2 ABI changes the calling convention by passing and returning
# structures in registers in more cases than the old ABI:
# http://gcc.gnu.org/ml/gcc-patches/2013-11/msg01145.html
# http://gcc.gnu.org/ml/gcc-patches/2013-11/msg01147.html
#
# For the most part, this does not affect glibc, since glibc assembler
# files do not use structure parameters / return values. However, one
# place is affected: the LD_AUDIT interface provides a structure to
# the audit routine that contains all registers holding function
# argument and return values for the intercepted PLT call.
#
# Since the new ABI now sometimes uses registers to return values
# that were never used for this purpose in the old ABI, this structure
# has to be extended. To force audit routines to be modified for the
# new ABI if necessary, the patch defines v2 variants of the la_ppc64
# types and routines.
#
# In addition, the patch contains two unrelated changes to the
# PLT trampoline routines: it fixes a bug where FPR return values
# were stored in the wrong place, and it removes the unnecessary
# save/restore of CR.
#
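# Illustration only (not part of the patch): a minimal audit-module sketch
# written against the ELFv2 hook names this patch declares in bits/link.h.
# The file name, the traced program and the build line are made up; the
# sketch assumes a ppc64le system whose glibc already carries this patch.
#
#   #define _GNU_SOURCE
#   #include <inttypes.h>
#   #include <link.h>
#   #include <stdio.h>
#
#   unsigned int
#   la_version (unsigned int version)
#   {
#     return LAV_CURRENT;
#   }
#
#   unsigned int
#   la_objopen (struct link_map *map, Lmid_t lmid, uintptr_t *cookie)
#   {
#     /* Request PLT tracing in both directions so pltenter fires.  */
#     return LA_FLG_BINDTO | LA_FLG_BINDFROM;
#   }
#
#   Elf64_Addr
#   la_ppc64v2_gnu_pltenter (Elf64_Sym *sym, unsigned int ndx,
#                            uintptr_t *refcook, uintptr_t *defcook,
#                            La_ppc64v2_regs *regs, unsigned int *flags,
#                            const char *symname, long int *framesizep)
#   {
#     /* lr_reg[0] holds r3, the first integer-argument register.  */
#     fprintf (stderr, "PLT enter: %s (r3=%#" PRIx64 ")\n",
#              symname, regs->lr_reg[0]);
#     return sym->st_value;
#   }
#
# Hypothetical usage:
#   gcc -shared -fPIC audit-demo.c -o audit-demo.so
#   LD_AUDIT=$PWD/audit-demo.so /bin/true
#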
diff -urN glibc-2.17-c758a686/sysdeps/powerpc/bits/link.h glibc-2.17-c758a686/sysdeps/powerpc/bits/link.h
--- glibc-2.17-c758a686/sysdeps/powerpc/bits/link.h 2014-05-29 14:11:12.000000000 -0500
+++ glibc-2.17-c758a686/sysdeps/powerpc/bits/link.h 2014-05-29 14:11:20.000000000 -0500
@@ -63,7 +63,7 @@
__END_DECLS
-#else
+#elif _CALL_ELF != 2
/* Registers for entry into PLT on PPC64. */
typedef struct La_ppc64_regs
@@ -107,4 +107,48 @@
__END_DECLS
+#else
+
+/* Registers for entry into PLT on PPC64 in the ELFv2 ABI. */
+typedef struct La_ppc64v2_regs
+{
+ uint64_t lr_reg[8];
+ double lr_fp[13];
+ uint32_t __padding;
+ uint32_t lr_vrsave;
+ uint32_t lr_vreg[12][4] __attribute__ ((aligned (16)));
+ uint64_t lr_r1;
+ uint64_t lr_lr;
+} La_ppc64v2_regs;
+
+/* Return values for calls from PLT on PPC64 in the ELFv2 ABI. */
+typedef struct La_ppc64v2_retval
+{
+ uint64_t lrv_r3;
+ uint64_t lrv_r4;
+ double lrv_fp[10];
+ uint32_t lrv_vreg[8][4] __attribute__ ((aligned (16)));
+} La_ppc64v2_retval;
+
+
+__BEGIN_DECLS
+
+extern Elf64_Addr la_ppc64v2_gnu_pltenter (Elf64_Sym *__sym,
+ unsigned int __ndx,
+ uintptr_t *__refcook,
+ uintptr_t *__defcook,
+ La_ppc64v2_regs *__regs,
+ unsigned int *__flags,
+ const char *__symname,
+ long int *__framesizep);
+extern unsigned int la_ppc64v2_gnu_pltexit (Elf64_Sym *__sym,
+ unsigned int __ndx,
+ uintptr_t *__refcook,
+ uintptr_t *__defcook,
+ const La_ppc64v2_regs *__inregs,
+ La_ppc64v2_retval *__outregs,
+ const char *__symname);
+
+__END_DECLS
+
#endif
diff -urN glibc-2.17-c758a686/sysdeps/powerpc/ldsodefs.h glibc-2.17-c758a686/sysdeps/powerpc/ldsodefs.h
--- glibc-2.17-c758a686/sysdeps/powerpc/ldsodefs.h 2014-05-29 14:11:12.000000000 -0500
+++ glibc-2.17-c758a686/sysdeps/powerpc/ldsodefs.h 2014-05-29 14:11:20.000000000 -0500
@@ -25,6 +25,8 @@
struct La_ppc32_retval;
struct La_ppc64_regs;
struct La_ppc64_retval;
+struct La_ppc64v2_regs;
+struct La_ppc64v2_retval;
#define ARCH_PLTENTER_MEMBERS \
Elf32_Addr (*ppc32_gnu_pltenter) (Elf32_Sym *, unsigned int, uintptr_t *, \
@@ -34,7 +36,12 @@
Elf64_Addr (*ppc64_gnu_pltenter) (Elf64_Sym *, unsigned int, uintptr_t *, \
uintptr_t *, struct La_ppc64_regs *, \
unsigned int *, const char *name, \
- long int *framesizep)
+ long int *framesizep); \
+ Elf64_Addr (*ppc64v2_gnu_pltenter) (Elf64_Sym *, unsigned int, \
+ uintptr_t *, uintptr_t *, \
+ struct La_ppc64v2_regs *, \
+ unsigned int *, const char *name, \
+ long int *framesizep)
#define ARCH_PLTEXIT_MEMBERS \
unsigned int (*ppc32_gnu_pltexit) (Elf32_Sym *, unsigned int, \
@@ -47,7 +54,14 @@
uintptr_t *, \
uintptr_t *, \
const struct La_ppc64_regs *, \
- struct La_ppc64_retval *, const char *)
+ struct La_ppc64_retval *, \
+ const char *); \
+ unsigned int (*ppc64v2_gnu_pltexit) (Elf64_Sym *, unsigned int, \
+ uintptr_t *, \
+ uintptr_t *, \
+ const struct La_ppc64v2_regs *,\
+ struct La_ppc64v2_retval *, \
+ const char *)
#include_next <ldsodefs.h>
diff -urN glibc-2.17-c758a686/sysdeps/powerpc/powerpc64/dl-machine.h glibc-2.17-c758a686/sysdeps/powerpc/powerpc64/dl-machine.h
--- glibc-2.17-c758a686/sysdeps/powerpc/powerpc64/dl-machine.h 2014-05-29 14:11:12.000000000 -0500
+++ glibc-2.17-c758a686/sysdeps/powerpc/powerpc64/dl-machine.h 2014-05-29 14:11:20.000000000 -0500
@@ -546,8 +546,13 @@
/* Names of the architecture-specific auditing callback functions. */
+#if _CALL_ELF != 2
#define ARCH_LA_PLTENTER ppc64_gnu_pltenter
#define ARCH_LA_PLTEXIT ppc64_gnu_pltexit
+#else
+#define ARCH_LA_PLTENTER ppc64v2_gnu_pltenter
+#define ARCH_LA_PLTEXIT ppc64v2_gnu_pltexit
+#endif
#endif /* dl_machine_h */
diff -urN glibc-2.17-c758a686/sysdeps/powerpc/powerpc64/dl-trampoline.S glibc-2.17-c758a686/sysdeps/powerpc/powerpc64/dl-trampoline.S
--- glibc-2.17-c758a686/sysdeps/powerpc/powerpc64/dl-trampoline.S 2014-05-29 14:11:12.000000000 -0500
+++ glibc-2.17-c758a686/sysdeps/powerpc/powerpc64/dl-trampoline.S 2014-05-29 14:11:20.000000000 -0500
@@ -50,11 +50,8 @@
/* Store the LR in the LR Save area. */
std r0,FRAME_SIZE+FRAME_LR_SAVE(r1)
cfi_offset (lr, FRAME_LR_SAVE)
- mfcr r0
std r9,INT_PARMS+48(r1)
std r10,INT_PARMS+56(r1)
-/* I'm almost certain we don't have to save cr... be safe. */
- std r0,FRAME_SIZE+FRAME_CR_SAVE(r1)
bl JUMPTARGET(_dl_fixup)
#ifndef SHARED
nop
@@ -66,11 +63,9 @@
ld r8,INT_PARMS+40(r1)
ld r7,INT_PARMS+32(r1)
mtlr r0
- ld r0,FRAME_SIZE+FRAME_CR_SAVE(r1)
ld r6,INT_PARMS+24(r1)
ld r5,INT_PARMS+16(r1)
ld r4,INT_PARMS+8(r1)
- mtcrf 0xFF,r0
/* Prepare for calling the function returned by fixup. */
PPC64_LOAD_FUNCPTR r3
ld r3,INT_PARMS+0(r1)
@@ -85,18 +80,30 @@
#undef FRAME_SIZE
#undef INT_PARMS
- /* Stack layout:
- (Note: some of these are not required for the ELFv2 ABI.)
- +592 previous backchain
- +584 spill_r31
- +576 spill_r30
- +560 v1
- +552 fp4
- +544 fp3
- +536 fp2
- +528 fp1
- +520 r4
- +512 r3
+ /* Stack layout: ELFv2 ABI.
+ +752 previous backchain
+ +744 spill_r31
+ +736 spill_r30
+ +720 v8
+ +704 v7
+ +688 v6
+ +672 v5
+ +656 v4
+ +640 v3
+ +624 v2
+ +608 v1
+ +600 fp10
+ ELFv1 ABI +592 fp9
+ +592 previous backchain +584 fp8
+ +584 spill_r31 +576 fp7
+ +576 spill_r30 +568 fp6
+ +560 v1 +560 fp5
+ +552 fp4 +552 fp4
+ +544 fp3 +544 fp3
+ +536 fp2 +536 fp2
+ +528 fp1 +528 fp1
+ +520 r4 +520 r4
+ +512 r3 +512 r3
return values
+504 free
+496 stackframe
@@ -157,10 +164,15 @@
+8 CR save area
r1+0 stack back chain
*/
-#define FRAME_SIZE 592
+#if _CALL_ELF == 2
+# define FRAME_SIZE 752
+# define VR_RTN 608
+#else
+# define FRAME_SIZE 592
+# define VR_RTN 560
+#endif
#define INT_RTN 512
#define FPR_RTN 528
-#define VR_RTN 560
#define STACK_FRAME 496
#define CALLING_LR 488
#define CALLING_SP 480
@@ -205,18 +217,14 @@
mflr r5
std r7,INT_PARMS+32(r1)
std r8,INT_PARMS+40(r1)
-/* Store the LR in the LR Save area of the previous frame. */
-/* XXX Do we have to do this? */
+/* Store the LR in the LR Save area. */
la r8,FRAME_SIZE(r1)
std r5,FRAME_SIZE+FRAME_LR_SAVE(r1)
cfi_offset (lr, FRAME_LR_SAVE)
std r5,CALLING_LR(r1)
- mfcr r0
std r9,INT_PARMS+48(r1)
std r10,INT_PARMS+56(r1)
std r8,CALLING_SP(r1)
-/* I'm almost certain we don't have to save cr... be safe. */
- std r0,FRAME_SIZE+FRAME_CR_SAVE(r1)
ld r12,.LC__dl_hwcap@toc(r2)
#ifdef SHARED
/* Load _rtld_local_ro._dl_hwcap. */
@@ -319,11 +327,9 @@
ld r8,INT_PARMS+40(r1)
ld r7,INT_PARMS+32(r1)
mtlr r0
- ld r0,FRAME_SIZE+FRAME_CR_SAVE(r1)
ld r6,INT_PARMS+24(r1)
ld r5,INT_PARMS+16(r1)
ld r4,INT_PARMS+8(r1)
- mtcrf 0xFF,r0
/* Prepare for calling the function returned by fixup. */
PPC64_LOAD_FUNCPTR r3
ld r3,INT_PARMS+0(r1)
@@ -346,10 +352,11 @@
lfd fp12,FPR_PARMS+88(r1)
lfd fp13,FPR_PARMS+96(r1)
/* Unwind the stack frame, and jump. */
- ld r31,584(r1)
- ld r30,576(r1)
+ ld r31,FRAME_SIZE-8(r1)
+ ld r30,FRAME_SIZE-16(r1)
addi r1,r1,FRAME_SIZE
bctr
+
L(do_pltexit):
la r10,(VR_PARMS+0)(r1)
la r9,(VR_PARMS+16)(r1)
@@ -383,11 +390,9 @@
ld r8,INT_PARMS+40(r1)
ld r7,INT_PARMS+32(r1)
mtlr r0
- ld r0,FRAME_SIZE+FRAME_CR_SAVE(r1)
ld r6,INT_PARMS+24(r1)
ld r5,INT_PARMS+16(r1)
ld r4,INT_PARMS+8(r1)
- mtcrf 0xFF,r0
/* Prepare for calling the function returned by fixup. */
std r2,FRAME_TOC_SAVE(r1)
PPC64_LOAD_FUNCPTR r3
@@ -413,16 +418,37 @@
/* But return here and store the return values. */
std r3,INT_RTN(r1)
std r4,INT_RTN+8(r1)
- stfd fp1,FPR_PARMS+0(r1)
- stfd fp2,FPR_PARMS+8(r1)
+ stfd fp1,FPR_RTN+0(r1)
+ stfd fp2,FPR_RTN+8(r1)
cmpdi cr0,r12,0
la r10,VR_RTN(r1)
- stfd fp3,FPR_PARMS+16(r1)
- stfd fp4,FPR_PARMS+24(r1)
+ stfd fp3,FPR_RTN+16(r1)
+ stfd fp4,FPR_RTN+24(r1)
+#if _CALL_ELF == 2
+ la r12,VR_RTN+16(r1)
+ stfd fp5,FPR_RTN+32(r1)
+ stfd fp6,FPR_RTN+40(r1)
+ li r5,32
+ li r6,64
+ stfd fp7,FPR_RTN+48(r1)
+ stfd fp8,FPR_RTN+56(r1)
+ stfd fp9,FPR_RTN+64(r1)
+ stfd fp10,FPR_RTN+72(r1)
+#endif
mr r3,r31
mr r4,r30
beq L(callpltexit)
stvx v2,0,r10
+#if _CALL_ELF == 2
+ stvx v3,0,r12
+ stvx v4,r5,r10
+ stvx v5,r5,r12
+ addi r5,r5,64
+ stvx v6,r6,r10
+ stvx v7,r6,r12
+ stvx v8,r5,r10
+ stvx v9,r5,r12
+#endif
L(callpltexit):
addi r5,r1,INT_PARMS
addi r6,r1,INT_RTN
@@ -434,18 +460,39 @@
lwz r12,VR_VRSAVE(r1)
ld r3,INT_RTN(r1)
ld r4,INT_RTN+8(r1)
- lfd fp1,FPR_PARMS+0(r1)
- lfd fp2,FPR_PARMS+8(r1)
+ lfd fp1,FPR_RTN+0(r1)
+ lfd fp2,FPR_RTN+8(r1)
cmpdi cr0,r12,0
- la r10,VR_RTN(r1)
- lfd fp3,FPR_PARMS+16(r1)
- lfd fp4,FPR_PARMS+24(r1)
+ la r11,VR_RTN(r1)
+ lfd fp3,FPR_RTN+16(r1)
+ lfd fp4,FPR_RTN+24(r1)
+#if _CALL_ELF == 2
+ la r12,VR_RTN+16(r1)
+ lfd fp5,FPR_RTN+32(r1)
+ lfd fp6,FPR_RTN+40(r1)
+ li r30,32
+ li r31,64
+ lfd fp7,FPR_RTN+48(r1)
+ lfd fp8,FPR_RTN+56(r1)
+ lfd fp9,FPR_RTN+64(r1)
+ lfd fp10,FPR_RTN+72(r1)
+#endif
beq L(pltexitreturn)
- lvx v2,0,r10
+ lvx v2,0,r11
+#if _CALL_ELF == 2
+ lvx v3,0,r12
+ lvx v4,r30,r11
+ lvx v5,r30,r12
+ addi r30,r30,64
+ lvx v6,r31,r11
+ lvx v7,r31,r12
+ lvx v8,r30,r11
+ lvx v9,r30,r12
+#endif
L(pltexitreturn):
ld r0,FRAME_SIZE+FRAME_LR_SAVE(r1)
- ld r31,584(r1)
- ld r30,576(r1)
+ ld r31,FRAME_SIZE-8(r1)
+ ld r30,FRAME_SIZE-16(r1)
mtlr r0
ld r1,0(r1)
blr
diff -urN glibc-2.17-c758a686/sysdeps/powerpc/powerpc64/tst-audit.h glibc-2.17-c758a686/sysdeps/powerpc/powerpc64/tst-audit.h
--- glibc-2.17-c758a686/sysdeps/powerpc/powerpc64/tst-audit.h 2014-05-29 14:11:12.000000000 -0500
+++ glibc-2.17-c758a686/sysdeps/powerpc/powerpc64/tst-audit.h 2014-05-29 14:11:20.000000000 -0500
@@ -18,8 +18,16 @@
License along with the GNU C Library. If not, see
<http://www.gnu.org/licenses/>. */
+#if _CALL_ELF != 2
#define pltenter la_ppc64_gnu_pltenter
#define pltexit la_ppc64_gnu_pltexit
#define La_regs La_ppc64_regs
#define La_retval La_ppc64_retval
#define int_retval lrv_r3
+#else
+#define pltenter la_ppc64v2_gnu_pltenter
+#define pltexit la_ppc64v2_gnu_pltexit
+#define La_regs La_ppc64v2_regs
+#define La_retval La_ppc64v2_retval
+#define int_retval lrv_r3
+#endif

View File

@ -0,0 +1,248 @@
# commit 5b118558f9fb0620508d51c34c2cb5ba4f1f01c2
# Author: Ulrich Weigand <Ulrich.Weigand@de.ibm.com>
# Date: Wed Dec 4 07:08:48 2013 -0600
#
# PowerPC64 ELFv2 ABI 6/6: Bump ld.so soname version number
#
# To avoid having an ELFv2 binary accidentally picking up an old ABI ld.so,
# this patch bumps the soname to ld64.so.2.
#
# In theory (or for testing purposes) this will also allow co-installing
# ld.so versions for both ABIs on the same system. Note that the kernel
# will already be able to load executables of both ABIs. However, there
# is currently no plan to use that theoretical possibility in any
# supported distribution environment ...
#
# Note that in order to check which ABI to use, we need to invoke the
# compiler to check the _CALL_ELF macro; this is done in a new configure
# check in sysdeps/unix/sysv/linux/powerpc/powerpc64/configure.ac,
# replacing the hard-coded value of default-abi in the Makefile.
#
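# Illustration only (not part of the patch): the configure fragment below
# decides the ABI by asking the preprocessor about _CALL_ELF.  The same
# probe can be written as a tiny C program; the loader paths it prints
# follow the ldconfig.h hunk below.  File name and build line are made up.
#
#   /* abi-probe.c -- build with:  gcc abi-probe.c -o abi-probe  */
#   #include <stdio.h>
#
#   int
#   main (void)
#   {
#   #if defined __powerpc64__ && _CALL_ELF == 2
#     puts ("ELFv2 ABI: dynamic linker expected at /lib64/ld64.so.2");
#   #elif defined __powerpc64__
#     puts ("ELFv1 ABI: dynamic linker expected at /lib64/ld64.so.1");
#   #else
#     puts ("not a powerpc64 compiler");
#   #endif
#     return 0;
#   }
#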
diff -urN glibc-2.17-c758a686/sysdeps/unix/sysv/linux/powerpc/Makefile glibc-2.17-c758a686/sysdeps/unix/sysv/linux/powerpc/Makefile
--- glibc-2.17-c758a686/sysdeps/unix/sysv/linux/powerpc/Makefile 2014-05-29 14:12:25.000000000 -0500
+++ glibc-2.17-c758a686/sysdeps/unix/sysv/linux/powerpc/Makefile 2014-05-29 14:12:30.000000000 -0500
@@ -1,9 +1,12 @@
-abi-variants := 32 64
+abi-variants := 32 64-v1 64-v2
abi-32-options := -U__powerpc64__
abi-32-condition := __WORDSIZE == 32
-abi-64-options := -D__powerpc64__
-abi-64-condition := __WORDSIZE == 64
-abi-64-ld-soname := ld64.so.1
+abi-64-v1-options := -D__powerpc64__ -U_CALL_ELF -D_CALL_ELF=1
+abi-64-v1-condition := __WORDSIZE == 64 && _CALL_ELF != 2
+abi-64-v1-ld-soname := ld64.so.1
+abi-64-v2-options := -D__powerpc64__ -U_CALL_ELF -D_CALL_ELF=2
+abi-64-v2-condition := __WORDSIZE == 64 && _CALL_ELF == 2
+abi-64-v2-ld-soname := ld64.so.2
ifeq ($(subdir),rt)
librt-routines += rt-sysdep
diff -urN glibc-2.17-c758a686/sysdeps/unix/sysv/linux/powerpc/ldconfig.h glibc-2.17-c758a686/sysdeps/unix/sysv/linux/powerpc/ldconfig.h
--- glibc-2.17-c758a686/sysdeps/unix/sysv/linux/powerpc/ldconfig.h 2014-05-29 14:12:25.000000000 -0500
+++ glibc-2.17-c758a686/sysdeps/unix/sysv/linux/powerpc/ldconfig.h 2014-05-29 14:12:30.000000000 -0500
@@ -20,7 +20,8 @@
#define SYSDEP_KNOWN_INTERPRETER_NAMES \
{ "/lib/ld.so.1", FLAG_ELF_LIBC6 }, \
- { "/lib64/ld64.so.1", FLAG_ELF_LIBC6 },
+ { "/lib64/ld64.so.1", FLAG_ELF_LIBC6 }, \
+ { "/lib64/ld64.so.2", FLAG_ELF_LIBC6 },
#define SYSDEP_KNOWN_LIBRARY_NAMES \
{ "libc.so.6", FLAG_ELF_LIBC6 }, \
{ "libm.so.6", FLAG_ELF_LIBC6 },
diff -urN glibc-2.17-c758a686/sysdeps/unix/sysv/linux/powerpc/powerpc64/Makefile glibc-2.17-c758a686/sysdeps/unix/sysv/linux/powerpc/powerpc64/Makefile
--- glibc-2.17-c758a686/sysdeps/unix/sysv/linux/powerpc/powerpc64/Makefile 2014-05-29 14:12:25.000000000 -0500
+++ glibc-2.17-c758a686/sysdeps/unix/sysv/linux/powerpc/powerpc64/Makefile 1969-12-31 18:00:00.000000000 -0600
@@ -1,2 +0,0 @@
-# See Makeconfig regarding the use of default-abi.
-default-abi := 64
diff -urN glibc-2.17-c758a686/sysdeps/unix/sysv/linux/powerpc/powerpc64/configure glibc-2.17-c758a686/sysdeps/unix/sysv/linux/powerpc/powerpc64/configure
--- glibc-2.17-c758a686/sysdeps/unix/sysv/linux/powerpc/powerpc64/configure 1969-12-31 18:00:00.000000000 -0600
+++ glibc-2.17-c758a686/sysdeps/unix/sysv/linux/powerpc/powerpc64/configure 2014-05-29 14:12:30.000000000 -0500
@@ -0,0 +1,166 @@
+# This file is generated from configure.ac by Autoconf. DO NOT EDIT!
+ # Local configure fragment for sysdeps/unix/sysv/linux/powerpc/powerpc64/.
+
+# Define default-abi according to compiler flags.
+
+
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for grep that handles long lines and -e" >&5
+$as_echo_n "checking for grep that handles long lines and -e... " >&6; }
+if ${ac_cv_path_GREP+:} false; then :
+ $as_echo_n "(cached) " >&6
+else
+ if test -z "$GREP"; then
+ ac_path_GREP_found=false
+ # Loop through the user's path and test for each of PROGNAME-LIST
+ as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in $PATH$PATH_SEPARATOR/usr/xpg4/bin
+do
+ IFS=$as_save_IFS
+ test -z "$as_dir" && as_dir=.
+ for ac_prog in grep ggrep; do
+ for ac_exec_ext in '' $ac_executable_extensions; do
+ ac_path_GREP="$as_dir/$ac_prog$ac_exec_ext"
+ { test -f "$ac_path_GREP" && $as_test_x "$ac_path_GREP"; } || continue
+# Check for GNU ac_path_GREP and select it if it is found.
+ # Check for GNU $ac_path_GREP
+case `"$ac_path_GREP" --version 2>&1` in
+*GNU*)
+ ac_cv_path_GREP="$ac_path_GREP" ac_path_GREP_found=:;;
+*)
+ ac_count=0
+ $as_echo_n 0123456789 >"conftest.in"
+ while :
+ do
+ cat "conftest.in" "conftest.in" >"conftest.tmp"
+ mv "conftest.tmp" "conftest.in"
+ cp "conftest.in" "conftest.nl"
+ $as_echo 'GREP' >> "conftest.nl"
+ "$ac_path_GREP" -e 'GREP$' -e '-(cannot match)-' < "conftest.nl" >"conftest.out" 2>/dev/null || break
+ diff "conftest.out" "conftest.nl" >/dev/null 2>&1 || break
+ as_fn_arith $ac_count + 1 && ac_count=$as_val
+ if test $ac_count -gt ${ac_path_GREP_max-0}; then
+ # Best one so far, save it but keep looking for a better one
+ ac_cv_path_GREP="$ac_path_GREP"
+ ac_path_GREP_max=$ac_count
+ fi
+ # 10*(2^10) chars as input seems more than enough
+ test $ac_count -gt 10 && break
+ done
+ rm -f conftest.in conftest.tmp conftest.nl conftest.out;;
+esac
+
+ $ac_path_GREP_found && break 3
+ done
+ done
+ done
+IFS=$as_save_IFS
+ if test -z "$ac_cv_path_GREP"; then
+ as_fn_error $? "no acceptable grep could be found in $PATH$PATH_SEPARATOR/usr/xpg4/bin" "$LINENO" 5
+ fi
+else
+ ac_cv_path_GREP=$GREP
+fi
+
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_path_GREP" >&5
+$as_echo "$ac_cv_path_GREP" >&6; }
+ GREP="$ac_cv_path_GREP"
+
+
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for egrep" >&5
+$as_echo_n "checking for egrep... " >&6; }
+if ${ac_cv_path_EGREP+:} false; then :
+ $as_echo_n "(cached) " >&6
+else
+ if echo a | $GREP -E '(a|b)' >/dev/null 2>&1
+ then ac_cv_path_EGREP="$GREP -E"
+ else
+ if test -z "$EGREP"; then
+ ac_path_EGREP_found=false
+ # Loop through the user's path and test for each of PROGNAME-LIST
+ as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in $PATH$PATH_SEPARATOR/usr/xpg4/bin
+do
+ IFS=$as_save_IFS
+ test -z "$as_dir" && as_dir=.
+ for ac_prog in egrep; do
+ for ac_exec_ext in '' $ac_executable_extensions; do
+ ac_path_EGREP="$as_dir/$ac_prog$ac_exec_ext"
+ { test -f "$ac_path_EGREP" && $as_test_x "$ac_path_EGREP"; } || continue
+# Check for GNU ac_path_EGREP and select it if it is found.
+ # Check for GNU $ac_path_EGREP
+case `"$ac_path_EGREP" --version 2>&1` in
+*GNU*)
+ ac_cv_path_EGREP="$ac_path_EGREP" ac_path_EGREP_found=:;;
+*)
+ ac_count=0
+ $as_echo_n 0123456789 >"conftest.in"
+ while :
+ do
+ cat "conftest.in" "conftest.in" >"conftest.tmp"
+ mv "conftest.tmp" "conftest.in"
+ cp "conftest.in" "conftest.nl"
+ $as_echo 'EGREP' >> "conftest.nl"
+ "$ac_path_EGREP" 'EGREP$' < "conftest.nl" >"conftest.out" 2>/dev/null || break
+ diff "conftest.out" "conftest.nl" >/dev/null 2>&1 || break
+ as_fn_arith $ac_count + 1 && ac_count=$as_val
+ if test $ac_count -gt ${ac_path_EGREP_max-0}; then
+ # Best one so far, save it but keep looking for a better one
+ ac_cv_path_EGREP="$ac_path_EGREP"
+ ac_path_EGREP_max=$ac_count
+ fi
+ # 10*(2^10) chars as input seems more than enough
+ test $ac_count -gt 10 && break
+ done
+ rm -f conftest.in conftest.tmp conftest.nl conftest.out;;
+esac
+
+ $ac_path_EGREP_found && break 3
+ done
+ done
+ done
+IFS=$as_save_IFS
+ if test -z "$ac_cv_path_EGREP"; then
+ as_fn_error $? "no acceptable egrep could be found in $PATH$PATH_SEPARATOR/usr/xpg4/bin" "$LINENO" 5
+ fi
+else
+ ac_cv_path_EGREP=$EGREP
+fi
+
+ fi
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_path_EGREP" >&5
+$as_echo "$ac_cv_path_EGREP" >&6; }
+ EGREP="$ac_cv_path_EGREP"
+
+
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether the compiler is using the PowerPC64 ELFv2 ABI" >&5
+$as_echo_n "checking whether the compiler is using the PowerPC64 ELFv2 ABI... " >&6; }
+if ${libc_cv_ppc64_elfv2_abi+:} false; then :
+ $as_echo_n "(cached) " >&6
+else
+ cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h. */
+#if _CALL_ELF == 2
+ yes
+ #endif
+
+_ACEOF
+if (eval "$ac_cpp conftest.$ac_ext") 2>&5 |
+ $EGREP "yes" >/dev/null 2>&1; then :
+ libc_cv_ppc64_elfv2_abi=yes
+else
+ libc_cv_ppc64_elfv2_abi=no
+fi
+rm -f conftest*
+
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $libc_cv_ppc64_elfv2_abi" >&5
+$as_echo "$libc_cv_ppc64_elfv2_abi" >&6; }
+if test $libc_cv_ppc64_elfv2_abi = yes; then
+ config_vars="$config_vars
+default-abi = 64-v2"
+else
+ config_vars="$config_vars
+default-abi = 64-v1"
+fi
diff -urN glibc-2.17-c758a686/sysdeps/unix/sysv/linux/powerpc/powerpc64/configure.ac glibc-2.17-c758a686/sysdeps/unix/sysv/linux/powerpc/powerpc64/configure.ac
--- glibc-2.17-c758a686/sysdeps/unix/sysv/linux/powerpc/powerpc64/configure.ac 1969-12-31 18:00:00.000000000 -0600
+++ glibc-2.17-c758a686/sysdeps/unix/sysv/linux/powerpc/powerpc64/configure.ac 2014-05-29 14:12:30.000000000 -0500
@@ -0,0 +1,15 @@
+GLIBC_PROVIDES dnl See aclocal.m4 in the top level source directory.
+# Local configure fragment for sysdeps/unix/sysv/linux/powerpc/powerpc64/.
+
+# Define default-abi according to compiler flags.
+AC_CACHE_CHECK([whether the compiler is using the PowerPC64 ELFv2 ABI],
+ [libc_cv_ppc64_elfv2_abi],
+ [AC_EGREP_CPP(yes,[#if _CALL_ELF == 2
+ yes
+ #endif
+ ], libc_cv_ppc64_elfv2_abi=yes, libc_cv_ppc64_elfv2_abi=no)])
+if test $libc_cv_ppc64_elfv2_abi = yes; then
+ LIBC_CONFIG_VAR([default-abi], [64-v2])
+else
+ LIBC_CONFIG_VAR([default-abi], [64-v1])
+fi

View File

@ -0,0 +1,26 @@
# commit c859b32e9d76afe8a3f20bb9528961a573c06937
# Author: Alan Modra <amodra@gmail.com>
# Date: Tue Apr 1 14:07:42 2014 +1030
#
# Fix s_copysign stack temp for PowerPC64 ELFv2
#
# [BZ #16786]
# * sysdeps/powerpc/powerpc64/fpu/s_copysign.S: Don't trash stack.
#
diff -urN glibc-2.17-c758a686/sysdeps/powerpc/powerpc64/fpu/s_copysign.S glibc-2.17-c758a686/sysdeps/powerpc/powerpc64/fpu/s_copysign.S
--- glibc-2.17-c758a686/sysdeps/powerpc/powerpc64/fpu/s_copysign.S 2014-05-29 14:13:47.000000000 -0500
+++ glibc-2.17-c758a686/sysdeps/powerpc/powerpc64/fpu/s_copysign.S 2014-05-29 14:13:50.000000000 -0500
@@ -27,11 +27,11 @@
/* double [f1] copysign (double [f1] x, double [f2] y);
copysign(x,y) returns a value with the magnitude of x and
with the sign bit of y. */
- stfd fp2,56(r1)
+ stfd fp2,-8(r1)
nop
nop
nop
- ld r3,56(r1)
+ ld r3,-8(r1)
cmpdi r3,0
blt L(0)
fabs fp1,fp1

View File

@ -0,0 +1,33 @@
#
# For PPC64LE only!
#
# This is fixed upstream by the removal of Versions.def
# and auto-generation of the entries SHLIB_COMPAT requires.
# See: https://sourceware.org/ml/libc-alpha/2014-02/msg00818.html
# Backporting that infrastructure to RHEL 7.x is too much work
# at this juncture for little reward. Instead we simply fix up
# Versions.def to include GLIBC_2.3, which is used by
# nptl/old_pthread_atfork.c; otherwise ppc64le gets
# pthread_atfork in libpthread.so.0 when it should not.
#
# The ABI testing for libpthread.so now passes for ppc64le.
#
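# Background sketch (illustration only, not part of the patch): a compat
# symbol can only be pinned to a version node the library declares, and
# Versions.def is where glibc declares those nodes.  Everything below is
# made up -- file names, symbol names and the vers.map node list, which
# stands in for the GLIBC_2.3 entry added here.
#
#   /* demo.c -- build with:
#        gcc -shared -fPIC -Wl,--version-script=vers.map demo.c -o libdemo.so
#      where vers.map declares the nodes:
#        GLIBC_2.0 { };
#        GLIBC_2.17 { };
#      Removing a node from vers.map makes the matching .symver line fail
#      to link ("version node not found"), which is the role Versions.def
#      plays for the real compat machinery.  */
#
#   void demo_atfork_legacy (void) { /* kept only for old binaries */ }
#   void demo_atfork_current (void) { /* current behaviour */ }
#
#   /* Non-default (compat) version ... */
#   __asm__ (".symver demo_atfork_legacy, demo_atfork@GLIBC_2.0");
#   /* ... and the default version.  */
#   __asm__ (".symver demo_atfork_current, demo_atfork@@GLIBC_2.17");
#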
diff -urN glibc-2.17-c758a686/Versions.def glibc-2.17-c758a686/Versions.def
--- glibc-2.17-c758a686/Versions.def 2014-06-02 21:13:12.000000000 +0000
+++ glibc-2.17-c758a686/Versions.def 2014-06-02 21:14:38.000000000 +0000
@@ -92,6 +92,7 @@
GLIBC_2.2
GLIBC_2.2.3
GLIBC_2.2.6
+ GLIBC_2.3
GLIBC_2.3.2
GLIBC_2.3.3
GLIBC_2.3.4
@@ -99,6 +100,7 @@
GLIBC_2.6
GLIBC_2.11
GLIBC_2.12
+ GLIBC_2.17
GLIBC_PRIVATE
}
libresolv {

View File

@ -0,0 +1,22 @@
#
# On POWER this patch also fixes test-ildoubl and test-ldouble failures where tan
# rounded toward zero is off by 1 ULP, which is acceptable. Upstream uses 3 ULP, but
# we prefer to keep the bound tighter unless we have a reason not to.
#
# This is the ppc64le version, which is required because it applies *after* another
# ppc64le patch that touches the same ULPs file. See glibc-power-libm-test-ulps.patch
# for the ppc64/ppc version.
#
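# Illustration only: a quick way to look at the value the new entry covers.
# The file name and build line are made up; with the rounding mode set
# toward zero, glibc's tanl (2) is allowed to differ from the expected
# value below by at most 1 ulp.
#
#   /* tan-tz.c -- build with:  gcc tan-tz.c -o tan-tz -lm  */
#   #include <fenv.h>
#   #include <math.h>
#   #include <stdio.h>
#
#   int
#   main (void)
#   {
#     /* Expected value copied from the libm-test-ulps entry below.  */
#     const long double expected = -2.1850398632615189916433061023136825434320L;
#     volatile long double x = 2.0L;  /* volatile: block compile-time folding.  */
#
#     fesetround (FE_TOWARDZERO);
#     long double got = tanl (x);
#     fesetround (FE_TONEAREST);
#
#     printf ("tanl (2) toward zero: %.36Lg\n", got);
#     printf ("difference:           %.6Lg\n", got - expected);
#     return 0;
#   }
#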
diff -urN glibc-2.17-c758a686/sysdeps/powerpc/fpu/libm-test-ulps glibc-2.17-c758a686/sysdeps/powerpc/fpu/libm-test-ulps
--- glibc-2.17-c758a686/sysdeps/powerpc/fpu/libm-test-ulps 2014-07-25 22:07:06.280020855 -0400
+++ glibc-2.17-c758a686/sysdeps/powerpc/fpu/libm-test-ulps 2014-07-25 22:26:54.650021033 -0400
@@ -2644,6 +2644,9 @@
Test "tan_towardzero (2)":
ildouble: 1
ldouble: 1
+Test "tan_towardzero (2) == -2.1850398632615189916433061023136825434320":
+ildouble: 1
+ldouble: 1
Test "tan_towardzero (3) == -0.1425465430742778052956354105339134932261":
float: 1
ifloat: 1