aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorUlrich Drepper <drepper@redhat.com>2000-07-15 19:02:47 +0000
committerUlrich Drepper <drepper@redhat.com>2000-07-15 19:02:47 +0000
commit9aae19cd9aaf10e7d99c56f9d7d820c5b792faeb (patch)
tree16ffc7a009065b483f7a65230de3e01b6d111894 /linuxthreads
parentUpdate. (diff)
downloadglibc-9aae19cd9aaf10e7d99c56f9d7d820c5b792faeb.tar.gz
glibc-9aae19cd9aaf10e7d99c56f9d7d820c5b792faeb.tar.bz2
glibc-9aae19cd9aaf10e7d99c56f9d7d820c5b792faeb.zip
Update.
2000-05-05 H.J. Lu <hjl@gnu.org> * sysdeps/ia64/pt-machine.h (__compare_and_swap): Change it to have acquire semantics. (__compare_and_swap_with_release_semantics): New inline function. (HAS_COMPARE_AND_SWAP_WITH_RELEASE_SEMANTICS): New macro. 2000-01-28 Hans Boehm <hboehm@exch.hpl.hp.com> * manager.c: Fix the problem with signals at startup. Change the way that thread stacks are allocated on IA64. Clean up some of the guard page allocation stuff. 1999-12-19 H.J. Lu <hjl@gnu.org> * internals.h (page_roundup): New. * attr.c (__pthread_attr_setguardsize); Use page_roundup instead of roundup. * manager.c (pthread_allocate_stack): Make sure guardaddr is page aligned with page_roundup if NEED_SEPARATE_REGISTER_STACK is define. 1999-12-17 Hans Boehm <hboehm@exch.hpl.hp.com> * manager.c (pthread_allocate_stack): Unmap the stack top if failed to map the stack bottom. Fix the guard page. (pthread_free): Fix the guard page. * pthread.c (pthread_initialize): Set rlimit correctly for NEED_SEPARATE_REGISTER_STACK. 1999-12-16 H.J. Lu <hjl@gnu.org> * pthread.c (__pthread_initialize_manager): Pass __pthread_manager_thread_bos instead of __pthread_manager_thread_tos to __clone2. 1999-12-16 H.J. Lu <hjl@gnu.org> * manager.c (pthread_allocate_stack): Correct the calculation of "new_thread_bottom". Remove MAP_GROWSDOWN from mmap for stack bottom. 1999-12-13 H.J. Lu <hjl@gnu.org> * sysdeps/ia64/pt-machine.h (__compare_and_swap): Added a stop bit after setting ar.ccv. 1999-12-12 H.J. Lu <hjl@gnu.org> * manager.c (pthread_allocate_stack): Make the starting address of the stack bottom page aligned. FIXME: it may need changes in other places. (pthread_handle_create): Likewise. 1999-12-11 Hans Boehm <hboehm@exch.hpl.hp.com> * manager.c (pthread_allocate_stack): Handle NEED_SEPARATE_REGISTER_STACK. (pthread_handle_create): Likewise. * pthread.c (__pthread_initialize_manager): Likewise. * sysdeps/ia64/pt-machine.h: Use r13 for thread pointer. 1999-12-02 H.J. 
Lu <hjl@gnu.org> * sysdeps/ia64/pt-machine.h: New.
Diffstat (limited to 'linuxthreads')
-rw-r--r--linuxthreads/ChangeLog70
-rw-r--r--linuxthreads/internals.h5
-rw-r--r--linuxthreads/manager.c165
-rw-r--r--linuxthreads/pthread.c29
-rw-r--r--linuxthreads/sysdeps/ia64/pt-machine.h106
5 files changed, 352 insertions, 23 deletions
diff --git a/linuxthreads/ChangeLog b/linuxthreads/ChangeLog
index e205a2e3ec..10abc40329 100644
--- a/linuxthreads/ChangeLog
+++ b/linuxthreads/ChangeLog
@@ -1,3 +1,73 @@
+2000-05-05 H.J. Lu <hjl@gnu.org>
+
+ * sysdeps/ia64/pt-machine.h (__compare_and_swap): Change it to
+ have acquire semantics.
+ (__compare_and_swap_with_release_semantics): New inline
+ function.
+ (HAS_COMPARE_AND_SWAP_WITH_RELEASE_SEMANTICS): New macro.
+
+2000-01-28 Hans Boehm <hboehm@exch.hpl.hp.com>
+
+ * manager.c: Fix the problem with signals at startup.
+ Change the way that thread stacks are allocated on IA64.
+ Clean up some of the guard page allocation stuff.
+
+1999-12-19 H.J. Lu <hjl@gnu.org>
+
+ * internals.h (page_roundup): New.
+ * attr.c (__pthread_attr_setguardsize): Use page_roundup
+ instead of roundup.
+ * manager.c (pthread_allocate_stack): Make sure guardaddr is
+ page aligned with page_roundup if NEED_SEPARATE_REGISTER_STACK
+ is defined.
+
+1999-12-17 Hans Boehm <hboehm@exch.hpl.hp.com>
+
+ * manager.c (pthread_allocate_stack): Unmap the stack top
+ if failed to map the stack bottom.
+ Fix the guard page.
+ (pthread_free): Fix the guard page.
+
+ * pthread.c (pthread_initialize): Set rlimit correctly for
+ NEED_SEPARATE_REGISTER_STACK.
+
+1999-12-16 H.J. Lu <hjl@gnu.org>
+
+ * pthread.c (__pthread_initialize_manager): Pass
+ __pthread_manager_thread_bos instead of
+ __pthread_manager_thread_tos to __clone2.
+
+1999-12-16 H.J. Lu <hjl@gnu.org>
+
+ * manager.c (pthread_allocate_stack): Correct the calculation
+ of "new_thread_bottom". Remove MAP_GROWSDOWN from mmap for
+ stack bottom.
+
+1999-12-13 H.J. Lu <hjl@gnu.org>
+
+ * sysdeps/ia64/pt-machine.h (__compare_and_swap): Added a stop
+ bit after setting ar.ccv.
+
+1999-12-12 H.J. Lu <hjl@gnu.org>
+
+ * manager.c (pthread_allocate_stack): Make the starting
+ address of the stack bottom page aligned. FIXME: it may
+ need changes in other places.
+ (pthread_handle_create): Likewise.
+
+1999-12-11 Hans Boehm <hboehm@exch.hpl.hp.com>
+
+ * manager.c (pthread_allocate_stack): Handle
+ NEED_SEPARATE_REGISTER_STACK.
+ (pthread_handle_create): Likewise.
+ * pthread.c (__pthread_initialize_manager): Likewise.
+
+ * sysdeps/ia64/pt-machine.h: Use r13 for thread pointer.
+
+1999-12-02 H.J. Lu <hjl@gnu.org>
+
+ * sysdeps/ia64/pt-machine.h: New.
+
2000-07-13 Ulrich Drepper <drepper@redhat.com>
* wrapsyscall.c: Mark non-__ protected names as weak.
diff --git a/linuxthreads/internals.h b/linuxthreads/internals.h
index e3fbf8c521..118eecfff0 100644
--- a/linuxthreads/internals.h
+++ b/linuxthreads/internals.h
@@ -311,6 +311,11 @@ static inline int nonexisting_handle(pthread_handle h, pthread_t id)
/* Fill in defaults left unspecified by pt-machine.h. */
+/* Round V up to the next multiple of P; P must be a power of two. */
+#ifndef page_roundup
+#define page_roundup(v,p) ((((size_t) (v)) + (p) - 1) & ~((p) - 1))
+#endif
+
/* The page size we can get from the system. This should likely not be
changed by the machine file but, you never know. */
#ifndef PAGE_SIZE
diff --git a/linuxthreads/manager.c b/linuxthreads/manager.c
index 0ca172c8dd..76ef6cf9fb 100644
--- a/linuxthreads/manager.c
+++ b/linuxthreads/manager.c
@@ -82,6 +82,13 @@ static int main_thread_exiting = 0;
static pthread_t pthread_threads_counter = 0;
+#ifdef NEED_SEPARATE_REGISTER_STACK
+/* Signal masks for the manager. These have to be global only when clone2
+ is used since it's currently broken wrt signals in the child. */
+static sigset_t manager_mask; /* Manager normal signal mask */
+static sigset_t manager_mask_all; /* All bits set. */
+#endif
+
/* Forward declarations */
static int pthread_handle_create(pthread_t *thread, const pthread_attr_t *attr,
@@ -100,7 +107,9 @@ int __pthread_manager(void *arg)
{
int reqfd = (int) (long int) arg;
struct pollfd ufd;
- sigset_t mask;
+#ifndef NEED_SEPARATE_REGISTER_STACK
+ sigset_t manager_mask;
+#endif
int n;
struct pthread_request request;
@@ -112,12 +121,15 @@ int __pthread_manager(void *arg)
__pthread_manager_thread.p_errnop = &__pthread_manager_thread.p_errno;
__pthread_manager_thread.p_h_errnop = &__pthread_manager_thread.p_h_errno;
/* Block all signals except __pthread_sig_cancel and SIGTRAP */
- sigfillset(&mask);
- sigdelset(&mask, __pthread_sig_cancel); /* for thread termination */
- sigdelset(&mask, SIGTRAP); /* for debugging purposes */
+ sigfillset(&manager_mask);
+ sigdelset(&manager_mask, __pthread_sig_cancel); /* for thread termination */
+ sigdelset(&manager_mask, SIGTRAP); /* for debugging purposes */
if (__pthread_threads_debug && __pthread_sig_debug > 0)
- sigdelset(&mask, __pthread_sig_debug);
- sigprocmask(SIG_SETMASK, &mask, NULL);
+ sigdelset(&manager_mask, __pthread_sig_debug);
+ sigprocmask(SIG_SETMASK, &manager_mask, NULL);
+#ifdef NEED_SEPARATE_REGISTER_STACK
+ sigfillset(&manager_mask_all);
+#endif
/* Raise our priority to match that of main thread */
__pthread_manager_adjust_prio(__pthread_main_thread->p_priority);
/* Synchronize debugging of the thread manager */
@@ -294,7 +306,16 @@ static int pthread_allocate_stack(const pthread_attr_t *attr,
if (attr != NULL && attr->__stackaddr_set)
{
- /* The user provided a stack. */
+ /* The user provided a stack. For now we interpret the supplied
+ address as 1 + the highest addr. in the stack segment. If a
+ separate register stack is needed, we place it at the low end
+ of the segment, relying on the associated stacksize to
+ determine the low end of the segment. This differs from many
+ (but not all) other pthreads implementations. The intent is
+ that on machines with a single stack growing toward higher
+ addresses, stackaddr would be the lowest address in the stack
+ segment, so that it is consistently close to the initial sp
+ value. */
new_thread =
(pthread_descr) ((long)(attr->__stackaddr) & -sizeof(void *)) - 1;
new_thread_bottom = (char *) attr->__stackaddr - attr->__stacksize;
@@ -304,11 +325,57 @@ static int pthread_allocate_stack(const pthread_attr_t *attr,
}
else
{
- stacksize = STACK_SIZE - pagesize;
- if (attr != NULL)
- stacksize = MIN (stacksize, roundup(attr->__stacksize, pagesize));
+#ifdef NEED_SEPARATE_REGISTER_STACK
+ size_t granularity = 2 * pagesize;
+ /* Try to make stacksize/2 a multiple of pagesize */
+#else
+ size_t granularity = pagesize;
+#endif
/* Allocate space for stack and thread descriptor at default address */
+ if (attr != NULL)
+ {
+ guardsize = page_roundup (attr->__guardsize, granularity);
+ stacksize = STACK_SIZE - guardsize;
+ stacksize = MIN (stacksize,
+ page_roundup (attr->__stacksize, granularity));
+ }
+ else
+ {
+ guardsize = granularity;
+ stacksize = STACK_SIZE - granularity;
+ }
new_thread = default_new_thread;
+#ifdef NEED_SEPARATE_REGISTER_STACK
+ new_thread_bottom = (char *) (new_thread + 1) - stacksize - guardsize;
+ /* Includes guard area, unlike the normal case. Use the bottom
+ end of the segment as backing store for the register stack.
+ Needed on IA64. In this case, we also map the entire stack at
+ once. According to David Mosberger, that's cheaper. It also
+ avoids the risk of intermittent failures due to other mappings
+ in the same region. The cost is that we might be able to map
+ slightly fewer stacks. */
+
+ /* First the main stack: */
+ if (mmap((caddr_t)((char *)(new_thread + 1) - stacksize / 2),
+ stacksize / 2, PROT_READ | PROT_WRITE | PROT_EXEC,
+ MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED, -1, 0)
+ == MAP_FAILED)
+ /* Bad luck, this segment is already mapped. */
+ return -1;
+ /* Then the register stack: */
+ if (mmap((caddr_t)new_thread_bottom, stacksize/2,
+ PROT_READ | PROT_WRITE | PROT_EXEC,
+ MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED, -1, 0)
+ == MAP_FAILED)
+ {
+ munmap((caddr_t)((char *)(new_thread + 1) - stacksize/2),
+ stacksize/2);
+ return -1;
+ }
+
+ guardaddr = new_thread_bottom + stacksize/2;
+ /* We leave the guard area in the middle unmapped. */
+#else /* !NEED_SEPARATE_REGISTER_STACK */
new_thread_bottom = (char *) (new_thread + 1) - stacksize;
if (mmap((caddr_t)((char *)(new_thread + 1) - INITIAL_STACK_SIZE),
INITIAL_STACK_SIZE, PROT_READ | PROT_WRITE | PROT_EXEC,
@@ -317,10 +384,10 @@ static int pthread_allocate_stack(const pthread_attr_t *attr,
/* Bad luck, this segment is already mapped. */
return -1;
/* We manage to get a stack. Now see whether we need a guard
- and allocate it if necessary. Notice that the default
- attributes (stack_size = STACK_SIZE - pagesize) do not need
- a guard page, since the RLIMIT_STACK soft limit prevents stacks
- from running into one another. */
+ and allocate it if necessary. Notice that the default
+ attributes (stack_size = STACK_SIZE - pagesize and guardsize
+ = pagesize) do not need a guard page, since the RLIMIT_STACK
+ soft limit prevents stacks from running into one another. */
if (stacksize == STACK_SIZE - pagesize)
{
/* We don't need a guard page. */
@@ -330,7 +397,6 @@ static int pthread_allocate_stack(const pthread_attr_t *attr,
else
{
/* Put a bad page at the bottom of the stack */
- guardsize = attr->__guardsize;
guardaddr = (void *)new_thread_bottom - guardsize;
if (mmap ((caddr_t) guardaddr, guardsize, 0, MAP_FIXED, -1, 0)
== MAP_FAILED)
@@ -340,6 +406,7 @@ static int pthread_allocate_stack(const pthread_attr_t *attr,
guardsize = 0;
}
}
+#endif /* !NEED_SEPARATE_REGISTER_STACK */
}
/* Clear the thread data structure. */
memset (new_thread, '\0', sizeof (*new_thread));
@@ -452,9 +519,30 @@ static int pthread_handle_create(pthread_t *thread, const pthread_attr_t *attr,
__pthread_lock(new_thread->p_lock, NULL);
/* We have to report this event. */
+#ifdef NEED_SEPARATE_REGISTER_STACK
+ /* Perhaps this version should be used on all platforms. But
+ this requires that __clone2 be uniformly supported
+ everywhere.
+
+ And there is some argument for changing the __clone2
+ interface to pass sp and bsp instead, making it more IA64
+ specific, but allowing stacks to grow outward from each
+ other, to get less paging and fewer mmaps. Clone2
+ currently can't take signals in the child right after
+ process creation. Mask them in the child. It resets the
+ mask once it starts up. */
+ sigprocmask(SIG_SETMASK, &manager_mask_all, NULL);
+ pid = __clone2(pthread_start_thread_event,
+ (void **)new_thread_bottom,
+ (char *)new_thread - new_thread_bottom,
+ CLONE_VM | CLONE_FS | CLONE_FILES | CLONE_SIGHAND |
+ __pthread_sig_cancel, new_thread);
+ sigprocmask(SIG_SETMASK, &manager_mask, NULL);
+#else
pid = __clone(pthread_start_thread_event, (void **) new_thread,
CLONE_VM | CLONE_FS | CLONE_FILES | CLONE_SIGHAND |
__pthread_sig_cancel, new_thread);
+#endif
if (pid != -1)
{
/* Now fill in the information about the new thread in
@@ -479,18 +567,38 @@ static int pthread_handle_create(pthread_t *thread, const pthread_attr_t *attr,
}
}
if (pid == 0)
- pid = __clone(pthread_start_thread, (void **) new_thread,
- CLONE_VM | CLONE_FS | CLONE_FILES | CLONE_SIGHAND |
- __pthread_sig_cancel, new_thread);
+ {
+#ifdef NEED_SEPARATE_REGISTER_STACK
+ sigprocmask(SIG_SETMASK, &manager_mask_all, NULL);
+ pid = __clone2(pthread_start_thread,
+ (void **)new_thread_bottom,
+ (char *)new_thread - new_thread_bottom,
+ CLONE_VM | CLONE_FS | CLONE_FILES | CLONE_SIGHAND |
+ __pthread_sig_cancel, new_thread);
+ sigprocmask(SIG_SETMASK, &manager_mask, NULL);
+#else
+ pid = __clone(pthread_start_thread, (void **) new_thread,
+ CLONE_VM | CLONE_FS | CLONE_FILES | CLONE_SIGHAND |
+ __pthread_sig_cancel, new_thread);
+#endif /* !NEED_SEPARATE_REGISTER_STACK */
+ }
/* Check if cloning succeeded */
if (pid == -1) {
/* Free the stack if we allocated it */
if (attr == NULL || !attr->__stackaddr_set)
{
+#ifdef NEED_SEPARATE_REGISTER_STACK
+ size_t stacksize = ((char *)(new_thread->p_guardaddr)
+ - new_thread_bottom);
+ munmap((caddr_t)new_thread_bottom, stacksize);
+ munmap((caddr_t)new_thread_bottom + stacksize
+ + new_thread->p_guardsize, stacksize);
+#else
if (new_thread->p_guardsize != 0)
munmap(new_thread->p_guardaddr, new_thread->p_guardsize);
munmap((caddr_t)((char *)(new_thread+1) - INITIAL_STACK_SIZE),
INITIAL_STACK_SIZE);
+#endif
}
__pthread_handles[sseg].h_descr = NULL;
__pthread_handles[sseg].h_bottom = NULL;
@@ -550,10 +658,27 @@ static void pthread_free(pthread_descr th)
if (th == &__pthread_initial_thread) return;
if (!th->p_userstack)
{
+ size_t guardsize = th->p_guardsize;
/* Free the stack and thread descriptor area */
- if (th->p_guardsize != 0)
- munmap(th->p_guardaddr, th->p_guardsize);
+#ifdef NEED_SEPARATE_REGISTER_STACK
+ char *guardaddr = th->p_guardaddr;
+ /* We unmap exactly what we mapped, in case there was something
+ else in the same region. Guardaddr is always set, even if
+ guardsize is 0. This allows us to compute everything else. */
+ size_t stacksize = (char *)(th+1) - guardaddr - guardsize;
+ /* Unmap the register stack, which is below guardaddr. */
+ munmap((caddr_t)(guardaddr-stacksize), stacksize);
+ /* Unmap the main stack. */
+ munmap((caddr_t)(guardaddr+guardsize), stacksize);
+#else
+ /* The following assumes that we only allocate stacks of one
+ size. That's currently true but probably shouldn't be. This
+ looks like it fails for growing stacks if there was something
+ else mapped just below the stack? */
+ if (guardsize != 0)
+ munmap(th->p_guardaddr, guardsize);
munmap((caddr_t) ((char *)(th+1) - STACK_SIZE), STACK_SIZE);
+#endif
}
}
diff --git a/linuxthreads/pthread.c b/linuxthreads/pthread.c
index 2700a29fb1..d70e3f4b1f 100644
--- a/linuxthreads/pthread.c
+++ b/linuxthreads/pthread.c
@@ -362,7 +362,13 @@ static void pthread_initialize(void)
/* Play with the stack size limit to make sure that no stack ever grows
beyond STACK_SIZE minus one page (to act as a guard page). */
getrlimit(RLIMIT_STACK, &limit);
+#ifdef NEED_SEPARATE_REGISTER_STACK
+ /* STACK_SIZE bytes hold both the main stack and register backing
+ store. The rlimit value applies to each individually. */
+ max_stack = STACK_SIZE/2 - __getpagesize();
+#else
max_stack = STACK_SIZE - __getpagesize();
+#endif
if (limit.rlim_cur > max_stack) {
limit.rlim_cur = max_stack;
setrlimit(RLIMIT_STACK, &limit);
@@ -444,10 +450,18 @@ int __pthread_initialize_manager(void)
| __pthread_initial_thread.p_eventbuf.eventmask.event_bits[idx]))
!= 0)
{
+#ifdef NEED_SEPARATE_REGISTER_STACK
+ pid = __clone2(__pthread_manager_event,
+ (void **) __pthread_manager_thread_bos,
+ THREAD_MANAGER_STACK_SIZE,
+ CLONE_VM | CLONE_FS | CLONE_FILES | CLONE_SIGHAND,
+ (void *)(long)manager_pipe[0]);
+#else
pid = __clone(__pthread_manager_event,
(void **) __pthread_manager_thread_tos,
CLONE_VM | CLONE_FS | CLONE_FILES | CLONE_SIGHAND,
(void *)(long)manager_pipe[0]);
+#endif
if (pid != -1)
{
@@ -472,9 +486,18 @@ int __pthread_initialize_manager(void)
}
if (pid == 0)
- pid = __clone(__pthread_manager, (void **) __pthread_manager_thread_tos,
- CLONE_VM | CLONE_FS | CLONE_FILES | CLONE_SIGHAND,
- (void *)(long)manager_pipe[0]);
+ {
+#ifdef NEED_SEPARATE_REGISTER_STACK
+ pid = __clone2(__pthread_manager, (void **) __pthread_manager_thread_bos,
+ THREAD_MANAGER_STACK_SIZE,
+ CLONE_VM | CLONE_FS | CLONE_FILES | CLONE_SIGHAND,
+ (void *)(long)manager_pipe[0]);
+#else
+ pid = __clone(__pthread_manager, (void **) __pthread_manager_thread_tos,
+ CLONE_VM | CLONE_FS | CLONE_FILES | CLONE_SIGHAND,
+ (void *)(long)manager_pipe[0]);
+#endif
+ }
if (pid == -1) {
free(__pthread_manager_thread_bos);
__libc_close(manager_pipe[0]);
diff --git a/linuxthreads/sysdeps/ia64/pt-machine.h b/linuxthreads/sysdeps/ia64/pt-machine.h
new file mode 100644
index 0000000000..58cccc2962
--- /dev/null
+++ b/linuxthreads/sysdeps/ia64/pt-machine.h
@@ -0,0 +1,106 @@
+/* Machine-dependent pthreads configuration and inline functions.
+ IA-64 version.
+ Copyright (C) 1999, 2000 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Library General Public License as
+ published by the Free Software Foundation; either version 2 of the
+ License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Library General Public License for more details.
+
+ You should have received a copy of the GNU Library General Public
+ License along with the GNU C Library; see the file COPYING.LIB. If not,
+ write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ Boston, MA 02111-1307, USA. */
+
+#ifndef PT_EI
+# define PT_EI extern inline
+#endif
+
+/* Make sure gcc doesn't try to be clever and move things around on
+ us. We need to use _exactly_ the address the user gave us, not some
+ alias that contains the same information. */
+#define __atomic_fool_gcc(x) (*(volatile struct { int a[100]; } *)x)
+
+#ifndef ELF_MACHINE_NAME
+
+#define NEED_SEPARATE_REGISTER_STACK
+
+/* Get some notion of the current stack. Need not be exactly the top
+ of the stack, just something somewhere in the current frame.
+ r12 (sp) is the stack pointer. */
+#define CURRENT_STACK_FRAME stack_pointer
+register char *stack_pointer __asm__ ("sp");
+
+
+/* Register r13 (tp) is reserved by the ABI as "thread pointer". */
+struct _pthread_descr_struct;
+register struct _pthread_descr_struct *__thread_self __asm__("r13");
+
+/* Return the thread descriptor for the current thread. */
+#define THREAD_SELF __thread_self
+
+/* Initialize the thread-unique value. */
+#define INIT_THREAD_SELF(descr, nr) (__thread_self = (descr))
+
+
+/* Access to data in the thread descriptor is easy. */
+#define THREAD_GETMEM(descr, member) __thread_self->member
+#define THREAD_GETMEM_NC(descr, member) __thread_self->member
+#define THREAD_SETMEM(descr, member, value) __thread_self->member = (value)
+#define THREAD_SETMEM_NC(descr, member, value) __thread_self->member = (value)
+
+
+#define HAS_COMPARE_AND_SWAP_WITH_RELEASE_SEMANTICS
+
+PT_EI long int
+__compare_and_swap (long int *p, long int oldval, long int newval)
+{
+ long int readval;
+ /* ar.ccv = OLDVAL; cmpxchg8.acq stores NEWVAL in *P only if *P equals ar.ccv. READVAL gets the prior *P, so the result is nonzero on a successful swap. */
+ __asm__ __volatile__
+ ("mov ar.ccv=%4;;\n\t"
+ "cmpxchg8.acq %0=%1,%2,ar.ccv"
+ : "=r" (readval), "=m" (__atomic_fool_gcc (p))
+ : "r"(newval), "1" (__atomic_fool_gcc (p)), "r" (oldval)
+ : "memory");
+ return readval == oldval;
+}
+
+PT_EI long int
+__compare_and_swap_with_release_semantics (long int *p,
+ long int oldval,
+ long int newval)
+{
+ long int readval;
+ /* Same compare-and-swap as the .acq form, but issued as cmpxchg8.rel (release ordering). Returns nonzero when the prior *P equalled OLDVAL. */
+ __asm__ __volatile__
+ ("mov ar.ccv=%4;;\n\t"
+ "cmpxchg8.rel %0=%1,%2,ar.ccv"
+ : "=r" (readval), "=m" (__atomic_fool_gcc (p))
+ : "r"(newval), "1" (__atomic_fool_gcc (p)), "r" (oldval)
+ : "memory");
+ return readval == oldval;
+}
+
+#endif /* ELF_MACHINE_NAME */
+
+/* Spinlock implementation; required. xchg4 stores 1 in *SPINLOCK and returns the value it held before. */
+PT_EI long int
+testandset (int *spinlock)
+{
+ long int ret;
+
+ __asm__ __volatile__(
+ "xchg4 %0=%1,%2"
+ : "=r"(ret), "=m"(__atomic_fool_gcc (spinlock))
+ : "r"(1), "1"(__atomic_fool_gcc (spinlock))
+ : "memory");
+
+ return ret;
+}