| /* |
| * Copyright © 2009 CNRS |
| * Copyright © 2009-2011 INRIA. All rights reserved. |
| * Copyright © 2009-2011 Université Bordeaux 1 |
| * Copyright © 2009-2011 Cisco Systems, Inc. All rights reserved. |
| * Copyright © 2010 IBM |
| * See COPYING in top-level directory. |
| */ |
| |
| #include <private/autogen/config.h> |
| #include <hwloc.h> |
| #include <hwloc/linux.h> |
#include <private/misc.h>
#include <private/private.h>
| #include <private/debug.h> |
| |
| #include <limits.h> |
| #include <stdio.h> |
| #include <fcntl.h> |
| #include <errno.h> |
| #include <assert.h> |
| #include <dirent.h> |
| #include <unistd.h> |
| #include <sys/types.h> |
| #include <sys/stat.h> |
| #include <sched.h> |
| #include <pthread.h> |
| #include <sys/mman.h> |
| #include <sys/syscall.h> |
| #if defined HWLOC_HAVE_SET_MEMPOLICY || defined HWLOC_HAVE_MBIND |
| #define migratepages migrate_pages /* workaround broken migratepages prototype in numaif.h before libnuma 2.0.2 */ |
| #include <numaif.h> |
| #endif |
| |
| #if !defined(HWLOC_HAVE_CPU_SET) && !(defined(HWLOC_HAVE_CPU_SET_S) && !defined(HWLOC_HAVE_OLD_SCHED_SETAFFINITY)) && defined(HWLOC_HAVE__SYSCALL3) |
/* libc doesn't support sched_setaffinity, so build the system call
 * ourselves: */
| # include <linux/unistd.h> |
| # ifndef __NR_sched_setaffinity |
| # ifdef __i386__ |
| # define __NR_sched_setaffinity 241 |
| # elif defined(__x86_64__) |
| # define __NR_sched_setaffinity 203 |
| # elif defined(__ia64__) |
| # define __NR_sched_setaffinity 1231 |
| # elif defined(__hppa__) |
| # define __NR_sched_setaffinity 211 |
| # elif defined(__alpha__) |
| # define __NR_sched_setaffinity 395 |
| # elif defined(__s390__) |
| # define __NR_sched_setaffinity 239 |
| # elif defined(__sparc__) |
| # define __NR_sched_setaffinity 261 |
| # elif defined(__m68k__) |
| # define __NR_sched_setaffinity 311 |
| # elif defined(__powerpc__) || defined(__ppc__) || defined(__PPC__) || defined(__powerpc64__) || defined(__ppc64__) |
| # define __NR_sched_setaffinity 222 |
| # elif defined(__arm__) |
| # define __NR_sched_setaffinity 241 |
| # elif defined(__cris__) |
| # define __NR_sched_setaffinity 241 |
| /*# elif defined(__mips__) |
| # define __NR_sched_setaffinity TODO (32/64/nabi) */ |
| # else |
| # warning "don't know the syscall number for sched_setaffinity on this architecture, will not support binding" |
| # define sched_setaffinity(pid, lg, mask) (errno = ENOSYS, -1) |
| # endif |
| # endif |
| # ifndef sched_setaffinity |
| _syscall3(int, sched_setaffinity, pid_t, pid, unsigned int, lg, const void *, mask) |
| # endif |
| # ifndef __NR_sched_getaffinity |
| # ifdef __i386__ |
| # define __NR_sched_getaffinity 242 |
| # elif defined(__x86_64__) |
| # define __NR_sched_getaffinity 204 |
| # elif defined(__ia64__) |
| # define __NR_sched_getaffinity 1232 |
| # elif defined(__hppa__) |
| # define __NR_sched_getaffinity 212 |
| # elif defined(__alpha__) |
| # define __NR_sched_getaffinity 396 |
| # elif defined(__s390__) |
| # define __NR_sched_getaffinity 240 |
| # elif defined(__sparc__) |
| # define __NR_sched_getaffinity 260 |
| # elif defined(__m68k__) |
| # define __NR_sched_getaffinity 312 |
| # elif defined(__powerpc__) || defined(__ppc__) || defined(__PPC__) || defined(__powerpc64__) || defined(__ppc64__) |
| # define __NR_sched_getaffinity 223 |
| # elif defined(__arm__) |
| # define __NR_sched_getaffinity 242 |
| # elif defined(__cris__) |
| # define __NR_sched_getaffinity 242 |
| /*# elif defined(__mips__) |
| # define __NR_sched_getaffinity TODO (32/64/nabi) */ |
| # else |
| # warning "don't know the syscall number for sched_getaffinity on this architecture, will not support getting binding" |
| # define sched_getaffinity(pid, lg, mask) (errno = ENOSYS, -1) |
| # endif |
| # endif |
| # ifndef sched_getaffinity |
| _syscall3(int, sched_getaffinity, pid_t, pid, unsigned int, lg, void *, mask) |
| # endif |
| #endif |
| |
| /* Added for ntohl() */ |
| #include <arpa/inet.h> |
| |
| #ifdef HAVE_OPENAT |
| /* Use our own filesystem functions if we have openat */ |
| |
| static const char * |
| hwloc_checkat(const char *path, int fsroot_fd) |
| { |
| const char *relative_path; |
| if (fsroot_fd < 0) { |
| errno = EBADF; |
| return NULL; |
| } |
| |
| /* Skip leading slashes. */ |
| for (relative_path = path; *relative_path == '/'; relative_path++); |
| |
| return relative_path; |
| } |
| |
| static int |
| hwloc_openat(const char *path, int fsroot_fd) |
| { |
| const char *relative_path; |
| |
| relative_path = hwloc_checkat(path, fsroot_fd); |
| if (!relative_path) |
| return -1; |
| |
| return openat (fsroot_fd, relative_path, O_RDONLY); |
| } |
| |
| static FILE * |
| hwloc_fopenat(const char *path, const char *mode, int fsroot_fd) |
| { |
| int fd; |
| |
| if (strcmp(mode, "r")) { |
| errno = ENOTSUP; |
| return NULL; |
| } |
| |
| fd = hwloc_openat (path, fsroot_fd); |
| if (fd == -1) |
| return NULL; |
| |
| return fdopen(fd, mode); |
| } |
| |
| static int |
| hwloc_accessat(const char *path, int mode, int fsroot_fd) |
| { |
| const char *relative_path; |
| |
| relative_path = hwloc_checkat(path, fsroot_fd); |
| if (!relative_path) |
| return -1; |
| |
| return faccessat(fsroot_fd, relative_path, mode, 0); |
| } |
| |
| static int |
| hwloc_fstatat(const char *path, struct stat *st, int flags, int fsroot_fd) |
| { |
| const char *relative_path; |
| |
| relative_path = hwloc_checkat(path, fsroot_fd); |
| if (!relative_path) |
| return -1; |
| |
| return fstatat(fsroot_fd, relative_path, st, flags); |
| } |
| |
| static DIR* |
| hwloc_opendirat(const char *path, int fsroot_fd) |
| { |
| int dir_fd; |
| const char *relative_path; |
| |
| relative_path = hwloc_checkat(path, fsroot_fd); |
| if (!relative_path) |
| return NULL; |
| |
| dir_fd = openat(fsroot_fd, relative_path, O_RDONLY | O_DIRECTORY); |
| if (dir_fd < 0) |
| return NULL; |
| |
| return fdopendir(dir_fd); |
| } |
| |
| #endif /* HAVE_OPENAT */ |
| |
/* Static inline versions of open and fopen so that we can use openat if we
   have it, but still preserve compiler parameter checking */
| static __hwloc_inline int |
| hwloc_open(const char *p, int d __hwloc_attribute_unused) |
| { |
| #ifdef HAVE_OPENAT |
| return hwloc_openat(p, d); |
| #else |
| return open(p, O_RDONLY); |
| #endif |
| } |
| |
| static __hwloc_inline FILE * |
| hwloc_fopen(const char *p, const char *m, int d __hwloc_attribute_unused) |
| { |
| #ifdef HAVE_OPENAT |
| return hwloc_fopenat(p, m, d); |
| #else |
| return fopen(p, m); |
| #endif |
| } |
| |
| /* Static inline version of access so that we can use openat if we have |
| it, but still preserve compiler parameter checking */ |
| static __hwloc_inline int |
| hwloc_access(const char *p, int m, int d __hwloc_attribute_unused) |
| { |
| #ifdef HAVE_OPENAT |
| return hwloc_accessat(p, m, d); |
| #else |
| return access(p, m); |
| #endif |
| } |
| |
| static __hwloc_inline int |
| hwloc_stat(const char *p, struct stat *st, int d __hwloc_attribute_unused) |
| { |
| #ifdef HAVE_OPENAT |
| return hwloc_fstatat(p, st, 0, d); |
| #else |
| return stat(p, st); |
| #endif |
| } |
| |
| /* Static inline version of opendir so that we can use openat if we have |
| it, but still preserve compiler parameter checking */ |
| static __hwloc_inline DIR * |
| hwloc_opendir(const char *p, int d __hwloc_attribute_unused) |
| { |
| #ifdef HAVE_OPENAT |
| return hwloc_opendirat(p, d); |
| #else |
| return opendir(p); |
| #endif |
| } |
| |
| int |
| hwloc_linux_set_tid_cpubind(hwloc_topology_t topology __hwloc_attribute_unused, pid_t tid __hwloc_attribute_unused, hwloc_const_bitmap_t hwloc_set __hwloc_attribute_unused) |
| { |
| /* TODO Kerrighed: Use |
| * int migrate (pid_t pid, int destination_node); |
| * int migrate_self (int destination_node); |
| * int thread_migrate (int thread_id, int destination_node); |
| */ |
| |
| /* The resulting binding is always strict */ |
| |
| #if defined(HWLOC_HAVE_CPU_SET_S) && !defined(HWLOC_HAVE_OLD_SCHED_SETAFFINITY) |
| cpu_set_t *plinux_set; |
| unsigned cpu; |
| int last; |
| size_t setsize; |
| int err; |
| |
| last = hwloc_bitmap_last(hwloc_set); |
| if (last == -1) { |
| errno = EINVAL; |
| return -1; |
| } |
| |
| setsize = CPU_ALLOC_SIZE(last+1); |
| plinux_set = CPU_ALLOC(last+1); |
| |
| CPU_ZERO_S(setsize, plinux_set); |
| hwloc_bitmap_foreach_begin(cpu, hwloc_set) |
| CPU_SET_S(cpu, setsize, plinux_set); |
| hwloc_bitmap_foreach_end(); |
| |
| err = sched_setaffinity(tid, setsize, plinux_set); |
| |
| CPU_FREE(plinux_set); |
| return err; |
| #elif defined(HWLOC_HAVE_CPU_SET) |
| cpu_set_t linux_set; |
| unsigned cpu; |
| |
| CPU_ZERO(&linux_set); |
| hwloc_bitmap_foreach_begin(cpu, hwloc_set) |
| CPU_SET(cpu, &linux_set); |
| hwloc_bitmap_foreach_end(); |
| |
| #ifdef HWLOC_HAVE_OLD_SCHED_SETAFFINITY |
| return sched_setaffinity(tid, &linux_set); |
| #else /* HWLOC_HAVE_OLD_SCHED_SETAFFINITY */ |
| return sched_setaffinity(tid, sizeof(linux_set), &linux_set); |
| #endif /* HWLOC_HAVE_OLD_SCHED_SETAFFINITY */ |
| #elif defined(HWLOC_HAVE__SYSCALL3) |
| unsigned long mask = hwloc_bitmap_to_ulong(hwloc_set); |
| |
| #ifdef HWLOC_HAVE_OLD_SCHED_SETAFFINITY |
| return sched_setaffinity(tid, (void*) &mask); |
| #else /* HWLOC_HAVE_OLD_SCHED_SETAFFINITY */ |
| return sched_setaffinity(tid, sizeof(mask), (void*) &mask); |
| #endif /* HWLOC_HAVE_OLD_SCHED_SETAFFINITY */ |
| #else /* !_SYSCALL3 */ |
| errno = ENOSYS; |
| return -1; |
| #endif /* !_SYSCALL3 */ |
| } |
| |
| #if defined(HWLOC_HAVE_CPU_SET_S) && !defined(HWLOC_HAVE_OLD_SCHED_SETAFFINITY) |
| /* |
| * On some kernels, sched_getaffinity requires the output size to be larger |
| * than the kernel cpu_set size (defined by CONFIG_NR_CPUS). |
 * Try sched_getaffinity on ourselves until we find a nr_cpus value that makes
| * the kernel happy. |
| */ |
| static int |
| hwloc_linux_find_kernel_nr_cpus(hwloc_topology_t topology) |
| { |
| static int nr_cpus = -1; |
| |
| if (nr_cpus != -1) |
| /* already computed */ |
| return nr_cpus; |
| |
| /* start with a nr_cpus that may contain the whole topology */ |
| nr_cpus = hwloc_bitmap_last(topology->levels[0][0]->complete_cpuset) + 1; |
| while (1) { |
| cpu_set_t *set = CPU_ALLOC(nr_cpus); |
| size_t setsize = CPU_ALLOC_SIZE(nr_cpus); |
| int err = sched_getaffinity(0, setsize, set); /* always works, unless setsize is too small */ |
| CPU_FREE(set); |
| if (!err) |
| /* found it */ |
| return nr_cpus; |
| nr_cpus *= 2; |
| } |
| } |
| #endif |
| |
| int |
| hwloc_linux_get_tid_cpubind(hwloc_topology_t topology __hwloc_attribute_unused, pid_t tid __hwloc_attribute_unused, hwloc_bitmap_t hwloc_set __hwloc_attribute_unused) |
| { |
| int err __hwloc_attribute_unused; |
| /* TODO Kerrighed */ |
| |
| #if defined(HWLOC_HAVE_CPU_SET_S) && !defined(HWLOC_HAVE_OLD_SCHED_SETAFFINITY) |
| cpu_set_t *plinux_set; |
| unsigned cpu; |
| int last; |
| size_t setsize; |
| int kernel_nr_cpus; |
| |
| /* find the kernel nr_cpus so as to use a large enough cpu_set size */ |
| kernel_nr_cpus = hwloc_linux_find_kernel_nr_cpus(topology); |
| setsize = CPU_ALLOC_SIZE(kernel_nr_cpus); |
| plinux_set = CPU_ALLOC(kernel_nr_cpus); |
| |
| err = sched_getaffinity(tid, setsize, plinux_set); |
| |
| if (err < 0) { |
| CPU_FREE(plinux_set); |
| return -1; |
| } |
| |
| last = hwloc_bitmap_last(topology->levels[0][0]->complete_cpuset); |
| assert(last != -1); |
| |
| hwloc_bitmap_zero(hwloc_set); |
| for(cpu=0; cpu<=(unsigned) last; cpu++) |
| if (CPU_ISSET_S(cpu, setsize, plinux_set)) |
| hwloc_bitmap_set(hwloc_set, cpu); |
| |
| CPU_FREE(plinux_set); |
| #elif defined(HWLOC_HAVE_CPU_SET) |
| cpu_set_t linux_set; |
| unsigned cpu; |
| |
| #ifdef HWLOC_HAVE_OLD_SCHED_SETAFFINITY |
| err = sched_getaffinity(tid, &linux_set); |
| #else /* HWLOC_HAVE_OLD_SCHED_SETAFFINITY */ |
| err = sched_getaffinity(tid, sizeof(linux_set), &linux_set); |
| #endif /* HWLOC_HAVE_OLD_SCHED_SETAFFINITY */ |
| if (err < 0) |
| return -1; |
| |
| hwloc_bitmap_zero(hwloc_set); |
| for(cpu=0; cpu<CPU_SETSIZE; cpu++) |
| if (CPU_ISSET(cpu, &linux_set)) |
| hwloc_bitmap_set(hwloc_set, cpu); |
| #elif defined(HWLOC_HAVE__SYSCALL3) |
| unsigned long mask; |
| |
| #ifdef HWLOC_HAVE_OLD_SCHED_SETAFFINITY |
| err = sched_getaffinity(tid, (void*) &mask); |
| #else /* HWLOC_HAVE_OLD_SCHED_SETAFFINITY */ |
| err = sched_getaffinity(tid, sizeof(mask), (void*) &mask); |
| #endif /* HWLOC_HAVE_OLD_SCHED_SETAFFINITY */ |
| if (err < 0) |
| return -1; |
| |
| hwloc_bitmap_from_ulong(hwloc_set, mask); |
| #else /* !_SYSCALL3 */ |
| errno = ENOSYS; |
| return -1; |
| #endif /* !_SYSCALL3 */ |
| |
| return 0; |
| } |
| |
| /* Get the array of tids of a process from the task directory in /proc */ |
| static int |
| hwloc_linux_get_proc_tids(DIR *taskdir, unsigned *nr_tidsp, pid_t ** tidsp) |
| { |
| struct dirent *dirent; |
| unsigned nr_tids = 0; |
| unsigned max_tids = 32; |
| pid_t *tids; |
| struct stat sb; |
| |
| /* take the number of links as a good estimate for the number of tids */ |
| if (fstat(dirfd(taskdir), &sb) == 0) |
| max_tids = sb.st_nlink; |
| |
| tids = malloc(max_tids*sizeof(pid_t)); |
| if (!tids) { |
| errno = ENOMEM; |
| return -1; |
| } |
| |
| rewinddir(taskdir); |
| |
| while ((dirent = readdir(taskdir)) != NULL) { |
| if (nr_tids == max_tids) { |
| pid_t *newtids; |
| max_tids += 8; |
| newtids = realloc(tids, max_tids*sizeof(pid_t)); |
| if (!newtids) { |
| free(tids); |
| errno = ENOMEM; |
| return -1; |
| } |
| tids = newtids; |
| } |
| if (!strcmp(dirent->d_name, ".") || !strcmp(dirent->d_name, "..")) |
| continue; |
| tids[nr_tids++] = atoi(dirent->d_name); |
| } |
| |
| *nr_tidsp = nr_tids; |
| *tidsp = tids; |
| return 0; |
| } |
| |
| /* Callbacks for binding each process sub-tid */ |
| typedef int (*hwloc_linux_foreach_proc_tid_cb_t)(hwloc_topology_t topology, pid_t tid, void *data, int idx, int flags); |
| |
| static int |
| hwloc_linux_foreach_proc_tid_set_cpubind_cb(hwloc_topology_t topology, pid_t tid, void *data, int idx __hwloc_attribute_unused, int flags __hwloc_attribute_unused) |
| { |
| hwloc_bitmap_t cpuset = data; |
| return hwloc_linux_set_tid_cpubind(topology, tid, cpuset); |
| } |
| |
| static int |
| hwloc_linux_foreach_proc_tid_get_cpubind_cb(hwloc_topology_t topology, pid_t tid, void *data, int idx, int flags) |
| { |
| hwloc_bitmap_t *cpusets = data; |
| hwloc_bitmap_t cpuset = cpusets[0]; |
| hwloc_bitmap_t tidset = cpusets[1]; |
| |
| if (hwloc_linux_get_tid_cpubind(topology, tid, tidset)) |
| return -1; |
| |
| /* reset the cpuset on first iteration */ |
| if (!idx) |
| hwloc_bitmap_zero(cpuset); |
| |
| if (flags & HWLOC_CPUBIND_STRICT) { |
| /* if STRICT, we want all threads to have the same binding */ |
| if (!idx) { |
| /* this is the first thread, copy its binding */ |
| hwloc_bitmap_copy(cpuset, tidset); |
| } else if (!hwloc_bitmap_isequal(cpuset, tidset)) { |
      /* this is not the first thread, and its binding is different */
| errno = EXDEV; |
| return -1; |
| } |
| } else { |
| /* if not STRICT, just OR all thread bindings */ |
| hwloc_bitmap_or(cpuset, cpuset, tidset); |
| } |
| return 0; |
| } |
| |
| /* Call the callback for each process tid. */ |
| static int |
| hwloc_linux_foreach_proc_tid(hwloc_topology_t topology, |
| pid_t pid, hwloc_linux_foreach_proc_tid_cb_t cb, |
| void *data, int flags) |
| { |
| char taskdir_path[128]; |
| DIR *taskdir; |
| pid_t *tids, *newtids; |
| unsigned i, nr, newnr; |
| int err; |
| |
| if (pid) |
| snprintf(taskdir_path, sizeof(taskdir_path), "/proc/%u/task", (unsigned) pid); |
| else |
| snprintf(taskdir_path, sizeof(taskdir_path), "/proc/self/task"); |
| |
| taskdir = opendir(taskdir_path); |
| if (!taskdir) { |
| errno = ENOSYS; |
| err = -1; |
| goto out; |
| } |
| |
| /* read the current list of threads */ |
| err = hwloc_linux_get_proc_tids(taskdir, &nr, &tids); |
| if (err < 0) |
| goto out_with_dir; |
| |
| retry: |
| /* apply the callback to all threads */ |
| for(i=0; i<nr; i++) { |
| err = cb(topology, tids[i], data, i, flags); |
| if (err < 0) |
| goto out_with_tids; |
| } |
| |
  /* re-read the list of threads and retry if it changed in the meantime */
| err = hwloc_linux_get_proc_tids(taskdir, &newnr, &newtids); |
| if (err < 0) |
| goto out_with_tids; |
| if (newnr != nr || memcmp(newtids, tids, nr*sizeof(pid_t))) { |
| free(tids); |
| tids = newtids; |
| nr = newnr; |
| goto retry; |
| } |
| |
| err = 0; |
| free(newtids); |
| out_with_tids: |
| free(tids); |
| out_with_dir: |
| closedir(taskdir); |
| out: |
| return err; |
| } |
| |
| static int |
| hwloc_linux_set_pid_cpubind(hwloc_topology_t topology, pid_t pid, hwloc_const_bitmap_t hwloc_set, int flags) |
| { |
| return hwloc_linux_foreach_proc_tid(topology, pid, |
| hwloc_linux_foreach_proc_tid_set_cpubind_cb, |
| (void*) hwloc_set, flags); |
| } |
| |
| static int |
| hwloc_linux_get_pid_cpubind(hwloc_topology_t topology, pid_t pid, hwloc_bitmap_t hwloc_set, int flags) |
| { |
| hwloc_bitmap_t tidset = hwloc_bitmap_alloc(); |
| hwloc_bitmap_t cpusets[2]; |
| int ret; |
| |
| cpusets[0] = hwloc_set; |
| cpusets[1] = tidset; |
| ret = hwloc_linux_foreach_proc_tid(topology, pid, |
| hwloc_linux_foreach_proc_tid_get_cpubind_cb, |
| (void*) cpusets, flags); |
| hwloc_bitmap_free(tidset); |
| return ret; |
| } |
| |
| static int |
| hwloc_linux_set_proc_cpubind(hwloc_topology_t topology, pid_t pid, hwloc_const_bitmap_t hwloc_set, int flags) |
| { |
| if (pid == 0) |
| pid = topology->pid; |
| if (flags & HWLOC_CPUBIND_THREAD) |
| return hwloc_linux_set_tid_cpubind(topology, pid, hwloc_set); |
| else |
| return hwloc_linux_set_pid_cpubind(topology, pid, hwloc_set, flags); |
| } |
| |
| static int |
| hwloc_linux_get_proc_cpubind(hwloc_topology_t topology, pid_t pid, hwloc_bitmap_t hwloc_set, int flags) |
| { |
| if (pid == 0) |
| pid = topology->pid; |
| if (flags & HWLOC_CPUBIND_THREAD) |
| return hwloc_linux_get_tid_cpubind(topology, pid, hwloc_set); |
| else |
| return hwloc_linux_get_pid_cpubind(topology, pid, hwloc_set, flags); |
| } |
| |
| static int |
| hwloc_linux_set_thisproc_cpubind(hwloc_topology_t topology, hwloc_const_bitmap_t hwloc_set, int flags) |
| { |
| return hwloc_linux_set_pid_cpubind(topology, topology->pid, hwloc_set, flags); |
| } |
| |
| static int |
| hwloc_linux_get_thisproc_cpubind(hwloc_topology_t topology, hwloc_bitmap_t hwloc_set, int flags) |
| { |
| return hwloc_linux_get_pid_cpubind(topology, topology->pid, hwloc_set, flags); |
| } |
| |
| static int |
| hwloc_linux_set_thisthread_cpubind(hwloc_topology_t topology, hwloc_const_bitmap_t hwloc_set, int flags __hwloc_attribute_unused) |
| { |
| if (topology->pid) { |
| errno = ENOSYS; |
| return -1; |
| } |
| return hwloc_linux_set_tid_cpubind(topology, 0, hwloc_set); |
| } |
| |
| static int |
| hwloc_linux_get_thisthread_cpubind(hwloc_topology_t topology, hwloc_bitmap_t hwloc_set, int flags __hwloc_attribute_unused) |
| { |
| if (topology->pid) { |
| errno = ENOSYS; |
| return -1; |
| } |
| return hwloc_linux_get_tid_cpubind(topology, 0, hwloc_set); |
| } |
| |
| #if HAVE_DECL_PTHREAD_SETAFFINITY_NP |
| #pragma weak pthread_setaffinity_np |
| #pragma weak pthread_self |
| |
| static int |
| hwloc_linux_set_thread_cpubind(hwloc_topology_t topology, pthread_t tid, hwloc_const_bitmap_t hwloc_set, int flags __hwloc_attribute_unused) |
| { |
| int err; |
| |
| if (topology->pid) { |
| errno = ENOSYS; |
| return -1; |
| } |
| |
| if (!pthread_self) { |
| /* ?! Application uses set_thread_cpubind, but doesn't link against libpthread ?! */ |
| errno = ENOSYS; |
| return -1; |
| } |
| if (tid == pthread_self()) |
| return hwloc_linux_set_tid_cpubind(topology, 0, hwloc_set); |
| |
| if (!pthread_setaffinity_np) { |
| errno = ENOSYS; |
| return -1; |
| } |
| /* TODO Kerrighed: Use |
| * int migrate (pid_t pid, int destination_node); |
| * int migrate_self (int destination_node); |
| * int thread_migrate (int thread_id, int destination_node); |
| */ |
| |
| #if defined(HWLOC_HAVE_CPU_SET_S) && !defined(HWLOC_HAVE_OLD_SCHED_SETAFFINITY) |
| /* Use a separate block so that we can define specific variable |
| types here */ |
| { |
| cpu_set_t *plinux_set; |
| unsigned cpu; |
| int last; |
| size_t setsize; |
| |
| last = hwloc_bitmap_last(hwloc_set); |
| if (last == -1) { |
| errno = EINVAL; |
| return -1; |
| } |
| |
| setsize = CPU_ALLOC_SIZE(last+1); |
| plinux_set = CPU_ALLOC(last+1); |
| |
| CPU_ZERO_S(setsize, plinux_set); |
| hwloc_bitmap_foreach_begin(cpu, hwloc_set) |
| CPU_SET_S(cpu, setsize, plinux_set); |
| hwloc_bitmap_foreach_end(); |
| |
| err = pthread_setaffinity_np(tid, setsize, plinux_set); |
| |
| CPU_FREE(plinux_set); |
| } |
| #elif defined(HWLOC_HAVE_CPU_SET) |
| /* Use a separate block so that we can define specific variable |
| types here */ |
| { |
| cpu_set_t linux_set; |
| unsigned cpu; |
| |
| CPU_ZERO(&linux_set); |
| hwloc_bitmap_foreach_begin(cpu, hwloc_set) |
| CPU_SET(cpu, &linux_set); |
| hwloc_bitmap_foreach_end(); |
| |
| #ifdef HWLOC_HAVE_OLD_SCHED_SETAFFINITY |
| err = pthread_setaffinity_np(tid, &linux_set); |
| #else /* HWLOC_HAVE_OLD_SCHED_SETAFFINITY */ |
| err = pthread_setaffinity_np(tid, sizeof(linux_set), &linux_set); |
| #endif /* HWLOC_HAVE_OLD_SCHED_SETAFFINITY */ |
| } |
| #else /* CPU_SET */ |
| /* Use a separate block so that we can define specific variable |
| types here */ |
| { |
| unsigned long mask = hwloc_bitmap_to_ulong(hwloc_set); |
| |
| #ifdef HWLOC_HAVE_OLD_SCHED_SETAFFINITY |
| err = pthread_setaffinity_np(tid, (void*) &mask); |
| #else /* HWLOC_HAVE_OLD_SCHED_SETAFFINITY */ |
| err = pthread_setaffinity_np(tid, sizeof(mask), (void*) &mask); |
| #endif /* HWLOC_HAVE_OLD_SCHED_SETAFFINITY */ |
| } |
| #endif /* CPU_SET */ |
| |
| if (err) { |
| errno = err; |
| return -1; |
| } |
| return 0; |
| } |
| #endif /* HAVE_DECL_PTHREAD_SETAFFINITY_NP */ |
| |
| #if HAVE_DECL_PTHREAD_GETAFFINITY_NP |
| #pragma weak pthread_getaffinity_np |
| #pragma weak pthread_self |
| |
| static int |
| hwloc_linux_get_thread_cpubind(hwloc_topology_t topology, pthread_t tid, hwloc_bitmap_t hwloc_set, int flags __hwloc_attribute_unused) |
| { |
| int err; |
| |
| if (topology->pid) { |
| errno = ENOSYS; |
| return -1; |
| } |
| |
| if (!pthread_self) { |
    /* ?! Application uses get_thread_cpubind, but doesn't link against libpthread ?! */
| errno = ENOSYS; |
| return -1; |
| } |
| if (tid == pthread_self()) |
| return hwloc_linux_get_tid_cpubind(topology, 0, hwloc_set); |
| |
| if (!pthread_getaffinity_np) { |
| errno = ENOSYS; |
| return -1; |
| } |
| /* TODO Kerrighed */ |
| |
| #if defined(HWLOC_HAVE_CPU_SET_S) && !defined(HWLOC_HAVE_OLD_SCHED_SETAFFINITY) |
| /* Use a separate block so that we can define specific variable |
| types here */ |
| { |
| cpu_set_t *plinux_set; |
| unsigned cpu; |
| int last; |
| size_t setsize; |
| |
| last = hwloc_bitmap_last(topology->levels[0][0]->complete_cpuset); |
| assert (last != -1); |
| |
| setsize = CPU_ALLOC_SIZE(last+1); |
| plinux_set = CPU_ALLOC(last+1); |
| |
| err = pthread_getaffinity_np(tid, setsize, plinux_set); |
| if (err) { |
| CPU_FREE(plinux_set); |
| errno = err; |
| return -1; |
| } |
| |
| hwloc_bitmap_zero(hwloc_set); |
    for(cpu=0; cpu<=(unsigned) last; cpu++)
| if (CPU_ISSET_S(cpu, setsize, plinux_set)) |
| hwloc_bitmap_set(hwloc_set, cpu); |
| |
| CPU_FREE(plinux_set); |
| } |
| #elif defined(HWLOC_HAVE_CPU_SET) |
| /* Use a separate block so that we can define specific variable |
| types here */ |
| { |
| cpu_set_t linux_set; |
| unsigned cpu; |
| |
| #ifdef HWLOC_HAVE_OLD_SCHED_SETAFFINITY |
| err = pthread_getaffinity_np(tid, &linux_set); |
| #else /* HWLOC_HAVE_OLD_SCHED_SETAFFINITY */ |
| err = pthread_getaffinity_np(tid, sizeof(linux_set), &linux_set); |
| #endif /* HWLOC_HAVE_OLD_SCHED_SETAFFINITY */ |
| if (err) { |
| errno = err; |
| return -1; |
| } |
| |
| hwloc_bitmap_zero(hwloc_set); |
| for(cpu=0; cpu<CPU_SETSIZE; cpu++) |
| if (CPU_ISSET(cpu, &linux_set)) |
| hwloc_bitmap_set(hwloc_set, cpu); |
| } |
| #else /* CPU_SET */ |
| /* Use a separate block so that we can define specific variable |
| types here */ |
| { |
| unsigned long mask; |
| |
| #ifdef HWLOC_HAVE_OLD_SCHED_SETAFFINITY |
| err = pthread_getaffinity_np(tid, (void*) &mask); |
| #else /* HWLOC_HAVE_OLD_SCHED_SETAFFINITY */ |
| err = pthread_getaffinity_np(tid, sizeof(mask), (void*) &mask); |
| #endif /* HWLOC_HAVE_OLD_SCHED_SETAFFINITY */ |
| if (err) { |
| errno = err; |
| return -1; |
| } |
| |
| hwloc_bitmap_from_ulong(hwloc_set, mask); |
| } |
| #endif /* CPU_SET */ |
| |
| return 0; |
| } |
| #endif /* HAVE_DECL_PTHREAD_GETAFFINITY_NP */ |
| |
| static int |
| hwloc_linux_get_tid_last_cpu_location(hwloc_topology_t topology __hwloc_attribute_unused, pid_t tid, hwloc_bitmap_t set) |
| { |
| /* read /proc/pid/stat. |
| * its second field contains the command name between parentheses, |
| * and the command itself may contain parentheses, |
| * so read the whole line and find the last closing parenthesis to find the third field. |
| */ |
| char buf[1024] = ""; |
| char name[64]; |
| char *tmp; |
| FILE *file; |
| int i; |
| |
| if (!tid) { |
| #ifdef SYS_gettid |
| tid = syscall(SYS_gettid); |
| #else |
| errno = ENOSYS; |
| return -1; |
| #endif |
| } |
| |
| snprintf(name, sizeof(name), "/proc/%lu/stat", (unsigned long) tid); |
| file = fopen(name, "r"); |
| if (!file) { |
| errno = ENOSYS; |
| return -1; |
| } |
| tmp = fgets(buf, sizeof(buf), file); |
| fclose(file); |
| if (!tmp) { |
| errno = ENOSYS; |
| return -1; |
| } |
| |
| tmp = strrchr(buf, ')'); |
| if (!tmp) { |
| errno = ENOSYS; |
| return -1; |
| } |
  /* skip ') ' to reach the third field */
  tmp += 2;

  /* skip fields 3 to 38, i.e. 36 fields, to reach the processor field */
  for(i=0; i<36; i++) {
| tmp = strchr(tmp, ' '); |
| if (!tmp) { |
| errno = ENOSYS; |
| return -1; |
| } |
| /* skip the ' ' itself */ |
| tmp++; |
| } |
| |
  /* read the last-used cpu from the 39th field (processor) now */
| if (sscanf(tmp, "%d ", &i) != 1) { |
| errno = ENOSYS; |
| return -1; |
| } |
| |
| hwloc_bitmap_only(set, i); |
| return 0; |
| } |
| |
| static int |
| hwloc_linux_foreach_proc_tid_get_last_cpu_location_cb(hwloc_topology_t topology, pid_t tid, void *data, int idx, int flags __hwloc_attribute_unused) |
| { |
| hwloc_bitmap_t *cpusets = data; |
| hwloc_bitmap_t cpuset = cpusets[0]; |
| hwloc_bitmap_t tidset = cpusets[1]; |
| |
| if (hwloc_linux_get_tid_last_cpu_location(topology, tid, tidset)) |
| return -1; |
| |
| /* reset the cpuset on first iteration */ |
| if (!idx) |
| hwloc_bitmap_zero(cpuset); |
| |
| hwloc_bitmap_or(cpuset, cpuset, tidset); |
| return 0; |
| } |
| |
| static int |
| hwloc_linux_get_pid_last_cpu_location(hwloc_topology_t topology, pid_t pid, hwloc_bitmap_t hwloc_set, int flags) |
| { |
| hwloc_bitmap_t tidset = hwloc_bitmap_alloc(); |
| hwloc_bitmap_t cpusets[2]; |
| int ret; |
| |
| cpusets[0] = hwloc_set; |
| cpusets[1] = tidset; |
| ret = hwloc_linux_foreach_proc_tid(topology, pid, |
| hwloc_linux_foreach_proc_tid_get_last_cpu_location_cb, |
| (void*) cpusets, flags); |
| hwloc_bitmap_free(tidset); |
| return ret; |
| } |
| |
| static int |
| hwloc_linux_get_proc_last_cpu_location(hwloc_topology_t topology, pid_t pid, hwloc_bitmap_t hwloc_set, int flags) |
| { |
| if (pid == 0) |
| pid = topology->pid; |
| if (flags & HWLOC_CPUBIND_THREAD) |
| return hwloc_linux_get_tid_last_cpu_location(topology, pid, hwloc_set); |
| else |
| return hwloc_linux_get_pid_last_cpu_location(topology, pid, hwloc_set, flags); |
| } |
| |
| static int |
| hwloc_linux_get_thisproc_last_cpu_location(hwloc_topology_t topology, hwloc_bitmap_t hwloc_set, int flags) |
| { |
| return hwloc_linux_get_pid_last_cpu_location(topology, topology->pid, hwloc_set, flags); |
| } |
| |
| static int |
| hwloc_linux_get_thisthread_last_cpu_location(hwloc_topology_t topology, hwloc_bitmap_t hwloc_set, int flags __hwloc_attribute_unused) |
| { |
| if (topology->pid) { |
| errno = ENOSYS; |
| return -1; |
| } |
| return hwloc_linux_get_tid_last_cpu_location(topology, 0, hwloc_set); |
| } |
| |
| |
| #if defined HWLOC_HAVE_SET_MEMPOLICY || defined HWLOC_HAVE_MBIND |
| static int |
| hwloc_linux_membind_policy_from_hwloc(int *linuxpolicy, hwloc_membind_policy_t policy, int flags) |
| { |
| switch (policy) { |
| case HWLOC_MEMBIND_DEFAULT: |
| case HWLOC_MEMBIND_FIRSTTOUCH: |
| *linuxpolicy = MPOL_DEFAULT; |
| break; |
| case HWLOC_MEMBIND_BIND: |
| if (flags & HWLOC_MEMBIND_STRICT) |
| *linuxpolicy = MPOL_BIND; |
| else |
| *linuxpolicy = MPOL_PREFERRED; |
| break; |
| case HWLOC_MEMBIND_INTERLEAVE: |
| *linuxpolicy = MPOL_INTERLEAVE; |
| break; |
| /* TODO: next-touch when (if?) patch applied upstream */ |
| default: |
| errno = ENOSYS; |
| return -1; |
| } |
| return 0; |
| } |
| |
| static int |
| hwloc_linux_membind_mask_from_nodeset(hwloc_topology_t topology __hwloc_attribute_unused, |
| hwloc_const_nodeset_t nodeset, |
| unsigned *max_os_index_p, unsigned long **linuxmaskp) |
| { |
| unsigned max_os_index = 0; /* highest os_index + 1 */ |
| unsigned long *linuxmask; |
| unsigned i; |
| hwloc_nodeset_t linux_nodeset = NULL; |
| |
| if (hwloc_bitmap_isfull(nodeset)) { |
| linux_nodeset = hwloc_bitmap_alloc(); |
| hwloc_bitmap_only(linux_nodeset, 0); |
| nodeset = linux_nodeset; |
| } |
| |
| max_os_index = hwloc_bitmap_last(nodeset); |
| if (max_os_index == (unsigned) -1) |
| max_os_index = 0; |
| /* add 1 to convert the last os_index into a max_os_index, |
| * and round up to the nearest multiple of BITS_PER_LONG */ |
| max_os_index = (max_os_index + 1 + HWLOC_BITS_PER_LONG - 1) & ~(HWLOC_BITS_PER_LONG - 1); |
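  /* e.g. with HWLOC_BITS_PER_LONG == 64, a last os_index of 70 yields
   * max_os_index == 128, i.e. a 2-ulong mask. */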
| |
| linuxmask = calloc(max_os_index/HWLOC_BITS_PER_LONG, sizeof(long)); |
| if (!linuxmask) { |
| errno = ENOMEM; |
| return -1; |
| } |
| |
| for(i=0; i<max_os_index/HWLOC_BITS_PER_LONG; i++) |
| linuxmask[i] = hwloc_bitmap_to_ith_ulong(nodeset, i); |
| |
| if (linux_nodeset) |
| hwloc_bitmap_free(linux_nodeset); |
| |
| *max_os_index_p = max_os_index; |
| *linuxmaskp = linuxmask; |
| return 0; |
| } |
| |
| static void |
| hwloc_linux_membind_mask_to_nodeset(hwloc_topology_t topology __hwloc_attribute_unused, |
| hwloc_nodeset_t nodeset, |
| unsigned max_os_index, const unsigned long *linuxmask) |
| { |
| unsigned i; |
| |
| #ifdef HWLOC_DEBUG |
| /* max_os_index comes from hwloc_linux_find_kernel_max_numnodes() so it's a multiple of HWLOC_BITS_PER_LONG */ |
| assert(!(max_os_index%HWLOC_BITS_PER_LONG)); |
| #endif |
| |
| hwloc_bitmap_zero(nodeset); |
| for(i=0; i<max_os_index/HWLOC_BITS_PER_LONG; i++) |
| hwloc_bitmap_set_ith_ulong(nodeset, i, linuxmask[i]); |
| } |
| #endif /* HWLOC_HAVE_SET_MEMPOLICY || HWLOC_HAVE_MBIND */ |
| |
| #ifdef HWLOC_HAVE_MBIND |
| static int |
| hwloc_linux_set_area_membind(hwloc_topology_t topology, const void *addr, size_t len, hwloc_const_nodeset_t nodeset, hwloc_membind_policy_t policy, int flags) |
| { |
| unsigned max_os_index; /* highest os_index + 1 */ |
| unsigned long *linuxmask; |
| size_t remainder; |
| int linuxpolicy; |
| unsigned linuxflags = 0; |
| int err; |
| |
| remainder = (uintptr_t) addr & (sysconf(_SC_PAGESIZE)-1); |
| addr = (char*) addr - remainder; |
| len += remainder; |
| |
| err = hwloc_linux_membind_policy_from_hwloc(&linuxpolicy, policy, flags); |
| if (err < 0) |
| return err; |
| |
| if (linuxpolicy == MPOL_DEFAULT) |
| /* Some Linux kernels don't like being passed a set */ |
| return mbind((void *) addr, len, linuxpolicy, NULL, 0, 0); |
| |
| err = hwloc_linux_membind_mask_from_nodeset(topology, nodeset, &max_os_index, &linuxmask); |
| if (err < 0) |
| goto out; |
| |
| if (flags & HWLOC_MEMBIND_MIGRATE) { |
| #ifdef MPOL_MF_MOVE |
| linuxflags = MPOL_MF_MOVE; |
| if (flags & HWLOC_MEMBIND_STRICT) |
| linuxflags |= MPOL_MF_STRICT; |
| #else |
| if (flags & HWLOC_MEMBIND_STRICT) { |
| errno = ENOSYS; |
| goto out_with_mask; |
| } |
| #endif |
| } |
| |
| err = mbind((void *) addr, len, linuxpolicy, linuxmask, max_os_index+1, linuxflags); |
| if (err < 0) |
| goto out_with_mask; |
| |
| free(linuxmask); |
| return 0; |
| |
| out_with_mask: |
| free(linuxmask); |
| out: |
| return -1; |
| } |
| |
| static void * |
| hwloc_linux_alloc_membind(hwloc_topology_t topology, size_t len, hwloc_const_nodeset_t nodeset, hwloc_membind_policy_t policy, int flags) |
| { |
| void *buffer; |
| int err; |
| |
| buffer = hwloc_alloc_mmap(topology, len); |
| if (buffer == MAP_FAILED) |
| return NULL; |
| |
| err = hwloc_linux_set_area_membind(topology, buffer, len, nodeset, policy, flags); |
  if (err < 0 && (flags & HWLOC_MEMBIND_STRICT)) {
| munmap(buffer, len); |
| return NULL; |
| } |
| |
| return buffer; |
| } |
| #endif /* HWLOC_HAVE_MBIND */ |
| |
| #ifdef HWLOC_HAVE_SET_MEMPOLICY |
| static int |
| hwloc_linux_set_thisthread_membind(hwloc_topology_t topology, hwloc_const_nodeset_t nodeset, hwloc_membind_policy_t policy, int flags) |
| { |
| unsigned max_os_index; /* highest os_index + 1 */ |
| unsigned long *linuxmask; |
| int linuxpolicy; |
| int err; |
| |
| err = hwloc_linux_membind_policy_from_hwloc(&linuxpolicy, policy, flags); |
| if (err < 0) |
| return err; |
| |
| if (linuxpolicy == MPOL_DEFAULT) |
| /* Some Linux kernels don't like being passed a set */ |
| return set_mempolicy(linuxpolicy, NULL, 0); |
| |
| err = hwloc_linux_membind_mask_from_nodeset(topology, nodeset, &max_os_index, &linuxmask); |
| if (err < 0) |
| goto out; |
| |
| if (flags & HWLOC_MEMBIND_MIGRATE) { |
| #ifdef HWLOC_HAVE_MIGRATE_PAGES |
| unsigned long *fullmask = malloc(max_os_index/HWLOC_BITS_PER_LONG * sizeof(long)); |
| if (fullmask) { |
      memset(fullmask, 0xff, max_os_index/HWLOC_BITS_PER_LONG * sizeof(long)); /* all bits set: migrate from any node */
| err = migrate_pages(0, max_os_index+1, fullmask, linuxmask); |
| free(fullmask); |
| } else |
| err = -1; |
| if (err < 0 && (flags & HWLOC_MEMBIND_STRICT)) |
| goto out_with_mask; |
| #else |
| errno = ENOSYS; |
| goto out_with_mask; |
| #endif |
| } |
| |
| err = set_mempolicy(linuxpolicy, linuxmask, max_os_index+1); |
| if (err < 0) |
| goto out_with_mask; |
| |
| free(linuxmask); |
| return 0; |
| |
| out_with_mask: |
| free(linuxmask); |
| out: |
| return -1; |
| } |
| |
| /* |
| * On some kernels, get_mempolicy requires the output size to be larger |
| * than the kernel MAX_NUMNODES (defined by CONFIG_NODES_SHIFT). |
 * Try get_mempolicy on ourselves until we find a max_os_index value that
| * makes the kernel happy. |
| */ |
| static int |
| hwloc_linux_find_kernel_max_numnodes(hwloc_topology_t topology __hwloc_attribute_unused) |
| { |
| static int max_numnodes = -1; |
| int linuxpolicy; |
| |
| if (max_numnodes != -1) |
| /* already computed */ |
| return max_numnodes; |
| |
  /* start with a single ulong; it's minimal and enough for most machines */
| max_numnodes = HWLOC_BITS_PER_LONG; |
| while (1) { |
| unsigned long *mask = malloc(max_numnodes / HWLOC_BITS_PER_LONG * sizeof(long)); |
| int err = get_mempolicy(&linuxpolicy, mask, max_numnodes, 0, 0); |
| free(mask); |
| if (!err || errno != EINVAL) |
| /* found it */ |
| return max_numnodes; |
| max_numnodes *= 2; |
| } |
| } |
| |
| static int |
| hwloc_linux_get_thisthread_membind(hwloc_topology_t topology, hwloc_nodeset_t nodeset, hwloc_membind_policy_t *policy, int flags __hwloc_attribute_unused) |
| { |
| unsigned max_os_index; |
| unsigned long *linuxmask; |
| int linuxpolicy; |
| int err; |
| |
| max_os_index = hwloc_linux_find_kernel_max_numnodes(topology); |
| |
| linuxmask = malloc(max_os_index/HWLOC_BITS_PER_LONG * sizeof(long)); |
| if (!linuxmask) { |
| errno = ENOMEM; |
| goto out; |
| } |
| |
| err = get_mempolicy(&linuxpolicy, linuxmask, max_os_index, 0, 0); |
| if (err < 0) |
| goto out_with_mask; |
| |
| if (linuxpolicy == MPOL_DEFAULT) { |
| hwloc_bitmap_copy(nodeset, hwloc_topology_get_topology_nodeset(topology)); |
| } else { |
| hwloc_linux_membind_mask_to_nodeset(topology, nodeset, max_os_index, linuxmask); |
| } |
| |
| switch (linuxpolicy) { |
| case MPOL_DEFAULT: |
| *policy = HWLOC_MEMBIND_FIRSTTOUCH; |
| break; |
| case MPOL_PREFERRED: |
| case MPOL_BIND: |
| *policy = HWLOC_MEMBIND_BIND; |
| break; |
| case MPOL_INTERLEAVE: |
| *policy = HWLOC_MEMBIND_INTERLEAVE; |
| break; |
| default: |
| errno = EINVAL; |
| goto out_with_mask; |
| } |
| |
| free(linuxmask); |
| return 0; |
| |
| out_with_mask: |
| free(linuxmask); |
| out: |
| return -1; |
| } |
| |
| #endif /* HWLOC_HAVE_SET_MEMPOLICY */ |
| |
| int |
| hwloc_backend_sysfs_init(struct hwloc_topology *topology, const char *fsroot_path __hwloc_attribute_unused) |
| { |
| #ifdef HAVE_OPENAT |
| int root; |
| |
| assert(topology->backend_type == HWLOC_BACKEND_NONE); |
| |
| if (!fsroot_path) |
| fsroot_path = "/"; |
| |
| root = open(fsroot_path, O_RDONLY | O_DIRECTORY); |
| if (root < 0) |
| return -1; |
| |
| if (strcmp(fsroot_path, "/")) |
| topology->is_thissystem = 0; |
| |
| topology->backend_params.sysfs.root_path = strdup(fsroot_path); |
| topology->backend_params.sysfs.root_fd = root; |
| #else |
| topology->backend_params.sysfs.root_path = NULL; |
| topology->backend_params.sysfs.root_fd = -1; |
| #endif |
| topology->backend_type = HWLOC_BACKEND_SYSFS; |
| return 0; |
| } |
| |
| void |
| hwloc_backend_sysfs_exit(struct hwloc_topology *topology) |
| { |
| assert(topology->backend_type == HWLOC_BACKEND_SYSFS); |
| #ifdef HAVE_OPENAT |
| close(topology->backend_params.sysfs.root_fd); |
| free(topology->backend_params.sysfs.root_path); |
| topology->backend_params.sysfs.root_path = NULL; |
| #endif |
| topology->backend_type = HWLOC_BACKEND_NONE; |
| } |
| |
| static int |
| hwloc_parse_sysfs_unsigned(const char *mappath, unsigned *value, int fsroot_fd) |
| { |
| char string[11]; |
| FILE * fd; |
| |
| fd = hwloc_fopen(mappath, "r", fsroot_fd); |
| if (!fd) { |
| *value = -1; |
| return -1; |
| } |
| |
| if (!fgets(string, 11, fd)) { |
| *value = -1; |
| fclose(fd); |
| return -1; |
| } |
| *value = strtoul(string, NULL, 10); |
| |
| fclose(fd); |
| |
| return 0; |
| } |
| |
| |
/* kernel cpumaps are composed of an array of 32-bit cpumasks */
| #define KERNEL_CPU_MASK_BITS 32 |
| #define KERNEL_CPU_MAP_LEN (KERNEL_CPU_MASK_BITS/4+2) |
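/* e.g. "00000001,00000003" (assumed example) describes CPUs 0, 1 and 32:
 * comma-separated 32-bit hex masks, most-significant word first. */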
| |
| int |
| hwloc_linux_parse_cpumap_file(FILE *file, hwloc_bitmap_t set) |
| { |
| unsigned long *maps; |
| unsigned long map; |
| int nr_maps = 0; |
| static int nr_maps_allocated = 8; /* only compute the power-of-two above the kernel cpumask size once */ |
| int i; |
| |
  maps = malloc(nr_maps_allocated * sizeof(*maps));
  if (!maps)
    return -1;
| |
| /* reset to zero first */ |
| hwloc_bitmap_zero(set); |
| |
| /* parse the whole mask */ |
| while (fscanf(file, "%lx,", &map) == 1) /* read one kernel cpu mask and the ending comma */ |
| { |
    if (nr_maps == nr_maps_allocated) {
      unsigned long *newmaps;
      nr_maps_allocated *= 2;
      newmaps = realloc(maps, nr_maps_allocated * sizeof(*maps));
      if (!newmaps) {
        free(maps);
        return -1;
      }
      maps = newmaps;
    }
| |
| if (!map && !nr_maps) |
| /* ignore the first map if it's empty */ |
| continue; |
| |
| memmove(&maps[1], &maps[0], nr_maps*sizeof(*maps)); |
| maps[0] = map; |
| nr_maps++; |
| } |
| |
| /* convert into a set */ |
| #if KERNEL_CPU_MASK_BITS == HWLOC_BITS_PER_LONG |
| for(i=0; i<nr_maps; i++) |
| hwloc_bitmap_set_ith_ulong(set, i, maps[i]); |
| #else |
| for(i=0; i<(nr_maps+1)/2; i++) { |
| unsigned long mask; |
| mask = maps[2*i]; |
| if (2*i+1<nr_maps) |
| mask |= maps[2*i+1] << KERNEL_CPU_MASK_BITS; |
| hwloc_bitmap_set_ith_ulong(set, i, mask); |
| } |
| #endif |
| |
| free(maps); |
| |
| return 0; |
| } |
| |
| static hwloc_bitmap_t |
| hwloc_parse_cpumap(const char *mappath, int fsroot_fd) |
| { |
| hwloc_bitmap_t set; |
| FILE * file; |
| |
| file = hwloc_fopen(mappath, "r", fsroot_fd); |
| if (!file) |
| return NULL; |
| |
| set = hwloc_bitmap_alloc(); |
| hwloc_linux_parse_cpumap_file(file, set); |
| |
| fclose(file); |
| return set; |
| } |
| |
| static char * |
| hwloc_strdup_mntpath(const char *escapedpath, size_t length) |
| { |
| char *path = malloc(length+1); |
| const char *src = escapedpath, *tmp = src; |
| char *dst = path; |
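  /* the kernel escapes space, tab and newline in mount paths as the octal
   * sequences \040, \011 and \012 (see getmntent(3)), e.g. "/mnt/my\040disk"
   * stands for "/mnt/my disk"; undo these escapes here. */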
| |
| while ((tmp = strchr(src, '\\')) != NULL) { |
| strncpy(dst, src, tmp-src); |
| dst += tmp-src; |
| if (!strncmp(tmp+1, "040", 3)) |
| *dst = ' '; |
| else if (!strncmp(tmp+1, "011", 3)) |
      *dst = '\t';
| else if (!strncmp(tmp+1, "012", 3)) |
| *dst = '\n'; |
| else |
| *dst = '\\'; |
| dst++; |
| src = tmp+4; |
| } |
| |
| strcpy(dst, src); |
| |
| return path; |
| } |
| |
| static void |
| hwloc_find_linux_cpuset_mntpnt(char **cgroup_mntpnt, char **cpuset_mntpnt, int fsroot_fd) |
| { |
| #define PROC_MOUNT_LINE_LEN 512 |
| char line[PROC_MOUNT_LINE_LEN]; |
| FILE *fd; |
| |
| *cgroup_mntpnt = NULL; |
| *cpuset_mntpnt = NULL; |
| |
| /* ideally we should use setmntent, getmntent, hasmntopt and endmntent, |
| * but they do not support fsroot_fd. |
| */ |
| |
| fd = hwloc_fopen("/proc/mounts", "r", fsroot_fd); |
| if (!fd) |
| return; |
| |
| while (fgets(line, sizeof(line), fd)) { |
| char *path; |
| char *type; |
| char *tmp; |
| |
| /* remove the ending " 0 0\n" that the kernel always adds */ |
| tmp = line + strlen(line) - 5; |
| if (tmp < line || strcmp(tmp, " 0 0\n")) |
| fprintf(stderr, "Unexpected end of /proc/mounts line `%s'\n", line); |
| else |
| *tmp = '\0'; |
| |
| /* path is after first field and a space */ |
| tmp = strchr(line, ' '); |
| if (!tmp) |
| continue; |
| path = tmp+1; |
| |
| /* type is after path, which may not contain spaces since the kernel escaped them to \040 |
| * (see the manpage of getmntent) */ |
| tmp = strchr(path, ' '); |
| if (!tmp) |
| continue; |
| type = tmp+1; |
| /* mark the end of path to ease upcoming strdup */ |
| *tmp = '\0'; |
| |
| if (!strncmp(type, "cpuset ", 7)) { |
| /* found a cpuset mntpnt */ |
| hwloc_debug("Found cpuset mount point on %s\n", path); |
| *cpuset_mntpnt = hwloc_strdup_mntpath(path, type-path); |
| break; |
| |
| } else if (!strncmp(type, "cgroup ", 7)) { |
| /* found a cgroup mntpnt */ |
| char *opt, *opts; |
| int cpuset_opt = 0; |
| int noprefix_opt = 0; |
| |
| /* find options */ |
| tmp = strchr(type, ' '); |
| if (!tmp) |
| continue; |
| opts = tmp+1; |
| |
| /* look at options */ |
| while ((opt = strsep(&opts, ",")) != NULL) { |
| if (!strcmp(opt, "cpuset")) |
| cpuset_opt = 1; |
| else if (!strcmp(opt, "noprefix")) |
| noprefix_opt = 1; |
| } |
| if (!cpuset_opt) |
| continue; |
| |
| if (noprefix_opt) { |
| hwloc_debug("Found cgroup emulating a cpuset mount point on %s\n", path); |
| *cpuset_mntpnt = hwloc_strdup_mntpath(path, type-path); |
| } else { |
| hwloc_debug("Found cgroup/cpuset mount point on %s\n", path); |
| *cgroup_mntpnt = hwloc_strdup_mntpath(path, type-path); |
| } |
| break; |
| } |
| } |
| |
| fclose(fd); |
| } |
| |
| /* |
| * Linux cpusets may be managed directly or through cgroup. |
| * If cgroup is used, tasks get a /proc/pid/cgroup which may contain a |
 * single line %d:cpuset:<name>. If cpusets are used directly, tasks get
 * /proc/pid/cpuset containing <name>.
| */ |
| static char * |
| hwloc_read_linux_cpuset_name(int fsroot_fd, hwloc_pid_t pid) |
| { |
| #define CPUSET_NAME_LEN 128 |
| char cpuset_name[CPUSET_NAME_LEN]; |
| FILE *fd; |
| char *tmp; |
| |
| /* check whether a cgroup-cpuset is enabled */ |
| if (!pid) |
| fd = hwloc_fopen("/proc/self/cgroup", "r", fsroot_fd); |
| else { |
| char path[] = "/proc/XXXXXXXXXX/cgroup"; |
| snprintf(path, sizeof(path), "/proc/%d/cgroup", pid); |
| fd = hwloc_fopen(path, "r", fsroot_fd); |
| } |
| if (fd) { |
| /* find a cpuset line */ |
| #define CGROUP_LINE_LEN 256 |
| char line[CGROUP_LINE_LEN]; |
| while (fgets(line, sizeof(line), fd)) { |
| char *end, *colon = strchr(line, ':'); |
| if (!colon) |
| continue; |
| if (strncmp(colon, ":cpuset:", 8)) |
| continue; |
| |
| /* found a cgroup-cpuset line, return the name */ |
| fclose(fd); |
| end = strchr(colon, '\n'); |
| if (end) |
| *end = '\0'; |
| hwloc_debug("Found cgroup-cpuset %s\n", colon+8); |
| return strdup(colon+8); |
| } |
| fclose(fd); |
| } |
| |
| /* check whether a cpuset is enabled */ |
| if (!pid) |
| fd = hwloc_fopen("/proc/self/cpuset", "r", fsroot_fd); |
| else { |
| char path[] = "/proc/XXXXXXXXXX/cpuset"; |
| snprintf(path, sizeof(path), "/proc/%d/cpuset", pid); |
| fd = hwloc_fopen(path, "r", fsroot_fd); |
| } |
| if (!fd) { |
| /* found nothing */ |
| hwloc_debug("%s", "No cgroup or cpuset found\n"); |
| return NULL; |
| } |
| |
| /* found a cpuset, return the name */ |
| tmp = fgets(cpuset_name, sizeof(cpuset_name), fd); |
| fclose(fd); |
| if (!tmp) |
| return NULL; |
| tmp = strchr(cpuset_name, '\n'); |
| if (tmp) |
| *tmp = '\0'; |
| hwloc_debug("Found cpuset %s\n", cpuset_name); |
| return strdup(cpuset_name); |
| } |
| |
| /* |
| * Then, the cpuset description is available from either the cgroup or |
| * the cpuset filesystem (usually mounted in / or /dev) where there |
| * are cgroup<name>/cpuset.{cpus,mems} or cpuset<name>/{cpus,mems} files. |
| */ |
| static char * |
| hwloc_read_linux_cpuset_mask(const char *cgroup_mntpnt, const char *cpuset_mntpnt, const char *cpuset_name, const char *attr_name, int fsroot_fd) |
| { |
| #define CPUSET_FILENAME_LEN 256 |
| char cpuset_filename[CPUSET_FILENAME_LEN]; |
| FILE *fd; |
| char *info = NULL, *tmp; |
| ssize_t ssize; |
  size_t size = 0;
| |
| if (cgroup_mntpnt) { |
| /* try to read the cpuset from cgroup */ |
| snprintf(cpuset_filename, CPUSET_FILENAME_LEN, "%s%s/cpuset.%s", cgroup_mntpnt, cpuset_name, attr_name); |
| hwloc_debug("Trying to read cgroup file <%s>\n", cpuset_filename); |
| fd = hwloc_fopen(cpuset_filename, "r", fsroot_fd); |
| if (fd) |
| goto gotfile; |
| } else if (cpuset_mntpnt) { |
| /* try to read the cpuset directly */ |
| snprintf(cpuset_filename, CPUSET_FILENAME_LEN, "%s%s/%s", cpuset_mntpnt, cpuset_name, attr_name); |
| hwloc_debug("Trying to read cpuset file <%s>\n", cpuset_filename); |
| fd = hwloc_fopen(cpuset_filename, "r", fsroot_fd); |
| if (fd) |
| goto gotfile; |
| } |
| |
| /* found no cpuset description, ignore it */ |
| hwloc_debug("Couldn't find cpuset <%s> description, ignoring\n", cpuset_name); |
| goto out; |
| |
| gotfile: |
| ssize = getline(&info, &size, fd); |
| fclose(fd); |
| if (ssize < 0) |
| goto out; |
| if (!info) |
| goto out; |
| |
| tmp = strchr(info, '\n'); |
| if (tmp) |
| *tmp = '\0'; |
| |
| out: |
| return info; |
| } |
| |
| static void |
| hwloc_admin_disable_set_from_cpuset(struct hwloc_topology *topology, |
| const char *cgroup_mntpnt, const char *cpuset_mntpnt, const char *cpuset_name, |
| const char *attr_name, |
| hwloc_bitmap_t admin_enabled_cpus_set) |
| { |
| char *cpuset_mask; |
| char *current, *comma, *tmp; |
| int prevlast, nextfirst, nextlast; /* beginning/end of enabled-segments */ |
| hwloc_bitmap_t tmpset; |
| |
| cpuset_mask = hwloc_read_linux_cpuset_mask(cgroup_mntpnt, cpuset_mntpnt, cpuset_name, |
| attr_name, topology->backend_params.sysfs.root_fd); |
| if (!cpuset_mask) |
| return; |
| |
| hwloc_debug("found cpuset %s: %s\n", attr_name, cpuset_mask); |
| |
| current = cpuset_mask; |
| prevlast = -1; |
| |
| while (1) { |
| /* save a pointer to the next comma and erase it to simplify things */ |
| comma = strchr(current, ','); |
| if (comma) |
| *comma = '\0'; |
| |
| /* find current enabled-segment bounds */ |
| nextfirst = strtoul(current, &tmp, 0); |
| if (*tmp == '-') |
| nextlast = strtoul(tmp+1, NULL, 0); |
| else |
| nextlast = nextfirst; |
| if (prevlast+1 <= nextfirst-1) { |
| hwloc_debug("%s [%d:%d] excluded by cpuset\n", attr_name, prevlast+1, nextfirst-1); |
| hwloc_bitmap_clr_range(admin_enabled_cpus_set, prevlast+1, nextfirst-1); |
| } |
| |
| /* switch to next enabled-segment */ |
| prevlast = nextlast; |
| if (!comma) |
| break; |
| current = comma+1; |
| } |
| |
| hwloc_debug("%s [%d:%d] excluded by cpuset\n", attr_name, prevlast+1, nextfirst-1); |
| /* no easy way to clear until the infinity */ |
| tmpset = hwloc_bitmap_alloc(); |
| hwloc_bitmap_set_range(tmpset, 0, prevlast); |
| hwloc_bitmap_and(admin_enabled_cpus_set, admin_enabled_cpus_set, tmpset); |
| hwloc_bitmap_free(tmpset); |
| |
| free(cpuset_mask); |
| } |
| |
| static void |
| hwloc_parse_meminfo_info(struct hwloc_topology *topology, |
| const char *path, |
| int prefixlength, |
| uint64_t *local_memory, |
| uint64_t *meminfo_hugepages_count, |
| uint64_t *meminfo_hugepages_size, |
| int onlytotal) |
| { |
| char string[64]; |
| FILE *fd; |
| |
| fd = hwloc_fopen(path, "r", topology->backend_params.sysfs.root_fd); |
| if (!fd) |
| return; |
| |
| while (fgets(string, sizeof(string), fd) && *string != '\0') |
| { |
| unsigned long long number; |
| if (strlen(string) < (size_t) prefixlength) |
| continue; |
| if (sscanf(string+prefixlength, "MemTotal: %llu kB", (unsigned long long *) &number) == 1) { |
| *local_memory = number << 10; |
| if (onlytotal) |
| break; |
| } |
| else if (!onlytotal) { |
| if (sscanf(string+prefixlength, "Hugepagesize: %llu", (unsigned long long *) &number) == 1) |
| *meminfo_hugepages_size = number << 10; |
| else if (sscanf(string+prefixlength, "HugePages_Free: %llu", (unsigned long long *) &number) == 1) |
| /* these are free hugepages, not the total amount of huge pages */ |
| *meminfo_hugepages_count = number; |
| } |
| } |
| |
| fclose(fd); |
| } |
| |
| #define SYSFS_NUMA_NODE_PATH_LEN 128 |
| |
| static void |
| hwloc_parse_hugepages_info(struct hwloc_topology *topology, |
| const char *dirpath, |
| struct hwloc_obj_memory_s *memory, |
| uint64_t *remaining_local_memory) |
| { |
| DIR *dir; |
| struct dirent *dirent; |
| unsigned long index_ = 1; |
| FILE *hpfd; |
| char line[64]; |
| char path[SYSFS_NUMA_NODE_PATH_LEN]; |
| |
| dir = hwloc_opendir(dirpath, topology->backend_params.sysfs.root_fd); |
| if (dir) { |
| while ((dirent = readdir(dir)) != NULL) { |
| if (strncmp(dirent->d_name, "hugepages-", 10)) |
| continue; |
| memory->page_types[index_].size = strtoul(dirent->d_name+10, NULL, 0) * 1024ULL; |
| sprintf(path, "%s/%s/nr_hugepages", dirpath, dirent->d_name); |
| hpfd = hwloc_fopen(path, "r", topology->backend_params.sysfs.root_fd); |
      if (hpfd) {
        if (fgets(line, sizeof(line), hpfd)) {
          /* these are the actual total amount of huge pages */
          memory->page_types[index_].count = strtoull(line, NULL, 0);
          *remaining_local_memory -= memory->page_types[index_].count * memory->page_types[index_].size;
          index_++;
        }
        fclose(hpfd); /* close even when the read fails */
      }
| } |
| closedir(dir); |
| memory->page_types_len = index_; |
| } |
| } |
| |
| static void |
| hwloc_get_kerrighed_node_meminfo_info(struct hwloc_topology *topology, unsigned long node, struct hwloc_obj_memory_s *memory) |
| { |
| char path[128]; |
| uint64_t meminfo_hugepages_count, meminfo_hugepages_size = 0; |
| |
| if (topology->is_thissystem) { |
| memory->page_types_len = 2; |
| memory->page_types = malloc(2*sizeof(*memory->page_types)); |
| memset(memory->page_types, 0, 2*sizeof(*memory->page_types)); |
| /* Try to get the hugepage size from sysconf in case we fail to get it from /proc/meminfo later */ |
| #ifdef HAVE__SC_LARGE_PAGESIZE |
| memory->page_types[1].size = sysconf(_SC_LARGE_PAGESIZE); |
| #endif |
| memory->page_types[0].size = getpagesize(); |
| } |
| |
| snprintf(path, sizeof(path), "/proc/nodes/node%lu/meminfo", node); |
| hwloc_parse_meminfo_info(topology, path, 0 /* no prefix */, |
| &memory->local_memory, |
| &meminfo_hugepages_count, &meminfo_hugepages_size, |
| memory->page_types == NULL); |
| |
| if (memory->page_types) { |
| uint64_t remaining_local_memory = memory->local_memory; |
| if (meminfo_hugepages_size) { |
| memory->page_types[1].size = meminfo_hugepages_size; |
| memory->page_types[1].count = meminfo_hugepages_count; |
| remaining_local_memory -= meminfo_hugepages_count * meminfo_hugepages_size; |
| } else { |
| memory->page_types_len = 1; |
| } |
| memory->page_types[0].count = remaining_local_memory / memory->page_types[0].size; |
| } |
| } |
| |
| static void |
| hwloc_get_procfs_meminfo_info(struct hwloc_topology *topology, struct hwloc_obj_memory_s *memory) |
| { |
| uint64_t meminfo_hugepages_count, meminfo_hugepages_size = 0; |
| struct stat st; |
| int has_sysfs_hugepages = 0; |
| int types = 2; |
| int err; |
| |
| err = hwloc_stat("/sys/kernel/mm/hugepages", &st, topology->backend_params.sysfs.root_fd); |
| if (!err) { |
| types = 1 + st.st_nlink-2; |
| has_sysfs_hugepages = 1; |
| } |
| |
| if (topology->is_thissystem) { |
| memory->page_types_len = types; |
| memory->page_types = malloc(types*sizeof(*memory->page_types)); |
| memset(memory->page_types, 0, types*sizeof(*memory->page_types)); |
| /* Try to get the hugepage size from sysconf in case we fail to get it from /proc/meminfo later */ |
| #ifdef HAVE__SC_LARGE_PAGESIZE |
| memory->page_types[1].size = sysconf(_SC_LARGE_PAGESIZE); |
| #endif |
| memory->page_types[0].size = getpagesize(); |
| } |
| |
| hwloc_parse_meminfo_info(topology, "/proc/meminfo", 0 /* no prefix */, |
| &memory->local_memory, |
| &meminfo_hugepages_count, &meminfo_hugepages_size, |
| memory->page_types == NULL); |
| |
| if (memory->page_types) { |
| uint64_t remaining_local_memory = memory->local_memory; |
| if (has_sysfs_hugepages) { |
| /* read from node%d/hugepages/hugepages-%skB/nr_hugepages */ |
| hwloc_parse_hugepages_info(topology, "/sys/kernel/mm/hugepages", memory, &remaining_local_memory); |
| } else { |
| /* use what we found in meminfo */ |
| if (meminfo_hugepages_size) { |
| memory->page_types[1].size = meminfo_hugepages_size; |
| memory->page_types[1].count = meminfo_hugepages_count; |
| remaining_local_memory -= meminfo_hugepages_count * meminfo_hugepages_size; |
| } else { |
| memory->page_types_len = 1; |
| } |
| } |
| memory->page_types[0].count = remaining_local_memory / memory->page_types[0].size; |
| } |
| } |
| |
| static void |
| hwloc_sysfs_node_meminfo_info(struct hwloc_topology *topology, |
| const char *syspath, int node, |
| struct hwloc_obj_memory_s *memory) |
| { |
| char path[SYSFS_NUMA_NODE_PATH_LEN]; |
| char meminfopath[SYSFS_NUMA_NODE_PATH_LEN]; |
| uint64_t meminfo_hugepages_count = 0; |
| uint64_t meminfo_hugepages_size = 0; |
| struct stat st; |
| int has_sysfs_hugepages = 0; |
| int types = 2; |
| int err; |
| |
| sprintf(path, "%s/node%d/hugepages", syspath, node); |
| err = hwloc_stat(path, &st, topology->backend_params.sysfs.root_fd); |
| if (!err) { |
| types = 1 + st.st_nlink-2; |
| has_sysfs_hugepages = 1; |
| } |
| |
| if (topology->is_thissystem) { |
| memory->page_types_len = types; |
| memory->page_types = malloc(types*sizeof(*memory->page_types)); |
| memset(memory->page_types, 0, types*sizeof(*memory->page_types)); |
| } |
| |
| sprintf(meminfopath, "%s/node%d/meminfo", syspath, node); |
| hwloc_parse_meminfo_info(topology, meminfopath, |
| hwloc_snprintf(NULL, 0, "Node %d ", node), |
| &memory->local_memory, |
| &meminfo_hugepages_count, NULL /* no hugepage size in node-specific meminfo */, |
| memory->page_types == NULL); |
| |
| if (memory->page_types) { |
| uint64_t remaining_local_memory = memory->local_memory; |
| if (has_sysfs_hugepages) { |
| /* read from node%d/hugepages/hugepages-%skB/nr_hugepages */ |
| hwloc_parse_hugepages_info(topology, path, memory, &remaining_local_memory); |
| } else { |
| /* get hugepage size from machine-specific meminfo since there is no size in node-specific meminfo, |
| * hwloc_get_procfs_meminfo_info must have been called earlier */ |
| meminfo_hugepages_size = topology->levels[0][0]->memory.page_types[1].size; |
| /* use what we found in meminfo */ |
| if (meminfo_hugepages_size) { |
| memory->page_types[1].count = meminfo_hugepages_count; |
| memory->page_types[1].size = meminfo_hugepages_size; |
| remaining_local_memory -= meminfo_hugepages_count * meminfo_hugepages_size; |
| } else { |
| memory->page_types_len = 1; |
| } |
| } |
| /* update what's remaining as normal pages */ |
| memory->page_types[0].size = getpagesize(); |
| memory->page_types[0].count = remaining_local_memory / memory->page_types[0].size; |
| } |
| } |
| |
| static void |
| hwloc_parse_node_distance(const char *distancepath, unsigned nbnodes, float *distances, int fsroot_fd) |
| { |
| char string[4096]; /* enough for hundreds of nodes */ |
| char *tmp, *next; |
| FILE * fd; |
| |
| fd = hwloc_fopen(distancepath, "r", fsroot_fd); |
| if (!fd) |
| return; |
| |
| if (!fgets(string, sizeof(string), fd)) { |
| fclose(fd); |
| return; |
| } |
| |
| tmp = string; |
| while (tmp) { |
| unsigned distance = strtoul(tmp, &next, 0); |
| if (next == tmp) |
| break; |
| *distances = (float) distance; |
| distances++; |
| nbnodes--; |
| if (!nbnodes) |
| break; |
| tmp = next+1; |
| } |
| |
| fclose(fd); |
| } |
| |
| static void |
| look_sysfsnode(struct hwloc_topology *topology, const char *path, unsigned *found) |
| { |
| unsigned osnode; |
| unsigned nbnodes = 0; |
| DIR *dir; |
| struct dirent *dirent; |
| hwloc_obj_t node; |
| hwloc_bitmap_t nodeset = hwloc_bitmap_alloc(); |
| |
| *found = 0; |
| |
| /* Get the list of nodes first */ |
| dir = hwloc_opendir(path, topology->backend_params.sysfs.root_fd); |
| if (dir) |
| { |
| while ((dirent = readdir(dir)) != NULL) |
| { |
| if (strncmp(dirent->d_name, "node", 4)) |
| continue; |
| osnode = strtoul(dirent->d_name+4, NULL, 0); |
| hwloc_bitmap_set(nodeset, osnode); |
| nbnodes++; |
| } |
| closedir(dir); |
| } |
| |
| if (nbnodes <= 1) |
| { |
| hwloc_bitmap_free(nodeset); |
| return; |
| } |
| |
| /* Put these declarations inside a nested block so they may follow the early-exit checks above (C89 requires declarations at the start of a block). */ |
| |
| { |
| hwloc_obj_t * nodes = calloc(nbnodes, sizeof(hwloc_obj_t)); |
| float * distances = calloc(nbnodes*nbnodes, sizeof(float)); |
| unsigned *indexes = calloc(nbnodes, sizeof(unsigned)); |
| unsigned index_; |
| |
| if (NULL == indexes || NULL == distances || NULL == nodes) { |
| free(nodes); |
| free(indexes); |
| free(distances); |
| goto out; |
| } |
| |
| /* Get node indexes now. We need them in order since Linux groups |
| * sparse distances but keeps them in order in the sysfs distance files. |
| */ |
| index_ = 0; |
| hwloc_bitmap_foreach_begin (osnode, nodeset) { |
| indexes[index_] = osnode; |
| index_++; |
| } hwloc_bitmap_foreach_end(); |
| hwloc_bitmap_free(nodeset); |
| |
| #ifdef HWLOC_DEBUG |
| hwloc_debug("%s", "numa distance indexes: "); |
| for (index_ = 0; index_ < nbnodes; index_++) { |
| hwloc_debug(" %u", indexes[index_]); |
| } |
| hwloc_debug("%s", "\n"); |
| #endif |
| |
| /* Get actual distances now */ |
| for (index_ = 0; index_ < nbnodes; index_++) { |
| char nodepath[SYSFS_NUMA_NODE_PATH_LEN]; |
| hwloc_bitmap_t cpuset; |
| osnode = indexes[index_]; |
| |
| sprintf(nodepath, "%s/node%u/cpumap", path, osnode); |
| cpuset = hwloc_parse_cpumap(nodepath, topology->backend_params.sysfs.root_fd); |
| if (!cpuset) |
| continue; |
| |
| node = hwloc_alloc_setup_object(HWLOC_OBJ_NODE, osnode); |
| node->cpuset = cpuset; |
| node->nodeset = hwloc_bitmap_alloc(); |
| hwloc_bitmap_set(node->nodeset, osnode); |
| |
| hwloc_sysfs_node_meminfo_info(topology, path, osnode, &node->memory); |
| |
| hwloc_debug_1arg_bitmap("os node %u has cpuset %s\n", |
| osnode, node->cpuset); |
| hwloc_insert_object_by_cpuset(topology, node); |
| nodes[index_] = node; |
| |
| /* Linux nodeX/distance file contains distance from X to other localities (from ACPI SLIT table or so), |
| * store them in slots X*N...X*N+N-1 */ |
| sprintf(nodepath, "%s/node%u/distance", path, osnode); |
| hwloc_parse_node_distance(nodepath, nbnodes, distances+index_*nbnodes, topology->backend_params.sysfs.root_fd); |
| } |
| |
| hwloc_topology__set_distance_matrix(topology, HWLOC_OBJ_NODE, nbnodes, indexes, nodes, distances); |
| } |
| |
| out: |
| *found = nbnodes; |
| } |
| |
| /* Reads the entire file; if bytes_read != NULL, stores the number of bytes read there. |
| * Returns NULL on error. The returned pointer must be freed with free(). */ |
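| /* Usage sketch (the path and property below are hypothetical): |
|  *   size_t cb = 0; |
|  *   void *buf = hwloc_read_raw("/proc/device-tree/cpus/cpu@0", "reg", &cb, root_fd); |
|  *   if (NULL != buf) { ... use the first cb bytes ... free(buf); } |
|  */ |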
| static void * |
| hwloc_read_raw(const char *p, const char *p1, size_t *bytes_read, int root_fd) |
| { |
| char *fname = NULL; |
| char *ret = NULL; |
| struct stat fs; |
| int file = -1; |
| unsigned len; |
| |
| len = strlen(p) + 1 + strlen(p1) + 1; |
| fname = malloc(len); |
| if (NULL == fname) { |
| return NULL; |
| } |
| snprintf(fname, len, "%s/%s", p, p1); |
| |
| file = hwloc_open(fname, root_fd); |
| if (-1 == file) { |
| goto out; |
| } |
| if (fstat(file, &fs)) { |
| goto out; |
| } |
| |
| ret = (char *) malloc(fs.st_size); |
| if (NULL != ret) { |
| ssize_t cb = read(file, ret, fs.st_size); |
| if (cb == -1) { |
| free(ret); |
| ret = NULL; |
| } else { |
| if (NULL != bytes_read) |
| *bytes_read = cb; |
| } |
| } |
| |
| out: |
| if (-1 != file) |
| close(file); /* don't call close(-1) when hwloc_open failed */ |
| free(fname); |
| return ret; |
| } |
| |
| /* Reads the entire file and returns it as a 0-terminated string. |
| * The returned pointer must be freed with free(). */ |
| static char * |
| hwloc_read_str(const char *p, const char *p1, int root_fd) |
| { |
| size_t cb = 0; |
| char *ret = hwloc_read_raw(p, p1, &cb, root_fd); |
| if ((NULL != ret) && (0 < cb) && (0 != ret[cb-1])) { |
| char *tmp = realloc(ret, cb + 1); /* make room for the terminating 0 */ |
| if (NULL == tmp) { |
| free(ret); /* don't lose the old buffer if realloc fails */ |
| return NULL; |
| } |
| ret = tmp; |
| ret[cb] = 0; |
| } |
| return ret; |
| } |
| |
| /* Reads the first 32-bit big-endian value and returns it in host byte order */ |
| static ssize_t |
| hwloc_read_unit32be(const char *p, const char *p1, uint32_t *buf, int root_fd) |
| { |
| size_t cb = 0; |
| uint32_t *tmp = hwloc_read_raw(p, p1, &cb, root_fd); |
| if (sizeof(*buf) != cb) { |
| errno = EINVAL; |
| free(tmp); /* tmp may be NULL or of the wrong size, discard it either way */ |
| return -1; |
| } |
| *buf = htonl(*tmp); /* htonl() == ntohl(); converts the big-endian value to host order */ |
| free(tmp); |
| return sizeof(*buf); |
| } |
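| /* Example (value hypothetical): device-tree properties are stored |
|  * big-endian, so a "reg" property holding the bytes 00 00 00 04 makes |
|  * hwloc_read_unit32be() store 4 into *buf regardless of host endianness. */ |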
| |
| typedef struct { |
| unsigned int n, allocated; |
| struct { |
| hwloc_bitmap_t cpuset; |
| uint32_t phandle; |
| uint32_t l2_cache; |
| char *name; |
| } *p; |
| } device_tree_cpus_t; |
| |
| static void |
| add_device_tree_cpus_node(device_tree_cpus_t *cpus, hwloc_bitmap_t cpuset, |
| uint32_t l2_cache, uint32_t phandle, const char *name) |
| { |
| if (cpus->n == cpus->allocated) { |
| unsigned allocated = cpus->allocated ? 2 * cpus->allocated : 64; |
| void *tmp = realloc(cpus->p, allocated * sizeof(cpus->p[0])); |
| if (NULL == tmp) |
| return; /* out of memory, drop this entry instead of losing cpus->p */ |
| cpus->p = tmp; |
| cpus->allocated = allocated; |
| } |
| cpus->p[cpus->n].phandle = phandle; |
| cpus->p[cpus->n].cpuset = (NULL == cpuset) ? NULL : hwloc_bitmap_dup(cpuset); |
| cpus->p[cpus->n].l2_cache = l2_cache; |
| cpus->p[cpus->n].name = strdup(name); |
| ++cpus->n; |
| } |
| |
| /* Walks over the cache list in order to detect nested caches and CPU mask for each */ |
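| /* Sketch of the walk (phandles hypothetical): given a cache whose phandle |
|  * is 0x10, every entry whose l2_cache field equals 0x10 is one of its |
|  * children. A child cpu contributes its cpuset directly (an L2 if *level |
|  * is still 2); a child cache increments *level and recurses, so a cache |
|  * reached through one intermediate cache node is reported as an L3. */ |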
| static int |
| look_powerpc_device_tree_discover_cache(device_tree_cpus_t *cpus, |
| uint32_t phandle, unsigned int *level, hwloc_bitmap_t cpuset) |
| { |
| unsigned int i; |
| int ret = -1; |
| if ((NULL == level) || (NULL == cpuset) || phandle == (uint32_t) -1) |
| return ret; |
| for (i = 0; i < cpus->n; ++i) { |
| if (phandle != cpus->p[i].l2_cache) |
| continue; |
| if (NULL != cpus->p[i].cpuset) { |
| hwloc_bitmap_or(cpuset, cpuset, cpus->p[i].cpuset); |
| ret = 0; |
| } else { |
| ++(*level); |
| if (0 == look_powerpc_device_tree_discover_cache(cpus, |
| cpus->p[i].phandle, level, cpuset)) |
| ret = 0; |
| } |
| } |
| return ret; |
| } |
| |
| static void |
| try_add_cache_from_device_tree_cpu(struct hwloc_topology *topology, |
| const char *cpu, unsigned int level, hwloc_bitmap_t cpuset) |
| { |
| /* Ignore Instruction caches */ |
| /* d-cache-block-size - ignore */ |
| /* d-cache-line-size - to read, in bytes */ |
| /* d-cache-sets - ignore */ |
| /* d-cache-size - to read, in bytes */ |
| /* d-tlb-sets - ignore */ |
| /* d-tlb-size - ignore, always 0 on power6 */ |
| /* i-cache-* and i-tlb-* represent instruction cache, ignore */ |
| uint32_t d_cache_line_size = 0, d_cache_size = 0; |
| struct hwloc_obj *c = NULL; |
| |
| hwloc_read_unit32be(cpu, "d-cache-line-size", &d_cache_line_size, |
| topology->backend_params.sysfs.root_fd); |
| hwloc_read_unit32be(cpu, "d-cache-size", &d_cache_size, |
| topology->backend_params.sysfs.root_fd); |
| |
| if ( (0 == d_cache_line_size) && (0 == d_cache_size) ) |
| return; |
| |
| c = hwloc_alloc_setup_object(HWLOC_OBJ_CACHE, -1); |
| c->attr->cache.depth = level; |
| c->attr->cache.linesize = d_cache_line_size; |
| c->attr->cache.size = d_cache_size; |
| c->cpuset = hwloc_bitmap_dup(cpuset); |
| hwloc_debug_1arg_bitmap("cache depth %d has cpuset %s\n", level, c->cpuset); |
| hwloc_insert_object_by_cpuset(topology, c); |
| } |
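| /* e.g. (values hypothetical): a cpu node with d-cache-size = 32768 and |
|  * d-cache-line-size = 128 yields a 32-kB data cache object with 128-byte |
|  * lines at the requested depth. */ |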
| |
| /* |
| * Discovers L1/L2/L3 cache information on IBM PowerPC systems for old kernels (RHEL5.*) |
| * which provide NUMA nodes information without any details |
| */ |
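| /* Example layout (node names and values hypothetical): |
|  *   /proc/device-tree/cpus/PowerPC,POWER5@0/device_type  = "cpu" |
|  *   /proc/device-tree/cpus/PowerPC,POWER5@0/l2-cache     = <phandle of the l2 node> |
|  *   /proc/device-tree/cpus/l2-cache@2000/device_type     = "cache" |
|  * The loop below records every such node and later links caches to cpus |
|  * through their phandles. */ |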
| static void |
| look_powerpc_device_tree(struct hwloc_topology *topology) |
| { |
| device_tree_cpus_t cpus; |
| const char ofroot[] = "/proc/device-tree/cpus"; |
| unsigned int i; |
| int root_fd = topology->backend_params.sysfs.root_fd; |
| DIR *dt = hwloc_opendir(ofroot, root_fd); |
| struct dirent *dirent; |
| |
| cpus.n = 0; |
| cpus.p = NULL; |
| cpus.allocated = 0; |
| |
| if (NULL == dt) |
| return; |
| |
| while (NULL != (dirent = readdir(dt))) { |
| struct stat statbuf; |
| int err; |
| char *cpu; |
| char *device_type; |
| uint32_t reg = -1, l2_cache = -1, phandle = -1; |
| unsigned len; |
| |
| if ('.' == dirent->d_name[0]) |
| continue; |
| |
| len = sizeof(ofroot) + 1 + strlen(dirent->d_name) + 1; |
| cpu = malloc(len); |
| if (NULL == cpu) { |
| continue; |
| } |
| snprintf(cpu, len, "%s/%s", ofroot, dirent->d_name); |
| |
| err = hwloc_stat(cpu, &statbuf, root_fd); |
| if (err < 0 || !S_ISDIR(statbuf.st_mode)) |
| goto cont; |
| |
| device_type = hwloc_read_str(cpu, "device_type", root_fd); |
| if (NULL == device_type) |
| goto cont; |
| |
| hwloc_read_unit32be(cpu, "reg", ®, root_fd); |
| if (hwloc_read_unit32be(cpu, "next-level-cache", &l2_cache, root_fd) == -1) |
| hwloc_read_unit32be(cpu, "l2-cache", &l2_cache, root_fd); |
| if (hwloc_read_unit32be(cpu, "phandle", &phandle, root_fd) == -1) |
| if (hwloc_read_unit32be(cpu, "ibm,phandle", &phandle, root_fd) == -1) |
| hwloc_read_unit32be(cpu, "linux,phandle", &phandle, root_fd); |
| |
| if (0 == strcmp(device_type, "cache")) { |
| add_device_tree_cpus_node(&cpus, NULL, l2_cache, phandle, dirent->d_name); |
| } |
| else if (0 == strcmp(device_type, "cpu")) { |
| /* Found CPU */ |
| hwloc_bitmap_t cpuset = NULL; |
| size_t cb = 0; |
| uint32_t *threads = hwloc_read_raw(cpu, "ibm,ppc-interrupt-server#s", &cb, root_fd); |
| uint32_t nthreads = cb / sizeof(threads[0]); |
| |
| if (NULL != threads) { |
| cpuset = hwloc_bitmap_alloc(); |
| for (i = 0; i < nthreads; ++i) { |
| if (hwloc_bitmap_isset(topology->levels[0][0]->complete_cpuset, ntohl(threads[i]))) |
| hwloc_bitmap_set(cpuset, ntohl(threads[i])); |
| } |
| free(threads); |
| } else if ((unsigned int)-1 != reg) { |
| cpuset = hwloc_bitmap_alloc(); |
| hwloc_bitmap_set(cpuset, reg); |
| } |
| |
| if (NULL == cpuset) { |
| hwloc_debug("%s has no \"reg\" property, skipping\n", cpu); |
| } else { |
| struct hwloc_obj *core = NULL; |
| add_device_tree_cpus_node(&cpus, cpuset, l2_cache, phandle, dirent->d_name); |
| |
| /* Add core */ |
| core = hwloc_alloc_setup_object(HWLOC_OBJ_CORE, reg); |
| core->cpuset = hwloc_bitmap_dup(cpuset); |
| hwloc_insert_object_by_cpuset(topology, core); |
| |
| /* Add L1 cache */ |
| try_add_cache_from_device_tree_cpu(topology, cpu, 1, cpuset); |
| |
| hwloc_bitmap_free(cpuset); |
| } |
| } |
| free(device_type); /* freed for "cache" and other node types too, not only "cpu" */ |
| cont: |
| free(cpu); |
| } |
| closedir(dt); |
| |
| /* No cores or caches were found, nothing else to do */ |
| if (0 == cpus.n) { |
| hwloc_debug("No cores or caches found in %s, exiting\n", ofroot); |
| return; |
| } |
| |
| #ifdef HWLOC_DEBUG |
| for (i = 0; i < cpus.n; ++i) { |
| hwloc_debug("%i: %s ibm,phandle=%08X l2_cache=%08X ", |
| i, cpus.p[i].name, cpus.p[i].phandle, cpus.p[i].l2_cache); |
| if (NULL == cpus.p[i].cpuset) { |
| hwloc_debug("%s\n", "no cpuset"); |
| } else { |
| hwloc_debug_bitmap("cpuset %s\n", cpus.p[i].cpuset); |
| } |
| } |
| #endif |
| |
| /* Scan L2/L3/... caches */ |
| for (i = 0; i < cpus.n; ++i) { |
| unsigned int level = 2; |
| hwloc_bitmap_t cpuset; |
| /* Skip real CPUs */ |
| if (NULL != cpus.p[i].cpuset) |
| continue; |
| |
| /* Calculate cache level and CPU mask */ |
| cpuset = hwloc_bitmap_alloc(); |
| if (0 == look_powerpc_device_tree_discover_cache(&cpus, |
| cpus.p[i].phandle, &level, cpuset)) { |
| char *cpu; |
| unsigned len; |
| |
| len = sizeof(ofroot) + 1 + strlen(cpus.p[i].name) + 1; |
| cpu = malloc(len); |
| if (NULL == cpu) { |
| hwloc_bitmap_free(cpuset); |
| break; /* out of memory: fall through to the cleanup loop below instead of leaking cpus.p */ |
| } |
| snprintf(cpu, len, "%s/%s", ofroot, cpus.p[i].name); |
| |
| try_add_cache_from_device_tree_cpu(topology, cpu, level, cpuset); |
| free(cpu); |
| } |
| hwloc_bitmap_free(cpuset); |
| } |
| |
| /* Do cleanup */ |
| for (i = 0; i < cpus.n; ++i) { |
| hwloc_bitmap_free(cpus.p[i].cpuset); |
| free(cpus.p[i].name); |
| } |
| free(cpus.p); |
| } |
| |
| /* Look at Linux' /sys/devices/system/cpu/cpu%d/topology/ */ |
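| /* For reference, the per-cpu files read below look like (contents |
|  * hypothetical): |
|  *   cpuN/topology/physical_package_id  -> "0" (socket number) |
|  *   cpuN/topology/core_id              -> "3" |
|  *   cpuN/topology/core_siblings        -> cpumask of the whole socket |
|  *   cpuN/topology/thread_siblings      -> cpumask of the whole core |
|  *   cpuN/cache/indexM/{level,type,size,coherency_line_size,shared_cpu_map} */ |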
| static void |
| look_sysfscpu(struct hwloc_topology *topology, const char *path) |
| { |
| hwloc_bitmap_t cpuset; /* Set of cpus for which we have topology information */ |
| #define CPU_TOPOLOGY_STR_LEN 128 |
| char str[CPU_TOPOLOGY_STR_LEN]; |
| DIR *dir; |
| int i,j; |
| FILE *fd; |
| unsigned caches_added; |
| |
| cpuset = hwloc_bitmap_alloc(); |
| |
| /* fill the cpuset of interesting cpus */ |
| dir = hwloc_opendir(path, topology->backend_params.sysfs.root_fd); |
| if (dir) { |
| struct dirent *dirent; |
| while ((dirent = readdir(dir)) != NULL) { |
| unsigned long cpu; |
| char online[2]; |
| |
| if (strncmp(dirent->d_name, "cpu", 3)) |
| continue; |
| cpu = strtoul(dirent->d_name+3, NULL, 0); |
| |
| /* Maybe we don't have topology information but at least it exists */ |
| hwloc_bitmap_set(topology->levels[0][0]->complete_cpuset, cpu); |
| |
| /* check whether this processor is online */ |
| sprintf(str, "%s/cpu%lu/online", path, cpu); |
| fd = hwloc_fopen(str, "r", topology->backend_params.sysfs.root_fd); |
| if (fd) { |
| if (fgets(online, sizeof(online), fd)) { |
| fclose(fd); |
| if (atoi(online)) { |
| hwloc_debug("os proc %lu is online\n", cpu); |
| } else { |
| hwloc_debug("os proc %lu is offline\n", cpu); |
| hwloc_bitmap_clr(topology->levels[0][0]->online_cpuset, cpu); |
| } |
| } else { |
| fclose(fd); |
| } |
| } |
| |
| /* check whether the kernel exports topology information for this cpu */ |
| sprintf(str, "%s/cpu%lu/topology", path, cpu); |
| if (hwloc_access(str, X_OK, topology->backend_params.sysfs.root_fd) < 0 && errno == ENOENT) { |
| hwloc_debug("os proc %lu has no accessible %s/cpu%lu/topology\n", |
| cpu, path, cpu); |
| continue; |
| } |
| |
| hwloc_bitmap_set(cpuset, cpu); |
| } |
| closedir(dir); |
| } |
| |
| topology->support.discovery->pu = 1; |
| hwloc_debug_1arg_bitmap("found %d cpu topologies, cpuset %s\n", |
| hwloc_bitmap_weight(cpuset), cpuset); |
| |
| caches_added = 0; |
| hwloc_bitmap_foreach_begin(i, cpuset) |
| { |
| struct hwloc_obj *sock, *core, *thread; |
| hwloc_bitmap_t socketset, coreset, threadset, savedcoreset; |
| unsigned mysocketid, mycoreid; |
| int threadwithcoreid = 0; |
| |
| /* look at the socket */ |
| mysocketid = 0; /* shut-up the compiler */ |
| sprintf(str, "%s/cpu%d/topology/physical_package_id", path, i); |
| hwloc_parse_sysfs_unsigned(str, &mysocketid, topology->backend_params.sysfs.root_fd); |
| |
| sprintf(str, "%s/cpu%d/topology/core_siblings", path, i); |
| socketset = hwloc_parse_cpumap(str, topology->backend_params.sysfs.root_fd); |
| if (socketset && hwloc_bitmap_first(socketset) == i) { |
| /* first cpu in this socket, add the socket */ |
| sock = hwloc_alloc_setup_object(HWLOC_OBJ_SOCKET, mysocketid); |
| sock->cpuset = socketset; |
| hwloc_debug_1arg_bitmap("os socket %u has cpuset %s\n", |
| mysocketid, socketset); |
| hwloc_insert_object_by_cpuset(topology, sock); |
| socketset = NULL; /* don't free it */ |
| } |
| hwloc_bitmap_free(socketset); |
| |
| /* look at the core */ |
| mycoreid = 0; /* shut-up the compiler */ |
| sprintf(str, "%s/cpu%d/topology/core_id", path, i); |
| hwloc_parse_sysfs_unsigned(str, &mycoreid, topology->backend_params.sysfs.root_fd); |
| |
| sprintf(str, "%s/cpu%d/topology/thread_siblings", path, i); |
| coreset = hwloc_parse_cpumap(str, topology->backend_params.sysfs.root_fd); |
| savedcoreset = coreset; /* store it for later work-arounds */ |
| |
| if (coreset && hwloc_bitmap_weight(coreset) > 1) { |
| /* check if this is hyperthreading or different coreids */ |
| unsigned siblingid, siblingcoreid; |
| hwloc_bitmap_t set = hwloc_bitmap_dup(coreset); |
| hwloc_bitmap_clr(set, i); |
| siblingid = hwloc_bitmap_first(set); |
| siblingcoreid = mycoreid; |
| sprintf(str, "%s/cpu%d/topology/core_id", path, siblingid); |
| hwloc_parse_sysfs_unsigned(str, &siblingcoreid, topology->backend_params.sysfs.root_fd); |
| threadwithcoreid = (siblingcoreid != mycoreid); |
| hwloc_bitmap_free(set); |
| } |
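| /* e.g. (hypothetical) on an AMD compute-unit design, two PUs may share |
|  * thread_siblings while reporting different core_id values; in that case |
|  * threadwithcoreid is set and each PU gets its own core object below |
|  * instead of being merged as SMT threads of one core. */ |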
| |
| if (coreset && (hwloc_bitmap_first(coreset) == i || threadwithcoreid)) { |
| /* regular core */ |
| core = hwloc_alloc_setup_object(HWLOC_OBJ_CORE, mycoreid); |
| if (threadwithcoreid) { |
| /* amd multicore compute-unit, create one core per thread */ |
| core->cpuset = hwloc_bitmap_alloc(); |
| hwloc_bitmap_set(core->cpuset, i); |
| } else { |
| core->cpuset = coreset; |
| } |
| hwloc_debug_1arg_bitmap("os core %u has cpuset %s\n", |
| mycoreid, coreset); |
| hwloc_insert_object_by_cpuset(topology, core); |
| coreset = NULL; /* don't free it */ |
| } |
| |
| /* look at the thread */ |
| threadset = hwloc_bitmap_alloc(); |
| hwloc_bitmap_only(threadset, i); |
| |
| /* add the thread */ |
| thread = hwloc_alloc_setup_object(HWLOC_OBJ_PU, i); |
| thread->cpuset = threadset; |
| hwloc_debug_1arg_bitmap("thread %d has cpuset %s\n", |
| i, threadset); |
| hwloc_insert_object_by_cpuset(topology, thread); |
| |
| /* look at the caches */ |
| for(j=0; j<10; j++) { |
| #define SHARED_CPU_MAP_STRLEN 128 |
| char mappath[SHARED_CPU_MAP_STRLEN]; |
| char str2[20]; /* enough for a level number (one digit) or a type (Data/Instruction/Unified) */ |
| struct hwloc_obj *cache; |
| hwloc_bitmap_t cacheset; |
| unsigned long kB = 0; |
| unsigned linesize = 0; |
| int depth; /* 0 for L1, .... */ |
| |
| /* get the cache level depth */ |
| sprintf(mappath, "%s/cpu%d/cache/index%d/level", path, i, j); |
| fd = hwloc_fopen(mappath, "r", topology->backend_params.sysfs.root_fd); |
| if (fd) { |
| if (fgets(str2, sizeof(str2), fd)) { |
| fclose(fd); |
| depth = strtoul(str2, NULL, 10) - 1; |
| } else { |
| fclose(fd); /* close before skipping this index, or the FILE leaks */ |
| continue; |
| } |
| } else |
| continue; |
| |
| /* ignore Instruction caches */ |
| sprintf(mappath, "%s/cpu%d/cache/index%d/type", path, i, j); |
| fd = hwloc_fopen(mappath, "r", topology->backend_params.sysfs.root_fd); |
| if (fd) { |
| if (fgets(str2, sizeof(str2), fd)) { |
| fclose(fd); |
| if (!strncmp(str2, "Instruction", 11)) |
| continue; |
| } else { |
| fclose(fd); |
| continue; |
| } |
| } else |
| continue; |
| |
| /* get the cache size */ |
| sprintf(mappath, "%s/cpu%d/cache/index%d/size", path, i, j); |
| fd = hwloc_fopen(mappath, "r", topology->backend_params.sysfs.root_fd); |
| if (fd) { |
| if (fgets(str2,sizeof(str2), fd)) |
| kB = atol(str2); /* in kB */ |
| fclose(fd); |
| } |
| |
| /* get the line size */ |
| sprintf(mappath, "%s/cpu%d/cache/index%d/coherency_line_size", path, i, j); |
| fd = hwloc_fopen(mappath, "r", topology->backend_params.sysfs.root_fd); |
| if (fd) { |
| if (fgets(str2,sizeof(str2), fd)) |
| linesize = atol(str2); /* in bytes */ |
| fclose(fd); |
| } |
| |
| sprintf(mappath, "%s/cpu%d/cache/index%d/shared_cpu_map", path, i, j); |
| cacheset = hwloc_parse_cpumap(mappath, topology->backend_params.sysfs.root_fd); |
| if (cacheset) { |
| if (hwloc_bitmap_weight(cacheset) < 1) { |
| /* mask is wrong (a workaround needed on many Itanium machines) */ |
| if (savedcoreset) |
| /* assume it's a core-specific cache */ |
| hwloc_bitmap_copy(cacheset, savedcoreset); |
| else |
| /* assume it's not shared */ |
| hwloc_bitmap_only(cacheset, i); |
| } |
| |
| if (hwloc_bitmap_first(cacheset) == i) { |
| /* first cpu in this cache, add the cache */ |
| cache = hwloc_alloc_setup_object(HWLOC_OBJ_CACHE, -1); |
| cache->attr->cache.size = kB << 10; |
| cache->attr->cache.depth = depth+1; |
| cache->attr->cache.linesize = linesize; |
| cache->cpuset = cacheset; |
| hwloc_debug_1arg_bitmap("cache depth %d has cpuset %s\n", |
| depth, cacheset); |
| hwloc_insert_object_by_cpuset(topology, cache); |
| cacheset = NULL; /* don't free it */ |
| ++caches_added; |
| } |
| } |
| hwloc_bitmap_free(cacheset); |
| } |
| hwloc_bitmap_free(coreset); |
| } |
| hwloc_bitmap_foreach_end(); |
| |
| if (0 == caches_added) |
| look_powerpc_device_tree(topology); |
| |
| hwloc_bitmap_free(cpuset); |
| } |
| |
| |
| /* Look at Linux' /proc/cpuinfo */ |
| # define PROCESSOR "processor" |
| # define PHYSID "physical id" |
| # define COREID "core id" |
| #define HWLOC_NBMAXCPUS 1024 /* FIXME: drop */ |
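| /* For reference, the fields parsed below appear in /proc/cpuinfo as e.g. |
|  * (values hypothetical): |
|  *   processor   : 0 |
|  *   physical id : 0 |
|  *   core id     : 0 |
|  * with one such block per logical processor. */ |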
| static int |
| look_cpuinfo(struct hwloc_topology *topology, const char *path, |
| hwloc_bitmap_t online_cpuset) |
| { |
| FILE *fd; |
| char *str = NULL; |
| char *endptr; |
| unsigned len; |
| unsigned proc_physids[HWLOC_NBMAXCPUS]; |
| unsigned osphysids[HWLOC_NBMAXCPUS]; |
| unsigned proc_coreids[HWLOC_NBMAXCPUS]; |
| unsigned oscoreids[HWLOC_NBMAXCPUS]; |
| unsigned proc_osphysids[HWLOC_NBMAXCPUS]; |
| unsigned core_osphysids[HWLOC_NBMAXCPUS]; |
| unsigned procid_max=0; |
| unsigned numprocs=0; |
| unsigned numsockets=0; |
| unsigned numcores=0; |
| unsigned long physid; |
| unsigned long coreid; |
| unsigned missingsocket; |
| unsigned missingcore; |
| unsigned long processor = (unsigned long) -1; |
| unsigned i; |
| hwloc_bitmap_t cpuset; |
| hwloc_obj_t obj; |
| |
| for (i = 0; i < HWLOC_NBMAXCPUS; i++) { |
| proc_physids[i] = -1; |
| osphysids[i] = -1; |
| proc_coreids[i] = -1; |
| oscoreids[i] = -1; |
| proc_osphysids[i] = -1; |
| core_osphysids[i] = -1; |
| } |
| |
| if (!(fd=hwloc_fopen(path,"r", topology->backend_params.sysfs.root_fd))) |
| { |
| hwloc_debug("%s", "could not open /proc/cpuinfo\n"); |
| return -1; |
| } |
| |
| cpuset = hwloc_bitmap_alloc(); |
| /* Just record information and count number of sockets and cores */ |
| |
| len = strlen(PHYSID) + 1 + 9 + 1 + 1; |
| str = malloc(len); |
| if (NULL == str) { |
| fclose(fd); |
| hwloc_bitmap_free(cpuset); |
| return -1; |
| } |
| hwloc_debug("%s", "\n\n * Topology extraction from /proc/cpuinfo *\n\n"); |
| while (fgets(str,len,fd)!=NULL) |
| { |
| # define getprocnb_begin(field, var) \ |
| if ( !strncmp(field,str,strlen(field))) \ |
| { \ |
| char *c = strchr(str, ':')+1; \ |
| var = strtoul(c,&endptr,0); \ |
| if (endptr==c) \ |
| { \ |
| hwloc_debug("%s", "no number in "field" field of /proc/cpuinfo\n"); \ |
| hwloc_bitmap_free(cpuset); \ |
| free(str); \ |
| return -1; \ |
| } \ |
| else if (var==ULONG_MAX) \ |
| { \ |
| hwloc_debug("%s", "too big "field" number in /proc/cpuinfo\n"); \ |
| hwloc_bitmap_free(cpuset); \ |
| free(str); \ |
| return -1; \ |
| } \ |
| hwloc_debug(field " %lu\n", var) |
| # define getprocnb_end() \ |
| } |
| getprocnb_begin(PROCESSOR,processor); |
| hwloc_bitmap_set(cpuset, processor); |
| |
| obj = hwloc_alloc_setup_object(HWLOC_OBJ_PU, processor); |
| obj->cpuset = hwloc_bitmap_alloc(); |
| hwloc_bitmap_only(obj->cpuset, processor); |
| |
| hwloc_debug_2args_bitmap("cpu %u (os %lu) has cpuset %s\n", |
| numprocs, processor, obj->cpuset); |
| numprocs++; |
| hwloc_insert_object_by_cpuset(topology, obj); |
| |
| getprocnb_end() else |
| getprocnb_begin(PHYSID,physid); |
| proc_osphysids[processor]=physid; |
| for (i=0; i<numsockets; i++) |
| if (physid == osphysids[i]) |
| break; |
| proc_physids[processor]=i; |
| hwloc_debug("%lu on socket %u (%lx)\n", processor, i, physid); |
| if (i==numsockets) |
| osphysids[(numsockets)++] = physid; |
| getprocnb_end() else |
| getprocnb_begin(COREID,coreid); |
| for (i=0; i<numcores; i++) |
| if (coreid == oscoreids[i] && proc_osphysids[processor] == core_osphysids[i]) |
| break; |
| proc_coreids[processor]=i; |
| if (i==numcores) |
| { |
| core_osphysids[numcores] = proc_osphysids[processor]; |
| oscoreids[numcores] = coreid; |
| (numcores)++; |
| } |
| getprocnb_end() |
| if (str[strlen(str)-1]!='\n') |
| { |
| /* ignore end of line */ |
| if (fscanf(fd,"%*[^\n]") == EOF) |
| break; |
| getc(fd); |
| } |
| } |
| fclose(fd); |
| free(str); |
| |
| if (processor == (unsigned long) -1) { |
| hwloc_bitmap_free(cpuset); |
| return -1; |
| } |
| |
| topology->support.discovery->pu = 1; |
| /* setup the final number of procs */ |
| procid_max = processor + 1; |
| hwloc_bitmap_copy(online_cpuset, cpuset); |
| hwloc_bitmap_free(cpuset); |
| |
| hwloc_debug("%u online processors found, with id max %u\n", numprocs, procid_max); |
| hwloc_debug_bitmap("online processor cpuset: %s\n", online_cpuset); |
| |
| hwloc_debug("%s", "\n * Topology summary *\n"); |
| hwloc_debug("%u processors (%u max id)\n", numprocs, procid_max); |
| |
| /* Some buggy Linuxes don't provide numbers for processor 0, which would make |
| * us report bogus information, so drop the corresponding level below instead. */ |
| missingsocket=0; |
| missingcore=0; |
| hwloc_bitmap_foreach_begin(processor, online_cpuset) |
| if (proc_physids[processor] == (unsigned) -1) |
| missingsocket=1; |
| if (proc_coreids[processor] == (unsigned) -1) |
| missingcore=1; |
| if (missingcore && missingsocket) |
| /* No usable information, no need to continue */ |
| break; |
| hwloc_bitmap_foreach_end(); |
| |
| hwloc_debug("%u sockets%s\n", numsockets, missingsocket ? ", but some missing socket" : ""); |
| if (!missingsocket && numsockets>0) |
| hwloc_setup_level(procid_max, numsockets, osphysids, proc_physids, topology, HWLOC_OBJ_SOCKET); |
| |
| look_powerpc_device_tree(topology); |
| |
| hwloc_debug("%u cores%s\n", numcores, missingcore ? ", but some missing core" : ""); |
| if (!missingcore && numcores>0) |
| hwloc_setup_level(procid_max, numcores, oscoreids, proc_coreids, topology, HWLOC_OBJ_CORE); |
| |
| return 0; |
| } |
| |
| static void |
| hwloc__get_dmi_one_info(struct hwloc_topology *topology, hwloc_obj_t obj, const char *sysfs_name, const char *hwloc_name) |
| { |
| char sysfs_path[128]; |
| char dmi_line[64]; |
| char *tmp; |
| FILE *fd; |
| |
| snprintf(sysfs_path, sizeof(sysfs_path), "/sys/class/dmi/id/%s", sysfs_name); |
| |
| dmi_line[0] = '\0'; |
| fd = hwloc_fopen(sysfs_path, "r", topology->backend_params.sysfs.root_fd); |
| if (fd) { |
| tmp = fgets(dmi_line, sizeof(dmi_line), fd); |
| fclose (fd); |
| if (tmp && dmi_line[0] != '\0') { |
| tmp = strchr(dmi_line, '\n'); |
| if (tmp) |
| *tmp = '\0'; |
| hwloc_debug("found %s '%s'\n", hwloc_name, dmi_line); |
| hwloc_add_object_info(obj, hwloc_name, dmi_line); |
| } |
| } |
| } |
| |
| static void |
| hwloc__get_dmi_info(struct hwloc_topology *topology, hwloc_obj_t obj) |
| { |
| hwloc__get_dmi_one_info(topology, obj, "product_name", "DMIProductName"); |
| hwloc__get_dmi_one_info(topology, obj, "product_version", "DMIProductVersion"); |
| hwloc__get_dmi_one_info(topology, obj, "product_serial", "DMIProductSerial"); |
| hwloc__get_dmi_one_info(topology, obj, "product_uuid", "DMIProductUUID"); |
| hwloc__get_dmi_one_info(topology, obj, "board_vendor", "DMIBoardVendor"); |
| hwloc__get_dmi_one_info(topology, obj, "board_name", "DMIBoardName"); |
| hwloc__get_dmi_one_info(topology, obj, "board_version", "DMIBoardVersion"); |
| hwloc__get_dmi_one_info(topology, obj, "board_serial", "DMIBoardSerial"); |
| hwloc__get_dmi_one_info(topology, obj, "board_asset_tag", "DMIBoardAssetTag"); |
| hwloc__get_dmi_one_info(topology, obj, "chassis_vendor", "DMIChassisVendor"); |
| hwloc__get_dmi_one_info(topology, obj, "chassis_type", "DMIChassisType"); |
| hwloc__get_dmi_one_info(topology, obj, "chassis_version", "DMIChassisVersion"); |
| hwloc__get_dmi_one_info(topology, obj, "chassis_serial", "DMIChassisSerial"); |
| hwloc__get_dmi_one_info(topology, obj, "chassis_asset_tag", "DMIChassisAssetTag"); |
| hwloc__get_dmi_one_info(topology, obj, "bios_vendor", "DMIBIOSVendor"); |
| hwloc__get_dmi_one_info(topology, obj, "bios_version", "DMIBIOSVersion"); |
| hwloc__get_dmi_one_info(topology, obj, "bios_date", "DMIBIOSDate"); |
| hwloc__get_dmi_one_info(topology, obj, "sys_vendor", "DMISysVendor"); |
| } |
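| /* Each call above reads one line from /sys/class/dmi/id/<file>, e.g. |
|  * (value hypothetical) sys_vendor containing "Dell Inc.", and attaches |
|  * it as an info pair on the given object. */ |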
| |
| void |
| hwloc_look_linux(struct hwloc_topology *topology) |
| { |
| DIR *nodes_dir; |
| unsigned nbnodes; |
| char *cpuset_mntpnt, *cgroup_mntpnt, *cpuset_name = NULL; |
| int err; |
| |
| /* Gather the list of admin-disabled cpus and mems */ |
| hwloc_find_linux_cpuset_mntpnt(&cgroup_mntpnt, &cpuset_mntpnt, topology->backend_params.sysfs.root_fd); |
| if (cgroup_mntpnt || cpuset_mntpnt) { |
| cpuset_name = hwloc_read_linux_cpuset_name(topology->backend_params.sysfs.root_fd, topology->pid); |
| if (cpuset_name) { |
| hwloc_admin_disable_set_from_cpuset(topology, cgroup_mntpnt, cpuset_mntpnt, cpuset_name, "cpus", topology->levels[0][0]->allowed_cpuset); |
| hwloc_admin_disable_set_from_cpuset(topology, cgroup_mntpnt, cpuset_mntpnt, cpuset_name, "mems", topology->levels[0][0]->allowed_nodeset); |
| } |
| free(cgroup_mntpnt); |
| free(cpuset_mntpnt); |
| } |
| |
| nodes_dir = hwloc_opendir("/proc/nodes", topology->backend_params.sysfs.root_fd); |
| if (nodes_dir) { |
| /* Kerrighed */ |
| struct dirent *dirent; |
| char path[128]; |
| hwloc_obj_t machine; |
| hwloc_bitmap_t machine_online_set; |
| |
| /* replace top-level object type with SYSTEM and add some MACHINE underneath */ |
| |
| topology->levels[0][0]->type = HWLOC_OBJ_SYSTEM; |
| topology->levels[0][0]->name = strdup("Kerrighed"); |
| |
| /* No cpuset support for now. */ |
| /* No sys support for now. */ |
| while ((dirent = readdir(nodes_dir)) != NULL) { |
| unsigned long node; |
| if (strncmp(dirent->d_name, "node", 4)) |
| continue; |
| machine_online_set = hwloc_bitmap_alloc(); |
| node = strtoul(dirent->d_name+4, NULL, 0); |
| snprintf(path, sizeof(path), "/proc/nodes/node%lu/cpuinfo", node); |
| err = look_cpuinfo(topology, path, machine_online_set); |
| if (err < 0) |
| continue; |
| hwloc_bitmap_or(topology->levels[0][0]->online_cpuset, topology->levels[0][0]->online_cpuset, machine_online_set); |
| machine = hwloc_alloc_setup_object(HWLOC_OBJ_MACHINE, node); |
| machine->cpuset = machine_online_set; |
| hwloc_debug_1arg_bitmap("machine number %lu has cpuset %s\n", |
| node, machine_online_set); |
| hwloc_insert_object_by_cpuset(topology, machine); |
| |
| /* Get the machine memory attributes */ |
| hwloc_get_kerrighed_node_meminfo_info(topology, node, &machine->memory); |
| |
| /* Gather DMI info */ |
| /* FIXME: get the right DMI info of each machine */ |
| hwloc__get_dmi_info(topology, machine); |
| } |
| closedir(nodes_dir); |
| } else { |
| /* Get the machine memory attributes */ |
| hwloc_get_procfs_meminfo_info(topology, &topology->levels[0][0]->memory); |
| |
| /* Gather NUMA information. Must be after hwloc_get_procfs_meminfo_info so that the hugepage size is known */ |
| look_sysfsnode(topology, "/sys/devices/system/node", &nbnodes); |
| |
| /* if we found some numa nodes, the machine object has no local memory */ |
| if (nbnodes) { |
| unsigned i; |
| topology->levels[0][0]->memory.local_memory = 0; |
| if (topology->levels[0][0]->memory.page_types) |
| for(i=0; i<topology->levels[0][0]->memory.page_types_len; i++) |
| topology->levels[0][0]->memory.page_types[i].count = 0; |
| } |
| |
| /* Gather the list of cpus now */ |
| if (getenv("HWLOC_LINUX_USE_CPUINFO") |
| || (hwloc_access("/sys/devices/system/cpu/cpu0/topology/core_siblings", R_OK, topology->backend_params.sysfs.root_fd) < 0 |
| && hwloc_access("/sys/devices/system/cpu/cpu0/topology/thread_siblings", R_OK, topology->backend_params.sysfs.root_fd) < 0)) { |
| /* fall back to reading /proc/cpuinfo only when /sys/.../topology is unavailable |
| * (kernels before 2.6.16) or contains nothing interesting */ |
| err = look_cpuinfo(topology, "/proc/cpuinfo", topology->levels[0][0]->online_cpuset); |
| if (err < 0) { |
| if (topology->is_thissystem) |
| hwloc_setup_pu_level(topology, hwloc_fallback_nbprocessors(topology)); |
| else |
| /* fsys-root but not this system, no way, assume there's just 1 |
| * processor :/ */ |
| hwloc_setup_pu_level(topology, 1); |
| } |
| } else { |
| look_sysfscpu(topology, "/sys/devices/system/cpu"); |
| } |
| |
| /* Gather DMI info */ |
| hwloc__get_dmi_info(topology, topology->levels[0][0]); |
| } |
| |
| hwloc_add_object_info(topology->levels[0][0], "Backend", "Linux"); |
| if (cpuset_name) { |
| hwloc_add_object_info(topology->levels[0][0], "LinuxCgroup", cpuset_name); |
| free(cpuset_name); |
| } |
| |
| /* gather uname info if fsroot wasn't changed */ |
| if (topology->is_thissystem) |
| hwloc_add_uname_info(topology); |
| } |
| |
| void |
| hwloc_set_linux_hooks(struct hwloc_topology *topology) |
| { |
| topology->set_thisthread_cpubind = hwloc_linux_set_thisthread_cpubind; |
| topology->get_thisthread_cpubind = hwloc_linux_get_thisthread_cpubind; |
| topology->set_thisproc_cpubind = hwloc_linux_set_thisproc_cpubind; |
| topology->get_thisproc_cpubind = hwloc_linux_get_thisproc_cpubind; |
| topology->set_proc_cpubind = hwloc_linux_set_proc_cpubind; |
| topology->get_proc_cpubind = hwloc_linux_get_proc_cpubind; |
| #if HAVE_DECL_PTHREAD_SETAFFINITY_NP |
| topology->set_thread_cpubind = hwloc_linux_set_thread_cpubind; |
| #endif /* HAVE_DECL_PTHREAD_SETAFFINITY_NP */ |
| #if HAVE_DECL_PTHREAD_GETAFFINITY_NP |
| topology->get_thread_cpubind = hwloc_linux_get_thread_cpubind; |
| #endif /* HAVE_DECL_PTHREAD_GETAFFINITY_NP */ |
| topology->get_thisthread_last_cpu_location = hwloc_linux_get_thisthread_last_cpu_location; |
| topology->get_thisproc_last_cpu_location = hwloc_linux_get_thisproc_last_cpu_location; |
| topology->get_proc_last_cpu_location = hwloc_linux_get_proc_last_cpu_location; |
| #ifdef HWLOC_HAVE_SET_MEMPOLICY |
| topology->set_thisthread_membind = hwloc_linux_set_thisthread_membind; |
| topology->get_thisthread_membind = hwloc_linux_get_thisthread_membind; |
| #endif /* HWLOC_HAVE_SET_MEMPOLICY */ |
| #ifdef HWLOC_HAVE_MBIND |
| topology->set_area_membind = hwloc_linux_set_area_membind; |
| topology->alloc_membind = hwloc_linux_alloc_membind; |
| topology->alloc = hwloc_alloc_mmap; |
| topology->free_membind = hwloc_free_mmap; |
| topology->support.membind->firsttouch_membind = 1; |
| topology->support.membind->bind_membind = 1; |
| topology->support.membind->interleave_membind = 1; |
| #endif /* HWLOC_HAVE_MBIND */ |
| #if (defined HWLOC_HAVE_MIGRATE_PAGES) || ((defined HWLOC_HAVE_MBIND) && (defined MPOL_MF_MOVE)) |
| topology->support.membind->migrate_membind = 1; |
| #endif |
| } |