sl@0: /* sl@0: * LIBOIL - Library of Optimized Inner Loops sl@0: * Copyright (c) 2003,2004 David A. Schleef sl@0: * All rights reserved. sl@0: * sl@0: * Redistribution and use in source and binary forms, with or without sl@0: * modification, are permitted provided that the following conditions sl@0: * are met: sl@0: * 1. Redistributions of source code must retain the above copyright sl@0: * notice, this list of conditions and the following disclaimer. sl@0: * 2. Redistributions in binary form must reproduce the above copyright sl@0: * notice, this list of conditions and the following disclaimer in the sl@0: * documentation and/or other materials provided with the distribution. sl@0: * sl@0: * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR sl@0: * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED sl@0: * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE sl@0: * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, sl@0: * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES sl@0: * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR sl@0: * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) sl@0: * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, sl@0: * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING sl@0: * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE sl@0: * POSSIBILITY OF SUCH DAMAGE. sl@0: */ sl@0: sl@0: #ifdef HAVE_CONFIG_H sl@0: #include "config.h" sl@0: #endif sl@0: #include sl@0: #include sl@0: #include sl@0: #include sl@0: #include sl@0: sl@0: #ifdef HAVE_UNISTD_H sl@0: #include sl@0: #endif sl@0: #include sl@0: #include sl@0: #include sl@0: #include sl@0: #include sl@0: #include sl@0: #ifdef HAVE_SYS_TIME_H sl@0: #include sl@0: #endif sl@0: #include sl@0: sl@0: #if defined(__FreeBSD__) || defined(__APPLE__) sl@0: #include sl@0: #include sl@0: #endif sl@0: sl@0: #ifdef __sun sl@0: #include sl@0: #endif sl@0: sl@0: /***** i386, amd64 *****/ sl@0: sl@0: #if defined(__sun) sl@0: #define USE_I386_GETISAX sl@0: #else sl@0: #define USE_I386_CPUID sl@0: #endif sl@0: sl@0: sl@0: #ifdef USE_I386_CPUINFO sl@0: static void sl@0: oil_cpu_i386_getflags_cpuinfo (char *cpuinfo) sl@0: { sl@0: char *cpuinfo_flags; sl@0: char **flags; sl@0: char **f; sl@0: sl@0: cpuinfo_flags = get_tag_value (cpuinfo, "flags"); sl@0: if (cpuinfo_flags == NULL) { sl@0: free (cpuinfo); sl@0: return; sl@0: } sl@0: sl@0: flags = strsplit(cpuinfo_flags); sl@0: for (f = flags; *f; f++) { sl@0: if (strcmp (*f, "cmov") == 0) { sl@0: OIL_DEBUG ("cpu flag %s", *f); sl@0: oil_cpu_flags |= OIL_IMPL_FLAG_CMOV; sl@0: } sl@0: if (strcmp (*f, "mmx") == 0) { sl@0: OIL_DEBUG ("cpu flag %s", *f); sl@0: oil_cpu_flags |= OIL_IMPL_FLAG_MMX; sl@0: } sl@0: if (strcmp (*f, "sse") == 0) { sl@0: OIL_DEBUG ("cpu flag %s", *f); sl@0: oil_cpu_flags |= OIL_IMPL_FLAG_SSE; sl@0: } sl@0: if (strcmp (*f, "mmxext") == 0) { sl@0: OIL_DEBUG ("cpu flag %s", *f); sl@0: oil_cpu_flags |= OIL_IMPL_FLAG_MMXEXT; sl@0: } sl@0: if (strcmp (*f, "sse2") == 0) { sl@0: OIL_DEBUG ("cpu flag %s", *f); sl@0: oil_cpu_flags |= OIL_IMPL_FLAG_SSE2; sl@0: oil_cpu_flags |= OIL_IMPL_FLAG_MMXEXT; sl@0: } sl@0: if (strcmp (*f, "3dnow") == 0) { sl@0: OIL_DEBUG ("cpu flag %s", *f); sl@0: oil_cpu_flags |= OIL_IMPL_FLAG_3DNOW; sl@0: } sl@0: if (strcmp (*f, "3dnowext") == 0) { sl@0: OIL_DEBUG ("cpu flag %s", *f); sl@0: oil_cpu_flags |= OIL_IMPL_FLAG_3DNOWEXT; sl@0: } sl@0: if (strcmp (*f, "sse3") == 0) { sl@0: OIL_DEBUG ("cpu flag %s", *f); sl@0: oil_cpu_flags |= OIL_IMPL_FLAG_SSE3; sl@0: oil_cpu_flags |= OIL_IMPL_FLAG_SSE2; sl@0: oil_cpu_flags |= OIL_IMPL_FLAG_MMXEXT; sl@0: } sl@0: if (strcmp (*f, "ssse3") == 0) { sl@0: OIL_DEBUG ("cpu flag %s", *f); sl@0: oil_cpu_flags |= OIL_IMPL_FLAG_SSSE3; sl@0: oil_cpu_flags |= OIL_IMPL_FLAG_SSE3; sl@0: oil_cpu_flags |= OIL_IMPL_FLAG_SSE2; sl@0: oil_cpu_flags |= OIL_IMPL_FLAG_MMXEXT; sl@0: } sl@0: sl@0: free (*f); sl@0: } sl@0: free (flags); sl@0: free (cpuinfo); sl@0: free (cpuinfo_flags); sl@0: } sl@0: #endif sl@0: sl@0: #ifdef HAVE_GCC_ASM sl@0: static unsigned long sl@0: oil_profile_stamp_rdtsc(void) sl@0: { sl@0: unsigned long ts; sl@0: __asm__ __volatile__("rdtsc\n" : "=a" (ts) : : "edx"); sl@0: return ts; sl@0: } sl@0: #endif sl@0: sl@0: #ifdef USE_I386_CPUID sl@0: #ifdef __i386__ sl@0: static void sl@0: get_cpuid (uint32_t op, uint32_t *a, uint32_t *b, uint32_t *c, uint32_t *d) sl@0: { sl@0: /* sl@0: __asm__ ( sl@0: " pushl %%ebx\n" sl@0: " cpuid\n" sl@0: " mov %%ebx, %%esi\n" sl@0: " popl %%ebx\n" sl@0: : "=a" (*a), "=S" (*b), "=c" (*c), "=d" (*d) sl@0: : "0" (op)); sl@0: */ sl@0: } sl@0: #endif sl@0: sl@0: #ifdef __amd64__ sl@0: static void sl@0: get_cpuid (uint32_t op, uint32_t *a, uint32_t *b, uint32_t *c, uint32_t *d) sl@0: { sl@0: __asm__ ( sl@0: " pushq %%rbx\n" sl@0: " cpuid\n" sl@0: " mov %%ebx, %%esi\n" sl@0: " popq %%rbx\n" sl@0: : "=a" (*a), "=S" (*b), "=c" (*c), "=d" (*d) sl@0: : "0" (op)); sl@0: } sl@0: #endif sl@0: sl@0: static void sl@0: test_cpuid (void *ignored) sl@0: { sl@0: uint32_t eax, ebx, ecx, edx; sl@0: sl@0: get_cpuid (0x00000000, &eax, &ebx, &ecx, &edx); sl@0: } sl@0: sl@0: static void sl@0: oil_cpu_detect_cpuid (void) sl@0: { sl@0: uint32_t eax, ebx, ecx, edx; sl@0: uint32_t level; sl@0: char vendor[13] = { 0 }; sl@0: int ret; sl@0: sl@0: oil_fault_check_enable (); sl@0: ret = oil_fault_check_try(test_cpuid, NULL); sl@0: oil_fault_check_disable (); sl@0: if (!ret) { sl@0: /* CPU thinks cpuid is an illegal instruction. */ sl@0: return; sl@0: } sl@0: sl@0: get_cpuid (0x00000000, &level, (uint32_t *)(vendor+0), sl@0: (uint32_t *)(vendor+8), (uint32_t *)(vendor+4)); sl@0: sl@0: OIL_DEBUG("cpuid %d %s", level, vendor); sl@0: sl@0: if (level < 1) { sl@0: return; sl@0: } sl@0: sl@0: get_cpuid (0x00000001, &eax, &ebx, &ecx, &edx); sl@0: sl@0: #ifdef HAVE_GCC_ASM sl@0: if (edx & (1<<4)) { sl@0: _oil_profile_stamp = oil_profile_stamp_rdtsc; sl@0: } sl@0: #endif sl@0: sl@0: /* Intel flags */ sl@0: if (edx & (1<<15)) { sl@0: oil_cpu_flags |= OIL_IMPL_FLAG_CMOV; sl@0: } sl@0: if (edx & (1<<23)) { sl@0: oil_cpu_flags |= OIL_IMPL_FLAG_MMX; sl@0: } sl@0: if (edx & (1<<25)) { sl@0: oil_cpu_flags |= OIL_IMPL_FLAG_SSE; sl@0: } sl@0: if (edx & (1<<26)) { sl@0: oil_cpu_flags |= OIL_IMPL_FLAG_SSE2; sl@0: oil_cpu_flags |= OIL_IMPL_FLAG_MMXEXT; sl@0: } sl@0: if (ecx & (1<<0)) { sl@0: oil_cpu_flags |= OIL_IMPL_FLAG_SSE3; sl@0: } sl@0: sl@0: if (memcmp (vendor, "AuthenticAMD", 12) == 0) { sl@0: get_cpuid (0x80000001, &eax, &ebx, &ecx, &edx); sl@0: sl@0: /* AMD flags */ sl@0: if (edx & (1<<22)) { sl@0: oil_cpu_flags |= OIL_IMPL_FLAG_MMXEXT; sl@0: } sl@0: if (edx & (1<<31)) { sl@0: oil_cpu_flags |= OIL_IMPL_FLAG_3DNOW; sl@0: } sl@0: if (edx & (1<<30)) { sl@0: oil_cpu_flags |= OIL_IMPL_FLAG_3DNOWEXT; sl@0: } sl@0: sl@0: get_cpuid (0x80000005, &eax, &ebx, &ecx, &edx); sl@0: sl@0: OIL_INFO("L1 D-cache: %d kbytes, %d-way, %d lines/tag, %d line size", sl@0: (ecx>>24)&0xff, (ecx>>16)&0xff, (ecx>>8)&0xff, ecx&0xff); sl@0: OIL_INFO("L1 I-cache: %d kbytes, %d-way, %d lines/tag, %d line size", sl@0: (edx>>24)&0xff, (edx>>16)&0xff, (edx>>8)&0xff, edx&0xff); sl@0: sl@0: get_cpuid (0x80000006, &eax, &ebx, &ecx, &edx); sl@0: OIL_INFO("L2 cache: %d kbytes, %d assoc, %d lines/tag, %d line size", sl@0: (ecx>>16)&0xffff, (ecx>>12)&0xf, (ecx>>8)&0xf, ecx&0xff); sl@0: } sl@0: } sl@0: #endif sl@0: sl@0: #ifdef USE_I386_GETISAX sl@0: static void sl@0: oil_cpu_detect_getisax (void) sl@0: { sl@0: uint_t ui; sl@0: sl@0: getisax (&ui, 1); sl@0: sl@0: if (ui & AV_386_CMOV) { sl@0: oil_cpu_flags |= OIL_IMPL_FLAG_CMOV; sl@0: } sl@0: if (ui & AV_386_MMX) { sl@0: oil_cpu_flags |= OIL_IMPL_FLAG_MMX; sl@0: } sl@0: if (ui & AV_386_SSE) { sl@0: oil_cpu_flags |= OIL_IMPL_FLAG_SSE; sl@0: } sl@0: if (ui & AV_386_SSE2) { sl@0: oil_cpu_flags |= OIL_IMPL_FLAG_SSE2; sl@0: oil_cpu_flags |= OIL_IMPL_FLAG_MMXEXT; sl@0: } sl@0: if (ui & AV_386_SSE3) { sl@0: oil_cpu_flags |= OIL_IMPL_FLAG_SSE3; sl@0: } sl@0: if (ui & AV_386_AMD_3DNow) { sl@0: oil_cpu_flags |= OIL_IMPL_FLAG_3DNOW; sl@0: } sl@0: if (ui & AV_386_AMD_3DNowx) { sl@0: oil_cpu_flags |= OIL_IMPL_FLAG_3DNOWEXT; sl@0: } sl@0: if (ui & AV_386_AMD_MMX) { sl@0: oil_cpu_flags |= OIL_IMPL_FLAG_MMXEXT; sl@0: } sl@0: } sl@0: #endif sl@0: sl@0: /* Reduce the set of CPU capabilities detected by whatever detection mechanism sl@0: * was chosen, according to kernel limitations. SSE requires kernel support for sl@0: * use. sl@0: */ sl@0: static void sl@0: oil_cpu_detect_kernel_support (void) sl@0: { sl@0: #if defined(__FreeBSD__) || defined(__FreeBSD_kernel__) || defined(__APPLE__) sl@0: int ret, enabled; sl@0: size_t len; sl@0: sl@0: len = sizeof(enabled); sl@0: ret = sysctlbyname("hw.instruction_sse", &enabled, &len, NULL, 0); sl@0: if (ret || !enabled) { sl@0: oil_cpu_flags &= ~(OIL_IMPL_FLAG_SSE | OIL_IMPL_FLAG_SSE2 | sl@0: OIL_IMPL_FLAG_MMXEXT | OIL_IMPL_FLAG_SSE3); sl@0: } sl@0: #elif defined(__linux__) sl@0: /* sl@0: * Might also want to grow a check for the old RedHat + Linux 2.2 sl@0: * unmasked SSE FPU exception bug. Other than that, if /proc/cpuinfo sl@0: * reported SSE, then it's safe. sl@0: */ sl@0: #elif defined(__sun) sl@0: /* Solaris is OK */ sl@0: #elif defined(__NetBSD__) sl@0: /* NetBSD is OK */ sl@0: #else sl@0: sl@0: OIL_WARNING("Operating system is not known to support SSE. " sl@0: "Assuming it does, which might cause problems"); sl@0: #if 0 sl@0: oil_cpu_flags &= ~(OIL_IMPL_FLAG_SSE | OIL_IMPL_FLAG_SSE2 | sl@0: OIL_IMPL_FLAG_MMXEXT | OIL_IMPL_FLAG_SSE3); sl@0: #endif sl@0: #endif sl@0: } sl@0: sl@0: void sl@0: oil_cpu_detect_arch(void) sl@0: { sl@0: #ifdef USE_I386_CPUID sl@0: oil_cpu_detect_cpuid (); sl@0: #endif sl@0: #ifdef USE_I386_GETISAX sl@0: oil_cpu_detect_getisax (); sl@0: #endif sl@0: #ifdef USE_I386_CPUINFO sl@0: oil_cpu_detect_cpuinfo (); sl@0: #endif sl@0: sl@0: oil_cpu_detect_kernel_support (); sl@0: } sl@0: sl@0: