1/* 2 * This file is part of FFmpeg. 3 * 4 * FFmpeg is free software; you can redistribute it and/or 5 * modify it under the terms of the GNU Lesser General Public 6 * License as published by the Free Software Foundation; either 7 * version 2.1 of the License, or (at your option) any later version. 8 * 9 * FFmpeg is distributed in the hope that it will be useful, 10 * but WITHOUT ANY WARRANTY; without even the implied warranty of 11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 12 * Lesser General Public License for more details. 13 * 14 * You should have received a copy of the GNU Lesser General Public 15 * License along with FFmpeg; if not, write to the Free Software 16 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA 17 */ 18 19#include "config.h" 20 21#if HAVE_SCHED_GETAFFINITY 22#ifndef _GNU_SOURCE 23# define _GNU_SOURCE 24#endif 25#include <sched.h> 26#endif 27 28#include <stddef.h> 29#include <stdint.h> 30#include <stdatomic.h> 31 32#include "attributes.h" 33#include "cpu.h" 34#include "cpu_internal.h" 35#include "opt.h" 36#include "common.h" 37 38#if HAVE_GETPROCESSAFFINITYMASK || HAVE_WINRT 39#include <windows.h> 40#endif 41#if HAVE_SYSCTL 42#if HAVE_SYS_PARAM_H 43#include <sys/param.h> 44#endif 45#include <sys/types.h> 46#include <sys/sysctl.h> 47#endif 48#if HAVE_UNISTD_H 49#include <unistd.h> 50#endif 51 52static atomic_int cpu_flags = ATOMIC_VAR_INIT(-1); 53static atomic_int cpu_count = ATOMIC_VAR_INIT(-1); 54 55static int get_cpu_flags(void) 56{ 57#if ARCH_MIPS 58 return ff_get_cpu_flags_mips(); 59#elif ARCH_AARCH64 60 return ff_get_cpu_flags_aarch64(); 61#elif ARCH_ARM 62 return ff_get_cpu_flags_arm(); 63#elif ARCH_PPC 64 return ff_get_cpu_flags_ppc(); 65#elif ARCH_X86 66 return ff_get_cpu_flags_x86(); 67#elif ARCH_LOONGARCH 68 return ff_get_cpu_flags_loongarch(); 69#endif 70 return 0; 71} 72 73void av_force_cpu_flags(int arg){ 74 if (ARCH_X86 && 75 (arg & ( AV_CPU_FLAG_3DNOW | 76 AV_CPU_FLAG_3DNOWEXT | 77 AV_CPU_FLAG_MMXEXT | 78 AV_CPU_FLAG_SSE | 79 AV_CPU_FLAG_SSE2 | 80 AV_CPU_FLAG_SSE2SLOW | 81 AV_CPU_FLAG_SSE3 | 82 AV_CPU_FLAG_SSE3SLOW | 83 AV_CPU_FLAG_SSSE3 | 84 AV_CPU_FLAG_SSE4 | 85 AV_CPU_FLAG_SSE42 | 86 AV_CPU_FLAG_AVX | 87 AV_CPU_FLAG_AVXSLOW | 88 AV_CPU_FLAG_XOP | 89 AV_CPU_FLAG_FMA3 | 90 AV_CPU_FLAG_FMA4 | 91 AV_CPU_FLAG_AVX2 | 92 AV_CPU_FLAG_AVX512 )) 93 && !(arg & AV_CPU_FLAG_MMX)) { 94 av_log(NULL, AV_LOG_WARNING, "MMX implied by specified flags\n"); 95 arg |= AV_CPU_FLAG_MMX; 96 } 97 98 atomic_store_explicit(&cpu_flags, arg, memory_order_relaxed); 99} 100 101int av_get_cpu_flags(void) 102{ 103 int flags = atomic_load_explicit(&cpu_flags, memory_order_relaxed); 104 if (flags == -1) { 105 flags = get_cpu_flags(); 106 atomic_store_explicit(&cpu_flags, flags, memory_order_relaxed); 107 } 108 return flags; 109} 110 111int av_parse_cpu_caps(unsigned *flags, const char *s) 112{ 113 static const AVOption cpuflags_opts[] = { 114 { "flags" , NULL, 0, AV_OPT_TYPE_FLAGS, { .i64 = 0 }, INT64_MIN, INT64_MAX, .unit = "flags" }, 115#if ARCH_PPC 116 { "altivec" , NULL, 0, AV_OPT_TYPE_CONST, { .i64 = AV_CPU_FLAG_ALTIVEC }, .unit = "flags" }, 117#elif ARCH_X86 118 { "mmx" , NULL, 0, AV_OPT_TYPE_CONST, { .i64 = AV_CPU_FLAG_MMX }, .unit = "flags" }, 119 { "mmx2" , NULL, 0, AV_OPT_TYPE_CONST, { .i64 = AV_CPU_FLAG_MMX2 }, .unit = "flags" }, 120 { "mmxext" , NULL, 0, AV_OPT_TYPE_CONST, { .i64 = AV_CPU_FLAG_MMX2 }, .unit = "flags" }, 121 { "sse" , NULL, 0, AV_OPT_TYPE_CONST, { .i64 = AV_CPU_FLAG_SSE }, .unit = "flags" }, 122 { "sse2" , NULL, 0, AV_OPT_TYPE_CONST, { .i64 = AV_CPU_FLAG_SSE2 }, .unit = "flags" }, 123 { "sse2slow", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = AV_CPU_FLAG_SSE2SLOW }, .unit = "flags" }, 124 { "sse3" , NULL, 0, AV_OPT_TYPE_CONST, { .i64 = AV_CPU_FLAG_SSE3 }, .unit = "flags" }, 125 { "sse3slow", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = AV_CPU_FLAG_SSE3SLOW }, .unit = "flags" }, 126 { "ssse3" , NULL, 0, AV_OPT_TYPE_CONST, { .i64 = AV_CPU_FLAG_SSSE3 }, .unit = "flags" }, 127 { "atom" , NULL, 0, AV_OPT_TYPE_CONST, { .i64 = AV_CPU_FLAG_ATOM }, .unit = "flags" }, 128 { "sse4.1" , NULL, 0, AV_OPT_TYPE_CONST, { .i64 = AV_CPU_FLAG_SSE4 }, .unit = "flags" }, 129 { "sse4.2" , NULL, 0, AV_OPT_TYPE_CONST, { .i64 = AV_CPU_FLAG_SSE42 }, .unit = "flags" }, 130 { "avx" , NULL, 0, AV_OPT_TYPE_CONST, { .i64 = AV_CPU_FLAG_AVX }, .unit = "flags" }, 131 { "avxslow" , NULL, 0, AV_OPT_TYPE_CONST, { .i64 = AV_CPU_FLAG_AVXSLOW }, .unit = "flags" }, 132 { "xop" , NULL, 0, AV_OPT_TYPE_CONST, { .i64 = AV_CPU_FLAG_XOP }, .unit = "flags" }, 133 { "fma3" , NULL, 0, AV_OPT_TYPE_CONST, { .i64 = AV_CPU_FLAG_FMA3 }, .unit = "flags" }, 134 { "fma4" , NULL, 0, AV_OPT_TYPE_CONST, { .i64 = AV_CPU_FLAG_FMA4 }, .unit = "flags" }, 135 { "avx2" , NULL, 0, AV_OPT_TYPE_CONST, { .i64 = AV_CPU_FLAG_AVX2 }, .unit = "flags" }, 136 { "bmi1" , NULL, 0, AV_OPT_TYPE_CONST, { .i64 = AV_CPU_FLAG_BMI1 }, .unit = "flags" }, 137 { "bmi2" , NULL, 0, AV_OPT_TYPE_CONST, { .i64 = AV_CPU_FLAG_BMI2 }, .unit = "flags" }, 138 { "3dnow" , NULL, 0, AV_OPT_TYPE_CONST, { .i64 = AV_CPU_FLAG_3DNOW }, .unit = "flags" }, 139 { "3dnowext", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = AV_CPU_FLAG_3DNOWEXT }, .unit = "flags" }, 140 { "cmov", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = AV_CPU_FLAG_CMOV }, .unit = "flags" }, 141 { "aesni", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = AV_CPU_FLAG_AESNI }, .unit = "flags" }, 142 { "avx512" , NULL, 0, AV_OPT_TYPE_CONST, { .i64 = AV_CPU_FLAG_AVX512 }, .unit = "flags" }, 143 { "avx512icl", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = AV_CPU_FLAG_AVX512ICL }, .unit = "flags" }, 144 { "slowgather", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = AV_CPU_FLAG_SLOW_GATHER }, .unit = "flags" }, 145 146#define CPU_FLAG_P2 AV_CPU_FLAG_CMOV | AV_CPU_FLAG_MMX 147#define CPU_FLAG_P3 CPU_FLAG_P2 | AV_CPU_FLAG_MMX2 | AV_CPU_FLAG_SSE 148#define CPU_FLAG_P4 CPU_FLAG_P3| AV_CPU_FLAG_SSE2 149 { "pentium2", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = CPU_FLAG_P2 }, .unit = "flags" }, 150 { "pentium3", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = CPU_FLAG_P3 }, .unit = "flags" }, 151 { "pentium4", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = CPU_FLAG_P4 }, .unit = "flags" }, 152 153#define CPU_FLAG_K62 AV_CPU_FLAG_MMX | AV_CPU_FLAG_3DNOW 154#define CPU_FLAG_ATHLON CPU_FLAG_K62 | AV_CPU_FLAG_CMOV | AV_CPU_FLAG_3DNOWEXT | AV_CPU_FLAG_MMX2 155#define CPU_FLAG_ATHLONXP CPU_FLAG_ATHLON | AV_CPU_FLAG_SSE 156#define CPU_FLAG_K8 CPU_FLAG_ATHLONXP | AV_CPU_FLAG_SSE2 157 { "k6", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = AV_CPU_FLAG_MMX }, .unit = "flags" }, 158 { "k62", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = CPU_FLAG_K62 }, .unit = "flags" }, 159 { "athlon", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = CPU_FLAG_ATHLON }, .unit = "flags" }, 160 { "athlonxp", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = CPU_FLAG_ATHLONXP }, .unit = "flags" }, 161 { "k8", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = CPU_FLAG_K8 }, .unit = "flags" }, 162#elif ARCH_ARM 163 { "armv5te", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = AV_CPU_FLAG_ARMV5TE }, .unit = "flags" }, 164 { "armv6", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = AV_CPU_FLAG_ARMV6 }, .unit = "flags" }, 165 { "armv6t2", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = AV_CPU_FLAG_ARMV6T2 }, .unit = "flags" }, 166 { "vfp", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = AV_CPU_FLAG_VFP }, .unit = "flags" }, 167 { "vfp_vm", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = AV_CPU_FLAG_VFP_VM }, .unit = "flags" }, 168 { "vfpv3", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = AV_CPU_FLAG_VFPV3 }, .unit = "flags" }, 169 { "neon", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = AV_CPU_FLAG_NEON }, .unit = "flags" }, 170 { "setend", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = AV_CPU_FLAG_SETEND }, .unit = "flags" }, 171#elif ARCH_AARCH64 172 { "armv8", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = AV_CPU_FLAG_ARMV8 }, .unit = "flags" }, 173 { "neon", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = AV_CPU_FLAG_NEON }, .unit = "flags" }, 174 { "vfp", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = AV_CPU_FLAG_VFP }, .unit = "flags" }, 175#elif ARCH_MIPS 176 { "mmi", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = AV_CPU_FLAG_MMI }, .unit = "flags" }, 177 { "msa", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = AV_CPU_FLAG_MSA }, .unit = "flags" }, 178#elif ARCH_LOONGARCH 179 { "lsx", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = AV_CPU_FLAG_LSX }, .unit = "flags" }, 180 { "lasx", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = AV_CPU_FLAG_LASX }, .unit = "flags" }, 181#endif 182 { NULL }, 183 }; 184 static const AVClass class = { 185 .class_name = "cpuflags", 186 .item_name = av_default_item_name, 187 .option = cpuflags_opts, 188 .version = LIBAVUTIL_VERSION_INT, 189 }; 190 const AVClass *pclass = &class; 191 192 return av_opt_eval_flags(&pclass, &cpuflags_opts[0], s, flags); 193} 194 195int av_cpu_count(void) 196{ 197 static atomic_int printed = ATOMIC_VAR_INIT(0); 198 199 int nb_cpus = 1; 200 int count = 0; 201#if HAVE_WINRT 202 SYSTEM_INFO sysinfo; 203#endif 204#if HAVE_SCHED_GETAFFINITY && defined(CPU_COUNT) 205 cpu_set_t cpuset; 206 207 CPU_ZERO(&cpuset); 208 209 if (!sched_getaffinity(0, sizeof(cpuset), &cpuset)) 210 nb_cpus = CPU_COUNT(&cpuset); 211#elif HAVE_GETPROCESSAFFINITYMASK 212 DWORD_PTR proc_aff, sys_aff; 213 if (GetProcessAffinityMask(GetCurrentProcess(), &proc_aff, &sys_aff)) 214 nb_cpus = av_popcount64(proc_aff); 215#elif HAVE_SYSCTL && defined(HW_NCPUONLINE) 216 int mib[2] = { CTL_HW, HW_NCPUONLINE }; 217 size_t len = sizeof(nb_cpus); 218 219 if (sysctl(mib, 2, &nb_cpus, &len, NULL, 0) == -1) 220 nb_cpus = 0; 221#elif HAVE_SYSCTL && defined(HW_NCPU) 222 int mib[2] = { CTL_HW, HW_NCPU }; 223 size_t len = sizeof(nb_cpus); 224 225 if (sysctl(mib, 2, &nb_cpus, &len, NULL, 0) == -1) 226 nb_cpus = 0; 227#elif HAVE_SYSCONF && defined(_SC_NPROC_ONLN) 228 nb_cpus = sysconf(_SC_NPROC_ONLN); 229#elif HAVE_SYSCONF && defined(_SC_NPROCESSORS_ONLN) 230 nb_cpus = sysconf(_SC_NPROCESSORS_ONLN); 231#elif HAVE_WINRT 232 GetNativeSystemInfo(&sysinfo); 233 nb_cpus = sysinfo.dwNumberOfProcessors; 234#endif 235 236 if (!atomic_exchange_explicit(&printed, 1, memory_order_relaxed)) 237 av_log(NULL, AV_LOG_DEBUG, "detected %d logical cores\n", nb_cpus); 238 239 count = atomic_load_explicit(&cpu_count, memory_order_relaxed); 240 241 if (count > 0) { 242 nb_cpus = count; 243 av_log(NULL, AV_LOG_DEBUG, "overriding to %d logical cores\n", nb_cpus); 244 } 245 246 return nb_cpus; 247} 248 249void av_cpu_force_count(int count) 250{ 251 atomic_store_explicit(&cpu_count, count, memory_order_relaxed); 252} 253 254size_t av_cpu_max_align(void) 255{ 256#if ARCH_MIPS 257 return ff_get_cpu_max_align_mips(); 258#elif ARCH_AARCH64 259 return ff_get_cpu_max_align_aarch64(); 260#elif ARCH_ARM 261 return ff_get_cpu_max_align_arm(); 262#elif ARCH_PPC 263 return ff_get_cpu_max_align_ppc(); 264#elif ARCH_X86 265 return ff_get_cpu_max_align_x86(); 266#elif ARCH_LOONGARCH 267 return ff_get_cpu_max_align_loongarch(); 268#endif 269 270 return 8; 271} 272