
libobs/util: Various atomic improvements

Add exchange functions to alias the poorly named set functions.

Add store without reading previous. Faster on non-x86 processors.

Add compare-exchange that updates previous to avoid redundant fetch.

On Windows, load bool without conversion from char.

On Windows, load using mov with compiler barrier. Still seq_cst.

On POSIX, use GCC __atomic builtins.
jpark37 · 4 years ago
Commit 1f90f0e36b
2 files changed, 103 insertions and 9 deletions

  1. libobs/util/threading-posix.h (+33, -5)
  2. libobs/util/threading-windows.h (+70, -4)
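
To illustrate the compare-exchange addition described in the commit message (the variant that writes the observed value back through the pointer), here is a minimal caller sketch. The include path and the set_flag_bits_* functions are assumptions for illustration, not part of this commit:

#include <stdbool.h>
#include <util/threading.h> /* assumed include path for these helpers */

/* Hypothetical caller: atomically OR bits into a shared long.
 * With the existing compare-swap, every failed attempt needs a
 * separate reload before retrying. */
static void set_flag_bits_swap(volatile long *flags, long bits)
{
	long expected = os_atomic_load_long(flags);
	while (!os_atomic_compare_swap_long(flags, expected, expected | bits))
		expected = os_atomic_load_long(flags); /* redundant fetch */
}

/* With the new compare-exchange, a failed attempt already returns the
 * current value through 'expected', so no reload is needed. */
static void set_flag_bits_exchange(volatile long *flags, long bits)
{
	long expected = os_atomic_load_long(flags);
	while (!os_atomic_compare_exchange_long(flags, &expected,
						expected | bits))
		; /* 'expected' was refreshed by the failed exchange */
}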

libobs/util/threading-posix.h (+33, -5)

@@ -18,17 +18,27 @@
 
 static inline long os_atomic_inc_long(volatile long *val)
 {
-	return __sync_add_and_fetch(val, 1);
+	return __atomic_add_fetch(val, 1, __ATOMIC_SEQ_CST);
 }
 
 static inline long os_atomic_dec_long(volatile long *val)
 {
-	return __sync_sub_and_fetch(val, 1);
+	return __atomic_sub_fetch(val, 1, __ATOMIC_SEQ_CST);
+}
+
+static inline void os_atomic_store_long(volatile long *ptr, long val)
+{
+	__atomic_store_n(ptr, val, __ATOMIC_SEQ_CST);
 }
 
 static inline long os_atomic_set_long(volatile long *ptr, long val)
 {
-	return __sync_lock_test_and_set(ptr, val);
+	return __atomic_exchange_n(ptr, val, __ATOMIC_SEQ_CST);
+}
+
+static inline long os_atomic_exchange_long(volatile long *ptr, long val)
+{
+	return os_atomic_set_long(ptr, val);
 }
 
 static inline long os_atomic_load_long(const volatile long *ptr)
@@ -39,12 +49,30 @@ static inline long os_atomic_load_long(const volatile long *ptr)
 static inline bool os_atomic_compare_swap_long(volatile long *val, long old_val,
 					       long new_val)
 {
-	return __sync_bool_compare_and_swap(val, old_val, new_val);
+	return __atomic_compare_exchange_n(val, &old_val, new_val, false,
+					   __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST);
+}
+
+static inline bool os_atomic_compare_exchange_long(volatile long *val,
+						   long *old_val, long new_val)
+{
+	return __atomic_compare_exchange_n(val, old_val, new_val, false,
+					   __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST);
+}
+
+static inline void os_atomic_store_bool(volatile bool *ptr, bool val)
+{
+	__atomic_store_n(ptr, val, __ATOMIC_SEQ_CST);
 }
 
 static inline bool os_atomic_set_bool(volatile bool *ptr, bool val)
 {
-	return __sync_lock_test_and_set(ptr, val);
+	return __atomic_exchange_n(ptr, val, __ATOMIC_SEQ_CST);
+}
+
+static inline bool os_atomic_exchange_bool(volatile bool *ptr, bool val)
+{
+	return os_atomic_set_bool(ptr, val);
 }
 
 static inline bool os_atomic_load_bool(const volatile bool *ptr)
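
As a usage note on the renamed helpers, the new exchange aliases read more naturally at call sites than the old "set" names. A minimal run-once guard sketch, with hypothetical names (initialized, init_once_example) that are not part of this commit:

#include <stdbool.h>
#include <util/threading.h> /* assumed include path for these helpers */

static volatile bool initialized = false; /* hypothetical shared flag */

static void init_once_example(void)
{
	/* The exchange returns the previous value, so only the first
	 * thread to flip false -> true performs the one-time setup. */
	if (!os_atomic_exchange_bool(&initialized, true)) {
		/* ... one-time setup ... */
	}
}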

libobs/util/threading-windows.h (+70, -4)

@@ -17,6 +17,12 @@
 #pragma once
 
 #include <intrin.h>
+#include <string.h>
+
+#if !defined(_M_IX86) && !defined(_M_X64) && !defined(_M_ARM) && \
+	!defined(_M_ARM64)
+#error Processor not supported
+#endif
 
 static inline long os_atomic_inc_long(volatile long *val)
 {
@@ -28,14 +34,36 @@ static inline long os_atomic_dec_long(volatile long *val)
 	return _InterlockedDecrement(val);
 }
 
+static inline void os_atomic_store_long(volatile long *ptr, long val)
+{
+#if defined(_M_ARM) || defined(_M_ARM64)
+	__dmb(_ARM64_BARRIER_ISH);
+	__iso_volatile_store32((volatile __int32 *)ptr, val);
+	__dmb(_ARM64_BARRIER_ISH);
+#else
+	_InterlockedExchange(ptr, val);
+#endif
+}
+
 static inline long os_atomic_set_long(volatile long *ptr, long val)
 {
-	return (long)_InterlockedExchange((volatile long *)ptr, (long)val);
+	return _InterlockedExchange(ptr, val);
+}
+
+static inline long os_atomic_exchange_long(volatile long *ptr, long val)
+{
+	return os_atomic_set_long(ptr, val);
 }
 
 static inline long os_atomic_load_long(const volatile long *ptr)
 {
-	return (long)_InterlockedOr((volatile long *)ptr, 0);
+	const long val = __iso_volatile_load32((const volatile __int32 *)ptr);
+#if defined(_M_ARM) || defined(_M_ARM64)
+	__dmb(_ARM64_BARRIER_ISH);
+#else
+	_ReadWriteBarrier();
+#endif
+	return val;
 }
 
 static inline bool os_atomic_compare_swap_long(volatile long *val, long old_val,
@@ -44,12 +72,50 @@ static inline bool os_atomic_compare_swap_long(volatile long *val, long old_val,
 	return _InterlockedCompareExchange(val, new_val, old_val) == old_val;
 }
 
+static inline bool os_atomic_compare_exchange_long(volatile long *val,
+						   long *old_ptr, long new_val)
+{
+	const long old_val = *old_ptr;
+	const long previous =
+		_InterlockedCompareExchange(val, new_val, old_val);
+	*old_ptr = previous;
+	return previous == old_val;
+}
+
+static inline void os_atomic_store_bool(volatile bool *ptr, bool val)
+{
+#if defined(_M_ARM) || defined(_M_ARM64)
+	__dmb(_ARM64_BARRIER_ISH);
+	__iso_volatile_store8((volatile char *)ptr, val);
+	__dmb(_ARM64_BARRIER_ISH);
+#else
+	_InterlockedExchange8((volatile char *)ptr, (char)val);
+#endif
+}
+
 static inline bool os_atomic_set_bool(volatile bool *ptr, bool val)
 {
-	return !!_InterlockedExchange8((volatile char *)ptr, (char)val);
+	const char c = _InterlockedExchange8((volatile char *)ptr, (char)val);
+	bool b;
+
+	/* Avoid unnecessary char to bool conversion. Value is known to be 0 or 1. */
+	memcpy(&b, &c, sizeof(b));
+
+	return b;
+}
+
+static inline bool os_atomic_exchange_bool(volatile bool *ptr, bool val)
+{
+	return os_atomic_set_bool(ptr, val);
 }
 
 static inline bool os_atomic_load_bool(const volatile bool *ptr)
 {
-	return !!_InterlockedOr8((volatile char *)ptr, 0);
+	const char val = __iso_volatile_load8((const volatile char *)ptr);
+#if defined(_M_ARM) || defined(_M_ARM64)
+	__dmb(_ARM64_BARRIER_ISH);
+#else
+	_ReadWriteBarrier();
+#endif
+	return val;
 }
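
To show where the new Windows store and load paths are expected to pay off, here is a hedged stop-flag sketch; stop_requested, request_stop, and worker_loop are illustrative names, not part of this commit:

#include <stdbool.h>
#include <util/threading.h> /* assumed include path for these helpers */

static volatile bool stop_requested = false; /* hypothetical shared flag */

/* Publisher: the store never needs the previous value, so on ARM it
 * avoids the read-modify-write entirely ("faster on non-x86
 * processors"), while on x86 it still compiles to an exchange to keep
 * the full fence. */
static void request_stop(void)
{
	os_atomic_store_bool(&stop_requested, true);
}

/* Reader: still seq_cst, but per the diff this is now a plain mov plus
 * a compiler barrier on x86 (a dmb-fenced load on ARM) instead of a
 * locked OR. */
static void worker_loop(void)
{
	while (!os_atomic_load_bool(&stop_requested)) {
		/* ... do work until asked to stop ... */
	}
}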