Browse Source

Use force inlining of YUV conversion functions

Force inlining of the 444->420 conversion functions because their CPU
usage goes up pretty heavily without it when compiling without
optimizations
jp9000 11 years ago
parent
commit
4be4dd735e
3 changed files with 14 additions and 5 deletions
  1. 5 5
      libobs/media-io/format-conversion.c
  2. 6 0
      libobs/util/c99defs.h
  3. 3 0
      vs/2013/libobs/libobs.vcxproj

+ 5 - 5
libobs/media-io/format-conversion.c

@@ -19,17 +19,17 @@
 #include <xmmintrin.h>
 #include <emmintrin.h>
 
-static inline uint32_t get_m128_32_0(const __m128i val)
+static FORCE_INLINE uint32_t get_m128_32_0(const __m128i val)
 {
 	return *(uint32_t* const)&val;
 }
 
-static inline uint32_t get_m128_32_1(const __m128i val)
+static FORCE_INLINE uint32_t get_m128_32_1(const __m128i val)
 {
 	return *(((uint32_t* const)&val)+1);
 }
 
-static inline void pack_lum(uint8_t *lum_plane,
+static FORCE_INLINE void pack_lum(uint8_t *lum_plane,
 		uint32_t lum_pos0, uint32_t lum_pos1,
 		const __m128i line1, const __m128i line2,
 		const __m128i lum_mask)
@@ -43,7 +43,7 @@ static inline void pack_lum(uint8_t *lum_plane,
 	*(uint32_t*)(lum_plane+lum_pos1) = get_m128_32_1(pack_val);
 }
 
-static inline void pack_chroma_1plane(uint8_t *uv_plane,
+static FORCE_INLINE void pack_chroma_1plane(uint8_t *uv_plane,
 		uint32_t chroma_pos,
 		const __m128i line1, const __m128i line2,
 		const __m128i uv_mask)
@@ -61,7 +61,7 @@ static inline void pack_chroma_1plane(uint8_t *uv_plane,
 	*(uint32_t*)(uv_plane+chroma_pos) = get_m128_32_0(avg_val);
 }
 
-static inline void pack_chroma_2plane(uint8_t *u_plane, uint8_t *v_plane,
+static FORCE_INLINE void pack_chroma_2plane(uint8_t *u_plane, uint8_t *v_plane,
 		uint32_t chroma_pos,
 		const __m128i line1, const __m128i line2,
 		const __m128i uv_mask)

+ 6 - 0
libobs/util/c99defs.h

@@ -36,6 +36,12 @@
 #define inline __inline
 #endif
 
+#ifdef _MSC_VER
+#define FORCE_INLINE __forceinline
+#else
+#define FORCE_INLINE inline __attribute__((always_inline)) /* attribute syntax needs double parentheses; keep 'inline' since the macro replaces it at use sites */
+#endif
+
 #define EXPORT __declspec(dllexport)
 #else
 #define EXPORT

+ 3 - 0
vs/2013/libobs/libobs.vcxproj

@@ -194,6 +194,8 @@
       <ExceptionHandling>false</ExceptionHandling>
       <RuntimeLibrary>MultiThreadedDebug</RuntimeLibrary>
       <AdditionalIncludeDirectories>../../../libobs/util/vc</AdditionalIncludeDirectories>
+      <DebugInformationFormat>ProgramDatabase</DebugInformationFormat>
+      <InlineFunctionExpansion>OnlyExplicitInline</InlineFunctionExpansion>
     </ClCompile>
     <Link>
       <SubSystem>Windows</SubSystem>
@@ -215,6 +217,7 @@
       <ExceptionHandling>false</ExceptionHandling>
       <RuntimeLibrary>MultiThreadedDebug</RuntimeLibrary>
       <AdditionalIncludeDirectories>../../../libobs/util/vc</AdditionalIncludeDirectories>
+      <InlineFunctionExpansion>OnlyExplicitInline</InlineFunctionExpansion>
     </ClCompile>
     <Link>
       <SubSystem>Windows</SubSystem>