From 509554ba1dc80a12bb27192255d99a1d9df6fcb9 Mon Sep 17 00:00:00 2001 From: Harshit-Dhanwalkar Date: Sun, 24 May 2026 18:46:17 +0530 Subject: [PATCH] Implement SIMD for YUYV to grayscale conversion --- C/Makefile | 3 ++- C/src/capture.c | 29 +++++++++++++++++++++++++---- 2 files changed, 27 insertions(+), 5 deletions(-) diff --git a/C/Makefile b/C/Makefile index b1eb68f..f0bb4d3 100644 --- a/C/Makefile +++ b/C/Makefile @@ -1,7 +1,8 @@ CC = gcc CFLAGS = -Wall -Wextra -O2 -Iinclude LDFLAGS = -lm -LDFLAGS += -lpthread +LDFLAGS += -lpthread # Multi-threaded capture-render, pthreads producer/consumer +LDFLAGS += -msse4.1 # SIMD - SSE2 for the YUYV to gray conversion SRCDIR = src INCDIR = include diff --git a/C/src/capture.c b/C/src/capture.c index dcbc479..b8e1109 100644 --- a/C/src/capture.c +++ b/C/src/capture.c @@ -10,6 +10,7 @@ #include #include #include +#include int webcam_init(webcam_t *cam, const char *device, int width, int height) { // Open device (non‑blocking for select usage) @@ -97,16 +98,36 @@ int webcam_wait_frame(webcam_t *cam, int timeout_ms) { return 0; } +void yuyv_to_gray_simd(const uint8_t *yuyv, uint8_t *gray, int n) { + // Mask to extract every even byte (Y samples) + __m128i mask = _mm_set1_epi16(0x00FF); + int i = 0; + for (; i + 16 <= n; i += 16) { + // Load 32 bytes of YUYV (= 16 pixels) + __m128i lo = _mm_loadu_si128((__m128i *)(yuyv + i*2)); + __m128i hi = _mm_loadu_si128((__m128i *)(yuyv + i*2 + 16)); + // low byte of each 16-bit word (the Y sample) + lo = _mm_and_si128(lo, mask); + hi = _mm_and_si128(hi, mask); + // Pack 16-bit + __m128i result = _mm_packus_epi16(lo, hi); + _mm_storeu_si128((__m128i *)(gray + i), result); + } + for (; i < n; i++) gray[i] = yuyv[i * 2]; // scalar tail +} + int webcam_capture_frame(webcam_t *cam, uint8_t *gray_buffer) { // Dequeue buffer struct v4l2_buffer buf = cam->buf_info; if (ioctl(cam->fd, VIDIOC_DQBUF, &buf) < 0) return -1; // Convert YUYV -> grayscale (Y component) - uint8_t *yuyv = (uint8_t *)cam->buffer; - for (int i = 0, j = 0; i < cam->width * cam->height * 2; i += 2, j++) { - gray_buffer[j] = yuyv[i]; - } + // uint8_t *yuyv = (uint8_t *)cam->buffer; + // for (int i = 0, j = 0; i < cam->width * cam->height * 2; i += 2, j++) { + // gray_buffer[j] = yuyv[i]; + // } + int total_pixels = cam->width * cam->height; + yuyv_to_gray_simd((uint8_t *)cam->buffer, gray_buffer, total_pixels); // Store updated buffer info for requeue cam->buf_info = buf;