Login
1 branch 0 tags
Ben (Desktop/Arch) Added clang-format 2f88780 1 month ago 66 Commits
moon / esp32 / main / ogg_opus_decoder.c
#include "ogg_opus_decoder.h"
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "../../src/moon.h"
#include "esp_audio_dec.h"
#include "esp_log.h"
#include "esp_opus_dec.h"
#include "freertos/FreeRTOS.h"
#include "freertos/event_groups.h"
#include "freertos/task.h"

// Log tag passed as the first argument to the ESP_LOG* macros in this file.
static const char* TAG = "ogg_opus";

// Event bits for task control. They are set on evt_group by the public
// ogg_opus_* functions and polled by ogg_opus_task once per OGG page.
// EVT_STOP:   set by ogg_opus_stop(); makes the decode loop exit.
// EVT_PAUSE:  set by ogg_opus_pause(); makes the decode loop block.
// EVT_RESUME: set by ogg_opus_resume() and ogg_opus_stop(); wakes a blocked
//             decode loop.
#define EVT_STOP (1 << 0)
#define EVT_PAUSE (1 << 1)
#define EVT_RESUME (1 << 2)

// Stack depth argument given to xTaskCreatePinnedToCore for the decode task.
#define OGG_OPUS_TASK_STACK 16384

// Handle of the running decode task; written by xTaskCreatePinnedToCore in
// ogg_opus_start() and reset to NULL by the task itself just before it
// deletes itself. ogg_opus_stop() polls it to detect task exit.
static TaskHandle_t decode_task_handle = NULL;
// Event group carrying the EVT_* bits; created lazily on the first
// ogg_opus_start() call and never deleted in this file.
static EventGroupHandle_t evt_group = NULL;

// Parameters passed to decode task
typedef struct {
	// Path handed to fopen() by the decode task.
	// NOTE(review): the reason for the extra 8 bytes beyond STORAGE_MAX_PATH
	// is not visible in this file - confirm against the callers.
	char filepath[STORAGE_MAX_PATH + 8];
	// Called by the decode task with each chunk of decoded PCM
	// (buffer pointer, size in bytes, ctx).
	ogg_opus_pcm_cb_t pcm_cb;
	// Opaque pointer forwarded unchanged as the last argument of pcm_cb.
	void* ctx;
} decode_params_t;

// Single static instance: filled in by ogg_opus_start() and passed by pointer
// to the decode task, which reads it for as long as it runs.
static decode_params_t task_params;

// Decode a 16-bit little-endian value from the first two bytes of b
// (b[0] is the low byte, b[1] the high byte).
static uint16_t rd_le16(const uint8_t* b) {
	const uint16_t lo = b[0];
	const uint16_t hi = b[1];
	return (uint16_t)(lo | (hi << 8));
}

// Parse one OGG page header starting at the current position of f.
//
// On success stores the header-type byte in *header_flags, the raw 8-byte
// granule position in *granule_pos (skipped when granule_pos is NULL, copied
// byte-for-byte without endian conversion), the segment count in
// *num_segments and the lacing values in segment_table (caller supplies
// room for 255 bytes). The stream is left positioned just after the segment
// table, i.e. at the first byte of the page body.
//
// Returns the page body size (sum of all lacing values), or -1 on a short
// read or when the "OggS" capture pattern is missing.
static int read_ogg_page_header(FILE* f,
                                uint8_t* header_flags,
                                uint64_t* granule_pos,
                                uint8_t* segment_table,
                                uint8_t* num_segments) {
	// Byte offsets inside the fixed-size part of the page header
	enum { FIXED_LEN = 27, OFF_FLAGS = 5, OFF_GRANULE = 6, OFF_NSEGS = 26 };

	uint8_t fixed[FIXED_LEN];
	if (fread(fixed, 1, sizeof(fixed), f) != sizeof(fixed)) {
		return -1;
	}
	if (memcmp(fixed, "OggS", 4) != 0) {
		ESP_LOGE(TAG, "Bad OGG sync");
		return -1;
	}

	*header_flags = fixed[OFF_FLAGS];
	if (granule_pos != NULL) {
		memcpy(granule_pos, fixed + OFF_GRANULE, sizeof(*granule_pos));
	}

	const size_t seg_count = fixed[OFF_NSEGS];
	*num_segments = (uint8_t)seg_count;
	if (fread(segment_table, 1, seg_count, f) != seg_count) {
		return -1;
	}

	// Body length is the sum of every lacing value in the table
	int total = 0;
	const uint8_t* const table_end = segment_table + seg_count;
	for (const uint8_t* lace = segment_table; lace != table_end; lace++) {
		total += *lace;
	}
	return total;
}

// Decode task: opens OGG file, parses pages, decodes Opus packets, writes PCM.
//
// arg points to a decode_params_t naming the file to open and the callback
// that receives decoded PCM. The task:
//   1. opens the file and parses the OpusHead page (channel count, pre-skip),
//   2. skips the OpusTags header, which may span several pages,
//   3. opens the Opus decoder and allocates its working buffers,
//   4. walks the remaining pages, decodes every packet and hands the PCM
//      (duplicated to two channels when the stream is mono) to
//      params->pcm_cb.
//
// evt_group is polled once per page: EVT_STOP ends the loop, EVT_PAUSE blocks
// until EVT_RESUME or EVT_STOP arrives. On every exit path all buffers, the
// decoder and the file are released, decode_task_handle is reset to NULL and
// the task deletes itself.
//
// NOTE(review): audio packets that straddle a page boundary are not
// reassembled; the fragment at the end of a page is decoded on its own.
static void ogg_opus_task(void* arg) {
	decode_params_t* params = (decode_params_t*)arg;
	FILE* f = NULL;
	void* dec_handle = NULL;
	uint8_t* pcm_buf = NULL;
	uint8_t* page_body = NULL;
	uint8_t* stereo_buf = NULL;
	uint8_t* seg_table = NULL;

	f = fopen(params->filepath, "rb");
	if (!f) {
		ESP_LOGE(TAG, "Failed to open: %s", params->filepath);
		goto done;
	}

	// Heap-allocate segment table to save stack space for Opus decoder
	seg_table = malloc(255);
	if (!seg_table) {
		ESP_LOGE(TAG, "Failed to alloc segment table");
		goto done;
	}

	uint8_t num_seg, flags;
	uint64_t granule;

	// --- Page 0: OpusHead ---
	int body_size =
	    read_ogg_page_header(f, &flags, &granule, seg_table, &num_seg);
	if (body_size < 19 || body_size > 256) {
		ESP_LOGE(TAG, "Bad OpusHead page (size=%d)", body_size);
		goto done;
	}

	uint8_t opus_head[19];  // Only need first 19 bytes of OpusHead
	if (fread(opus_head, 1, 19, f) != 19) {
		goto done;
	}
	// Skip rest of OpusHead body
	if (body_size > 19) {
		if (fseek(f, body_size - 19, SEEK_CUR) != 0) {
			goto done;
		}
	}

	if (memcmp(opus_head, "OpusHead", 8) != 0) {
		ESP_LOGE(TAG, "Not an Opus stream");
		goto done;
	}

	uint8_t channels = opus_head[9];
	uint16_t pre_skip = rd_le16(&opus_head[10]);
	ESP_LOGI(TAG, "Opus: channels=%d, pre_skip=%d", channels, pre_skip);

	// --- OpusTags (skip) ---
	// The comment header is a single packet but may be spread over several
	// pages (large tags, embedded pictures). A page whose last lacing value
	// is 255 ends in the middle of a packet, so keep skipping pages until
	// one terminates the packet; otherwise the continuation pages would be
	// fed to the decoder as audio.
	bool tags_done = false;
	while (!tags_done) {
		body_size =
		    read_ogg_page_header(f, &flags, &granule, seg_table, &num_seg);
		if (body_size < 0) {
			ESP_LOGE(TAG, "Bad OpusTags page");
			goto done;
		}
		if (fseek(f, body_size, SEEK_CUR) != 0) {
			goto done;
		}
		tags_done = (num_seg == 0) || (seg_table[num_seg - 1] < 255);
	}

	// --- Open Opus decoder ---
	esp_opus_dec_cfg_t dec_cfg = {
	    .sample_rate = 48000,
	    .channel = channels,
	    .frame_duration = ESP_OPUS_DEC_FRAME_DURATION_20_MS,
	    .self_delimited = false,
	};

	esp_audio_err_t err =
	    esp_opus_dec_open(&dec_cfg, sizeof(dec_cfg), &dec_handle);
	if (err != ESP_AUDIO_ERR_OK) {
		ESP_LOGE(TAG, "Failed to open Opus decoder: %d", err);
		goto done;
	}

	// 20ms at 48kHz stereo 16-bit = 48000*0.02*2*2 = 3840 bytes per frame
	// Use 8KB to handle larger frame sizes
	int pcm_buf_size = 8 * 1024;
	pcm_buf = malloc(pcm_buf_size);
	if (!pcm_buf) {
		ESP_LOGE(TAG, "Failed to alloc PCM buffer");
		goto done;
	}

	// Mono-to-stereo conversion buffer (I2S requires stereo).
	// Twice the PCM buffer because every mono sample is written twice.
	if (channels == 1) {
		stereo_buf = malloc(pcm_buf_size * 2);
		if (!stereo_buf) {
			ESP_LOGE(TAG, "Failed to alloc stereo buffer");
			goto done;
		}
	}

	// Samples (per channel) still to be dropped from the start of the
	// decoded output, as requested by the OpusHead pre-skip field.
	int samples_to_skip = pre_skip;

	// Pre-allocate page body buffer. Max OGG page is 65025 bytes but Opus
	// pages are typically under 20KB. 32KB covers all practical cases.
	int page_buf_size = 32 * 1024;
	page_body = malloc(page_buf_size);
	if (!page_body) {
		ESP_LOGE(TAG, "Failed to alloc page body buffer");
		goto done;
	}

	// --- Decode audio pages ---
	while (1) {
		// Check for stop signal
		EventBits_t bits = xEventGroupGetBits(evt_group);
		if (bits & EVT_STOP) {
			break;
		}

		// Handle pause: block until resume or stop. The wait clears
		// EVT_RESUME/EVT_STOP on exit; the returned value still holds the
		// bits as they were before clearing.
		if (bits & EVT_PAUSE) {
			bits = xEventGroupWaitBits(evt_group, EVT_RESUME | EVT_STOP, pdTRUE,
			                           pdFALSE, portMAX_DELAY);
			if (bits & EVT_STOP) {
				break;
			}
			xEventGroupClearBits(evt_group, EVT_PAUSE);
		}

		// Read next OGG page
		body_size =
		    read_ogg_page_header(f, &flags, &granule, seg_table, &num_seg);
		if (body_size < 0) {
			ESP_LOGI(TAG, "End of stream (no more pages)");
			break;
		}

		// Read page body
		if (body_size > 0) {
			if (body_size > page_buf_size) {
				ESP_LOGE(TAG, "OGG page too large (%d bytes)", body_size);
				break;
			}
			if ((int)fread(page_body, 1, body_size, f) != body_size) {
				break;
			}
		}

		// Extract and decode packets from page segments
		// Segments of 255 bytes continue; segment < 255 ends a packet
		int offset = 0;
		int pkt_start = 0;
		for (int i = 0; i < num_seg; i++) {
			offset += seg_table[i];

			if (seg_table[i] < 255 || i == num_seg - 1) {
				int pkt_size = offset - pkt_start;
				if (pkt_size > 0 && page_body) {
					esp_audio_dec_in_raw_t raw = {
					    .buffer = page_body + pkt_start,
					    .len = pkt_size,
					    .consumed = 0,
					    .frame_recover = ESP_AUDIO_DEC_RECOVERY_NONE,
					};
					esp_audio_dec_out_frame_t frame = {
					    .buffer = pcm_buf,
					    .len = pcm_buf_size,
					    .decoded_size = 0,
					};
					esp_audio_dec_info_t info = {0};

					err = esp_opus_dec_decode(dec_handle, &raw, &frame, &info);
					if (err == ESP_AUDIO_ERR_OK && frame.decoded_size > 0) {
						uint8_t* out_data = pcm_buf;
						int out_size = frame.decoded_size;

						// Handle pre-skip: drop whole packets while the
						// remaining skip covers them, then trim the front
						// of the first packet that outlasts it.
						if (samples_to_skip > 0) {
							int bytes_per_sample = 2 * channels;
							int skip_bytes = samples_to_skip * bytes_per_sample;
							if (skip_bytes >= out_size) {
								samples_to_skip -= out_size / bytes_per_sample;
								goto next_packet;
							}
							out_data += skip_bytes;
							out_size -= skip_bytes;
							samples_to_skip = 0;
						}

						// Convert mono to stereo for I2S
						if (channels == 1 && stereo_buf) {
							int16_t* mono = (int16_t*)out_data;
							int16_t* stereo = (int16_t*)stereo_buf;
							int num_samples = out_size / 2;
							for (int s = 0; s < num_samples; s++) {
								stereo[s * 2] = mono[s];
								stereo[s * 2 + 1] = mono[s];
							}
							out_data = stereo_buf;
							out_size = num_samples * 4;
						}

						params->pcm_cb(out_data, out_size, params->ctx);
					} else if (err != ESP_AUDIO_ERR_OK) {
						// A bad packet is logged and skipped; decoding
						// continues with the next one.
						ESP_LOGW(TAG, "Decode error: %d (pkt_size=%d)", err,
						         pkt_size);
					}
				}
			next_packet:
				pkt_start = offset;
			}
		}

		// Header-type bit 0x04 marks the last page of the logical stream
		if (flags & 0x04) {
			ESP_LOGI(TAG, "End of stream (EOS flag)");
			break;
		}
	}

done:
	// free(NULL) is a no-op, so buffers that were never allocated are fine
	free(page_body);
	free(pcm_buf);
	free(stereo_buf);
	free(seg_table);
	if (dec_handle) {
		esp_opus_dec_close(dec_handle);
	}
	if (f) {
		fclose(f);
	}

	ESP_LOGI(TAG, "Decode task finished");
	// Clearing the handle is what ogg_opus_stop() polls for
	decode_task_handle = NULL;
	vTaskDelete(NULL);
}

// Start decoding filepath on a new FreeRTOS task, stopping any decode that is
// already running first.
//
// filepath: path of the OGG/Opus file; copied into task_params.
// pcm_cb:   callback invoked by the decode task with each decoded PCM chunk.
// ctx:      opaque pointer forwarded to pcm_cb.
//
// Returns true when the decode task was created. Returns false when an
// argument is NULL, the previous task did not exit in time, the event group
// cannot be created, the path does not fit in task_params.filepath, or task
// creation fails.
bool ogg_opus_start(const char* filepath, ogg_opus_pcm_cb_t pcm_cb, void* ctx) {
	if (!filepath || !pcm_cb) {
		ESP_LOGE(TAG, "Invalid arguments (NULL path or callback)");
		return false;
	}

	ogg_opus_stop();
	if (decode_task_handle) {
		// The old task is still reading task_params; overwriting it or
		// spawning a second task now would make both share one parameter
		// block.
		ESP_LOGE(TAG, "Previous decode task still running");
		return false;
	}

	if (!evt_group) {
		evt_group = xEventGroupCreate();
		if (!evt_group) {
			return false;
		}
	}
	// Drop any bits left over from the previous playback
	xEventGroupClearBits(evt_group, EVT_STOP | EVT_PAUSE | EVT_RESUME);

	// snprintf always terminates and reports truncation; a truncated path
	// would name a different (or missing) file, so reject it up front.
	int written = snprintf(task_params.filepath, sizeof(task_params.filepath),
	                       "%s", filepath);
	if (written < 0 || (size_t)written >= sizeof(task_params.filepath)) {
		ESP_LOGE(TAG, "Path too long: %s", filepath);
		return false;
	}
	task_params.pcm_cb = pcm_cb;
	task_params.ctx = ctx;

	BaseType_t ret =
	    xTaskCreatePinnedToCore(ogg_opus_task, "ogg_opus", OGG_OPUS_TASK_STACK,
	                            &task_params, 15, &decode_task_handle, 1);
	if (ret != pdPASS) {
		ESP_LOGE(TAG, "Failed to create decode task");
		return false;
	}

	ESP_LOGI(TAG, "Started: %s", filepath);
	return true;
}

// Ask the decode task to exit and wait for it to do so.
//
// Does nothing when no task is running or the event group was never created.
// Otherwise sets EVT_STOP together with EVT_RESUME and polls
// decode_task_handle every 10 ms, up to 300 times, until the task has
// cleared it. A warning is logged if the task is still alive after that.
void ogg_opus_stop(void) {
	if (decode_task_handle == NULL || evt_group == NULL) {
		return;
	}

	xEventGroupSetBits(evt_group, EVT_STOP | EVT_RESUME);

	int polls_left = 300;
	while (polls_left > 0 && decode_task_handle != NULL) {
		vTaskDelay(pdMS_TO_TICKS(10));
		polls_left--;
	}

	if (decode_task_handle != NULL) {
		ESP_LOGW(TAG, "Decode task did not exit in time");
	}
}

void ogg_opus_pause(void) {
	if (evt_group) {
		xEventGroupSetBits(evt_group, EVT_PAUSE);
	}
}

// Signal EVT_RESUME so a decode task blocked in its pause wait continues.
// Does nothing when the event group has not been created yet.
void ogg_opus_resume(void) {
	if (evt_group == NULL) {
		return;
	}
	xEventGroupSetBits(evt_group, EVT_RESUME);
}