text/plain
•
10.31 KB
•
325 lines
#include "ogg_opus_decoder.h"
#include "../../src/moon.h"
#include "esp_log.h"
#include "esp_opus_dec.h"
#include "esp_audio_dec.h"
#include "freertos/FreeRTOS.h"
#include "freertos/task.h"
#include "freertos/event_groups.h"
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
// Tag passed as the first argument to every ESP_LOG* call in this file.
static const char *TAG = "ogg_opus";
// Event bits for task control, carried by evt_group:
//   EVT_STOP   - the decode task leaves its page loop and cleans up.
//   EVT_PAUSE  - the decode task blocks until EVT_RESUME or EVT_STOP is set.
//   EVT_RESUME - releases a decode task that is blocked on a pause.
#define EVT_STOP (1 << 0)
#define EVT_PAUSE (1 << 1)
#define EVT_RESUME (1 << 2)
// Stack depth handed to xTaskCreatePinnedToCore() for the decode task.
#define OGG_OPUS_TASK_STACK 16384
// Handle of the running decode task. The task itself resets this to NULL
// right before deleting itself, which is what ogg_opus_stop() polls for.
static TaskHandle_t decode_task_handle = NULL;
// Event group holding the EVT_* bits; created on first ogg_opus_start().
static EventGroupHandle_t evt_group = NULL;
// Parameters passed to decode task.
// The task receives a pointer to the single static instance below, not a
// private copy, so the contents must stay untouched while a task is running.
typedef struct {
// Path the decode task opens with fopen(..., "rb").
char filepath[STORAGE_MAX_PATH + 8];
// Invoked by the decode task with each chunk of decoded PCM
// (data pointer, size in bytes, ctx).
ogg_opus_pcm_cb_t pcm_cb;
// Opaque pointer handed back to pcm_cb unchanged.
void *ctx;
} decode_params_t;
// Filled in by ogg_opus_start() before the decode task is created.
static decode_params_t task_params;
// Decode an unsigned 16-bit little-endian value.
// b must point at two readable bytes: b[0] is the low byte, b[1] the high byte.
static uint16_t rd_le16(const uint8_t *b) {
    const uint16_t low = b[0];
    const uint16_t high = b[1];
    return (uint16_t)((high << 8) | low);
}
// Read one OGG page header: the 27 fixed bytes followed by the segment table.
//
//   f              stream positioned at the start of a page.
//   header_flags   receives the header-type byte (0x01 continued packet,
//                  0x02 first page, 0x04 last page). Must not be NULL.
//   granule_pos    receives the 64-bit granule position; may be NULL.
//   segment_table  receives the lacing values; must hold 255 bytes.
//   num_segments   receives the number of valid entries in segment_table.
//
// Returns the total body size (sum of the lacing values, 0..65025), or -1 on
// a short read / EOF or when the "OggS" capture pattern is missing.
// On success the file position is left at the first byte of the page body.
static int read_ogg_page_header(FILE *f, uint8_t *header_flags,
                                uint64_t *granule_pos,
                                uint8_t *segment_table, uint8_t *num_segments) {
    enum {
        OGG_FIXED_HDR_LEN = 27,
        OGG_OFF_FLAGS = 5,
        OGG_OFF_GRANULE = 6,
        OGG_OFF_NUM_SEGMENTS = 26,
    };
    uint8_t hdr[OGG_FIXED_HDR_LEN];

    if (fread(hdr, 1, sizeof hdr, f) != sizeof hdr) return -1;
    if (memcmp(hdr, "OggS", 4) != 0) {
        ESP_LOGE(TAG, "Bad OGG sync");
        return -1;
    }

    *header_flags = hdr[OGG_OFF_FLAGS];

    if (granule_pos) {
        // The field is stored little-endian on disk. Assemble it byte by byte
        // so the result does not depend on the host's byte order.
        uint64_t granule = 0;
        for (int i = 7; i >= 0; i--) {
            granule = (granule << 8) | hdr[OGG_OFF_GRANULE + i];
        }
        *granule_pos = granule;
    }

    const size_t table_len = hdr[OGG_OFF_NUM_SEGMENTS];
    *num_segments = hdr[OGG_OFF_NUM_SEGMENTS];
    if (fread(segment_table, 1, table_len, f) != table_len) return -1;

    int body_size = 0;
    for (size_t i = 0; i < table_len; i++) {
        body_size += segment_table[i];
    }
    return body_size;
}
// Decode task: opens the OGG file named in the decode_params_t passed as arg,
// parses its pages, decodes each Opus packet and hands the PCM to
// params->pcm_cb (always as interleaved 16-bit stereo; mono is duplicated).
//
// Stream layout expected: page 0 = OpusHead, page 1 = OpusTags (skipped),
// remaining pages = audio packets.
//
// Control: EVT_STOP ends the loop; EVT_PAUSE blocks until EVT_RESUME/EVT_STOP.
// On every exit path all buffers, the decoder and the file are released,
// decode_task_handle is reset to NULL and the task deletes itself.
static void ogg_opus_task(void *arg) {
    decode_params_t *params = (decode_params_t *)arg;
    FILE *f = NULL;
    void *dec_handle = NULL;
    uint8_t *pcm_buf = NULL;
    uint8_t *page_body = NULL;
    uint8_t *stereo_buf = NULL;
    uint8_t *seg_table = NULL;

    f = fopen(params->filepath, "rb");
    if (!f) {
        ESP_LOGE(TAG, "Failed to open: %s", params->filepath);
        goto done;
    }

    // Heap-allocate segment table to save stack space for Opus decoder
    seg_table = malloc(255);
    if (!seg_table) goto done;

    uint8_t num_seg = 0;
    uint8_t flags = 0;
    uint64_t granule = 0;

    // --- Page 0: OpusHead ---
    int body_size = read_ogg_page_header(f, &flags, &granule, seg_table, &num_seg);
    if (body_size < 19 || body_size > 256) {
        ESP_LOGE(TAG, "Bad OpusHead page (size=%d)", body_size);
        goto done;
    }
    uint8_t opus_head[19];  // Only need first 19 bytes of OpusHead
    if (fread(opus_head, 1, 19, f) != 19) goto done;
    // Skip rest of OpusHead body
    if (body_size > 19) {
        if (fseek(f, body_size - 19, SEEK_CUR) != 0) goto done;
    }
    if (memcmp(opus_head, "OpusHead", 8) != 0) {
        ESP_LOGE(TAG, "Not an Opus stream");
        goto done;
    }
    uint8_t channels = opus_head[9];
    uint16_t pre_skip = rd_le16(&opus_head[10]);
    ESP_LOGI(TAG, "Opus: channels=%d, pre_skip=%d", channels, pre_skip);
    // Everything below (buffer sizing, pre-skip arithmetic, the mono-to-stereo
    // step) assumes mono or stereo, so reject anything else from the file.
    if (channels != 1 && channels != 2) {
        ESP_LOGE(TAG, "Unsupported channel count: %d", channels);
        goto done;
    }

    // --- Page 1: OpusTags (skip) ---
    body_size = read_ogg_page_header(f, &flags, &granule, seg_table, &num_seg);
    if (body_size < 0) {
        ESP_LOGE(TAG, "Bad OpusTags page");
        goto done;
    }
    if (fseek(f, body_size, SEEK_CUR) != 0) goto done;

    // --- Open Opus decoder ---
    esp_opus_dec_cfg_t dec_cfg = {
        .sample_rate = 48000,
        .channel = channels,
        .frame_duration = ESP_OPUS_DEC_FRAME_DURATION_20_MS,
        .self_delimited = false,
    };
    esp_audio_err_t err = esp_opus_dec_open(&dec_cfg, sizeof(dec_cfg), &dec_handle);
    if (err != ESP_AUDIO_ERR_OK) {
        ESP_LOGE(TAG, "Failed to open Opus decoder: %d", err);
        goto done;
    }

    // 20ms at 48kHz stereo 16-bit = 48000*0.02*2*2 = 3840 bytes per frame
    // Use 8KB to handle larger frame sizes
    int pcm_buf_size = 8 * 1024;
    pcm_buf = malloc(pcm_buf_size);
    if (!pcm_buf) {
        ESP_LOGE(TAG, "Failed to alloc PCM buffer");
        goto done;
    }

    // Mono-to-stereo conversion buffer (I2S requires stereo)
    if (channels == 1) {
        stereo_buf = malloc(pcm_buf_size * 2);
        if (!stereo_buf) {
            ESP_LOGE(TAG, "Failed to alloc stereo buffer");
            goto done;
        }
    }

    // Pre-allocate page body buffer. Max OGG page is 65025 bytes but Opus
    // pages are typically under 20KB. 32KB covers all practical cases.
    int page_buf_size = 32 * 1024;
    page_body = malloc(page_buf_size);
    if (!page_body) {
        ESP_LOGE(TAG, "Failed to alloc page body buffer");
        goto done;
    }

    // Samples (per channel) still to discard from the start of the stream.
    int samples_to_skip = pre_skip;
    // Size of one interleaved 16-bit sample frame in the decoder output.
    const int bytes_per_sample = 2 * channels;
    // Bytes of a packet that started on an earlier page and has not ended
    // yet; they are kept at the front of page_body.
    int carry = 0;

    // --- Decode audio pages ---
    while (1) {
        // Check for stop signal
        EventBits_t bits = xEventGroupGetBits(evt_group);
        if (bits & EVT_STOP) break;

        // Handle pause: block until resumed or stopped
        if (bits & EVT_PAUSE) {
            bits = xEventGroupWaitBits(evt_group, EVT_RESUME | EVT_STOP,
                                       pdTRUE, pdFALSE, portMAX_DELAY);
            if (bits & EVT_STOP) break;
            xEventGroupClearBits(evt_group, EVT_PAUSE);
        }

        // Read next OGG page
        body_size = read_ogg_page_header(f, &flags, &granule, seg_table, &num_seg);
        if (body_size < 0) {
            ESP_LOGI(TAG, "End of stream (no more pages)");
            break;
        }

        // Header flag 0x01: the first packet on this page is the continuation
        // of a packet begun on the previous page.
        const bool continued = (flags & 0x01) != 0;
        // Set when this page continues a packet whose beginning we do not
        // have; the tail must be discarded rather than decoded.
        bool drop_first = false;
        if (carry > 0 && !continued) {
            ESP_LOGW(TAG, "Dropping unfinished packet (%d bytes)", carry);
            carry = 0;
        } else if (carry == 0 && continued) {
            drop_first = true;
        }

        // Read page body right after any carried-over fragment
        if (body_size > page_buf_size - carry) {
            ESP_LOGE(TAG, "OGG page too large (%d bytes)", carry + body_size);
            break;
        }
        if (body_size > 0 &&
            (int)fread(page_body + carry, 1, body_size, f) != body_size) {
            break;
        }

        // Extract and decode packets from page segments.
        // A 255-byte segment means the packet continues; a shorter segment
        // (including a 0-byte one) terminates it.
        int offset = carry;
        int pkt_start = 0;
        carry = 0;
        for (int i = 0; i < num_seg; i++) {
            offset += seg_table[i];
            if (seg_table[i] == 255) continue;

            const int pkt_size = offset - pkt_start;
            if (drop_first) {
                // Tail of a packet whose head was never seen.
                drop_first = false;
            } else if (pkt_size > 0) {
                esp_audio_dec_in_raw_t raw = {
                    .buffer = page_body + pkt_start,
                    .len = pkt_size,
                    .consumed = 0,
                    .frame_recover = ESP_AUDIO_DEC_RECOVERY_NONE,
                };
                esp_audio_dec_out_frame_t frame = {
                    .buffer = pcm_buf,
                    .len = pcm_buf_size,
                    .decoded_size = 0,
                };
                esp_audio_dec_info_t info = {0};
                err = esp_opus_dec_decode(dec_handle, &raw, &frame, &info);
                if (err != ESP_AUDIO_ERR_OK) {
                    ESP_LOGW(TAG, "Decode error: %d (pkt_size=%d)", err, pkt_size);
                } else if (frame.decoded_size > 0) {
                    uint8_t *out_data = pcm_buf;
                    int out_size = frame.decoded_size;

                    // Handle pre-skip: discard leading samples
                    if (samples_to_skip > 0) {
                        const int skip_bytes = samples_to_skip * bytes_per_sample;
                        if (skip_bytes >= out_size) {
                            // Whole frame falls inside the pre-skip region.
                            samples_to_skip -= out_size / bytes_per_sample;
                            out_size = 0;
                        } else {
                            out_data += skip_bytes;
                            out_size -= skip_bytes;
                            samples_to_skip = 0;
                        }
                    }

                    if (out_size > 0) {
                        // Convert mono to stereo for I2S
                        if (channels == 1) {
                            const int16_t *mono = (const int16_t *)out_data;
                            int16_t *stereo = (int16_t *)stereo_buf;
                            const int num_samples = out_size / 2;
                            for (int s = 0; s < num_samples; s++) {
                                stereo[s * 2] = mono[s];
                                stereo[s * 2 + 1] = mono[s];
                            }
                            out_data = stereo_buf;
                            out_size = num_samples * 4;
                        }
                        params->pcm_cb(out_data, out_size, params->ctx);
                    }
                }
            }
            pkt_start = offset;
        }

        // The page ended on a 255-byte segment: the last packet is unfinished.
        // Keep its bytes for the next page unless it is itself an orphan tail.
        if (offset > pkt_start && !drop_first) {
            carry = offset - pkt_start;
            memmove(page_body, page_body + pkt_start, carry);
        }

        // Header flag 0x04 marks the last page of the stream
        if (flags & 0x04) {
            ESP_LOGI(TAG, "End of stream (EOS flag)");
            break;
        }
    }

done:
    free(page_body);
    free(pcm_buf);
    free(stereo_buf);
    free(seg_table);
    if (dec_handle) esp_opus_dec_close(dec_handle);
    if (f) fclose(f);
    ESP_LOGI(TAG, "Decode task finished");
    // Signals ogg_opus_stop() that the task is gone.
    decode_task_handle = NULL;
    vTaskDelete(NULL);
}
// Start decoding an OGG/Opus file on a background task.
//
//   filepath  path of the file to play; must fit in task_params.filepath.
//   pcm_cb    called from the decode task with each chunk of decoded PCM.
//   ctx       opaque pointer passed back to pcm_cb.
//
// Any playback already in progress is stopped first. Returns true when the
// decode task was created. Returns false on invalid arguments, when the path
// is too long, when the previous task did not exit, or when the event group
// or the task could not be created.
bool ogg_opus_start(const char *filepath, ogg_opus_pcm_cb_t pcm_cb, void *ctx) {
    if (!filepath || !pcm_cb) {
        ESP_LOGE(TAG, "Invalid arguments");
        return false;
    }

    ogg_opus_stop();
    // task_params and the event bits are shared with the decode task. If the
    // old task is still alive, touching them would corrupt its state and
    // clearing EVT_STOP would leave it running next to the new one.
    if (decode_task_handle) {
        ESP_LOGE(TAG, "Previous decode task still running");
        return false;
    }

    if (!evt_group) {
        evt_group = xEventGroupCreate();
        if (!evt_group) return false;
    }
    xEventGroupClearBits(evt_group, EVT_STOP | EVT_PAUSE | EVT_RESUME);

    // snprintf always terminates and reports truncation, so a path that does
    // not fit is rejected instead of silently opening a different file.
    int n = snprintf(task_params.filepath, sizeof(task_params.filepath), "%s", filepath);
    if (n < 0 || (size_t)n >= sizeof(task_params.filepath)) {
        ESP_LOGE(TAG, "Path too long: %s", filepath);
        return false;
    }
    task_params.pcm_cb = pcm_cb;
    task_params.ctx = ctx;

    // Priority 15, pinned to core 1.
    BaseType_t ret = xTaskCreatePinnedToCore(ogg_opus_task, "ogg_opus", OGG_OPUS_TASK_STACK,
                                             &task_params, 15, &decode_task_handle, 1);
    if (ret != pdPASS) {
        ESP_LOGE(TAG, "Failed to create decode task");
        return false;
    }
    ESP_LOGI(TAG, "Started: %s", filepath);
    return true;
}
// Ask the decode task to exit and wait for it to do so.
// Does nothing when no task is running or the event group was never created.
// EVT_RESUME is raised together with EVT_STOP so a paused task wakes up too.
// Polls every 10 ms, at most 300 times, for the task to clear
// decode_task_handle; logs a warning if it is still set afterwards.
void ogg_opus_stop(void) {
    if (decode_task_handle == NULL || evt_group == NULL) {
        return;
    }

    xEventGroupSetBits(evt_group, EVT_STOP | EVT_RESUME);

    int polls_left = 300;
    while (polls_left > 0 && decode_task_handle != NULL) {
        vTaskDelay(pdMS_TO_TICKS(10));
        polls_left--;
    }

    if (decode_task_handle != NULL) {
        ESP_LOGW(TAG, "Decode task did not exit in time");
    }
}
void ogg_opus_pause(void) {
if (evt_group) {
xEventGroupSetBits(evt_group, EVT_PAUSE);
}
}
// Release a paused decode task by raising EVT_RESUME.
// Does nothing when the event group has not been created yet.
void ogg_opus_resume(void) {
    if (evt_group == NULL) {
        return;
    }
    xEventGroupSetBits(evt_group, EVT_RESUME);
}