Login
1 branch 0 tags
Ben (Desktop/Arch) Opus decoding 1606e5a 1 month ago 33 Commits
moon / firmware / esp32 / main / ogg_opus_decoder.c
#include "ogg_opus_decoder.h"
#include "esp_log.h"
#include "esp_opus_dec.h"
#include "esp_audio_dec.h"
#include "freertos/FreeRTOS.h"
#include "freertos/task.h"
#include "freertos/event_groups.h"
#include <stdio.h>
#include <string.h>
#include <stdlib.h>

// Log tag passed as the first argument to the ESP_LOGx macros in this file.
static const char *TAG = "ogg_opus";

// Event bits for task control (bits of evt_group):
//   EVT_STOP   - set by ogg_opus_stop(); the decode loop exits when it sees it.
//   EVT_PAUSE  - set by ogg_opus_pause(); the decode loop blocks until
//                EVT_RESUME or EVT_STOP is set, then clears EVT_PAUSE.
//   EVT_RESUME - set by ogg_opus_resume() (and by ogg_opus_stop(), so that a
//                paused task wakes up to observe EVT_STOP).
#define EVT_STOP   (1 << 0)
#define EVT_PAUSE  (1 << 1)
#define EVT_RESUME (1 << 2)

// Stack depth handed to xTaskCreate() for the decode task.
// NOTE(review): ESP-IDF documents this value as bytes - confirm for the port in use.
#define OGG_OPUS_TASK_STACK 16384

// Handle of the running decode task. Written by xTaskCreate() in
// ogg_opus_start(), reset to NULL by the task itself right before it deletes
// itself, and polled by ogg_opus_stop() to detect that the task has exited.
static TaskHandle_t decode_task_handle = NULL;
// Event group carrying the EVT_* bits. Created on the first ogg_opus_start()
// call and reused afterwards; nothing in this file deletes it.
static EventGroupHandle_t evt_group = NULL;

// Parameters passed to decode task
typedef struct {
    char filepath[160];        // NUL-terminated path of the file opened with fopen()
    ogg_opus_pcm_cb_t pcm_cb;  // called with each chunk of decoded PCM
    void *ctx;                 // opaque pointer forwarded unchanged to pcm_cb
} decode_params_t;

// Single shared instance: filled in by ogg_opus_start() and handed to the
// decode task by pointer, so it must not be modified while a task is running.
static decode_params_t task_params;

// Decode the unsigned 16-bit little-endian value stored in the two bytes at b
// (b[0] is the low byte, b[1] the high byte).
static uint16_t rd_le16(const uint8_t *b) {
    const uint16_t low = b[0];
    const uint16_t high = b[1];
    return (uint16_t)((high << 8) | low);
}

// Read one OGG page header and its segment (lacing) table from f.
//
//   header_flags  - receives the header_type byte (byte 5 of the header).
//   granule_pos   - optional, may be NULL; receives the 64-bit granule
//                   position. The field is stored little-endian in the file
//                   and is decoded byte by byte so the result does not depend
//                   on host endianness.
//   segment_table - must hold 255 bytes; receives the lacing values.
//   num_segments  - receives the number of valid entries in segment_table.
//
// Returns the total body size (sum of all lacing values), or -1 on a short
// read / EOF or when the "OggS" capture pattern is missing. On success the
// file position is left right after the segment table, i.e. at the first
// byte of the page body.
static int read_ogg_page_header(FILE *f, uint8_t *header_flags,
                                uint64_t *granule_pos,
                                uint8_t *segment_table, uint8_t *num_segments) {
    uint8_t hdr[27];
    if (fread(hdr, 1, sizeof hdr, f) != sizeof hdr) return -1;
    if (memcmp(hdr, "OggS", 4) != 0) {
        ESP_LOGE(TAG, "Bad OGG sync");
        return -1;
    }

    *header_flags = hdr[5];
    if (granule_pos) {
        // Bytes 6..13 hold the granule position, least significant byte first.
        uint64_t granule = 0;
        for (int i = 7; i >= 0; i--) {
            granule = (granule << 8) | hdr[6 + i];
        }
        *granule_pos = granule;
    }

    const size_t seg_count = hdr[26];
    *num_segments = hdr[26];

    if (fread(segment_table, 1, seg_count, f) != seg_count) return -1;

    int body_size = 0;
    for (size_t i = 0; i < seg_count; i++) {
        body_size += segment_table[i];
    }
    return body_size;
}

// Decode task: opens the OGG file named in the decode_params_t passed as arg,
// parses its pages, decodes the Opus packets and hands the resulting PCM to
// params->pcm_cb.
//
// Flow:
//   1. Page 0 must carry an "OpusHead" packet; channel count and pre-skip are
//      taken from it. Only 1 or 2 channels are accepted.
//   2. The next page (OpusTags) is skipped without being parsed.
//   3. Every following page is split into packets using its lacing values.
//      A packet whose last lacing value on a page is 255 continues on the
//      next page; its bytes are kept at the front of page_body and completed
//      there. Continuation data whose beginning was never buffered (e.g. the
//      tail of a multi-page OpusTags header) is discarded, not decoded.
//   4. The first pre_skip samples of decoder output are dropped, mono output
//      is duplicated into two channels, and the PCM is passed to pcm_cb.
//
// The loop ends on EVT_STOP, on a read/parse failure, or after a page with
// the end-of-stream flag (0x04). On exit all resources are released,
// decode_task_handle is reset to NULL and the task deletes itself.
static void ogg_opus_task(void *arg) {
    decode_params_t *params = (decode_params_t *)arg;
    FILE *f = NULL;
    void *dec_handle = NULL;
    uint8_t *pcm_buf = NULL;
    uint8_t *page_body = NULL;
    uint8_t *stereo_buf = NULL;
    uint8_t *seg_table = NULL;

    f = fopen(params->filepath, "rb");
    if (!f) {
        ESP_LOGE(TAG, "Failed to open: %s", params->filepath);
        goto done;
    }

    // Heap-allocate segment table to save stack space for Opus decoder
    seg_table = malloc(255);
    if (!seg_table) goto done;

    uint8_t num_seg, flags;
    uint64_t granule;

    // --- Page 0: OpusHead ---
    int body_size = read_ogg_page_header(f, &flags, &granule, seg_table, &num_seg);
    if (body_size < 19 || body_size > 256) {
        ESP_LOGE(TAG, "Bad OpusHead page (size=%d)", body_size);
        goto done;
    }

    uint8_t opus_head[19]; // Only need first 19 bytes of OpusHead
    if (fread(opus_head, 1, 19, f) != 19) goto done;
    // Skip rest of OpusHead body
    if (body_size > 19) {
        if (fseek(f, body_size - 19, SEEK_CUR) != 0) goto done;
    }

    if (memcmp(opus_head, "OpusHead", 8) != 0) {
        ESP_LOGE(TAG, "Not an Opus stream");
        goto done;
    }

    uint8_t channels = opus_head[9];
    uint16_t pre_skip = rd_le16(&opus_head[10]);
    ESP_LOGI(TAG, "Opus: channels=%d, pre_skip=%d", channels, pre_skip);

    // The channel count comes straight from the file. Everything below
    // (buffer sizing, pre-skip arithmetic, mono-to-stereo conversion) only
    // handles mono or stereo, so reject anything else up front.
    if (channels != 1 && channels != 2) {
        ESP_LOGE(TAG, "Unsupported channel count: %d", channels);
        goto done;
    }

    // --- Page 1: OpusTags (skip) ---
    body_size = read_ogg_page_header(f, &flags, &granule, seg_table, &num_seg);
    if (body_size < 0) {
        ESP_LOGE(TAG, "Bad OpusTags page");
        goto done;
    }
    if (fseek(f, body_size, SEEK_CUR) != 0) goto done;

    // --- Open Opus decoder ---
    esp_opus_dec_cfg_t dec_cfg = {
        .sample_rate = 48000,
        .channel = channels,
        .frame_duration = ESP_OPUS_DEC_FRAME_DURATION_20_MS,
        .self_delimited = false,
    };

    esp_audio_err_t err = esp_opus_dec_open(&dec_cfg, sizeof(dec_cfg), &dec_handle);
    if (err != ESP_AUDIO_ERR_OK) {
        ESP_LOGE(TAG, "Failed to open Opus decoder: %d", err);
        goto done;
    }

    // 20ms at 48kHz stereo 16-bit = 48000*0.02*2*2 = 3840 bytes per frame
    // Use 8KB to handle larger frame sizes
    int pcm_buf_size = 8 * 1024;
    pcm_buf = malloc(pcm_buf_size);
    if (!pcm_buf) {
        ESP_LOGE(TAG, "Failed to alloc PCM buffer");
        goto done;
    }

    // Mono-to-stereo conversion buffer (I2S requires stereo)
    if (channels == 1) {
        stereo_buf = malloc(pcm_buf_size * 2);
        if (!stereo_buf) {
            ESP_LOGE(TAG, "Failed to alloc stereo buffer");
            goto done;
        }
    }

    int samples_to_skip = pre_skip;

    // Pre-allocate page body buffer. Max OGG page is 65025 bytes but Opus
    // pages are typically under 20KB. 32KB covers all practical cases.
    int page_buf_size = 32 * 1024;
    page_body = malloc(page_buf_size);
    if (!page_body) {
        ESP_LOGE(TAG, "Failed to alloc page body buffer");
        goto done;
    }

    // Number of bytes at the front of page_body that belong to a packet which
    // started on an earlier page and has not been completed yet.
    int carry = 0;

    // --- Decode audio pages ---
    while (1) {
        // Check for stop signal
        EventBits_t bits = xEventGroupGetBits(evt_group);
        if (bits & EVT_STOP) break;

        // Handle pause: block until resumed or stopped. The wait clears the
        // bits it was waiting for; EVT_PAUSE is cleared explicitly afterwards.
        if (bits & EVT_PAUSE) {
            bits = xEventGroupWaitBits(evt_group, EVT_RESUME | EVT_STOP,
                                       pdTRUE, pdFALSE, portMAX_DELAY);
            if (bits & EVT_STOP) break;
            xEventGroupClearBits(evt_group, EVT_PAUSE);
        }

        // Read next OGG page
        body_size = read_ogg_page_header(f, &flags, &granule, seg_table, &num_seg);
        if (body_size < 0) {
            ESP_LOGI(TAG, "End of stream (no more pages)");
            break;
        }

        // header_type bit 0x01: the first packet on this page is the
        // continuation of a packet begun on the previous page.
        bool continued = (flags & 0x01) != 0;
        if (!continued && carry > 0) {
            // The stream promised more data for the buffered packet but the
            // next page starts a fresh one; the partial packet is unusable.
            ESP_LOGW(TAG, "Dropping unfinished packet (%d bytes)", carry);
            carry = 0;
        }
        // Continuation without a buffered beginning: skip up to the end of
        // that packet instead of decoding its tail as if it were a packet.
        bool drop_first = continued && carry == 0;

        // Read page body, appended after any carried-over packet bytes
        if (body_size > 0) {
            if (body_size > page_buf_size) {
                ESP_LOGE(TAG, "OGG page too large (%d bytes)", body_size);
                break;
            }
            if (body_size > page_buf_size - carry) {
                ESP_LOGE(TAG, "Packet spanning pages too large (%d bytes)",
                         carry + body_size);
                break;
            }
            if ((int)fread(page_body + carry, 1, body_size, f) != body_size) {
                break;
            }
        }

        // Extract and decode packets from page segments
        // Segments of 255 bytes continue; segment < 255 ends a packet
        int offset = carry;   // end of the data consumed so far in page_body
        int pkt_start = 0;    // start of the packet currently being collected
        for (int i = 0; i < num_seg; i++) {
            offset += seg_table[i];
            if (seg_table[i] == 255) continue; // packet not finished yet

            uint8_t *pkt = page_body + pkt_start;
            int pkt_size = offset - pkt_start;
            pkt_start = offset;

            if (drop_first) {
                drop_first = false;
                continue;
            }
            if (pkt_size <= 0) continue;

            esp_audio_dec_in_raw_t raw = {
                .buffer = pkt,
                .len = pkt_size,
                .consumed = 0,
                .frame_recover = ESP_AUDIO_DEC_RECOVERY_NONE,
            };
            esp_audio_dec_out_frame_t frame = {
                .buffer = pcm_buf,
                .len = pcm_buf_size,
                .decoded_size = 0,
            };
            esp_audio_dec_info_t info = {0};

            err = esp_opus_dec_decode(dec_handle, &raw, &frame, &info);
            if (err != ESP_AUDIO_ERR_OK) {
                ESP_LOGW(TAG, "Decode error: %d (pkt_size=%d)", err, pkt_size);
                continue;
            }
            if (frame.decoded_size == 0) continue;

            uint8_t *out_data = pcm_buf;
            int out_size = frame.decoded_size;

            // Handle pre-skip: drop the first pre_skip samples (per channel)
            // of decoder output, possibly spread over several packets.
            if (samples_to_skip > 0) {
                int bytes_per_sample = 2 * channels;
                int skip_bytes = samples_to_skip * bytes_per_sample;
                if (skip_bytes >= out_size) {
                    samples_to_skip -= out_size / bytes_per_sample;
                    continue;
                }
                out_data += skip_bytes;
                out_size -= skip_bytes;
                samples_to_skip = 0;
            }

            // Convert mono to stereo for I2S by duplicating every sample
            if (channels == 1 && stereo_buf) {
                int16_t *mono = (int16_t *)out_data;
                int16_t *stereo = (int16_t *)stereo_buf;
                int num_samples = out_size / 2;
                for (int s = 0; s < num_samples; s++) {
                    stereo[s * 2] = mono[s];
                    stereo[s * 2 + 1] = mono[s];
                }
                out_data = stereo_buf;
                out_size = num_samples * 4;
            }

            params->pcm_cb(out_data, out_size, params->ctx);
        }

        // Anything left after the last completed packet ended on a 255-byte
        // segment and therefore continues on the next page.
        carry = offset - pkt_start;
        if (carry > 0) {
            if (drop_first) {
                // Still inside a packet whose beginning was never buffered.
                carry = 0;
            } else if (pkt_start > 0) {
                memmove(page_body, page_body + pkt_start, carry);
            }
        }

        if (flags & 0x04) {
            ESP_LOGI(TAG, "End of stream (EOS flag)");
            break;
        }
    }

done:
    free(page_body);
    free(pcm_buf);
    free(stereo_buf);
    free(seg_table);
    if (dec_handle) esp_opus_dec_close(dec_handle);
    if (f) fclose(f);

    ESP_LOGI(TAG, "Decode task finished");
    // Clearing the handle is what ogg_opus_stop() polls for.
    decode_task_handle = NULL;
    vTaskDelete(NULL);
}

// Start decoding filepath in a new decode task, stopping any playback that is
// currently running first.
//
//   filepath - path of the Ogg Opus file; copied into task_params, so the
//              caller's string need not outlive this call. Must fit in
//              task_params.filepath including the terminating NUL.
//   pcm_cb   - called from the decode task with each chunk of decoded PCM.
//   ctx      - opaque pointer forwarded to pcm_cb.
//
// Returns true when the decode task was created. Returns false on invalid
// arguments, when the path is too long, when the previous task did not exit
// within ogg_opus_stop()'s timeout, or when the event group or task cannot
// be created.
bool ogg_opus_start(const char *filepath, ogg_opus_pcm_cb_t pcm_cb, void *ctx) {
    if (!filepath || !pcm_cb) {
        ESP_LOGE(TAG, "Invalid arguments");
        return false;
    }

    ogg_opus_stop();
    if (decode_task_handle) {
        // The old task is still alive and still reads task_params; starting
        // another one now would overwrite its parameters and run two decoders.
        ESP_LOGE(TAG, "Previous decode task still running");
        return false;
    }

    if (!evt_group) {
        evt_group = xEventGroupCreate();
        if (!evt_group) return false;
    }
    xEventGroupClearBits(evt_group, EVT_STOP | EVT_PAUSE | EVT_RESUME);

    // Reject over-long paths instead of silently opening a truncated name.
    int written = snprintf(task_params.filepath, sizeof(task_params.filepath),
                           "%s", filepath);
    if (written < 0 || (size_t)written >= sizeof(task_params.filepath)) {
        ESP_LOGE(TAG, "Path too long: %s", filepath);
        return false;
    }
    task_params.pcm_cb = pcm_cb;
    task_params.ctx = ctx;

    BaseType_t ret = xTaskCreate(ogg_opus_task, "ogg_opus", OGG_OPUS_TASK_STACK,
                                 &task_params, 5, &decode_task_handle);
    if (ret != pdPASS) {
        ESP_LOGE(TAG, "Failed to create decode task");
        decode_task_handle = NULL;
        return false;
    }

    ESP_LOGI(TAG, "Started: %s", filepath);
    return true;
}

// Ask the decode task to exit and wait for it to do so.
// Does nothing when no task is running. Otherwise sets EVT_STOP together with
// EVT_RESUME (so a paused task wakes up), then polls decode_task_handle every
// 10 ms, at most 300 times, and logs a warning if the task is still alive.
void ogg_opus_stop(void) {
    if (decode_task_handle == NULL || evt_group == NULL) {
        return;
    }

    xEventGroupSetBits(evt_group, EVT_STOP | EVT_RESUME);

    int polls_left = 300;
    while (polls_left > 0 && decode_task_handle != NULL) {
        vTaskDelay(pdMS_TO_TICKS(10));
        polls_left--;
    }

    if (decode_task_handle != NULL) {
        ESP_LOGW(TAG, "Decode task did not exit in time");
    }
}

void ogg_opus_pause(void) {
    if (evt_group) {
        xEventGroupSetBits(evt_group, EVT_PAUSE);
    }
}

// Wake a paused decode task by setting EVT_RESUME.
// No-op when the event group has not been created yet.
void ogg_opus_resume(void) {
    if (evt_group == NULL) {
        return;
    }
    xEventGroupSetBits(evt_group, EVT_RESUME);
}