/*
 * moon / firmware / src / audio_metadata.c
 *
 * Repository page residue kept for provenance:
 *   Login; 1 branch 0 tags; Ben (T14/NixOS) Fonts 3c0e15e 1 month ago 23 Commits
 */
#include "audio_metadata.h"

#include <ctype.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <strings.h>

// Decode the first four bytes of buf as a big-endian (most significant byte
// first) unsigned 32-bit integer.
static uint32_t read_be32(const uint8_t *buf) {
    uint32_t value = 0;
    for (int i = 0; i < 4; i++) {
        value = (value << 8) | buf[i];
    }
    return value;
}


// Return the extension of the final path component, without the leading dot,
// or "" when that component has no dot. The result points into `path` (or at
// a string literal) and keeps its original case, so compare it
// case-insensitively.
static const char *get_extension(const char *path) {
    // Only the last component counts: a dot inside a directory name
    // ("/music.old/track") is not an extension.
    const char *slash = strrchr(path, '/');
    const char *name = slash ? slash + 1 : path;

    const char *dot = strrchr(name, '.');
    return dot ? dot + 1 : "";
}

// Copy the final path component of `path`, minus its extension, into `buf`
// as a NUL-terminated string (used as a fallback title).
//
//   path     - '/'-separated path; everything up to the last '/' is dropped
//   buf      - destination buffer
//   buf_size - capacity of buf in bytes, including the terminator
//
// The name is cut at the last '.' of the component and truncated to fit.
// When buf_size is 0 nothing is written.
static void extract_filename(const char *path, char *buf, size_t buf_size) {
    // Without this guard `buf_size - 1` below would wrap to SIZE_MAX
    if (buf_size == 0) return;

    const char *slash = strrchr(path, '/');
    const char *name = slash ? slash + 1 : path;

    const char *dot = strrchr(name, '.');
    size_t len = dot ? (size_t)(dot - name) : strlen(name);
    if (len > buf_size - 1) len = buf_size - 1;

    memcpy(buf, name, len);
    buf[len] = '\0';
}

// Strip trailing space characters from str in place (ID3v1 fields are
// space padded). Only ' ' is removed; other whitespace is kept.
static void trim_trailing(char *str) {
    char *end = str + strlen(str);
    while (end > str && end[-1] == ' ') {
        *--end = '\0';
    }
}

// ============================================================================
// MP3 Parser
// ============================================================================

// MP3 bitrate table [version][layer][index]
// Indexed directly by the raw 2-bit version field, 2-bit layer field and
// 4-bit bitrate index taken from a frame header.
// Version: 0=MPEG2.5, 1=reserved, 2=MPEG2, 3=MPEG1
// Layer: 0=reserved, 1=Layer3, 2=Layer2, 3=Layer1
// Values are presumably in kbit/s (the MP3 parser multiplies them by 1000).
// Index 0 and index 15 are stored as 0, as are all reserved rows; the frame
// header parser treats a 0 bitrate as "not a valid frame".
static const uint16_t mp3_bitrates[4][4][16] = {
    // MPEG 2.5
    {
        {0},  // reserved
        {0, 8, 16, 24, 32, 40, 48, 56, 64, 80, 96, 112, 128, 144, 160, 0},  // Layer 3
        {0, 8, 16, 24, 32, 40, 48, 56, 64, 80, 96, 112, 128, 144, 160, 0},  // Layer 2
        {0, 32, 48, 56, 64, 80, 96, 112, 128, 144, 160, 176, 192, 224, 256, 0},  // Layer 1
    },
    // Reserved
    {{0}, {0}, {0}, {0}},
    // MPEG 2
    {
        {0},  // reserved
        {0, 8, 16, 24, 32, 40, 48, 56, 64, 80, 96, 112, 128, 144, 160, 0},  // Layer 3
        {0, 8, 16, 24, 32, 40, 48, 56, 64, 80, 96, 112, 128, 144, 160, 0},  // Layer 2
        {0, 32, 48, 56, 64, 80, 96, 112, 128, 144, 160, 176, 192, 224, 256, 0},  // Layer 1
    },
    // MPEG 1
    {
        {0},  // reserved
        {0, 32, 40, 48, 56, 64, 80, 96, 112, 128, 160, 192, 224, 256, 320, 0},  // Layer 3
        {0, 32, 48, 56, 64, 80, 96, 112, 128, 160, 192, 224, 256, 320, 384, 0},  // Layer 2
        {0, 32, 64, 96, 128, 160, 192, 224, 256, 288, 320, 352, 384, 416, 448, 0},  // Layer 1
    },
};

// MP3 sample rate table [version][index]
// Indexed by the raw 2-bit version field (same numbering as mp3_bitrates)
// and the 2-bit sample-rate index from the frame header. Values are in Hz.
// Index 3 and the reserved version row are stored as 0, which the frame
// header parser treats as "not a valid frame".
static const uint16_t mp3_sample_rates[4][4] = {
    {11025, 12000, 8000, 0},   // MPEG 2.5
    {0, 0, 0, 0},              // Reserved
    {22050, 24000, 16000, 0},  // MPEG 2
    {44100, 48000, 32000, 0},  // MPEG 1
};

// Samples per frame [version][layer]
// Indexed by the raw version and layer fields (layer: 0=reserved, 1=Layer3,
// 2=Layer2, 3=Layer1). Used to turn a VBR header's frame count into a
// duration: total_samples = frames * samples_per_frame.
// Reserved combinations are stored as 0.
static const uint16_t mp3_samples_per_frame[4][4] = {
    {0, 576, 1152, 384},   // MPEG 2.5
    {0, 0, 0, 0},          // Reserved
    {0, 576, 1152, 384},   // MPEG 2
    {0, 1152, 1152, 384},  // MPEG 1
};

// Read a 10-byte ID3v2 tag header at the current stream position and return
// the total number of bytes the tag occupies: header + tag body + the
// optional 10-byte footer.
//
// Returns 0 if fewer than 10 bytes could be read or the "ID3" magic is
// missing; in that case the stream is rewound to offset 0. On success the
// stream is left just past the 10-byte header.
static uint32_t parse_id3v2_size(FILE *f) {
    enum { ID3V2_HEADER_SIZE = 10, ID3V2_FOOTER_SIZE = 10, ID3V2_FLAG_FOOTER = 0x10 };
    uint8_t header[ID3V2_HEADER_SIZE];

    if (fread(header, 1, sizeof header, f) != sizeof header ||
        memcmp(header, "ID3", 3) != 0) {
        fseek(f, 0, SEEK_SET);
        return 0;
    }

    // Body size is a syncsafe integer: four bytes carrying 7 bits each
    uint32_t size = ((uint32_t)(header[6] & 0x7F) << 21) |
                    ((uint32_t)(header[7] & 0x7F) << 14) |
                    ((uint32_t)(header[8] & 0x7F) << 7) |
                    (uint32_t)(header[9] & 0x7F);

    uint32_t total = size + ID3V2_HEADER_SIZE;

    // ID3v2.4 may append a footer (a copy of the header) that the size field
    // does not cover. Skipping it keeps the audio search from starting
    // inside the tag.
    if (header[3] >= 4 && (header[5] & ID3V2_FLAG_FOOTER)) {
        total += ID3V2_FOOTER_SIZE;
    }

    return total;
}

// Read the payload of one ID3v2 text frame from the current stream position
// and store its text in `dest` as a NUL-terminated string.
//
//   f         - stream positioned at the first payload byte (the encoding byte)
//   size      - payload size in bytes, as given by the frame header
//   dest      - destination buffer; left untouched if the frame is not decoded
//   dest_size - capacity of dest in bytes, including the terminator
//
// Payload layout: one encoding byte (0=ISO-8859-1, 1=UTF-16 with BOM,
// 2=UTF-16BE, 3=UTF-8) followed by the text. 8-bit encodings are copied
// verbatim; UTF-16 is reduced to its ASCII characters (others are dropped).
// Trailing spaces are trimmed from decoded text.
//
// Except for a short read at end of file, the whole payload is consumed so
// the caller stays aligned on the next frame header.
static void parse_id3v2_text_frame(FILE *f, uint32_t size, char *dest, size_t dest_size) {
    enum { MAX_TEXT_FRAME_SIZE = 1024 };

    if (size == 0) return;

    uint8_t *buf = NULL;
    if (size <= MAX_TEXT_FRAME_SIZE && dest_size > 0) {
        buf = malloc(size);
    }
    if (!buf) {
        // Oversized frame, no destination space, or out of memory: leave
        // dest alone but still step over the payload.
        fseek(f, (long)size, SEEK_CUR);
        return;
    }

    if (fread(buf, 1, size, f) != size) {
        free(buf);
        return;
    }

    const uint8_t encoding = buf[0];
    const uint8_t *text = buf + 1;
    size_t text_len = size - 1;
    bool decoded = false;

    if (encoding == 0 || encoding == 3) {
        // ISO-8859-1 or UTF-8 - copy the bytes as they are
        if (text_len > dest_size - 1) text_len = dest_size - 1;
        memcpy(dest, text, text_len);
        dest[text_len] = '\0';
        decoded = true;
    } else if ((encoding == 1 && text_len >= 2) || encoding == 2) {
        // UTF-16: encoding 1 starts with a byte-order mark, encoding 2 is
        // big-endian with no mark.
        bool little_endian = false;
        if (encoding == 1) {
            little_endian = (text[0] == 0xFF && text[1] == 0xFE);
            text += 2;
            text_len -= 2;
        }

        size_t j = 0;
        for (size_t i = 0; i + 1 < text_len && j + 1 < dest_size; i += 2) {
            uint16_t ch = little_endian
                              ? (uint16_t)(text[i] | (text[i + 1] << 8))
                              : (uint16_t)((text[i] << 8) | text[i + 1]);
            if (ch == 0) break;  // terminator: stop like the 8-bit path does
            if (ch < 128) {
                dest[j++] = (char)ch;
            }
        }
        dest[j] = '\0';
        decoded = true;
    }

    if (decoded) {
        trim_trailing(dest);
    }

    free(buf);
}

// Walk the frames of an ID3v2 tag and copy title (TIT2), artist (TPE1) and
// album (TALB) into `meta`.
//
//   f        - stream positioned immediately after the 10-byte tag header
//   tag_size - total tag size as returned by parse_id3v2_size()
//   meta     - receives the text fields; fields without a frame are untouched
//
// Only the 10-byte frame header layout of ID3v2.3/2.4 is understood; tags
// with an older major version are ignored. Extended headers and
// unsynchronisation are not handled.
static void parse_id3v2_tags(FILE *f, uint32_t tag_size, audio_metadata_t *meta) {
    enum { TAG_HEADER_SIZE = 10, FRAME_HEADER_SIZE = 10 };

    long start_pos = ftell(f);
    if (start_pos < TAG_HEADER_SIZE || tag_size <= TAG_HEADER_SIZE) return;
    long end_pos = start_pos + (long)(tag_size - TAG_HEADER_SIZE);

    // The major version (4th byte of the tag header we were positioned
    // after) decides how frame sizes are encoded.
    uint8_t major = 0;
    bool have_version = fseek(f, start_pos - TAG_HEADER_SIZE + 3, SEEK_SET) == 0 &&
                        fread(&major, 1, 1, f) == 1;
    if (fseek(f, start_pos, SEEK_SET) != 0 || !have_version) return;
    if (major < 3) return;  // v2.2 uses 6-byte frame headers

    for (;;) {
        long frame_pos = ftell(f);
        // Stop when there is no room left for a header plus payload
        if (frame_pos < 0 || frame_pos >= end_pos - FRAME_HEADER_SIZE) break;

        uint8_t hdr[FRAME_HEADER_SIZE];
        if (fread(hdr, 1, sizeof hdr, f) != sizeof hdr) break;

        // A zero byte where a frame ID should start means padding
        if (hdr[0] == 0) break;

        // v2.4 stores the frame size syncsafe (7 bits per byte); v2.3 uses a
        // plain big-endian value. A v2.4 size with any high bit set cannot
        // be syncsafe, so it is read as plain (some writers get this wrong).
        const uint8_t *sz = &hdr[4];
        uint32_t frame_size;
        if (major >= 4 && ((sz[0] | sz[1] | sz[2] | sz[3]) & 0x80) == 0) {
            frame_size = ((uint32_t)sz[0] << 21) | ((uint32_t)sz[1] << 14) |
                         ((uint32_t)sz[2] << 7) | (uint32_t)sz[3];
        } else {
            frame_size = read_be32(sz);
        }

        // A frame cannot extend past the tag that contains it
        long data_pos = frame_pos + FRAME_HEADER_SIZE;
        if (frame_size > (uint32_t)(end_pos - data_pos)) break;

        if (memcmp(hdr, "TIT2", 4) == 0) {
            parse_id3v2_text_frame(f, frame_size, meta->title, sizeof(meta->title));
        } else if (memcmp(hdr, "TPE1", 4) == 0) {
            parse_id3v2_text_frame(f, frame_size, meta->artist, sizeof(meta->artist));
        } else if (memcmp(hdr, "TALB", 4) == 0) {
            parse_id3v2_text_frame(f, frame_size, meta->album, sizeof(meta->album));
        }

        // Move to the next frame by absolute offset so the walk stays
        // aligned no matter how much of the payload was consumed above.
        if (fseek(f, data_pos + (long)frame_size, SEEK_SET) != 0) break;
    }
}

// Copy one 30-byte ID3v1 text field into dest, truncated to fit, always
// NUL-terminated and with trailing padding spaces removed.
static void id3v1_copy_field(const uint8_t *field, char *dest, size_t dest_size) {
    enum { ID3V1_FIELD_LEN = 30 };

    if (dest_size == 0) return;

    size_t len = ID3V1_FIELD_LEN;
    if (len > dest_size - 1) len = dest_size - 1;

    memcpy(dest, field, len);
    dest[len] = '\0';
    trim_trailing(dest);
}

// Parse an ID3v1 tag (the last 128 bytes of the file, starting with "TAG")
// and fill in title, artist and album.
//
// A field is only written when it is still empty, so values already present
// in `meta` (from ID3v2, or pre-filled by the caller) take precedence.
// The stream position is restored before returning.
static void parse_id3v1_tags(FILE *f, audio_metadata_t *meta) {
    enum { ID3V1_TAG_SIZE = 128 };

    long pos = ftell(f);
    uint8_t tag[ID3V1_TAG_SIZE];

    // The seek fails for files shorter than a tag; never read from a stale
    // position in that case.
    if (fseek(f, -(long)ID3V1_TAG_SIZE, SEEK_END) == 0 &&
        fread(tag, 1, sizeof tag, f) == sizeof tag &&
        memcmp(tag, "TAG", 3) == 0) {
        // Layout: "TAG", title[30], artist[30], album[30], ...
        if (meta->title[0] == '\0') {
            id3v1_copy_field(&tag[3], meta->title, sizeof(meta->title));
        }
        if (meta->artist[0] == '\0') {
            id3v1_copy_field(&tag[33], meta->artist, sizeof(meta->artist));
        }
        if (meta->album[0] == '\0') {
            id3v1_copy_field(&tag[63], meta->album, sizeof(meta->album));
        }
    }

    fseek(f, pos, SEEK_SET);
}

// Decode a 4-byte MPEG audio frame header.
//
//   buf         - at least 4 bytes to examine
//   version     - out: raw version field (0=2.5, 1=reserved, 2=MPEG2, 3=MPEG1)
//   layer       - out: raw layer field (0=reserved, 1=L3, 2=L2, 3=L1)
//   bitrate     - out: value looked up in mp3_bitrates
//   sample_rate - out: value looked up in mp3_sample_rates
//   channels    - out: 1 for mono, otherwise 2
//
// Returns true only when the sync pattern is present, neither version nor
// layer is reserved, and both table lookups are non-zero. Nothing is
// written without sync; version/layer are written before the reserved check.
static bool parse_mp3_frame_header(const uint8_t *buf, uint8_t *version, uint8_t *layer,
                                   uint16_t *bitrate, uint16_t *sample_rate, uint8_t *channels) {
    // Frame sync: the first 11 bits are all ones
    const bool synced = (buf[0] == 0xFF) && ((buf[1] & 0xE0) == 0xE0);
    if (!synced) {
        return false;
    }

    const uint8_t ver = (buf[1] >> 3) & 0x03;
    const uint8_t lay = (buf[1] >> 1) & 0x03;
    *version = ver;
    *layer = lay;

    // Reserved version or layer values never describe a real frame
    if (ver == 1 || lay == 0) {
        return false;
    }

    const uint8_t bitrate_index = (buf[2] >> 4) & 0x0F;
    const uint8_t rate_index = (buf[2] >> 2) & 0x03;
    const bool mono = ((buf[3] >> 6) & 0x03) == 3;

    *bitrate = mp3_bitrates[ver][lay][bitrate_index];
    *sample_rate = mp3_sample_rates[ver][rate_index];
    *channels = mono ? 1 : 2;

    return (*bitrate != 0) && (*sample_rate != 0);
}

// Look for a Xing/Info or VBRI header inside the frame starting at
// frame_start and extract the total frame count.
//
// Reads 120 bytes from frame_start. "Xing"/"Info" is searched at byte
// offsets 4..39 (its position varies with version and channel mode); it only
// counts when its flags word has bit 0 set (frame count present). Failing
// that, "VBRI" is checked at offset 36 with its frame count at offset 50.
//
// Returns true and sets *total_frames when a count was found; false
// otherwise, including when 120 bytes could not be read.
static bool parse_xing_header(FILE *f, long frame_start, uint32_t *total_frames) {
    uint8_t probe[120];

    fseek(f, frame_start, SEEK_SET);
    if (fread(probe, 1, sizeof probe, f) != sizeof probe) {
        return false;
    }

    for (const uint8_t *p = probe + 4; p < probe + 40; p++) {
        const bool tagged = memcmp(p, "Xing", 4) == 0 || memcmp(p, "Info", 4) == 0;
        if (!tagged) continue;

        // Flags follow the tag; bit 0 says the frame count field is present
        if ((read_be32(p + 4) & 0x01) == 0) continue;

        *total_frames = read_be32(p + 8);
        return true;
    }

    // VBRI sits at a fixed place: 32 bytes after the 4-byte frame header
    if (memcmp(probe + 36, "VBRI", 4) != 0) {
        return false;
    }

    *total_frames = read_be32(probe + 50);
    return true;
}

// Extract metadata from an MP3 file.
//
// Steps: read ID3v2 text frames (if a tag is present), read ID3v1 fields,
// search up to 64 KiB past the ID3v2 tag for the first valid frame header,
// then take bitrate / sample rate / channels from that header. Duration
// comes from a Xing/VBRI frame count when available, otherwise from
// audio size divided by the first frame's bitrate.
//
// Returns true and sets meta->valid when a frame header was found; returns
// false if the file cannot be opened or sized, or no frame is found (text
// fields already parsed stay in `meta`).
static bool parse_mp3(const char *path, audio_metadata_t *meta) {
    FILE *f = fopen(path, "rb");
    if (!f) return false;

    // Get file size; the CBR estimate below is meaningless without it
    long file_size = -1;
    if (fseek(f, 0, SEEK_END) == 0) {
        file_size = ftell(f);
    }
    if (file_size < 0 || fseek(f, 0, SEEK_SET) != 0) {
        fclose(f);
        return false;
    }

    // Parse ID3v2 if present
    uint32_t id3v2_size = parse_id3v2_size(f);
    if (id3v2_size > 0) {
        fseek(f, 10, SEEK_SET);  // Skip header, parse tags
        parse_id3v2_tags(f, id3v2_size, meta);
        fseek(f, (long)id3v2_size, SEEK_SET);  // Skip to audio data
    }

    // Parse ID3v1 at end of file (restores the stream position)
    parse_id3v1_tags(f, meta);

    // Find first valid MP3 frame
    long audio_start = ftell(f);
    uint8_t buf[4];
    uint8_t version = 0, layer = 0, channels = 0;
    uint16_t bitrate = 0, sample_rate = 0;
    bool found_frame = false;

    // Search for sync in first 64KB, sliding one byte at a time
    for (int i = 0; i < 65536 && !found_frame; i++) {
        if (fread(buf, 1, 4, f) != 4) break;

        if (parse_mp3_frame_header(buf, &version, &layer, &bitrate, &sample_rate, &channels)) {
            audio_start = ftell(f) - 4;
            found_frame = true;
        } else {
            fseek(f, -3, SEEK_CUR);  // Move back and try next byte
        }
    }

    if (!found_frame) {
        fclose(f);
        return false;
    }

    meta->bitrate = bitrate * 1000;
    meta->sample_rate = sample_rate;
    meta->channels = channels;

    // Try to get VBR info from Xing/VBRI header
    uint32_t total_frames = 0;
    if (parse_xing_header(f, audio_start, &total_frames) && total_frames > 0) {
        // Calculate duration from frame count
        uint16_t samples_per_frame = mp3_samples_per_frame[version][layer];
        uint64_t total_samples = (uint64_t)total_frames * samples_per_frame;
        meta->duration_ms = (uint32_t)((total_samples * 1000) / sample_rate);
    } else {
        // CBR estimation: duration = (audio_bytes * 8) / bitrate
        long audio_size = file_size - audio_start;

        // Exclude a trailing ID3v1 tag. Only probe when the seek worked,
        // otherwise the read would come from an unrelated position.
        uint8_t trailer[3];
        if (fseek(f, -128, SEEK_END) == 0 &&
            fread(trailer, 1, sizeof trailer, f) == sizeof trailer &&
            memcmp(trailer, "TAG", 3) == 0) {
            audio_size -= 128;
        }

        // A non-positive size would wrap in the unsigned arithmetic below
        if (meta->bitrate > 0 && audio_size > 0) {
            meta->duration_ms = (uint32_t)(((uint64_t)audio_size * 8000ULL) / meta->bitrate);
        }
    }

    meta->valid = true;
    fclose(f);
    return true;
}

// ============================================================================
// M4A/MP4 Parser
// ============================================================================

// Read one MP4 atom header at the current stream position.
//
//   f    - stream positioned at the start of an atom
//   type - out: the four type bytes plus a NUL terminator (needs 5 bytes)
//
// Returns the atom's total size in bytes, header included. A 32-bit size of
// 1 means a 64-bit size follows the type. Returns 0 on a short read, for the
// "extends to end of file" size 0, and for sizes smaller than the header
// itself (corrupt data); callers treat 0 as "stop".
// On success the stream is left just past the header (8 or 16 bytes).
static uint64_t read_atom_header(FILE *f, char *type) {
    uint8_t header[8];
    if (fread(header, 1, sizeof header, f) != sizeof header) return 0;

    memcpy(type, &header[4], 4);
    type[4] = '\0';

    uint64_t size = read_be32(header);
    uint64_t header_len = sizeof header;

    if (size == 1) {
        // Extended size (64-bit)
        uint8_t ext[8];
        if (fread(ext, 1, sizeof ext, f) != sizeof ext) return 0;
        size = ((uint64_t)read_be32(ext) << 32) | read_be32(&ext[4]);
        header_len += sizeof ext;
    }

    // An atom can never be smaller than its own header; rejecting it here
    // keeps callers' "size - 8" arithmetic from wrapping.
    if (size < header_len) return 0;

    return size;
}

// Parse the body of an mvhd atom and store the movie duration in
// meta->duration_ms.
//
//   f    - stream positioned just after the mvhd atom header
//   size - total atom size (header included)
//   meta - receives duration_ms when the timescale is non-zero
//
// Returns false if the atom is too small or cannot be read, true otherwise
// (even when the timescale is 0 and no duration was stored).
static bool parse_mvhd(FILE *f, uint32_t size, audio_metadata_t *meta) {
    uint8_t buf[32];

    // 8 header bytes plus the 32 body bytes examined below
    if (size < 8 + sizeof buf) return false;
    if (fread(buf, 1, sizeof buf, f) != sizeof buf) return false;

    // Body: version(1) flags(3), then creation and modification times,
    // timescale and duration. Version 0 uses 32-bit times/duration,
    // version 1 uses 64-bit ones; timescale is 32-bit in both.
    uint32_t timescale;
    uint64_t duration;

    if (buf[0] == 0) {
        timescale = read_be32(&buf[12]);
        duration = read_be32(&buf[16]);
    } else {
        timescale = read_be32(&buf[20]);
        duration = ((uint64_t)read_be32(&buf[24]) << 32) | read_be32(&buf[28]);
    }

    if (timescale > 0) {
        // Split into whole seconds and remainder so the multiplication by
        // 1000 cannot overflow; saturate at the largest storable value.
        uint64_t secs = duration / timescale;
        uint64_t rem = duration % timescale;

        if (secs >= UINT32_MAX / 1000) {
            meta->duration_ms = UINT32_MAX;
        } else {
            meta->duration_ms = (uint32_t)(secs * 1000 + (rem * 1000) / timescale);
        }
    }

    return true;
}

// Read the text held by one iTunes metadata item (e.g. the title atom).
//
//   f         - stream positioned just after the item's atom header
//   size      - total size of the item atom (header included)
//   dest      - receives the text as a NUL-terminated string, truncated to fit
//   dest_size - capacity of dest in bytes, including the terminator
//
// The item's children are scanned for the first "data" atom, whose body is
// 4 bytes of version/flags, 4 reserved bytes, then the text. Payloads of
// 1024 bytes or more are ignored. The stream is always left at the end of
// the item atom so the caller can continue with the next item.
static void parse_ilst_data(FILE *f, uint32_t size, char *dest, size_t dest_size) {
    enum { ATOM_HEADER_SIZE = 8, DATA_PREFIX_SIZE = 8, MAX_TEXT_LEN = 1024 };

    long start = ftell(f);
    if (start < 0 || size < ATOM_HEADER_SIZE) return;
    long end = start + (long)(size - ATOM_HEADER_SIZE);

    for (;;) {
        long child_pos = ftell(f);
        if (child_pos < 0 || child_pos >= end) break;

        char type[5];
        uint64_t atom_size = read_atom_header(f, type);

        // Stop on read errors, undersized atoms and children that claim to
        // extend past their parent.
        if (atom_size < ATOM_HEADER_SIZE || atom_size > (uint64_t)(end - child_pos)) break;

        if (strcmp(type, "data") == 0) {
            // Skip version and flags (4 bytes) and null (4 bytes)
            if (atom_size > ATOM_HEADER_SIZE + DATA_PREFIX_SIZE && dest_size > 0 &&
                fseek(f, DATA_PREFIX_SIZE, SEEK_CUR) == 0) {
                uint64_t text_len = atom_size - (ATOM_HEADER_SIZE + DATA_PREFIX_SIZE);
                if (text_len < MAX_TEXT_LEN) {
                    size_t want = (size_t)text_len;
                    if (want > dest_size - 1) want = dest_size - 1;

                    // Terminate at what was actually read so a short read
                    // never leaves a half-overwritten string.
                    size_t got = fread(dest, 1, want, f);
                    dest[got] = '\0';
                }
            }
            break;
        }

        // Skip this atom
        if (fseek(f, child_pos + (long)atom_size, SEEK_SET) != 0) break;
    }

    // Truncated or skipped text leaves the stream mid-atom; realign it
    fseek(f, end, SEEK_SET);
}

// Parse an ilst atom (iTunes metadata list) and copy title, artist and
// album into `meta`.
//
//   f    - stream positioned just after the ilst atom header
//   size - total size of the ilst atom (header included)
//   meta - receives the text fields; items that are absent leave it untouched
//
// Items are recognised by type: \251nam (title), \251ART (artist) and
// \251alb (album), where \251 is byte 0xA9. Everything else is skipped.
static void parse_ilst(FILE *f, uint32_t size, audio_metadata_t *meta) {
    enum { ATOM_HEADER_SIZE = 8 };

    long start = ftell(f);
    if (start < 0 || size < ATOM_HEADER_SIZE) return;
    long end = start + (long)(size - ATOM_HEADER_SIZE);

    for (;;) {
        long item_pos = ftell(f);
        if (item_pos < 0 || item_pos >= end) break;

        char type[5];
        uint64_t atom_size = read_atom_header(f, type);

        // Stop on read errors, undersized atoms and items that claim to
        // extend past the list.
        if (atom_size < ATOM_HEADER_SIZE || atom_size > (uint64_t)(end - item_pos)) break;

        if (strcmp(type, "\251nam") == 0) {
            parse_ilst_data(f, (uint32_t)atom_size, meta->title, sizeof(meta->title));
        } else if (strcmp(type, "\251ART") == 0) {
            parse_ilst_data(f, (uint32_t)atom_size, meta->artist, sizeof(meta->artist));
        } else if (strcmp(type, "\251alb") == 0) {
            parse_ilst_data(f, (uint32_t)atom_size, meta->album, sizeof(meta->album));
        }

        // Go to the next item by absolute offset rather than trusting
        // where the item parser left the stream.
        if (fseek(f, item_pos + (long)atom_size, SEEK_SET) != 0) break;
    }
}

// Walk the atoms between the current position and `end`, descending into
// udta/meta containers at most MOOV_MAX_DEPTH levels deep. `depth` is the
// nesting level of the atoms being walked (0 = direct children of moov).
static void parse_moov_level(FILE *f, long end, audio_metadata_t *meta, int depth) {
    enum { ATOM_HEADER_SIZE = 8, MOOV_MAX_DEPTH = 8 };

    for (;;) {
        long atom_start = ftell(f);
        if (atom_start < 0 || atom_start >= end) break;

        char type[5];
        uint64_t atom_size = read_atom_header(f, type);

        // Stop on read errors, undersized atoms and atoms that claim to
        // extend past their container.
        if (atom_size < ATOM_HEADER_SIZE || atom_size > (uint64_t)(end - atom_start)) break;
        long atom_end = atom_start + (long)atom_size;

        if (strcmp(type, "mvhd") == 0) {
            parse_mvhd(f, (uint32_t)atom_size, meta);
        } else if (strcmp(type, "udta") == 0 || strcmp(type, "meta") == 0) {
            // Nesting is controlled by the file, so cap it to keep stack
            // use bounded on crafted input.
            if (depth < MOOV_MAX_DEPTH) {
                // meta has 4 extra bytes (version/flags)
                if (strcmp(type, "meta") == 0) {
                    fseek(f, 4, SEEK_CUR);
                }
                parse_moov_level(f, atom_end, meta, depth + 1);
            }
        } else if (strcmp(type, "ilst") == 0) {
            parse_ilst(f, (uint32_t)atom_size, meta);
        }

        // Always continue from the atom's declared end
        if (fseek(f, atom_end, SEEK_SET) != 0) break;
    }
}

// Search the contents of a moov atom for the atoms this parser understands:
// mvhd (duration) and, inside udta/meta containers, ilst (text metadata).
//
//   f    - stream positioned at the first child atom
//   end  - absolute file offset at which the container ends
//   meta - receives whatever the child parsers extract
static void parse_moov_recursive(FILE *f, long end, audio_metadata_t *meta) {
    parse_moov_level(f, end, meta, 0);
}

// Extract metadata from an M4A/MP4 file.
//
// Walks the top-level atoms until "moov" is found, parses it, and marks the
// metadata valid when a non-zero duration was obtained.
//
// Returns meta->valid: true only if moov was found and yielded a duration.
// Returns false if the file cannot be opened.
static bool parse_m4a(const char *path, audio_metadata_t *meta) {
    enum { ATOM_HEADER_SIZE = 8 };

    FILE *f = fopen(path, "rb");
    if (!f) return false;

    long file_size = -1;
    if (fseek(f, 0, SEEK_END) == 0) {
        file_size = ftell(f);
    }

    if (file_size > 0 && fseek(f, 0, SEEK_SET) == 0) {
        // Look for moov atom
        for (;;) {
            long atom_start = ftell(f);
            if (atom_start < 0 || atom_start >= file_size) break;

            char type[5];
            uint64_t atom_size = read_atom_header(f, type);
            if (atom_size < ATOM_HEADER_SIZE) break;

            uint64_t remaining = (uint64_t)(file_size - atom_start);

            if (strcmp(type, "moov") == 0) {
                // Tolerate a truncated file: parse what is there
                long moov_end = atom_size > remaining ? file_size
                                                      : atom_start + (long)atom_size;
                parse_moov_recursive(f, moov_end, meta);
                meta->valid = (meta->duration_ms > 0);
                break;
            }

            // Nothing can follow an atom that reaches (or claims to pass)
            // the end of the file. Checking before the seek also keeps a
            // bogus 64-bit size from wrapping the offset backwards and
            // looping forever.
            if (atom_size >= remaining) break;
            if (fseek(f, atom_start + (long)atom_size, SEEK_SET) != 0) break;
        }
    }

    fclose(f);
    return meta->valid;
}

// ============================================================================
// Public API
// ============================================================================

// Read metadata for the audio file at `path` into `meta`.
//
// `meta` is zeroed first, then the parser matching the file extension is
// run (case-insensitive): "mp3" uses the MP3 parser; "m4a", "mp4" and "aac"
// use the M4A/MP4 parser. Any other extension is not parsed.
//
// The title always ends up non-empty when the filename is: if no parser
// supplied one, the filename without its extension is used.
//
// Returns true when the format-specific parser succeeded. Returns false for
// NULL arguments (meta untouched) and for unsupported or unparseable files;
// in the latter case meta->valid is false but the fallback title is set.
bool audio_metadata_get(const char *path, audio_metadata_t *meta) {
    if (!path || !meta) return false;

    // Initialize metadata
    memset(meta, 0, sizeof(*meta));

    // Get extension
    const char *ext = get_extension(path);

    // Dispatch to format-specific parser
    bool success = false;

    if (strcasecmp(ext, "mp3") == 0) {
        success = parse_mp3(path, meta);
    } else if (strcasecmp(ext, "m4a") == 0 || strcasecmp(ext, "mp4") == 0 ||
               strcasecmp(ext, "aac") == 0) {
        success = parse_m4a(path, meta);
    }

    // Apply the filename fallback only after parsing. Pre-filling the title
    // would make it look "already set" to the ID3v1 parser, which only
    // fills empty fields, so an ID3v1 title would never be used.
    if (meta->title[0] == '\0') {
        extract_filename(path, meta->title, sizeof(meta->title));
    }

    // On failure the caller still gets the fallback title, flagged invalid
    if (!success) {
        meta->valid = false;
    }

    return success;
}