text/plain
•
17.46 KB
•
548 lines
#include "audio_metadata.h"
#include <ctype.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <strings.h>
// Decode an unsigned 32-bit big-endian value from the four bytes at `buf`
// (buf[0] is the most significant byte).
static uint32_t read_be32(const uint8_t *buf) {
    uint32_t value = 0;
    for (int i = 0; i < 4; i++) {
        value = (value << 8) | buf[i];
    }
    return value;
}
// Return the extension of the last path component: the text after its final
// '.', or "" when that component contains no dot. The result points into
// `path` (or at a string literal) and keeps the original letter case, so
// callers must compare it case-insensitively. Only '/' is treated as a
// path separator.
static const char *get_extension(const char *path) {
    // Search the basename only, so that a dot in a directory name
    // ("albums.2024/track") is not mistaken for an extension separator.
    const char *slash = strrchr(path, '/');
    const char *name = slash ? slash + 1 : path;
    const char *dot = strrchr(name, '.');
    return dot ? dot + 1 : "";
}
// Copy the last path component of `path`, without its extension, into `buf`
// as a NUL-terminated string (used as the fallback title). The name is
// truncated to at most buf_size - 1 characters. Nothing is written when
// buf_size is 0. Only '/' is treated as a path separator.
static void extract_filename(const char *path, char *buf, size_t buf_size) {
    if (buf_size == 0) {
        return;  // no room for the terminator; buf_size - 1 below would wrap
    }

    const char *slash = strrchr(path, '/');
    const char *name = slash ? slash + 1 : path;
    const char *dot = strrchr(name, '.');

    size_t len = dot ? (size_t)(dot - name) : strlen(name);
    if (len > buf_size - 1) {
        len = buf_size - 1;
    }
    memcpy(buf, name, len);
    buf[len] = '\0';
}
// Remove trailing space characters from `str` in place (ID3v1 pads its
// fixed-width fields with spaces). Every removed space is overwritten with
// '\0'; characters other than ' ' are left alone.
static void trim_trailing(char *str) {
    for (char *end = str + strlen(str); end != str && end[-1] == ' ';) {
        *--end = '\0';
    }
}
// ============================================================================
// MP3 Parser
// ============================================================================
// Bitrate lookup, indexed as [version][layer][bitrate index] with the raw
// field values of an MPEG audio frame header. Values are in kbit/s.
//   version: 0 = MPEG 2.5, 1 = reserved, 2 = MPEG 2, 3 = MPEG 1
//   layer:   0 = reserved, 1 = Layer 3,  2 = Layer 2, 3 = Layer 1
// Rows that are not listed (reserved version, reserved layer) are all zero,
// as are bitrate indices 0 and 15 of every row.
static const uint16_t mp3_bitrates[4][4][16] = {
    [0] = {  // MPEG 2.5
        [1] = {0, 8, 16, 24, 32, 40, 48, 56, 64, 80, 96, 112, 128, 144, 160, 0},
        [2] = {0, 8, 16, 24, 32, 40, 48, 56, 64, 80, 96, 112, 128, 144, 160, 0},
        [3] = {0, 32, 48, 56, 64, 80, 96, 112, 128, 144, 160, 176, 192, 224, 256, 0},
    },
    [2] = {  // MPEG 2
        [1] = {0, 8, 16, 24, 32, 40, 48, 56, 64, 80, 96, 112, 128, 144, 160, 0},
        [2] = {0, 8, 16, 24, 32, 40, 48, 56, 64, 80, 96, 112, 128, 144, 160, 0},
        [3] = {0, 32, 48, 56, 64, 80, 96, 112, 128, 144, 160, 176, 192, 224, 256, 0},
    },
    [3] = {  // MPEG 1
        [1] = {0, 32, 40, 48, 56, 64, 80, 96, 112, 128, 160, 192, 224, 256, 320, 0},
        [2] = {0, 32, 48, 56, 64, 80, 96, 112, 128, 160, 192, 224, 256, 320, 384, 0},
        [3] = {0, 32, 64, 96, 128, 160, 192, 224, 256, 288, 320, 352, 384, 416, 448, 0},
    },
};
// Sample rate lookup in Hz, indexed as [version][sample-rate index] with the
// raw 2-bit header fields (version: 0 = MPEG 2.5, 1 = reserved, 2 = MPEG 2,
// 3 = MPEG 1). The reserved version row and index 3 of every row are zero.
static const uint16_t mp3_sample_rates[4][4] = {
    [0] = {11025, 12000, 8000},   // MPEG 2.5
    [2] = {22050, 24000, 16000},  // MPEG 2
    [3] = {44100, 48000, 32000},  // MPEG 1
};
// Number of samples carried by one frame, indexed as [version][layer] with
// the raw 2-bit header fields (version: 0 = MPEG 2.5, 1 = reserved,
// 2 = MPEG 2, 3 = MPEG 1; layer: 0 = reserved, 1 = Layer 3, 2 = Layer 2,
// 3 = Layer 1). Reserved combinations are zero.
static const uint16_t mp3_samples_per_frame[4][4] = {
    [0] = {[1] = 576,  [2] = 1152, [3] = 384},  // MPEG 2.5
    [2] = {[1] = 576,  [2] = 1152, [3] = 384},  // MPEG 2
    [3] = {[1] = 1152, [2] = 1152, [3] = 384},  // MPEG 1
};
// Probe for an ID3v2 tag header at the current position of `f`.
//
// Reads 10 bytes. When they start with the "ID3" magic, returns the number of
// bytes the whole tag occupies: the 10-byte header, the tag body (a 28-bit
// syncsafe size, 7 bits per byte) and, for ID3v2.4 tags with the footer flag
// set, the 10-byte footer that follows the body. The stream is then left
// positioned just after the header.
//
// Returns 0 when the header cannot be read or the magic is missing; in that
// case the stream is rewound to offset 0.
static uint32_t parse_id3v2_size(FILE *f) {
    enum {
        ID3V2_HEADER_LEN = 10,
        ID3V2_FOOTER_LEN = 10,
        ID3V2_FLAG_FOOTER = 0x10  // bit 4 of the flags byte, defined by v2.4
    };

    uint8_t header[ID3V2_HEADER_LEN];
    if (fread(header, 1, sizeof header, f) != sizeof header ||
        memcmp(header, "ID3", 3) != 0) {
        fseek(f, 0, SEEK_SET);
        return 0;
    }

    uint32_t body = ((uint32_t)(header[6] & 0x7F) << 21) |
                    ((uint32_t)(header[7] & 0x7F) << 14) |
                    ((uint32_t)(header[8] & 0x7F) << 7) |
                    (uint32_t)(header[9] & 0x7F);
    uint32_t total = body + ID3V2_HEADER_LEN;

    // The size field excludes both header and footer, so a v2.4 footer has
    // to be added explicitly or the audio offset ends up inside the tag.
    if (header[3] == 4 && (header[5] & ID3V2_FLAG_FOOTER)) {
        total += ID3V2_FOOTER_LEN;
    }
    return total;
}
// Decode the payload of an ID3v2 text frame into `dest`.
//
// On entry `f` is positioned on the first payload byte of a frame whose
// payload is `size` bytes long. The payload is one encoding byte
// (0 = ISO-8859-1, 1 = UTF-16 with BOM, 2 = UTF-16BE without BOM, 3 = UTF-8)
// followed by the text. ISO-8859-1 and UTF-8 bytes are copied verbatim up to
// the first NUL; UTF-16 is reduced to its ASCII code points and everything
// else is dropped. The result is truncated to dest_size - 1 bytes, stripped
// of trailing spaces and NUL-terminated.
//
// `dest` is left untouched when the frame is empty, larger than
// MAX_TEXT_FRAME bytes, uses an unknown encoding, or cannot be read.
// The payload is consumed even when it is not decoded, so the stream ends up
// at the start of the next frame.
static void parse_id3v2_text_frame(FILE *f, uint32_t size, char *dest, size_t dest_size) {
    enum { MAX_TEXT_FRAME = 1024 };

    if (size == 0) {
        return;
    }

    uint8_t *buf = NULL;
    if (size <= MAX_TEXT_FRAME && dest_size > 0) {
        buf = malloc(size);
    }
    if (!buf) {
        // Not decoding this frame: still step over its payload, otherwise the
        // caller would read the next frame header from inside this one.
        if (size <= 0x7FFFFFFFu) {
            fseek(f, (long)size, SEEK_CUR);
        }
        return;
    }
    if (fread(buf, 1, size, f) != size) {
        free(buf);
        return;
    }

    const uint8_t encoding = buf[0];
    const uint8_t *text = buf + 1;
    const size_t text_len = size - 1;
    const size_t room = dest_size - 1;  // bytes available before the NUL
    size_t out = 0;
    int decoded = 0;

    if (encoding == 0 || encoding == 3) {
        // Single-byte encodings: copy as-is, stopping at an embedded NUL.
        out = text_len < room ? text_len : room;
        const uint8_t *nul = memchr(text, '\0', out);
        if (nul) {
            out = (size_t)(nul - text);
        }
        memcpy(dest, text, out);
        decoded = 1;
    } else if (encoding == 2 || (encoding == 1 && text_len >= 2)) {
        // UTF-16: encoding 1 starts with a BOM (FF FE = little-endian,
        // anything else is read as big-endian); encoding 2 is always
        // big-endian and has no BOM.
        int little_endian = 0;
        size_t i = 0;
        if (encoding == 1) {
            little_endian = (text[0] == 0xFF && text[1] == 0xFE);
            i = 2;
        }
        for (; i + 1 < text_len && out < room; i += 2) {
            uint16_t ch = little_endian
                              ? (uint16_t)(text[i] | (text[i + 1] << 8))
                              : (uint16_t)((text[i] << 8) | text[i + 1]);
            if (ch > 0 && ch < 128) {
                dest[out++] = (char)ch;
            }
        }
        decoded = 1;
    }

    if (decoded) {
        while (out > 0 && dest[out - 1] == ' ') {
            out--;
        }
        dest[out] = '\0';
    }
    free(buf);
}
// Walk the frames of an ID3v2.3 / ID3v2.4 tag and store the title (TIT2),
// artist (TPE1) and album (TALB) text frames in `meta`.
//
// On entry `f` must be positioned on the first byte after the 10-byte tag
// header; `tag_size` is the size of the whole tag including that header.
// Parsing stops at padding (a frame ID starting with 0), at a read error, or
// at a frame that would extend past the end of the tag. The stream position
// on return is not meaningful; the caller is expected to reposition it.
static void parse_id3v2_tags(FILE *f, uint32_t tag_size, audio_metadata_t *meta) {
    enum { FRAME_HEADER_LEN = 10 };

    long start_pos = ftell(f);
    long end_pos = start_pos + (long)tag_size - 10;  // header was already read

    // The tag's major version is byte 3 of the tag header, i.e. 7 bytes
    // before the first frame. Fall back to v2.3 rules if it cannot be read.
    int major = 3;
    if (start_pos >= 10 && fseek(f, start_pos - 7, SEEK_SET) == 0) {
        int c = fgetc(f);
        if (c != EOF) {
            major = c;
        }
    }
    if (major < 3) {
        // ID3v2.2 uses 6-byte frame headers with 3-character IDs, which the
        // loop below cannot interpret.
        return;
    }

    long pos = start_pos;
    while (pos < end_pos - FRAME_HEADER_LEN) {
        uint8_t hdr[FRAME_HEADER_LEN];
        if (fseek(f, pos, SEEK_SET) != 0 ||
            fread(hdr, 1, sizeof hdr, f) != sizeof hdr) {
            break;
        }
        if (hdr[0] == 0) {
            break;  // padding
        }

        // v2.4 stores frame sizes as syncsafe integers (7 bits per byte),
        // v2.3 as plain big-endian. A byte with bit 7 set cannot be part of a
        // syncsafe value, so such a size is read as plain big-endian.
        const uint8_t *sz = &hdr[4];
        uint32_t frame_size;
        if (major == 4 && ((sz[0] | sz[1] | sz[2] | sz[3]) & 0x80) == 0) {
            frame_size = ((uint32_t)sz[0] << 21) | ((uint32_t)sz[1] << 14) |
                         ((uint32_t)sz[2] << 7) | (uint32_t)sz[3];
        } else {
            frame_size = read_be32(sz);
        }

        long payload_pos = pos + FRAME_HEADER_LEN;
        if ((unsigned long)frame_size > (unsigned long)(end_pos - payload_pos)) {
            break;  // frame claims to run past the end of the tag
        }

        char *dest = NULL;
        size_t dest_size = 0;
        if (memcmp(hdr, "TIT2", 4) == 0) {
            dest = meta->title;
            dest_size = sizeof(meta->title);
        } else if (memcmp(hdr, "TPE1", 4) == 0) {
            dest = meta->artist;
            dest_size = sizeof(meta->artist);
        } else if (memcmp(hdr, "TALB", 4) == 0) {
            dest = meta->album;
            dest_size = sizeof(meta->album);
        }
        if (dest) {
            parse_id3v2_text_frame(f, frame_size, dest, dest_size);
        }

        // Advance by the declared size; the next iteration seeks there, so
        // alignment does not depend on how much the helper consumed.
        pos = payload_pos + (long)frame_size;
    }
}
// Copy one 30-byte ID3v1 field into `dest`, unless `dest` already holds text.
// The copy is limited to dest_size - 1 bytes, NUL-terminated and stripped of
// trailing spaces.
static void id3v1_fill_field(char *dest, size_t dest_size, const uint8_t *field) {
    const size_t field_len = 30;

    if (dest_size == 0 || dest[0] != '\0') {
        return;
    }
    size_t len = field_len < dest_size - 1 ? field_len : dest_size - 1;
    memcpy(dest, field, len);
    dest[len] = '\0';
    trim_trailing(dest);
}

// Read the ID3v1 tag stored in the last 128 bytes of the file, if there is
// one, and use its title / artist / album for every `meta` field that is
// still empty. Fields that already contain text are not overwritten.
// The stream position is restored before returning.
static void parse_id3v1_tags(FILE *f, audio_metadata_t *meta) {
    long pos = ftell(f);
    uint8_t tag[128];

    // fseek fails on files shorter than 128 bytes; in that case there is no
    // tag and nothing must be read from wherever the stream happens to be.
    if (fseek(f, -128L, SEEK_END) == 0 &&
        fread(tag, 1, sizeof tag, f) == sizeof tag &&
        memcmp(tag, "TAG", 3) == 0) {
        // Layout: "TAG", title[30], artist[30], album[30], ...
        id3v1_fill_field(meta->title, sizeof(meta->title), &tag[3]);
        id3v1_fill_field(meta->artist, sizeof(meta->artist), &tag[33]);
        id3v1_fill_field(meta->album, sizeof(meta->album), &tag[63]);
    }

    fseek(f, pos, SEEK_SET);
}
// Decode the 4-byte MPEG audio frame header at `buf`.
//
// Returns true only when the 11-bit sync pattern is present, neither the
// version nor the layer field is reserved, and both table lookups yield a
// non-zero bitrate and sample rate. On success *version and *layer hold the
// raw 2-bit header fields, *bitrate and *sample_rate the looked-up table
// values, and *channels 1 for mono or 2 otherwise. Outputs may be partially
// written when the function returns false.
static bool parse_mp3_frame_header(const uint8_t *buf, uint8_t *version, uint8_t *layer,
                                   uint16_t *bitrate, uint16_t *sample_rate, uint8_t *channels) {
    // Frame sync: first byte all ones, top three bits of the second byte set.
    const bool synced = (buf[0] == 0xFF) && ((buf[1] & 0xE0) == 0xE0);
    if (!synced) {
        return false;
    }

    const uint8_t ver = (buf[1] >> 3) & 0x03;  // 0=2.5, 1=reserved, 2=MPEG2, 3=MPEG1
    const uint8_t lay = (buf[1] >> 1) & 0x03;  // 0=reserved, 1=L3, 2=L2, 3=L1
    *version = ver;
    *layer = lay;
    if (ver == 1 || lay == 0) {
        return false;
    }

    const uint8_t rate_slot = (buf[2] >> 4) & 0x0F;
    const uint8_t freq_slot = (buf[2] >> 2) & 0x03;
    const bool mono = ((buf[3] >> 6) & 0x03) == 3;

    *bitrate = mp3_bitrates[ver][lay][rate_slot];
    *sample_rate = mp3_sample_rates[ver][freq_slot];
    *channels = mono ? 1 : 2;

    // A zero from either table marks an index that cannot be used.
    return *bitrate != 0 && *sample_rate != 0;
}
// Look for a Xing/Info or VBRI header inside the frame that starts at file
// offset `frame_start` and fetch the total frame count from it.
//
// Reads 120 bytes from `frame_start`. The position of the Xing/Info tag
// depends on MPEG version and channel mode, so instead of computing it every
// offset from 4 to 39 is tried. A Xing/Info tag only counts when bit 0 of
// its flags word (frames field present) is set. Failing that, a "VBRI" tag
// is checked at offset 36 (32 bytes after the 4-byte frame header).
//
// Returns true and sets *total_frames when a frame count was found; returns
// false (leaving *total_frames untouched) otherwise, including when fewer
// than 120 bytes could be read.
static bool parse_xing_header(FILE *f, long frame_start, uint32_t *total_frames) {
    uint8_t frame[120];

    fseek(f, frame_start, SEEK_SET);
    if (fread(frame, 1, sizeof frame, f) != sizeof frame) {
        return false;
    }

    for (int offset = 4; offset < 40; offset++) {
        const uint8_t *tag = &frame[offset];
        if (memcmp(tag, "Xing", 4) != 0 && memcmp(tag, "Info", 4) != 0) {
            continue;
        }
        // tag + 4: flags word, tag + 8: frame count (when flag bit 0 is set)
        if (read_be32(tag + 4) & 0x01) {
            *total_frames = read_be32(tag + 8);
            return true;
        }
    }

    if (memcmp(&frame[36], "VBRI", 4) == 0) {
        *total_frames = read_be32(&frame[50]);
        return true;
    }
    return false;
}
// Fill `meta` from the MP3 file at `path`.
//
// Steps, in order:
//   1. ID3v2 tag at the start of the file (if present), then ID3v1 tag at the
//      end of the file.
//   2. Search for the first valid frame header, trying up to 65536 byte
//      offsets after the ID3v2 tag. Bitrate, sample rate and channel count
//      are taken from that frame.
//   3. Duration: from the Xing/VBRI frame count when available, otherwise
//      estimated from the audio byte count (file size minus leading data and
//      a trailing ID3v1 tag) and the first frame's bitrate.
//
// Returns false when the file cannot be opened or no frame header is found
// (tag fields read in step 1 are kept in `meta` in the latter case).
// On success sets meta->valid and returns true.
static bool parse_mp3(const char *path, audio_metadata_t *meta) {
    FILE *f = fopen(path, "rb");
    if (f == NULL) {
        return false;
    }

    // File length, needed later for the constant-bitrate estimate.
    fseek(f, 0, SEEK_END);
    const long file_size = ftell(f);
    fseek(f, 0, SEEK_SET);

    // Leading ID3v2 tag: read its frames, then jump past the whole tag.
    const uint32_t id3v2_size = parse_id3v2_size(f);
    if (id3v2_size > 0) {
        fseek(f, 10, SEEK_SET);
        parse_id3v2_tags(f, id3v2_size, meta);
        fseek(f, id3v2_size, SEEK_SET);
    }

    // Trailing ID3v1 tag (the helper restores the stream position).
    parse_id3v1_tags(f, meta);

    long audio_start = ftell(f);
    uint8_t probe[4];
    uint8_t version = 0, layer = 0, channels = 0;
    uint16_t bitrate = 0, sample_rate = 0;
    bool found_frame = false;

    // Slide a 4-byte window forward one byte at a time until it decodes as a
    // frame header.
    for (int attempt = 0; attempt < 65536; attempt++) {
        if (fread(probe, 1, 4, f) != 4) {
            break;
        }
        if (parse_mp3_frame_header(probe, &version, &layer, &bitrate, &sample_rate, &channels)) {
            audio_start = ftell(f) - 4;
            found_frame = true;
            break;
        }
        fseek(f, -3, SEEK_CUR);
    }

    if (found_frame) {
        meta->bitrate = bitrate * 1000;
        meta->sample_rate = sample_rate;
        meta->channels = channels;

        uint32_t total_frames = 0;
        const bool have_frame_count =
            parse_xing_header(f, audio_start, &total_frames) && total_frames > 0;

        if (have_frame_count) {
            // duration = frames * samples-per-frame / sample rate
            uint16_t samples_per_frame = mp3_samples_per_frame[version][layer];
            uint64_t total_samples = (uint64_t)total_frames * samples_per_frame;
            meta->duration_ms = (uint32_t)((total_samples * 1000) / sample_rate);
        } else {
            // duration = audio bytes * 8 / bitrate
            long audio_size = file_size - audio_start;

            // A trailing ID3v1 tag is not audio data.
            fseek(f, -128, SEEK_END);
            if (fread(probe, 1, 3, f) == 3 &&
                probe[0] == 'T' && probe[1] == 'A' && probe[2] == 'G') {
                audio_size -= 128;
            }
            if (meta->bitrate > 0) {
                meta->duration_ms = (uint32_t)((audio_size * 8000ULL) / meta->bitrate);
            }
        }
        meta->valid = true;
    }

    fclose(f);
    return found_frame;
}
// ============================================================================
// M4A/MP4 Parser
// ============================================================================
// Read one MP4 atom header at the current position of `f`.
//
// Stores the four-character type code in `type` (which must have room for
// 5 bytes; it is NUL-terminated) and returns the atom's total size in bytes,
// header included. A 32-bit size field of 1 means the real size follows as a
// 64-bit value, which is read and returned instead.
//
// Returns 0 on a short read and for sizes that cannot be walked: a size
// field of 0, or any size smaller than the header that was just read.
// `type` is not written when the first 8 bytes cannot be read.
static uint64_t read_atom_header(FILE *f, char *type) {
    uint8_t header[8];
    if (fread(header, 1, sizeof header, f) != sizeof header) {
        return 0;
    }
    memcpy(type, &header[4], 4);
    type[4] = '\0';

    uint64_t size = read_be32(header);
    uint64_t header_len = sizeof header;

    if (size == 1) {
        // Extended size (64-bit) follows the type code.
        uint8_t ext[8];
        if (fread(ext, 1, sizeof ext, f) != sizeof ext) {
            return 0;
        }
        size = ((uint64_t)read_be32(ext) << 32) | read_be32(&ext[4]);
        header_len += sizeof ext;
    }

    // An atom cannot be smaller than its own header. Reporting such a size
    // would make callers compute wrapped offsets such as size - 8.
    if (size < header_len) {
        return 0;
    }
    return size;
}
// Read the movie header ('mvhd') payload and derive meta->duration_ms from
// its timescale and duration fields.
//
// On entry `f` is positioned on the first payload byte; `size` is the atom
// size reported by its header. Returns false when `size` is below 20 or the
// first 32 payload bytes cannot be read, true otherwise. meta->duration_ms
// is only written when the timescale is non-zero; results that do not fit
// in 32 bits are clamped to UINT32_MAX.
static bool parse_mvhd(FILE *f, uint32_t size, audio_metadata_t *meta) {
    if (size < 20) {
        return false;
    }

    uint8_t buf[32];
    if (fread(buf, 1, sizeof buf, f) != sizeof buf) {
        return false;
    }

    uint32_t timescale;
    uint64_t duration;
    if (buf[0] == 0) {
        // Version 0: version/flags(4) created(4) modified(4) timescale(4) duration(4)
        timescale = read_be32(&buf[12]);
        duration = read_be32(&buf[16]);
    } else {
        // Version 1: created(8) and modified(8) push timescale to offset 20;
        // the 64-bit duration at offset 24 still fits in the 32 bytes read.
        timescale = read_be32(&buf[20]);
        duration = ((uint64_t)read_be32(&buf[24]) << 32) | read_be32(&buf[28]);
    }

    if (timescale > 0) {
        uint64_t ms;
        if (duration > UINT64_MAX / 1000) {
            ms = UINT32_MAX;  // duration * 1000 would overflow 64 bits
        } else {
            ms = duration * 1000 / timescale;
        }
        meta->duration_ms = ms > UINT32_MAX ? UINT32_MAX : (uint32_t)ms;
    }
    return true;
}
// Read the text value of one iTunes metadata item into `dest`.
//
// On entry `f` is positioned just after the item's 8-byte atom header and
// `size` is the item's total size. The item's children are scanned for the
// first 'data' atom; its value (everything after the 8-byte atom header and
// 8 bytes of version/flags + reserved) is copied into `dest`, truncated to
// dest_size - 1 bytes and NUL-terminated. Values of 1024 bytes or more and
// empty values are ignored, leaving `dest` untouched.
//
// On return the stream is positioned at the end of the item, whether or not
// a value was read.
static void parse_ilst_data(FILE *f, uint32_t size, char *dest, size_t dest_size) {
    long end = ftell(f) + (long)size - 8;

    while (ftell(f) < end) {
        long child_start = ftell(f);
        char type[5];
        uint64_t atom_size = read_atom_header(f, type);
        if (atom_size < 8 || atom_size > (uint64_t)(end - child_start)) {
            break;  // read error or child does not fit inside the item
        }

        if (strcmp(type, "data") == 0) {
            // 8 header + 8 skipped (version/flags and a reserved word)
            if (atom_size > 16 && atom_size - 16 < 1024 && dest_size > 0) {
                size_t text_len = (size_t)(atom_size - 16);
                if (text_len >= dest_size) {
                    text_len = dest_size - 1;
                }
                fseek(f, 8, SEEK_CUR);
                // Terminate at what was actually read so that a short read
                // never leaves a half-overwritten, unterminated string.
                size_t got = fread(dest, 1, text_len, f);
                dest[got] = '\0';
            }
            break;
        }

        // Skip this child using its absolute end offset.
        fseek(f, child_start + (long)atom_size, SEEK_SET);
    }

    // Truncated or ignored values leave unread bytes behind; always finish
    // on the item boundary so the caller's atom walk stays aligned.
    fseek(f, end, SEEK_SET);
}
// Walk the items of an 'ilst' (iTunes metadata list) atom and store the
// title (\251nam), artist (\251ART) and album (\251alb) values in `meta`.
//
// On entry `f` is positioned just after the ilst atom's 8-byte header and
// `size` is the atom's total size. The walk stops at the end of the atom, on
// a read error, or at an item whose size does not fit inside the atom.
static void parse_ilst(FILE *f, uint32_t size, audio_metadata_t *meta) {
    long end = ftell(f) + (long)size - 8;

    for (;;) {
        long item_start = ftell(f);
        if (item_start < 0 || item_start >= end) {
            break;
        }

        char type[5];
        uint64_t item_size = read_atom_header(f, type);
        if (item_size < 8 || item_size > (uint64_t)(end - item_start)) {
            break;
        }

        if (strcmp(type, "\251nam") == 0) {
            parse_ilst_data(f, (uint32_t)item_size, meta->title, sizeof(meta->title));
        } else if (strcmp(type, "\251ART") == 0) {
            parse_ilst_data(f, (uint32_t)item_size, meta->artist, sizeof(meta->artist));
        } else if (strcmp(type, "\251alb") == 0) {
            parse_ilst_data(f, (uint32_t)item_size, meta->album, sizeof(meta->album));
        }

        // Continue at the item's declared end, however much of it the value
        // reader consumed.
        if (fseek(f, item_start + (long)item_size, SEEK_SET) != 0) {
            break;
        }
    }
}
// Scan the atoms between the current position of `f` and file offset `end`.
// 'mvhd' and 'ilst' atoms are handed to their parsers; 'udta' and 'meta'
// containers are entered recursively, at most MAX_DEPTH levels below the
// level this call started at. All other atoms are skipped.
static void parse_moov_level(FILE *f, long end, audio_metadata_t *meta, int depth) {
    enum { MAX_DEPTH = 8 };

    while (ftell(f) < end) {
        long atom_start = ftell(f);
        char type[5];
        uint64_t atom_size = read_atom_header(f, type);
        if (atom_size < 8 || atom_size > (uint64_t)(end - atom_start)) {
            break;  // read error or atom does not fit in its parent
        }
        long atom_end = atom_start + (long)atom_size;

        if (strcmp(type, "mvhd") == 0) {
            parse_mvhd(f, (uint32_t)atom_size, meta);
        } else if (strcmp(type, "udta") == 0 || strcmp(type, "meta") == 0) {
            // Containers can be nested arbitrarily in a crafted file, at a
            // cost of only a few bytes per level; cap the recursion.
            if (depth < MAX_DEPTH) {
                if (strcmp(type, "meta") == 0) {
                    fseek(f, 4, SEEK_CUR);  // meta has 4 extra bytes (version/flags)
                }
                parse_moov_level(f, atom_end, meta, depth + 1);
            }
        } else if (strcmp(type, "ilst") == 0) {
            parse_ilst(f, (uint32_t)atom_size, meta);
        }

        fseek(f, atom_end, SEEK_SET);
    }
}

// Search the contents of a 'moov' atom for the movie header and the iTunes
// metadata list. On entry `f` is positioned on the first child atom; `end`
// is the file offset at which the moov atom ends.
static void parse_moov_recursive(FILE *f, long end, audio_metadata_t *meta) {
    parse_moov_level(f, end, meta, 0);
}
// Fill `meta` from the M4A/MP4 file at `path`.
//
// Walks the top-level atoms until a 'moov' atom is found, parses it, and
// marks the metadata valid when a non-zero duration came out of it.
// Returns false when the file cannot be opened; otherwise returns
// meta->valid, which this function only assigns when a moov atom is found.
static bool parse_m4a(const char *path, audio_metadata_t *meta) {
    FILE *f = fopen(path, "rb");
    if (f == NULL) {
        return false;
    }

    fseek(f, 0, SEEK_END);
    const long file_size = ftell(f);
    fseek(f, 0, SEEK_SET);

    for (;;) {
        const long atom_start = ftell(f);
        if (atom_start >= file_size) {
            break;
        }

        char type[5];
        const uint64_t atom_size = read_atom_header(f, type);
        if (atom_size == 0) {
            break;
        }

        if (strcmp(type, "moov") != 0) {
            // Not the atom we want: jump to the next top-level atom.
            fseek(f, atom_start + atom_size, SEEK_SET);
            continue;
        }

        parse_moov_recursive(f, atom_start + atom_size, meta);
        meta->valid = (meta->duration_ms > 0);
        break;
    }

    fclose(f);
    return meta->valid;
}
// ============================================================================
// Public API
// ============================================================================
// Read the metadata of the audio file at `path` into `meta`.
//
// `meta` is zeroed first, then the parser is chosen by file extension
// (case-insensitive): "mp3" uses the MP3 parser; "m4a", "mp4" and "aac" use
// the MP4 parser. If no title was found, the file name without its extension
// is used as the title.
//
// Returns true when the format-specific parser succeeded. Returns false for
// NULL arguments, an unsupported extension or a parse failure; in the latter
// two cases `meta` still carries a title and meta->valid is false.
bool audio_metadata_get(const char *path, audio_metadata_t *meta) {
    if (!path || !meta) {
        return false;
    }

    memset(meta, 0, sizeof(*meta));

    const char *ext = get_extension(path);

    bool success = false;
    if (strcasecmp(ext, "mp3") == 0) {
        success = parse_mp3(path, meta);
    } else if (strcasecmp(ext, "m4a") == 0 || strcasecmp(ext, "mp4") == 0 ||
               strcasecmp(ext, "aac") == 0) {
        success = parse_m4a(path, meta);
    }

    if (!success) {
        meta->valid = false;  // metadata is incomplete
    }

    // The fallback title is applied only after parsing. The ID3v1 reader
    // fills a field only when it is still empty, so pre-filling the title
    // with the file name kept embedded ID3v1 titles from ever being used.
    if (meta->title[0] == '\0') {
        extract_filename(path, meta->title, sizeof(meta->title));
    }

    return success;
}