/*
 * moon / src / metadata / mp3.c
 *
 * Repository page header captured with this file: 1 branch, 0 tags,
 * 83 commits. Latest commit 1bc241d by Ben (Desktop/Arch), 29 days ago:
 * "Fixed some memory issues and added valgrind".
 */
#include "../audio_metadata.h"
#include "helper.h"

#include <ctype.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

// ============================================================================
// MP3 Parser
// ============================================================================

// MP3 bitrate table [version][layer][index]
// Version: 0=MPEG2.5, 1=reserved, 2=MPEG2, 3=MPEG1
// Layer: 0=reserved, 1=Layer3, 2=Layer2, 3=Layer1
static const uint16_t mp3_bitrates[4][4][16] = {
    // MPEG 2.5
    {
        {0},  // reserved
        {0, 8, 16, 24, 32, 40, 48, 56, 64, 80, 96, 112, 128, 144, 160,
         0},  // Layer 3
        {0, 8, 16, 24, 32, 40, 48, 56, 64, 80, 96, 112, 128, 144, 160,
         0},  // Layer 2
        {0, 32, 48, 56, 64, 80, 96, 112, 128, 144, 160, 176, 192, 224, 256,
         0},  // Layer 1
    },
    // Reserved
    {{0}, {0}, {0}, {0}},
    // MPEG 2
    {
        {0},  // reserved
        {0, 8, 16, 24, 32, 40, 48, 56, 64, 80, 96, 112, 128, 144, 160,
         0},  // Layer 3
        {0, 8, 16, 24, 32, 40, 48, 56, 64, 80, 96, 112, 128, 144, 160,
         0},  // Layer 2
        {0, 32, 48, 56, 64, 80, 96, 112, 128, 144, 160, 176, 192, 224, 256,
         0},  // Layer 1
    },
    // MPEG 1
    {
        {0},  // reserved
        {0, 32, 40, 48, 56, 64, 80, 96, 112, 128, 160, 192, 224, 256, 320,
         0},  // Layer 3
        {0, 32, 48, 56, 64, 80, 96, 112, 128, 160, 192, 224, 256, 320, 384,
         0},  // Layer 2
        {0, 32, 64, 96, 128, 160, 192, 224, 256, 288, 320, 352, 384, 416, 448,
         0},  // Layer 1
    },
};

// MP3 sample rate table [version][index]
static const uint16_t mp3_sample_rates[4][4] = {
    {11025, 12000, 8000, 0},   // MPEG 2.5
    {0, 0, 0, 0},              // Reserved
    {22050, 24000, 16000, 0},  // MPEG 2
    {44100, 48000, 32000, 0},  // MPEG 1
};

// Samples per frame [version][layer]
static const uint16_t mp3_samples_per_frame[4][4] = {
    {0, 576, 1152, 384},   // MPEG 2.5
    {0, 0, 0, 0},          // Reserved
    {0, 576, 1152, 384},   // MPEG 2
    {0, 1152, 1152, 384},  // MPEG 1
};

// Parse ID3v2 header and return its total size (including header)
static uint32_t parse_id3v2_size(FILE* f) {
	uint8_t header[10];

	if (fread(header, 1, 10, f) != 10) {
		fseek(f, 0, SEEK_SET);
		return 0;
	}

	// Check for "ID3" magic
	if (header[0] != 'I' || header[1] != 'D' || header[2] != '3') {
		fseek(f, 0, SEEK_SET);
		return 0;
	}

	// ID3v2 size is syncsafe integer (7 bits per byte)
	uint32_t size = ((uint32_t)(header[6] & 0x7F) << 21) |
	                ((uint32_t)(header[7] & 0x7F) << 14) |
	                ((uint32_t)(header[8] & 0x7F) << 7) | (header[9] & 0x7F);

	return size + 10;  // Add header size
}

// Parse ID3v2 text frame
static void parse_id3v2_text_frame(FILE* f,
                                   uint32_t size,
                                   char* dest,
                                   size_t dest_size) {
	if (size == 0 || size > 1024) {
		return;
	}

	uint8_t* buf = malloc(size);
	if (!buf) {
		return;
	}

	if (fread(buf, 1, size, f) == size) {
		// First byte is encoding: 0=ISO-8859-1, 1=UTF-16, 2=UTF-16BE, 3=UTF-8
		uint8_t encoding = buf[0];
		const char* text = (const char*)&buf[1];
		size_t text_len = size - 1;

		if (encoding == 0 || encoding == 3) {
			// ISO-8859-1 or UTF-8 - copy directly
			if (text_len >= dest_size) {
				text_len = dest_size - 1;
			}
			memcpy(dest, text, text_len);
			dest[text_len] = '\0';
		} else if (encoding == 1 && text_len >= 2) {
			// UTF-16 with BOM - simple ASCII extraction
			const uint8_t* utf16 = (const uint8_t*)text;
			int little_endian = (utf16[0] == 0xFF && utf16[1] == 0xFE);
			utf16 += 2;
			text_len -= 2;

			size_t j = 0;
			for (size_t i = 0; i + 1 < text_len && j < dest_size - 1; i += 2) {
				uint16_t ch = little_endian ? (utf16[i] | (utf16[i + 1] << 8))
				                            : ((utf16[i] << 8) | utf16[i + 1]);
				if (ch > 0 && ch < 128) {
					dest[j++] = (char)ch;
				}
			}
			dest[j] = '\0';
		}
		trim_trailing(dest);
	}

	free(buf);
}

// Parse ID3v2 tags
static void parse_id3v2_tags(FILE* f,
                             uint32_t tag_size,
                             audio_metadata_t* meta) {
	long start_pos = ftell(f);
	long end_pos =
	    start_pos + tag_size - 10;  // Subtract header we already read

	// Skip to after header (we're already there from size parsing)

	while (ftell(f) < end_pos - 10) {
		uint8_t frame_header[10];
		if (fread(frame_header, 1, 10, f) != 10) {
			break;
		}

		// Frame ID is 4 chars
		char frame_id[5] = {frame_header[0], frame_header[1], frame_header[2],
		                    frame_header[3], 0};

		// Frame size (ID3v2.4 uses syncsafe, v2.3 uses regular - try both)
		uint32_t frame_size = read_be32(&frame_header[4]);

		// Check for padding (all zeros)
		if (frame_id[0] == 0) {
			break;
		}

		// Sanity check
		if (frame_size > 10000000) {
			break;
		}

		if (strcmp(frame_id, "TIT2") == 0) {
			parse_id3v2_text_frame(f, frame_size, meta->title,
			                       sizeof(meta->title));
		} else if (strcmp(frame_id, "TPE1") == 0) {
			parse_id3v2_text_frame(f, frame_size, meta->artist,
			                       sizeof(meta->artist));
		} else if (strcmp(frame_id, "TALB") == 0) {
			parse_id3v2_text_frame(f, frame_size, meta->album,
			                       sizeof(meta->album));
		} else {
			// Skip unknown frame
			fseek(f, frame_size, SEEK_CUR);
		}
	}
}

// Parse ID3v1 tags (last 128 bytes of file)
static void parse_id3v1_tags(FILE* f, audio_metadata_t* meta) {
	long pos = ftell(f);

	fseek(f, -128, SEEK_END);

	uint8_t tag[128];
	if (fread(tag, 1, 128, f) != 128) {
		fseek(f, pos, SEEK_SET);
		return;
	}

	// Check for "TAG" magic
	if (tag[0] != 'T' || tag[1] != 'A' || tag[2] != 'G') {
		fseek(f, pos, SEEK_SET);
		return;
	}

	// Only use ID3v1 if ID3v2 didn't provide the info
	if (meta->title[0] == '\0') {
		memcpy(meta->title, &tag[3], 30);
		meta->title[30] = '\0';
		trim_trailing(meta->title);
	}
	if (meta->artist[0] == '\0') {
		memcpy(meta->artist, &tag[33], 30);
		meta->artist[30] = '\0';
		trim_trailing(meta->artist);
	}
	if (meta->album[0] == '\0') {
		memcpy(meta->album, &tag[63], 30);
		meta->album[30] = '\0';
		trim_trailing(meta->album);
	}

	fseek(f, pos, SEEK_SET);
}

// Find and parse MP3 frame header, return true if valid
static bool parse_mp3_frame_header(const uint8_t* buf,
                                   uint8_t* version,
                                   uint8_t* layer,
                                   uint16_t* bitrate,
                                   uint16_t* sample_rate,
                                   uint8_t* channels) {
	// Check sync word (11 bits set)
	if (buf[0] != 0xFF || (buf[1] & 0xE0) != 0xE0) {
		return false;
	}

	*version = (buf[1] >> 3) & 0x03;  // 0=2.5, 1=reserved, 2=MPEG2, 3=MPEG1
	*layer = (buf[1] >> 1) & 0x03;    // 0=reserved, 1=L3, 2=L2, 3=L1

	if (*version == 1 || *layer == 0) {
		return false;  // Reserved
	}

	uint8_t bitrate_idx = (buf[2] >> 4) & 0x0F;
	uint8_t srate_idx = (buf[2] >> 2) & 0x03;
	uint8_t channel_mode = (buf[3] >> 6) & 0x03;

	*bitrate = mp3_bitrates[*version][*layer][bitrate_idx];
	*sample_rate = mp3_sample_rates[*version][srate_idx];
	*channels = (channel_mode == 3) ? 1 : 2;  // 3 = mono

	return *bitrate > 0 && *sample_rate > 0;
}

// Parse Xing/VBRI header for VBR info
static bool parse_xing_header(FILE* f,
                              long frame_start,
                              uint32_t* total_frames) {
	// Xing header offset depends on version and channel mode
	// For MPEG1: 32 bytes for stereo, 17 for mono (after frame header)
	// For MPEG2/2.5: 17 bytes for stereo, 9 for mono

	uint8_t buf[120];
	fseek(f, frame_start, SEEK_SET);
	if (fread(buf, 1, 120, f) != 120) {
		return false;
	}

	// Search for "Xing" or "Info" tag
	for (int offset = 4; offset < 40; offset++) {
		if ((buf[offset] == 'X' && buf[offset + 1] == 'i' &&
		     buf[offset + 2] == 'n' && buf[offset + 3] == 'g') ||
		    (buf[offset] == 'I' && buf[offset + 1] == 'n' &&
		     buf[offset + 2] == 'f' && buf[offset + 3] == 'o')) {
			uint32_t flags = read_be32(&buf[offset + 4]);
			if (flags & 0x01) {  // Frames field present
				*total_frames = read_be32(&buf[offset + 8]);
				return true;
			}
		}
	}

	// Try VBRI header (always at offset 32 after frame sync)
	if (buf[36] == 'V' && buf[37] == 'B' && buf[38] == 'R' && buf[39] == 'I') {
		*total_frames = read_be32(&buf[50]);
		return true;
	}

	return false;
}

bool parse_mp3(const char* path, audio_metadata_t* meta) {
	FILE* f = fopen(path, "rb");
	if (!f) {
		return false;
	}

	// Get file size
	fseek(f, 0, SEEK_END);
	long file_size = ftell(f);
	fseek(f, 0, SEEK_SET);

	// Parse ID3v2 if present
	uint32_t id3v2_size = parse_id3v2_size(f);
	if (id3v2_size > 0) {
		fseek(f, 10, SEEK_SET);  // Skip header, parse tags
		parse_id3v2_tags(f, id3v2_size, meta);
		fseek(f, id3v2_size, SEEK_SET);  // Skip to audio data
	}

	// Parse ID3v1 at end of file
	parse_id3v1_tags(f, meta);

	// Find first valid MP3 frame
	long audio_start = ftell(f);
	uint8_t buf[4];
	uint8_t version, layer, channels;
	uint16_t bitrate, sample_rate;
	bool found_frame = false;

	// Search for sync in first 64KB
	for (int i = 0; i < 65536 && !found_frame; i++) {
		if (fread(buf, 1, 4, f) != 4) {
			break;
		}

		if (parse_mp3_frame_header(buf, &version, &layer, &bitrate,
		                           &sample_rate, &channels)) {
			audio_start = ftell(f) - 4;
			found_frame = true;
		} else {
			fseek(f, -3, SEEK_CUR);  // Move back and try next byte
		}
	}

	if (!found_frame) {
		fclose(f);
		return false;
	}

	meta->audio_data_offset = (uint32_t)audio_start;
	meta->bitrate = bitrate * 1000;
	meta->sample_rate = sample_rate;
	meta->channels = channels;

	// Try to get VBR info from Xing/VBRI header
	uint32_t total_frames = 0;
	if (parse_xing_header(f, audio_start, &total_frames) && total_frames > 0) {
		// Calculate duration from frame count
		uint16_t samples_per_frame = mp3_samples_per_frame[version][layer];
		uint64_t total_samples = (uint64_t)total_frames * samples_per_frame;
		meta->duration_ms = (uint32_t)((total_samples * 1000) / sample_rate);
	} else {
		// CBR estimation: duration = (audio_bytes * 8) / bitrate
		long audio_size = file_size - audio_start;
		// Check for ID3v1 tag at end
		fseek(f, -128, SEEK_END);
		if (fread(buf, 1, 3, f) == 3 && buf[0] == 'T' && buf[1] == 'A' &&
		    buf[2] == 'G') {
			audio_size -= 128;
		}
		if (meta->bitrate > 0) {
			meta->duration_ms =
			    (uint32_t)((audio_size * 8000ULL) / meta->bitrate);
		}
	}

	meta->valid = true;
	fclose(f);
	return true;
}