Login
1 branch 0 tags
Ben (Desktop/Arch) Limited podcast episode list to 16 entries b36bce8 29 days ago 80 Commits
moon / src / podcast_xml.c
#include "podcast_xml.h"
#include <string.h>
#include "storage.h"
#include "yxml.h"

// Size in bytes of the chunk buffer each parser in this file fills through
// storage_read() before feeding it to yxml one byte at a time.
#define XML_BUF_SIZE 1024
// Size in bytes of the scratch buffer each parser hands to yxml_init().
#define XML_STACK_SIZE 2048

// Append one character to a NUL-terminated buffer, keeping it terminated.
//
// buf      - destination buffer
// buf_size - total capacity of buf in bytes, including the terminator
// len      - in/out: current string length; incremented when c is stored
// c        - character to append
//
// The character is silently dropped when the buffer is full. A zero-sized
// buffer is never written to: without the explicit check, `buf_size - 1`
// would wrap around to SIZE_MAX and the bounds test would always pass.
static void buf_append(char* buf, size_t buf_size, size_t* len, char c) {
	if (buf_size == 0 || *len >= buf_size - 1) {
		return;
	}
	buf[*len] = c;
	(*len)++;
	buf[*len] = '\0';
}

// Copy a NUL-terminated yxml data chunk into the buffer one character at a
// time; buf_append() performs the bounds check and keeps buf terminated.
static void buf_append_data(char* buf, size_t buf_size, size_t* len, const char* data) {
	const char* cursor = data;
	while (*cursor != '\0') {
		buf_append(buf, buf_size, len, *cursor);
		cursor++;
	}
}

// Parse an OPML subscription list and collect its feed entries.
//
// The file at `path` is read in XML_BUF_SIZE chunks and fed to yxml byte by
// byte. Every <outline> that carries a non-empty `xmlUrl` attribute and whose
// `type` attribute is either absent or "rss" is stored in the next free slot
// of `feeds`. The display name comes from the `text` attribute, falling back
// to `title` when `text` is missing or empty. Names and URLs are truncated to
// the local buffers (127 and 255 characters) and to the destination fields.
//
// path      - path passed to storage_open()
// feeds     - output array with room for at least `max_feeds` entries
// max_feeds - capacity of `feeds`
//
// Returns the number of feeds stored, or -1 if the file could not be opened.
// A parse error stops parsing and returns the feeds collected so far.
int podcast_parse_opml(const char* path, podcast_feed_t* feeds, int max_feeds) {
	storage_file_t f = storage_open(path, "r");
	if (!f) {
		return -1;
	}

	unsigned char stack[XML_STACK_SIZE];
	yxml_t x;
	yxml_init(&x, stack, sizeof(stack));

	char read_buf[XML_BUF_SIZE];
	int count = 0;

	// Name of the attribute whose value is currently being streamed
	char current_attr[32] = {0};
	// `text` and `title` are kept apart: exports commonly carry both with the
	// same value, and appending them to one buffer would double the name.
	char attr_text[128] = {0};
	size_t attr_text_len = 0;
	char attr_title[128] = {0};
	size_t attr_title_len = 0;
	char attr_url[256] = {0};
	size_t attr_url_len = 0;
	char attr_type[16] = {0};
	size_t attr_type_len = 0;
	bool in_outline = false;

	size_t n;
	while ((n = storage_read(f, read_buf, sizeof(read_buf))) > 0) {
		for (size_t i = 0; i < n; i++) {
			yxml_ret_t r = yxml_parse(&x, read_buf[i]);
			if (r < 0) {
				// Malformed XML: keep what has been collected so far
				goto done;
			}

			switch (r) {
				case YXML_ELEMSTART:
					if (strcmp(x.elem, "outline") == 0) {
						// Fresh outline: forget the previous one's attributes
						in_outline = true;
						attr_text[0] = '\0';
						attr_text_len = 0;
						attr_title[0] = '\0';
						attr_title_len = 0;
						attr_url[0] = '\0';
						attr_url_len = 0;
						attr_type[0] = '\0';
						attr_type_len = 0;
					}
					break;

				case YXML_ATTRSTART:
					strncpy(current_attr, x.attr, sizeof(current_attr) - 1);
					current_attr[sizeof(current_attr) - 1] = '\0';
					break;

				case YXML_ATTRVAL:
					// Attribute values arrive in pieces; append each piece
					if (in_outline) {
						if (strcmp(current_attr, "text") == 0) {
							buf_append_data(attr_text, sizeof(attr_text),
							                &attr_text_len, x.data);
						} else if (strcmp(current_attr, "title") == 0) {
							buf_append_data(attr_title, sizeof(attr_title),
							                &attr_title_len, x.data);
						} else if (strcmp(current_attr, "xmlUrl") == 0) {
							buf_append_data(attr_url, sizeof(attr_url),
							                &attr_url_len, x.data);
						} else if (strcmp(current_attr, "type") == 0) {
							buf_append_data(attr_type, sizeof(attr_type),
							                &attr_type_len, x.data);
						}
					}
					break;

				case YXML_ATTREND:
					current_attr[0] = '\0';
					break;

				case YXML_ELEMEND:
					if (in_outline && attr_url[0] != '\0' &&
					    count < max_feeds) {
						// Accept rss type or any outline with xmlUrl
						if (attr_type[0] == '\0' ||
						    strcmp(attr_type, "rss") == 0) {
							const char* name =
							    attr_text[0] != '\0' ? attr_text : attr_title;
							strncpy(feeds[count].title, name,
							        sizeof(feeds[count].title) - 1);
							feeds[count].title[sizeof(feeds[count].title) - 1] =
							    '\0';
							strncpy(feeds[count].url, attr_url,
							        sizeof(feeds[count].url) - 1);
							feeds[count].url[sizeof(feeds[count].url) - 1] =
							    '\0';
							count++;
							if (count >= max_feeds) {
								// Output array is full; nothing more can be
								// stored, so stop reading the file.
								goto done;
							}
						}
					}
					in_outline = false;
					break;

				default:
					break;
			}
		}
	}

done:
	storage_close(f);
	return count;
}

// Parse an RSS feed file and collect its audio episodes.
//
// The file at `path` is read in XML_BUF_SIZE chunks and fed to yxml byte by
// byte. For each outermost <item>, the text of its <title> and <guid>
// elements and the `url` / `type` attributes of its <enclosure> are captured.
// When the item closes it is stored in `episodes` only if it has an enclosure
// URL whose type starts with "audio/". An item without a guid uses its title
// as the guid.
//
// path         - path passed to storage_open()
// episodes     - output array with room for at least `max_episodes` entries
// max_episodes - capacity of `episodes`
//
// Returns the number of episodes stored, or -1 if the file could not be
// opened. A parse error stops parsing and returns the episodes collected so
// far.
int podcast_parse_rss(const char* path,
                      podcast_episode_t* episodes,
                      int max_episodes) {
	storage_file_t f = storage_open(path, "r");
	if (!f) {
		return -1;
	}

	unsigned char stack[XML_STACK_SIZE];
	yxml_t x;
	yxml_init(&x, stack, sizeof(stack));

	char read_buf[XML_BUF_SIZE];
	int count = 0;

	// Parser state
	bool in_item = false;
	int depth = 0;       // current element nesting depth
	int item_depth = 0;  // depth at which the current <item> was opened

	// Name of the innermost element that is currently open; empty while the
	// parser sits between a closing tag and the next opening tag.
	char current_elem[64] = {0};
	char current_attr[32] = {0};
	char title[128] = {0};
	size_t title_len = 0;
	char guid[128] = {0};
	size_t guid_len = 0;
	char enclosure_url[256] = {0};
	size_t enclosure_url_len = 0;
	char enclosure_type[64] = {0};
	size_t enclosure_type_len = 0;

	size_t n;
	while ((n = storage_read(f, read_buf, sizeof(read_buf))) > 0) {
		for (size_t i = 0; i < n; i++) {
			yxml_ret_t r = yxml_parse(&x, read_buf[i]);
			if (r < 0) {
				// Malformed XML: keep what has been collected so far
				goto rss_done;
			}

			switch (r) {
				case YXML_ELEMSTART:
					depth++;
					strncpy(current_elem, x.elem, sizeof(current_elem) - 1);
					current_elem[sizeof(current_elem) - 1] = '\0';

					if (strcmp(x.elem, "item") == 0 && !in_item) {
						// New episode: reset everything captured so far
						in_item = true;
						item_depth = depth;
						title[0] = '\0';
						title_len = 0;
						guid[0] = '\0';
						guid_len = 0;
						enclosure_url[0] = '\0';
						enclosure_url_len = 0;
						enclosure_type[0] = '\0';
						enclosure_type_len = 0;
					}
					break;

				case YXML_CONTENT:
					if (in_item) {
						if (strcmp(current_elem, "title") == 0) {
							buf_append_data(title, sizeof(title), &title_len,
							                x.data);
						} else if (strcmp(current_elem, "guid") == 0) {
							buf_append_data(guid, sizeof(guid), &guid_len,
							                x.data);
						}
					}
					break;

				case YXML_ATTRSTART:
					strncpy(current_attr, x.attr, sizeof(current_attr) - 1);
					current_attr[sizeof(current_attr) - 1] = '\0';
					break;

				case YXML_ATTRVAL:
					if (in_item && strcmp(current_elem, "enclosure") == 0) {
						if (strcmp(current_attr, "url") == 0) {
							buf_append_data(enclosure_url,
							                sizeof(enclosure_url),
							                &enclosure_url_len, x.data);
						} else if (strcmp(current_attr, "type") == 0) {
							buf_append_data(enclosure_type,
							                sizeof(enclosure_type),
							                &enclosure_type_len, x.data);
						}
					}
					break;

				case YXML_ATTREND:
					current_attr[0] = '\0';
					break;

				case YXML_ELEMEND:
					if (in_item && depth == item_depth) {
						// End of <item>
						if (enclosure_url[0] != '\0' &&
						    strncmp(enclosure_type, "audio/", 6) == 0 &&
						    count < max_episodes) {
							podcast_episode_t* ep = &episodes[count];
							strncpy(ep->title, title, sizeof(ep->title) - 1);
							ep->title[sizeof(ep->title) - 1] = '\0';
							strncpy(ep->guid, guid[0] ? guid : title,
							        sizeof(ep->guid) - 1);
							ep->guid[sizeof(ep->guid) - 1] = '\0';
							strncpy(ep->enclosure_url, enclosure_url,
							        sizeof(ep->enclosure_url) - 1);
							ep->enclosure_url[sizeof(ep->enclosure_url) - 1] =
							    '\0';
							count++;
							if (count >= max_episodes) {
								// Output array is full; nothing more can be
								// stored, so stop reading the file.
								goto rss_done;
							}
						}
						in_item = false;
					}
					// The element that just closed is no longer current.
					// Without this, whitespace between </title> (or </guid>)
					// and the next tag would be appended to the captured
					// text.
					current_elem[0] = '\0';
					depth--;
					break;

				default:
					break;
			}
		}
	}

rss_done:
	storage_close(f);
	return count;
}

// Reduce a title to the characters [A-Za-z0-9._-].
//
// Every run of other characters is collapsed into a single '_'. Leading and
// trailing underscores produced this way are dropped. If nothing remains, the
// result is "_" when the buffer has room for it, otherwise the empty string.
//
// title    - NUL-terminated input; NULL is treated as an empty string
// out      - destination buffer, always NUL-terminated on return
// out_size - capacity of `out` in bytes, including the terminator; nothing is
//            written when it is 0 or `out` is NULL
void podcast_sanitize_title(const char* title, char* out, size_t out_size) {
	if (out == NULL || out_size == 0) {
		return;
	}
	size_t max_len = out_size - 1;
	size_t len = 0;
	for (const char* p = title ? title : ""; *p && len < max_len; p++) {
		char c = *p;
		if ((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') ||
		    (c >= '0' && c <= '9') || c == '.' || c == '_' || c == '-') {
			out[len++] = c;
		} else if (len > 0 && out[len - 1] != '_') {
			// First rejected character after kept text becomes a separator;
			// further rejected characters in the same run are skipped.
			out[len++] = '_';
		}
	}
	while (len > 0 && out[len - 1] == '_') {
		len--;
	}
	// Only emit the "_" placeholder when there is room for it plus the
	// terminator; a one-byte buffer can hold nothing but the terminator.
	if (len == 0 && max_len > 0) {
		out[len++] = '_';
	}
	out[len] = '\0';
}

// Copy the raw XML of one <item> from an RSS file into a separate file.
//
// The feed at `rss_path` is streamed through yxml. Outermost <item> elements
// are counted from 0 in document order; when the one numbered `item_index`
// is reached, its bytes are copied verbatim to `out_path` up to and including
// its closing tag. Any namespace prefix declarations that live on ancestor
// elements are not copied.
//
// NOTE(review): every <item> is counted here, whereas podcast_parse_rss()
// only stores items with an audio enclosure - confirm that callers pass an
// index in this function's numbering.
//
// rss_path   - path of the feed, passed to storage_open() for reading
// item_index - zero-based index of the item to extract
// out_path   - path of the file to create, passed to storage_open() for
//              writing
//
// Returns true if the item was found and fully copied. On any failure
// (either file cannot be opened, parse error, item not present, input ends
// inside the item) it returns false; if the output file had been opened,
// storage_remove() is called on `out_path`.
bool podcast_extract_item(const char* rss_path,
                          int item_index,
                          const char* out_path) {
	storage_file_t f = storage_open(rss_path, "r");
	if (!f) {
		return false;
	}

	storage_file_t out = storage_open(out_path, "w");
	if (!out) {
		storage_close(f);
		return false;
	}

	unsigned char stack[XML_STACK_SIZE];
	yxml_t x;
	yxml_init(&x, stack, sizeof(stack));

	char read_buf[XML_BUF_SIZE];
	int current_item = -1;   // index of the most recently opened <item>
	int depth = 0;           // current element nesting depth
	int item_depth = 0;      // depth at which the current <item> was opened
	bool in_item = false;    // inside any outermost <item>
	bool in_target = false;  // inside the <item> being extracted
	bool found = false;

	size_t n;
	while ((n = storage_read(f, read_buf, sizeof(read_buf))) > 0) {
		for (size_t i = 0; i < n; i++) {
			yxml_ret_t r = yxml_parse(&x, read_buf[i]);
			if (r < 0) {
				goto extract_done;
			}

			if (r == YXML_ELEMSTART) {
				depth++;
				// Count only outermost items, so an <item> nested inside
				// another one does not shift the numbering.
				if (!in_item && strcmp(x.elem, "item") == 0) {
					in_item = true;
					current_item++;
					item_depth = depth;
					if (current_item == item_index) {
						in_target = true;
						// yxml reports ELEMSTART on the byte that ends the
						// tag name, so "<item" itself has already been
						// consumed. Re-emit it here; the terminating byte
						// ('>', whitespace or '/') is written just below.
						const char* open = "<item";
						storage_write(out, open, strlen(open));
					}
				}
			}

			if (in_target) {
				// Copy every byte of the target item verbatim, including the
				// bytes that terminate nested start tags.
				storage_write(out, &read_buf[i], 1);
			}

			if (r == YXML_ELEMEND) {
				if (in_item && depth == item_depth) {
					in_item = false;
					if (in_target) {
						// Closing tag of the target has been written in full
						in_target = false;
						found = true;
						goto extract_done;
					}
				}
				depth--;
			}
		}
	}

extract_done:
	storage_close(f);
	storage_close(out);

	if (!found) {
		// Do not leave an empty or truncated file behind
		storage_remove(out_path);
	}
	return found;
}