text/plain
•
11.48 KB
•
475 lines
#include "podcast_xml.h"
#include <stdio.h>
#include <string.h>
#include "storage.h"
#include "yxml.h"
#define XML_BUF_SIZE 1024
#define XML_STACK_SIZE 2048
// Strip leading and trailing blanks (space, tab, CR, LF) from s in place.
// s must be a writable, NUL-terminated string.
static void str_trim(char* s) {
    static const char blanks[] = " \n\r\t";

    // Drop the leading run by sliding the remainder (terminator included)
    // down to the start of the buffer.
    size_t lead = strspn(s, blanks);
    if (lead > 0) {
        memmove(s, s + lead, strlen(s + lead) + 1);
    }

    // Walk back from the end, overwriting each trailing blank with NUL.
    // s[end - 1] is never '\0' inside the string, so strchr cannot match
    // the terminator of the blank set by accident.
    for (size_t end = strlen(s);
         end > 0 && strchr(blanks, s[end - 1]) != NULL; end--) {
        s[end - 1] = '\0';
    }
}
// Append one character to a NUL-terminated buffer, keeping it terminated.
//   buf       destination buffer
//   buf_size  total capacity of buf in bytes, terminator included
//   len       in/out: current string length; incremented on success
//   c         character to append
// When there is no room left (including a zero-capacity buffer) the
// character is silently dropped and *len is left unchanged.
static void buf_append(char* buf, size_t buf_size, size_t* len, char c) {
    // buf_size is unsigned: test for zero first so "buf_size - 1" cannot
    // wrap around to SIZE_MAX and let the write through.
    if (buf_size == 0 || *len >= buf_size - 1) {
        return;
    }
    buf[*len] = c;
    (*len)++;
    buf[*len] = '\0';
}
// Push every character of a NUL-terminated string (such as the x.data
// chunk of a yxml event) through buf_append, one at a time, so the same
// bounds handling applies to the whole string.
static void buf_append_data(char* buf,
                            size_t buf_size,
                            size_t* len,
                            const char* data) {
    const char* cursor = data;
    while (*cursor != '\0') {
        buf_append(buf, buf_size, len, *cursor);
        cursor++;
    }
}
// Parse an OPML subscription list and collect its feed outlines.
//
//   path       file to read via storage_open()
//   feeds      output array; entries [0, return value) are filled in
//   max_feeds  capacity of feeds
//
// Returns the number of feeds stored, or -1 if the file cannot be opened.
// An <outline> is accepted when it has a non-empty xmlUrl attribute and
// its type attribute is either absent/empty or exactly "rss". The feed
// title comes from the "text" attribute, falling back to "title" when
// "text" is missing or empty. Parsing stops at the first XML error;
// feeds collected up to that point are still returned.
int podcast_parse_opml(const char* path, podcast_feed_t* feeds, int max_feeds) {
    storage_file_t f = storage_open(path, "r");
    if (!f) {
        return -1;
    }

    unsigned char stack[XML_STACK_SIZE];
    yxml_t x;
    yxml_init(&x, stack, sizeof(stack));

    char read_buf[XML_BUF_SIZE];
    int count = 0;

    // Name of the attribute whose value is currently being delivered.
    char current_attr[32] = {0};

    // "text" and "title" get separate buffers: outlines routinely carry
    // both, and appending them into one buffer would yield "NameName".
    char attr_text[128] = {0};
    size_t attr_text_len = 0;
    char attr_title[128] = {0};
    size_t attr_title_len = 0;
    char attr_url[256] = {0};
    size_t attr_url_len = 0;
    char attr_type[16] = {0};
    size_t attr_type_len = 0;
    bool in_outline = false;

    // NOTE(review): assumes storage_read() returns 0 (not a negative
    // value) on error/EOF, since the result is held in a size_t - confirm.
    size_t n;
    while ((n = storage_read(f, read_buf, sizeof(read_buf))) > 0) {
        for (size_t i = 0; i < n; i++) {
            yxml_ret_t r = yxml_parse(&x, read_buf[i]);
            if (r < 0) {
                // Malformed XML: keep what has been collected so far.
                goto done;
            }
            switch (r) {
                case YXML_ELEMSTART:
                    if (strcmp(x.elem, "outline") == 0) {
                        // Fresh outline: forget the previous one's values.
                        in_outline = true;
                        attr_text[0] = '\0';
                        attr_text_len = 0;
                        attr_title[0] = '\0';
                        attr_title_len = 0;
                        attr_url[0] = '\0';
                        attr_url_len = 0;
                        attr_type[0] = '\0';
                        attr_type_len = 0;
                    }
                    break;
                case YXML_ATTRSTART:
                    strncpy(current_attr, x.attr, sizeof(current_attr) - 1);
                    current_attr[sizeof(current_attr) - 1] = '\0';
                    break;
                case YXML_ATTRVAL:
                    // Each event contributes x.data, which is appended to
                    // the buffer matching the current attribute.
                    if (in_outline) {
                        if (strcmp(current_attr, "text") == 0) {
                            buf_append_data(attr_text, sizeof(attr_text),
                                            &attr_text_len, x.data);
                        } else if (strcmp(current_attr, "title") == 0) {
                            buf_append_data(attr_title, sizeof(attr_title),
                                            &attr_title_len, x.data);
                        } else if (strcmp(current_attr, "xmlUrl") == 0) {
                            buf_append_data(attr_url, sizeof(attr_url),
                                            &attr_url_len, x.data);
                        } else if (strcmp(current_attr, "type") == 0) {
                            buf_append_data(attr_type, sizeof(attr_type),
                                            &attr_type_len, x.data);
                        }
                    }
                    break;
                case YXML_ATTREND:
                    current_attr[0] = '\0';
                    break;
                case YXML_ELEMEND:
                    if (in_outline && attr_url[0] != '\0' &&
                        count < max_feeds) {
                        // Accept rss type or any untyped outline with xmlUrl
                        if (attr_type[0] == '\0' ||
                            strcmp(attr_type, "rss") == 0) {
                            const char* name =
                                attr_text[0] != '\0' ? attr_text : attr_title;
                            strncpy(feeds[count].title, name,
                                    sizeof(feeds[count].title) - 1);
                            feeds[count].title[sizeof(feeds[count].title) - 1] =
                                '\0';
                            strncpy(feeds[count].url, attr_url,
                                    sizeof(feeds[count].url) - 1);
                            feeds[count].url[sizeof(feeds[count].url) - 1] =
                                '\0';
                            count++;
                        }
                    }
                    // Cleared on every element end, so the closing tag of
                    // a container outline does not re-emit its last child.
                    in_outline = false;
                    break;
                default:
                    break;
            }
        }
    }
done:
    storage_close(f);
    return count;
}
// Parse an RSS feed file and collect its audio episodes.
//
//   path          file to read via storage_open()
//   episodes      output array; entries [0, return value) are filled in
//   max_episodes  capacity of episodes
//
// Returns the number of episodes stored, or -1 if the file cannot be
// opened. An <item> becomes an episode when it has an <enclosure> with a
// non-empty url and a type starting with "audio/". Title, guid, pubDate
// and the enclosure URL are whitespace-trimmed (matching
// podcast_parse_entry); if the guid is empty the title is used instead.
// When an item has several enclosures, the first audio one is kept.
// Parsing stops at the first XML error; episodes collected up to that
// point are still returned.
int podcast_parse_rss(const char* path,
                      podcast_episode_t* episodes,
                      int max_episodes) {
    storage_file_t f = storage_open(path, "r");
    if (!f) {
        return -1;
    }

    unsigned char stack[XML_STACK_SIZE];
    yxml_t x;
    yxml_init(&x, stack, sizeof(stack));

    char read_buf[XML_BUF_SIZE];
    int count = 0;

    // Parser state
    bool in_item = false;
    bool have_audio = false;  // current item already holds an audio enclosure
    int depth = 0;            // nesting level of the element being parsed
    int item_depth = 0;       // depth at which the current <item> opened
    char current_elem[64] = {0};
    char current_attr[32] = {0};

    // Per-item scratch buffers, reset whenever a new <item> opens.
    char title[128] = {0};
    size_t title_len = 0;
    char guid[128] = {0};
    size_t guid_len = 0;
    char enclosure_url[256] = {0};
    size_t enclosure_url_len = 0;
    char enclosure_type[64] = {0};
    size_t enclosure_type_len = 0;
    char pub_date[64] = {0};
    size_t pub_date_len = 0;

    // NOTE(review): assumes storage_read() returns 0 (not a negative
    // value) on error/EOF, since the result is held in a size_t - confirm.
    size_t n;
    while ((n = storage_read(f, read_buf, sizeof(read_buf))) > 0) {
        for (size_t i = 0; i < n; i++) {
            yxml_ret_t r = yxml_parse(&x, read_buf[i]);
            if (r < 0) {
                // Malformed XML: keep what has been collected so far.
                goto rss_done;
            }
            switch (r) {
                case YXML_ELEMSTART:
                    depth++;
                    strncpy(current_elem, x.elem, sizeof(current_elem) - 1);
                    current_elem[sizeof(current_elem) - 1] = '\0';
                    if (strcmp(x.elem, "item") == 0 && !in_item) {
                        in_item = true;
                        have_audio = false;
                        item_depth = depth;
                        title[0] = '\0';
                        title_len = 0;
                        guid[0] = '\0';
                        guid_len = 0;
                        enclosure_url[0] = '\0';
                        enclosure_url_len = 0;
                        enclosure_type[0] = '\0';
                        enclosure_type_len = 0;
                        pub_date[0] = '\0';
                        pub_date_len = 0;
                    } else if (in_item && strcmp(x.elem, "enclosure") == 0) {
                        // A further enclosure in the same item: keep the
                        // earlier one if it was audio, otherwise start over
                        // so the two URLs are never glued together.
                        have_audio =
                            enclosure_url[0] != '\0' &&
                            strncmp(enclosure_type, "audio/", 6) == 0;
                        if (!have_audio) {
                            enclosure_url[0] = '\0';
                            enclosure_url_len = 0;
                            enclosure_type[0] = '\0';
                            enclosure_type_len = 0;
                        }
                    }
                    break;
                case YXML_CONTENT:
                    if (in_item) {
                        if (strcmp(current_elem, "title") == 0) {
                            buf_append_data(title, sizeof(title), &title_len,
                                            x.data);
                        } else if (strcmp(current_elem, "guid") == 0) {
                            buf_append_data(guid, sizeof(guid), &guid_len,
                                            x.data);
                        } else if (strcmp(current_elem, "pubDate") == 0) {
                            buf_append_data(pub_date, sizeof(pub_date),
                                            &pub_date_len, x.data);
                        }
                    }
                    break;
                case YXML_ATTRSTART:
                    strncpy(current_attr, x.attr, sizeof(current_attr) - 1);
                    current_attr[sizeof(current_attr) - 1] = '\0';
                    break;
                case YXML_ATTRVAL:
                    if (in_item && !have_audio &&
                        strcmp(current_elem, "enclosure") == 0) {
                        if (strcmp(current_attr, "url") == 0) {
                            buf_append_data(enclosure_url,
                                            sizeof(enclosure_url),
                                            &enclosure_url_len, x.data);
                        } else if (strcmp(current_attr, "type") == 0) {
                            buf_append_data(enclosure_type,
                                            sizeof(enclosure_type),
                                            &enclosure_type_len, x.data);
                        }
                    }
                    break;
                case YXML_ATTREND:
                    current_attr[0] = '\0';
                    break;
                case YXML_ELEMEND:
                    // Text after a closing tag belongs to the parent, which
                    // is not tracked, so stop attributing content.
                    current_elem[0] = '\0';
                    if (in_item && depth == item_depth) {
                        // End of <item>. Trim first so the values agree
                        // with what podcast_parse_entry produces. The *_len
                        // counters go stale here, but nothing is appended
                        // again before the next <item> resets them.
                        str_trim(title);
                        str_trim(guid);
                        str_trim(pub_date);
                        str_trim(enclosure_url);
                        if (enclosure_url[0] != '\0' &&
                            strncmp(enclosure_type, "audio/", 6) == 0 &&
                            count < max_episodes) {
                            podcast_episode_t* ep = &episodes[count];
                            strncpy(ep->title, title, sizeof(ep->title) - 1);
                            ep->title[sizeof(ep->title) - 1] = '\0';
                            // Use title as guid fallback
                            strncpy(ep->guid, guid[0] ? guid : title,
                                    sizeof(ep->guid) - 1);
                            ep->guid[sizeof(ep->guid) - 1] = '\0';
                            strncpy(ep->enclosure_url, enclosure_url,
                                    sizeof(ep->enclosure_url) - 1);
                            ep->enclosure_url[sizeof(ep->enclosure_url) - 1] =
                                '\0';
                            strncpy(ep->pub_date, pub_date,
                                    sizeof(ep->pub_date) - 1);
                            ep->pub_date[sizeof(ep->pub_date) - 1] = '\0';
                            count++;
                        }
                        in_item = false;
                    }
                    depth--;
                    break;
                default:
                    break;
            }
        }
    }
rss_done:
    storage_close(f);
    return count;
}
// Reduce a title to a conservative, file-name-friendly token.
//
//   title     NUL-terminated input; NULL is treated as an empty string
//   out       destination buffer (nothing is written if NULL)
//   out_size  capacity of out in bytes, terminator included
//
// ASCII letters, digits, '.', '_' and '-' are copied through. Any other
// character becomes '_', except at the very start of the output or
// directly after another '_', where it is dropped. Trailing '_' are
// removed. If nothing is left the result is "_". The output is always
// NUL-terminated and truncated to fit; with out_size == 1 there is room
// for the terminator only, so the result is the empty string.
void podcast_sanitize_title(const char* title, char* out, size_t out_size) {
    if (out == NULL || out_size == 0) {
        return;
    }
    size_t max_len = out_size - 1;
    if (max_len == 0) {
        // No room for even the "_" fallback; writing it would put the
        // terminator at out[1], one byte past the buffer.
        out[0] = '\0';
        return;
    }
    size_t len = 0;
    for (const char* p = title ? title : ""; *p && len < max_len; p++) {
        char c = *p;
        if ((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') ||
            (c >= '0' && c <= '9') || c == '.' || c == '_' || c == '-') {
            out[len++] = c;
        } else if (len > 0 && out[len - 1] != '_') {
            out[len++] = '_';
        }
    }
    while (len > 0 && out[len - 1] == '_') {
        len--;
    }
    if (len == 0) {
        out[0] = '_';
        len = 1;
    }
    out[len] = '\0';
}
// Write s to f with the XML special characters escaped:
//   '&' -> ampersand entity, '<' -> less-than entity,
//   '>' -> greater-than entity, '"' -> double-quote entity.
// All other bytes are written unchanged. Suitable for element content and
// for attribute values delimited by double quotes.
static void write_escaped(storage_file_t f, const char* s) {
    for (const char* p = s; *p; p++) {
        const char* entity = NULL;
        // Each entity is split into two adjacent literals so that a tool
        // which HTML-unescapes the source text cannot silently turn it
        // back into the raw character.
        switch (*p) {
            case '&':
                entity = "&" "amp;";
                break;
            case '<':
                entity = "&" "lt;";
                break;
            case '>':
                entity = "&" "gt;";
                break;
            case '"':
                entity = "&" "quot;";
                break;
            default:
                break;
        }
        if (entity != NULL) {
            // Length is derived from the literal, so it cannot drift out
            // of sync with the text and read past its end.
            storage_write(f, entity, strlen(entity));
        } else {
            storage_write(f, p, 1);
        }
    }
}
// Write a NUL-terminated string to f verbatim; the terminator itself is
// not written.
static void write_str(storage_file_t f, const char* s) {
    size_t byte_count = strlen(s);
    storage_write(f, s, byte_count);
}
// Serialize one episode as a standalone <item> XML fragment.
//
//   out_path  file to create/overwrite via storage_open(..., "w")
//   ep        episode to write
//
// Each non-empty field is emitted XML-escaped, in the order title, guid,
// pubDate, enclosure; empty fields are left out. The enclosure type is
// always written as "audio/mpeg". Returns false only if the file cannot
// be opened; results of the individual writes are not checked.
bool podcast_write_entry(const char* out_path, const podcast_episode_t* ep) {
    storage_file_t out = storage_open(out_path, "w");
    if (!out) {
        return false;
    }

    // Every field follows the same shape: literal prefix, escaped value,
    // literal suffix.
    const struct {
        const char* prefix;
        const char* value;
        const char* suffix;
    } parts[] = {
        {"<title>", ep->title, "</title>\n"},
        {"<guid>", ep->guid, "</guid>\n"},
        {"<pubDate>", ep->pub_date, "</pubDate>\n"},
        {"<enclosure url=\"", ep->enclosure_url, "\" type=\"audio/mpeg\"/>\n"},
    };

    write_str(out, "<item>\n");
    for (size_t k = 0; k < sizeof(parts) / sizeof(parts[0]); k++) {
        if (parts[k].value[0] == '\0') {
            continue;
        }
        write_str(out, parts[k].prefix);
        write_escaped(out, parts[k].value);
        write_str(out, parts[k].suffix);
    }
    write_str(out, "</item>\n");

    storage_close(out);
    return true;
}
// Read back a single-episode <item> file into *ep.
//
//   path  file to read via storage_open()
//   ep    output; zeroed, then filled from the first parsed values
//
// Returns false if the file cannot be opened (*ep is left untouched in
// that case). Otherwise returns true when a title or an enclosure URL was
// found. Title, guid, pubDate and enclosure URL are whitespace-trimmed,
// and an empty guid falls back to the title. Parsing stops at the first
// XML error; whatever was gathered up to that point is still used.
bool podcast_parse_entry(const char* path, podcast_episode_t* ep) {
    storage_file_t f = storage_open(path, "r");
    if (!f) {
        return false;
    }
    memset(ep, 0, sizeof(*ep));

    unsigned char stack[XML_STACK_SIZE];
    yxml_t x;
    yxml_init(&x, stack, sizeof(stack));

    char read_buf[XML_BUF_SIZE];
    char current_elem[64] = {0};
    char current_attr[32] = {0};
    size_t title_len = 0;
    size_t guid_len = 0;
    size_t url_len = 0;
    size_t date_len = 0;
    bool in_item = false;

    // NOTE(review): assumes storage_read() returns 0 (not a negative
    // value) on error/EOF, since the result is held in a size_t - confirm.
    size_t n;
    while ((n = storage_read(f, read_buf, sizeof(read_buf))) > 0) {
        for (size_t i = 0; i < n; i++) {
            yxml_ret_t r = yxml_parse(&x, read_buf[i]);
            if (r < 0) {
                goto entry_done;
            }
            switch (r) {
                case YXML_ELEMSTART:
                    strncpy(current_elem, x.elem, sizeof(current_elem) - 1);
                    current_elem[sizeof(current_elem) - 1] = '\0';
                    if (strcmp(x.elem, "item") == 0) {
                        in_item = true;
                    }
                    break;
                case YXML_CONTENT:
                    if (!in_item) {
                        break;
                    }
                    if (strcmp(current_elem, "title") == 0) {
                        buf_append_data(ep->title, sizeof(ep->title),
                                        &title_len, x.data);
                    } else if (strcmp(current_elem, "guid") == 0) {
                        buf_append_data(ep->guid, sizeof(ep->guid), &guid_len,
                                        x.data);
                    } else if (strcmp(current_elem, "pubDate") == 0) {
                        buf_append_data(ep->pub_date, sizeof(ep->pub_date),
                                        &date_len, x.data);
                    }
                    break;
                case YXML_ATTRSTART:
                    strncpy(current_attr, x.attr, sizeof(current_attr) - 1);
                    current_attr[sizeof(current_attr) - 1] = '\0';
                    break;
                case YXML_ATTRVAL:
                    // Only the enclosure URL is kept; its type attribute
                    // has no destination in *ep and is ignored.
                    if (in_item && strcmp(current_elem, "enclosure") == 0 &&
                        strcmp(current_attr, "url") == 0) {
                        buf_append_data(ep->enclosure_url,
                                        sizeof(ep->enclosure_url), &url_len,
                                        x.data);
                    }
                    break;
                case YXML_ATTREND:
                    current_attr[0] = '\0';
                    break;
                case YXML_ELEMEND:
                    // Same rule as podcast_parse_rss: text that follows a
                    // closing tag must not be credited to the element that
                    // just closed.
                    current_elem[0] = '\0';
                    break;
                default:
                    break;
            }
        }
    }
entry_done:
    storage_close(f);

    str_trim(ep->title);
    str_trim(ep->guid);
    str_trim(ep->pub_date);
    str_trim(ep->enclosure_url);

    // Use title as guid fallback
    if (!ep->guid[0] && ep->title[0]) {
        snprintf(ep->guid, sizeof(ep->guid), "%s", ep->title);
    }
    return ep->title[0] || ep->enclosure_url[0];
}