text/plain
•
8.47 KB
•
354 lines
#include "podcast_xml.h"
#include <string.h>
#include "storage.h"
#include "yxml.h"
// Size in bytes of the chunk buffer used when reading XML files from storage.
#define XML_BUF_SIZE 1024
// Size in bytes of the working stack buffer handed to yxml_init().
#define XML_STACK_SIZE 2048
// Append a single character to a NUL-terminated buffer, keeping it terminated.
//
//   buf       destination buffer
//   buf_size  total capacity of buf in bytes, including the terminator
//   len       in/out: current string length; incremented when c is stored
//   c         character to append
//
// The character is silently dropped when the buffer is already full. A
// zero-capacity buffer is left untouched: without the explicit check the
// unsigned expression `buf_size - 1` would wrap around to SIZE_MAX and the
// bounds test would always pass.
static void buf_append(char* buf, size_t buf_size, size_t* len, char c) {
    if (buf_size == 0 || *len >= buf_size - 1) {
        return;
    }
    buf[*len] = c;
    (*len)++;
    buf[*len] = '\0';
}
// Append the NUL-terminated string `data` (callers pass yxml's x.data) to
// `buf`, one character at a time through buf_append(), which enforces the
// capacity limit and keeps `buf` terminated. `len` is the in/out string
// length of `buf`.
static void buf_append_data(char* buf,
                            size_t buf_size,
                            size_t* len,
                            const char* data) {
    size_t idx = 0;
    while (data[idx] != '\0') {
        buf_append(buf, buf_size, len, data[idx]);
        idx++;
    }
}
// Parse an OPML subscription list and collect its feed outlines.
//
//   path       OPML file, opened for reading through the storage layer
//   feeds      output array; entries [0, return value) are filled in
//   max_feeds  capacity of `feeds`; parsing stops once it is full
//
// Returns the number of feeds stored, or -1 when the file cannot be opened.
// If yxml reports a syntax error, the feeds collected up to that point are
// still returned.
//
// An <outline> is accepted when it has a non-empty xmlUrl attribute and its
// type attribute is either absent/empty or exactly "rss". The feed title is
// taken from the `text` attribute and falls back to `title` when `text` is
// missing or empty. The two are kept in separate buffers: appending both to
// one buffer turned the common text="X" title="X" pair into the title "XX".
int podcast_parse_opml(const char* path, podcast_feed_t* feeds, int max_feeds) {
    storage_file_t f = storage_open(path, "r");
    if (!f) {
        return -1;
    }

    unsigned char stack[XML_STACK_SIZE];
    yxml_t x;
    yxml_init(&x, stack, sizeof(stack));

    char read_buf[XML_BUF_SIZE];
    int count = 0;

    // Name of the attribute whose value is currently being delivered.
    char current_attr[32] = {0};

    // Attribute values of the <outline> currently being read.
    char attr_text[128] = {0};
    size_t attr_text_len = 0;
    char attr_title[128] = {0};
    size_t attr_title_len = 0;
    char attr_url[256] = {0};
    size_t attr_url_len = 0;
    char attr_type[16] = {0};
    size_t attr_type_len = 0;
    bool in_outline = false;

    size_t n;
    // NOTE(review): `n` is unsigned, so this loop relies on storage_read()
    // returning 0 (not a negative value) on error/EOF -- confirm in storage.h.
    while ((n = storage_read(f, read_buf, sizeof(read_buf))) > 0) {
        for (size_t i = 0; i < n; i++) {
            yxml_ret_t r = yxml_parse(&x, read_buf[i]);
            if (r < 0) {
                // Syntax error: keep what has been collected so far.
                goto done;
            }
            switch (r) {
            case YXML_ELEMSTART:
                if (strcmp(x.elem, "outline") == 0) {
                    // Start collecting a fresh set of attributes.
                    in_outline = true;
                    attr_text[0] = '\0';
                    attr_text_len = 0;
                    attr_title[0] = '\0';
                    attr_title_len = 0;
                    attr_url[0] = '\0';
                    attr_url_len = 0;
                    attr_type[0] = '\0';
                    attr_type_len = 0;
                }
                break;
            case YXML_ATTRSTART:
                strncpy(current_attr, x.attr, sizeof(current_attr) - 1);
                current_attr[sizeof(current_attr) - 1] = '\0';
                break;
            case YXML_ATTRVAL:
                // Attribute values arrive in pieces; append each piece to
                // the buffer matching the attribute name.
                if (in_outline) {
                    if (strcmp(current_attr, "text") == 0) {
                        buf_append_data(attr_text, sizeof(attr_text),
                                        &attr_text_len, x.data);
                    } else if (strcmp(current_attr, "title") == 0) {
                        buf_append_data(attr_title, sizeof(attr_title),
                                        &attr_title_len, x.data);
                    } else if (strcmp(current_attr, "xmlUrl") == 0) {
                        buf_append_data(attr_url, sizeof(attr_url),
                                        &attr_url_len, x.data);
                    } else if (strcmp(current_attr, "type") == 0) {
                        buf_append_data(attr_type, sizeof(attr_type),
                                        &attr_type_len, x.data);
                    }
                }
                break;
            case YXML_ATTREND:
                current_attr[0] = '\0';
                break;
            case YXML_ELEMEND:
                // Keep the outline only if it has an xmlUrl and its type is
                // unspecified or "rss"; any other type is skipped.
                if (in_outline && attr_url[0] != '\0' && count < max_feeds &&
                    (attr_type[0] == '\0' || strcmp(attr_type, "rss") == 0)) {
                    const char* name =
                        attr_text[0] != '\0' ? attr_text : attr_title;
                    podcast_feed_t* feed = &feeds[count];

                    strncpy(feed->title, name, sizeof(feed->title) - 1);
                    feed->title[sizeof(feed->title) - 1] = '\0';
                    strncpy(feed->url, attr_url, sizeof(feed->url) - 1);
                    feed->url[sizeof(feed->url) - 1] = '\0';
                    count++;

                    if (count >= max_feeds) {
                        // Output array is full; nothing later in the file
                        // could be stored, so stop reading.
                        goto done;
                    }
                }
                in_outline = false;
                break;
            default:
                break;
            }
        }
    }

done:
    storage_close(f);
    return count;
}
// Parse an RSS feed and collect the items that carry an audio enclosure.
//
//   path          RSS file, opened for reading through the storage layer
//   episodes      output array; entries [0, return value) are filled in
//   max_episodes  capacity of `episodes`; parsing stops once it is full
//
// Returns the number of episodes stored, or -1 when the file cannot be
// opened. If yxml reports a syntax error, the episodes collected up to that
// point are still returned.
//
// An <item> becomes an episode when it has an <enclosure> with a non-empty
// url and a type starting with "audio/". The episode guid falls back to the
// title when the item has no <guid> text.
//
// Only direct children of <item> are read, and the element name is forgotten
// as soon as an element closes. Without that, the whitespace between
// </title> and the next tag was appended to the title (likewise for guid),
// and same-named elements nested deeper inside the item were mixed in.
// When an item has several enclosures, the first audio one is kept instead
// of concatenating all url/type values into one string.
int podcast_parse_rss(const char* path,
                      podcast_episode_t* episodes,
                      int max_episodes) {
    storage_file_t f = storage_open(path, "r");
    if (!f) {
        return -1;
    }

    unsigned char stack[XML_STACK_SIZE];
    yxml_t x;
    yxml_init(&x, stack, sizeof(stack));

    char read_buf[XML_BUF_SIZE];
    int count = 0;

    // Parser state
    bool in_item = false;
    bool have_audio = false;  // an audio enclosure is already captured
    int depth = 0;            // nesting depth of the innermost open element
    int item_depth = 0;       // depth at which the current <item> opened
    char current_elem[64] = {0};
    char current_attr[32] = {0};

    // Fields of the <item> currently being read.
    char title[128] = {0};
    size_t title_len = 0;
    char guid[128] = {0};
    size_t guid_len = 0;
    char enclosure_url[256] = {0};
    size_t enclosure_url_len = 0;
    char enclosure_type[64] = {0};
    size_t enclosure_type_len = 0;

    size_t n;
    // NOTE(review): `n` is unsigned, so this loop relies on storage_read()
    // returning 0 (not a negative value) on error/EOF -- confirm in storage.h.
    while ((n = storage_read(f, read_buf, sizeof(read_buf))) > 0) {
        for (size_t i = 0; i < n; i++) {
            yxml_ret_t r = yxml_parse(&x, read_buf[i]);
            if (r < 0) {
                // Syntax error: keep what has been collected so far.
                goto rss_done;
            }
            switch (r) {
            case YXML_ELEMSTART:
                depth++;
                strncpy(current_elem, x.elem, sizeof(current_elem) - 1);
                current_elem[sizeof(current_elem) - 1] = '\0';
                if (!in_item) {
                    if (strcmp(x.elem, "item") == 0) {
                        in_item = true;
                        have_audio = false;
                        item_depth = depth;
                        title[0] = '\0';
                        title_len = 0;
                        guid[0] = '\0';
                        guid_len = 0;
                        enclosure_url[0] = '\0';
                        enclosure_url_len = 0;
                        enclosure_type[0] = '\0';
                        enclosure_type_len = 0;
                    }
                } else if (depth == item_depth + 1 && !have_audio &&
                           strcmp(current_elem, "enclosure") == 0) {
                    // New enclosure candidate: discard whatever a previous,
                    // non-audio enclosure left behind.
                    enclosure_url[0] = '\0';
                    enclosure_url_len = 0;
                    enclosure_type[0] = '\0';
                    enclosure_type_len = 0;
                }
                break;
            case YXML_CONTENT:
                // Text directly inside <item><title> / <item><guid>.
                if (in_item && depth == item_depth + 1) {
                    if (strcmp(current_elem, "title") == 0) {
                        buf_append_data(title, sizeof(title), &title_len,
                                        x.data);
                    } else if (strcmp(current_elem, "guid") == 0) {
                        buf_append_data(guid, sizeof(guid), &guid_len,
                                        x.data);
                    }
                }
                break;
            case YXML_ATTRSTART:
                strncpy(current_attr, x.attr, sizeof(current_attr) - 1);
                current_attr[sizeof(current_attr) - 1] = '\0';
                break;
            case YXML_ATTRVAL:
                if (in_item && depth == item_depth + 1 && !have_audio &&
                    strcmp(current_elem, "enclosure") == 0) {
                    if (strcmp(current_attr, "url") == 0) {
                        buf_append_data(enclosure_url,
                                        sizeof(enclosure_url),
                                        &enclosure_url_len, x.data);
                    } else if (strcmp(current_attr, "type") == 0) {
                        buf_append_data(enclosure_type,
                                        sizeof(enclosure_type),
                                        &enclosure_type_len, x.data);
                    }
                }
                break;
            case YXML_ATTREND:
                current_attr[0] = '\0';
                break;
            case YXML_ELEMEND:
                if (in_item && depth == item_depth + 1) {
                    // A direct child of <item> just closed. If it was a
                    // usable audio enclosure, lock it in so later
                    // enclosures cannot overwrite it.
                    if (!have_audio &&
                        strcmp(current_elem, "enclosure") == 0 &&
                        enclosure_url[0] != '\0' &&
                        strncmp(enclosure_type, "audio/", 6) == 0) {
                        have_audio = true;
                    }
                } else if (in_item && depth == item_depth) {
                    // End of <item>
                    if (enclosure_url[0] != '\0' &&
                        strncmp(enclosure_type, "audio/", 6) == 0 &&
                        count < max_episodes) {
                        podcast_episode_t* ep = &episodes[count];
                        strncpy(ep->title, title, sizeof(ep->title) - 1);
                        ep->title[sizeof(ep->title) - 1] = '\0';
                        strncpy(ep->guid, guid[0] ? guid : title,
                                sizeof(ep->guid) - 1);
                        ep->guid[sizeof(ep->guid) - 1] = '\0';
                        strncpy(ep->enclosure_url, enclosure_url,
                                sizeof(ep->enclosure_url) - 1);
                        ep->enclosure_url[sizeof(ep->enclosure_url) - 1] =
                            '\0';
                        count++;
                        if (count >= max_episodes) {
                            // Output array is full; stop reading.
                            goto rss_done;
                        }
                    }
                    in_item = false;
                }
                // Text that follows a closing tag belongs to the parent,
                // not to the element that just ended.
                current_elem[0] = '\0';
                depth--;
                break;
            default:
                break;
            }
        }
    }

rss_done:
    storage_close(f);
    return count;
}
// Reduce `title` to a string made only of the characters [A-Za-z0-9._-].
//
//   title     NUL-terminated input; NULL is treated as an empty string
//   out       destination buffer; NUL-terminated on return if out_size > 0
//   out_size  capacity of `out` in bytes, including the terminator
//
// Any other character is replaced by '_', except that no '_' is emitted at
// the very start or directly after another '_'. Trailing underscores are
// stripped afterwards. If nothing is left, the result is "_". The output is
// truncated to out_size - 1 characters.
//
// With out_size == 1 there is room for the terminator only, so the result is
// the empty string; the "_" fallback would otherwise write out[1], one byte
// past the end of the buffer.
//
// NOTE(review): "." and ".." pass through unchanged -- confirm that is
// acceptable if callers use the result as a path component.
void podcast_sanitize_title(const char* title, char* out, size_t out_size) {
    if (out_size == 0) {
        return;
    }
    if (out_size == 1) {
        out[0] = '\0';
        return;
    }
    if (title == NULL) {
        title = "";
    }

    size_t max_len = out_size - 1;
    size_t len = 0;
    for (const char* p = title; *p && len < max_len; p++) {
        char c = *p;
        if ((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') ||
            (c >= '0' && c <= '9') || c == '.' || c == '_' || c == '-') {
            out[len++] = c;
        } else if (len > 0 && out[len - 1] != '_') {
            out[len++] = '_';
        }
    }

    // Strip trailing underscores (substituted or literal).
    while (len > 0 && out[len - 1] == '_') {
        len--;
    }

    // Never return an empty name; max_len >= 1 here, so out[1] is in bounds.
    if (len == 0) {
        out[0] = '_';
        len = 1;
    }
    out[len] = '\0';
}
// Copy the raw XML of one <item> element from an RSS file into its own file.
//
//   rss_path    RSS file, opened for reading through the storage layer
//   item_index  zero-based index of the wanted <item>, counting every
//               <item> element that is not nested inside another <item>
//   out_path    file that receives the extracted XML
//
// Returns true when the item was found and written. On any failure (file
// cannot be opened, syntax error, index past the last item) it returns
// false; if the output file was already created it is removed again.
//
// The output is the literal text "<item" followed by the source bytes from
// the character that ends the tag name (a space, '/' or '>') up to and
// including the '>' that closes the item, so attributes on <item> and the
// markup of its children are preserved byte for byte.
//
// Two defects of the earlier logic are avoided here:
//  - items were counted under the condition `current_item < item_depth`,
//    which compares an index with a nesting depth, so items beyond index
//    `item_depth` were never counted and could not be extracted;
//  - every byte on which yxml reported YXML_ELEMSTART was skipped, which
//    dropped the '>' (or space, or '/') that terminates each child tag name.
//
// NOTE(review): podcast_parse_rss() only counts items with an audio
// enclosure, while item_index here counts all items -- confirm callers pass
// an index in the right numbering.
bool podcast_extract_item(const char* rss_path,
                          int item_index,
                          const char* out_path) {
    storage_file_t f = storage_open(rss_path, "r");
    if (!f) {
        return false;
    }
    storage_file_t out = storage_open(out_path, "w");
    if (!out) {
        storage_close(f);
        return false;
    }

    unsigned char stack[XML_STACK_SIZE];
    yxml_t x;
    yxml_init(&x, stack, sizeof(stack));

    char read_buf[XML_BUF_SIZE];
    int current_item = -1;   // index of the most recently opened <item>
    int depth = 0;           // nesting depth of the innermost open element
    int item_depth = 0;      // depth at which the current <item> opened
    bool in_item = false;    // inside any <item>
    bool in_target = false;  // inside the <item> being extracted
    bool found = false;

    size_t n;
    // NOTE(review): `n` is unsigned, so this loop relies on storage_read()
    // returning 0 (not a negative value) on error/EOF -- confirm in storage.h.
    while ((n = storage_read(f, read_buf, sizeof(read_buf))) > 0) {
        // First byte of this chunk that belongs to the target item and has
        // not been written yet. Bytes are flushed in runs, not one by one.
        size_t span_start = 0;

        for (size_t i = 0; i < n; i++) {
            yxml_ret_t r = yxml_parse(&x, read_buf[i]);
            if (r < 0) {
                goto extract_done;
            }

            if (r == YXML_ELEMSTART) {
                depth++;
                if (!in_item && strcmp(x.elem, "item") == 0) {
                    in_item = true;
                    item_depth = depth;
                    current_item++;
                    if (current_item == item_index) {
                        in_target = true;
                        // The bytes "<item" have already gone by; re-create
                        // them, then copy raw input starting at the current
                        // byte, which is the one that ended the tag name.
                        const char* open = "<item";
                        storage_write(out, open, strlen(open));
                        span_start = i;
                    }
                }
            } else if (r == YXML_ELEMEND) {
                if (in_item && depth == item_depth) {
                    in_item = false;
                    if (in_target) {
                        // Flush through the closing '>' and stop.
                        storage_write(out, &read_buf[span_start],
                                      i - span_start + 1);
                        in_target = false;
                        found = true;
                        goto extract_done;
                    }
                }
                depth--;
            }
        }

        if (in_target) {
            // Target item continues into the next chunk.
            storage_write(out, &read_buf[span_start], n - span_start);
        }
    }

extract_done:
    storage_close(f);
    storage_close(out);
    if (!found) {
        // Do not leave an empty or partial file behind.
        storage_remove(out_path);
    }
    return found;
}