File size: 1,153 Bytes
4e6bed1
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
import feedparser
import requests
import streamlit as st
from tqdm import tqdm
from pathlib import Path


@st.cache
def get_matadata():
    """Build a lookup from transcript file path to podcast episode metadata.

    Downloads the podcast RSS feed, parses it with ``feedparser`` and, for
    every feed entry, derives the transcript file path under
    ``making_sense_transcripts/`` from the entry id. The function is wrapped
    in ``st.cache``. The (misspelled) function name is kept as-is so that
    existing callers keep working.

    Returns:
        dict: Maps ``str(transcript_path)`` to a dict with the keys
        ``"title"``, ``"episode_id"`` and ``"thumbnail"``. ``"thumbnail"``
        is the entry's own image URL when it has one, otherwise the
        feed-level image URL.

    Raises:
        requests.RequestException: If the feed cannot be downloaded
            (connection failure, timeout, or a non-2xx HTTP status).
        KeyError: If the feed has no image, or an entry lacks a title or id.
    """
    transcript_dir = Path("making_sense_transcripts/")
    feed_url = "https://wakingup.libsyn.com/rss"

    # Without a timeout a stalled connection would block the app forever.
    response = requests.get(feed_url, timeout=30)
    # Fail loudly on an HTTP error instead of parsing an error page as a feed.
    response.raise_for_status()
    rss_feed = feedparser.parse(response.content)

    # Fallback image for entries that do not carry their own.
    podcast_thumbnail = rss_feed.feed["image"]["href"]

    metadata_map = {}
    for episode in tqdm(rss_feed.entries, total=len(rss_feed.entries)):
        episode_id = episode["id"]
        thumbnail = episode.get("image", {}).get("href") or podcast_thumbnail

        # Entry ids may contain "/" (e.g. URLs); flatten them so the id maps
        # to a single file name. ``replace`` is a no-op when no "/" is present.
        file_name = episode_id.replace("/", "_") + ".txt"

        metadata_map[str(transcript_dir / file_name)] = {
            "title": episode["title"],
            "episode_id": episode_id,
            "thumbnail": thumbnail,
        }
    return metadata_map