ginipick committed on
Commit 6085299 · verified · 1 Parent(s): 04168dc

Create app.py

Files changed (1)
  1. app.py +444 -0
app.py ADDED
@@ -0,0 +1,444 @@
# -*- coding: utf-8 -*-
"""
AI News & Hugging Face Trending Analysis System
- Crawls AI Times news and classifies it by category
- Collects Hugging Face model/space trending information
- Analyzes news with Fireworks AI (Qwen)
- Fact-checks with Brave Search
"""

import requests
from bs4 import BeautifulSoup
import json
from datetime import datetime
from typing import List, Dict, Optional
import time
import re


class AINewsAnalyzer:
    def __init__(self, fireworks_api_key: str, brave_api_key: str):
        """
        Args:
            fireworks_api_key: Fireworks AI API key
            brave_api_key: Brave Search API key
        """
        self.fireworks_api_key = fireworks_api_key
        self.brave_api_key = brave_api_key

        # News categories: English labels mapped to Korean keywords that are
        # matched against the (Korean) AI Times article titles.
        self.categories = {
            "Industry": ["산업", "기업", "투자", "인수", "파트너십", "시장"],
            "Technology": ["기술", "모델", "알고리즘", "개발", "연구", "논문"],
            "Product launch": ["출시", "공개", "발표", "서비스", "제품"],
            "Policy & regulation": ["규제", "정책", "법", "정부", "제재"],
            "Security": ["보안", "취약점", "해킹", "위험", "프라이버시"],
        }

        self.huggingface_data = {
            "models": [],
            "spaces": []
        }

        self.news_data = []

    def fetch_aitimes_news(self, urls: List[str]) -> List[Dict]:
        """Crawl AI Times news listing pages."""
        all_news = []

        for url in urls:
            try:
                print(f"📰 Crawling news: {url}")
                response = requests.get(url, headers={
                    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36'
                }, timeout=10)
                soup = BeautifulSoup(response.content, 'html.parser')

                # Extract article links (may need adjustment if the page structure changes)
                articles = []

                # Find <a> tags that carry a title and an article link
                for link in soup.find_all('a', href=True):
                    if '/news/articleView.html' in link['href']:
                        title = link.get_text(strip=True)
                        article_url = link['href']

                        if not article_url.startswith('http'):
                            article_url = 'https://www.aitimes.com' + article_url

                        # Extract the date from a sibling text node, if present
                        date_text = ""
                        parent = link.parent
                        if parent:
                            date_elem = parent.find(string=re.compile(r'\d{2}-\d{2}'))
                            if date_elem:
                                date_text = date_elem.strip()

                        if title and len(title) > 10:
                            articles.append({
                                'title': title,
                                'url': article_url,
                                'date': date_text,
                                'source': 'AI Times'
                            })

                all_news.extend(articles[:10])  # keep the top 10 per listing page
                time.sleep(1)  # be polite to the server

            except Exception as e:
                print(f"❌ Crawling error: {e}")

        return all_news

    def fetch_huggingface_trending(self) -> Dict:
        """Collect trending Hugging Face models and spaces."""
        print("🤗 Collecting Hugging Face trending data...")

        # Trending models
        try:
            models_url = "https://huggingface.co/api/models"
            params = {
                'sort': 'trending',
                'limit': 30
            }

            response = requests.get(models_url, params=params, timeout=10)
            if response.status_code == 200:
                models = response.json()

                for model in models[:30]:
                    self.huggingface_data['models'].append({
                        'name': model.get('id', 'Unknown'),
                        'downloads': model.get('downloads', 0),
                        'likes': model.get('likes', 0),
                        'task': model.get('pipeline_tag', 'N/A'),
                        'url': f"https://huggingface.co/{model.get('id', '')}"
                    })

                print(f"✅ Collected {len(self.huggingface_data['models'])} trending models")

        except Exception as e:
            print(f"❌ Model collection error: {e}")

        # Trending spaces (web crawl)
        try:
            spaces_url = "https://huggingface.co/spaces"
            response = requests.get(spaces_url, headers={
                'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36'
            }, timeout=10)

            soup = BeautifulSoup(response.content, 'html.parser')

            # Extract space links
            space_count = 0
            for link in soup.find_all('a', href=True):
                if '/spaces/' in link['href'] and space_count < 30:
                    space_name = link['href'].replace('/spaces/', '')
                    if '/' in space_name and len(space_name) > 3:
                        title = link.get_text(strip=True)
                        if title:
                            self.huggingface_data['spaces'].append({
                                'name': space_name,
                                'title': title[:100],
                                'url': f"https://huggingface.co{link['href']}"
                            })
                            space_count += 1

            print(f"✅ Collected {len(self.huggingface_data['spaces'])} trending spaces")

        except Exception as e:
            print(f"❌ Space collection error: {e}")

        return self.huggingface_data

    def categorize_news(self, news_list: List[Dict]) -> List[Dict]:
        """Assign a category to each news item based on title keywords."""
        for news in news_list:
            title = news['title'].lower()
            news['category'] = "Other"

            for category, keywords in self.categories.items():
                if any(keyword in title for keyword in keywords):
                    news['category'] = category
                    break

        return news_list

    def analyze_with_qwen(self, text: str, instruction: str) -> str:
        """Analyze text with the Qwen model served by Fireworks AI."""
        url = "https://api.fireworks.ai/inference/v1/chat/completions"

        payload = {
            "model": "accounts/fireworks/models/qwen3-235b-a22b-instruct-2507",
            "max_tokens": 4096,
            "top_p": 1,
            "top_k": 40,
            "presence_penalty": 0,
            "frequency_penalty": 0,
            "temperature": 0.6,
            "messages": [
                {
                    "role": "system",
                    "content": "You are an expert who explains AI news simply enough for an elementary school student to understand."
                },
                {
                    "role": "user",
                    "content": f"{instruction}\n\nNews: {text}"
                }
            ]
        }

        headers = {
            "Accept": "application/json",
            "Content-Type": "application/json",
            "Authorization": f"Bearer {self.fireworks_api_key}"
        }

        try:
            response = requests.post(url, headers=headers, data=json.dumps(payload), timeout=30)

            if response.status_code == 200:
                result = response.json()
                return result['choices'][0]['message']['content']
            else:
                return f"Analysis failed (status code: {response.status_code})"

        except Exception as e:
            return f"Analysis error: {str(e)}"

    def fact_check_with_brave(self, query: str) -> List[Dict]:
        """Fact-check a claim via Brave Search."""
        url = "https://api.search.brave.com/res/v1/web/search"

        headers = {
            "Accept": "application/json",
            "X-Subscription-Token": self.brave_api_key
        }

        params = {
            "q": query,
            "count": 5,
            "text_decorations": False,
            "search_lang": "ko"
        }

        try:
            response = requests.get(url, headers=headers, params=params, timeout=10)

            if response.status_code == 200:
                data = response.json()
                results = []

                if 'web' in data and 'results' in data['web']:
                    for item in data['web']['results'][:3]:
                        results.append({
                            'title': item.get('title', ''),
                            'description': item.get('description', ''),
                            'url': item.get('url', '')
                        })

                return results
            else:
                return []

        except Exception as e:
            print(f"❌ Brave Search error: {e}")
            return []

    def generate_report(self, news_list: List[Dict], analyze_news: bool = True) -> str:
        """Build the combined report."""
        report = []
        report.append("=" * 80)
        report.append("📊 AI News & Hugging Face Trending Report")
        report.append(f"📅 Generated: {datetime.now().strftime('%Y-%m-%d %H:%M')}")
        report.append("=" * 80)
        report.append("")

        # 1. News analysis by category
        report.append("📰 === AI TIMES NEWS ANALYSIS ===")
        report.append("")

        categorized_news = {}
        for news in news_list:
            category = news.get('category', 'Other')
            if category not in categorized_news:
                categorized_news[category] = []
            categorized_news[category].append(news)

        for category, articles in categorized_news.items():
            report.append(f"📌 [{category}] ({len(articles)} articles)")
            report.append("-" * 80)

            for i, article in enumerate(articles[:5], 1):  # at most 5 per category
                report.append(f"{i}. {article['title']}")
                report.append(f"   🔗 {article['url']}")
                report.append(f"   📅 {article.get('date', 'N/A')}")

                # LLM analysis (optional)
                if analyze_news and i <= 2:  # analyze only the top 2 per category
                    print(f"🤖 Running LLM analysis: {article['title'][:50]}...")

                    instruction = """Analyze this news item in the following format:
1. Key points (2-3 sentences, at an elementary-school level)
2. Why does it matter? (1-2 sentences)
3. What you should do about it (1-2 items)

Keep it short and clear."""

                    analysis = self.analyze_with_qwen(article['title'], instruction)
                    report.append("\n   🤖 AI analysis:")
                    for line in analysis.split('\n'):
                        if line.strip():
                            report.append(f"      {line.strip()}")

                    # Fact check (optional)
                    fact_check = self.fact_check_with_brave(article['title'][:100])
                    if fact_check:
                        report.append("\n   ✅ Fact check (Brave Search):")
                        for fc in fact_check[:2]:
                            report.append(f"      • {fc['title']}")
                            report.append(f"        {fc['url']}")

                    time.sleep(2)  # respect API rate limits

                report.append("")

            report.append("")

        # 2. Hugging Face trending
        report.append("🤗 === HUGGING FACE TRENDING TOP 30 ===")
        report.append("")

        # Models
        report.append("🔥 Trending models TOP 30")
        report.append("-" * 80)
        for i, model in enumerate(self.huggingface_data['models'][:30], 1):
            report.append(f"{i:2d}. {model['name']}")
            report.append(f"    📊 Downloads: {model['downloads']:,} | ❤️ Likes: {model['likes']:,}")
            report.append(f"    🏷️ Task: {model['task']}")
            report.append(f"    🔗 {model['url']}")
            report.append("")

        report.append("")

        # Spaces
        report.append("🚀 Trending spaces TOP 30")
        report.append("-" * 80)
        for i, space in enumerate(self.huggingface_data['spaces'][:30], 1):
            report.append(f"{i:2d}. {space['name']}")
            report.append(f"    📝 {space['title']}")
            report.append(f"    🔗 {space['url']}")
            report.append("")

        # 3. Summary
        report.append("=" * 80)
        report.append("📈 Summary")
        report.append("=" * 80)
        report.append(f"• News items collected: {len(news_list)}")
        report.append(f"• Categories: {len(categorized_news)}")
        report.append(f"• Trending models: {len(self.huggingface_data['models'])}")
        report.append(f"• Trending spaces: {len(self.huggingface_data['spaces'])}")
        report.append("")

        return '\n'.join(report)

    def run_full_analysis(self, news_urls: List[str], analyze_with_llm: bool = True) -> str:
        """Run the full collection and analysis pipeline."""
        print("🚀 Starting AI news & Hugging Face trending analysis...")
        print("")

        # 1. Collect news
        news_list = self.fetch_aitimes_news(news_urls)
        print(f"✅ Collected {len(news_list)} news items")
        print("")

        # 2. Categorize news
        categorized_news = self.categorize_news(news_list)
        print("✅ News categorized")
        print("")

        # 3. Collect Hugging Face trending data
        self.fetch_huggingface_trending()
        print("")

        # 4. Generate the report
        print("📝 Generating report...")
        report = self.generate_report(categorized_news, analyze_news=analyze_with_llm)

        print("")
        print("✅ Analysis complete!")

        return report

    def save_report(self, report: str, filename: str = None):
        """Save the report to a text file."""
        if filename is None:
            timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
            filename = f"ai_news_report_{timestamp}.txt"

        with open(filename, 'w', encoding='utf-8') as f:
            f.write(report)

        print(f"💾 Report saved: {filename}")


# ==================== Usage example ====================

def main():
    """Main entry point."""

    # API key setup (replace with your own keys; a variant that reads them
    # from environment variables is sketched below)
    FIREWORKS_API_KEY = "YOUR_FIREWORKS_API_KEY"  # your Fireworks API key
    BRAVE_API_KEY = "YOUR_BRAVE_API_KEY"  # your Brave Search API key

    # AI Times news listing URLs
    news_urls = [
        "https://www.aitimes.com/news/articleList.html?sc_multi_code=S2&view_type=sm",  # AI industry
        "https://www.aitimes.com/news/articleList.html?sc_section_code=S1N24&view_type=sm"  # AI technology
    ]

    # Initialize the analyzer
    analyzer = AINewsAnalyzer(
        fireworks_api_key=FIREWORKS_API_KEY,
        brave_api_key=BRAVE_API_KEY
    )

    # Run the full analysis
    # Set analyze_with_llm=False to skip LLM analysis and only collect data quickly
    report = analyzer.run_full_analysis(
        news_urls=news_urls,
        analyze_with_llm=True  # enable LLM analysis (takes noticeably longer)
    )

    # Print the result
    print("\n" + "=" * 80)
    print(report)

    # Save to file
    analyzer.save_report(report)


if __name__ == "__main__":
    main()


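# A minimal variant of the key setup in main(), assuming the two keys are
# exported as FIREWORKS_API_KEY and BRAVE_API_KEY environment variables rather
# than hardcoded. The function name and variable names are illustrative only.
def build_analyzer_from_env() -> "AINewsAnalyzer":
    import os

    return AINewsAnalyzer(
        fireworks_api_key=os.environ["FIREWORKS_API_KEY"],
        brave_api_key=os.environ["BRAVE_API_KEY"]
    )

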
# ==================== Usage tips ====================
"""
1. API key setup:
   - Fireworks AI: https://fireworks.ai/
   - Brave Search: https://brave.com/search/api/

2. Quick test (without LLM analysis):
   analyzer.run_full_analysis(news_urls, analyze_with_llm=False)

3. Analyze only a specific category:
   filter categorized_news for the category you want

4. Tune the crawl interval:
   adjust the time.sleep() values to balance speed and stability

5. Using the results:
   - Save as JSON: json.dumps(analyzer.huggingface_data)
   - Store in a database
   - Feed a dashboard

A short sketch of tips 2, 3 and 5 follows below.
"""
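

# ==================== Quick-scan sketch ====================
# A minimal sketch of tips 2, 3 and 5 above: collect and categorize the news
# without any LLM calls, keep a single category, and persist the Hugging Face
# trending data as JSON. The function name, the output filename and the
# "Security" filter are illustrative assumptions, not part of the pipeline.
def quick_scan_example():
    analyzer = AINewsAnalyzer("YOUR_FIREWORKS_API_KEY", "YOUR_BRAVE_API_KEY")

    # Tip 2: fast collection pass, no LLM analysis involved
    urls = ["https://www.aitimes.com/news/articleList.html?sc_multi_code=S2&view_type=sm"]
    news = analyzer.categorize_news(analyzer.fetch_aitimes_news(urls))

    # Tip 3: keep only one category, e.g. security-related articles
    security_news = [n for n in news if n.get('category') == "Security"]

    # Tip 5: collect the trending data and save it as JSON
    analyzer.fetch_huggingface_trending()
    with open("huggingface_trending.json", "w", encoding="utf-8") as f:
        json.dump(analyzer.huggingface_data, f, ensure_ascii=False, indent=2)

    return security_news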