Files
cf-zeiten/core/data_processor.py
2026-02-21 20:44:40 +01:00

84 lines
3.0 KiB
Python
Raw Permalink Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
import pandas as pd
import requests
import markdown
from io import StringIO
from datetime import datetime, timedelta
from .config_loader import config
# Module-level cache: parsed events, rendered remarks and the time of the
# last refresh. Every slot starts out empty (None).
_cache = {slot: None for slot in ("events", "remarks", "timestamp")}

# Browser-like User-Agent header passed along with the CSV downloads.
HEADERS = {
    "User-Agent": "Mozilla/5.0",
}
def invalidate_cache():
    """Throw away all cached data.

    The module-level cache is rebound to a fresh dict whose ``events``,
    ``remarks`` and ``timestamp`` slots are all ``None``.

    Returns:
        The confirmation message ``"Cache gelöscht"``.
    """
    global _cache
    empty_slots = {slot: None for slot in ("events", "remarks", "timestamp")}
    _cache = empty_slots
    return "Cache gelöscht"
def _is_cache_valid():
    """Tell whether the cache timestamp is less than one hour old.

    Returns:
        ``False`` when no timestamp has been recorded yet; otherwise ``True``
        only while less than one hour has passed since that timestamp.
    """
    stamped_at = _cache["timestamp"]
    if stamped_at is not None:
        age = datetime.now() - stamped_at
        return age < timedelta(hours=1)
    return False
def get_upcoming_events(days_to_show=None, limit=None):
    """Return upcoming events from the times sheet, cached for one hour.

    The CSV behind ``config['links']['times_csv']`` is downloaded and parsed
    when no events are cached or the cached events are an hour old or more;
    otherwise the cached rows are reused.

    Args:
        days_to_show: Size of the look-ahead window in days. ``None`` or ``0``
            falls back to ``config['processing']['days_to_show']``.
        limit: When truthy and positive it takes priority over
            ``days_to_show``: the first ``limit`` events dated today or later
            are returned, regardless of how far in the future they are.

    Returns:
        A list of row dicts ordered by date. Each dict holds the sheet
        columns plus ``'Wochentag'``, a two-letter German weekday
        abbreviation derived from the date column.

    Raises:
        requests.RequestException: If the download fails, times out or the
            server answers with an HTTP error status.
        KeyError: If the sheet lacks the ``'Morgen'``/``'Nachmittag'``
            columns or the configured date column.
    """
    # Fall back to the configured default right away when None or 0 is passed.
    if not days_to_show:
        days_to_show = config['processing']['days_to_show']

    date_col = config['google_sheet']['date_column']
    now = datetime.now()

    # Freshness is judged by the events' own fetch time. Relying only on the
    # shared "timestamp" slot would let a refresh of other cached data keep
    # expired events looking valid.
    fetched_at = _cache.get("events_timestamp")
    is_fresh = (
        _cache["events"] is not None
        and fetched_at is not None
        and now - fetched_at < timedelta(hours=1)
    )

    # 1. Load the data (either from the cache or from the remote sheet).
    if not is_fresh:
        # A timeout keeps a stalled connection from blocking the caller forever.
        response = requests.get(
            config['links']['times_csv'], headers=HEADERS, timeout=10
        )
        response.raise_for_status()
        response.encoding = 'utf-8'
        df = pd.read_csv(StringIO(response.text))

        # Typography: replace the hyphen with an en dash (U+2013). The escape
        # sequence keeps the character unambiguous in the source file.
        for col in ['Morgen', 'Nachmittag']:
            df[col] = (
                df[col].fillna('').astype(str)
                .str.replace('-', '\u2013', regex=False)
            )

        # Missing and unparseable dates both become NaT and are dropped, so
        # only rows with a usable date are cached.
        df[date_col] = pd.to_datetime(df[date_col], dayfirst=True, errors='coerce')
        wt_map = {0: 'Mo', 1: 'Di', 2: 'Mi', 3: 'Do', 4: 'Fr', 5: 'Sa', 6: 'So'}
        df['Wochentag'] = df[date_col].dt.weekday.map(wt_map)
        df = df.dropna(subset=[date_col])

        _cache["events"] = df.sort_values(by=date_col).to_dict(orient='records')
        _cache["events_timestamp"] = now
        # Keep the shared timestamp updated for code that still reads it.
        _cache["timestamp"] = now

    events = _cache["events"]
    heute = pd.Timestamp(now.date())

    # PRIORITY 1: row limit (always wins).
    if limit and limit > 0:
        return [e for e in events if e[date_col] >= heute][:limit]

    # PRIORITY 2: day window [today, today + days_to_show).
    ende = heute + timedelta(days=int(days_to_show))
    return [e for e in events if heute <= e[date_col] < ende]
def get_remarks():
    """Return the remarks from the remarks sheet as HTML, cached for one hour.

    The CSV behind ``config['links']['remarks_csv']`` is downloaded when no
    remarks are cached or the cached remarks are an hour old or more. The
    first two CSV rows are skipped; every non-empty cell of the first column
    is rendered from Markdown to HTML.

    Returns:
        A list of HTML strings, one per remark. A remark that renders to a
        single paragraph is returned without its surrounding ``<p>`` tag. The
        list is empty when the sheet holds no remarks.

    Raises:
        requests.RequestException: If the download fails, times out or the
            server answers with an HTTP error status.
    """
    now = datetime.now()

    # Freshness is judged by the remarks' own fetch time. Relying only on the
    # shared "timestamp" slot would let a refresh of other cached data keep
    # expired remarks looking valid.
    fetched_at = _cache.get("remarks_timestamp")
    if (
        _cache["remarks"] is not None
        and fetched_at is not None
        and now - fetched_at < timedelta(hours=1)
    ):
        return _cache["remarks"]

    # A timeout keeps a stalled connection from blocking the caller forever.
    response = requests.get(
        config['links']['remarks_csv'], headers=HEADERS, timeout=10
    )
    response.raise_for_status()
    response.encoding = 'utf-8'

    try:
        df = pd.read_csv(StringIO(response.text), skiprows=2, header=None)
    except pd.errors.EmptyDataError:
        # Nothing is left after the two skipped rows: treat as "no remarks".
        df = pd.DataFrame()

    processed = []
    if not df.empty:
        for raw in df[0].dropna().astype(str):
            html = markdown.markdown(raw.strip())
            # Unwrap only a lone paragraph. With several paragraphs, cutting
            # the first "<p>" and the last "</p>" would leave unbalanced tags.
            if (
                html.startswith("<p>")
                and html.endswith("</p>")
                and html.count("<p>") == 1
            ):
                html = html[3:-4]
            processed.append(html)

    # An empty result is timestamped too, so it is not refetched on every call.
    _cache["remarks"] = processed
    _cache["remarks_timestamp"] = now
    # Keep the shared timestamp updated for code that still reads it.
    _cache["timestamp"] = now
    return processed