# Flask app: scrapes a university timetable site and exposes it as a JSON API.
import os
import re
import time

import requests
import yaml
from bs4 import BeautifulSoup
from flask import Flask, render_template, jsonify, request
app = Flask(__name__)

# ── Config loading ───────────────────────────────────────────────────────────
# config.yaml lives next to this file; parsed by load_config() below.
CONFIG_PATH = os.path.join(os.path.dirname(__file__), "config.yaml")
def load_config():
    """Read and parse the YAML configuration file.

    Returns:
        The parsed contents of ``config.yaml`` (typically a dict).
    """
    with open(CONFIG_PATH, "r", encoding="utf-8") as cfg_file:
        return yaml.safe_load(cfg_file)
# Module-level globals derived from the config; refreshed by /api/reload-config.
config = load_config()
BASE_URL = config["base_url"]                             # remote schedule endpoint
DEFAULT_TEACHER = str(config.get("default_teacher", ""))  # fallback teacher id
TEACHERS = {str(t["id"]): t for t in config.get("teachers", [])}  # id -> config entry
# ── Parser ───────────────────────────────────────────────────────────────────
def fetch_schedule(obj=None, wk=None):
    """Download and parse the schedule page for one teacher.

    Args:
        obj: teacher id as used by the remote site; ``None`` falls back to
            ``DEFAULT_TEACHER`` from the config.
        wk: optional week number; when falsy the site's current week is used.

    Returns:
        dict with keys ``days``, ``week_range``, ``prev_wk``, ``next_wk``
        (see ``parse_schedule``); on a network failure, a stub dict with
        ``error`` set and empty schedule fields.
    """
    if obj is None:
        obj = DEFAULT_TEACHER

    params = {"mn": "3", "obj": obj}
    if wk:
        params["wk"] = wk

    try:
        resp = requests.get(BASE_URL, params=params, timeout=10)
        # Force utf-8 decoding; presumably the server mislabels its
        # charset — TODO confirm against the live site.
        resp.encoding = "utf-8"
        html = resp.text
    except requests.RequestException as e:
        # Narrowed from a bare ``except Exception``: only network/HTTP
        # failures are expected here; programming errors should propagate.
        return {"error": str(e), "days": [], "week_range": "", "prev_wk": None, "next_wk": None}

    return parse_schedule(html)
# Pre-compiled patterns used by the parser (hoisted out of the loops).
_WEEK_RANGE_RE = re.compile(r"с \d{2}\.\d{2}\.\d{4} по \d{2}\.\d{2}\.\d{4}")
_WK_PARAM_RE = re.compile(r"wk=(\d+)")
_DAY_HEADER_RE = re.compile(r"(\w+)\s*(\d{2}\.\d{2}\.\d{4})/(\d+)\s+неделя")
_LESSON_NUM_RE = re.compile(r"^\d+$")


def _find_week_range(soup):
    """Return the first 'с DD.MM.YYYY по DD.MM.YYYY' cell text, or ''."""
    for td in soup.find_all("td"):
        text = td.get_text(strip=True)
        if _WEEK_RANGE_RE.match(text):
            return text
    return ""


def _find_adjacent_weeks(soup):
    """Return (prev_wk, next_wk) week numbers from the navigation links.

    Scans every anchor with a ``wk=N`` query parameter, de-duplicating week
    numbers, and keeps the first link whose text mentions the previous /
    next week.  Either value may be None when no such link exists.
    """
    prev_wk = next_wk = None
    seen_wk = set()
    for a in soup.find_all("a", href=True):
        m = _WK_PARAM_RE.search(a["href"])
        if not m:
            continue
        wk_num = int(m.group(1))
        if wk_num in seen_wk:
            continue
        seen_wk.add(wk_num)
        text = a.get_text(strip=True).lower()
        if "предыдущая" in text and prev_wk is None:
            prev_wk = wk_num
        elif "следующая" in text and next_wk is None:
            next_wk = wk_num
    return prev_wk, next_wk


def _find_day_table(anchor):
    """Climb from a day-header anchor to its enclosing lessons table.

    The site wraps each day in a ``<table cellpadding="1">``; we walk at
    most 10 ancestors.  Returns the table tag, or None when no matching
    ancestor exists.  (The original code crashed with AttributeError when
    the climb walked past the document root, and on no match it scanned an
    arbitrary ancestor's rows; both are fixed by returning None here.)
    """
    node = anchor
    for _ in range(10):
        node = node.parent
        if node is None:
            return None
        if node.name == "table" and node.get("cellpadding") == "1":
            return node
    return None


def _parse_lesson_fields(subj_td):
    """Extract subject / group / lesson type / location from one lesson cell."""
    info = {"subject": "", "group": "", "group_short": "", "lesson_type": "", "location": ""}

    bold = subj_td.find("b")
    if bold:
        info["subject"] = bold.get_text(strip=True)

    font_green = subj_td.find("font", class_="t_green_10")
    if font_green:
        info["location"] = font_green.get_text(strip=True)

    # The group / lesson-type text sits between the <b> subject and the
    # first <font> sibling, possibly split by <br> tags.
    raw = ""
    if bold:
        for node in bold.next_siblings:
            if hasattr(node, "name"):
                if node.name == "font":
                    break
                if node.name == "br":
                    continue
                raw += node.get_text(strip=True)
            else:
                # Plain text node (NavigableString).
                raw += str(node).strip()
    raw = raw.strip()

    if raw:
        info["group"] = raw
        m_grp = re.search(r'\(([^)]+)\)', raw)
        if m_grp:
            info["group_short"] = m_grp.group(1)

        # Everything after the first ')' is the lesson type, optionally
        # wrapped in one more level of parentheses.
        after = raw[raw.find(")") + 1:].strip() if ")" in raw else ""
        if after:
            unwrapped = re.sub(r'^\((.+)\)$', r'\1', after.strip())
            inner = re.search(r'\(([^()]+)\)\s*$', unwrapped)
            info["lesson_type"] = inner.group(1) if inner else unwrapped
    return info


def _parse_day_lessons(table):
    """Parse all lesson rows (exactly 4 direct <td> cells) of one day table."""
    lessons = []
    if table is None:
        return lessons
    for row in table.find_all("tr"):
        tds = row.find_all("td", recursive=False)
        if len(tds) != 4:
            continue

        num_td = tds[0].get_text(strip=True)
        # Skip header/filler rows whose first cell is not a lesson number.
        if not _LESSON_NUM_RE.match(num_td):
            continue

        info = _parse_lesson_fields(tds[2])
        lessons.append({
            "num": num_td,
            "time": tds[1].get_text(strip=True),
            "subject": info["subject"],
            "group": info["group"],
            "group_short": info["group_short"],
            "lesson_type": info["lesson_type"],
            "location": info["location"],
            "room": tds[3].get_text(strip=True),
            "has_class": bool(info["subject"]),
        })
    return lessons


def parse_schedule(html):
    """Parse the remote schedule HTML into a JSON-serialisable dict.

    Args:
        html: raw HTML of the schedule page.

    Returns:
        dict with:
            days: list of day dicts (name, date, week_num, lessons, has_classes);
            week_range: human-readable week span label, '' if not found;
            prev_wk / next_wk: adjacent week numbers or None;
            error: always None (errors are reported by the fetch layer).
    """
    soup = BeautifulSoup(html, "lxml")
    prev_wk, next_wk = _find_adjacent_weeks(soup)
    result = {
        "days": [],
        "week_range": _find_week_range(soup),
        "prev_wk": prev_wk,
        "next_wk": next_wk,
        "error": None,
    }

    # Each day is announced by an <a class="t_wth"> header like
    # "Понедельник 01.09.2025/1 неделя".
    for anchor in soup.find_all("a", class_="t_wth"):
        m = _DAY_HEADER_RE.match(anchor.get_text(strip=True))
        if not m:
            continue

        lessons = _parse_day_lessons(_find_day_table(anchor))
        result["days"].append({
            "name": m.group(1),
            "date": m.group(2),
            "week_num": m.group(3),
            "lessons": lessons,
            "has_classes": any(l["has_class"] for l in lessons),
        })

    return result
# ── Teacher-list cache ───────────────────────────────────────────────────────
# Module-level cache for the scraped teacher list: "data" holds the last
# result, "ts" the epoch timestamp of that fetch (see fetch_all_teachers).
_teachers_cache = {"data": None, "ts": 0}
CACHE_TTL = 3600  # refresh at most once per hour (seconds)
def fetch_all_teachers():
    """Scrape the complete teacher list from the schedule site.

    Results are memoised in ``_teachers_cache`` for ``CACHE_TTL`` seconds.

    Returns:
        list of ``{"id": str, "name": str}`` dicts; an empty list on
        network failure (best-effort — this endpoint never raises for
        connectivity problems).
    """
    now = time.time()
    if _teachers_cache["data"] and now - _teachers_cache["ts"] < CACHE_TTL:
        return _teachers_cache["data"]

    try:
        resp = requests.get(BASE_URL, params={"mn": "3"}, timeout=10)
        # Force utf-8; presumably the server mislabels its charset — TODO confirm.
        resp.encoding = "utf-8"
        soup = BeautifulSoup(resp.text, "lxml")
    except requests.RequestException:
        # Narrowed from a bare ``except Exception``: only network/HTTP
        # failures are expected; a parser bug should surface, not be hidden.
        return []

    teachers = []
    # Teacher links look like <a href="...obj=NNN"><b>Name</b></a>.
    for a in soup.find_all("a", href=lambda h: h and "obj=" in h):
        m = re.search(r"obj=(\d+)", a["href"])
        bold = a.find("b")
        if m and bold:
            teachers.append({
                "id": m.group(1),
                "name": bold.get_text(strip=True),
            })

    # Only cache successful fetches.
    _teachers_cache["data"] = teachers
    _teachers_cache["ts"] = now
    return teachers
@app.route("/api/all-teachers")
def api_all_teachers():
    """Full teacher list scraped from the site (cached for one hour)."""
    found = fetch_all_teachers()
    return jsonify({"teachers": found, "count": len(found)})
# ── Routes ───────────────────────────────────────────────────────────────────
@app.route("/")
def index():
    """Serve the single-page front-end."""
    return render_template("index.html")
@app.route("/api/teachers")
def api_teachers():
    """Teachers listed in config.yaml, plus the configured default id."""
    listed = []
    for entry in config.get("teachers", []):
        listed.append({
            "id": str(entry["id"]),
            "name": entry["name"],
            "short": entry.get("short", entry["name"]),
        })
    return jsonify({"teachers": listed, "default": DEFAULT_TEACHER})
@app.route("/api/schedule")
def api_schedule():
    """Schedule for one teacher: ``?obj=`` selects the teacher, ``?wk=`` the week."""
    obj = request.args.get("obj", DEFAULT_TEACHER)
    wk = request.args.get("wk", None)

    # Resolve the display name: config entries first, then the scraped full list.
    name = TEACHERS.get(str(obj), {}).get("name", "")
    if not name:
        for candidate in fetch_all_teachers():
            if candidate["id"] == str(obj):
                name = candidate["name"]
                break

    data = fetch_schedule(obj=obj, wk=wk)
    data["teacher_name"] = name
    return jsonify(data)
@app.route("/api/reload-config")
def api_reload():
    """Re-read config.yaml and refresh the derived module-level globals."""
    global config, BASE_URL, DEFAULT_TEACHER, TEACHERS
    fresh = load_config()
    config = fresh
    BASE_URL = fresh["base_url"]
    DEFAULT_TEACHER = str(fresh.get("default_teacher", ""))
    TEACHERS = {}
    for entry in fresh.get("teachers", []):
        TEACHERS[str(entry["id"])] = entry
    return jsonify({"ok": True, "teachers_count": len(TEACHERS)})
# NOTE: direct execution is intentionally disabled — presumably the app is
# served by an external WSGI server. To run standalone, restore:
#   if __name__ == "__main__":
#       app.run(debug=False, host="0.0.0.0", port=5609)
@app.route("/api/debug")
def api_debug():
    """Debug endpoint: fetch the raw page and report what the parser sees.

    Query params:
        obj: teacher id; defaults to ``DEFAULT_TEACHER``.

    Returns:
        JSON with the raw HTML length/snippet, the independently-scanned
        week range, the day anchors found, and the full ``parse_schedule``
        output — or ``{"error": ...}`` on a network failure.
    """
    obj = request.args.get("obj", DEFAULT_TEACHER)
    params = {"mn": "3", "obj": obj}
    try:
        resp = requests.get(BASE_URL, params=params, timeout=10)
        # Force utf-8; presumably the server mislabels its charset — TODO confirm.
        resp.encoding = "utf-8"
        html = resp.text
    except requests.RequestException as e:
        # Narrowed from a bare ``except Exception`` for consistency with
        # fetch_schedule: only network/HTTP failures belong here.
        return jsonify({"error": str(e)})

    soup = BeautifulSoup(html, "lxml")
    day_anchors = soup.find_all("a", class_="t_wth")

    # Independent week-range scan so the raw result can be compared against
    # what parse_schedule() reports below.
    week_range = ""
    for td in soup.find_all("td"):
        text = td.get_text(strip=True)
        if re.match(r"с \d{2}\.\d{2}\.\d{4} по \d{2}\.\d{2}\.\d{4}", text):
            week_range = text
            break

    return jsonify({
        "html_length": len(html),
        "html_snippet": html[:300],
        "week_range_found": week_range,
        "day_anchors_count": len(day_anchors),
        "day_anchors": [a.get_text(strip=True) for a in day_anchors],
        "full_parse": parse_schedule(html),
    })