Initial commit

This commit is contained in:
kilyabin
2026-03-02 14:21:02 +04:00
commit d78668ce3a
11 changed files with 1278 additions and 0 deletions

274
app.py Normal file
View File

@@ -0,0 +1,274 @@
import os
import re
import time

import requests
import yaml
from bs4 import BeautifulSoup
from flask import Flask, render_template, jsonify, request
app = Flask(__name__)
# ── Config loading ───────────────────────────────────────────────────────────
# config.yaml is expected to live next to this file.
CONFIG_PATH = os.path.join(os.path.dirname(__file__), "config.yaml")
def load_config():
    """Read and parse the YAML configuration file at CONFIG_PATH."""
    with open(CONFIG_PATH, "r", encoding="utf-8") as fh:
        return yaml.safe_load(fh)
# Module-level settings derived from the config; refreshed by /api/reload-config.
config = load_config()
# Base URL of the schedule site that gets scraped.
BASE_URL = config["base_url"]
# Teacher id used when a request does not specify one.
DEFAULT_TEACHER = str(config.get("default_teacher", ""))
# Teacher entries from the config, keyed by string id for quick lookup.
TEACHERS = {str(t["id"]): t for t in config.get("teachers", [])}
# ── Парсер ───────────────────────────────────────────────────────────────────
def fetch_schedule(obj=None, wk=None):
    """Download the schedule page for one teacher and return the parsed result.

    obj -- teacher id; falls back to DEFAULT_TEACHER when omitted.
    wk  -- optional week number to request.

    On any network/decoding failure a dict with an "error" message and empty
    schedule fields is returned instead of raising.
    """
    query = {"mn": "3", "obj": DEFAULT_TEACHER if obj is None else obj}
    if wk:
        query["wk"] = wk
    try:
        response = requests.get(BASE_URL, params=query, timeout=10)
        response.encoding = "utf-8"
        page = response.text
    except Exception as exc:  # best-effort boundary: report, don't crash
        return {"error": str(exc), "days": [], "week_range": "", "prev_wk": None, "next_wk": None}
    return parse_schedule(page)
def parse_schedule(html):
    """Parse the schedule page HTML into a structured dict.

    Returns a dict with keys:
      days       -- list of per-day dicts: name, date, week_num, lessons,
                    has_classes
      week_range -- "с DD.MM.YYYY по DD.MM.YYYY" text, if present on the page
      prev_wk / next_wk -- week numbers linked as previous/next week, or None
      error      -- always None here (fetch errors are handled upstream)
    """
    soup = BeautifulSoup(html, "lxml")
    result = {
        "days": [],
        "week_range": "",
        "prev_wk": None,
        "next_wk": None,
        "error": None
    }
    # Week range: the first <td> whose text matches the "с ... по ..." pattern.
    for td in soup.find_all("td"):
        text = td.get_text(strip=True)
        if re.match(r"с \d{2}\.\d{2}\.\d{4} по \d{2}\.\d{2}\.\d{4}", text):
            result["week_range"] = text
            break
    # Links to the adjacent weeks; the first anchor naming the previous/next
    # week wins, duplicates (same wk number) are skipped.
    seen_wk = set()
    for a in soup.find_all("a", href=True):
        href = a["href"]
        if "wk=" not in href:
            continue
        wk_val = re.search(r"wk=(\d+)", href)
        if not wk_val:
            continue
        wk_num = int(wk_val.group(1))
        if wk_num in seen_wk:
            continue
        seen_wk.add(wk_num)
        text = a.get_text(strip=True).lower()
        if "предыдущая" in text and result["prev_wk"] is None:
            result["prev_wk"] = wk_num
        elif "следующая" in text and result["next_wk"] is None:
            result["next_wk"] = wk_num
    # Each day is anchored by an <a class="t_wth"> header like
    # "Понедельник 01.09.2025/1 неделя".
    day_anchors = soup.find_all("a", class_="t_wth")
    for anchor in day_anchors:
        day_text = anchor.get_text(strip=True)
        m = re.match(r"(\w+)\s*(\d{2}\.\d{2}\.\d{4})/(\d+)\s+неделя", day_text)
        if not m:
            continue
        day_name, day_date, week_num = m.group(1), m.group(2), m.group(3)
        # Walk up to the enclosing lesson table (<table cellpadding="1">).
        # Bug fix: the original dereferenced .parent past the document root
        # (None), raising AttributeError, and when the table was not found
        # within 10 levels it parsed rows from an arbitrary ancestor.  Here
        # the walk stops at None and rows are parsed only from a confirmed
        # matching table.
        table = None
        node = anchor
        for _ in range(10):
            node = node.parent
            if node is None:
                break
            if node.name == "table" and node.get("cellpadding") == "1":
                table = node
                break
        lessons = []
        if table is not None:
            for row in table.find_all("tr"):
                tds = row.find_all("td", recursive=False)
                if len(tds) != 4:
                    continue
                num_td = tds[0].get_text(strip=True)
                time_td = tds[1].get_text(strip=True)
                subj_td = tds[2]
                room_td = tds[3].get_text(strip=True)
                # Only rows whose first cell is a bare lesson number count.
                if not re.match(r"^\d+$", num_td):
                    continue
                info = {"subject": "", "group": "", "group_short": "", "lesson_type": "", "location": ""}
                bold = subj_td.find("b")
                if bold:
                    info["subject"] = bold.get_text(strip=True)
                font_green = subj_td.find("font", class_="t_green_10")
                if font_green:
                    info["location"] = font_green.get_text(strip=True)
                # Free text between the bold subject and the green <font>
                # (location) carries the group / lesson-type annotation.
                raw = ""
                if bold:
                    for sib in bold.next_siblings:
                        if hasattr(sib, "name"):
                            if sib.name == "font":
                                break
                            if sib.name == "br":
                                continue
                            raw += sib.get_text(strip=True)
                        else:
                            raw += str(sib).strip()
                raw = raw.strip()
                if raw:
                    info["group"] = raw
                    # First "(...)" is the short group label.
                    m_grp = re.search(r'\(([^)]+)\)', raw)
                    if m_grp:
                        info["group_short"] = m_grp.group(1)
                    # Everything after the first ")" is the lesson type,
                    # possibly wrapped in one more pair of parentheses.
                    after = raw[raw.find(")")+1:].strip() if ")" in raw else ""
                    if after:
                        unwrapped = re.sub(r'^\((.+)\)$', r'\1', after.strip())
                        inner = re.search(r'\(([^()]+)\)\s*$', unwrapped)
                        info["lesson_type"] = inner.group(1) if inner else unwrapped
                lessons.append({
                    "num": num_td,
                    "time": time_td,
                    "subject": info["subject"],
                    "group": info["group"],
                    "group_short": info["group_short"],
                    "lesson_type": info["lesson_type"],
                    "location": info["location"],
                    "room": room_td,
                    "has_class": bool(info["subject"])
                })
        result["days"].append({
            "name": day_name,
            "date": day_date,
            "week_num": week_num,
            "lessons": lessons,
            "has_classes": any(l["has_class"] for l in lessons)
        })
    return result
# ── Кэш списка преподавателей ────────────────────────────────────────────────
# In-process cache for the scraped teacher list; "ts" is the epoch time of
# the last successful refresh.
_teachers_cache = {"data": None, "ts": 0}
CACHE_TTL = 3600  # refresh at most once per hour
def fetch_all_teachers():
    """Scrape the complete teacher list from the schedule site.

    Results are cached in-process for CACHE_TTL seconds.  On a fetch/parse
    failure the previously cached list (possibly stale) is returned instead
    of silently dropping it; [] is returned only when nothing was ever cached.
    """
    now = time.time()
    if _teachers_cache["data"] and now - _teachers_cache["ts"] < CACHE_TTL:
        return _teachers_cache["data"]
    try:
        resp = requests.get(BASE_URL, params={"mn": "3"}, timeout=10)
        resp.encoding = "utf-8"
        soup = BeautifulSoup(resp.text, "lxml")
        teachers = []
        # Teacher links carry an "obj=<id>" query param and a bold name.
        for a in soup.find_all("a", href=lambda h: h and "obj=" in h):
            m = re.search(r"obj=(\d+)", a["href"])
            bold = a.find("b")
            if m and bold:
                teachers.append({
                    "id": m.group(1),
                    "name": bold.get_text(strip=True)
                })
        _teachers_cache["data"] = teachers
        _teachers_cache["ts"] = now
        return teachers
    except Exception:
        # Best-effort: serve stale cached data rather than an empty list.
        return _teachers_cache["data"] or []
@app.route("/api/all-teachers")
def api_all_teachers():
    """Return the full teacher list scraped from the site (cached for 1 h)."""
    roster = fetch_all_teachers()
    return jsonify({"teachers": roster, "count": len(roster)})
# ── Маршруты ─────────────────────────────────────────────────────────────────
@app.route("/")
def index():
    """Serve the single-page front-end."""
    return render_template("index.html")
@app.route("/api/teachers")
def api_teachers():
    """Return the teachers declared in config.yaml plus the default id."""
    roster = []
    for entry in config.get("teachers", []):
        roster.append({
            "id": str(entry["id"]),
            "name": entry["name"],
            "short": entry.get("short", entry["name"]),
        })
    return jsonify({"teachers": roster, "default": DEFAULT_TEACHER})
@app.route("/api/schedule")
def api_schedule():
    """Return one teacher's schedule for a week, with display name attached."""
    obj = request.args.get("obj", DEFAULT_TEACHER)
    wk = request.args.get("wk", None)
    # Resolve the display name: config entries first, then the scraped list.
    name = TEACHERS.get(str(obj), {}).get("name", "")
    if not name:
        hit = next((t for t in fetch_all_teachers() if t["id"] == str(obj)), None)
        if hit:
            name = hit["name"]
    data = fetch_schedule(obj=obj, wk=wk)
    data["teacher_name"] = name
    return jsonify(data)
@app.route("/api/reload-config")
def api_reload():
    """Re-read config.yaml and refresh the module-level settings in place."""
    global config, BASE_URL, DEFAULT_TEACHER, TEACHERS
    fresh = load_config()
    config = fresh
    BASE_URL = fresh["base_url"]
    DEFAULT_TEACHER = str(fresh.get("default_teacher", ""))
    TEACHERS = {str(entry["id"]): entry for entry in fresh.get("teachers", [])}
    return jsonify({"ok": True, "teachers_count": len(TEACHERS)})
#if __name__ == "__main__":
# app.run(debug=False, host="0.0.0.0", port=5609)
@app.route("/api/debug")
def api_debug():
    """Debug endpoint: expose what the parser sees for a given teacher."""
    obj = request.args.get("obj", DEFAULT_TEACHER)
    try:
        resp = requests.get(BASE_URL, params={"mn": "3", "obj": obj}, timeout=10)
        resp.encoding = "utf-8"
        html = resp.text
    except Exception as exc:
        return jsonify({"error": str(exc)})
    soup = BeautifulSoup(html, "lxml")
    anchors = soup.find_all("a", class_="t_wth")
    week_range = ""
    for cell in soup.find_all("td"):
        cell_text = cell.get_text(strip=True)
        if re.match(r"с \d{2}\.\d{2}\.\d{4} по \d{2}\.\d{2}\.\d{4}", cell_text):
            week_range = cell_text
            break
    return jsonify({
        "html_length": len(html),
        "html_snippet": html[:300],
        "week_range_found": week_range,
        "day_anchors_count": len(anchors),
        "day_anchors": [a.get_text(strip=True) for a in anchors],
        "full_parse": parse_schedule(html),
    })