"""Flask app that scrapes a teacher timetable site and serves it as JSON.

Settings (upstream ``base_url``, default teacher id, list of known teachers)
are read from ``config.yaml`` located next to this module.
"""
import logging
import os
import re
import time

import requests
import yaml
from bs4 import BeautifulSoup
from flask import Flask, render_template, jsonify, request

app = Flask(__name__)
logger = logging.getLogger(__name__)

# ── Config loading ───────────────────────────────────────────────────────────
CONFIG_PATH = os.path.join(os.path.dirname(__file__), "config.yaml")


def load_config():
    """Read ``config.yaml`` and return the parsed document.

    Raises:
        OSError: the file cannot be opened.
        yaml.YAMLError: the file is not valid YAML.
    """
    with open(CONFIG_PATH, "r", encoding="utf-8") as f:
        return yaml.safe_load(f)


def _derive_settings(cfg):
    """Return ``(base_url, default_teacher, teachers_by_id)`` built from *cfg*.

    Raises ``KeyError`` when ``base_url`` (or a teacher's ``id``) is missing.
    """
    base_url = cfg["base_url"]
    default_teacher = str(cfg.get("default_teacher", ""))
    # ``or []`` tolerates a ``teachers:`` key that is present but null.
    teachers = {str(t["id"]): t for t in cfg.get("teachers") or []}
    return base_url, default_teacher, teachers


config = load_config()
BASE_URL, DEFAULT_TEACHER, TEACHERS = _derive_settings(config)

# ── Parser ───────────────────────────────────────────────────────────────────
REQUEST_TIMEOUT = 10  # seconds, applied to every upstream request

# Compiled once; the pattern texts are matched against the upstream page and
# must stay exactly as they are.
_WEEK_RANGE_RE = re.compile(r"с \d{2}\.\d{2}\.\d{4} по \d{2}\.\d{2}\.\d{4}")
_WK_PARAM_RE = re.compile(r"wk=(\d+)")
_OBJ_PARAM_RE = re.compile(r"obj=(\d+)")
_DAY_HEADER_RE = re.compile(r"(\w+)\s*(\d{2}\.\d{2}\.\d{4})/(\d+)\s+неделя")
_LESSON_NUM_RE = re.compile(r"^\d+$")


def _fetch_html(params):
    """GET ``BASE_URL`` with *params* and return the body decoded as UTF-8.

    Raises:
        requests.RequestException: on network failure, timeout, or a
            non-2xx HTTP status.
    """
    resp = requests.get(BASE_URL, params=params, timeout=REQUEST_TIMEOUT)
    # Without this an upstream error page would be parsed as an empty schedule.
    resp.raise_for_status()
    resp.encoding = "utf-8"
    return resp.text


def fetch_schedule(obj=None, wk=None):
    """Download and parse the schedule page for one teacher.

    Args:
        obj: teacher id sent as the ``obj`` query parameter; ``None`` means
            ``DEFAULT_TEACHER``.
        wk: optional week value sent as the ``wk`` query parameter; omitted
            from the request when falsy.

    Returns:
        The dict produced by ``parse_schedule``. If the download fails, a dict
        with the same keys, empty data and ``error`` set to the exception text.
    """
    if obj is None:
        obj = DEFAULT_TEACHER
    params = {"mn": "3", "obj": obj}
    if wk:
        params["wk"] = wk
    try:
        html = _fetch_html(params)
    except requests.RequestException as e:
        return {
            "error": str(e),
            "days": [],
            "week_range": "",
            "prev_wk": None,
            "next_wk": None,
        }
    return parse_schedule(html)


def _find_week_range(soup):
    """Return the text of the first ``<td>`` that starts with the week range, or ``""``."""
    for td in soup.find_all("td"):
        text = td.get_text(strip=True)
        if _WEEK_RANGE_RE.match(text):
            return text
    return ""


def _find_adjacent_weeks(soup):
    """Return ``(prev_wk, next_wk)`` taken from links carrying a ``wk=`` parameter.

    Only the first link seen for each distinct ``wk`` value is inspected;
    either element is ``None`` when no suitable link is found.
    """
    prev_wk = None
    next_wk = None
    seen_wk = set()
    for a in soup.find_all("a", href=True):
        wk_val = _WK_PARAM_RE.search(a["href"])
        if not wk_val:
            continue
        wk_num = int(wk_val.group(1))
        if wk_num in seen_wk:
            continue
        seen_wk.add(wk_num)
        label = a.get_text(strip=True).lower()
        if "предыдущая" in label and prev_wk is None:
            prev_wk = wk_num
        elif "следующая" in label and next_wk is None:
            next_wk = wk_num
    return prev_wk, next_wk


def _find_lessons_table(anchor, max_depth=10):
    """Return the nearest ancestor ``<table cellpadding="1">`` of *anchor*.

    Looks at most *max_depth* levels up and returns ``None`` when no such
    table exists, so the caller never scans an unrelated ancestor and never
    dereferences ``None`` after walking past the document root.
    """
    node = anchor
    for _ in range(max_depth):
        node = node.parent
        if node is None:
            return None
        if node.name == "table" and node.get("cellpadding") == "1":
            return node
    return None


def _text_after_bold(bold):
    """Concatenate the stripped text of *bold*'s following siblings.

    Collection stops at the first ``<font>`` sibling; ``<br>`` siblings are
    skipped.
    """
    parts = []
    for node in bold.next_siblings:
        # NOTE(review): kept as in the original; whether plain text nodes
        # expose ``name`` depends on the bs4 version, and both branches yield
        # the node's stripped text — confirm before simplifying.
        if hasattr(node, "name"):
            if node.name == "font":
                break
            if node.name == "br":
                continue
            parts.append(node.get_text(strip=True))
        else:
            parts.append(str(node).strip())
    return "".join(parts).strip()


def _parse_subject_cell(subj_td):
    """Extract subject, group, short group, lesson type and location from a subject cell."""
    info = {
        "subject": "",
        "group": "",
        "group_short": "",
        "lesson_type": "",
        "location": "",
    }

    bold = subj_td.find("b")
    if bold is not None:
        info["subject"] = bold.get_text(strip=True)

    font_green = subj_td.find("font", class_="t_green_10")
    if font_green is not None:
        info["location"] = font_green.get_text(strip=True)

    raw = _text_after_bold(bold) if bold is not None else ""
    if raw:
        info["group"] = raw
        # First parenthesised fragment is taken as the short group name.
        m_grp = re.search(r'\(([^)]+)\)', raw)
        if m_grp:
            info["group_short"] = m_grp.group(1)
        # Whatever follows the first ")" is the lesson-type part.
        after = raw[raw.find(")") + 1:].strip() if ")" in raw else ""
        if after:
            # Drop one pair of wrapping parentheses, then prefer the last
            # innermost "(…)" fragment if there is one.
            unwrapped = re.sub(r'^\((.+)\)$', r'\1', after.strip())
            inner = re.search(r'\(([^()]+)\)\s*$', unwrapped)
            info["lesson_type"] = inner.group(1) if inner else unwrapped
    return info


def _parse_lessons(table):
    """Return the lesson dicts for every 4-cell row of *table* whose first cell is a number."""
    lessons = []
    for row in table.find_all("tr"):
        tds = row.find_all("td", recursive=False)
        if len(tds) != 4:
            continue
        num_td = tds[0].get_text(strip=True)
        time_td = tds[1].get_text(strip=True)
        room_td = tds[3].get_text(strip=True)
        if not _LESSON_NUM_RE.match(num_td):
            continue
        info = _parse_subject_cell(tds[2])
        lessons.append({
            "num": num_td,
            "time": time_td,
            "subject": info["subject"],
            "group": info["group"],
            "group_short": info["group_short"],
            "lesson_type": info["lesson_type"],
            "location": info["location"],
            "room": room_td,
            "has_class": bool(info["subject"]),
        })
    return lessons


def parse_schedule(html):
    """Parse a schedule page into a JSON-serialisable dict.

    Args:
        html: page markup as text.

    Returns:
        A dict with keys ``days`` (list of per-day dicts holding ``name``,
        ``date``, ``week_num``, ``lessons`` and ``has_classes``),
        ``week_range`` (str), ``prev_wk`` / ``next_wk`` (int or ``None``) and
        ``error`` (always ``None`` here).
    """
    soup = BeautifulSoup(html, "lxml")
    result = {
        "days": [],
        "week_range": "",
        "prev_wk": None,
        "next_wk": None,
        "error": None,
    }

    # Week range
    result["week_range"] = _find_week_range(soup)

    # Links to the neighbouring weeks
    result["prev_wk"], result["next_wk"] = _find_adjacent_weeks(soup)

    # Days of the week
    for anchor in soup.find_all("a", class_="t_wth"):
        m = _DAY_HEADER_RE.match(anchor.get_text(strip=True))
        if not m:
            continue
        day_name, day_date, week_num = m.group(1), m.group(2), m.group(3)

        # Locate the enclosing table that holds this day's lessons.
        table = _find_lessons_table(anchor)
        lessons = _parse_lessons(table) if table is not None else []

        result["days"].append({
            "name": day_name,
            "date": day_date,
            "week_num": week_num,
            "lessons": lessons,
            "has_classes": any(lesson["has_class"] for lesson in lessons),
        })

    return result


# ── Teacher list cache ───────────────────────────────────────────────────────
_teachers_cache = {"data": None, "ts": 0}
CACHE_TTL = 3600  # refresh once an hour


def fetch_all_teachers():
    """Return the teacher list scraped from the site, cached for ``CACHE_TTL`` seconds.

    Returns:
        A list of ``{"id": str, "name": str}`` dicts. When the download fails
        the failure is logged and the previously cached list is returned if
        there is one, otherwise an empty list.
    """
    now = time.time()
    cached = _teachers_cache["data"]
    if cached and now - _teachers_cache["ts"] < CACHE_TTL:
        return cached

    try:
        html = _fetch_html({"mn": "3"})
    except requests.RequestException:
        logger.warning("Could not refresh the teacher list", exc_info=True)
        # Stale data is more useful to callers than no data at all.
        return cached or []

    soup = BeautifulSoup(html, "lxml")
    teachers = []
    for a in soup.find_all("a", href=lambda h: h and "obj=" in h):
        m = _OBJ_PARAM_RE.search(a["href"])
        bold = a.find("b")
        if m and bold is not None:
            teachers.append({
                "id": m.group(1),
                "name": bold.get_text(strip=True),
            })
    _teachers_cache["data"] = teachers
    _teachers_cache["ts"] = now
    return teachers


@app.route("/api/all-teachers")
def api_all_teachers():
    """Full teacher list from the site (cached for one hour)."""
    teachers = fetch_all_teachers()
    return jsonify({"teachers": teachers, "count": len(teachers)})


# ── Routes ───────────────────────────────────────────────────────────────────
@app.route("/")
def index():
    """Serve the single-page front end."""
    return render_template("index.html")


@app.route("/api/teachers")
def api_teachers():
    """Return the teachers listed in the config plus the default teacher id."""
    teachers_list = [
        {
            "id": str(t["id"]),
            "name": t["name"],
            "short": t.get("short", t["name"]),
        }
        for t in config.get("teachers") or []
    ]
    return jsonify({"teachers": teachers_list, "default": DEFAULT_TEACHER})


@app.route("/api/schedule")
def api_schedule():
    """Return the parsed schedule for ``?obj=<teacher id>&wk=<week>``."""
    # An empty ``obj=`` is treated like a missing one.
    obj = request.args.get("obj") or DEFAULT_TEACHER
    wk = request.args.get("wk")

    # Name: first from the config, then from the full scraped list.
    teacher_info = TEACHERS.get(str(obj), {})
    name = teacher_info.get("name", "")
    if not name:
        match = next(
            (t for t in fetch_all_teachers() if t["id"] == str(obj)), None
        )
        if match:
            name = match["name"]

    data = fetch_schedule(obj=obj, wk=wk)
    data["teacher_name"] = name
    return jsonify(data)


@app.route("/api/reload-config")
def api_reload():
    """Re-read ``config.yaml`` and replace the module-level settings.

    The new config is fully validated before any global is rebound, so a
    broken file leaves the running settings untouched and yields a JSON
    error with status 500.
    """
    # NOTE(review): this state-changing endpoint is reachable via GET and has
    # no access control in this file — confirm that is acceptable.
    global config, BASE_URL, DEFAULT_TEACHER, TEACHERS
    try:
        new_config = load_config()
        new_settings = _derive_settings(new_config)
    except (OSError, yaml.YAMLError, KeyError, TypeError, AttributeError) as e:
        logger.exception("Config reload failed")
        return jsonify({"ok": False, "error": str(e)}), 500

    config = new_config
    BASE_URL, DEFAULT_TEACHER, TEACHERS = new_settings
    # The cached teacher list was scraped from the previous BASE_URL.
    _teachers_cache["data"] = None
    _teachers_cache["ts"] = 0
    return jsonify({"ok": True, "teachers_count": len(TEACHERS)})


@app.route("/api/debug")
def api_debug():
    """Debugging aid: show what the parser found on the page."""
    obj = request.args.get("obj", DEFAULT_TEACHER)
    try:
        html = _fetch_html({"mn": "3", "obj": obj})
    except requests.RequestException as e:
        return jsonify({"error": str(e)})

    soup = BeautifulSoup(html, "lxml")
    day_anchors = soup.find_all("a", class_="t_wth")

    return jsonify({
        "html_length": len(html),
        "html_snippet": html[:300],
        "week_range_found": _find_week_range(soup),
        "day_anchors_count": len(day_anchors),
        "day_anchors": [a.get_text(strip=True) for a in day_anchors],
        "full_parse": parse_schedule(html),
    })


# Direct-run entry point, left disabled as in the original file.
# if __name__ == "__main__":
#     app.run(debug=False, host="0.0.0.0", port=5609)