Files
2026-03-02 14:21:02 +04:00

275 lines
9.8 KiB
Python
Raw Permalink Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
from flask import Flask, render_template, jsonify, request
import requests
from bs4 import BeautifulSoup
import re
import yaml
import os
app = Flask(__name__)

# ── Config loading ───────────────────────────────────────────────────────────
CONFIG_PATH = os.path.join(os.path.dirname(__file__), "config.yaml")


def load_config():
    """Read and parse the config.yaml that sits next to this module."""
    with open(CONFIG_PATH, "r", encoding="utf-8") as f:
        return yaml.safe_load(f)


config = load_config()
BASE_URL = config["base_url"]
DEFAULT_TEACHER = str(config.get("default_teacher", ""))
# Config-declared teachers keyed by stringified id for O(1) lookup.
TEACHERS = {str(t["id"]): t for t in config.get("teachers", [])}
# ── Parser ───────────────────────────────────────────────────────────────────
def fetch_schedule(obj=None, wk=None):
    """Download the schedule page for teacher *obj* and week *wk*, then parse it.

    obj: teacher id as used in the site's ?obj= query parameter; defaults to
         DEFAULT_TEACHER from the config.
    wk:  optional week number for the ?wk= query parameter.

    Returns the dict produced by parse_schedule(), or — on any request
    failure — an error stub with the same top-level keys.
    """
    if obj is None:
        obj = DEFAULT_TEACHER
    params = {"mn": "3", "obj": obj}
    if wk:
        params["wk"] = wk
    try:
        resp = requests.get(BASE_URL, params=params, timeout=10)
        # BUGFIX: without this, an HTTP 4xx/5xx error page was parsed as if it
        # were a schedule; now it is reported through the error stub instead.
        resp.raise_for_status()
        # Force UTF-8 decoding; the server apparently sends no usable charset
        # header — TODO confirm against a live response.
        resp.encoding = "utf-8"
        html = resp.text
    except Exception as e:
        return {"error": str(e), "days": [], "week_range": "", "prev_wk": None, "next_wk": None}
    return parse_schedule(html)
def _extract_week_range(soup):
    """Return the 'с DD.MM.YYYY по DD.MM.YYYY' label text, or '' if absent."""
    for td in soup.find_all("td"):
        text = td.get_text(strip=True)
        if re.match(r"с \d{2}\.\d{2}\.\d{4} по \d{2}\.\d{2}\.\d{4}", text):
            return text
    return ""


def _extract_week_links(soup):
    """Return (prev_wk, next_wk) week numbers from the nav links, None when missing."""
    prev_wk = next_wk = None
    seen_wk = set()
    for a in soup.find_all("a", href=True):
        wk_val = re.search(r"wk=(\d+)", a["href"])
        if not wk_val:
            continue
        wk_num = int(wk_val.group(1))
        if wk_num in seen_wk:  # each week number considered only once
            continue
        seen_wk.add(wk_num)
        text = a.get_text(strip=True).lower()
        if "предыдущая" in text and prev_wk is None:
            prev_wk = wk_num
        elif "следующая" in text and next_wk is None:
            next_wk = wk_num
    return prev_wk, next_wk


def _parse_subject_cell(subj_td):
    """Split a lesson's subject <td> into subject/group/group_short/lesson_type/location."""
    info = {"subject": "", "group": "", "group_short": "", "lesson_type": "", "location": ""}
    bold = subj_td.find("b")
    if bold:
        info["subject"] = bold.get_text(strip=True)
    font_green = subj_td.find("font", class_="t_green_10")
    if font_green:
        info["location"] = font_green.get_text(strip=True)
    # Group/type text sits between the <b> subject and the <font> location.
    raw = ""
    if bold:
        for node in bold.next_siblings:
            if hasattr(node, "name"):  # Tag (NavigableString has no .name)
                if node.name == "font":
                    break  # reached the location block
                if node.name == "br":
                    continue
                raw += node.get_text(strip=True)
            else:
                raw += str(node).strip()
    raw = raw.strip()
    if raw:
        info["group"] = raw
        m_grp = re.search(r'\(([^)]+)\)', raw)
        if m_grp:
            info["group_short"] = m_grp.group(1)
        # Anything after the first ')' is the lesson type, possibly wrapped
        # in one more pair of parentheses.
        after = raw[raw.find(")") + 1:].strip() if ")" in raw else ""
        if after:
            unwrapped = re.sub(r'^\((.+)\)$', r'\1', after.strip())
            inner = re.search(r'\(([^()]+)\)\s*$', unwrapped)
            info["lesson_type"] = inner.group(1) if inner else unwrapped
    return info


def _parse_lessons(table):
    """Parse every 4-column lesson row of one day's table into lesson dicts."""
    lessons = []
    for row in table.find_all("tr"):
        tds = row.find_all("td", recursive=False)
        if len(tds) != 4:
            continue
        num_td = tds[0].get_text(strip=True)
        if not re.match(r"^\d+$", num_td):
            continue  # skip header / filler rows
        info = _parse_subject_cell(tds[2])
        lessons.append({
            "num": num_td,
            "time": tds[1].get_text(strip=True),
            "subject": info["subject"],
            "group": info["group"],
            "group_short": info["group_short"],
            "lesson_type": info["lesson_type"],
            "location": info["location"],
            "room": tds[3].get_text(strip=True),
            "has_class": bool(info["subject"]),
        })
    return lessons


def parse_schedule(html):
    """Parse the site's schedule HTML into a JSON-ready dict.

    Returns {"days": [...], "week_range": str, "prev_wk": int|None,
    "next_wk": int|None, "error": None}; each day carries its parsed lessons
    and a has_classes flag.
    """
    soup = BeautifulSoup(html, "lxml")
    prev_wk, next_wk = _extract_week_links(soup)
    result = {
        "days": [],
        "week_range": _extract_week_range(soup),
        "prev_wk": prev_wk,
        "next_wk": next_wk,
        "error": None,
    }
    for anchor in soup.find_all("a", class_="t_wth"):
        m = re.match(r"(\w+)\s*(\d{2}\.\d{2}\.\d{4})/(\d+)\s+неделя", anchor.get_text(strip=True))
        if not m:
            continue
        # BUGFIX: the old manual 10-step .parent walk raised AttributeError
        # (None.parent) when the anchor had fewer than 10 ancestors without a
        # matching table, and could silently settle on a wrong ancestor;
        # find_parent() searches the same chain safely and returns None.
        day_table = anchor.find_parent("table", attrs={"cellpadding": "1"})
        lessons = _parse_lessons(day_table) if day_table else []
        result["days"].append({
            "name": m.group(1),
            "date": m.group(2),
            "week_num": m.group(3),
            "lessons": lessons,
            "has_classes": any(l["has_class"] for l in lessons),
        })
    return result
# ── Кэш списка преподавателей ────────────────────────────────────────────────
_teachers_cache = {"data": None, "ts": 0}
CACHE_TTL = 3600 # обновлять раз в час
def fetch_all_teachers():
import time
now = time.time()
if _teachers_cache["data"] and now - _teachers_cache["ts"] < CACHE_TTL:
return _teachers_cache["data"]
try:
resp = requests.get(BASE_URL, params={"mn": "3"}, timeout=10)
resp.encoding = "utf-8"
soup = BeautifulSoup(resp.text, "lxml")
teachers = []
for a in soup.find_all("a", href=lambda h: h and "obj=" in h):
m = re.search(r"obj=(\d+)", a["href"])
bold = a.find("b")
if m and bold:
teachers.append({
"id": m.group(1),
"name": bold.get_text(strip=True)
})
_teachers_cache["data"] = teachers
_teachers_cache["ts"] = now
return teachers
except Exception as e:
return []
@app.route("/api/all-teachers")
def api_all_teachers():
    """Полный список преподавателей с сайта (с кэшем на 1 час)."""
    found = fetch_all_teachers()
    return jsonify({"teachers": found, "count": len(found)})
# ── Routes ───────────────────────────────────────────────────────────────────
@app.route("/")
def index():
    """Serve the single-page UI template."""
    return render_template("index.html")
@app.route("/api/teachers")
def api_teachers():
    """Teachers declared in config.yaml plus the configured default id."""
    payload = []
    for t in config.get("teachers", []):
        payload.append({
            "id": str(t["id"]),
            "name": t["name"],
            "short": t.get("short", t["name"]),
        })
    return jsonify({"teachers": payload, "default": DEFAULT_TEACHER})
@app.route("/api/schedule")
def api_schedule():
    """Schedule for teacher ?obj= and week ?wk=, with the teacher's display name."""
    obj = request.args.get("obj", DEFAULT_TEACHER)
    wk = request.args.get("wk")
    obj_key = str(obj)
    # Resolve the name: config entry first, then the scraped full list.
    name = TEACHERS.get(obj_key, {}).get("name", "")
    if not name:
        for t in fetch_all_teachers():
            if t["id"] == obj_key:
                name = t["name"]
                break
    data = fetch_schedule(obj=obj, wk=wk)
    data["teacher_name"] = name
    return jsonify(data)
@app.route("/api/reload-config")
def api_reload():
    """Re-read config.yaml and refresh the module-level settings in place."""
    global config, BASE_URL, DEFAULT_TEACHER, TEACHERS
    fresh = load_config()
    config = fresh
    BASE_URL = fresh["base_url"]
    DEFAULT_TEACHER = str(fresh.get("default_teacher", ""))
    TEACHERS = {str(t["id"]): t for t in fresh.get("teachers", [])}
    return jsonify({"ok": True, "teachers_count": len(TEACHERS)})
#if __name__ == "__main__":
# app.run(debug=False, host="0.0.0.0", port=5609)
@app.route("/api/debug")
def api_debug():
    """Отладка: показывает что нашёл парсер.

    Fetches the raw page for ?obj= (or the default teacher) and returns both
    raw-HTML diagnostics and the full parse_schedule() output.
    """
    obj = request.args.get("obj", DEFAULT_TEACHER)
    params = {"mn": "3", "obj": obj}
    try:
        resp = requests.get(BASE_URL, params=params, timeout=10)
        resp.encoding = "utf-8"
        html = resp.text
    except Exception as e:
        return jsonify({"error": str(e)})
    soup = BeautifulSoup(html, "lxml")
    day_anchors = soup.find_all("a", class_="t_wth")
    # CONSISTENCY FIX: the week-range scan was duplicated here verbatim from
    # parse_schedule; reuse the single parse result so the two endpoints can
    # never drift apart.
    parsed = parse_schedule(html)
    return jsonify({
        "html_length": len(html),
        "html_snippet": html[:300],
        "week_range_found": parsed["week_range"],
        "day_anchors_count": len(day_anchors),
        "day_anchors": [a.get_text(strip=True) for a in day_anchors],
        "full_parse": parsed,
    })