# Flask app: scrapes a university timetable site and exposes it as a JSON API.
import os
import re
import time

import requests
import yaml
from bs4 import BeautifulSoup
from flask import Flask, render_template, jsonify, request
app = Flask(__name__)

# ── Config loading ───────────────────────────────────────────────────────────
# config.yaml lives next to this file; parsed by load_config() below.
CONFIG_PATH = os.path.join(os.path.dirname(__file__), "config.yaml")
def load_config():
    """Read and parse the YAML configuration file.

    Returns:
        The parsed contents of ``config.yaml`` (typically a dict).
    """
    with open(CONFIG_PATH, "r", encoding="utf-8") as cfg_file:
        return yaml.safe_load(cfg_file)
# Module-level globals derived from the config; refreshed by /api/reload-config.
config = load_config()
BASE_URL = config["base_url"]                             # remote schedule endpoint
DEFAULT_TEACHER = str(config.get("default_teacher", ""))  # fallback teacher id
TEACHERS = {str(t["id"]): t for t in config.get("teachers", [])}  # id -> config entry
# ── Parser ───────────────────────────────────────────────────────────────────
def fetch_schedule(obj=None, wk=None):
    """Download and parse the schedule page for one teacher.

    Args:
        obj: teacher id as used by the remote site; ``None`` falls back to
            ``DEFAULT_TEACHER`` from the config.
        wk: optional week number; when falsy the site's current week is used.

    Returns:
        dict with keys ``days``, ``week_range``, ``prev_wk``, ``next_wk``
        (see ``parse_schedule``); on a network failure, a stub dict with
        ``error`` set and empty schedule fields.
    """
    if obj is None:
        obj = DEFAULT_TEACHER

    params = {"mn": "3", "obj": obj}
    if wk:
        params["wk"] = wk

    try:
        resp = requests.get(BASE_URL, params=params, timeout=10)
        # Force utf-8 decoding; presumably the server mislabels its
        # charset — TODO confirm against the live site.
        resp.encoding = "utf-8"
        html = resp.text
    except requests.RequestException as e:
        # Narrowed from a bare ``except Exception``: only network/HTTP
        # failures are expected here; programming errors should propagate.
        return {"error": str(e), "days": [], "week_range": "", "prev_wk": None, "next_wk": None}

    return parse_schedule(html)
# Pre-compiled patterns used by the parser (hoisted out of the loops).
_WEEK_RANGE_RE = re.compile(r"с \d{2}\.\d{2}\.\d{4} по \d{2}\.\d{2}\.\d{4}")
_WK_PARAM_RE = re.compile(r"wk=(\d+)")
_DAY_HEADER_RE = re.compile(r"(\w+)\s*(\d{2}\.\d{2}\.\d{4})/(\d+)\s+неделя")
_LESSON_NUM_RE = re.compile(r"^\d+$")


def _find_week_range(soup):
    """Return the first 'с DD.MM.YYYY по DD.MM.YYYY' cell text, or ''."""
    for td in soup.find_all("td"):
        text = td.get_text(strip=True)
        if _WEEK_RANGE_RE.match(text):
            return text
    return ""


def _find_adjacent_weeks(soup):
    """Return (prev_wk, next_wk) week numbers from the navigation links.

    Scans every anchor with a ``wk=N`` query parameter, de-duplicating week
    numbers, and keeps the first link whose text mentions the previous /
    next week.  Either value may be None when no such link exists.
    """
    prev_wk = next_wk = None
    seen_wk = set()
    for a in soup.find_all("a", href=True):
        m = _WK_PARAM_RE.search(a["href"])
        if not m:
            continue
        wk_num = int(m.group(1))
        if wk_num in seen_wk:
            continue
        seen_wk.add(wk_num)
        text = a.get_text(strip=True).lower()
        if "предыдущая" in text and prev_wk is None:
            prev_wk = wk_num
        elif "следующая" in text and next_wk is None:
            next_wk = wk_num
    return prev_wk, next_wk


def _find_day_table(anchor):
    """Climb from a day-header anchor to its enclosing lessons table.

    The site wraps each day in a ``<table cellpadding="1">``; we walk at
    most 10 ancestors.  Returns the table tag, or None when no matching
    ancestor exists.  (The original code crashed with AttributeError when
    the climb walked past the document root, and on no match it scanned an
    arbitrary ancestor's rows; both are fixed by returning None here.)
    """
    node = anchor
    for _ in range(10):
        node = node.parent
        if node is None:
            return None
        if node.name == "table" and node.get("cellpadding") == "1":
            return node
    return None


def _parse_lesson_fields(subj_td):
    """Extract subject / group / lesson type / location from one lesson cell."""
    info = {"subject": "", "group": "", "group_short": "", "lesson_type": "", "location": ""}

    bold = subj_td.find("b")
    if bold:
        info["subject"] = bold.get_text(strip=True)

    font_green = subj_td.find("font", class_="t_green_10")
    if font_green:
        info["location"] = font_green.get_text(strip=True)

    # The group / lesson-type text sits between the <b> subject and the
    # first <font> sibling, possibly split by <br> tags.
    raw = ""
    if bold:
        for node in bold.next_siblings:
            if hasattr(node, "name"):
                if node.name == "font":
                    break
                if node.name == "br":
                    continue
                raw += node.get_text(strip=True)
            else:
                # Plain text node (NavigableString).
                raw += str(node).strip()
    raw = raw.strip()

    if raw:
        info["group"] = raw
        m_grp = re.search(r'\(([^)]+)\)', raw)
        if m_grp:
            info["group_short"] = m_grp.group(1)

        # Everything after the first ')' is the lesson type, optionally
        # wrapped in one more level of parentheses.
        after = raw[raw.find(")") + 1:].strip() if ")" in raw else ""
        if after:
            unwrapped = re.sub(r'^\((.+)\)$', r'\1', after.strip())
            inner = re.search(r'\(([^()]+)\)\s*$', unwrapped)
            info["lesson_type"] = inner.group(1) if inner else unwrapped
    return info


def _parse_day_lessons(table):
    """Parse all lesson rows (exactly 4 direct <td> cells) of one day table."""
    lessons = []
    if table is None:
        return lessons
    for row in table.find_all("tr"):
        tds = row.find_all("td", recursive=False)
        if len(tds) != 4:
            continue

        num_td = tds[0].get_text(strip=True)
        # Skip header/filler rows whose first cell is not a lesson number.
        if not _LESSON_NUM_RE.match(num_td):
            continue

        info = _parse_lesson_fields(tds[2])
        lessons.append({
            "num": num_td,
            "time": tds[1].get_text(strip=True),
            "subject": info["subject"],
            "group": info["group"],
            "group_short": info["group_short"],
            "lesson_type": info["lesson_type"],
            "location": info["location"],
            "room": tds[3].get_text(strip=True),
            "has_class": bool(info["subject"]),
        })
    return lessons


def parse_schedule(html):
    """Parse the remote schedule HTML into a JSON-serialisable dict.

    Args:
        html: raw HTML of the schedule page.

    Returns:
        dict with:
            days: list of day dicts (name, date, week_num, lessons, has_classes);
            week_range: human-readable week span label, '' if not found;
            prev_wk / next_wk: adjacent week numbers or None;
            error: always None (errors are reported by the fetch layer).
    """
    soup = BeautifulSoup(html, "lxml")
    prev_wk, next_wk = _find_adjacent_weeks(soup)
    result = {
        "days": [],
        "week_range": _find_week_range(soup),
        "prev_wk": prev_wk,
        "next_wk": next_wk,
        "error": None,
    }

    # Each day is announced by an <a class="t_wth"> header like
    # "Понедельник 01.09.2025/1 неделя".
    for anchor in soup.find_all("a", class_="t_wth"):
        m = _DAY_HEADER_RE.match(anchor.get_text(strip=True))
        if not m:
            continue

        lessons = _parse_day_lessons(_find_day_table(anchor))
        result["days"].append({
            "name": m.group(1),
            "date": m.group(2),
            "week_num": m.group(3),
            "lessons": lessons,
            "has_classes": any(l["has_class"] for l in lessons),
        })

    return result
# ── Teacher-list cache ───────────────────────────────────────────────────────
# Module-level cache for the scraped teacher list: "data" holds the last
# result, "ts" the epoch timestamp of that fetch (see fetch_all_teachers).
_teachers_cache = {"data": None, "ts": 0}
CACHE_TTL = 3600  # refresh at most once per hour (seconds)
def fetch_all_teachers():
    """Scrape the complete teacher list from the schedule site.

    Results are memoised in ``_teachers_cache`` for ``CACHE_TTL`` seconds.

    Returns:
        list of ``{"id": str, "name": str}`` dicts; an empty list on
        network failure (best-effort — this endpoint never raises for
        connectivity problems).
    """
    now = time.time()
    if _teachers_cache["data"] and now - _teachers_cache["ts"] < CACHE_TTL:
        return _teachers_cache["data"]

    try:
        resp = requests.get(BASE_URL, params={"mn": "3"}, timeout=10)
        # Force utf-8; presumably the server mislabels its charset — TODO confirm.
        resp.encoding = "utf-8"
        soup = BeautifulSoup(resp.text, "lxml")
    except requests.RequestException:
        # Narrowed from a bare ``except Exception``: only network/HTTP
        # failures are expected; a parser bug should surface, not be hidden.
        return []

    teachers = []
    # Teacher links look like <a href="...obj=NNN"><b>Name</b></a>.
    for a in soup.find_all("a", href=lambda h: h and "obj=" in h):
        m = re.search(r"obj=(\d+)", a["href"])
        bold = a.find("b")
        if m and bold:
            teachers.append({
                "id": m.group(1),
                "name": bold.get_text(strip=True),
            })

    # Only cache successful fetches.
    _teachers_cache["data"] = teachers
    _teachers_cache["ts"] = now
    return teachers
@app.route("/api/all-teachers")
def api_all_teachers():
    """Full teacher list scraped from the site (cached for one hour)."""
    found = fetch_all_teachers()
    return jsonify({"teachers": found, "count": len(found)})
# ── Routes ───────────────────────────────────────────────────────────────────
@app.route("/")
def index():
    """Serve the single-page front-end."""
    return render_template("index.html")
@app.route("/api/teachers")
def api_teachers():
    """Teachers listed in config.yaml, plus the configured default id."""
    listed = []
    for entry in config.get("teachers", []):
        listed.append({
            "id": str(entry["id"]),
            "name": entry["name"],
            "short": entry.get("short", entry["name"]),
        })
    return jsonify({"teachers": listed, "default": DEFAULT_TEACHER})
@app.route("/api/schedule")
def api_schedule():
    """Schedule for one teacher: ``?obj=`` selects the teacher, ``?wk=`` the week."""
    obj = request.args.get("obj", DEFAULT_TEACHER)
    wk = request.args.get("wk", None)

    # Resolve the display name: config entries first, then the scraped full list.
    name = TEACHERS.get(str(obj), {}).get("name", "")
    if not name:
        for candidate in fetch_all_teachers():
            if candidate["id"] == str(obj):
                name = candidate["name"]
                break

    data = fetch_schedule(obj=obj, wk=wk)
    data["teacher_name"] = name
    return jsonify(data)
@app.route("/api/reload-config")
def api_reload():
    """Re-read config.yaml and refresh the derived module-level globals."""
    global config, BASE_URL, DEFAULT_TEACHER, TEACHERS
    fresh = load_config()
    config = fresh
    BASE_URL = fresh["base_url"]
    DEFAULT_TEACHER = str(fresh.get("default_teacher", ""))
    TEACHERS = {}
    for entry in fresh.get("teachers", []):
        TEACHERS[str(entry["id"])] = entry
    return jsonify({"ok": True, "teachers_count": len(TEACHERS)})
# NOTE: direct execution is intentionally disabled — presumably the app is
# served by an external WSGI server. To run standalone, restore:
#   if __name__ == "__main__":
#       app.run(debug=False, host="0.0.0.0", port=5609)
@app.route("/api/debug")
def api_debug():
    """Debug endpoint: fetch the raw page and report what the parser sees.

    Query params:
        obj: teacher id; defaults to ``DEFAULT_TEACHER``.

    Returns:
        JSON with the raw HTML length/snippet, the independently-scanned
        week range, the day anchors found, and the full ``parse_schedule``
        output — or ``{"error": ...}`` on a network failure.
    """
    obj = request.args.get("obj", DEFAULT_TEACHER)
    params = {"mn": "3", "obj": obj}
    try:
        resp = requests.get(BASE_URL, params=params, timeout=10)
        # Force utf-8; presumably the server mislabels its charset — TODO confirm.
        resp.encoding = "utf-8"
        html = resp.text
    except requests.RequestException as e:
        # Narrowed from a bare ``except Exception`` for consistency with
        # fetch_schedule: only network/HTTP failures belong here.
        return jsonify({"error": str(e)})

    soup = BeautifulSoup(html, "lxml")
    day_anchors = soup.find_all("a", class_="t_wth")

    # Independent week-range scan so the raw result can be compared against
    # what parse_schedule() reports below.
    week_range = ""
    for td in soup.find_all("td"):
        text = td.get_text(strip=True)
        if re.match(r"с \d{2}\.\d{2}\.\d{4} по \d{2}\.\d{2}\.\d{4}", text):
            week_range = text
            break

    return jsonify({
        "html_length": len(html),
        "html_snippet": html[:300],
        "week_range_found": week_range,
        "day_anchors_count": len(day_anchors),
        "day_anchors": [a.get_text(strip=True) for a in day_anchors],
        "full_parse": parse_schedule(html),
    })