Initial commit

This commit is contained in:
kilyabin
2026-03-02 14:21:02 +04:00
commit d78668ce3a
11 changed files with 1278 additions and 0 deletions

274
app.py Normal file
View File

@@ -0,0 +1,274 @@
import os
import re
import time

import requests
import yaml
from bs4 import BeautifulSoup
from flask import Flask, render_template, jsonify, request
app = Flask(__name__)
# ── Config loading ───────────────────────────────────────────────────────────
# config.yaml is expected to live next to this file.
CONFIG_PATH = os.path.join(os.path.dirname(__file__), "config.yaml")
def load_config():
    """Read and parse the YAML configuration file at CONFIG_PATH."""
    with open(CONFIG_PATH, "r", encoding="utf-8") as fh:
        return yaml.safe_load(fh)
# Module-level settings derived from the config; refreshed by /api/reload-config.
config = load_config()
# Base URL of the schedule site that gets scraped.
BASE_URL = config["base_url"]
# Teacher id used when a request does not specify one.
DEFAULT_TEACHER = str(config.get("default_teacher", ""))
# Teacher entries from the config, keyed by string id for quick lookup.
TEACHERS = {str(t["id"]): t for t in config.get("teachers", [])}
# ── Парсер ───────────────────────────────────────────────────────────────────
def fetch_schedule(obj=None, wk=None):
    """Download the schedule page for one teacher and return the parsed result.

    obj -- teacher id; falls back to DEFAULT_TEACHER when omitted.
    wk  -- optional week number to request.

    On any network/decoding failure a dict with an "error" message and empty
    schedule fields is returned instead of raising.
    """
    query = {"mn": "3", "obj": DEFAULT_TEACHER if obj is None else obj}
    if wk:
        query["wk"] = wk
    try:
        response = requests.get(BASE_URL, params=query, timeout=10)
        response.encoding = "utf-8"
        page = response.text
    except Exception as exc:  # best-effort boundary: report, don't crash
        return {"error": str(exc), "days": [], "week_range": "", "prev_wk": None, "next_wk": None}
    return parse_schedule(page)
def parse_schedule(html):
    """Parse the schedule page HTML into a structured dict.

    Returns a dict with keys:
      days       -- list of per-day dicts: name, date, week_num, lessons,
                    has_classes
      week_range -- "с DD.MM.YYYY по DD.MM.YYYY" text, if present on the page
      prev_wk / next_wk -- week numbers linked as previous/next week, or None
      error      -- always None here (fetch errors are handled upstream)
    """
    soup = BeautifulSoup(html, "lxml")
    result = {
        "days": [],
        "week_range": "",
        "prev_wk": None,
        "next_wk": None,
        "error": None
    }
    # Week range: the first <td> whose text matches the "с ... по ..." pattern.
    for td in soup.find_all("td"):
        text = td.get_text(strip=True)
        if re.match(r"с \d{2}\.\d{2}\.\d{4} по \d{2}\.\d{2}\.\d{4}", text):
            result["week_range"] = text
            break
    # Links to the adjacent weeks; the first anchor naming the previous/next
    # week wins, duplicates (same wk number) are skipped.
    seen_wk = set()
    for a in soup.find_all("a", href=True):
        href = a["href"]
        if "wk=" not in href:
            continue
        wk_val = re.search(r"wk=(\d+)", href)
        if not wk_val:
            continue
        wk_num = int(wk_val.group(1))
        if wk_num in seen_wk:
            continue
        seen_wk.add(wk_num)
        text = a.get_text(strip=True).lower()
        if "предыдущая" in text and result["prev_wk"] is None:
            result["prev_wk"] = wk_num
        elif "следующая" in text and result["next_wk"] is None:
            result["next_wk"] = wk_num
    # Each day is anchored by an <a class="t_wth"> header like
    # "Понедельник 01.09.2025/1 неделя".
    day_anchors = soup.find_all("a", class_="t_wth")
    for anchor in day_anchors:
        day_text = anchor.get_text(strip=True)
        m = re.match(r"(\w+)\s*(\d{2}\.\d{2}\.\d{4})/(\d+)\s+неделя", day_text)
        if not m:
            continue
        day_name, day_date, week_num = m.group(1), m.group(2), m.group(3)
        # Walk up to the enclosing lesson table (<table cellpadding="1">).
        # Bug fix: the original dereferenced .parent past the document root
        # (None), raising AttributeError, and when the table was not found
        # within 10 levels it parsed rows from an arbitrary ancestor.  Here
        # the walk stops at None and rows are parsed only from a confirmed
        # matching table.
        table = None
        node = anchor
        for _ in range(10):
            node = node.parent
            if node is None:
                break
            if node.name == "table" and node.get("cellpadding") == "1":
                table = node
                break
        lessons = []
        if table is not None:
            for row in table.find_all("tr"):
                tds = row.find_all("td", recursive=False)
                if len(tds) != 4:
                    continue
                num_td = tds[0].get_text(strip=True)
                time_td = tds[1].get_text(strip=True)
                subj_td = tds[2]
                room_td = tds[3].get_text(strip=True)
                # Only rows whose first cell is a bare lesson number count.
                if not re.match(r"^\d+$", num_td):
                    continue
                info = {"subject": "", "group": "", "group_short": "", "lesson_type": "", "location": ""}
                bold = subj_td.find("b")
                if bold:
                    info["subject"] = bold.get_text(strip=True)
                font_green = subj_td.find("font", class_="t_green_10")
                if font_green:
                    info["location"] = font_green.get_text(strip=True)
                # Free text between the bold subject and the green <font>
                # (location) carries the group / lesson-type annotation.
                raw = ""
                if bold:
                    for sib in bold.next_siblings:
                        if hasattr(sib, "name"):
                            if sib.name == "font":
                                break
                            if sib.name == "br":
                                continue
                            raw += sib.get_text(strip=True)
                        else:
                            raw += str(sib).strip()
                raw = raw.strip()
                if raw:
                    info["group"] = raw
                    # First "(...)" is the short group label.
                    m_grp = re.search(r'\(([^)]+)\)', raw)
                    if m_grp:
                        info["group_short"] = m_grp.group(1)
                    # Everything after the first ")" is the lesson type,
                    # possibly wrapped in one more pair of parentheses.
                    after = raw[raw.find(")")+1:].strip() if ")" in raw else ""
                    if after:
                        unwrapped = re.sub(r'^\((.+)\)$', r'\1', after.strip())
                        inner = re.search(r'\(([^()]+)\)\s*$', unwrapped)
                        info["lesson_type"] = inner.group(1) if inner else unwrapped
                lessons.append({
                    "num": num_td,
                    "time": time_td,
                    "subject": info["subject"],
                    "group": info["group"],
                    "group_short": info["group_short"],
                    "lesson_type": info["lesson_type"],
                    "location": info["location"],
                    "room": room_td,
                    "has_class": bool(info["subject"])
                })
        result["days"].append({
            "name": day_name,
            "date": day_date,
            "week_num": week_num,
            "lessons": lessons,
            "has_classes": any(l["has_class"] for l in lessons)
        })
    return result
# ── Кэш списка преподавателей ────────────────────────────────────────────────
# In-process cache for the scraped teacher list; "ts" is the epoch time of
# the last successful refresh.
_teachers_cache = {"data": None, "ts": 0}
CACHE_TTL = 3600  # refresh at most once per hour
def fetch_all_teachers():
    """Scrape the complete teacher list from the schedule site.

    Results are cached in-process for CACHE_TTL seconds.  On a fetch/parse
    failure the previously cached list (possibly stale) is returned instead
    of silently dropping it; [] is returned only when nothing was ever cached.
    """
    now = time.time()
    if _teachers_cache["data"] and now - _teachers_cache["ts"] < CACHE_TTL:
        return _teachers_cache["data"]
    try:
        resp = requests.get(BASE_URL, params={"mn": "3"}, timeout=10)
        resp.encoding = "utf-8"
        soup = BeautifulSoup(resp.text, "lxml")
        teachers = []
        # Teacher links carry an "obj=<id>" query param and a bold name.
        for a in soup.find_all("a", href=lambda h: h and "obj=" in h):
            m = re.search(r"obj=(\d+)", a["href"])
            bold = a.find("b")
            if m and bold:
                teachers.append({
                    "id": m.group(1),
                    "name": bold.get_text(strip=True)
                })
        _teachers_cache["data"] = teachers
        _teachers_cache["ts"] = now
        return teachers
    except Exception:
        # Best-effort: serve stale cached data rather than an empty list.
        return _teachers_cache["data"] or []
@app.route("/api/all-teachers")
def api_all_teachers():
    """Return the full teacher list scraped from the site (cached for 1 h)."""
    roster = fetch_all_teachers()
    return jsonify({"teachers": roster, "count": len(roster)})
# ── Маршруты ─────────────────────────────────────────────────────────────────
@app.route("/")
def index():
    """Serve the single-page front-end."""
    return render_template("index.html")
@app.route("/api/teachers")
def api_teachers():
    """Return the teachers declared in config.yaml plus the default id."""
    roster = []
    for entry in config.get("teachers", []):
        roster.append({
            "id": str(entry["id"]),
            "name": entry["name"],
            "short": entry.get("short", entry["name"]),
        })
    return jsonify({"teachers": roster, "default": DEFAULT_TEACHER})
@app.route("/api/schedule")
def api_schedule():
    """Return one teacher's schedule for a week, with display name attached."""
    obj = request.args.get("obj", DEFAULT_TEACHER)
    wk = request.args.get("wk", None)
    # Resolve the display name: config entries first, then the scraped list.
    name = TEACHERS.get(str(obj), {}).get("name", "")
    if not name:
        hit = next((t for t in fetch_all_teachers() if t["id"] == str(obj)), None)
        if hit:
            name = hit["name"]
    data = fetch_schedule(obj=obj, wk=wk)
    data["teacher_name"] = name
    return jsonify(data)
@app.route("/api/reload-config")
def api_reload():
    """Re-read config.yaml and refresh the module-level settings in place."""
    global config, BASE_URL, DEFAULT_TEACHER, TEACHERS
    fresh = load_config()
    config = fresh
    BASE_URL = fresh["base_url"]
    DEFAULT_TEACHER = str(fresh.get("default_teacher", ""))
    TEACHERS = {str(entry["id"]): entry for entry in fresh.get("teachers", [])}
    return jsonify({"ok": True, "teachers_count": len(TEACHERS)})
#if __name__ == "__main__":
# app.run(debug=False, host="0.0.0.0", port=5609)
@app.route("/api/debug")
def api_debug():
    """Debug endpoint: expose what the parser sees for a given teacher."""
    obj = request.args.get("obj", DEFAULT_TEACHER)
    try:
        resp = requests.get(BASE_URL, params={"mn": "3", "obj": obj}, timeout=10)
        resp.encoding = "utf-8"
        html = resp.text
    except Exception as exc:
        return jsonify({"error": str(exc)})
    soup = BeautifulSoup(html, "lxml")
    anchors = soup.find_all("a", class_="t_wth")
    week_range = ""
    for cell in soup.find_all("td"):
        cell_text = cell.get_text(strip=True)
        if re.match(r"с \d{2}\.\d{2}\.\d{4} по \d{2}\.\d{2}\.\d{4}", cell_text):
            week_range = cell_text
            break
    return jsonify({
        "html_length": len(html),
        "html_snippet": html[:300],
        "week_range_found": week_range,
        "day_anchors_count": len(anchors),
        "day_anchors": [a.get_text(strip=True) for a in anchors],
        "full_parse": parse_schedule(html),
    })