Coverage for tools / sel_tools / file_parsing / slide_parser.py: 100%
28 statements
« prev ^ index » next coverage.py v7.13.4, created at 2026-03-02 18:55 +0000
« prev ^ index » next coverage.py v7.13.4, created at 2026-03-02 18:55 +0000
1"""Parse homework slides for tasks and documentation."""
3import re
4from pathlib import Path
6from sel_tools.config import REPO_DIR
7from sel_tools.utils.task import Task
# Heading that opens a task section; group 1 captures the title line,
# e.g. "Task 1 - Setup".
TASK_HEADER_PATTERN = r"## (Task \d+ - [^\n]*)\n"
# Marker that closes a task section.
TASK_FOOTER_PATTERN = r"---"
# Heading of the documentation section; group 1 captures what follows it
# (up to the end of the text when matched with re.DOTALL).
DOCUMENTATION_PATTERN = r"## Documentation\n(.*)"
def get_tasks_from_slides(slides_markdown_file: Path) -> list[Task]:
    """Parse slides for tasks.

    Every span that starts with a ``TASK_HEADER_PATTERN`` heading and runs up
    to the first following ``TASK_FOOTER_PATTERN`` becomes one task. The text
    captured by ``DOCUMENTATION_PATTERN`` (empty if absent) is passed to every
    task.

    Args:
        slides_markdown_file: Markdown file containing the slides.

    Returns:
        One ``Task`` per task section, in file order, after
        ``fill_attachments`` has been applied to them.

    Raises:
        LookupError: If the file contains no task section.
    """
    # Decode explicitly as UTF-8 so parsing does not depend on the locale's
    # default encoding.
    text = slides_markdown_file.read_text(encoding="utf-8")

    # Lazy capture: a description stops at the first footer after its header.
    task_section_pattern = TASK_HEADER_PATTERN + "(.*?)" + TASK_FOOTER_PATTERN
    matches = re.findall(task_section_pattern, text, re.DOTALL)
    if not matches:
        msg = f"Couldn't find any task in {slides_markdown_file}!"
        raise LookupError(msg)

    documentation_match = re.search(DOCUMENTATION_PATTERN, text, re.DOTALL)
    documentation = documentation_match.group(1) if documentation_match else ""

    tasks: list[Task] = [Task(title, description, documentation) for (title, description) in matches]
    fill_attachments(tasks)
    return tasks
def fill_attachments(tasks: list[Task]) -> None:
    """Populate the ``attachments`` attribute of every task in place.

    Each task's attachments are derived from its own ``description`` via
    ``get_attachments``.
    """
    for current in tasks:
        description = current.description
        current.attachments = get_attachments(description)
def get_attachments(description: str) -> list[Path]:
    """Get attachments from task description.

    An attachment is a markdown link whose target starts with ``/``, e.g.
    ``[data](/assets/data.csv)``. The target, without that leading slash, is
    joined onto ``REPO_DIR``.

    Args:
        description: Markdown text of a single task.

    Returns:
        One path per matching link, in order of appearance; an empty list
        when the description contains no such link.
    """
    # The pattern is deliberately not wrapped in ".*?": re.findall already
    # scans for every occurrence, and a leading ".*?" makes each failed start
    # position rescan the rest of the text (quadratic on link-free input)
    # without changing what is captured.
    markdown_local_file_link_pattern = r"\[[^\[]*?\]\(/(.*?)\)"
    # DOTALL lets "." cross line breaks inside the link target.
    matches = re.findall(markdown_local_file_link_pattern, description, re.DOTALL)
    # An empty match list naturally yields an empty result.
    return [REPO_DIR / file_path for file_path in matches]