Coverage for sel_tools/file_parsing/slide_parser.py: 100%
26 statements
« prev ^ index » next coverage.py v7.6.8, created at 2024-12-03 10:48 +0000
« prev ^ index » next coverage.py v7.6.8, created at 2024-12-03 10:48 +0000
1"""Parse homework slides for tasks and documentation."""
3import re
4from pathlib import Path
6from sel_tools.config import REPO_DIR
7from sel_tools.file_parsing.config import (
8 DOCUMENTATION_PATTERN,
9 TASK_FOOTER_PATTERN,
10 TASK_HEADER_PATTERN,
11)
12from sel_tools.utils.task import Task
def get_tasks_from_slides(slides_markdown_file: Path) -> list[Task]:
    """Parse slides for tasks.

    Every span of text enclosed between ``TASK_HEADER_PATTERN`` and
    ``TASK_FOOTER_PATTERN`` yields one task. The first match of
    ``DOCUMENTATION_PATTERN`` (if any) is shared by all tasks as their
    documentation; otherwise the documentation is an empty string.

    Args:
        slides_markdown_file: Markdown file containing the slides.

    Returns:
        One ``Task`` per match, in order of appearance, with the
        ``attachments`` field filled from each task's description.

    Raises:
        LookupError: If no task could be found in the file.
    """
    # Decode explicitly as UTF-8 so that parsing does not depend on the
    # locale encoding of the machine running the tool.
    text = slides_markdown_file.read_text(encoding="utf-8")

    # The non-greedy group makes each match stop at the nearest footer instead
    # of swallowing everything up to the last footer in the file.
    minimal_length_between_markers_pattern = TASK_HEADER_PATTERN + "(.*?)" + TASK_FOOTER_PATTERN

    # DOTALL lets a task body span several lines.
    matches = re.findall(minimal_length_between_markers_pattern, text, re.DOTALL)
    if not matches:
        msg = f"Couldn't find any task in {slides_markdown_file}!"
        raise LookupError(msg)

    documentation_match = re.search(DOCUMENTATION_PATTERN, text, re.DOTALL)
    documentation = documentation_match.group(1) if documentation_match else ""

    # NOTE(review): unpacking into (title, description) presumably relies on the
    # header pattern capturing exactly one group (the title) and the footer
    # pattern capturing none -- confirm against sel_tools.file_parsing.config.
    tasks: list[Task] = [Task(title, description, documentation) for (title, description) in matches]
    fill_attachments(tasks)

    return tasks
def fill_attachments(tasks: list[Task]) -> None:
    """Set the ``attachments`` field of every given task, in place.

    Each task receives whatever ``get_attachments`` extracts from that task's
    own ``description``. Nothing is returned; the tasks are mutated.
    """
    for current_task in tasks:
        found_attachments = get_attachments(current_task.description)
        current_task.attachments = found_attachments
def get_attachments(description: str) -> list[Path]:
    """Get attachments from task description.

    An attachment is the target of a Markdown link whose target starts with
    ``/``, e.g. ``[sheet](/homework/sheet.pdf)``. The leading slash is dropped
    and the remainder is joined onto ``REPO_DIR``.

    Args:
        description: Markdown text of a task.

    Returns:
        One path per matching link, in order of appearance; an empty list
        when the description contains no such link.
    """
    # ``re.findall`` already scans the text for successive matches, so the
    # pattern needs no leading/trailing ``.*?``. Those wrappers matched the
    # same links but caused quadratic backtracking on link-free text.
    markdown_local_file_link_pattern = r"\[[^\[]*?\]\(/(.*?)\)"
    # With a single capture group, findall yields the captured link targets.
    matches = re.findall(markdown_local_file_link_pattern, description, re.DOTALL)

    # An empty ``matches`` naturally produces an empty list.
    return [REPO_DIR / file_path for file_path in matches]