Coverage for tools / sel_tools / file_parsing / slide_parser.py: 100%

28 statements  

« prev     ^ index     » next       coverage.py v7.13.4, created at 2026-03-02 18:55 +0000

1"""Parse homework slides for tasks and documentation.""" 

2 

3import re 

4from pathlib import Path 

5 

6from sel_tools.config import REPO_DIR 

7from sel_tools.utils.task import Task 

8 

# Header line that opens a task section; the group captures "Task <n> - <title>".
TASK_HEADER_PATTERN = r"## (Task \d+ - [^\n]*)\n"
# Marker that closes a task section.
TASK_FOOTER_PATTERN = r"---"
# Header of the documentation section; the group captures the text following it.
DOCUMENTATION_PATTERN = r"## Documentation\n(.*)"

12 

13 

def get_tasks_from_slides(slides_markdown_file: Path) -> list[Task]:
    """Parse slides for tasks.

    A task section starts at a ``TASK_HEADER_PATTERN`` match and ends at the
    next ``TASK_FOOTER_PATTERN`` match; the header's captured group becomes the
    task title and the text in between becomes its description. If the file
    contains a ``DOCUMENTATION_PATTERN`` match, everything after that header up
    to the end of the file is passed to every task as documentation; otherwise
    the documentation is an empty string.

    Args:
        slides_markdown_file: Markdown file containing the slides.

    Returns:
        One ``Task`` per task section, in file order, after
        ``fill_attachments`` has been applied to them.

    Raises:
        LookupError: If no task section is found in the file.
    """
    # Decode explicitly as UTF-8 so parsing does not depend on the locale's
    # default encoding.
    text = slides_markdown_file.read_text(encoding="utf-8")

    # Non-greedy body so each header pairs with the nearest following footer.
    minimal_length_between_markers_pattern = TASK_HEADER_PATTERN + "(.*?)" + TASK_FOOTER_PATTERN
    matches = re.findall(minimal_length_between_markers_pattern, text, re.DOTALL)
    if not matches:
        msg = f"Couldn't find any task in {slides_markdown_file}!"
        raise LookupError(msg)

    # With DOTALL the greedy group runs from the first documentation header to
    # the end of the text.
    documentation_match = re.search(DOCUMENTATION_PATTERN, text, re.DOTALL)
    documentation = documentation_match.group(1) if documentation_match else ""

    tasks: list[Task] = [Task(title, description, documentation) for (title, description) in matches]
    fill_attachments(tasks)

    return tasks

31 

32 

def fill_attachments(tasks: list[Task]) -> None:
    """Set the ``attachments`` field of every given task in place.

    Each task's attachments are computed by ``get_attachments`` from that
    task's own description.
    """
    for current_task in tasks:
        description_text = current_task.description
        current_task.attachments = get_attachments(description_text)

37 

38 

def get_attachments(description: str) -> list[Path]:
    """Get attachments from task description.

    An attachment is the target of a Markdown link whose target starts with
    ``/``, e.g. ``[text](/path/to/file)``. The part after the leading slash is
    joined onto ``REPO_DIR``.

    Args:
        description: Markdown text of a task.

    Returns:
        One path per matching link, in order of appearance; an empty list when
        the description contains no such link.
    """
    # No leading/trailing ``.*?`` around the link: ``re.findall`` already scans
    # the whole text for matches, and the extra lazy wildcards match nothing
    # useful while making link-free text expensive to scan.
    markdown_local_file_link_pattern = r"\[[^\[]*?\]\(/(.*?)\)"
    matches = re.findall(markdown_local_file_link_pattern, description, re.DOTALL)
    # An empty match list naturally yields an empty result.
    return [REPO_DIR / file_path for file_path in matches]