Coverage for sel_tools/file_parsing/slide_parser.py: 100%

26 statements  

« prev     ^ index     » next       coverage.py v7.6.8, created at 2024-12-03 10:48 +0000

1"""Parse homework slides for tasks and documentation.""" 

2 

3import re 

4from pathlib import Path 

5 

6from sel_tools.config import REPO_DIR 

7from sel_tools.file_parsing.config import ( 

8 DOCUMENTATION_PATTERN, 

9 TASK_FOOTER_PATTERN, 

10 TASK_HEADER_PATTERN, 

11) 

12from sel_tools.utils.task import Task 

13 

14 

def get_tasks_from_slides(slides_markdown_file: Path) -> list[Task]:
    """Build the list of tasks described in a slides markdown file.

    Every span enclosed by the task header and task footer markers becomes
    one ``Task``. The documentation section, if the file has one, is shared
    by all tasks; otherwise an empty string is used.

    Args:
        slides_markdown_file: Markdown file containing the homework slides.

    Returns:
        The tasks found in the file, with their attachments filled in.

    Raises:
        LookupError: If no task markers are found in the file.
    """
    slides_text = slides_markdown_file.read_text()

    # Non-greedy body so each header pairs with the closest following footer.
    # NOTE(review): unpacking below assumes TASK_HEADER_PATTERN contributes
    # exactly one capture group (the title) -- confirm against the config.
    task_pattern = "".join((TASK_HEADER_PATTERN, "(.*?)", TASK_FOOTER_PATTERN))
    found_tasks = re.findall(task_pattern, slides_text, re.DOTALL)
    if not found_tasks:
        msg = f"Couldn't find any task in {slides_markdown_file}!"
        raise LookupError(msg)

    documentation = ""
    found_documentation = re.search(DOCUMENTATION_PATTERN, slides_text, re.DOTALL)
    if found_documentation:
        documentation = found_documentation.group(1)

    tasks: list[Task] = []
    for title, description in found_tasks:
        tasks.append(Task(title, description, documentation))

    fill_attachments(tasks)
    return tasks

32 

33 

def fill_attachments(tasks: list[Task]) -> None:
    """Set the ``attachments`` field of every task, in place.

    The attachments of each task are derived from its own description.

    Args:
        tasks: Tasks to update; each one is mutated.
    """
    for current_task in tasks:
        task_description = current_task.description
        current_task.attachments = get_attachments(task_description)

38 

39 

def get_attachments(description: str) -> list[Path]:
    """Collect the local files linked from a task description.

    A markdown link counts as local when its target starts with ``/``; the
    remainder of the target is resolved against ``REPO_DIR``.

    Args:
        description: Markdown text of a single task.

    Returns:
        One path per local link, in order of appearance; an empty list when
        the description contains no such link.
    """
    local_link_regex = re.compile(r".*?\[[^\[]*?\]\(/(.*?)\).*?", re.DOTALL)
    # Group 1 is the link target without its leading slash.
    return [REPO_DIR / link.group(1) for link in local_link_regex.finditer(description)]