Source code for app.model.lib.help_topics

import html
import re
from pathlib import Path

from bs4 import BeautifulSoup
from flask import render_template
from markdown_it import MarkdownIt
from markupsafe import Markup
from werkzeug.exceptions import NotFound



[docs]
class HelpTopics:
    """
    Read help topic templates (markdown or HTML) and search through their text.

    The templates are rendered once by ``process_once`` and kept in memory:
    the rendered HTML keyed by topic name, plus a whitespace-normalised plain
    text version used by ``search``.

    Depends on Flask's ``render_template`` to evaluate the templates as jinja2.
    In the future, it might be nice to isolate it from these, but for now, it
    does the job and nothing else depends on the class, so it's fine as it is.
    """

    # Characters of context kept on each side of a search match.
    _EXCERPT_CONTEXT = 30

    # Markup wrapped around every highlighted match in an excerpt.
    _HIGHLIGHT_OPEN = '<span class="highlight">'
    _HIGHLIGHT_CLOSE = '</span>'

    def __init__(self, templates_dir, help_topic_dir):
        """
        Set up an empty topic store; no template is read until ``process_once``.

        :param templates_dir: root directory of the jinja2 templates
            (``str`` or ``Path``).
        :param help_topic_dir: directory holding the help topic templates;
            must be ``templates_dir`` itself or a directory below it.
        :raises ValueError: if ``help_topic_dir`` is not located under
            ``templates_dir``.
        """
        templates_path = Path(templates_dir)
        help_topic_path = Path(help_topic_dir)

        # Compare whole path components rather than raw string prefixes, so a
        # sibling such as "templates_old/help" is not mistaken for a child of
        # "templates".
        if not help_topic_path.is_relative_to(templates_path):
            raise ValueError(
                f"The help topic directory ({help_topic_dir}) "
                f"must be under the templates directory ({templates_dir})",
            )

        # Root directory that template names are resolved against.
        self.templates_dir = templates_path
        # Directory scanned for ``.md`` / ``.html`` help topics.
        self.help_topic_dir = help_topic_path
        # Markdown renderer with table support enabled.
        self.markdown = MarkdownIt().enable('table')
        # Total number of words across all processed topics.
        self.word_count = 0

        self._html_cache = {}
        self._text_cache = {}

    def render_html(self, base_path):
        """
        Return the cached HTML of the topic named ``base_path``.

        :raises NotFound: if no topic with that name has been processed.
        """
        try:
            return self._html_cache[base_path]
        except KeyError as e:
            raise NotFound() from e

    def search(self, query):
        """
        Search the plain text of every processed topic for ``query``.

        The match is literal (the query is regex-escaped) and case-insensitive.
        Every hit is shown with some surrounding context; hits whose context
        windows touch or overlap are merged, separate ones are joined with
        ``' [...] '``.

        :param query: text to look for; surrounding whitespace is ignored.
        :returns: a list of ``{'name': ..., 'excerpt_html': ...}`` dicts, one
            per topic with at least one hit. ``excerpt_html`` is HTML: the
            topic text is escaped and each hit is wrapped in
            ``<span class="highlight">``. A blank query yields an empty list.
        """
        needle = query.strip()
        if not needle:
            # An empty pattern would match at every position of every topic.
            return []

        regex = re.compile('(' + re.escape(needle) + ')', re.IGNORECASE)
        context = self._EXCERPT_CONTEXT
        results = []

        for name, text in self._text_cache.items():
            # Context windows as [start, stop) pairs, in ascending order.
            spans = []

            for match in regex.finditer(text):
                start = max(match.start() - context, 0)
                stop = min(match.end() + context, len(text))

                if spans and start <= spans[-1][1]:
                    # Touches or overlaps the previous window: extend it.
                    spans[-1][1] = stop
                else:
                    spans.append([start, stop])

            if not spans:
                continue

            excerpts = []
            for start, stop in spans:
                excerpt = text[start:stop]

                # A window cut out of the middle of the text may begin or end
                # inside a word; drop that possibly partial word. Decided per
                # window, so one that reaches the text boundary is kept whole.
                if start > 0:
                    excerpt = re.sub(r'^[a-zA-Z.?!,:]*\s*', '', excerpt)
                if stop < len(text):
                    excerpt = re.sub(r'\s*[a-zA-Z.?!,:]*$', '', excerpt)

                excerpts.append(excerpt)

            full_excerpt = ' [...] '.join(excerpts)

            # Mark truncation at either end of the topic text.
            if spans[0][0] > 0:
                full_excerpt = '...' + full_excerpt
            if spans[-1][1] < len(text):
                full_excerpt = full_excerpt + '...'

            results.append({
                'name': name,
                'excerpt_html': self._highlight(regex, full_excerpt),
            })

        return results

    @classmethod
    def _highlight(cls, regex, excerpt):
        """Escape ``excerpt`` for HTML and wrap every ``regex`` hit in a span."""
        parts = []

        # ``regex`` has exactly one capturing group, so ``split`` alternates
        # between plain text (even indexes) and matched text (odd indexes).
        # Escaping each piece separately keeps the match positions intact.
        for index, piece in enumerate(regex.split(excerpt)):
            escaped = html.escape(piece, quote=False)
            if index % 2:
                escaped = cls._HIGHLIGHT_OPEN + escaped + cls._HIGHLIGHT_CLOSE
            parts.append(escaped)

        return ''.join(parts)

    def process_once(self, debug=False):
        """
        Render every help topic template and cache its HTML and plain text.

        Does nothing when topics are already cached, unless ``debug`` is true,
        in which case everything is rendered again from scratch. Files other
        than ``.md`` and ``.html`` are ignored, as are templates that render
        to an empty string.

        :param debug: force a full re-read even if the caches are populated.
        """
        if self._html_cache and not debug:
            return

        # Build into fresh containers and swap them in at the end: topics that
        # were removed since the last run disappear, and a failure half-way
        # leaves the previous state untouched.
        html_cache = {}
        text_cache = {}
        word_count = 0

        # Sorted so that the order of topics (and of search results) does not
        # depend on the file system's directory order.
        for file in sorted(self.help_topic_dir.iterdir()):
            extension = file.suffix
            if extension not in ('.md', '.html'):
                continue

            # Template names always use forward slashes and are relative to
            # the templates directory.
            template_name = file.relative_to(self.templates_dir).as_posix()
            rendered = render_template(template_name)

            if extension == '.md':
                html_content = Markup(self.markdown.render(rendered))
            else:
                html_content = Markup(rendered)

            if not html_content:
                continue

            base_key = file.stem
            html_cache[base_key] = html_content

            soup = BeautifulSoup(html_content, 'html.parser')
            words = soup.get_text(' ', strip=True).split()

            text_cache[base_key] = ' '.join(words)
            word_count += len(words)

        self._html_cache = html_cache
        self._text_cache = text_cache
        self.word_count = word_count