CourseplayHelpFS25/generate_markdown.py at master · Courseplay/CourseplayHelpFS25 · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
# generate_markdown.py
# Author: Jan2903
# Date: 08/01/2025

import os
import re
import json
import shutil

# Filesystem layout, resolved against the directory the script is launched from
CURRENT_DIR = os.getcwd()
TRANSLATION_DIR = os.path.join(CURRENT_DIR, "data")
CONFIG_FILE = os.path.join(TRANSLATION_DIR, "config.json")
OUTPUT_DIR = os.path.join(CURRENT_DIR, "docs")
IMAGES_DIR = os.path.join(OUTPUT_DIR, "assets", "images")

# Make sure the output folders exist before anything is generated
os.makedirs(OUTPUT_DIR, exist_ok=True)
os.makedirs(IMAGES_DIR, exist_ok=True)

def copy_image_to_docs(image_filename):
    """Copy one image from TRANSLATION_DIR into IMAGES_DIR.

    If the source file does not exist, nothing is copied and a warning is
    printed instead.

    Args:
        image_filename (str): Name of the image file, relative to both folders.
    """
    origin = os.path.join(TRANSLATION_DIR, image_filename)

    # Guard clause: report the missing source and leave the target untouched
    if not os.path.exists(origin):
        print(f"Warning: Image file '{image_filename}' not found in translation_data folder.")
        return

    shutil.copy(origin, os.path.join(IMAGES_DIR, image_filename))

def create_markdown_file(language_code, page, output_dir, file_index, is_index=False):
    """
    Create a Markdown file for a given page in a specific language.

    The file is named ``index.md`` when ``is_index`` is true, otherwise
    ``<file_index, two digits>_page_<raw_title>.md``. It contains the page
    title as a level-1 heading followed by, for each paragraph, an optional
    level-2 heading, the text, and an image reference.

    Args:
        language_code (str): Language code for the output file. Not used by
            this function; kept so existing callers keep working.
        page (dict): Page data with "raw_title", "title" and an optional
            "paragraphs" list. Each paragraph may hold "title", "text" and
            "image" (a dict with a "filename" entry); empty or missing
            entries are skipped.
        output_dir (str): Output directory for the Markdown file.
        file_index (int): Index number for the file name.
        is_index (bool): Whether this page should be saved as index.md.
    """
    file_name = "index.md" if is_index else f"{file_index:02d}_page_{page['raw_title']}.md"
    file_path = os.path.join(output_dir, file_name)

    with open(file_path, "w", encoding="utf-8") as md_file:
        # Write the page title
        md_file.write(f"# {page['title']}\n\n")

        # Write paragraphs
        for paragraph in page.get("paragraphs", []):
            heading = paragraph.get("title")
            if heading:
                md_file.write(f"## {heading}\n\n")

            text = paragraph.get("text")
            if text:
                # Two trailing spaces turn a newline into a Markdown line break.
                # The replacement is done outside the f-string because a
                # backslash inside an f-string expression is a SyntaxError
                # on Python versions before 3.12.
                text_with_breaks = text.replace("\n", "  \n")
                md_file.write(f"{text_with_breaks}\n\n")

            image_filename = (paragraph.get("image") or {}).get("filename")
            if image_filename:
                # Copy the image to the docs/assets/images folder
                copy_image_to_docs(image_filename)
                # Adjust the image path for MkDocs
                image_path = f"../assets/images/{image_filename}"
                md_file.write(f"![Image]({image_path})\n\n")

def delete_unused_images(used_images):
    """Delete all entries in IMAGES_DIR that are not in the used_images set.

    Real sub-directories are left alone: os.remove cannot delete a directory
    and would otherwise abort the run with an exception.

    Args:
        used_images (set): File names (not paths) that must be kept.
    """
    for image_file in os.listdir(IMAGES_DIR):
        if image_file in used_images:
            continue

        image_path = os.path.join(IMAGES_DIR, image_file)
        # Symlinks are still removed (os.remove deletes the link itself);
        # only actual directories are skipped.
        if os.path.isdir(image_path) and not os.path.islink(image_path):
            continue

        os.remove(image_path)
        print(f"Deleted unused image: {image_file}")

def generate_site():
    """
    Main function to generate Markdown files for a multilingual site.

    Reads CONFIG_FILE, then for every other ``*.json`` file in TRANSLATION_DIR
    writes one Markdown file per configured page into
    ``OUTPUT_DIR/<language code>`` (the first page becomes index.md).
    Afterwards, images in IMAGES_DIR that no processed page references are
    deleted.

    Raises:
        ValueError: If config.json or a translation file does not have the
            expected structure.
        Exception: Any other error is printed and re-raised unchanged.
    """
    # Language file names that do not match the official ISO code, mapped to
    # the code used for the output folder. Built once instead of per file.
    # NOTE(review): "ea" -> "es-BR" looks unusual; confirm against the site
    # configuration before changing it, since it determines a folder name.
    language_code_mapping = {
        "br": "pt-BR",
        "cs": "zh",
        "ct": "zh-TW",
        "cz": "cs",
        "ea": "es-BR",
        "fc": "fr-CA",
        "jp": "ja",
        "kr": "ko",
        "no": "nb",
    }

    try:
        # Load the configuration file
        with open(CONFIG_FILE, "r", encoding="utf-8") as config_file:
            config = json.load(config_file)

        pages = _extract_pages(config)

        # Track used images
        used_images = set()

        # Loop through supported languages
        for language_file in os.listdir(TRANSLATION_DIR):
            if not language_file.endswith(".json") or language_file == "config.json":
                continue

            file_code = language_file.split(".")[0]
            language_code = language_code_mapping.get(file_code, file_code)

            # Create language-specific output directory
            language_output_dir = os.path.join(OUTPUT_DIR, language_code)
            os.makedirs(language_output_dir, exist_ok=True)

            # Load the language translation file
            with open(os.path.join(TRANSLATION_DIR, language_file), "r", encoding="utf-8") as lang_file:
                translations = json.load(lang_file)
            if not isinstance(translations, dict):
                raise ValueError(f"Invalid {language_file} format. Ensure it is a JSON object.")

            # Generate Markdown files for each page with numbering
            for index, page in enumerate(pages, start=1):
                page_data = _translate_page(page, translations)

                for paragraph in page_data["paragraphs"]:
                    image_filename = paragraph["image"].get("filename")
                    if image_filename:
                        used_images.add(image_filename)

                # First page becomes index.md
                create_markdown_file(
                    language_code,
                    page_data,
                    language_output_dir,
                    index,
                    is_index=(index == 1),
                )

        # Delete unused images
        delete_unused_images(used_images)

    except Exception as e:
        print(f"Error: {e}")
        raise


def _extract_pages(config):
    """Return the page list of the parsed config.json, or raise ValueError if it is malformed."""
    is_valid = (
        isinstance(config, list)
        and len(config) > 0
        and isinstance(config[0], dict)
        and isinstance(config[0].get("pages"), list)
    )
    if not is_valid:
        raise ValueError("Invalid config.json format. Ensure it has a list with 'pages' key.")
    return config[0]["pages"]


def _translate_entry(entry, translations):
    """Translate entry["raw"], falling back to the raw key; return "" when there is no raw key."""
    raw = entry.get("raw")
    return translations.get(raw, raw) if raw else ""


def _translate_page(page, translations):
    """Build the page dict consumed by create_markdown_file with translated texts."""
    raw_title = page["title"]["raw"]
    return {
        "raw_title": raw_title,
        "title": translations.get(raw_title, raw_title),
        "paragraphs": [
            {
                "title": _translate_entry(paragraph["title"], translations),
                "text": _translate_entry(paragraph["text"], translations),
                # Images are the same across all languages
                "image": paragraph["image"],
            }
            for paragraph in page.get("paragraphs", [])
        ],
    }

def ensure_list_rendering(file_path):
    """
    Ensures proper rendering in a Markdown file for MkDocs Material
    by adding a blank line between a line ending with ':'
    and the following line if the following line contains text.

    The file is rewritten only when its content actually changes; either way
    a status message is printed. An empty file is left as it is.

    Args:
        file_path (str): Path to the Markdown file.
    """
    with open(file_path, "r", encoding="utf-8") as file:
        lines = file.readlines()

    updated_lines = []

    # Pair every line with its successor; the last line is paired with an
    # empty string so it never triggers an insertion.
    for line, following in zip(lines, lines[1:] + [""]):
        updated_lines.append(line)
        # If a line ends with ':' and the next line contains text, insert a blank line
        if line.strip().endswith(":") and following.strip():
            updated_lines.append("\n")

    # The lines already hold the full original text, so no second read is needed
    original_content = "".join(lines)
    new_content = "".join(updated_lines)

    if original_content != new_content:
        print(f"Updating file: {file_path}")
        with open(file_path, "w", encoding="utf-8") as file:
            file.write(new_content)
    else:
        print(f"No changes needed for: {file_path}")


def post_process_markdown_files(output_dir):
    """
    Run ensure_list_rendering on every ``.md`` file found anywhere
    below the given directory.

    Args:
        output_dir (str): Root directory that is walked recursively.
    """
    for directory, _subdirs, filenames in os.walk(output_dir):
        markdown_names = (name for name in filenames if name.endswith(".md"))
        for name in markdown_names:
            ensure_list_rendering(os.path.join(directory, name))

if __name__ == "__main__":
    # Script entry point: generate the Markdown files for every language.
    generate_site()
    # The list-rendering post-processing pass is currently disabled:
    # post_process_markdown_files(OUTPUT_DIR)