commit 6b875fc2bf1f261be41b68dfd9d7f94aa941710b Author: bsncubed Date: Sat Mar 30 16:01:47 2024 +1100 File Added diff --git a/book-to-static.php b/book-to-static.php new file mode 100644 index 0000000..c72df43 --- /dev/null +++ b/book-to-static.php @@ -0,0 +1,263 @@ +#!/usr/bin/env php + $book['id']]); +$pages = getAllOfAtListEndpoint("api/pages", ['filter[book_id]' => $book['id']]); + +// Get the full content for each page +foreach ($pages as $index => $page) { + $pages[$index] = apiGetJson("api/pages/{$page['id']}"); +} + +// Create the image output directory +if (!is_dir($outDir . "/images")) { + mkdir($outDir . "/images", 0777, true); +} + +// Find the pages that are not within a chapter +$directBookPages = array_filter($pages, function($page) { + return empty($page['chapter_id']); +}); + +// Create book index file +$bookIndex = getBookHtmlOutput($book, $chapters, $directBookPages); +file_put_contents($outDir . "/index.html", $bookIndex); + +// Create a HTML file for each chapter +// in addition to each page within those chapters +foreach ($chapters as $chapter) { + $childPages = array_filter($pages, function($page) use ($chapter) { + return $page['chapter_id'] == $chapter['id']; + }); + $chapterPage = getChapterHtmlOutput($chapter, $childPages); + file_put_contents($outDir . "/chapter-{$chapter['slug']}.html", $chapterPage); + + foreach ($childPages as $childPage) { + $childPageContent = getPageHtmlOutput($childPage, $chapter); + $childPageContent = extractImagesFromHtml($childPageContent); + file_put_contents($outDir . "/page-{$childPage['slug']}.html", $childPageContent); + } +} + +// Create a file for each direct child book page +foreach ($directBookPages as $directPage) { + $directPageContent = getPageHtmlOutput($directPage, null); + $directPageContent = extractImagesFromHtml($directPageContent); + file_put_contents($outDir . 
"/page-{$directPage['slug']}.html", $directPageContent);
}

/**
 * Scan the given HTML for <img> URLs, download each image into the
 * "images" folder of the export and rewrite the HTML so that it
 * references the local copy instead.
 *
 * Uses the global $outDir as the export root. Names already written
 * and URLs already downloaded are remembered across calls so that an
 * image shared by several pages is fetched and stored only once.
 *
 * @param string $html HTML to scan.
 * @return string The HTML with the URL of every saved image replaced.
 */
function extractImagesFromHtml(string $html): string {
    global $outDir;
    // File names already used within the images folder.
    static $savedImages = [];
    // Remote URL => local file name, for images already downloaded.
    static $localNames = [];

    // Capture group 1 is the value of the src attribute of each <img> tag.
    $matches = [];
    preg_match_all('/<img\b[^>]*?\ssrc\s*=\s*["\'](.*?)["\']/i', $html, $matches);

    foreach (array_unique($matches[1] ?? []) as $url) {
        if ($url === '') {
            continue;
        }

        if (!isset($localNames[$url])) {
            $image = getImageFile($url);
            // Treat an empty body like a failed download, nothing useful to save.
            if ($image === false || $image === '') {
                continue;
            }

            // Name the file after the URL path so query strings do not end up in it.
            $path = parse_url($url, PHP_URL_PATH);
            $name = basename(is_string($path) && $path !== '' ? $path : $url);

            // Prefix a counter until the name no longer clashes with a saved file.
            $fileName = $name;
            $count = 1;
            while (isset($savedImages[$fileName])) {
                $fileName = $count . '-' . $name;
                $count++;
            }

            // Leave the remote reference in place if the local copy cannot be written.
            if (@file_put_contents($outDir . "/images/" . $fileName, $image) === false) {
                continue;
            }

            $savedImages[$fileName] = true;
            $localNames[$url] = $fileName;
        }

        $html = str_replace($url, "./images/" . $localNames[$url], $html);
    }

    return $html;
}

/**
 * Get an image file from the given URL.
 * URLs that start with the configured API base URL are fetched through
 * apiGet() so that the API credentials are sent with the request
 * (needed when the instance serves images behind authentication).
 * Any other URL is fetched directly.
 *
 * No native return type is declared on purpose: a ": string" declaration
 * would turn a failed download (false) into an empty string in coercive
 * typing mode, which hides the failure from the caller.
 *
 * @param string $url Absolute image URL.
 * @return string|false The raw image data, or false if it could not be fetched.
 */
function getImageFile(string $url) {
    global $apiUrl;

    if (strpos(strtolower($url), strtolower($apiUrl)) === 0) {
        $image = apiGet(substr($url, strlen($apiUrl)));
    } else {
        $image = @file_get_contents($url);
    }

    return ($image === false || $image === '') ? false : $image;
}

/**
 * Get the HTML representation of a book.
 */
function getBookHtmlOutput(array $book, array $chapters, array $pages): string {
    $content = "";
    $content .= "

{$book['name']}

"; + $content .= "

{$book['description']}

"; + $content .= "
"; + if (count($chapters) > 0) { + $content .= "

Chapters

"; + } + if (count($pages) > 0) { + $content .= "

Pages

"; + } + return $content; +} + +/** + * Get the HTML representation of a chapter. + */ +function getChapterHtmlOutput(array $chapter, array $pages): string { + $content = ""; + $content .= "

Back to book

"; + $content .= "

{$chapter['name']}

"; + $content .= "

{$chapter['description']}

"; + $content .= "
"; + if (count($pages) > 0) { + $content .= "

Pages

"; + } + return $content; +} + +/** + * Get the HTML representation of a page. + */ +function getPageHtmlOutput(array $page, ?array $parentChapter): string { + $content = ""; + if (is_null($parentChapter)) { + $content .= "

Back to book

"; + } else { + $content .= "

Back to chapter

"; + } + $content .= "

{$page['name']}

"; + $content .= "
{$page['html']}
";

    return $content;
}

/**
 * Get a single book by its URL slug.
 * The list endpoint is used to find the book, then the full book
 * details are fetched by ID.
 *
 * @param string $slug URL slug of the book.
 * @return array|null The decoded book details, or null if no book matches.
 */
function getBookBySlug(string $slug): ?array {
    $endpoint = 'api/books?' . http_build_query(['filter[slug]' => $slug]);
    $resp = apiGetJson($endpoint);
    $book = $resp['data'][0] ?? null;

    if (is_null($book)) {
        return null;
    }

    return apiGetJson("api/books/{$book['id']}");
}

/**
 * Get every item from a paginated API listing endpoint.
 * Requests pages of 100 items until the "total" reported by the API
 * has been reached.
 *
 * @param string $endpoint Listing endpoint without a query string, e.g. "api/pages".
 * @param array  $params   Extra query parameters (such as filters) sent with every request.
 * @return array All items from the "data" key of each response, in request order.
 */
function getAllOfAtListEndpoint(string $endpoint, array $params): array {
    $count = 100;
    $offset = 0;
    $all = [];

    do {
        // Build the URL in a separate variable: $endpoint must stay clean,
        // otherwise each further page would get another query string appended.
        $query = http_build_query(array_merge($params, ['count' => $count, 'offset' => $offset]));
        $resp = apiGetJson($endpoint . '?' . $query);

        $total = $resp['total'] ?? 0;
        $new = $resp['data'] ?? [];
        // array_merge copes with an empty page, unlike array_push(...[]) on older PHP.
        $all = array_merge($all, $new);
        $offset += $count;
    } while ($offset < $total);

    return $all;
}

/**
 * Make a simple GET HTTP request to the API.
 * The request is authenticated with the global token ID & secret.
 *
 * @param string $endpoint Path relative to the global $apiUrl.
 * @return string The raw response body, or an empty string if the request failed.
 */
function apiGet(string $endpoint): string {
    global $apiUrl, $clientId, $clientSecret;
    $url = rtrim($apiUrl, '/') . '/' . ltrim($endpoint, '/');
    $opts = ['http' => ['header' => "Authorization: Token {$clientId}:{$clientSecret}"]];
    $context = stream_context_create($opts);

    // file_get_contents gives false on failure; make the conversion to the
    // declared string return explicit instead of relying on type coercion.
    $response = @file_get_contents($url, false, $context);
    return $response === false ? '' : $response;
}

/**
 * Make a simple GET HTTP request to the API &
 * decode the JSON response to an array.
 * Stops the script with an error message when the response
 * cannot be decoded to an array.
 *
 * @param string $endpoint Path relative to the global $apiUrl.
 * @return array The decoded response.
 */
function apiGetJson(string $endpoint): array {
    $data = apiGet($endpoint);
    $decoded = json_decode($data, true);

    // json_decode gives null for an empty or invalid body; returning that
    // from an array-typed function would only surface as a TypeError.
    if (!is_array($decoded)) {
        errorOut("Could not read a JSON response from API endpoint: {$endpoint}");
    }

    return $decoded;
}

/**
 * DEBUG: Dump out the given variables and exit.
 */
function dd(...$args): void {
    foreach ($args as $arg) {
        var_dump($arg);
    }
    exit(1);
}

/**
 * Alert of an error then exit the script with a non-zero status.
 */
function errorOut(string $text): void {
    echo "ERROR: " . $text . PHP_EOL;
    exit(1);
}