|
# Enhanced remote markdown fetcher with caching and better error handling
# Based on original by Robin Hahling
#
# "THE BEER-WARE LICENSE" (Revision 42):
# <robin.hahling@gw-computing.net> wrote this file. As long as you retain this
# notice you can do whatever you want with this stuff. If we meet some day, and
# you think this stuff is worth it, you can buy me a beer in return.
# Robin Hahling
6 | 3 |
|
require 'cgi'
require 'digest/md5'
require 'fileutils'
require 'json'
require 'net/http'
require 'timeout'
require 'uri'
8 | 9 |
|
9 | 10 | module Jekyll |
10 | | - # Remotely fetch a markdown file. |
# Liquid tag that downloads a markdown file over HTTP(S) while the tag is
# being parsed and renders the downloaded text in place of the tag.
#
# Successful downloads are cached on disk under CACHE_DIR for CACHE_EXPIRY
# seconds. Network/HTTP failures do not raise; they render an HTML error
# block (see #error_content) and are never written to the cache.
class RemoteMarkdownTag < Liquid::Tag
  # Configuration
  CACHE_DIR = '_remote_markdown_cache'
  CACHE_EXPIRY = 3600 * 24 * 7 # 7 days in seconds
  TIMEOUT_SECONDS = 10
  MAX_RETRIES = 3
  RETRY_DELAY = 1
  # Upper bound on chained redirects, so a redirect loop cannot recurse forever.
  MAX_REDIRECTS = 5

  # Markdown extensions
  MARKDOWN_EXTENSIONS = %w[.markdown .mkdown .mkdn .mkd .md .MD].freeze

  # Protocols the tag is willing to talk to
  ALLOWED_SCHEMES = %w[http https].freeze

  # User agent for requests
  USER_AGENT = 'AndroidUICollection-Jekyll/1.0'

  # Prefix of every string built by #error_content. Used to recognise error
  # output so it is not cached; more specific than a bare "<!--", which
  # legitimate markdown may start with.
  ERROR_MARKER = '<!-- RemoteMarkdown Error:'

  # @param tag_name [String] name the tag was invoked with (handled by super)
  # @param text [String] tag argument; expected to be the URL of a markdown file
  # @param tokens [Object] remaining parser tokens (handled by super)
  # @raise [ArgumentError] if the URL is empty, malformed, not HTTP(S), or does
  #   not end in a markdown extension
  def initialize(tag_name, text, tokens)
    super
    @url = text.strip
    validate_url

    # mkdir_p is a no-op when the directory already exists
    FileUtils.mkdir_p(CACHE_DIR)

    @content = fetch_with_cache(@url)
  end

  # @return [String] the content fetched (or the error block built) in #initialize
  def render(_context)
    @content
  end

  private

  # Checks @url for presence, protocol, host and markdown file extension.
  #
  # @raise [ArgumentError] describing the first failed check
  def validate_url
    raise ArgumentError, "No URL provided" if @url.empty?

    uri = URI.parse(@url)
    unless ALLOWED_SCHEMES.include?(uri.scheme)
      raise ArgumentError, "Invalid protocol: #{uri.scheme}. Only HTTP(S) allowed."
    end

    # Without a host Net::HTTP would only fail later, after several retries.
    raise ArgumentError, "Invalid URL: #{@url} - missing host" if uri.host.nil? || uri.host.empty?

    unless MARKDOWN_EXTENSIONS.include?(File.extname(uri.path).downcase)
      raise ArgumentError, "Invalid file extension. Expected markdown file."
    end
  rescue URI::InvalidURIError => e
    raise ArgumentError, "Invalid URL: #{@url} - #{e.message}"
  end

  # Returns cached content for +url+ when a fresh cache entry exists,
  # otherwise downloads it and stores successful results in the cache.
  #
  # @param url [String]
  # @return [String] markdown text, or an HTML error block on failure
  def fetch_with_cache(url)
    cache_key = Digest::MD5.hexdigest(url)
    cache_file = File.join(CACHE_DIR, "#{cache_key}.md")
    cache_meta_file = File.join(CACHE_DIR, "#{cache_key}.meta")

    if cache_valid?(cache_file, cache_meta_file)
      Jekyll.logger.info "RemoteMarkdown:", "Using cached content for #{url}"
      return File.read(cache_file, encoding: 'UTF-8')
    end

    Jekyll.logger.info "RemoteMarkdown:", "Fetching #{url}"
    content = fetch_remote_content(url)

    save_to_cache(cache_file, cache_meta_file, content) if cacheable?(content)
    content
  end

  # @return [Boolean] true unless +content+ is nil or an #error_content block
  def cacheable?(content)
    !content.nil? && !content.start_with?(ERROR_MARKER)
  end

  # @return [Boolean] true when both cache files exist and the recorded
  #   timestamp is younger than CACHE_EXPIRY. Unreadable or malformed metadata
  #   counts as "not valid" so the content is simply fetched again.
  def cache_valid?(cache_file, cache_meta_file)
    return false unless File.exist?(cache_file) && File.exist?(cache_meta_file)

    metadata = JSON.parse(File.read(cache_meta_file))
    timestamp = metadata['timestamp'] if metadata.is_a?(Hash)
    return false unless timestamp.is_a?(Numeric)

    Time.now - Time.at(timestamp) < CACHE_EXPIRY
  rescue JSON::ParserError, SystemCallError
    false
  end

  # Writes the content file and its JSON metadata (timestamp + source URL).
  # Best effort: a failure is logged as a warning and otherwise ignored.
  def save_to_cache(cache_file, cache_meta_file, content)
    File.write(cache_file, content)
    File.write(cache_meta_file, JSON.generate({
      'timestamp' => Time.now.to_i,
      'url' => @url
    }))
  rescue => e
    Jekyll.logger.warn "RemoteMarkdown:", "Failed to save cache: #{e.message}"
  end

  # Downloads +url+, retrying non-timeout errors up to MAX_RETRIES attempts
  # with a linearly growing delay, and following at most +redirects_left+
  # redirects.
  #
  # @param url [String]
  # @param redirects_left [Integer] how many more redirects may be followed
  # @return [String] processed markdown, or an HTML error block on failure
  def fetch_remote_content(url, redirects_left = MAX_REDIRECTS)
    retries = 0

    begin
      uri = URI.parse(url)
      response = Timeout.timeout(TIMEOUT_SECONDS) { perform_get(uri) }
    rescue Timeout::Error
      # Also covers Net::OpenTimeout / Net::ReadTimeout; timeouts are not retried.
      return error_content("Request timeout after #{TIMEOUT_SECONDS} seconds")
    rescue => e
      retries += 1
      return error_content("Failed after #{MAX_RETRIES} attempts: #{e.message}") if retries >= MAX_RETRIES

      Jekyll.logger.warn "RemoteMarkdown:", "Retry #{retries}/#{MAX_RETRIES} for #{url}"
      sleep(RETRY_DELAY * retries)
      retry
    end

    # The response is handled outside the timeout/retry block so that a
    # redirect starts a fresh request with its own timeout budget.
    case response
    when Net::HTTPSuccess
      # dup: do not mutate the response (and nil.to_s may be frozen);
      # scrub: invalid byte sequences would make the regexps below raise.
      body = response.body.to_s.dup.force_encoding('UTF-8').scrub
      process_markdown_content(body)
    when Net::HTTPRedirection
      follow_redirect(uri, response['location'], redirects_left)
    else
      error_content("HTTP #{response.code}: #{response.message}")
    end
  end

  # Issues a single GET request for +uri+.
  #
  # @param uri [URI::HTTP]
  # @return [Net::HTTPResponse]
  def perform_get(uri)
    http = Net::HTTP.new(uri.host, uri.port)
    http.use_ssl = (uri.scheme == 'https')
    http.open_timeout = TIMEOUT_SECONDS
    http.read_timeout = TIMEOUT_SECONDS

    request = Net::HTTP::Get.new(uri.request_uri)
    request['User-Agent'] = USER_AGENT
    request['Accept'] = 'text/plain, text/markdown'

    http.request(request)
  end

  # Resolves a Location header against the URL that produced it and fetches
  # the target, decrementing the redirect budget.
  #
  # @param base_uri [URI] URL of the request that was redirected
  # @param location [String, nil] raw Location header value
  # @param redirects_left [Integer]
  # @return [String] fetched content, or an HTML error block
  def follow_redirect(base_uri, location, redirects_left)
    return error_content("Redirect without Location header") if location.nil? || location.empty?
    return error_content("Too many redirects") unless redirects_left.positive?

    # Location may be relative; URI.join leaves absolute targets untouched.
    target = URI.join(base_uri.to_s, location)
    unless ALLOWED_SCHEMES.include?(target.scheme)
      return error_content("Redirect to unsupported protocol: #{target.scheme}")
    end

    Jekyll.logger.info "RemoteMarkdown:", "Following redirect to #{target}"
    fetch_remote_content(target.to_s, redirects_left - 1)
  rescue URI::Error => e
    error_content("Invalid redirect target: #{e.message}")
  end

  # Strips markdown images and <script>/<iframe> elements from +content+ and
  # normalises the trailing newline.
  #
  # NOTE(review): the regexps remove only well-formed script/iframe pairs;
  # this is not a general HTML sanitiser.
  #
  # @param content [String]
  # @return [String]
  def process_markdown_content(content)
    # Replace images with their alt text in brackets; this prevents broken
    # image links from external repos.
    content = content.gsub(/!\[([^\]]*)\]\([^)]+\)/, '[\1]')

    # Remove any potentially problematic HTML
    content = content.gsub(/<script[^>]*>.*?<\/script>/mi, '')
    content = content.gsub(/<iframe[^>]*>.*?<\/iframe>/mi, '')

    # Ensure content ends with newline
    content.chomp + "\n"
  end

  # Logs +message+ and builds the HTML block rendered in place of the
  # markdown. The URL and message are HTML-escaped because the message can
  # carry text supplied by the remote server.
  #
  # @param message [String]
  # @return [String] HTML starting with ERROR_MARKER
  def error_content(message)
    Jekyll.logger.error "RemoteMarkdown:", "#{message} for #{@url}"

    safe_url = CGI.escapeHTML(@url.to_s)
    safe_message = CGI.escapeHTML(message.to_s)

    <<~ERROR
      #{ERROR_MARKER} #{safe_message} -->
      <div class="remote-markdown-error">
        <p><strong>Unable to load content from:</strong></p>
        <p><code>#{safe_url}</code></p>
        <p><em>#{safe_message}</em></p>
      </div>
    ERROR
  end
end
| 178 | + |
# Generator that deletes expired entries from the remote-markdown cache
# directory each time it is run.
class RemoteMarkdownCacheCleanup < Generator
  safe true
  priority :low

  # Scans RemoteMarkdownTag::CACHE_DIR for metadata files and removes every
  # entry older than RemoteMarkdownTag::CACHE_EXPIRY. Does nothing when the
  # cache directory does not exist.
  #
  # @param site [Object] the site being generated (not used here)
  def generate(site)
    cache_dir = RemoteMarkdownTag::CACHE_DIR
    return unless Dir.exist?(cache_dir)

    Dir.glob(File.join(cache_dir, '*.meta')).each do |meta_file|
      remove_if_expired(meta_file)
    end
  end

  private

  # Deletes +meta_file+ and its sibling content file when the recorded
  # timestamp is older than the cache expiry. Any error (unreadable or
  # malformed metadata, failed delete) is logged as a warning and the entry
  # is skipped, so one bad file does not stop the cleanup.
  #
  # @param meta_file [String] path to a "*.meta" cache metadata file
  def remove_if_expired(meta_file)
    metadata = JSON.parse(File.read(meta_file))
    cached_time = Time.at(metadata['timestamp'])
    return unless Time.now - cached_time > RemoteMarkdownTag::CACHE_EXPIRY

    # Anchor on the extension so a ".meta" elsewhere in the path is untouched.
    cache_file = meta_file.sub(/\.meta\z/, '.md')
    [meta_file, cache_file].each do |path|
      File.delete(path) if File.exist?(path)
    end
    Jekyll.logger.info "RemoteMarkdown:", "Cleaned expired cache for #{metadata['url']}"
  rescue => e
    Jekyll.logger.warn "RemoteMarkdown:", "Error cleaning cache: #{e.message}"
  end
end
58 | 205 | end |
|
0 commit comments