1
- import glob
2
- import sys
3
- import os .path
4
-
5
1
from enum import Enum
2
+ from itertools import chain
6
3
from tempfile import NamedTemporaryFile
7
- from shutil import copy
8
- from os import unlink
4
+
5
+ import argparse
6
+ import glob
7
+ import os
8
+ import os .path
9
+ import re
10
+ import shutil
11
+ import sys
9
12
10
13
class Codeblock(Enum):
    """Position of the scanner relative to fenced Markdown code blocks."""

    # Inside a fenced block whose opening fence carries no language tag;
    # these are the blocks whose language gets guessed.
    IN = 1
    # Inside a fenced block whose opening fence already has text after the
    # backticks; such blocks are passed through unchanged.
    IN_WITH_LANG = 2
    # Not inside any fenced block.
    OUT = 3
14
17
18
# Matches text that starts with three or more backticks, followed by at least
# one non-backtick character and then another backtick, i.e. inline code
# written on a single line.  Lines matching this are not treated as fences.
INTLINE_PATTERN = re.compile("```+[^`]+`")
# Language name returned when the detector reports "unknown".
DEFAULT_LANG = "txt"
16
- EDIT_FILES = True
17
20
18
21
try :
19
22
from magika import Magika
20
23
magika = Magika ()
21
24
def guess_language (code ):
22
25
codebytes = code .encode (encoding = "utf-8" )
23
26
lang = magika .identify_bytes (codebytes ).prediction .output .label
24
- if lang == "unknown" : return DEFAULT_LANG
25
- return lang
27
+ return lang if lang != "unknown" else DEFAULT_LANG
26
28
except ImportError :
27
29
try :
28
30
from guesslang import Guess
@@ -34,61 +36,86 @@ def guess_language(code):
34
36
print ("Magika or Guesslang is required to run this script. Install one of them to proceed!" )
35
37
sys .exit (1 )
36
38
37
- if len (sys .argv ) != 2 :
38
- print ("Usage: python codeblocks.py dir" )
39
- sys .exit (1 )
39
def add_language(files, edit_files):
    """Guess the language of unlabeled fenced code blocks in Markdown files.

    Args:
        files: Iterable of paths of the files to scan.
        edit_files: When true, every file is rewritten in place with the
            guessed language appended to the opening fence of each unlabeled
            block (a guess of "txt" is not written).  When false, files are
            left untouched and each block with a guessed language is printed
            to stdout, preceded by its ``path:line`` location.
    """
    def is_made_of_char(text, char):
        return len(text) == text.count(char)

    # The scratch file is only needed when editing.  newline="" is important
    # here and in the open() call below: line endings of a file being edited
    # must not be translated.
    temp = None
    if edit_files:
        temp = NamedTemporaryFile("w+", encoding="utf-8", newline="", delete=False)
    try:
        for file in files:
            blockstate = Codeblock.OUT
            code = []
            fence = ""
            indent = ""
            backticks_num = 0
            with open(file, encoding="utf-8", newline="") as f:
                for linenum, line in enumerate(f, 1):
                    stripped = line.strip()
                    # A fence line either opens a block (when outside one) or
                    # closes the current block, which requires a backtick-only
                    # line at least as long as the opening fence.
                    if stripped.startswith("```") and not INTLINE_PATTERN.match(stripped) \
                            and (blockstate == Codeblock.OUT
                                 or (is_made_of_char(stripped, "`") and len(stripped) >= backticks_num)):
                        if blockstate == Codeblock.IN_WITH_LANG:
                            blockstate = Codeblock.OUT
                            if edit_files:
                                temp.write(line)
                        elif blockstate == Codeblock.IN:
                            blockstate = Codeblock.OUT
                            code_str = "\n".join(
                                code_line.removeprefix(indent).rstrip() for code_line in code
                            ) + "\n"
                            # code_str always ends with a newline, so test the
                            # stripped text to really skip empty blocks.
                            lang = guess_language(code_str) if code_str.strip() else ""
                            if edit_files:
                                # When editing files, txt is not a very useful edit.
                                if lang == "txt":
                                    lang = ""
                                fence_start = "`" * backticks_num
                                temp.write(fence.replace(fence_start, fence_start + lang))
                                temp.writelines(code)
                                temp.write(line)
                            elif lang:
                                print(f"{file}:{linenum - len(code) - 1}")
                                print(("`" * backticks_num) + lang + "\n" + code_str + stripped)
                                print()
                            code = []
                        elif is_made_of_char(stripped, "`"):
                            # Opening fence without a language: hold it back
                            # until the block is complete and a guess is made.
                            backticks_num = len(stripped)
                            blockstate = Codeblock.IN
                            indent = line[:len(line) - len(line.lstrip())]
                            fence = line
                        else:
                            backticks_num = stripped.count("`")
                            blockstate = Codeblock.IN_WITH_LANG
                            if edit_files:
                                temp.write(line)
                    elif blockstate == Codeblock.IN:
                        code.append(line)
                    elif edit_files:
                        temp.write(line)
            if edit_files:
                if blockstate == Codeblock.IN:
                    # Non-terminated fence: write back the held opening fence
                    # and its lines unchanged.  Checking the state rather than
                    # `code` keeps a fence that is the last line of the file.
                    temp.write(fence)
                    temp.writelines(code)
                temp.flush()
                shutil.copy(temp.name, file)
                # Reuse the same scratch file for the next input.
                temp.seek(0)
                temp.truncate(0)
    finally:
        # Remove the scratch file even if processing a file failed.
        if temp is not None:
            temp.close()
            os.unlink(temp.name)
40
98
41
- files = glob .iglob ("**/*.md" , root_dir = sys .argv [1 ], recursive = True )
42
- # newline="" is important. See below.
43
- temp = NamedTemporaryFile ("w+" , encoding = "utf-8" , newline = "" , delete = False )
99
def main(argv=None):
    """Command-line entry point: parse the arguments and process the paths.

    Each positional path may be a file, which is processed as given, or a
    directory, which is searched recursively for ``*.md`` files.  Paths are
    processed in the order they appear on the command line.  A path that is
    neither a file nor a directory aborts the run with exit status 1 before
    any file is processed.

    Args:
        argv: Argument list to parse.  ``None`` (the default) makes argparse
            read the process arguments.
    """
    parser = argparse.ArgumentParser(description="Detect and insert the language in the Markdown code blocks.")
    parser.add_argument("--edit", action="store_true", help="Edit files by inserting the language")
    parser.add_argument("path", nargs="+", help="Paths to process")
    args = parser.parse_args(argv)

    # One iterable per argument, kept in command-line order.
    sources = []
    for path in args.path:
        if os.path.isfile(path):
            sources.append([path])
        elif os.path.isdir(path):
            sources.append(glob.iglob(os.path.join(path, "**", "*.md"), recursive=True))
        else:
            # Diagnostics go to stderr so they never mix with the report
            # printed on stdout.
            print(f"Path doesn't exist: \"{path}\"", file=sys.stderr)
            sys.exit(1)

    add_language(chain.from_iterable(sources), args.edit)
92
119
93
- temp . close ()
94
- unlink ( temp . name )
120
# Run the command-line interface only when executed as a script, not on import.
if __name__ == "__main__":
    main()
0 commit comments