forked from antony66/gmail-mbox2maildir
-
Notifications
You must be signed in to change notification settings - Fork 0
/
mbox_split.py
executable file
·84 lines (70 loc) · 2.71 KB
/
mbox_split.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
#!/usr/bin/env python3
# Adapted from:
# http://wboptimum.com/splitting-gmail-mbox-by-label/
import getopt
import mailbox
import os
import sys
from email.header import decode_header
from email.errors import HeaderParseError
def decode_rfc2822(header_value):
    """Return *header_value* with its encoded words decoded to text.

    The value is split with ``email.header.decode_header``. Parts that come
    back as ``str`` (a header without encoded words) are kept unchanged.
    Parts that come back as bytes are decoded with their declared charset,
    falling back to UTF-8 when no charset is declared or the declared one
    cannot be used. Bytes that are invalid for the chosen charset are
    dropped (``errors='ignore'``) rather than raising.

    Args:
        header_value: Raw header value, e.g. ``message["X-Gmail-Labels"]``.

    Returns:
        The decoded parts concatenated into a single ``str``.
    """
    parts = []
    for chunk, charset in decode_header(header_value):
        if isinstance(chunk, str):
            parts.append(chunk)
            continue

        text = None
        if charset is not None:
            try:
                text = chunk.decode(charset, errors='ignore')
            except (LookupError, ValueError):
                # Unknown or unusable charset name (LookupError), or a codec
                # that rejects the request (UnicodeError is a ValueError):
                # fall through to the UTF-8 attempt below.
                text = None

        if text is None:
            # UTF-8 with errors='ignore' skips undecodable bytes instead of
            # raising, so no further fallback is needed here.
            text = chunk.decode('utf8', errors='ignore')

        parts.append(text)
    return ''.join(parts)
# Labels that never decide where a message is filed (compared lower-cased).
_IGNORED_LABELS = frozenset(("important", "unread", "starred", "newsletters"))


def _label_to_filename(label):
    """Return *label* title-cased with path separator characters turned into dots."""
    name = label.title()
    # os.pathsep is kept for compatibility with earlier output names; os.sep
    # and os.altsep are the characters that would otherwise make a nested
    # label such as "work/projects" point into a non-existent directory.
    for separator in (os.sep, os.altsep, os.pathsep):
        if separator:
            name = name.replace(separator, ".")
    return name


def _choose_target(labels, prefix):
    """Return the output mbox path for a lower-cased, comma-separated label string."""
    # NOTE(review): these are substring tests on the whole label string, so a
    # label that merely contains "sent" (e.g. "presentations") also matches.
    # Kept as-is because it may be deliberate loose matching - confirm.
    if "inbox" in labels:
        return prefix + "Inbox.mbox"
    if "sent" in labels:
        return prefix + "Sent.mbox"
    for label in labels.split(","):
        # Collapse whitespace so padded or folded labels compare and name cleanly.
        label = " ".join(label.split())
        if label and label not in _IGNORED_LABELS:
            return prefix + _label_to_filename(label) + ".mbox"
    return prefix + "Archive.mbox"


def main(argv):
    """Split a Gmail mbox export into one mbox file per label.

    Each message is filed by its ``X-Gmail-Labels`` header: anything whose
    labels contain "inbox" goes to ``<prefix>Inbox.mbox``, otherwise anything
    containing "sent" goes to ``<prefix>Sent.mbox``, otherwise the first label
    that is not in the ignore list selects ``<prefix><Label>.mbox``. Messages
    with no usable label go to ``<prefix>Archive.mbox``.

    Args:
        argv: Command-line arguments without the program name. Recognised
            options are ``-i/--infile`` (default ``inbox.mbox``) and
            ``-p/--prefix`` (default ``split_``).

    Raises:
        SystemExit: With status 2 on invalid options, or status 1 when the
            input mbox does not exist.
    """
    in_mbox = "inbox.mbox"
    prefix = "split_"
    try:
        opts, _ = getopt.getopt(argv, "i:p:", ["infile=", "prefix="])
    except getopt.GetoptError:
        print("Usage:", sys.argv[0], "-i <input_file.mbox> -p <prefix>")
        sys.exit(2)
    for opt, arg in opts:
        if opt in ("-i", "--infile"):
            in_mbox = arg
        elif opt in ("-p", "--prefix"):
            prefix = arg
    print("Processing file - " + in_mbox + " with prefix = " + prefix)

    # create=False: a mistyped input path must be reported, not silently
    # turned into a new empty mbox file.
    try:
        source = mailbox.mbox(in_mbox, create=False)
    except mailbox.NoSuchMailboxError:
        print("Input mbox not found: " + in_mbox, file=sys.stderr)
        sys.exit(1)

    # Output mailboxes keyed by file path, so a user label that maps onto one
    # of the default files reuses the same mailbox object.
    boxes = {}
    skipped = 0
    try:
        for name in ("Inbox", "Sent", "Archive"):
            path = prefix + name + ".mbox"
            boxes[path] = mailbox.mbox(path, None, True)

        for message in source:
            raw_labels = message["X-Gmail-Labels"] or ""  # Could possibly be None.
            labels = ""
            if raw_labels != "":
                labels = decode_rfc2822(raw_labels).lower()

            target = _choose_target(labels, prefix)
            if target not in boxes:
                boxes[target] = mailbox.mbox(target, None, True)
            try:
                boxes[target].add(message)
            except HeaderParseError:
                # Nothing can be done for this message; skip it but keep count.
                skipped += 1
    finally:
        # Flush and close every mailbox even if processing failed part-way.
        try:
            for box in boxes.values():
                box.close()
        finally:
            source.close()

    if skipped:
        print(
            "Skipped {} message(s) with unparseable headers".format(skipped),
            file=sys.stderr,
        )
# Script entry point: hand everything after the program name to main().
if __name__ == "__main__":
    cli_args = sys.argv[1:]
    main(cli_args)