Skip to content

Commit

Permalink
decode TransferEncoding, Charset, Multipart functions
Browse files Browse the repository at this point in the history
  • Loading branch information
Mizuho32 committed Oct 19, 2024
1 parent 45cca10 commit 9768cb3
Showing 1 changed file with 117 additions and 30 deletions.
147 changes: 117 additions & 30 deletions store/search.go
Original file line number Diff line number Diff line change
Expand Up @@ -215,40 +215,14 @@ func decodeRFC2047(encoded string) (string, error) {
var decodedStrings []string
for _, match := range matches {
charset := match[1]
encodingName := strings.ToUpper(match[2])
encodingName := match[2]
encodedText := match[3]

// Decode Base64 or Quoted-Printable
var decodedBytes []byte
var err error
switch encodingName {
case "B": // Base64
decodedBytes, err = base64.StdEncoding.DecodeString(encodedText)
if err != nil {
return encoded, fmt.Errorf("Base64 decode error: %w", err)
}
case "Q": // Quoted-Printable
decodedBytes, err = io.ReadAll(quotedprintable.NewReader(strings.NewReader(encodedText)))
if err != nil {
return "", fmt.Errorf("Quoted-Printable decode error: %w", err)
}
default:
return encoded, fmt.Errorf("not supported encoding: %s", encodingName)
}

// Select charset
var enc encoding.Encoding
switch strings.ToLower(charset) {
case "iso-2022-jp":
enc = japanese.ISO2022JP
case "utf-8":
enc = encUnicode.UTF8
default:
return encoded, fmt.Errorf("not supported charset: %s", charset)
reader, err := decodeTransferEncodeAndCharset(encodingName, charset, encodedText)
if err != nil {
return encoded, err
}

// Decode with charset
reader := transform.NewReader(strings.NewReader(string(decodedBytes)), enc.NewDecoder())
decodedText, err := io.ReadAll(reader)
if err != nil {
return encoded, err
Expand All @@ -260,3 +234,116 @@ func decodeRFC2047(encoded string) (string, error) {
// Concat multiple strings
return strings.Join(decodedStrings, ""), nil
}

func decodeTransferEncodeAndCharset(encodingName string, charset string, encodedText string) (io.Reader, error) {
decodedString, err := decodeTransferEncode(encodingName, encodedText)
if len(decodedString) == 0 && err != nil {
return nil, err
}

// try to decode even if unknown encoding
reader, err := decodeCharset(charset, decodedString)
if err != nil {
return nil, err
}
return reader, nil
}

// Decode Base64 or Quoted Printable
func decodeTransferEncode(encodingName string, encodedText string) (string, error) {
// Decode Base64 or Quoted-Printable
var decodedBytes []byte
var err error
switch strings.ToUpper(encodingName) {
case "B": // Base64
decodedBytes, err = base64.StdEncoding.DecodeString(encodedText)
if err != nil {
return string(decodedBytes), fmt.Errorf("Base64 decode error: %w", err)
}
case "Q": // Quoted-Printable
decodedBytes, err = io.ReadAll(quotedprintable.NewReader(strings.NewReader(encodedText)))
if err != nil {
return string(decodedBytes), fmt.Errorf("Quoted-Printable decode error: %w", err)
}
default:
return encodedText, fmt.Errorf("not supported encoding: %s", encodingName)
}
return string(decodedBytes), nil
}

func decodeCharset(charset string, decodedString string) (io.Reader, error) {
// Select charset
var enc encoding.Encoding
switch strings.ToLower(charset) {
case "iso-2022-jp":
enc = japanese.ISO2022JP
case "utf-8":
enc = encUnicode.UTF8
case "us-ascii":
return strings.NewReader(decodedString), nil
default:
return nil, fmt.Errorf("not supported charset: %s", charset)
}

// Decode with charset
reader := transform.NewReader(strings.NewReader(decodedString), enc.NewDecoder())
return reader, nil
}

func decodeMultiPart(body string, boundary string) (io.Reader, error) {
encPattern := `Content-Transfer-Encoding:\s+(\w+)`
charsetPattern := `charset="((?:\w|-)+)"`

// Regexp for MIME encode type & Charset match
encRe, err := regexp.Compile(encPattern)
if err != nil {
return nil, fmt.Errorf("error compiling regex:%v", err)
}
charsetRe, err := regexp.Compile(charsetPattern)
if err != nil {
return nil, fmt.Errorf("error compiling regex:%v", err)
}

// Split by boundary
parts := strings.Split(body, boundary)
var readers []io.Reader

// Make decoded io.Readers for each part
for _, part := range parts {
part = strings.TrimSpace(part)
if len(part) == 0 {
continue
}

// Extract MIME header and body
headerBody := strings.SplitN(part, "\r\n\r\n", 2)
if len(headerBody) < 2 {
// retry
headerBody = strings.SplitN(part, "\n\n", 2)
if len(headerBody) < 2 {
continue
}
}

mimeHeader := headerBody[0]
encodedBody := headerBody[1]

// Find encode types
encMatches := encRe.FindStringSubmatch(mimeHeader)
charsetMatches := charsetRe.FindStringSubmatch(mimeHeader)

// Decode
if len(encMatches) > 1 && len(charsetMatches) > 1 {
reader, err := decodeTransferEncodeAndCharset(encMatches[1][0:1], charsetMatches[1], encodedBody)
if err != nil {
return nil, err
}
readers = append(readers, reader)

} else {
return nil, fmt.Errorf("failed to match encoding and charset in:\n%s", mimeHeader)
}
}

return io.MultiReader(readers...), nil
}

0 comments on commit 9768cb3

Please sign in to comment.