Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add Image Diff for SVG files #14867

Merged
merged 17 commits into from
Jun 5, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 3 additions & 2 deletions modules/avatar/avatar.go
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,9 @@ import (
"image"
"image/color/palette"

// Enable PNG support:
_ "image/png"
_ "image/gif" // for processing gif images
_ "image/jpeg" // for processing jpeg images
_ "image/png" // for processing png images

"code.gitea.io/gitea/modules/setting"
"code.gitea.io/gitea/modules/util"
Expand Down
68 changes: 0 additions & 68 deletions modules/base/tool.go
Original file line number Diff line number Diff line change
Expand Up @@ -12,10 +12,8 @@ import (
"encoding/hex"
"errors"
"fmt"
"net/http"
"os"
"path/filepath"
"regexp"
"runtime"
"strconv"
"strings"
Expand All @@ -30,15 +28,6 @@ import (
"github.com/dustin/go-humanize"
)

// Use at most this many bytes to determine Content Type.
const sniffLen = 512

// SVGMimeType MIME type of SVG images.
const SVGMimeType = "image/svg+xml"

var svgTagRegex = regexp.MustCompile(`(?si)\A\s*(?:(<!--.*?-->|<!DOCTYPE\s+svg([\s:]+.*?>|>))\s*)*<svg[\s>\/]`)
var svgTagInXMLRegex = regexp.MustCompile(`(?si)\A<\?xml\b.*?\?>\s*(?:(<!--.*?-->|<!DOCTYPE\s+svg([\s:]+.*?>|>))\s*)*<svg[\s>\/]`)

// EncodeMD5 encodes string to md5 hex value.
func EncodeMD5(str string) string {
m := md5.New()
Expand Down Expand Up @@ -276,63 +265,6 @@ func IsLetter(ch rune) bool {
return 'a' <= ch && ch <= 'z' || 'A' <= ch && ch <= 'Z' || ch == '_' || ch >= 0x80 && unicode.IsLetter(ch)
}

// DetectContentType extends http.DetectContentType with more content types.
func DetectContentType(data []byte) string {
ct := http.DetectContentType(data)

if len(data) > sniffLen {
data = data[:sniffLen]
}

if setting.UI.SVG.Enabled &&
((strings.Contains(ct, "text/plain") || strings.Contains(ct, "text/html")) && svgTagRegex.Match(data) ||
strings.Contains(ct, "text/xml") && svgTagInXMLRegex.Match(data)) {

// SVG is unsupported. https://github.com/golang/go/issues/15888
return SVGMimeType
}
return ct
}

// IsRepresentableAsText returns true if file content can be represented as
// plain text or is empty.
func IsRepresentableAsText(data []byte) bool {
return IsTextFile(data) || IsSVGImageFile(data)
}

// IsTextFile returns true if file content format is plain text or empty.
func IsTextFile(data []byte) bool {
if len(data) == 0 {
return true
}
return strings.Contains(DetectContentType(data), "text/")
}

// IsImageFile detects if data is an image format
func IsImageFile(data []byte) bool {
return strings.Contains(DetectContentType(data), "image/")
}

// IsSVGImageFile detects if data is an SVG image format
func IsSVGImageFile(data []byte) bool {
return strings.Contains(DetectContentType(data), SVGMimeType)
}

// IsPDFFile detects if data is a pdf format
func IsPDFFile(data []byte) bool {
return strings.Contains(DetectContentType(data), "application/pdf")
}

// IsVideoFile detects if data is an video format
func IsVideoFile(data []byte) bool {
return strings.Contains(DetectContentType(data), "video/")
}

// IsAudioFile detects if data is an video format
func IsAudioFile(data []byte) bool {
return strings.Contains(DetectContentType(data), "audio/")
}

// EntryIcon returns the octicon class for displaying files/directories
func EntryIcon(entry *git.TreeEntry) string {
switch {
Expand Down
92 changes: 0 additions & 92 deletions modules/base/tool_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@
package base

import (
"encoding/base64"
"os"
"testing"
"time"
Expand Down Expand Up @@ -246,97 +245,6 @@ func TestIsLetter(t *testing.T) {
assert.False(t, IsLetter(0x93))
}

func TestDetectContentTypeLongerThanSniffLen(t *testing.T) {
// Pre-condition: Shorter than sniffLen detects SVG.
assert.Equal(t, "image/svg+xml", DetectContentType([]byte(`<!-- Comment --><svg></svg>`)))
// Longer than sniffLen detects something else.
assert.Equal(t, "text/plain; charset=utf-8", DetectContentType([]byte(`<!--
Comment Comment Comment Comment Comment Comment Comment Comment Comment Comment
Comment Comment Comment Comment Comment Comment Comment Comment Comment Comment
Comment Comment Comment Comment Comment Comment Comment Comment Comment Comment
Comment Comment Comment Comment Comment Comment Comment Comment Comment Comment
Comment Comment Comment Comment Comment Comment Comment Comment Comment Comment
Comment Comment Comment Comment Comment Comment Comment Comment Comment Comment
Comment Comment Comment --><svg></svg>`)))
}

// IsRepresentableAsText

func TestIsTextFile(t *testing.T) {
assert.True(t, IsTextFile([]byte{}))
assert.True(t, IsTextFile([]byte("lorem ipsum")))
}

func TestIsImageFile(t *testing.T) {
png, _ := base64.StdEncoding.DecodeString("iVBORw0KGgoAAAANSUhEUgAAAAoAAAAKCAYAAACNMs+9AAAAG0lEQVQYlWN4+vTpf3SMDTAMBYXYBLFpHgoKAeiOf0SGE9kbAAAAAElFTkSuQmCC")
assert.True(t, IsImageFile(png))
assert.False(t, IsImageFile([]byte("plain text")))
}

func TestIsSVGImageFile(t *testing.T) {
assert.True(t, IsSVGImageFile([]byte("<svg></svg>")))
assert.True(t, IsSVGImageFile([]byte(" <svg></svg>")))
assert.True(t, IsSVGImageFile([]byte(`<svg width="100"></svg>`)))
assert.True(t, IsSVGImageFile([]byte("<svg/>")))
assert.True(t, IsSVGImageFile([]byte(`<?xml version="1.0" encoding="UTF-8"?><svg></svg>`)))
assert.True(t, IsSVGImageFile([]byte(`<!-- Comment -->
<svg></svg>`)))
assert.True(t, IsSVGImageFile([]byte(`<!-- Multiple -->
<!-- Comments -->
<svg></svg>`)))
assert.True(t, IsSVGImageFile([]byte(`<!-- Multiline
Comment -->
<svg></svg>`)))
assert.True(t, IsSVGImageFile([]byte(`<!DOCTYPE svg PUBLIC "-//W3C//DTD SVG 1.1 Basic//EN"
"http://www.w3.org/Graphics/SVG/1.1/DTD/svg11-basic.dtd">
<svg></svg>`)))
assert.True(t, IsSVGImageFile([]byte(`<?xml version="1.0" encoding="UTF-8"?>
<!-- Comment -->
<svg></svg>`)))
assert.True(t, IsSVGImageFile([]byte(`<?xml version="1.0" encoding="UTF-8"?>
<!-- Multiple -->
<!-- Comments -->
<svg></svg>`)))
assert.True(t, IsSVGImageFile([]byte(`<?xml version="1.0" encoding="UTF-8"?>
<!-- Multline
Comment -->
<svg></svg>`)))
assert.True(t, IsSVGImageFile([]byte(`<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE svg PUBLIC "-//W3C//DTD SVG 1.1//EN" "http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd">
<!-- Multline
Comment -->
<svg></svg>`)))
assert.False(t, IsSVGImageFile([]byte{}))
assert.False(t, IsSVGImageFile([]byte("svg")))
assert.False(t, IsSVGImageFile([]byte("<svgfoo></svgfoo>")))
assert.False(t, IsSVGImageFile([]byte("text<svg></svg>")))
assert.False(t, IsSVGImageFile([]byte("<html><body><svg></svg></body></html>")))
assert.False(t, IsSVGImageFile([]byte(`<script>"<svg></svg>"</script>`)))
assert.False(t, IsSVGImageFile([]byte(`<!-- <svg></svg> inside comment -->
<foo></foo>`)))
assert.False(t, IsSVGImageFile([]byte(`<?xml version="1.0" encoding="UTF-8"?>
<!-- <svg></svg> inside comment -->
<foo></foo>`)))
}

func TestIsPDFFile(t *testing.T) {
pdf, _ := base64.StdEncoding.DecodeString("JVBERi0xLjYKJcOkw7zDtsOfCjIgMCBvYmoKPDwvTGVuZ3RoIDMgMCBSL0ZpbHRlci9GbGF0ZURlY29kZT4+CnN0cmVhbQp4nF3NPwsCMQwF8D2f4s2CNYk1baF0EHRwOwg4iJt/NsFb/PpevUE4Mjwe")
assert.True(t, IsPDFFile(pdf))
assert.False(t, IsPDFFile([]byte("plain text")))
}

func TestIsVideoFile(t *testing.T) {
mp4, _ := base64.StdEncoding.DecodeString("AAAAGGZ0eXBtcDQyAAAAAGlzb21tcDQyAAEI721vb3YAAABsbXZoZAAAAADaBlwX2gZcFwAAA+gA")
assert.True(t, IsVideoFile(mp4))
assert.False(t, IsVideoFile([]byte("plain text")))
}

func TestIsAudioFile(t *testing.T) {
mp3, _ := base64.StdEncoding.DecodeString("SUQzBAAAAAABAFRYWFgAAAASAAADbWFqb3JfYnJhbmQAbXA0MgBUWFhYAAAAEQAAA21pbm9yX3Zl")
assert.True(t, IsAudioFile(mp3))
assert.False(t, IsAudioFile([]byte("plain text")))
}

// TODO: Test EntryIcon

func TestSetupGiteaRoot(t *testing.T) {
Expand Down
13 changes: 13 additions & 0 deletions modules/git/blob.go
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,8 @@ import (
"encoding/base64"
"io"
"io/ioutil"

"code.gitea.io/gitea/modules/typesniffer"
)

// This file contains common functions between the gogit and !gogit variants for git Blobs
Expand Down Expand Up @@ -82,3 +84,14 @@ func (b *Blob) GetBlobContentBase64() (string, error) {
}
return string(out), nil
}

// GuessContentType guesses the content type of the blob.
func (b *Blob) GuessContentType() (typesniffer.SniffedType, error) {
r, err := b.DataAsync()
if err != nil {
return typesniffer.SniffedType{}, err
}
defer r.Close()

return typesniffer.DetectContentTypeFromReader(r)
}
70 changes: 0 additions & 70 deletions modules/git/commit.go
Original file line number Diff line number Diff line change
Expand Up @@ -11,13 +11,7 @@ import (
"container/list"
"errors"
"fmt"
"image"
"image/color"
_ "image/gif" // for processing gif images
_ "image/jpeg" // for processing jpeg images
_ "image/png" // for processing png images
"io"
"net/http"
"os/exec"
"strconv"
"strings"
Expand Down Expand Up @@ -81,70 +75,6 @@ func (c *Commit) ParentCount() int {
return len(c.Parents)
}

func isImageFile(data []byte) (string, bool) {
contentType := http.DetectContentType(data)
if strings.Contains(contentType, "image/") {
return contentType, true
}
return contentType, false
}

// IsImageFile is a file image type
func (c *Commit) IsImageFile(name string) bool {
blob, err := c.GetBlobByPath(name)
if err != nil {
return false
}

dataRc, err := blob.DataAsync()
if err != nil {
return false
}
defer dataRc.Close()
buf := make([]byte, 1024)
n, _ := dataRc.Read(buf)
buf = buf[:n]
_, isImage := isImageFile(buf)
return isImage
}

// ImageMetaData represents metadata of an image file
type ImageMetaData struct {
ColorModel color.Model
Width int
Height int
ByteSize int64
}

// ImageInfo returns information about the dimensions of an image
func (c *Commit) ImageInfo(name string) (*ImageMetaData, error) {
if !c.IsImageFile(name) {
return nil, nil
}

blob, err := c.GetBlobByPath(name)
if err != nil {
return nil, err
}
reader, err := blob.DataAsync()
if err != nil {
return nil, err
}
defer reader.Close()
config, _, err := image.DecodeConfig(reader)
if err != nil {
return nil, err
}

metadata := ImageMetaData{
ColorModel: config.ColorModel,
Width: config.Width,
Height: config.Height,
ByteSize: blob.Size(),
}
return &metadata, nil
}

// GetCommitByPath return the commit of relative path object.
func (c *Commit) GetCommitByPath(relpath string) (*Commit, error) {
return c.repo.getCommitByPathWithID(c.ID, relpath)
Expand Down
4 changes: 2 additions & 2 deletions modules/indexer/code/bleve.go
Original file line number Diff line number Diff line change
Expand Up @@ -16,12 +16,12 @@ import (

"code.gitea.io/gitea/models"
"code.gitea.io/gitea/modules/analyze"
"code.gitea.io/gitea/modules/base"
"code.gitea.io/gitea/modules/charset"
"code.gitea.io/gitea/modules/git"
"code.gitea.io/gitea/modules/log"
"code.gitea.io/gitea/modules/setting"
"code.gitea.io/gitea/modules/timeutil"
"code.gitea.io/gitea/modules/typesniffer"
"code.gitea.io/gitea/modules/util"

"github.com/blevesearch/bleve/v2"
Expand Down Expand Up @@ -211,7 +211,7 @@ func (b *BleveIndexer) addUpdate(batchWriter git.WriteCloserError, batchReader *
fileContents, err := ioutil.ReadAll(io.LimitReader(batchReader, size))
if err != nil {
return err
} else if !base.IsTextFile(fileContents) {
} else if !typesniffer.DetectContentType(fileContents).IsText() {
// FIXME: UTF-16 files will probably fail here
return nil
}
Expand Down
4 changes: 2 additions & 2 deletions modules/indexer/code/elastic_search.go
Original file line number Diff line number Diff line change
Expand Up @@ -16,12 +16,12 @@ import (

"code.gitea.io/gitea/models"
"code.gitea.io/gitea/modules/analyze"
"code.gitea.io/gitea/modules/base"
"code.gitea.io/gitea/modules/charset"
"code.gitea.io/gitea/modules/git"
"code.gitea.io/gitea/modules/log"
"code.gitea.io/gitea/modules/setting"
"code.gitea.io/gitea/modules/timeutil"
"code.gitea.io/gitea/modules/typesniffer"

"github.com/go-enry/go-enry/v2"
jsoniter "github.com/json-iterator/go"
Expand Down Expand Up @@ -210,7 +210,7 @@ func (b *ElasticSearchIndexer) addUpdate(batchWriter git.WriteCloserError, batch
fileContents, err := ioutil.ReadAll(io.LimitReader(batchReader, size))
if err != nil {
return nil, err
} else if !base.IsTextFile(fileContents) {
} else if !typesniffer.DetectContentType(fileContents).IsText() {
// FIXME: UTF-16 files will probably fail here
return nil, nil
}
Expand Down
Loading