Skip to content

Replace SplitStringAtByteN with EllipsisStringWholeWord #31822

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 1 commit into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion models/actions/run.go
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ import (
"code.gitea.io/gitea/models/db"
repo_model "code.gitea.io/gitea/models/repo"
user_model "code.gitea.io/gitea/models/user"
"code.gitea.io/gitea/modules/base"
"code.gitea.io/gitea/modules/git"
"code.gitea.io/gitea/modules/json"
api "code.gitea.io/gitea/modules/structs"
Expand Down Expand Up @@ -305,7 +306,7 @@ func InsertRun(ctx context.Context, run *ActionRun, jobs []*jobparser.SingleWork
} else {
hasWaiting = true
}
job.Name, _ = util.SplitStringAtByteN(job.Name, 255)
job.Name = base.EllipsisStringWholeWord(job.Name, 255)
runJobs = append(runJobs, &ActionRunJob{
RunID: run.ID,
RepoID: run.RepoID,
Expand Down
4 changes: 2 additions & 2 deletions models/actions/task.go
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ import (
auth_model "code.gitea.io/gitea/models/auth"
"code.gitea.io/gitea/models/db"
"code.gitea.io/gitea/models/unit"
"code.gitea.io/gitea/modules/base"
"code.gitea.io/gitea/modules/container"
"code.gitea.io/gitea/modules/log"
"code.gitea.io/gitea/modules/setting"
Expand Down Expand Up @@ -298,9 +299,8 @@ func CreateTaskForRunner(ctx context.Context, runner *ActionRunner) (*ActionTask
if len(workflowJob.Steps) > 0 {
steps := make([]*ActionTaskStep, len(workflowJob.Steps))
for i, v := range workflowJob.Steps {
name, _ := util.SplitStringAtByteN(v.String(), 255)
steps[i] = &ActionTaskStep{
Name: name,
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

See above

Name: base.EllipsisStringWholeWord(v.String(), 255),
TaskID: task.ID,
Index: int64(i),
RepoID: task.RepoID,
Expand Down
23 changes: 23 additions & 0 deletions modules/base/tool.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ import (
"strconv"
"strings"
"time"
"unicode"
"unicode/utf8"

"code.gitea.io/gitea/modules/git"
Expand Down Expand Up @@ -131,6 +132,28 @@ func EllipsisString(str string, length int) string {
return string([]rune(str)[:length-3]) + "..."
}

// EllipsisStringWholeWord returns a truncated short string with … appended if the input is larger then the limit.
// If the input contains spaces the string is truncated at the last space before reaching the length limit.
// If the input does not contain a space before reaching the length limit, the input is truncated in the middle of a word.
func EllipsisStringWholeWord(str string, maxLength int) string {
lastSpace := -1
length := 0
for i, r := range str {
if unicode.IsSpace(r) {
lastSpace = i
}
length++
if length > maxLength {
if lastSpace != -1 {
return str[:lastSpace] + "…"
}
return str[:i] + "…"
}
}

return str
}

// TruncateString returns a truncated string with given limit,
// it returns input string if length is not reached limit.
func TruncateString(str string, limit int) string {
Expand Down
32 changes: 32 additions & 0 deletions modules/base/tool_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -129,6 +129,38 @@ func TestEllipsisString(t *testing.T) {
assert.Equal(t, "测试文本一二三四", EllipsisString("测试文本一二三四", 10))
}

func TestEllipsisStringWholeWord(t *testing.T) {
assert.Equal(t, "…", EllipsisStringWholeWord("foobar", 0))
assert.Equal(t, "f…", EllipsisStringWholeWord("foobar", 1))
assert.Equal(t, "fo…", EllipsisStringWholeWord("foobar", 2))
assert.Equal(t, "foo…", EllipsisStringWholeWord("foobar", 3))
assert.Equal(t, "foob…", EllipsisStringWholeWord("foobar", 4))
assert.Equal(t, "fooba…", EllipsisStringWholeWord("foobar", 5))
assert.Equal(t, "foobar", EllipsisStringWholeWord("foobar", 6))
assert.Equal(t, "foobar", EllipsisStringWholeWord("foobar", 10))

assert.Equal(t, "…", EllipsisStringWholeWord("foo bar", 0))
assert.Equal(t, "f…", EllipsisStringWholeWord("foo bar", 1))
assert.Equal(t, "fo…", EllipsisStringWholeWord("foo bar", 2))
assert.Equal(t, "foo…", EllipsisStringWholeWord("foo bar", 3))
assert.Equal(t, "foo…", EllipsisStringWholeWord("foo bar", 4))
assert.Equal(t, "foo…", EllipsisStringWholeWord("foo bar", 5))
assert.Equal(t, "foo…", EllipsisStringWholeWord("foo bar", 6))
assert.Equal(t, "foo bar", EllipsisStringWholeWord("foo bar", 7))
assert.Equal(t, "foo bar", EllipsisStringWholeWord("foo bar", 10))

assert.Equal(t, "foo bar…", EllipsisStringWholeWord("foo bar foo", 7))
assert.Equal(t, "foo bar…", EllipsisStringWholeWord("foo bar foo", 8))
assert.Equal(t, "foo bar…", EllipsisStringWholeWord("foo bar foo", 9))
assert.Equal(t, "foo bar…", EllipsisStringWholeWord("foo bar foo", 10))
assert.Equal(t, "foo bar foo", EllipsisStringWholeWord("foo bar foo", 11))

assert.Equal(t, "测试文本…", EllipsisStringWholeWord("测试文本一二三四", 4))
Copy link
Contributor

@wxiaoguang wxiaoguang Aug 13, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The truncation should be done at "byte" level, but not "rune" level.

The problem of "rune" level is that CJK chars will be too much longer than expected.

assert.Equal(t, "foo…", EllipsisStringWholeWord("foo bar", 4))
assert.Equal(t, "测试文本…", EllipsisStringWholeWord("测试文本一二三四", 4))

See the difference. That's why the old function is "SplitStringAtByteN"

(History: #17928 (review) )

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Sure, it was intended to be at rune level because EllipsisString works on rune level too. Didn't thought about the database restriction here because I would expect a hard cut.

Copy link
Contributor

@wxiaoguang wxiaoguang Aug 14, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Actually there is usually no "database restriction", because VARCHAR(10) could store 10 CJK runes or emoji runes (say, 30 - 40bytes) if its charset/collation is UTF-8 based.

I think the real problem is that CJK chars render much longer than ASCII chars with the same rune length on the UI ......... for example, assume one ASCII char's width is about W.

  • 9 CJK runes: about W * 9 * 2 = W * 18
  • 9 bytes CJK chars: W * 3 * 2 = W * 6
  • 9 bytes ASCII chars: W * 9

So "9 CJK runes" exceeds the width limitation too much on the UI, while usually "9 bytes CJK chars" won't exceed the expected "9 bytes ASCII" width.


(I don't mean blocking for this change, just share the backgrounds, maybe it's worth to document the details no matter what method is finally chosen)

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

-> Refactor "string truncate" #32984

assert.Equal(t, "测试文本…", EllipsisStringWholeWord("测试文本 一二三四", 6)) // contains special unicode space U+2008
assert.Equal(t, "测试文本一二…", EllipsisStringWholeWord("测试文本一二三四", 6))
assert.Equal(t, "测试文本一二三四", EllipsisStringWholeWord("测试文本一二三四", 10))
}

func TestTruncateString(t *testing.T) {
assert.Equal(t, "", TruncateString("foobar", 0))
assert.Equal(t, "f", TruncateString("foobar", 1))
Expand Down
4 changes: 2 additions & 2 deletions modules/issue/template/unmarshal.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,11 +9,11 @@ import (
"path"
"strconv"

"code.gitea.io/gitea/modules/base"
"code.gitea.io/gitea/modules/git"
"code.gitea.io/gitea/modules/markup/markdown"
"code.gitea.io/gitea/modules/setting"
api "code.gitea.io/gitea/modules/structs"
"code.gitea.io/gitea/modules/util"

"gopkg.in/yaml.v3"
)
Expand Down Expand Up @@ -109,7 +109,7 @@ func unmarshal(filename string, content []byte) (*api.IssueTemplate, error) {

it.Content = string(content)
it.Name = path.Base(it.FileName) // paths in Git are always '/' separated - do not use filepath!
it.About, _ = util.SplitStringAtByteN(it.Content, 80)
it.About = base.EllipsisStringWholeWord(it.Content, 80)
} else {
it.Content = templateBody
if it.About == "" {
Expand Down
5 changes: 2 additions & 3 deletions routers/api/actions/runner/runner.go
Original file line number Diff line number Diff line change
Expand Up @@ -12,8 +12,8 @@ import (
repo_model "code.gitea.io/gitea/models/repo"
user_model "code.gitea.io/gitea/models/user"
"code.gitea.io/gitea/modules/actions"
"code.gitea.io/gitea/modules/base"
"code.gitea.io/gitea/modules/log"
"code.gitea.io/gitea/modules/util"
actions_service "code.gitea.io/gitea/services/actions"

runnerv1 "code.gitea.io/actions-proto-go/runner/v1"
Expand Down Expand Up @@ -69,10 +69,9 @@ func (s *Service) Register(
labels := req.Msg.Labels

// create new runner
name, _ := util.SplitStringAtByteN(req.Msg.Name, 255)
runner := &actions_model.ActionRunner{
UUID: gouuid.New().String(),
Name: name,
Name: base.EllipsisStringWholeWord(req.Msg.Name, 255),
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why are we changing an API response in the first place?
Let's leave it to the caller to handle that case.

OwnerID: runnerToken.OwnerID,
RepoID: runnerToken.RepoID,
Version: req.Msg.Version,
Expand Down
12 changes: 2 additions & 10 deletions services/feed/action.go
Original file line number Diff line number Diff line change
Expand Up @@ -13,11 +13,11 @@ import (
issues_model "code.gitea.io/gitea/models/issues"
repo_model "code.gitea.io/gitea/models/repo"
user_model "code.gitea.io/gitea/models/user"
"code.gitea.io/gitea/modules/base"
"code.gitea.io/gitea/modules/git"
"code.gitea.io/gitea/modules/json"
"code.gitea.io/gitea/modules/log"
"code.gitea.io/gitea/modules/repository"
"code.gitea.io/gitea/modules/util"
notify_service "code.gitea.io/gitea/services/notify"
)

Expand Down Expand Up @@ -109,15 +109,7 @@ func (a *actionNotifier) CreateIssueComment(ctx context.Context, doer *user_mode
IsPrivate: issue.Repo.IsPrivate,
}

truncatedContent, truncatedRight := util.SplitStringAtByteN(comment.Content, 200)
if truncatedRight != "" {
// in case the content is in a Latin family language, we remove the last broken word.
lastSpaceIdx := strings.LastIndex(truncatedContent, " ")
if lastSpaceIdx != -1 && (len(truncatedContent)-lastSpaceIdx < 15) {
truncatedContent = truncatedContent[:lastSpaceIdx] + "…"
}
}
act.Content = fmt.Sprintf("%d|%s", issue.Index, truncatedContent)
act.Content = fmt.Sprintf("%d|%s", issue.Index, base.EllipsisStringWholeWord(comment.Content, 200))
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

See above


if issue.IsPull {
act.OpType = activities_model.ActionCommentPull
Expand Down