diff --git a/channels.go b/channels.go index 9aebf27f..3a0add17 100644 --- a/channels.go +++ b/channels.go @@ -16,6 +16,7 @@ import ( "github.com/slack-go/slack" "github.com/rusq/slackdump/v2/internal/network" + "github.com/rusq/slackdump/v2/internal/structures" ) // Channels keeps slice of channels @@ -133,7 +134,7 @@ func (cs Channels) ToText(w io.Writer, sd *SlackDumper) (err error) { func (sd *SlackDumper) channelName(channel *slack.Channel) (who string) { switch { case channel.IsIM: - who = "@" + sd.username(channel.User) + who = "@" + structures.ResolveUsername(channel.User, sd.UserIndex) case channel.IsMpIM: who = strings.Replace(channel.Purpose.Value, " messaging with", "", -1) case channel.IsPrivate: @@ -143,19 +144,3 @@ func (sd *SlackDumper) channelName(channel *slack.Channel) (who string) { } return who } - -// username tries to resolve the username by ID. If the internal users map is not -// initialised, it will return the ID, otherwise, if the user is not found in -// cache, it will assume that the user is external, and return the ID with -// "external" prefix. -func (sd *SlackDumper) username(id string) string { - if sd.UserIndex == nil { - // no user cache, use the IDs. - return id - } - user, ok := sd.UserIndex[id] - if !ok { - return ":" + id - } - return user.Name -} diff --git a/internal/export/export.go b/internal/export/export.go index 1c49bbd8..1bb6ef35 100644 --- a/internal/export/export.go +++ b/internal/export/export.go @@ -119,9 +119,6 @@ func (se *Export) exportConversation(ctx context.Context, ch slack.Channel, user return nil } -// refRoot is the parent address for the topmost message chunk. -const refRoot = -1 - // downloadFn returns the process function that should be passed to // DumpMessagesRaw that will handle the download of the files. If the // downloader is not started, i.e. 
if file download is disabled, it will @@ -135,14 +132,14 @@ func (se *Export) downloadFn(dl *downloader.Client, channelName string) func(msg dir := filepath.Join(se.basedir(channelName), dirAttach) return func(msg []slackdump.Message, channelID string) (slackdump.ProcessResult, error) { total := 0 - if err := extractFiles(msg, refRoot, func(file slack.File, addr fileAddr) error { + if err := Extract(msg, Root, func(file slack.File, addr Addr) error { filename, err := dl.DownloadFile(dir, file) if err != nil { return err } dlog.Debugf("submitted for download: %s", file.Name) total++ - return updateFileURL(msg, addr, path.Join(dirAttach, path.Base(filename))) + return UpdateURLs(msg, addr, path.Join(dirAttach, path.Base(filename))) }); err != nil { if errors.Is(err, downloader.ErrNotStarted) { return slackdump.ProcessResult{Entity: entFiles, Count: 0}, nil @@ -154,64 +151,6 @@ func (se *Export) downloadFn(dl *downloader.Client, channelName string) func(msg } } -// updateFileURL updates the URL link for the files in message chunk msgs. Addr contains -// an address of the message to update the URL for, and filename is the path to the file -// on the local filesystem. It will return an error if the address references out of range. -func updateFileURL(msgs []slackdump.Message, addr fileAddr, filename string) error { - if addr.idxParMsg == refRoot { - if addr.idxMsg < 0 || len(msgs) <= addr.idxMsg { - return errors.New("invalid message reference") - } - if addr.idxFile < 0 || len(msgs[addr.idxMsg].Files) < addr.idxFile { - return errors.New("invalid file reference") - } - msgs[addr.idxMsg].Files[addr.idxFile].URLPrivateDownload = filename - msgs[addr.idxMsg].Files[addr.idxFile].URLPrivate = filename - } else { - return updateFileURL( - msgs[addr.idxParMsg].ThreadReplies, - fileAddr{idxMsg: addr.idxMsg, idxParMsg: refRoot, idxFile: addr.idxFile}, - filename, - ) - } - return nil -} - -// fileAddr is the address of the file in the messages slice. 
-// idxMsg - index of the message or message reply in the provided slice -// idxParMsg - index of the parent message. If it is not equal to refRoot, then -// it is assumed that it is the reference to a message reply: -// i.e.: msg[idxParMsg].ThreadReplies[idxMsg]. -// idxFile - index of the file ine the message's file slice. -type fileAddr struct { - idxMsg int // index of the message in the messages slice - idxParMsg int // index of the parent message, or refRoot if it's the address of the top level message - idxFile int // index of the file in the file slice. -} - -// extractFiles scans the message slice msgs, and calls fn for each file it -// finds. fn is called with the copy of the file and that file's address in the -// provided message slice. idxParentMsg is the index of the parent message (for -// message replies slice), or refRoot if it's the topmost messages slice (see -// invocation in downloadFn). -func extractFiles(msgs []slackdump.Message, idxParentMsg int, fn func(file slack.File, addr fileAddr) error) error { - for mi := range msgs { - if len(msgs[mi].Files) > 0 { - for fileIdx, file := range msgs[mi].Files { - if err := fn(file, fileAddr{idxMsg: mi, idxParMsg: idxParentMsg, idxFile: fileIdx}); err != nil { - return err - } - } - } - if len(msgs[mi].ThreadReplies) > 0 { - if err := extractFiles(msgs[mi].ThreadReplies, mi, fn); err != nil { - return err - } - } - } - return nil -} - // validName returns the channel or user name. Following the naming convention // described by @niklasdahlheimer in this post (thanks to @Neznakomec for // discovering it): diff --git a/internal/export/files.go b/internal/export/files.go new file mode 100644 index 00000000..ddbcfb45 --- /dev/null +++ b/internal/export/files.go @@ -0,0 +1,82 @@ +// Package files contains some additional file logic. +package export + +import ( + "errors" + + "github.com/rusq/slackdump/v2" + "github.com/slack-go/slack" +) + +// Root is the parent address for the topmost message chunk. 
+const Root = -1
+
+// Addr is the address of the file in the messages slice.
+// idxMsg - index of the message or message reply in the provided slice
+// idxParMsg - index of the parent message. If it is equal to Root,
+// then it's the address of the file in a top level message:
+//
+//	msg[idxMsg].Files[idxFile]
+//
+// if it is not equal to Root, then it is assumed that it is
+// a reference to a message reply:
+//
+//	msg[idxParMsg].ThreadReplies[idxMsg].Files[idxFile]
+//
+// idxFile - index of the file in the message's file slice.
+//
+type Addr struct {
+	idxMsg    int // index of the message in the messages slice
+	idxParMsg int // index of the parent message, or Root if it's the address of the top level message
+	idxFile   int // index of the file in the file slice.
+}
+
+// UpdateURLs sets URLPrivate and URLPrivateDownload of a single file in the
+// message chunk msgs to filename, the path to the file on the local filesystem.
+// addr is the address of that file within msgs (see Addr). It returns an error
+// if any index in addr (parent message, message or file) is out of range.
+func UpdateURLs(msgs []slackdump.Message, addr Addr, filename string) error {
+	if addr.idxParMsg != Root { // thread reply: validate the parent, then descend
+		if addr.idxParMsg < 0 || len(msgs) <= addr.idxParMsg {
+			return errors.New("invalid parent message reference")
+		}
+		reply := Addr{idxMsg: addr.idxMsg, idxParMsg: Root, idxFile: addr.idxFile}
+		return UpdateURLs(msgs[addr.idxParMsg].ThreadReplies, reply, filename)
+	}
+
+	if addr.idxMsg < 0 || len(msgs) <= addr.idxMsg {
+		return errors.New("invalid message reference")
+	}
+	if addr.idxFile < 0 || len(msgs[addr.idxMsg].Files) <= addr.idxFile { // valid indices are 0..len-1
+		return errors.New("invalid file reference")
+	}
+	msgs[addr.idxMsg].Files[addr.idxFile].URLPrivateDownload = filename
+	msgs[addr.idxMsg].Files[addr.idxFile].URLPrivate = filename
+	return nil
+}
+
+// Extract scans the message slice msgs, including thread replies, and calls fn
+// for each file it finds with a copy of the file and that file's address in
+// msgs. idxParentMsg is the index of the parent message when msgs is a replies
+// slice, or Root for the topmost slice (see invocation in downloadFn). It
+// stops at, and returns, the first error from fn; a nil fn is an error.
+func Extract(msgs []slackdump.Message, idxParentMsg int, fn func(file slack.File, addr Addr) error) error {
+	if fn == nil {
+		return errors.New("extractFiles: internal error: no callback function")
+	}
+	for mi := range msgs {
+		if len(msgs[mi].Files) > 0 {
+			for fileIdx, file := range msgs[mi].Files {
+				if err := fn(file, Addr{idxMsg: mi, idxParMsg: idxParentMsg, idxFile: fileIdx}); err != nil {
+					return err
+				}
+			}
+		}
+		if len(msgs[mi].ThreadReplies) > 0 {
+			if err := Extract(msgs[mi].ThreadReplies, mi, fn); err != nil {
+				return err
+			}
+		}
+	}
+	return nil
+}
diff --git a/internal/structures/structures.go b/internal/structures/structures.go
new file mode 100644
index 00000000..5d6a0b91
--- /dev/null
+++ b/internal/structures/structures.go
@@ -0,0 +1,2 @@
+// Package structures provides functions to parse Slack data types.
+package structures
diff --git a/internal/structures/user.go b/internal/structures/user.go
new file mode 100644
index 00000000..50c5da34
--- /dev/null
+++ b/internal/structures/user.go
@@ -0,0 +1,37 @@
+package structures
+
+import "github.com/slack-go/slack"
+
+// UserIndex maps a user ID to the corresponding Slack user.
+type UserIndex map[string]*slack.User
+
+// ResolveUsername resolves the user name by ID. If userIdx is nil, it returns
+// the ID as is; if the ID is not in userIdx, the user is assumed to be
+// external and the ID is returned with a ":" prefix.
+func ResolveUsername(id string, userIdx UserIndex) string {
+	if userIdx == nil {
+		// no user cache, use the IDs.
+		return id
+	}
+	user, ok := userIdx[id]
+	if !ok {
+		return ":" + id
+	}
+	return user.Name
+}
+
+// SenderName returns the username of the sender of msg (the comment author, if a comment is attached), or "" if there is no user ID.
+func SenderName(msg *slack.Message, userIdx UserIndex) string {
+	var userid string
+	if msg.Comment != nil {
+		userid = msg.Comment.User
+	} else {
+		userid = msg.User
+	}
+
+	if userid != "" {
+		return ResolveUsername(userid, userIdx)
+	}
+
+	return ""
+}
diff --git a/messages.go b/messages.go
index 4d7cde19..fbf528ef 100644
--- a/messages.go
+++ b/messages.go
@@ -32,11 +32,6 @@ const (
 	minMsgTimeApart = 2 * time.Minute
 )
 
-// Channel keeps the slice of messages.
-//
-// Deprecated: use Conversation instead.
-type Channel = Conversation
-
 type ProcessResult struct {
 	Entity string
 	Count  int
@@ -110,11 +105,11 @@ func (c Conversation) IsThread() bool {
 }
 
 // ToText outputs Messages m to io.Writer w in text format.
-func (c Conversation) ToText(w io.Writer, sd *SlackDumper) (err error) {
+func (c Conversation) ToText(w io.Writer, userIdx structures.UserIndex) (err error) {
 	buf := bufio.NewWriter(w)
 	defer buf.Flush()
 
-	return sd.generateText(w, c.Messages, "")
+	return generateText(buf, c.Messages, "", userIdx) // write via buf, so the buffered writer flushed above is actually used
 }
 
 // DumpAllURL dumps messages from the slack URL, it supports conversations and
@@ -225,10 +220,6 @@ func (sd *SlackDumper) dumpMessages(ctx context.Context, channelID string, oldes
 		}
 
 		chunk := sd.convertMsgs(resp.Messages)
-		// threads, err := sd.populateThreads(ctx, threadLimiter, chunk, channelID, sd.dumpThread)
-		// if err != nil {
-		// 	return nil, err
-		// }
 
 		results, err := runProcessFuncs(chunk, channelID, pfns...)
if err != nil { @@ -292,7 +283,7 @@ func (sd *SlackDumper) convHistoryParams(channelID, cursor string, oldest, lates return params } -func (sd *SlackDumper) generateText(w io.Writer, m []Message, prefix string) error { +func generateText(w io.Writer, m []Message, prefix string, userIdx structures.UserIndex) error { var ( prevMsg Message prevTime time.Time @@ -307,13 +298,13 @@ func (sd *SlackDumper) generateText(w io.Writer, m []Message, prefix string) err fmt.Fprintf(w, prefix+"%s\n", message.Text) } else { fmt.Fprintf(w, prefix+"\n"+prefix+"> %s [%s] @ %s:\n%s\n", - sd.SenderName(&message), message.User, + structures.SenderName(&message.Message, userIdx), message.User, t.Format(textTimeFmt), prefix+html.UnescapeString(message.Text), ) } if len(message.ThreadReplies) > 0 { - if err := sd.generateText(w, message.ThreadReplies, "| "); err != nil { + if err := generateText(w, message.ThreadReplies, "| ", userIdx); err != nil { return err } } @@ -323,22 +314,6 @@ func (sd *SlackDumper) generateText(w io.Writer, m []Message, prefix string) err return nil } -// SenderName returns username for the message -func (sd *SlackDumper) SenderName(msg *Message) string { - var userid string - if msg.Comment != nil { - userid = msg.Comment.User - } else { - userid = msg.User - } - - if userid != "" { - return sd.username(userid) - } - - return "" -} - func sortMessages(msgs []Message) { sort.Slice(msgs, func(i, j int) bool { return msgs[i].Timestamp < msgs[j].Timestamp diff --git a/messages_test.go b/messages_test.go index 56fe8d99..fc607a82 100644 --- a/messages_test.go +++ b/messages_test.go @@ -15,6 +15,7 @@ import ( "github.com/rusq/slackdump/v2/internal/fixtures" "github.com/rusq/slackdump/v2/internal/network" + "github.com/rusq/slackdump/v2/internal/structures" ) var ( @@ -308,48 +309,34 @@ func TestSlackDumper_DumpMessages(t *testing.T) { } func TestSlackDumper_generateText(t *testing.T) { - type fields struct { - client clienter - Users Users - UserIndex 
map[string]*slack.User - options Options - } type args struct { - m []Message - prefix string + m []Message + prefix string + userIdx structures.UserIndex } tests := []struct { name string - fields fields args args wantW string wantErr bool }{ { "two messages from the same person, not very far apart, with html escaped char", - fields{}, - args{[]Message{testMsg1, testMsg2}, ""}, + args{[]Message{testMsg1, testMsg2}, "", nil}, "\n> U10H7D9RR [U10H7D9RR] @ 03/12/2021 02:15:51 Z:\nTest message < > < >\nmessage 2\n", false, }, { "two messages from the same person, far apart", - fields{}, - args{[]Message{testMsg1, testMsg4t}, ""}, + args{[]Message{testMsg1, testMsg4t}, "", nil}, "\n> U10H7D9RR [U10H7D9RR] @ 03/12/2021 02:15:51 Z:\nTest message < > < >\n\n> UP58RAHCJ [UP58RAHCJ] @ 03/12/2021 09:47:34 Z:\nmessage 4\n| \n| > U01HPAR0YFN [U01HPAR0YFN] @ 03/12/2021 18:05:26 Z:\n| blah blah, reply 1\n", false, }, } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { - sd := &SlackDumper{ - client: tt.fields.client, - Users: tt.fields.Users, - UserIndex: tt.fields.UserIndex, - options: tt.fields.options, - } w := &bytes.Buffer{} - if err := sd.generateText(w, tt.args.m, tt.args.prefix); (err != nil) != tt.wantErr { + if err := generateText(w, tt.args.m, tt.args.prefix, tt.args.userIdx); (err != nil) != tt.wantErr { t.Errorf("SlackDumper.generateText() error = %v, wantErr %v", err, tt.wantErr) return } diff --git a/slackdump.go b/slackdump.go index 970e8a1e..78b4bbde 100644 --- a/slackdump.go +++ b/slackdump.go @@ -160,10 +160,10 @@ func checkCacheFile(filename string, maxAge time.Duration) error { return err } - return validateFileStats(fi, maxAge) + return validateCache(fi, maxAge) } -func validateFileStats(fi os.FileInfo, maxAge time.Duration) error { +func validateCache(fi os.FileInfo, maxAge time.Duration) error { if fi.IsDir() { return errors.New("cache file is a directory") } diff --git a/slackdump_test.go b/slackdump_test.go index 58cd8cc0..7372c9a8 100644 
--- a/slackdump_test.go +++ b/slackdump_test.go @@ -15,7 +15,7 @@ import ( "github.com/rusq/slackdump/v2/internal/network" ) -func Test_validateFileStats(t *testing.T) { +func Test_validateCache(t *testing.T) { type args struct { maxAge time.Duration } @@ -80,7 +80,7 @@ func Test_validateFileStats(t *testing.T) { tt.expectFn(mfi) - if err := validateFileStats(mfi, tt.args.maxAge); (err != nil) != tt.wantErr { + if err := validateCache(mfi, tt.args.maxAge); (err != nil) != tt.wantErr { t.Errorf("validateFileStats() error = %v, wantErr %v", err, tt.wantErr) } })