Skip to content

Wakatime.com migration #196

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Draft
wants to merge 11 commits into
base: main
Choose a base branch
from
4 changes: 4 additions & 0 deletions .rubocop.yml
Original file line number Diff line number Diff line change
@@ -1,6 +1,10 @@
# Omakase Ruby styling for Rails
inherit_gem: { rubocop-rails-omakase: rubocop.yml }

# this is so annoying, i'm sorry
Layout/TrailingWhitespace:
Enabled: false

# Overwrite or add rules to create your own house style
#
# # Use `[a, [b, c]]` not `[ a, [ b, c ] ]`
Expand Down
8 changes: 7 additions & 1 deletion app/controllers/users_controller.rb
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,12 @@ def migrate_heartbeats
notice: "Heartbeats & api keys migration started"
end

# Enqueues the wakatime.com heartbeat import for @user, then sends the
# browser back to the settings page it came from with a flash notice.
def migrate_wakatimecom_heartbeats
  OneTime::MigrateWakatimecomHeartbeatsJob.perform_later(@user.id)

  # Own settings live under /my/settings; otherwise use the per-user settings route.
  settings_destination =
    if is_own_settings?
      my_settings_path
    else
      settings_user_path(@user)
    end

  redirect_to settings_destination, notice: "Wakatime.com heartbeats migration started"
end

def wakatime_setup
api_key = current_user&.api_keys&.last
api_key ||= current_user.api_keys.create!(name: "Wakatime API Key")
Expand Down Expand Up @@ -132,6 +138,6 @@ def is_own_settings?
end

# Strong parameters for the settings form: only these keys under `user`
# may be mass-assigned.
def user_params
  params.require(:user).permit(
    :uses_slack_status,
    :hackatime_extension_text_type,
    :timezone,
    :wakatime_api_key,
    :allow_public_stats_lookup
  )
end
end
233 changes: 233 additions & 0 deletions app/jobs/one_time/migrate_wakatimecom_heartbeats_job.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,233 @@
require "fileutils"
require "open-uri"

# One-time import of a user's wakatime.com history into Heartbeat rows.
#
# Flow: request (or reuse) a wakatime.com "heartbeats" data dump, download it
# to storage/wakatime_dumps, resolve machine and editor names through the
# wakatime.com API, then upsert every heartbeat that is not already stored
# for the user.
class OneTime::MigrateWakatimecomHeartbeatsJob < ApplicationJob
  queue_as :default

  include GoodJob::ActiveJobExtensions::Concurrency

  # endpoint_url of the WakatimeMirror record whose API key is used for the import
  MIRROR_ENDPOINT_URL = "https://wakatime.com/api/v1"
  # base URL of the per-user wakatime.com API resources queried by this job
  API_ROOT = "https://api.wakatime.com/api/v1/users/current"
  # seconds between two checks of the dump status
  DUMP_POLL_INTERVAL = 5
  # seconds to wait for a dump before giving up (the job can be re-run later)
  DUMP_POLL_TIMEOUT = 60 * 60
  # seconds to wait between paginated API requests (rate limits)
  PAGE_DELAY = 1
  # rows per upsert statement, so a long history is not sent as one huge query
  INSERT_BATCH_SIZE = 1_000

  # only allow one instance of this job to run at a time (per user id)
  good_job_control_concurrency_with(
    key: -> { "migrate_wakatimecom_heartbeats_job_#{arguments.first}" },
    total_limit: 1
  )

  # @param user_id [Integer] id of the User whose wakatime.com data is imported
  # @raise [ActiveRecord::RecordNotFound] when the user does not exist
  def perform(user_id)
    @user = User.find(user_id)
    @api_key = WakatimeMirror.find_by(endpoint_url: MIRROR_ENDPOINT_URL, user_id: @user.id)&.encrypted_api_key

    # Without a key every API call fails, and the job would only wait for a
    # dump that can never be created.
    if @api_key.blank?
      puts "no wakatime.com api key for user #{@user.id}, skipping import"
      return
    end

    import_heartbeats
  end

  private

  # Runs the whole import: dump, download, parse, dedupe, insert.
  def import_heartbeats
    puts "starting wakatime.com heartbeats import for user #{@user.id}"

    # get dump once to check if there's already one, so an existing dump
    # (or an already downloaded file) is reused instead of requesting a new one
    dump = get_dumps

    if dump["status"] != "Completed" && !wakatime_json_exists?
      response = create_dump
      unless response.status.success?
        puts "failed to request wakatime.com data dump: #{response.status} - #{response.body}"
      end
      dump = wait_for_dump
    end

    output_path = download(dump)
    machines_by_id = index_by_id(get_machines)
    agents_by_id = index_by_id(get_agents)
    existing_heartbeats = existing_dedup_keys

    # NOTE: the whole dump is parsed in memory; very large histories may need streaming.
    parsed_json = JSON.parse(File.read(output_path))
    days = Array(parsed_json["days"]).select { |day| Array(day["heartbeats"]).any? }
    puts "found #{days.size} days with heartbeats"

    heartbeats_to_insert = []
    days.each do |day|
      day["heartbeats"].each do |wh|
        next if wh["category"] == "browsing"

        attrs = build_attrs(wh, machines_by_id, agents_by_id)
        dedup_key = generate_dedup_key(attrs[:entity], attrs[:type], attrs[:project], attrs[:branch], attrs[:language], attrs[:time])
        next if existing_heartbeats.include?(dedup_key)

        attrs[:fields_hash] = Heartbeat.generate_fields_hash(attrs)
        heartbeats_to_insert << attrs
      end
    end

    # deduplicate heartbeats within the dump itself
    heartbeats_to_insert = heartbeats_to_insert.uniq { |attrs| attrs[:fields_hash] }
    puts "attempting to insert #{heartbeats_to_insert.size} heartbeats..."

    insert_heartbeats(heartbeats_to_insert)

    # The downloaded dump is deliberately kept so a re-run can skip the download.
    # FileUtils.rm(output_path)
    puts "finished wakatime.com heartbeats import for user #{@user.id}"
  end

  # Polls the dump status until it is "Completed".
  #
  # @return [Hash] the completed dump
  # @raise [RuntimeError] when the dump is not ready within DUMP_POLL_TIMEOUT seconds
  def wait_for_dump
    deadline = Process.clock_gettime(Process::CLOCK_MONOTONIC) + DUMP_POLL_TIMEOUT

    loop do
      sleep DUMP_POLL_INTERVAL
      dump = get_dumps
      puts "wakatime.com import for #{@user.id} is at #{dump['percent_complete']}%"
      return dump if dump["status"] == "Completed"

      if Process.clock_gettime(Process::CLOCK_MONOTONIC) >= deadline
        raise "timed out waiting for wakatime.com data dump for user #{@user.id}"
      end
    end
  end

  # Dedup keys of every heartbeat already stored for the user.
  #
  # @return [Set<String>]
  def existing_dedup_keys
    Heartbeat.where(user_id: @user.id)
             .select(:entity, :type, :project, :branch, :language, :time)
             .map { |h| generate_dedup_key(h.entity, h.type, h.project, h.branch, h.language, h.time) }
             .to_set
  end

  # Maps one heartbeat from the dump to Heartbeat column values.
  #
  # @param wh [Hash] heartbeat entry from the dump
  # @param machines_by_id [Hash] machine records keyed by their "id"
  # @param agents_by_id [Hash] user agent records keyed by their "id"
  # @return [Hash] attributes without :fields_hash
  def build_attrs(wh, machines_by_id, agents_by_id)
    agent = agents_by_id[wh["user_agent_id"]]

    {
      user_id: @user.id,
      branch: wh["branch"],
      category: wh["category"],
      dependencies: wh["dependencies"],
      entity: wh["entity"],
      is_write: wh["is_write"],
      language: wh["language"],
      project: wh["project"],
      time: wh["time"],
      type: wh["type"],
      machine: machines_by_id[wh["machine_name_id"]]&.dig("name"),
      editor: agent&.dig("editor"),
      operating_system: agent&.dig("os"),
      cursorpos: wh["cursorpos"],
      lineno: wh["lineno"],
      lines: wh["lines"],
      created_at: wh["created_at"],
      source_type: 3 # wakatimecom_import
    }
  end

  # Upserts the rows in batches; errors are logged, not raised (best effort).
  #
  # @param rows [Array<Hash>] heartbeat attributes including :fields_hash
  def insert_heartbeats(rows)
    if rows.empty?
      puts "no new heartbeats to insert."
      return
    end

    inserted = 0
    rows.each_slice(INSERT_BATCH_SIZE) do |batch|
      inserted += Heartbeat.upsert_all(batch, unique_by: :fields_hash).rows.size
    end
    puts "inserted #{inserted} heartbeats."
  rescue => e
    puts "error during insert: #{e.class} - #{e.message}"
    puts e.backtrace.join("\n")
  end

  # Fetches the user's data dumps and picks the first "heartbeats" one.
  #
  # @return [Hash] the dump, or {} when there is none or the request failed
  def get_dumps
    response = wakatime_client.get("#{API_ROOT}/data_dumps")

    if response.status.success?
      dumps = JSON.parse(response.body.to_s)["data"] || []
      dumps.find { |dump| dump["type"] == "heartbeats" } || {}
    else
      puts "Failed to fetch Wakatime.com data dumps: #{response.status} - #{response.body}"
      {}
    end
  end

  # Asks wakatime.com to generate a new heartbeats dump.
  #
  # @return the HTTP response of the request
  def create_dump
    wakatime_client.post(
      "#{API_ROOT}/data_dumps",
      json: {
        type: "heartbeats",
        email_when_finished: false
      }
    )
  end

  # Downloads the dump unless it is already on disk.
  #
  # The payload is written to a ".part" file and renamed once complete, so a
  # failed download never leaves a file that later runs mistake for a finished one.
  #
  # @param dump [Hash] dump as returned by get_dumps (needs "download_url")
  # @return [Pathname] path of the JSON file
  # @raise [RuntimeError] when a download is needed but the dump has no download_url
  def download(dump)
    output_path = dump_path
    FileUtils.mkdir_p(output_path.dirname)

    # reuse an earlier download; dumps can be large
    if wakatime_json_exists?
      puts "file already exists, skipping download"
      return output_path
    end

    download_url = dump["download_url"]
    raise "wakatime.com dump for user #{@user.id} has no download_url" if download_url.blank?

    puts "downloading wakatime.com heartbeats dump for user #{@user.id}"
    partial_path = "#{output_path}.part"
    begin
      # the block form closes the remote handle once copying is done
      URI.open(download_url) do |remote|
        File.open(partial_path, "wb") { |file| IO.copy_stream(remote, file) }
      end
      File.rename(partial_path, output_path)
    ensure
      FileUtils.rm_f(partial_path) # no-op after a successful rename
    end

    puts "wakatime.com heartbeats saved to #{output_path} for user #{@user.id}"
    output_path
  end

  # @return [Array<Hash>] all machine name records of the user
  def get_machines
    fetch_all_pages("machine_names", "machines")
  end

  # basically the editors
  #
  # @return [Array<Hash>] all user agent records of the user
  def get_agents
    fetch_all_pages("user_agents", "user agents")
  end

  # Collects the "data" arrays of every page of a paginated resource.
  # Stops at the first failed request and returns what was fetched so far.
  #
  # @param resource [String] path segment under API_ROOT
  # @param label [String] human readable name used in log lines
  # @return [Array<Hash>]
  def fetch_all_pages(resource, label)
    records = []
    page = 1

    loop do
      response = wakatime_client.get("#{API_ROOT}/#{resource}", params: { page: page })

      unless response.status.success?
        puts "failed to fetch wakatime.com #{label}: #{response.status} - #{response.body}"
        break
      end

      data = JSON.parse(response.body.to_s)
      records.concat(data["data"] || [])

      # Check if there are more pages
      break unless data["next_page"]

      sleep PAGE_DELAY # rate limits
      page += 1
    end

    puts "fetched #{records.size} #{label} total"
    records
  end

  # Keys records by "id" so lookups per heartbeat are O(1). The first record
  # wins on duplicate ids, matching a linear `find`.
  #
  # @param records [Array<Hash>]
  # @return [Hash]
  def index_by_id(records)
    records.each_with_object({}) do |record, index|
      index[record["id"]] = record unless index.key?(record["id"])
    end
  end

  # HTTP client carrying the Basic auth header built from the API key.
  def wakatime_client
    auth_token = Base64.strict_encode64("#{@api_key}:")
    HTTP.auth("Basic #{auth_token}")
  end

  # @return [Pathname] where the user's dump is stored on disk
  def dump_path
    Rails.root.join("storage", "wakatime_dumps", "wakatime_heartbeats_#{@user.id}.json")
  end

  # @return [Boolean] whether the user's dump was already downloaded
  def wakatime_json_exists?
    File.exist?(dump_path)
  end

  # String identifying a heartbeat for comparison against stored heartbeats.
  def generate_dedup_key(entity, type, project, branch, language, time)
    "#{entity}-#{type}-#{project}-#{branch}-#{language}-#{time}"
  end
end
3 changes: 2 additions & 1 deletion app/models/heartbeat.rb
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,8 @@ class Heartbeat < ApplicationRecord
enum :source_type, {
direct_entry: 0,
wakapi_import: 1,
test_entry: 2
test_entry: 2,
wakatimecom_import: 3
}

enum :ysws_program, {
Expand Down
8 changes: 6 additions & 2 deletions app/models/user.rb
Original file line number Diff line number Diff line change
Expand Up @@ -89,11 +89,15 @@ def streak_days_formatted
after_save :invalidate_activity_graph_cache, if: :saved_change_to_timezone?

# The 10 most recent migration jobs (either migration job class) whose
# serialized arguments are exactly `[id]`, newest first.
#
# @return the GoodJob::Job relation; it is not loaded here
def data_migration_jobs
  job_classes = [
    "OneTime::MigrateUserFromHackatimeJob",
    "OneTime::MigrateWakatimecomHeartbeatsJob"
  ]

  GoodJob::Job.where(
    # the arguments are compared as JSON text, e.g. "[42]"
    "serialized_params->>'arguments' = ?", [ id ].to_json
  ).where(
    job_class: job_classes
  ).order(created_at: :desc).limit(10)
end

def in_progress_migration_jobs?
Expand Down
4 changes: 3 additions & 1 deletion app/models/wakatime_mirror.rb
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,9 @@ class WakatimeMirror < ApplicationRecord

# Heartbeats that still have to be sent to this mirror.
#
# @return the user's heartbeats created after the last sync (all of them if
#   never synced), excluding heartbeats imported from wakatime.com
def unsynced_heartbeats
  # Time.at(0) makes "never synced" mean "everything".
  sync_cutoff = last_synced_at || Time.at(0)

  user.heartbeats
      .where("created_at > ?", sync_cutoff)
      .where.not(source_type: :wakatimecom_import)
end

def sync_heartbeats
Expand Down
5 changes: 4 additions & 1 deletion app/views/users/edit.html.erb
Original file line number Diff line number Diff line change
Expand Up @@ -316,10 +316,13 @@
<article>
<header>
<h2 id="user_migration_assistant">🚚 Migration Assistant</h2>
<p>This will migrate your heartbeats from waka.hackclub.com to this platform.</p>
<p>This will migrate your heartbeats from other services to this platform.</p>
</header>

<%= button_to "Migrate heartbeats", my_settings_migrate_heartbeats_path, method: :post, role: "button" %>
<% if WakatimeMirror.find_by(endpoint_url: "https://wakatime.com/api/v1", user_id: @user.id)&.encrypted_api_key %>
<%= button_to "Migrate wakatime.com", my_settings_migrate_wakatimecom_heartbeats_path, method: :post, role: "button" %>
<% end %>

<% if @heartbeats_migration_jobs.any? %>
<section>
Expand Down
2 changes: 2 additions & 0 deletions config/environments/development.rb
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,8 @@
# Append comments with runtime information tags to SQL queries in logs.
config.active_record.query_log_tags_enabled = true

config.active_record.logger = nil

# Replace the default in-process and non-durable queuing backend for Active Job.
config.active_job.queue_adapter = :good_job
# config.solid_queue.connects_to = { database: { writing: :queue } }
Expand Down
1 change: 1 addition & 0 deletions config/routes.rb
Original file line number Diff line number Diff line change
Expand Up @@ -95,6 +95,7 @@ def self.matches?(request)
get "my/settings", to: "users#edit", as: :my_settings
patch "my/settings", to: "users#update"
post "my/settings/migrate_heartbeats", to: "users#migrate_heartbeats", as: :my_settings_migrate_heartbeats
post "my/settings/migrate_wakatimecom_heartbeats", to: "users#migrate_wakatimecom_heartbeats", as: :my_settings_migrate_wakatimecom_heartbeats

namespace :my do
resources :project_repo_mappings, param: :project_name, only: [ :edit, :update ]
Expand Down
5 changes: 5 additions & 0 deletions db/migrate/20250429114602_wakatime_api_key_user.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
# Adds a nullable string column `wakatime_api_key` to the `users` table.
class WakatimeApiKeyUser < ActiveRecord::Migration[8.0]
  def change
    change_table :users do |t|
      t.string :wakatime_api_key, null: true
    end
  end
end
Loading