Skip to content

Commit

Permalink
WIP analytics:
Browse files Browse the repository at this point in the history
* adds tldextract dependency to extract the organization name from the email domain
* adds management command askbot_compile_analytics_events
  counts users per group as well as events per user and group
  todo: time on site calculation
* askbot_create_per_email_domain_groups management command:
 - adds --silent option
 - per email domain groups are marked with used_for_analytics=True
* models {User,Group}DailySummary:
 - event counts, time_on_site receive zero default values
* model Event, Session, {User,Group}DailySummary:
 - adds compiled attribute
* model Group (AskbotGroup)
 - adds used_for_analytics attribute
* User.get_groups method:
 - adds used_for_analytics parameter
* event summary models can add events and summaries via add_event and __add__ methods
* askbot.models.user.get_organization_name_from_domain:
 - returns a sentence-cased organization name from the email domain
 using the tldextract library
* askbot.utils.console.ProgressBar:
 - adds silent parameter to suppress output
  • Loading branch information
evgenyfadeev committed Jul 15, 2024
1 parent 8d3d6e2 commit 8d7cfa1
Show file tree
Hide file tree
Showing 12 changed files with 261 additions and 57 deletions.
1 change: 1 addition & 0 deletions askbot/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,7 @@
'requirements': 'requirements-parser>=0.2.0',
'robots': 'django-robots==5.0',
'regex': 'regex',
'tldextract': 'tldextract==5.1.2',
'unidecode': 'unidecode',
'urllib3': 'urllib3<1.27,>=1.21.1'
}
Expand Down
84 changes: 84 additions & 0 deletions askbot/management/commands/askbot_compile_analytics_events.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,84 @@
"""Management commands for Askbot Analytics Events.
Compiles summaries of Askbot Analytics Events in the
per-user and per-group Summary tables.
"""
import datetime
from django.db import transaction
from django.core.management.base import BaseCommand
from askbot.utils.console import ProgressBar
from askbot.models.analytics import Event, GroupDailySummary, UserDailySummary

class Command(BaseCommand):  # pylint: disable=too-few-public-methods
    """Compiles uncompiled analytics events into daily summaries.

    Events are first folded into per-user daily summaries, the per-user
    summaries are then added into the per-group daily summaries, and
    finally the number of users is counted for each group summary.
    """
    help = 'Compiles analytics events into the per-user and per-group daily summaries.'

    def add_arguments(self, parser):
        """Registers the --silent flag, which suppresses the progress output."""
        # The flag turns the progress output OFF, so the help text must say so.
        parser.add_argument('--silent', action='store_true', help='Do not print progress messages.')

    def handle(self, *args, **options):
        """
        Filters uncompiled analytics events.
        Iterates over the events, and calculates per user summaries
        per date.
        Then iterates over the per-user summaries and combines them
        into the per-group summaries.
        Finally counts the users for every uncompiled per-group summary.
        """
        silent = options['silent']

        # Each stage must finish before the next queryset is counted, because
        # a stage creates the rows consumed by the following one.
        events = Event.objects.filter(compiled=False).order_by('timestamp') # pylint: disable=no-member
        self._compile_each(events, 'Compiling Events:', self.compile_event, silent)

        daily_summaries = UserDailySummary.objects.filter(compiled=False).order_by('date') # pylint: disable=no-member
        self._compile_each(daily_summaries,
                           'Compiling User Daily Summaries:',
                           self.compile_user_daily_summary,
                           silent)

        # todo:
        # update the time on site (how?)
        # update the total number of users per group
        # maybe: record number of active users per group within period
        group_daily_summaries = GroupDailySummary.objects.filter(compiled=False) # pylint: disable=no-member
        self._compile_each(group_daily_summaries,
                           'Count users per group:',
                           self.update_users_count_per_group,
                           silent)

    @staticmethod
    def _compile_each(queryset, message, compile_item, silent):
        """Calls `compile_item` on every row of `queryset`, showing a progress bar."""
        count = queryset.count()
        for item in ProgressBar(queryset.iterator(), count, message=message, silent=silent):
            compile_item(item)

    @transaction.atomic
    def update_users_count_per_group(self, group_summary):
        """Counts the number of users in the group at the end of the day
        and marks the group summary as compiled."""
        # The cutoff is the start of the following day; the comparison is
        # strict so that a user who joined exactly at that moment is counted
        # for the next day and not for this one.
        join_date_cutoff = group_summary.date + datetime.timedelta(days=1)
        users = group_summary.group.user_set.filter(date_joined__lt=join_date_cutoff) # pylint: disable=no-member
        group_summary.num_users = users.count()
        group_summary.compiled = True
        group_summary.save()

    @transaction.atomic
    def compile_event(self, event):
        """Adds up event stats into the user daily summary
        and marks the event as compiled."""
        date = event.timestamp.date()
        user = event.session.user
        user_summary, _ = UserDailySummary.objects.get_or_create(date=date, # pylint: disable=no-member
                                                                 user=user)
        user_summary.add_event(event)
        user_summary.save()
        Event.objects.filter(id=event.id).update(compiled=True) # pylint: disable=no-member

    @transaction.atomic
    def compile_user_daily_summary(self, user_daily_summary):
        """Adds the user daily summary to the same-date summary of every
        analytics group of the user and marks the user summary as compiled."""
        date = user_daily_summary.date
        groups = user_daily_summary.user.get_groups(used_for_analytics=True)
        for group in groups:
            group_summary, _ = GroupDailySummary.objects.get_or_create(date=date, # pylint: disable=no-member
                                                                       group=group)
            group_summary += user_daily_summary
            group_summary.save()

        UserDailySummary.objects.filter(id=user_daily_summary.id).update(compiled=True) # pylint: disable=no-member

Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,9 @@
class Command(BaseCommand): # pylint: disable=missing-docstring
help = 'Create groups for each email domain in the database.'

def add_arguments(self, parser): # pylint: disable=missing-docstring
parser.add_argument('--silent', action='store_true', help='Do not print progress messages.')

def handle(self, *args, **options): # pylint: disable=missing-docstring, unused-argument
"""Obtains a list of unique email domains names.
Creates a group for each domain name, if such group does not exist.
Expand All @@ -20,19 +23,27 @@ def handle(self, *args, **options): # pylint: disable=missing-docstring, unused-
created_groups = []
unchanged_groups = []
done_lowercased_domains = []
for domain in ProgressBar(domains.iterator(), count, message):
silent = options['silent']
for domain in ProgressBar(domains.iterator(), count, message=message, silent=silent):

domain_name = domain['domain']
if domain_name.lower in done_lowercased_domains:
domain_name = domain['domain'] or 'Unknown Organization'
if domain_name.lower() in done_lowercased_domains:
continue
else:
done_lowercased_domains.append(domain_name.lower())

done_lowercased_domains.append(domain_name.lower())

organization_name = get_organization_name_from_domain(domain_name)
group, created = Group.objects.get_or_create(
name=organization_name,
visibility=askbot_settings.PER_EMAIL_DOMAIN_GROUP_DEFAULT_VISIBILITY
visibility=askbot_settings.PER_EMAIL_DOMAIN_GROUP_DEFAULT_VISIBILITY,
used_for_analytics=True
)

if not created:
if not group.used_for_analytics:
group.used_for_analytics = True
group.save()

users = User.objects.filter(email__endswith='@' + domain_name)
for user in users.iterator():
user.join_group(group, force=True)
Expand All @@ -41,4 +52,3 @@ def handle(self, *args, **options): # pylint: disable=missing-docstring, unused-
created_groups.append(group)
else:
unchanged_groups.append(group)

28 changes: 14 additions & 14 deletions askbot/migrations/0028_userdailysummary_groupdailysummary.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# Generated by Django 4.2.4 on 2024-06-24 21:15

import datetime
from django.conf import settings
from django.db import migrations, models
import django.db.models.deletion
Expand All @@ -17,12 +17,12 @@ class Migration(migrations.Migration):
name='UserDailySummary',
fields=[
('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
('num_questions', models.PositiveIntegerField()),
('num_answers', models.PositiveIntegerField()),
('num_upvotes', models.PositiveIntegerField()),
('num_downvotes', models.PositiveIntegerField()),
('question_views', models.PositiveIntegerField()),
('time_on_site', models.DurationField()),
('num_questions', models.PositiveIntegerField(default=0)),
('num_answers', models.PositiveIntegerField(default=0)),
('num_upvotes', models.PositiveIntegerField(default=0)),
('num_downvotes', models.PositiveIntegerField(default=0)),
('question_views', models.PositiveIntegerField(default=0)),
('time_on_site', models.DurationField(default=datetime.timedelta(0))),
('date', models.DateField(db_index=True)),
('user', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, to=settings.AUTH_USER_MODEL)),
],
Expand All @@ -34,14 +34,14 @@ class Migration(migrations.Migration):
name='GroupDailySummary',
fields=[
('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
('num_questions', models.PositiveIntegerField()),
('num_answers', models.PositiveIntegerField()),
('num_upvotes', models.PositiveIntegerField()),
('num_downvotes', models.PositiveIntegerField()),
('question_views', models.PositiveIntegerField()),
('time_on_site', models.DurationField()),
('num_questions', models.PositiveIntegerField(default=0)),
('num_answers', models.PositiveIntegerField(default=0)),
('num_upvotes', models.PositiveIntegerField(default=0)),
('num_downvotes', models.PositiveIntegerField(default=0)),
('question_views', models.PositiveIntegerField(default=0)),
('time_on_site', models.DurationField(default=datetime.timedelta(0))),
('date', models.DateField(db_index=True)),
('num_users', models.PositiveIntegerField()),
('num_users', models.PositiveIntegerField(default=0)),
('group', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, to='askbot.group')),
],
options={
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
# Generated by Django 4.2.4 on 2024-07-02 00:10

from django.db import migrations, models


# Adds the boolean flags used by the analytics compilation:
# - `compiled` on Event, GroupDailySummary and UserDailySummary
# - `used_for_analytics` on Group
# Every flag defaults to False.
class Migration(migrations.Migration):

dependencies = [
('askbot', '0029_group_visibility'),
]

operations = [
migrations.AddField(
model_name='event',
name='compiled',
field=models.BooleanField(default=False, help_text='True if the event is compiled into a summary'),
),
migrations.AddField(
model_name='group',
name='used_for_analytics',
field=models.BooleanField(default=False),
),
migrations.AddField(
model_name='groupdailysummary',
name='compiled',
field=models.BooleanField(default=False),
),
migrations.AddField(
model_name='userdailysummary',
name='compiled',
field=models.BooleanField(default=False),
),
]
5 changes: 3 additions & 2 deletions askbot/models/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -2932,10 +2932,11 @@ def get_profile_link(self, text=None):

return mark_safe(profile_link)

def user_get_groups(self, private=False):
def user_get_groups(self, private=False, used_for_analytics=False):
"""returns a query set of groups to which user belongs"""
#todo: maybe cache this query
return Group.objects.get_for_user(self, private=private)
return Group.objects.get_for_user(self, private=private,
used_for_analytics=used_for_analytics)

def user_join_default_groups(self):
"""adds user to "global" and "personal" groups"""
Expand Down
102 changes: 80 additions & 22 deletions askbot/models/analytics.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
"""Models for the Analytics feature"""
import datetime
from django.db import models
from django.db.models import Q
from django.db.models import Value
Expand Down Expand Up @@ -47,21 +48,36 @@
#TYPE_ACTIVITY_QUESTION_VIEWED = 52
#TYPE_ACTIVITY_ANSWER_VIEWED = 53

# Integer codes of the analytics event types.
# The same codes are paired with their display labels in EVENT_TYPES.
EVENT_TYPE_USER_REGISTERED = 1
EVENT_TYPE_LOGGED_IN = 2
EVENT_TYPE_LOGGED_OUT = 3
EVENT_TYPE_QUESTION_VIEWED = 4
EVENT_TYPE_ANSWER_VIEWED = 5
EVENT_TYPE_UPVOTED = 6
EVENT_TYPE_DOWNVOTED = 7
EVENT_TYPE_VOTE_CANCELED = 8
EVENT_TYPE_ASKED = 9
EVENT_TYPE_ANSWERED = 10
EVENT_TYPE_QUESTION_COMMENTED = 11
EVENT_TYPE_ANSWER_COMMENTED = 12
EVENT_TYPE_QUESTION_RETAGGED = 13
EVENT_TYPE_SEARCHED = 14

EVENT_TYPES = (
(1, _('registered')), # Activity.activity_type == 51
(2, _('logged in')),
(3, _('logged out')),
(4, _('question viewed')), # Activity.activity_type == 52
(5, _('answer viewed')), # Activity.activity_type == 53
(6, _('upvoted')), # Activity.activity_type == 9
(7, _('downvoted')), # Activity.activity_type == 10
(8, _('canceled vote')), # Activity.activity_type == 11
(9, _('asked')), # Activity.activity_type == 1
(10, _('answered')), # Activity.activity_type == 2
(11, _('commented question')), # Activity.activity_type == 3
(12, _('commented answer')), # Activity.activity_type == 4
(13, _('retagged question')), # Activity.activity_type == 15
(14, _('searched')),
(EVENT_TYPE_USER_REGISTERED, _('registered')), # Activity.activity_type == 51
(EVENT_TYPE_LOGGED_IN, _('logged in')),
(EVENT_TYPE_LOGGED_OUT, _('logged out')),
(EVENT_TYPE_QUESTION_VIEWED, _('question viewed')), # Activity.activity_type == 52
(EVENT_TYPE_ANSWER_VIEWED, _('answer viewed')), # Activity.activity_type == 53
(EVENT_TYPE_UPVOTED, _('upvoted')), # Activity.activity_type == 9
(EVENT_TYPE_DOWNVOTED, _('downvoted')), # Activity.activity_type == 10
(EVENT_TYPE_VOTE_CANCELED, _('canceled vote')), # Activity.activity_type == 11
(EVENT_TYPE_ASKED, _('asked')), # Activity.activity_type == 1
(EVENT_TYPE_ANSWERED, _('answered')), # Activity.activity_type == 2
(EVENT_TYPE_QUESTION_COMMENTED, _('commented question')), # Activity.activity_type == 3
(EVENT_TYPE_ANSWER_COMMENTED, _('commented answer')), # Activity.activity_type == 4
(EVENT_TYPE_QUESTION_RETAGGED, _('retagged question')), # Activity.activity_type == 15
(EVENT_TYPE_SEARCHED, _('searched')),
)

# Dimension and Metric would make a generic implementation of the analytics feature
Expand Down Expand Up @@ -107,7 +123,7 @@ def get_organizations_count():

def get_unique_user_email_domains():
"""Returns a list of unique email domain names"""
return list(get_user_organization_domains_qs().values_list('domain', flat=True))
return list(get_unique_user_email_domains_qs().values_list('domain', flat=True))


class Session(models.Model):
Expand All @@ -133,6 +149,8 @@ class Event(models.Model):
content_type = models.ForeignKey(ContentType, on_delete=models.CASCADE)
object_id = models.PositiveIntegerField(db_index=True)
content_object = GenericForeignKey('content_type', 'object_id')
compiled = models.BooleanField(default=False,
help_text="True if the event is compiled into a summary")

def __str__(self):
timestamp = self.timestamp.isoformat() # pylint: disable=no-member
Expand All @@ -144,24 +162,60 @@ class BaseSummary(models.Model):
An abstract model for per-interval summaries.
An interval name is defined in the subclass.
"""
num_questions = models.PositiveIntegerField()
num_answers = models.PositiveIntegerField()
num_upvotes = models.PositiveIntegerField()
num_downvotes = models.PositiveIntegerField()
question_views = models.PositiveIntegerField()
time_on_site = models.DurationField()
num_questions = models.PositiveIntegerField(default=0)
num_answers = models.PositiveIntegerField(default=0)
num_upvotes = models.PositiveIntegerField(default=0)
num_downvotes = models.PositiveIntegerField(default=0)
question_views = models.PositiveIntegerField(default=0)
time_on_site = models.DurationField(default=datetime.timedelta(0))
compiled = models.BooleanField(default=False)

class Meta: # pylint: disable=too-few-public-methods, missing-class-docstring
abstract = True


def add_event(self, event):
    """Bumps by one the counter that matches the type of the event.

    Events of the types that have no matching counter
    leave the summary unchanged.
    """
    counter_by_event_type = {
        EVENT_TYPE_ASKED: 'num_questions',
        EVENT_TYPE_ANSWERED: 'num_answers',
        EVENT_TYPE_UPVOTED: 'num_upvotes',
        EVENT_TYPE_DOWNVOTED: 'num_downvotes',
        EVENT_TYPE_QUESTION_VIEWED: 'question_views',
    }
    counter_name = counter_by_event_type.get(event.event_type)
    if counter_name is not None:
        setattr(self, counter_name, getattr(self, counter_name) + 1)


def __add__(self, other):
    """Accumulates the counters and the time on site of `other` into this summary.

    The left operand is modified in place and returned,
    a new summary object is not created.
    """
    summed_fields = (
        'num_questions',
        'num_answers',
        'num_upvotes',
        'num_downvotes',
        'question_views',
        'time_on_site',
    )
    for field_name in summed_fields:
        total = getattr(self, field_name) + getattr(other, field_name)
        setattr(self, field_name, total)
    return self


class DailySummary(BaseSummary):
"""An abstract class for daily summaries.
Adds the `date` of the summarized day to the base summary counters.
"""
date = models.DateField(db_index=True)

class Meta: # pylint: disable=too-few-public-methods, missing-class-docstring
abstract = True

def add_event(self, event):
"""Increments the attribute appropriate for the event type.
The time on site is not updated here yet, see the todo below.
"""
super().add_event(event)
# todo: add up the time on site for the date of this summary:
# - get all sessions intersecting the date
# - for each session, calculate the intersection with the date
# - add up those intervals
# this plan assumes that sessions do not overlap


class UserDailySummary(DailySummary):
"""User summary for each day with activity."""
Expand All @@ -171,4 +225,8 @@ class UserDailySummary(DailySummary):
class GroupDailySummary(DailySummary):
"""Group summary for each day with activity."""
group = models.ForeignKey(AskbotGroup, on_delete=models.CASCADE)
num_users = models.PositiveIntegerField()
num_users = models.PositiveIntegerField(default=0)


def add_event(self, event):
"""Always raises RuntimeError: events cannot be added to a group summary."""
raise RuntimeError("Cannot add events to GroupDailySummary")
Loading

0 comments on commit 8d7cfa1

Please sign in to comment.