Skip to content

Commit dbbb7ef

Browse files
committed
Materials from the recording.
1 parent 1f17b20 commit dbbb7ef

13 files changed

+295
-0
lines changed

.idea/inspectionProfiles/Project_Default.xml

+18
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

.idea/vcs.xml

+6
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

data/README.md

+2
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
# jetbrains-webcast-build-with-mongodb
2+
Code and handouts for my JetBrains webcast recorded January 30, 2018

data/pypi_db.zip

45.7 MB
Binary file not shown.
Binary file not shown.

src/data/downloads.py

+24
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
import datetime
2+
3+
import mongoengine
4+
5+
6+
class Download(mongoengine.Document):
    """One download event, stored in the 'downloads' collection.

    The package and release are recorded as plain ObjectId values rather
    than reference fields.
    """

    # Creation timestamp; the callable default is invoked for each new document.
    created = mongoengine.DateTimeField(default=datetime.datetime.now)

    # Identifiers of the package and release that were downloaded.
    package_id = mongoengine.ObjectIdField()
    release_id = mongoengine.ObjectIdField()

    # Information about the client that performed the download.
    client_ip = mongoengine.StringField()
    client_app = mongoengine.StringField()
    python_version = mongoengine.StringField()

    meta = {
        'db_alias': 'core',
        'collection': 'downloads',
        'indexes': ['created', 'package_id', 'release_id', 'python_version'],
        # Default query ordering: most recently created first.
        'ordering': ['-created'],
    }

src/data/mongo_setup.py

+5
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
import mongoengine
2+
3+
4+
def global_init(db_name: str):
    """Register the MongoDB connection under the alias 'core'.

    :param db_name: name of the database the 'core' alias should point to.
    """
    mongoengine.register_connection(name=db_name, alias='core')

src/data/packages.py

+22
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
import datetime
2+
3+
import mongoengine as mongoengine
4+
5+
6+
class Package(mongoengine.Document):
    """A package, stored in the 'packages' collection."""

    # Creation timestamp; the callable default is invoked for each new document.
    created = mongoengine.DateTimeField(default=datetime.datetime.now)
    name = mongoengine.StringField(required=True)

    # ObjectIds of the maintainers (stored as raw ids, not reference fields).
    maintainers = mongoengine.ListField(mongoengine.ObjectIdField())

    # Optional counter; no default is declared, so it may be unset.
    total_downloads = mongoengine.LongField()

    meta = {
        'db_alias': 'core',
        'collection': 'packages',
        'indexes': ['created', 'name', 'maintainers', 'total_downloads'],
        # Default query ordering: most recently created first.
        'ordering': ['-created'],
    }

src/data/release_health.py

+7
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
import mongoengine
2+
3+
4+
class ReleaseHealth(mongoengine.EmbeddedDocument):
    """Health metrics for a release, declared as an embedded document.

    None of the fields is required or has a default, so each may be unset.
    """

    # Boolean CI flag.
    ci = mongoengine.BooleanField()

    # Numeric metrics stored as floats.
    coverage = mongoengine.FloatField()
    health_index = mongoengine.FloatField()

src/data/release_history.py

+31
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
import datetime
2+
3+
import mongoengine
4+
5+
from data.release_health import ReleaseHealth
6+
7+
8+
class ReleaseHistory(mongoengine.Document):
    """A single release of a package, stored in the 'releases' collection."""

    # Creation timestamp; the callable default is invoked for each new document.
    created = mongoengine.DateTimeField(default=datetime.datetime.now)

    # Owning package (raw ObjectId) and the release's version string.
    package_id = mongoengine.ObjectIdField(required=True)
    version_number = mongoengine.StringField(required=True)
    description = mongoengine.StringField()

    # Free-form string lists describing the release.
    topics = mongoengine.ListField(mongoengine.StringField())
    programming_languages = mongoengine.ListField(mongoengine.StringField())
    dependencies = mongoengine.ListField(mongoengine.StringField())

    # Optional embedded health metrics; not required, so it may be unset.
    health = mongoengine.EmbeddedDocumentField(ReleaseHealth)

    meta = {
        'db_alias': 'core',
        'collection': 'releases',
        'indexes': [
            'package_id',
            'version_number',
            'topics',
            'programming_languages',
            'health.ci',
        ],
        # NOTE(review): version_number is a StringField, so this default
        # ordering compares versions as text, not numerically.
        'ordering': ['-version_number'],
    }

src/data/users.py

+20
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
import datetime
2+
3+
import mongoengine
4+
5+
6+
class User(mongoengine.Document):
    """A user, stored in the 'users' collection."""

    # Creation timestamp; the callable default is invoked for each new document.
    created = mongoengine.DateTimeField(default=datetime.datetime.now)

    # Both contact fields are mandatory.
    name = mongoengine.StringField(required=True)
    email = mongoengine.StringField(required=True)

    meta = {
        'db_alias': 'core',
        'collection': 'users',
        'indexes': ['created', 'name', 'email'],
        # Default query ordering: by name, ascending.
        'ordering': ['name'],
    }

src/program.py

+109
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,109 @@
1+
# noinspection PyPackageRequirements
2+
import time
3+
from data.mongo_setup import global_init
4+
from data.packages import Package
5+
from services.package_service import PackageService
6+
7+
8+
def main():
    """Run the explorer: set up the database, show stats, start the menu."""
    # The connection must be registered before any query is issued;
    # print_header() already calls into PackageService.
    global_init('pypi')

    print_header()
    input_loop()
12+
13+
14+
def input_loop():
    """Show the menu repeatedly and run the chosen action until the user exits.

    'q' runs query_packages(), 'd' runs view_downloads() and 'x' leaves the
    loop. Any other input is reported back to the user. Input is lower-cased
    and stripped before it is compared.
    """
    actions = {
        'q': query_packages,
        'd': view_downloads,
    }

    while True:
        print("What do you want to do?")
        val = input("[q]uery packages, view [d]ownloads, or e[x]it? ").lower().strip()

        if val == 'x':
            print("Bye")
            break

        action = actions.get(val)
        if action is None:
            print(f"Don't know what to do with '{val}'")
        else:
            action()

        # Two blank lines separate one menu round from the next.
        print()
        print()
30+
31+
32+
def query_packages():
    """Prompt for a package name and print details of its latest release.

    The package, its latest release and its maintainers are fetched through
    PackageService. The function then prints the release's health status,
    version, description, maintainers, topics, supported languages and
    dependencies, followed by the time the lookups took.

    Returns early, after printing a message, when no package matches the
    name or the package has no releases.
    """
    # Strip surrounding whitespace so a stray space does not turn a valid
    # package name into a miss.
    name = input("What package would you like details for? [hint: PackageNNNN]? ").strip()

    # perf_counter() is monotonic, so the measured interval is not affected
    # by system clock adjustments.
    t0 = time.perf_counter()

    package = PackageService.find_package_by_name(name)
    if not package:
        print(f"Sorry, no package with name '{name}'.")
        return

    r = PackageService.latest_release(package)
    if not r:
        print(f"Sorry the package {package.name} has no releases.")
        return

    maintainers = PackageService.find_maintainers(package)
    t1 = time.perf_counter()

    print("PACKAGE: " + package.name)
    print(" Status: " + _format_health(r.health))
    print()
    print(f"Current version: {r.version_number}")
    print()
    # description is an optional field; treat a missing value as empty text
    # instead of failing on the slice.
    print(f"Description: {(r.description or '')[:100]} ...")
    print()
    print("Maintainers:")
    for m in maintainers:
        print(f"* {m.name} ({m.email})")
    print()
    print()
    _print_bullets("Topics:", r.topics)
    _print_bullets("Supported languages:", r.programming_languages)
    _print_bullets("Dependencies:", r.dependencies)
    print(f"Elapsed time: {(t1-t0)*1000:.3f} ms.")


def _format_health(health):
    """Return the bracketed CI / health / coverage summary for a release."""
    # The embedded health document is optional on a release.
    if health is None:
        return "[no health data]"

    ci_state = 'passing' if health.ci else 'failing'
    return (f"[CI: {ci_state}] "
            f"[Health: {_format_score(health.health_index)}] "
            f"[Coverage: {_format_score(health.coverage)}]")


def _format_score(value):
    """Format a float metric with one decimal place, or 'n/a' when unset."""
    return "n/a" if value is None else f"{value:.1f}"


def _print_bullets(heading, items):
    """Print a heading, one '* item' line per entry, then a blank line."""
    print(heading)
    for item in items:
        print(f"* {item}")
    print()
78+
79+
80+
def view_downloads():
    """Print the most-downloaded packages and how long the query took."""
    # Single source of truth for both the query limit and the header text.
    limit = 10

    # perf_counter() is monotonic, so the measured interval is not affected
    # by system clock adjustments.
    t0 = time.perf_counter()

    tops = PackageService.popular_packages(limit=limit)

    dt = time.perf_counter() - t0
    print(f"Top {limit} packages by downloads")
    print()
    for rank, p in enumerate(tops, start=1):
        # total_downloads has no declared default, so it can be None;
        # formatting None with ',' would raise TypeError.
        print(f"{rank}. {(p.total_downloads or 0):,} {p.name}")
    print()
    print(f"Elapsed time: {dt*1000:,.03f} ms.")
92+
93+
94+
def print_header():
    """Print the application banner followed by the current document counts.

    The counts for packages, releases, users and downloads are fetched from
    PackageService at the time each line is printed.
    """
    rule = '-------------------------------------'

    print(rule)
    print(' PYPI DATA EXPLORER')
    print(rule)
    print()

    print("Current stats: ")
    print(f"Packages: {PackageService.package_count():,}")
    print(f"Releases: {PackageService.release_count():,}")
    print(f"Users: {PackageService.user_count():,}")
    print(f"Downloads: {PackageService.download_count():,}")
    print()
106+
107+
108+
# Start the interactive explorer only when run as a script, not on import.
if __name__ == '__main__':
    main()

src/services/package_service.py

+51
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,51 @@
1+
from typing import Optional, List
2+
3+
from data.downloads import Download
4+
from data.packages import Package
5+
from data.release_history import ReleaseHistory
6+
from data.users import User
7+
8+
9+
class PackageService:
    """Query helpers over the Package, ReleaseHistory, User and Download models.

    Every method is a classmethod, so no instance is needed.
    """

    @classmethod
    def package_count(cls):
        """Return the number of Package documents."""
        all_packages = Package.objects()
        return all_packages.count()

    @classmethod
    def release_count(cls):
        """Return the number of ReleaseHistory documents."""
        all_releases = ReleaseHistory.objects()
        return all_releases.count()

    @classmethod
    def user_count(cls):
        """Return the number of User documents."""
        all_users = User.objects()
        return all_users.count()

    @classmethod
    def download_count(cls):
        """Return the number of Download documents."""
        all_downloads = Download.objects()
        return all_downloads.count()

    @classmethod
    def find_package_by_name(cls, name):
        """Return the first Package whose name equals *name*, or None."""
        return Package.objects(name=name).first()

    @classmethod
    def latest_release(cls, package: Package) -> Optional[ReleaseHistory]:
        """Return the most recently created release of *package*, or None."""
        releases = ReleaseHistory.objects(package_id=package.id)
        # Explicit ordering by creation time, newest first.
        newest_first = releases.order_by('-created')
        return newest_first.first()

    @classmethod
    def find_maintainers(cls, package: Package) -> List[User]:
        """Return the users whose ids appear in ``package.maintainers``."""
        return list(User.objects(id__in=package.maintainers))

    @classmethod
    def popular_packages(cls, limit: int) -> List[Package]:
        """Return up to *limit* packages, highest ``total_downloads`` first."""
        ranked = Package.objects().order_by('-total_downloads')
        return list(ranked.limit(limit))

0 commit comments

Comments
 (0)