diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 4416ee8..8acd8e7 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -23,3 +23,24 @@ jobs: - name: Run tests run: pipenv run pytest + + ruff: + runs-on: ubuntu-latest + + steps: + - uses: actions/checkout@v4 + + - name: Install Python + uses: actions/setup-python@v5 + with: + python-version: "3.11" + + - name: Install dependencies + run: | + python -m pip install --upgrade pip + pip install ruff + + # Update output format to enable automatic inline annotations. + - name: Run Ruff + run: ruff check --output-format=github . + \ No newline at end of file diff --git a/.vscode/extensions.json b/.vscode/extensions.json new file mode 100644 index 0000000..c371f0c --- /dev/null +++ b/.vscode/extensions.json @@ -0,0 +1,8 @@ +{ + "recommendations": [ + "charliermarsh.ruff" + ], + "unwantedRecommendations": [ + + ] +} \ No newline at end of file diff --git a/.vscode/settings.json b/.vscode/settings.json new file mode 100644 index 0000000..6e4616f --- /dev/null +++ b/.vscode/settings.json @@ -0,0 +1,9 @@ +{ + "[python]": { + "editor.formatOnSave": true, + "editor.defaultFormatter": "charliermarsh.ruff", + "editor.codeActionsOnSave": { + "source.organizeImports": "explicit" + } + } +} \ No newline at end of file diff --git a/Pipfile b/Pipfile index 69ee0c1..b95f696 100644 --- a/Pipfile +++ b/Pipfile @@ -4,12 +4,14 @@ verify_ssl = true name = "pypi" [packages] -requests = "==2.26.0" -python-dotenv = "==1.0.1" -pytest = "==8.3.3" -pytest-cov = "==5.0.0" +python-dotenv = "~=1.0" +requests = "~=2.26" +ruff = "~=0.7" +tqdm = "~=4.66" [dev-packages] +pytest = "~=8.3" +pytest-cov = "~=5.0" [requires] python_version = "3.12" diff --git a/Pipfile.lock b/Pipfile.lock index f3d0de6..9ad305e 100644 --- a/Pipfile.lock +++ b/Pipfile.lock @@ -1,7 +1,7 @@ { "_meta": { "hash": { - "sha256": "000484286c8a1e3d843e55471e6addf2968d9ca207587c136ef2f7ce177ef27f" + "sha256": 
"4538e3fe34bc8b2bf4f190a63bbb7c2fd7a6fa658af76c8b717463ba913c9571" }, "pipfile-spec": 6, "requires": { @@ -26,18 +26,121 @@ }, "charset-normalizer": { "hashes": [ - "sha256:2857e29ff0d34db842cd7ca3230549d1a697f96ee6d3fb071cfa6c7393832597", - "sha256:6881edbebdb17b39b4eaaa821b438bf6eddffb4468cf344f09f89def34a8b1df" + "sha256:0099d79bdfcf5c1f0c2c72f91516702ebf8b0b8ddd8905f97a8aecf49712c621", + "sha256:0713f3adb9d03d49d365b70b84775d0a0d18e4ab08d12bc46baa6132ba78aaf6", + "sha256:07afec21bbbbf8a5cc3651aa96b980afe2526e7f048fdfb7f1014d84acc8b6d8", + "sha256:0b309d1747110feb25d7ed6b01afdec269c647d382c857ef4663bbe6ad95a912", + "sha256:0d99dd8ff461990f12d6e42c7347fd9ab2532fb70e9621ba520f9e8637161d7c", + "sha256:0de7b687289d3c1b3e8660d0741874abe7888100efe14bd0f9fd7141bcbda92b", + "sha256:1110e22af8ca26b90bd6364fe4c763329b0ebf1ee213ba32b68c73de5752323d", + "sha256:130272c698667a982a5d0e626851ceff662565379baf0ff2cc58067b81d4f11d", + "sha256:136815f06a3ae311fae551c3df1f998a1ebd01ddd424aa5603a4336997629e95", + "sha256:14215b71a762336254351b00ec720a8e85cada43b987da5a042e4ce3e82bd68e", + "sha256:1db4e7fefefd0f548d73e2e2e041f9df5c59e178b4c72fbac4cc6f535cfb1565", + "sha256:1ffd9493de4c922f2a38c2bf62b831dcec90ac673ed1ca182fe11b4d8e9f2a64", + "sha256:2006769bd1640bdf4d5641c69a3d63b71b81445473cac5ded39740a226fa88ab", + "sha256:20587d20f557fe189b7947d8e7ec5afa110ccf72a3128d61a2a387c3313f46be", + "sha256:223217c3d4f82c3ac5e29032b3f1c2eb0fb591b72161f86d93f5719079dae93e", + "sha256:27623ba66c183eca01bf9ff833875b459cad267aeeb044477fedac35e19ba907", + "sha256:285e96d9d53422efc0d7a17c60e59f37fbf3dfa942073f666db4ac71e8d726d0", + "sha256:2de62e8801ddfff069cd5c504ce3bc9672b23266597d4e4f50eda28846c322f2", + "sha256:2f6c34da58ea9c1a9515621f4d9ac379871a8f21168ba1b5e09d74250de5ad62", + "sha256:309a7de0a0ff3040acaebb35ec45d18db4b28232f21998851cfa709eeff49d62", + "sha256:35c404d74c2926d0287fbd63ed5d27eb911eb9e4a3bb2c6d294f3cfd4a9e0c23", + 
"sha256:3710a9751938947e6327ea9f3ea6332a09bf0ba0c09cae9cb1f250bd1f1549bc", + "sha256:3d59d125ffbd6d552765510e3f31ed75ebac2c7470c7274195b9161a32350284", + "sha256:40d3ff7fc90b98c637bda91c89d51264a3dcf210cade3a2c6f838c7268d7a4ca", + "sha256:425c5f215d0eecee9a56cdb703203dda90423247421bf0d67125add85d0c4455", + "sha256:43193c5cda5d612f247172016c4bb71251c784d7a4d9314677186a838ad34858", + "sha256:44aeb140295a2f0659e113b31cfe92c9061622cadbc9e2a2f7b8ef6b1e29ef4b", + "sha256:47334db71978b23ebcf3c0f9f5ee98b8d65992b65c9c4f2d34c2eaf5bcaf0594", + "sha256:4796efc4faf6b53a18e3d46343535caed491776a22af773f366534056c4e1fbc", + "sha256:4a51b48f42d9358460b78725283f04bddaf44a9358197b889657deba38f329db", + "sha256:4b67fdab07fdd3c10bb21edab3cbfe8cf5696f453afce75d815d9d7223fbe88b", + "sha256:4ec9dd88a5b71abfc74e9df5ebe7921c35cbb3b641181a531ca65cdb5e8e4dea", + "sha256:4f9fc98dad6c2eaa32fc3af1417d95b5e3d08aff968df0cd320066def971f9a6", + "sha256:54b6a92d009cbe2fb11054ba694bc9e284dad30a26757b1e372a1fdddaf21920", + "sha256:55f56e2ebd4e3bc50442fbc0888c9d8c94e4e06a933804e2af3e89e2f9c1c749", + "sha256:5726cf76c982532c1863fb64d8c6dd0e4c90b6ece9feb06c9f202417a31f7dd7", + "sha256:5d447056e2ca60382d460a604b6302d8db69476fd2015c81e7c35417cfabe4cd", + "sha256:5ed2e36c3e9b4f21dd9422f6893dec0abf2cca553af509b10cd630f878d3eb99", + "sha256:5ff2ed8194587faf56555927b3aa10e6fb69d931e33953943bc4f837dfee2242", + "sha256:62f60aebecfc7f4b82e3f639a7d1433a20ec32824db2199a11ad4f5e146ef5ee", + "sha256:63bc5c4ae26e4bc6be6469943b8253c0fd4e4186c43ad46e713ea61a0ba49129", + "sha256:6b40e8d38afe634559e398cc32b1472f376a4099c75fe6299ae607e404c033b2", + "sha256:6b493a043635eb376e50eedf7818f2f322eabbaa974e948bd8bdd29eb7ef2a51", + "sha256:6dba5d19c4dfab08e58d5b36304b3f92f3bd5d42c1a3fa37b5ba5cdf6dfcbcee", + "sha256:6fd30dc99682dc2c603c2b315bded2799019cea829f8bf57dc6b61efde6611c8", + "sha256:707b82d19e65c9bd28b81dde95249b07bf9f5b90ebe1ef17d9b57473f8a64b7b", + "sha256:7706f5850360ac01d80c89bcef1640683cc12ed87f42579dab6c5d3ed6888613", 
+ "sha256:7782afc9b6b42200f7362858f9e73b1f8316afb276d316336c0ec3bd73312742", + "sha256:79983512b108e4a164b9c8d34de3992f76d48cadc9554c9e60b43f308988aabe", + "sha256:7f683ddc7eedd742e2889d2bfb96d69573fde1d92fcb811979cdb7165bb9c7d3", + "sha256:82357d85de703176b5587dbe6ade8ff67f9f69a41c0733cf2425378b49954de5", + "sha256:84450ba661fb96e9fd67629b93d2941c871ca86fc38d835d19d4225ff946a631", + "sha256:86f4e8cca779080f66ff4f191a685ced73d2f72d50216f7112185dc02b90b9b7", + "sha256:8cda06946eac330cbe6598f77bb54e690b4ca93f593dee1568ad22b04f347c15", + "sha256:8ce7fd6767a1cc5a92a639b391891bf1c268b03ec7e021c7d6d902285259685c", + "sha256:8ff4e7cdfdb1ab5698e675ca622e72d58a6fa2a8aa58195de0c0061288e6e3ea", + "sha256:9289fd5dddcf57bab41d044f1756550f9e7cf0c8e373b8cdf0ce8773dc4bd417", + "sha256:92a7e36b000bf022ef3dbb9c46bfe2d52c047d5e3f3343f43204263c5addc250", + "sha256:92db3c28b5b2a273346bebb24857fda45601aef6ae1c011c0a997106581e8a88", + "sha256:95c3c157765b031331dd4db3c775e58deaee050a3042fcad72cbc4189d7c8dca", + "sha256:980b4f289d1d90ca5efcf07958d3eb38ed9c0b7676bf2831a54d4f66f9c27dfa", + "sha256:9ae4ef0b3f6b41bad6366fb0ea4fc1d7ed051528e113a60fa2a65a9abb5b1d99", + "sha256:9c98230f5042f4945f957d006edccc2af1e03ed5e37ce7c373f00a5a4daa6149", + "sha256:9fa2566ca27d67c86569e8c85297aaf413ffab85a8960500f12ea34ff98e4c41", + "sha256:a14969b8691f7998e74663b77b4c36c0337cb1df552da83d5c9004a93afdb574", + "sha256:a8aacce6e2e1edcb6ac625fb0f8c3a9570ccc7bfba1f63419b3769ccf6a00ed0", + "sha256:a8e538f46104c815be19c975572d74afb53f29650ea2025bbfaef359d2de2f7f", + "sha256:aa41e526a5d4a9dfcfbab0716c7e8a1b215abd3f3df5a45cf18a12721d31cb5d", + "sha256:aa693779a8b50cd97570e5a0f343538a8dbd3e496fa5dcb87e29406ad0299654", + "sha256:ab22fbd9765e6954bc0bcff24c25ff71dcbfdb185fcdaca49e81bac68fe724d3", + "sha256:ab2e5bef076f5a235c3774b4f4028a680432cded7cad37bba0fd90d64b187d19", + "sha256:ab973df98fc99ab39080bfb0eb3a925181454d7c3ac8a1e695fddfae696d9e90", + 
"sha256:af73657b7a68211996527dbfeffbb0864e043d270580c5aef06dc4b659a4b578", + "sha256:b197e7094f232959f8f20541ead1d9862ac5ebea1d58e9849c1bf979255dfac9", + "sha256:b295729485b06c1a0683af02a9e42d2caa9db04a373dc38a6a58cdd1e8abddf1", + "sha256:b8831399554b92b72af5932cdbbd4ddc55c55f631bb13ff8fe4e6536a06c5c51", + "sha256:b8dcd239c743aa2f9c22ce674a145e0a25cb1566c495928440a181ca1ccf6719", + "sha256:bcb4f8ea87d03bc51ad04add8ceaf9b0f085ac045ab4d74e73bbc2dc033f0236", + "sha256:bd7af3717683bea4c87acd8c0d3d5b44d56120b26fd3f8a692bdd2d5260c620a", + "sha256:bf4475b82be41b07cc5e5ff94810e6a01f276e37c2d55571e3fe175e467a1a1c", + "sha256:c3e446d253bd88f6377260d07c895816ebf33ffffd56c1c792b13bff9c3e1ade", + "sha256:c57516e58fd17d03ebe67e181a4e4e2ccab1168f8c2976c6a334d4f819fe5944", + "sha256:c94057af19bc953643a33581844649a7fdab902624d2eb739738a30e2b3e60fc", + "sha256:cab5d0b79d987c67f3b9e9c53f54a61360422a5a0bc075f43cab5621d530c3b6", + "sha256:ce031db0408e487fd2775d745ce30a7cd2923667cf3b69d48d219f1d8f5ddeb6", + "sha256:cee4373f4d3ad28f1ab6290684d8e2ebdb9e7a1b74fdc39e4c211995f77bec27", + "sha256:d5b054862739d276e09928de37c79ddeec42a6e1bfc55863be96a36ba22926f6", + "sha256:dbe03226baf438ac4fda9e2d0715022fd579cb641c4cf639fa40d53b2fe6f3e2", + "sha256:dc15e99b2d8a656f8e666854404f1ba54765871104e50c8e9813af8a7db07f12", + "sha256:dcaf7c1524c0542ee2fc82cc8ec337f7a9f7edee2532421ab200d2b920fc97cf", + "sha256:dd4eda173a9fcccb5f2e2bd2a9f423d180194b1bf17cf59e3269899235b2a114", + "sha256:dd9a8bd8900e65504a305bf8ae6fa9fbc66de94178c420791d0293702fce2df7", + "sha256:de7376c29d95d6719048c194a9cf1a1b0393fbe8488a22008610b0361d834ecf", + "sha256:e7fdd52961feb4c96507aa649550ec2a0d527c086d284749b2f582f2d40a2e0d", + "sha256:e91f541a85298cf35433bf66f3fab2a4a2cff05c127eeca4af174f6d497f0d4b", + "sha256:e9e3c4c9e1ed40ea53acf11e2a386383c3304212c965773704e4603d589343ed", + "sha256:ee803480535c44e7f5ad00788526da7d85525cfefaf8acf8ab9a310000be4b03", + "sha256:f09cb5a7bbe1ecae6e87901a2eb23e0256bb524a79ccc53eb0b7629fbe7677c4", 
+ "sha256:f19c1585933c82098c2a520f8ec1227f20e339e33aca8fa6f956f6691b784e67", + "sha256:f1a2f519ae173b5b6a2c9d5fa3116ce16e48b3462c8b96dfdded11055e3d6365", + "sha256:f28f891ccd15c514a0981f3b9db9aa23d62fe1a99997512b0491d2ed323d229a", + "sha256:f3e73a4255342d4eb26ef6df01e3962e73aa29baa3124a8e824c5d3364a65748", + "sha256:f606a1881d2663630ea5b8ce2efe2111740df4b687bd78b34a8131baa007f79b", + "sha256:fe9f97feb71aa9896b81973a7bbada8c49501dc73e58a10fcef6663af95e5079", + "sha256:ffc519621dce0c767e96b9c53f09c5d215578e10b02c285809f76509a3931482" ], - "markers": "python_version >= '3'", - "version": "==2.0.12" + "markers": "python_full_version >= '3.7.0'", + "version": "==3.4.0" }, "idna": { "hashes": [ "sha256:12f65c9b470abda6dc35cf8e63cc574b1c52b11df2c86030af0ac09b01b13ea9", "sha256:946d195a0d259cbba61165e88e65941f16e9b36ea6ddb97f00452bae8b1287d3" ], - "markers": "python_version >= '3'", + "markers": "python_version >= '3.6'", "version": "==3.10" }, "python-dotenv": { @@ -51,21 +154,169 @@ }, "requests": { "hashes": [ - "sha256:6c1246513ecd5ecd4528a0906f910e8f0f9c6b8ec72030dc9fd154dc1a6efd24", - "sha256:b8aa58f8cf793ffd8782d3d8cb19e66ef36f7aba4353eec859e74678b01b07a7" + "sha256:55365417734eb18255590a9ff9eb97e9e1da868d4ccd6402399eaf68af20a760", + "sha256:70761cfe03c773ceb22aa2f671b4757976145175cdfca038c02654d061d6dcc6" + ], + "index": "pypi", + "markers": "python_version >= '3.8'", + "version": "==2.32.3" + }, + "ruff": { + "hashes": [ + "sha256:19aa200ec824c0f36d0c9114c8ec0087082021732979a359d6f3c390a6ff2a37", + "sha256:27c1c52a8d199a257ff1e5582d078eab7145129aa02721815ca8fa4f9612dc35", + "sha256:32f1e8a192e261366c702c5fb2ece9f68d26625f198a25c408861c16dc2dea9c", + "sha256:344cc2b0814047dc8c3a8ff2cd1f3d808bb23c6658db830d25147339d9bf9ea7", + "sha256:4316bbf69d5a859cc937890c7ac7a6551252b6a01b1d2c97e8fc96e45a7c8b4a", + "sha256:56aad830af8a9db644e80098fe4984a948e2b6fc2e73891538f43bbe478461b8", + "sha256:588a34e1ef2ea55b4ddfec26bbe76bc866e92523d8c6cdec5e8aceefeff02d99", + 
"sha256:658304f02f68d3a83c998ad8bf91f9b4f53e93e5412b8f2388359d55869727fd", + "sha256:699085bf05819588551b11751eff33e9ca58b1b86a6843e1b082a7de40da1565", + "sha256:79d3af9dca4c56043e738a4d6dd1e9444b6d6c10598ac52d146e331eb155a8ad", + "sha256:8422104078324ea250886954e48f1373a8fe7de59283d747c3a7eca050b4e378", + "sha256:94fc32f9cdf72dc75c451e5f072758b118ab8100727168a3df58502b43a599ca", + "sha256:985818742b833bffa543a84d1cc11b5e6871de1b4e0ac3060a59a2bae3969250", + "sha256:9d8a41d4aa2dad1575adb98a82870cf5db5f76b2938cf2206c22c940034a36f4", + "sha256:b517a2011333eb7ce2d402652ecaa0ac1a30c114fbbd55c6b8ee466a7f600ee9", + "sha256:c5c121b46abde94a505175524e51891f829414e093cd8326d6e741ecfc0a9112", + "sha256:cb1bc5ed9403daa7da05475d615739cc0212e861b7306f314379d958592aaa89", + "sha256:f38c41fcde1728736b4eb2b18850f6d1e3eedd9678c914dede554a70d5241307" + ], + "index": "pypi", + "markers": "python_version >= '3.7'", + "version": "==0.7.1" + }, + "tqdm": { + "hashes": [ + "sha256:90279a3770753eafc9194a0364852159802111925aa30eb3f9d85b0e805ac7cd", + "sha256:e1020aef2e5096702d8a025ac7d16b1577279c9d63f8375b63083e9a5f0fcbad" ], "index": "pypi", - "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4, 3.5'", - "version": "==2.26.0" + "markers": "python_version >= '3.7'", + "version": "==4.66.5" }, "urllib3": { "hashes": [ - "sha256:0ed14ccfbf1c30a9072c7ca157e4319b70d65f623e91e7b32fadb2853431016e", - "sha256:40c2dc0c681e47eb8f90e7e27bf6ff7df2e677421fd46756da1161c39ca70d32" + "sha256:ca899ca043dcb1bafa3e262d73aa25c465bfb49e0bd9dd5d59f1d0acba2f8fac", + "sha256:e7d814a81dad81e6caf2ec9fdedb284ecc9c73076b62654547cc64ccdcae26e9" ], - "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4, 3.5'", - "version": "==1.26.20" + "markers": "python_version >= '3.8'", + "version": "==2.2.3" } }, - "develop": {} + "develop": { + "coverage": { + "extras": [ + "toml" + ], + "hashes": [ + 
"sha256:00a1d69c112ff5149cabe60d2e2ee948752c975d95f1e1096742e6077affd376", + "sha256:023bf8ee3ec6d35af9c1c6ccc1d18fa69afa1cb29eaac57cb064dbb262a517f9", + "sha256:0294ca37f1ba500667b1aef631e48d875ced93ad5e06fa665a3295bdd1d95111", + "sha256:06babbb8f4e74b063dbaeb74ad68dfce9186c595a15f11f5d5683f748fa1d172", + "sha256:0809082ee480bb8f7416507538243c8863ac74fd8a5d2485c46f0f7499f2b491", + "sha256:0b3fb02fe73bed561fa12d279a417b432e5b50fe03e8d663d61b3d5990f29546", + "sha256:0b58c672d14f16ed92a48db984612f5ce3836ae7d72cdd161001cc54512571f2", + "sha256:0bcd1069e710600e8e4cf27f65c90c7843fa8edfb4520fb0ccb88894cad08b11", + "sha256:1032e178b76a4e2b5b32e19d0fd0abbce4b58e77a1ca695820d10e491fa32b08", + "sha256:11a223a14e91a4693d2d0755c7a043db43d96a7450b4f356d506c2562c48642c", + "sha256:12394842a3a8affa3ba62b0d4ab7e9e210c5e366fbac3e8b2a68636fb19892c2", + "sha256:182e6cd5c040cec0a1c8d415a87b67ed01193ed9ad458ee427741c7d8513d963", + "sha256:1d5b8007f81b88696d06f7df0cb9af0d3b835fe0c8dbf489bad70b45f0e45613", + "sha256:1f76846299ba5c54d12c91d776d9605ae33f8ae2b9d1d3c3703cf2db1a67f2c0", + "sha256:27fb4a050aaf18772db513091c9c13f6cb94ed40eacdef8dad8411d92d9992db", + "sha256:29155cd511ee058e260db648b6182c419422a0d2e9a4fa44501898cf918866cf", + "sha256:29fc0f17b1d3fea332f8001d4558f8214af7f1d87a345f3a133c901d60347c73", + "sha256:2b6b4c83d8e8ea79f27ab80778c19bc037759aea298da4b56621f4474ffeb117", + "sha256:2fdef0d83a2d08d69b1f2210a93c416d54e14d9eb398f6ab2f0a209433db19e1", + "sha256:3c65d37f3a9ebb703e710befdc489a38683a5b152242664b973a7b7b22348a4e", + "sha256:4f704f0998911abf728a7783799444fcbbe8261c4a6c166f667937ae6a8aa522", + "sha256:51b44306032045b383a7a8a2c13878de375117946d68dcb54308111f39775a25", + "sha256:53d202fd109416ce011578f321460795abfe10bb901b883cafd9b3ef851bacfc", + "sha256:58809e238a8a12a625c70450b48e8767cff9eb67c62e6154a642b21ddf79baea", + "sha256:5915fcdec0e54ee229926868e9b08586376cae1f5faa9bbaf8faf3561b393d52", + "sha256:5beb1ee382ad32afe424097de57134175fea3faf847b9af002cc7895be4e2a5a", 
+ "sha256:5f8ae553cba74085db385d489c7a792ad66f7f9ba2ee85bfa508aeb84cf0ba07", + "sha256:5fbd612f8a091954a0c8dd4c0b571b973487277d26476f8480bfa4b2a65b5d06", + "sha256:6bd818b7ea14bc6e1f06e241e8234508b21edf1b242d49831831a9450e2f35fa", + "sha256:6f01ba56b1c0e9d149f9ac85a2f999724895229eb36bd997b61e62999e9b0901", + "sha256:73d2b73584446e66ee633eaad1a56aad577c077f46c35ca3283cd687b7715b0b", + "sha256:7bb92c539a624cf86296dd0c68cd5cc286c9eef2d0c3b8b192b604ce9de20a17", + "sha256:8165b796df0bd42e10527a3f493c592ba494f16ef3c8b531288e3d0d72c1f6f0", + "sha256:862264b12ebb65ad8d863d51f17758b1684560b66ab02770d4f0baf2ff75da21", + "sha256:8902dd6a30173d4ef09954bfcb24b5d7b5190cf14a43170e386979651e09ba19", + "sha256:8cf717ee42012be8c0cb205dbbf18ffa9003c4cbf4ad078db47b95e10748eec5", + "sha256:8ed9281d1b52628e81393f5eaee24a45cbd64965f41857559c2b7ff19385df51", + "sha256:99b41d18e6b2a48ba949418db48159d7a2e81c5cc290fc934b7d2380515bd0e3", + "sha256:9cb7fa111d21a6b55cbf633039f7bc2749e74932e3aa7cb7333f675a58a58bf3", + "sha256:a181e99301a0ae128493a24cfe5cfb5b488c4e0bf2f8702091473d033494d04f", + "sha256:a413a096c4cbac202433c850ee43fa326d2e871b24554da8327b01632673a076", + "sha256:a6b1e54712ba3474f34b7ef7a41e65bd9037ad47916ccb1cc78769bae324c01a", + "sha256:ade3ca1e5f0ff46b678b66201f7ff477e8fa11fb537f3b55c3f0568fbfe6e718", + "sha256:b0ac3d42cb51c4b12df9c5f0dd2f13a4f24f01943627120ec4d293c9181219ba", + "sha256:b369ead6527d025a0fe7bd3864e46dbee3aa8f652d48df6174f8d0bac9e26e0e", + "sha256:b57b768feb866f44eeed9f46975f3d6406380275c5ddfe22f531a2bf187eda27", + "sha256:b8d3a03d9bfcaf5b0141d07a88456bb6a4c3ce55c080712fec8418ef3610230e", + "sha256:bc66f0bf1d7730a17430a50163bb264ba9ded56739112368ba985ddaa9c3bd09", + "sha256:bf20494da9653f6410213424f5f8ad0ed885e01f7e8e59811f572bdb20b8972e", + "sha256:c48167910a8f644671de9f2083a23630fbf7a1cb70ce939440cd3328e0919f70", + "sha256:c481b47f6b5845064c65a7bc78bc0860e635a9b055af0df46fdf1c58cebf8e8f", + 
"sha256:c7c8b95bf47db6d19096a5e052ffca0a05f335bc63cef281a6e8fe864d450a72", + "sha256:c9b8e184898ed014884ca84c70562b4a82cbc63b044d366fedc68bc2b2f3394a", + "sha256:cc8ff50b50ce532de2fa7a7daae9dd12f0a699bfcd47f20945364e5c31799fef", + "sha256:d541423cdd416b78626b55f123412fcf979d22a2c39fce251b350de38c15c15b", + "sha256:dab4d16dfef34b185032580e2f2f89253d302facba093d5fa9dbe04f569c4f4b", + "sha256:dacbc52de979f2823a819571f2e3a350a7e36b8cb7484cdb1e289bceaf35305f", + "sha256:df57bdbeffe694e7842092c5e2e0bc80fff7f43379d465f932ef36f027179806", + "sha256:ed8fe9189d2beb6edc14d3ad19800626e1d9f2d975e436f84e19efb7fa19469b", + "sha256:f3ddf056d3ebcf6ce47bdaf56142af51bb7fad09e4af310241e9db7a3a8022e1", + "sha256:f8fe4984b431f8621ca53d9380901f62bfb54ff759a1348cd140490ada7b693c", + "sha256:fe439416eb6380de434886b00c859304338f8b19f6f54811984f3420a2e03858" + ], + "markers": "python_version >= '3.9'", + "version": "==7.6.4" + }, + "iniconfig": { + "hashes": [ + "sha256:2d91e135bf72d31a410b17c16da610a82cb55f6b0477d1a902134b24a455b8b3", + "sha256:b6a85871a79d2e3b22d2d1b94ac2824226a63c6b741c88f7ae975f18b6778374" + ], + "markers": "python_version >= '3.7'", + "version": "==2.0.0" + }, + "packaging": { + "hashes": [ + "sha256:026ed72c8ed3fcce5bf8950572258698927fd1dbda10a5e981cdf0ac37f4f002", + "sha256:5b8f2217dbdbd2f7f384c41c628544e6d52f2d0f53c6d0c3ea61aa5d1d7ff124" + ], + "markers": "python_version >= '3.8'", + "version": "==24.1" + }, + "pluggy": { + "hashes": [ + "sha256:2cffa88e94fdc978c4c574f15f9e59b7f4201d439195c3715ca9e2486f1d0cf1", + "sha256:44e1ad92c8ca002de6377e165f3e0f1be63266ab4d554740532335b9d75ea669" + ], + "markers": "python_version >= '3.8'", + "version": "==1.5.0" + }, + "pytest": { + "hashes": [ + "sha256:70b98107bd648308a7952b06e6ca9a50bc660be218d53c257cc1fc94fda10181", + "sha256:a6853c7375b2663155079443d2e45de913a911a11d669df02a50814944db57b2" + ], + "index": "pypi", + "markers": "python_version >= '3.8'", + "version": "==8.3.3" + }, + "pytest-cov": { + "hashes": [ + 
"sha256:4f0764a1219df53214206bf1feea4633c3b558a2925c8b59f144f682861ce652", + "sha256:5837b58e9f6ebd335b0f8060eecce69b662415b16dc503883a02f45dfeb14857" + ], + "index": "pypi", + "markers": "python_version >= '3.8'", + "version": "==5.0.0" + } + } } diff --git a/README.md b/README.md index 94b1417..4c4dfbc 100644 --- a/README.md +++ b/README.md @@ -8,15 +8,43 @@ Currently, we are only accepting contributions from members of the project who m ## Development -This code uses Python 3, probably at least 3.9. +This code uses Python 3. It is tested on Python 3.12, but will probably work on versions back to 3.9. -To install the project dependencies, first install pipenv globally with `pip install pipenv`. Then create a virtual env/install dependencies with `pipenv install`. +To install the project dependencies, first install pipenv globally with `pip install pipenv`. Then create a virtual env/install dependencies with `pipenv install --dev`. To run code in the project, prefix your command with `pipenv run`, a la `pipenv run python -m mediabridge.main`. +## Running main + +The "main.py" script is part of the `mediabridge` module. Additionally, running it requires `pipenv run` as mentioned above. So the full command to run the main script (or any other script in the `mediabridge` module) is: + +``` +pipenv run python -m mediabridge.main +``` + +This should be run from the root of the project directory. + +### Running from VSCode + +To fix import errors and other Intellisense features, make sure you've let VSCode know about your pipenv environment. To do that: + +1. Open the VSCode command palette (Control/Command+SHIFT+P) +2. Search for and select the "Python: Select Interpreter" command +3. Choose the option that starts with `MediaBridge` + ## Testing To run unit tests, 1. Ensure `pipenv` is installed 2. Run `pipenv run pytest` + +There is a GitHub actions "check" for passing tests, which must pass for you to be able to merge your PR. 
+ +## Code formatting + +We use [ruff](https://docs.astral.sh/ruff/) for code formatting, linting, and import sorting. If you've installed the project with the instructions above, you should have access to the `ruff` binary. + +The repo comes with a `.vscode` directory that contains a recommended ruff extension, as well as settings to set ruff as your Python formatter and to format code and sort imports on save. If you're not using VSCode, you can run `ruff format` from the project root directory to format all Python code. + +There is a GitHub Actions "check" that runs `ruff check` (linting and import sorting), which will fail if your PR has lint errors or unsorted imports. It does not currently verify formatting, so run `ruff format` before pushing. diff --git a/mediabridge/config/setting.py b/mediabridge/config/setting.py index 08fb56f..1dcd2dd 100644 --- a/mediabridge/config/setting.py +++ b/mediabridge/config/setting.py @@ -1 +1 @@ - # Configuration settings (e.g., MongoDB URI, paths) \ No newline at end of file +# Configuration settings (e.g., MongoDB URI, paths) diff --git a/mediabridge/data_processing/build_matrices.py b/mediabridge/data_processing/build_matrices.py index fe60e5b..d1c2342 100644 --- a/mediabridge/data_processing/build_matrices.py +++ b/mediabridge/data_processing/build_matrices.py @@ -1 +1 @@ -# Scripts to build interaction and feature matrices \ No newline at end of file +# Scripts to build interaction and feature matrices diff --git a/mediabridge/data_processing/preprocess.py b/mediabridge/data_processing/preprocess.py index 77f9777..a63f8ba 100644 --- a/mediabridge/data_processing/preprocess.py +++ b/mediabridge/data_processing/preprocess.py @@ -1 +1 @@ -# Data preprocessing scripts (e.g., feature extraction) \ No newline at end of file +# Data preprocessing scripts (e.g., feature extraction) diff --git a/mediabridge/data_processing/wiki_to_netflix.py b/mediabridge/data_processing/wiki_to_netflix.py index 7311955..bc361ed 100644 --- a/mediabridge/data_processing/wiki_to_netflix.py +++ b/mediabridge/data_processing/wiki_to_netflix.py @@ -1,39 +1,104 @@
-import requests import csv import os +import sys +import time +from dataclasses import dataclass +from typing import List, Optional + +import requests +from tqdm import tqdm + + +class WikidataServiceTimeoutException(Exception): + pass + + +@dataclass +class MovieData: + movie_id: Optional[str] + genre: List[str] + director: Optional[str] + + +# need Genres, Directors, Title, year? + +data_dir = os.path.join(os.path.dirname(__file__), "../../data") +out_dir = os.path.join(os.path.dirname(__file__), "../../out") +user_agent = "Noisebridge MovieBot 0.0.1/Audiodude " -data_dir = os.path.join(os.path.dirname(__file__), '../../data') -out_dir = os.path.join(os.path.dirname(__file__), '../../out') -user_agent = 'Noisebridge MovieBot 0.0.1/Audiodude ' -# Reading netflix text file def read_netflix_txt(txt_file, test): + """ + Reads and processes a Netflix text file. + + Parameters: + txt_file (str): Path to the Netflix text file + test (bool): When true, runs the function in test mode (reads only the first 100 rows) + """ num_rows = None - if test == True: + if test: num_rows = 100 - with open(txt_file, "r", encoding = "ISO-8859-1") as netflix_data: + with open(txt_file, "r", encoding="ISO-8859-1") as netflix_data: for i, line in enumerate(netflix_data): if num_rows is not None and i >= num_rows: break - yield line.rstrip().split(',', 2) + yield line.rstrip().split(",", 2) + + +def create_netflix_csv(csv_name, data_list): + """ + Writes data to a Netflix CSV file.
-# Writing netflix csv file -def create_netflix_csv(csv_name, data_list): - with open(csv_name, 'w') as netflix_csv: + Parameters: + csv_name (str): Name of CSV file to be created + data_list (list): Rows of data to be written to CSV file + """ + with open(csv_name, "w") as netflix_csv: csv.writer(netflix_csv).writerows(data_list) -# Extracting movie info from Wiki data + def wiki_feature_info(data, key): - if len(data['results']['bindings']) < 1 or key not in data['results']['bindings'][0]: + """ + Extracts movie information from a Wikidata query result. + + Parameters: + data (dict): JSON response from a SPARQL query, see example in get_example_json_sparql_response() in wiki_to_netflix_test.py. + key (str): The key for the information to extract (e.g., 'item', 'genreLabel', 'directorLabel'). + + Returns: + None: If the key is not present or no results are available. + list: If the key is 'genreLabel', returns a list of unique genre labels. + str: Otherwise, the value of the key in the first binding (the first row of the query result), reduced to the text after its last '/' (e.g., the entity ID at the end of an entity URI). + """ + if ( + len(data["results"]["bindings"]) < 1 + or key not in data["results"]["bindings"][0] + ): return None - if key == 'genreLabel': - return list({d['genreLabel']['value'] for d in data['results']['bindings'] if 'genreLabel' in d}) - return data['results']['bindings'][0][key]['value'].split('/')[-1] + if key == "genreLabel": + return list( + { + d["genreLabel"]["value"] + for d in data["results"]["bindings"] + if "genreLabel" in d + } + ) + return data["results"]["bindings"][0][key]["value"].split("/")[-1] + -# Formatting SPARQL query for Wiki data def format_sparql_query(title, year): - QUERY = ''' + """ + Formats SPARQL query for Wiki data + + Parameters: + title (str): name of content to query + year (int): release year of the movie + + Returns: + SPARQL Query (str): formatted string with movie title and year + """ + QUERY = """ SELECT * WHERE { SERVICE wikibase:mwapi { bd:serviceParam wikibase:api "EntitySearch" ; @@ -77,63
+142,118 @@ def format_sparql_query(title, year): SERVICE wikibase:label { bd:serviceParam wikibase:language "en" . } } - ''' - return QUERY % {'Title': title, 'Year': year} + """ + return QUERY % {"Title": title, "Year": year} + -# Getting list of movie IDs, genre IDs, and director IDs from request def wiki_query(data_csv, user_agent): - wiki_movie_ids = [] - wiki_genres = [] - wiki_directors = [] - - for row in data_csv: + """ + Queries the Wikidata SPARQL API for each movie row, retrying with exponential backoff when a request times out. + + Parameters: + data_csv (list of lists): Rows of movie data with [movie ID, release year, title]. + user_agent (str): Used to identify our script when sending requests to the Wikidata SPARQL API. + + Returns: + list of MovieData: A list of MovieData instances with movie IDs, genres, and directors. + """ + wiki_data_list = [] + + for row in tqdm(data_csv): if row[1] is None: continue SPARQL = format_sparql_query(row[2], int(row[1])) - response = requests.post('https://query.wikidata.org/sparql', - headers={'User-Agent': user_agent}, - data={ - 'query': SPARQL, - 'format': 'json', - } - ) - response.raise_for_status() - + tries = 0 + while True: + try: + response = requests.post( + "https://query.wikidata.org/sparql", + headers={"User-Agent": user_agent}, + data={"query": SPARQL, "format": "json"}, + timeout=20, + ) + break + except requests.exceptions.Timeout: + wait_time = 2**tries * 5 + time.sleep(wait_time) + tries += 1 + if tries > 5: + raise WikidataServiceTimeoutException( + f"Tried {tries} time, could not reach Wikidata " + f"(movie: {row[2]} {row[1]})" + ) + + response.raise_for_status() data = response.json() - - wiki_movie_ids.append(wiki_feature_info(data, 'item')) - wiki_genres.append(wiki_feature_info(data, 'genreLabel')) - wiki_directors.append(wiki_feature_info(data, 'directorLabel')) - - return wiki_movie_ids, wiki_genres, wiki_directors -# Calling all functions + wiki_data_list.append( + MovieData( + movie_id=wiki_feature_info(data, "item"), + genre=wiki_feature_info(data,
"genreLabel"), + director=wiki_feature_info(data, "directorLabel"), + ) + ) + + return wiki_data_list + + def process_data(test=False): + """ + Processes Netflix movie data by enriching it with information from Wikidata and writes the results to a CSV file. + The Netflix data is converted from a generator to a list to avoid exhaustion: it is iterated twice (by wiki_query and again here), and a spent generator resulted in nothing being written to the CSV file. + """ missing_count = 0 processed_data = [] - netflix_data = read_netflix_txt(os.path.join(data_dir, 'movie_titles.txt'), test) + netflix_data = list( + read_netflix_txt(os.path.join(data_dir, "movie_titles.txt"), test) + ) - netflix_csv = os.path.join(out_dir, 'movie_titles.csv') + netflix_csv = os.path.join(out_dir, "movie_titles.csv") - wiki_movie_ids_list, wiki_genres_list, wiki_directors_list = wiki_query(netflix_data, user_agent) + enriched_movies = wiki_query(netflix_data, user_agent) - num_rows = len(wiki_movie_ids_list) + num_rows = len(enriched_movies) for index, row in enumerate(netflix_data): netflix_id, year, title = row - if wiki_movie_ids_list[index] is None: + movie_data = enriched_movies[index] + if movie_data.movie_id is None: missing_count += 1 - movie = [netflix_id, wiki_movie_ids_list[index], title, year, wiki_genres_list[index], wiki_directors_list[index]] + if movie_data.genre: + genres = "; ".join(movie_data.genre) + else: + genres = "" + if movie_data.director: + director = movie_data.director + else: + director = "" + movie = [ + netflix_id, + movie_data.movie_id, + title, + year, + genres, + director, + ] processed_data.append(movie) + print("Processed Data:") + for movie in processed_data: + print(movie) + create_netflix_csv(netflix_csv, processed_data) - print(f'missing: {missing_count} ({missing_count / num_rows * 100}%)') - print(f'found: {num_rows - missing_count} ({(num_rows - missing_count) / num_rows * 100}%)') - print(f'total: {num_rows}') + print(f"missing: {missing_count} ({missing_count / num_rows * 100:.2f}%)") + print( + f"found:
{num_rows - missing_count} ({(num_rows - missing_count) / num_rows * 100:.2f}%)" + ) + print(f"total: {num_rows}") + -if __name__ == '__main__': - process_data(True) +if __name__ == "__main__": + # Test is true if no argument is passed or if the first argument is not '--prod'. + test = len(sys.argv) < 2 or sys.argv[1] != "--prod" + process_data(test=test) diff --git a/mediabridge/data_processing/wiki_to_netflix_test.py b/mediabridge/data_processing/wiki_to_netflix_test.py index 13ebda4..b7ea413 100644 --- a/mediabridge/data_processing/wiki_to_netflix_test.py +++ b/mediabridge/data_processing/wiki_to_netflix_test.py @@ -1,6 +1,27 @@ -from wiki_to_netflix import format_sparql_query, wiki_query, process_data +from wiki_to_netflix import format_sparql_query from wiki_to_netflix_test_data import EXPECTED_SPARQL_QUERY + def test_format_sparql_query(): QUERY = format_sparql_query("The Room", 2003) - assert QUERY == EXPECTED_SPARQL_QUERY \ No newline at end of file + assert QUERY == EXPECTED_SPARQL_QUERY + + +def get_example_json_sparql_response(): + """ + Returns an example response structure for testing. + """ + return { + "results": { + "bindings": [ + { + "item": { + "type": "uri", + "value": "http://www.wikidata.org/entity/Q12345", + }, + "genreLabel": {"type": "literal", "value": "Science Fiction"}, + "directorLabel": {"type": "literal", "value": "John Doe"}, + } + ] + } + } diff --git a/mediabridge/data_processing/wiki_to_netflix_test_data.py b/mediabridge/data_processing/wiki_to_netflix_test_data.py index eac0295..9a907e8 100644 --- a/mediabridge/data_processing/wiki_to_netflix_test_data.py +++ b/mediabridge/data_processing/wiki_to_netflix_test_data.py @@ -1,4 +1,4 @@ -EXPECTED_SPARQL_QUERY =''' +EXPECTED_SPARQL_QUERY = """ SELECT * WHERE { SERVICE wikibase:mwapi { bd:serviceParam wikibase:api "EntitySearch" ; @@ -42,4 +42,4 @@ SERVICE wikibase:label { bd:serviceParam wikibase:language "en" . 
} } - ''' \ No newline at end of file + """ diff --git a/mediabridge/db/connect.py b/mediabridge/db/connect.py index fbdc2e3..37f9080 100644 --- a/mediabridge/db/connect.py +++ b/mediabridge/db/connect.py @@ -1 +1 @@ - # MongoDB connection setup +# MongoDB connection setup diff --git a/mediabridge/db/queries.py b/mediabridge/db/queries.py index 5217756..60f38b3 100644 --- a/mediabridge/db/queries.py +++ b/mediabridge/db/queries.py @@ -1 +1 @@ - # Functions to query MongoDB for movies and interactions \ No newline at end of file +# Functions to query MongoDB for movies and interactions diff --git a/mediabridge/main.py b/mediabridge/main.py index fa835ac..e3e03bf 100644 --- a/mediabridge/main.py +++ b/mediabridge/main.py @@ -1,4 +1,4 @@ from mediabridge.data_processing import wiki_to_netflix -q = wiki_to_netflix.format_sparql_query('The Room', 2003) +q = wiki_to_netflix.format_sparql_query("The Room", 2003) print(q) diff --git a/mediabridge/models/predict.py b/mediabridge/models/predict.py index 02921b8..df95b5b 100644 --- a/mediabridge/models/predict.py +++ b/mediabridge/models/predict.py @@ -1 +1 @@ - # Script to make predictions using the trained model \ No newline at end of file +# Script to make predictions using the trained model diff --git a/mediabridge/models/train_model.py b/mediabridge/models/train_model.py index 0debfb2..0fd7818 100644 --- a/mediabridge/models/train_model.py +++ b/mediabridge/models/train_model.py @@ -1 +1 @@ -# Script to train the LightFM model \ No newline at end of file +# Script to train the LightFM model diff --git a/mediabridge/models/utils.py b/mediabridge/models/utils.py index 2eb296b..18cc0a7 100644 --- a/mediabridge/models/utils.py +++ b/mediabridge/models/utils.py @@ -1 +1 @@ -# Utility functions (e.g., for building matrices) \ No newline at end of file +# Utility functions (e.g., for building matrices) diff --git a/ruff.toml b/ruff.toml new file mode 100644 index 0000000..4f6111e --- /dev/null +++ b/ruff.toml @@ -0,0 +1,2 @@ +# 
Default selections for ruff, plus isort. +lint.select = ["E4", "E7", "E9", "F", "I001"]