Add test for python whl ecosystem aboutcode-org#3707 aboutcode-org#3862

Signed-off-by: swastik <swastkk@gmail.com>
swastkk · Aug 4, 2024 · b84bec4 · b84bec4
1 parent 7eedcca
commit b84bec4
Show file tree

Hide file tree

Showing 13 changed files with 1,734 additions and 1 deletion.
diff --git a/tests/packagedcode/data/package_summary/python-whl-expected.json b/tests/packagedcode/data/package_summary/python-whl-expected.json
diff --git a/tests/packagedcode/data/package_summary/python.whl-extract/sample-0.1.0.dist-info/METADATA b/tests/packagedcode/data/package_summary/python.whl-extract/sample-0.1.0.dist-info/METADATA
@@ -0,0 +1,122 @@
+Metadata-Version: 2.1
+Name: beautifulsoup4
+Version: 4.12.3
+Summary: Screen-scraping library
+Project-URL: Download, https://www.crummy.com/software/BeautifulSoup/bs4/download/
+Project-URL: Homepage, https://www.crummy.com/software/BeautifulSoup/bs4/
+Author-email: Leonard Richardson <leonardr@segfault.org>
+License: MIT License
+License-File: AUTHORS
+License-File: LICENSE
+Keywords: HTML,XML,parse,soup
+Classifier: Development Status :: 5 - Production/Stable
+Classifier: Intended Audience :: Developers
+Classifier: License :: OSI Approved :: MIT License
+Classifier: Programming Language :: Python
+Classifier: Programming Language :: Python :: 3
+Classifier: Topic :: Software Development :: Libraries :: Python Modules
+Classifier: Topic :: Text Processing :: Markup :: HTML
+Classifier: Topic :: Text Processing :: Markup :: SGML
+Classifier: Topic :: Text Processing :: Markup :: XML
+Requires-Python: >=3.6.0
+Requires-Dist: soupsieve>1.2
+Provides-Extra: cchardet
+Requires-Dist: cchardet; extra == 'cchardet'
+Provides-Extra: chardet
+Requires-Dist: chardet; extra == 'chardet'
+Provides-Extra: charset-normalizer
+Requires-Dist: charset-normalizer; extra == 'charset-normalizer'
+Provides-Extra: html5lib
+Requires-Dist: html5lib; extra == 'html5lib'
+Provides-Extra: lxml
+Requires-Dist: lxml; extra == 'lxml'
+Description-Content-Type: text/markdown
+
+Beautiful Soup is a library that makes it easy to scrape information
+from web pages. It sits atop an HTML or XML parser, providing Pythonic
+idioms for iterating, searching, and modifying the parse tree.
+
+# Quick start
+
+```
+>>> from bs4 import BeautifulSoup
+>>> soup = BeautifulSoup("<p>Some<b>bad<i>HTML")
+>>> print(soup.prettify())
+<html>
+ <body>
+  <p>
+   Some
+   <b>
+    bad
+    <i>
+     HTML
+    </i>
+   </b>
+  </p>
+ </body>
+</html>
+>>> soup.find(text="bad")
+'bad'
+>>> soup.i
+<i>HTML</i>
+#
+>>> soup = BeautifulSoup("<tag1>Some<tag2/>bad<tag3>XML", "xml")
+#
+>>> print(soup.prettify())
+<?xml version="1.0" encoding="utf-8"?>
+<tag1>
+ Some
+ <tag2/>
+ bad
+ <tag3>
+  XML
+ </tag3>
+</tag1>
+```
+
+To go beyond the basics, [comprehensive documentation is available](https://www.crummy.com/software/BeautifulSoup/bs4/doc/).
+
+# Links
+
+* [Homepage](https://www.crummy.com/software/BeautifulSoup/bs4/)
+* [Documentation](https://www.crummy.com/software/BeautifulSoup/bs4/doc/)
+* [Discussion group](https://groups.google.com/group/beautifulsoup/)
+* [Development](https://code.launchpad.net/beautifulsoup/)
+* [Bug tracker](https://bugs.launchpad.net/beautifulsoup/)
+* [Complete changelog](https://bazaar.launchpad.net/~leonardr/beautifulsoup/bs4/view/head:/CHANGELOG)
+
+# Note on Python 2 sunsetting
+
+Beautiful Soup's support for Python 2 was discontinued on December 31,
+2020: one year after the sunset date for Python 2 itself. From this
+point onward, new Beautiful Soup development will exclusively target
+Python 3. The final release of Beautiful Soup 4 to support Python 2
+was 4.9.3.
+
+# Supporting the project
+
+If you use Beautiful Soup as part of your professional work, please consider a
+[Tidelift subscription](https://tidelift.com/subscription/pkg/pypi-beautifulsoup4?utm_source=pypi-beautifulsoup4&utm_medium=referral&utm_campaign=readme).
+This will support many of the free software projects your organization
+depends on, not just Beautiful Soup.
+
+If you use Beautiful Soup for personal projects, the best way to say
+thank you is to read
+[Tool Safety](https://www.crummy.com/software/BeautifulSoup/zine/), a zine I
+wrote about what Beautiful Soup has taught me about software
+development.
+
+# Building the documentation
+
+The bs4/doc/ directory contains full documentation in Sphinx
+format. Run `make html` in that directory to create HTML
+documentation.
+
+# Running the unit tests
+
+Beautiful Soup supports unit test discovery using Pytest:
+
+```
+$ pytest
+```
+
diff --git a/tests/packagedcode/data/package_summary/python.whl-extract/sample-0.1.0.dist-info/RECORD b/tests/packagedcode/data/package_summary/python.whl-extract/sample-0.1.0.dist-info/RECORD
@@ -0,0 +1,10 @@
+sample/__init__.py,sha256=kq32cCtQiNjjU9XwjD0b1jdXN5WEC87nJqSSW3PhVkM,33822
+sample/builder/_html5lib.py,sha256=0w-hmPM5wWR2iDuRCR6MvY6ZPXbg_hgddym-YWqj03s,19114
+sample/builder/_htmlparser.py,sha256=_VD5Z08j6A9YYMR4y7ZTfdMzwiCBsSUQAPuHiYB-WZI,14923
+sample/builder/_lxml.py,sha256=yKdMx1kdX7H2CopwSWEYm4Sgrfkd-WDj8HbskcaLauU,14948
+sample/tests/__init__.py,sha256=NydTegds_r7MoOEuQLS6TFmTA9TwK3KxJhwEkqjCGTQ,48392
+sample-0.1.0.dist-info/METADATA,sha256=UkOS1koIjlakIy9Q1u2yCNwDEFOUZSrLcsbV-mTInz4,3790
+sample-0.1.0.dist-info/WHEEL,sha256=mRYSEL3Ih6g5a_CVMIcwiF__0Ae4_gLYh01YFNwiq1k,87
+sample-0.1.0.dist-info/licenses/AUTHORS,sha256=uSIdbrBb1sobdXl7VrlUvuvim2dN9kF3MH4Edn0WKGE,2176
+sample-0.1.0.dist-info/licenses/LICENSE,sha256=VbTY1LHlvIbRDvrJG3TIe8t3UmsPW57a-LnNKtxzl7I,1441
+sample-0.1.0.dist-info/RECORD,
diff --git a/tests/packagedcode/data/package_summary/python.whl-extract/sample-0.1.0.dist-info/WHEEL b/tests/packagedcode/data/package_summary/python.whl-extract/sample-0.1.0.dist-info/WHEEL
@@ -0,0 +1,4 @@
+Wheel-Version: 1.0
+Generator: hatchling 1.21.0
+Root-Is-Purelib: true
+Tag: py3-none-any
diff --git a/...kagedcode/data/package_summary/python.whl-extract/sample-0.1.0.dist-info/licenses/AUTHORS b/...kagedcode/data/package_summary/python.whl-extract/sample-0.1.0.dist-info/licenses/AUTHORS
@@ -0,0 +1,49 @@
+Behold, mortal, the origins of Beautiful Soup...
+================================================
+
+Leonard Richardson is the primary maintainer.
+
+Aaron DeVore and Isaac Muse have made significant contributions to the
+code base.
+
+Mark Pilgrim provided the encoding detection code that forms the base
+of UnicodeDammit.
+
+Thomas Kluyver and Ezio Melotti finished the work of getting Beautiful
+Soup 4 working under Python 3.
+
+Simon Willison wrote soupselect, which was used to make Beautiful Soup
+support CSS selectors. Isaac Muse wrote SoupSieve, which made it
+possible to _remove_ the CSS selector code from Beautiful Soup.
+
+Sam Ruby helped with a lot of edge cases.
+
+Jonathan Ellis was awarded the prestigious Beau Potage D'Or for his
+work in solving the nestable tags conundrum.
+
+An incomplete list of people have contributed patches to Beautiful
+Soup:
+
+ Istvan Albert, Andrew Lin, Anthony Baxter, Oliver Beattie, Andrew
+Boyko, Tony Chang, Francisco Canas, "Delong", Zephyr Fang, Fuzzy,
+Roman Gaufman, Yoni Gilad, Richie Hindle, Toshihiro Kamiya, Peteris
+Krumins, Kent Johnson, Marek Kapolka, Andreas Kostyrka, Roel Kramer,
+Ben Last, Robert Leftwich, Stefaan Lippens, "liquider", Staffan
+Malmgren, Ksenia Marasanova, JP Moins, Adam Monsen, John Nagle, "Jon",
+Ed Oskiewicz, Martijn Peters, Greg Phillips, Giles Radford, Stefano
+Revera, Arthur Rudolph, Marko Samastur, James Salter, Jouni Seppänen,
+Alexander Schmolck, Tim Shirley, Geoffrey Sneddon, Ville Skyttä,
+"Vikas", Jens Svalgaard, Andy Theyers, Eric Weiser, Glyn Webster, John
+Wiseman, Paul Wright, Danny Yoo
+
+An incomplete list of people who made suggestions or found bugs or
+found ways to break Beautiful Soup:
+
+ Hanno Böck, Matteo Bertini, Chris Curvey, Simon Cusack, Bruce Eckel,
+ Matt Ernst, Michael Foord, Tom Harris, Bill de hOra, Donald Howes,
+ Matt Patterson, Scott Roberts, Steve Strassmann, Mike Williams,
+ warchild at redho dot com, Sami Kuisma, Carlos Rocha, Bob Hutchison,
+ Joren Mc, Michal Migurski, John Kleven, Tim Heaney, Tripp Lilley, Ed
+ Summers, Dennis Sutch, Chris Smith, Aaron Swartz, Stuart
+ Turner, Greg Edwards, Kevin J Kalupson, Nikos Kouremenos, Artur de
+ Sousa Rocha, Yichun Wei, Per Vognsen
diff --git a/...kagedcode/data/package_summary/python.whl-extract/sample-0.1.0.dist-info/licenses/LICENSE b/...kagedcode/data/package_summary/python.whl-extract/sample-0.1.0.dist-info/licenses/LICENSE
@@ -0,0 +1,31 @@
+Beautiful Soup is made available under the MIT license:
+
+ Copyright (c) Leonard Richardson
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice shall be
+ included in all copies or substantial portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ SOFTWARE.
+
+Beautiful Soup incorporates code from the html5lib library, which is
+also made available under the MIT license. Copyright (c) James Graham
+and other contributors
+
+Beautiful Soup has an optional dependency on the soupsieve library,
+which is also made available under the MIT license. Copyright (c)
+Isaac Muse
diff --git a/tests/packagedcode/data/package_summary/python.whl-extract/sample/__init__.py b/tests/packagedcode/data/package_summary/python.whl-extract/sample/__init__.py
@@ -0,0 +1,23 @@
+"""Beautiful Soup Elixir and Tonic - "The Screen-Scraper's Friend".
+
+http://www.crummy.com/software/BeautifulSoup/
+
+Beautiful Soup uses a pluggable XML or HTML parser to parse a
+(possibly invalid) document into a tree representation. Beautiful Soup
+provides methods and Pythonic idioms that make it easy to navigate,
+search, and modify the parse tree.
+
+Beautiful Soup works with Python 3.6 and up. It works better if lxml
+and/or html5lib is installed.
+
+For more than you ever wanted to know about Beautiful Soup, see the
+documentation: http://www.crummy.com/software/BeautifulSoup/bs4/doc/
+"""
+
+__author__ = "Leonard Richardson (leonardr@segfault.org)"
+__version__ = "4.12.3"
+__copyright__ = "Copyright (c) 2004-2024 Leonard Richardson"
+# Use of this source code is governed by the MIT license.
+__license__ = "MIT"
+
+__all__ = ['BeautifulSoup']
diff --git a/tests/packagedcode/data/package_summary/python.whl-extract/sample/builder/__init__.py b/tests/packagedcode/data/package_summary/python.whl-extract/sample/builder/__init__.py
@@ -0,0 +1,2 @@
+# Use of this source code is governed by the MIT license.
+__license__ = "MIT"
diff --git a/tests/packagedcode/data/package_summary/python.whl-extract/sample/builder/_html5lib.py b/tests/packagedcode/data/package_summary/python.whl-extract/sample/builder/_html5lib.py
@@ -0,0 +1,6 @@
+# Use of this source code is governed by the MIT license.
+__license__ = "MIT"
+
+__all__ = [
+    'HTML5TreeBuilder',
+    ]
diff --git a/tests/packagedcode/data/package_summary/python.whl-extract/sample/builder/_htmlparser.py b/tests/packagedcode/data/package_summary/python.whl-extract/sample/builder/_htmlparser.py
@@ -0,0 +1,9 @@
+# encoding: utf-8
+"""Use the HTMLParser library to parse HTML files that aren't too bad."""
+
+# Use of this source code is governed by the MIT license.
+__license__ = "MIT"
+
+__all__ = [
+    'HTMLParserTreeBuilder',
+    ]
diff --git a/tests/packagedcode/data/package_summary/python.whl-extract/sample/builder/_lxml.py b/tests/packagedcode/data/package_summary/python.whl-extract/sample/builder/_lxml.py
@@ -0,0 +1,7 @@
+# Use of this source code is governed by the MIT license.
+__license__ = "MIT"
+
+__all__ = [
+    'LXMLTreeBuilderForXML',
+    'LXMLTreeBuilder',
+    ]
diff --git a/tests/packagedcode/data/package_summary/python.whl-extract/sample/tests/__init__.py b/tests/packagedcode/data/package_summary/python.whl-extract/sample/tests/__init__.py
@@ -0,0 +1,5 @@
+# encoding: utf-8
+"""Helper classes for tests."""
+
+# Use of this source code is governed by the MIT license.
+__license__ = "MIT"
diff --git a/tests/packagedcode/test_plugin_package.py b/tests/packagedcode/test_plugin_package.py
@@ -135,7 +135,15 @@ def test_plugin_package_with_package_summary(self):
 
         run_scan_click(['--package','--license','--copyright', '--strip-root', '--processes', '-1','--package-summary', '--classify','--json-pp', result_file, test_dir])
         check_json_scan(expected_file, result_file, remove_uuid=True, remove_file_date=True, regen=REGEN_TEST_FIXTURES)
-
+
+    def test_py_whl_ecosystem_with_package_summary(self):
+        # Scan the extracted wheel fixture and compare the JSON output with the stored expectation.
+        wheel_dir = self.get_test_loc('package_summary/python.whl-extract')
+        actual_json = self.get_temp_file('json')
+        expected_json = self.get_test_loc('package_summary/python-whl-expected.json')
+        run_scan_click(['--package', '--license', '--copyright', '--strip-root', '--processes', '-1', '--package-summary', '--summary', '--classify', '--json-pp', actual_json, wheel_dir])
+        check_json_scan(expected_json, actual_json, remove_uuid=True, remove_file_date=True, regen=REGEN_TEST_FIXTURES)
+
     @skipIf(on_windows, 'somehow this fails on Windows')
     def test_package_command_scan_python(self):
         test_dir = self.get_test_loc('recon/pypi')