Commit

fix: minor data update

terryyz committed May 13, 2024
1 parent 71595c0 commit b1967f6
Showing 1,142 changed files with 1,140 additions and 2,280 deletions.
1 change: 0 additions & 1 deletion data/processed/0_w_doc.py
@@ -21,7 +21,6 @@ def task_func(numbers=list(range(1, 3))):
>>> isinstance(result, float)
True
"""

permutations = list(itertools.permutations(numbers))
sum_diffs = 0
for perm in permutations:
1 change: 0 additions & 1 deletion data/processed/1000_wo_doc.py
@@ -31,7 +31,6 @@ def task_func(url):
0 Alice 25 New York
1 Bob 30 San Francisco
"""

urllib.request.urlretrieve(url, TARGET_JSON_FILE)
with open(TARGET_JSON_FILE, "r") as f:
data = json.load(f)
1 change: 0 additions & 1 deletion data/processed/1001_wo_doc.py
@@ -27,7 +27,6 @@ def task_func(csv_file_path: str):
>>> ax.get_title()
" Plot Title : Normalized Column 1"
"""

df = pd.read_csv(csv_file_path)
mean = df["column1"].mean()
std = df["column1"].std()
1 change: 0 additions & 1 deletion data/processed/1002_w_doc.py
@@ -33,7 +33,6 @@ def task_func(data, column_name="target_column"):
0 1 10
1 2 15
"""

df = pd.DataFrame(data)
if column_name not in df.columns:
raise ValueError(f"Column '{column_name}' not found in the DataFrame.")
1 change: 0 additions & 1 deletion data/processed/1003_wo_doc.py
@@ -39,7 +39,6 @@ def task_func(url):
>>> df = task_func('http://example.com/invalid_structure.xml')
ValueError: XML structure does not match expected format.
"""

try:
with urllib.request.urlopen(url) as response:
xml_data = response.read()
1 change: 0 additions & 1 deletion data/processed/1004_wo_doc.py
@@ -35,7 +35,6 @@ def task_func(url):
- matplotlib
"""

with urllib.request.urlopen(url) as response:
text = response.read().decode()
words = re.findall(r"\b\w+\b", text)
1 change: 0 additions & 1 deletion data/processed/1005_wo_doc.py
@@ -39,7 +39,6 @@ def task_func(
"""

try:
if os.path.exists(save_path):
os.remove(save_path)
1 change: 0 additions & 1 deletion data/processed/1006_wo_doc.py
@@ -38,7 +38,6 @@ def task_func(url, download_path="mnt/data/downloads/"):
>>> task_func('https://example.com/file.zip')
'mnt/data/downloads/file'
"""

if not os.path.exists(download_path):
os.makedirs(download_path)
try:
1 change: 0 additions & 1 deletion data/processed/1007_wo_doc.py
@@ -37,7 +37,6 @@ def task_func(url: str) -> pd.DataFrame:
- It checks the HTTP response status and raises an HTTPError for unsuccessful status codes.
- Directly converts the HTTP response to JSON and then to a DataFrame, without intermediate processing.
"""

try:
response = requests.get(url, timeout=5)
response.raise_for_status() # Raises an HTTPError if the HTTP request returned an unsuccessful status code
1 change: 0 additions & 1 deletion data/processed/1008_wo_doc.py
@@ -56,7 +56,6 @@ def task_func(url, table_id):
Columns: []
Index: []
"""

try:
response = requests.get(url, timeout=5)
response.raise_for_status() # Raises an HTTPError if the HTTP request returned an unsuccessful status code
1 change: 0 additions & 1 deletion data/processed/1009_wo_doc.py
@@ -41,7 +41,6 @@ def task_func(xml_content, output_csv_path):
- The output CSV path should be a valid file path where the user has write
permissions, to prevent IOError.
"""

try:
root = ET.fromstring(xml_content)
data = [[elem.tag, elem.text] for elem in root.iter()]
1 change: 0 additions & 1 deletion data/processed/100_wo_doc.py
@@ -36,7 +36,6 @@ def task_func(seed=42):
>>> ax.get_ylabel()
'Value'
"""

try:
plt.rc('font', family='Arial')
random.seed(seed)
1 change: 0 additions & 1 deletion data/processed/1010_wo_doc.py
@@ -38,7 +38,6 @@ def task_func(url):
- The function will not handle redirections or authentication scenarios. It
expects a direct link to an image resource.
"""

try:
response = requests.get(url, timeout=5)
response.raise_for_status()
1 change: 0 additions & 1 deletion data/processed/1011_wo_doc.py
@@ -43,7 +43,6 @@ def task_func(csv_file_path, col1_name="column1", col2_name="column2"):
- The function does not handle missing data. Ensure that the CSV file has clean and complete data for accurate results.
- The bar plot is customizable using matplotlib's functionality after the function returns the Axes object.
"""

df = pd.read_csv(csv_file_path)
groupby_data = df.groupby(col1_name)[col2_name].mean()
_, ax = plt.subplots(figsize=(10, 6))
1 change: 0 additions & 1 deletion data/processed/1012_wo_doc.py
@@ -32,7 +32,6 @@ def task_func(url, filename):
>>> task_func('http://example.com/myfile.zip', 'myfile.zip')
('Download and extraction successful', ['file1.txt', 'file2.txt'])
"""

try:
response = requests.get(url, stream=True, timeout=5)
if response.status_code == 200:
1 change: 0 additions & 1 deletion data/processed/1013_wo_doc.py
@@ -32,7 +32,6 @@ def task_func(
>>> task_func('/anotherpage', base_url='https://www.different.com', csv_file='other_links.csv')
8
"""

full_url = urljoin(base_url, url)
response = requests.get(full_url)
soup = BeautifulSoup(response.text, "html.parser")
1 change: 0 additions & 1 deletion data/processed/1014_wo_doc.py
@@ -31,7 +31,6 @@ def task_func(api_url):
>>> if plot:
>>> plot.show()
"""

if not isinstance(api_url, str):
raise TypeError("api_url must be a string")
response = requests.get(api_url, timeout=5)
1 change: 0 additions & 1 deletion data/processed/1015_wo_doc.py
@@ -38,7 +38,6 @@ def task_func(webpage_url: str, database_name: str = "my_database.db") -> int:
>>> print(f"Number of rows parsed: {num_rows}")
Number of rows parsed: 5
"""

try:
if webpage_url.startswith("file://"):
with open(webpage_url[7:], "r", encoding="utf-8") as file:
1 change: 0 additions & 1 deletion data/processed/1016_wo_doc.py
@@ -29,7 +29,6 @@ def task_func(url: str) -> "matplotlib.axes._axes.Axes":
>>> type(ax)
<class 'matplotlib.axes._axes.Axes'>
"""

response = None # Initialize response to None
if not isinstance(url, str) or not url:
raise ValueError("Invalid URL provided.")
1 change: 0 additions & 1 deletion data/processed/1017_wo_doc.py
@@ -39,7 +39,6 @@ class 1 0.89 0.87 0.88 48
The CSV file must have a column with the name specified by 'target_column', and it should be in a
format readable by pandas.read_csv().
"""

df = pd.read_csv(csv_file_path)
if target_column not in df.columns:
raise ValueError(f"'{target_column}' column not found in the CSV file.")
1 change: 0 additions & 1 deletion data/processed/1018_wo_doc.py
@@ -37,7 +37,6 @@ def task_func(url=URL, from_encoding="cp1251", use_lxml=False):
In such cases, the function returns None, indicating that the webpage could not be successfully retrieved or was not available.
"""

if not url:
return None
try:
1 change: 0 additions & 1 deletion data/processed/1019_wo_doc.py
@@ -37,7 +37,6 @@ def task_func(filename=IMAGE_PATH, from_encoding="cp1251", to_encoding="utf8"):
>>> print(text)
'Привет мир' # This output is the utf-8 encoded version of the extracted text.
"""

with Image.open(filename) as image:
try:
extracted_text = pytesseract.image_to_string(image)
1 change: 0 additions & 1 deletion data/processed/101_wo_doc.py
@@ -29,7 +29,6 @@ def task_func(data_url="http://lib.stat.cmu.edu/datasets/boston", seed=42):
>>> type(ax)
<class 'matplotlib.axes._axes.Axes'>
"""

try:
font = {'sans-serif': 'Arial', 'family': 'sans-serif'}
plt.rc('font', **font)
1 change: 0 additions & 1 deletion data/processed/1020_wo_doc.py
@@ -42,7 +42,6 @@ def task_func(url=API_URL, from_encoding=None, to_encoding="utf8"):
- It handles cases with empty content and undetectable encoding by returning an empty dictionary or raising an exception, respectively.
- The decoding and re-encoding steps ensure compatibility with various data formats and the final JSON parsing.
"""

response = requests.get(url, timeout=5)
content = response.content
if from_encoding is None:
1 change: 0 additions & 1 deletion data/processed/1021_w_doc.py
@@ -28,7 +28,6 @@ def task_func(input_string, verify_hash=None):
>>> task_func("Hello, World!", "dffd6021bb2bd5b0af676290809ec3a53191dd81c7f70a4b28688a362182986f")
True
"""

if not isinstance(input_string, str):
raise TypeError("Input must be a string")
if verify_hash is not None and not isinstance(verify_hash, str):
1 change: 0 additions & 1 deletion data/processed/1022_wo_doc.py
@@ -41,7 +41,6 @@ def task_func(csv_file_path, column_name, date_format="%Y-%m-%d"):
0 2023-12-10 100
1 2023-12-11 150
"""

if not os.path.isfile(csv_file_path):
raise FileNotFoundError(f"The file {csv_file_path} does not exist.")
try:
1 change: 0 additions & 1 deletion data/processed/1023_w_doc.py
@@ -33,7 +33,6 @@ def task_func(dataframe):
>>> print(ax)
Axes(0.125,0.11;0.775x0.77)
"""

if dataframe.empty:
raise ValueError("DataFrame is empty.")
if not all(dataframe.dtypes.apply(lambda x: np.issubdtype(x, np.number))):
1 change: 0 additions & 1 deletion data/processed/1024_w_doc.py
@@ -47,7 +47,6 @@ def task_func(data_dict):
>>> plot.get_title() if plot is not None else 'No plot generated'
'Value Distribution'
"""

df = pd.DataFrame(data_dict).dropna()
if df.empty or df.nunique().min() < 2:
return df, None
1 change: 0 additions & 1 deletion data/processed/1025_w_doc.py
@@ -33,7 +33,6 @@ def task_func(data_dict):
>>> plot_ax.get_title()
'Scaled Values'
"""

df = pd.DataFrame(data_dict).dropna()
if df.empty:
ax = plt.gca()
1 change: 0 additions & 1 deletion data/processed/1026_w_doc.py
@@ -43,7 +43,6 @@ def task_func(kwargs):
>>> results['significant']
True
"""

alpha = 0.05 # Define the significance level
group1 = np.array(kwargs.get("group1", []))
group2 = np.array(kwargs.get("group2", []))
1 change: 0 additions & 1 deletion data/processed/1027_wo_doc.py
@@ -24,7 +24,6 @@ def task_func(url):
>>> task_func('https://www.example.com?q=4a4b4c')
'JKL'
"""

try:
parsed_url = urllib.parse.urlparse(url)
query = urllib.parse.parse_qs(parsed_url.query).get("q", [None])[0]
1 change: 0 additions & 1 deletion data/processed/1028_wo_doc.py
@@ -38,7 +38,6 @@ def task_func(interval, duration):
>>> task_func(5, 60)
'logfile.log'
"""

if interval <= 0 or duration <= 0:
raise ValueError("Interval and duration must be greater than zero.")
start_time = time.time()
1 change: 0 additions & 1 deletion data/processed/1029_w_doc.py
@@ -39,7 +39,6 @@ def task_func(rows=100, columns=3):
x 1
Name: count, dtype: int64
"""

column_names = [
chr(97 + i) for i in range(columns)
] # generate column names based on the number of columns
1 change: 0 additions & 1 deletion data/processed/102_w_doc.py
@@ -29,7 +29,6 @@ def task_func():
>>> type(fig).__name__
'Figure'
"""

font = {'family': 'Arial'}
plt.rc('font', **font) # Set the global font to Arial.
DIABETES = load_diabetes()
1 change: 0 additions & 1 deletion data/processed/1030_w_doc.py
@@ -28,7 +28,6 @@ def task_func():
3 a a d
4 a a e
"""

LETTERS = list(string.ascii_lowercase)
combinations = list(itertools.product(LETTERS, repeat=3))
df = pd.DataFrame(combinations, columns=["Letter 1", "Letter 2", "Letter 3"])
1 change: 0 additions & 1 deletion data/processed/1031_w_doc.py
@@ -30,7 +30,6 @@ def task_func(n_rows=1000):
>>> ax.get_title()
'Top 30 Frequencies of Random 3-Letter Strings'
"""

if n_rows <= 0:
raise ValueError("Number of rows must be greater than 0")
data = ["".join(random.choices(string.ascii_lowercase, k=3)) for _ in range(n_rows)]
1 change: 0 additions & 1 deletion data/processed/1032_w_doc.py
@@ -52,7 +52,6 @@ def task_func(rows=1000, string_length=3):
>>> ax.get_xlim()
(0.0, 26.0)
"""

data = ["".join(random.choices(LETTERS, k=string_length)) for _ in range(rows)]
df = pd.DataFrame({"String": data})
if df.empty:
1 change: 0 additions & 1 deletion data/processed/1033_w_doc.py
@@ -35,7 +35,6 @@ def task_func():
3 a a d
4 a a e
"""

LETTERS = list(string.ascii_lowercase)
combinations = list(itertools.product(LETTERS, repeat=3))
df = pd.DataFrame(combinations, columns=["a", "b", "c"])
1 change: 0 additions & 1 deletion data/processed/1034_w_doc.py
@@ -34,7 +34,6 @@ def task_func(s1, s2):
>>> edit_distance
387.5590277622236
"""

high_sales_categories = s1.index[(s1 > 200) & (s2 > 200)]
if high_sales_categories.empty:
return None, 0.0
1 change: 0 additions & 1 deletion data/processed/1035_w_doc.py
@@ -34,7 +34,6 @@ def task_func(feature: pd.Series, target: pd.Series) -> (np.ndarray, plt.Axes):
>>> ax.get_title()
'Confusion Matrix'
"""

df = pd.DataFrame({"Feature": feature, "Target": target})
X_train, X_test, y_train, y_test = train_test_split(
df["Feature"], df["Target"], test_size=0.2, random_state=42
1 change: 0 additions & 1 deletion data/processed/1036_w_doc.py
@@ -31,7 +31,6 @@ def task_func(s1, s2):
>>> ax.get_title()
'Overlap Between Series1 and Series2'
"""

intersection = set(s1).intersection(set(s2))
df1 = pd.DataFrame({s1.name: s1, "Type": "Series1"})
df2 = pd.DataFrame({s2.name: s2, "Type": "Series2"})
1 change: 0 additions & 1 deletion data/processed/1037_w_doc.py
@@ -42,7 +42,6 @@ def task_func(s1, s2, n_clusters=3):
"""

if not isinstance(s1, pd.Series) or not isinstance(s2, pd.Series):
raise ValueError("s1 and s2 must be pandas Series")
if len(s1) != len(s2):
1 change: 0 additions & 1 deletion data/processed/1038_wo_doc.py
@@ -32,7 +32,6 @@ def task_func(client_socket):
... finally:
... server_socket.close()
"""

response_data = {"message": "Hello", "time": str(datetime.now())}
response = json.dumps(response_data) + "\n"
client_socket.send(response.encode("utf-8"))
1 change: 0 additions & 1 deletion data/processed/1039_wo_doc.py
@@ -41,7 +41,6 @@ def task_func(client_socket, cert_file, key_file, buffer_size=1024):
>>> print("Sent file hash:", file_hash)
>>> server_socket.close()
"""

context = ssl.SSLContext(ssl.PROTOCOL_TLS_SERVER)
context.load_cert_chain(certfile=cert_file, keyfile=key_file)
secure_socket = None
1 change: 0 additions & 1 deletion data/processed/103_wo_doc.py
@@ -31,7 +31,6 @@ def task_func(temperatures):
>>> type(ax)
<class 'matplotlib.axes._axes.Axes'>
"""

try:
if temperatures.empty or not isinstance(temperatures, pd.DataFrame):
raise ValueError("Input temperatures must be a non-empty pandas DataFrame.")
1 change: 0 additions & 1 deletion data/processed/1040_wo_doc.py
@@ -29,7 +29,6 @@ def task_func(
>>> print(task_func())
'Server started on localhost:12345. Ran for 5 seconds.'
"""

server = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
server.setblocking(0)
server.bind((server_address, server_port))
1 change: 0 additions & 1 deletion data/processed/1041_wo_doc.py
@@ -31,7 +31,6 @@ def task_func(request):
>>> task_func("GET /restricted.txt HTTP/1.1") # Assuming an I/O error occurs
"HTTP/1.1 500 INTERNAL SERVER ERROR\r\n\r\nInternal Server Error"
"""

match = re.match(r"^GET /([\w\.\-]+) HTTP/1\.1$", request)
if match:
file_name = match.group(1)
1 change: 0 additions & 1 deletion data/processed/1042_wo_doc.py
@@ -37,7 +37,6 @@ def task_func(client_socket):
>>> client_socket, addr = server_socket.accept()
>>> task_func(client_socket)
"""

request = client_socket.recv(BUFFER_SIZE).decode("utf-8")
print(f"Received: {request}")
email = EmailMessage()
1 change: 0 additions & 1 deletion data/processed/1043_w_doc.py
@@ -43,7 +43,6 @@ def task_func(data_list):
>>> ax.get_xticks()
array([0., 1., 2., 3., 4., 5., 6.])
"""

if not data_list:
raise ValueError("The data list is empty.")
data_series = pd.Series(data_list)
1 change: 0 additions & 1 deletion data/processed/1044_wo_doc.py
@@ -42,7 +42,6 @@ def task_func(date_str, booking_data):
3 Room4 Not Listed
4 Room5 Not Listed
"""

try:
date = datetime.strptime(date_str, "%Y-%m-%d")
if date < datetime.now():

0 comments on commit b1967f6