Skip to content

Commit 54c69a2

Browse files
committed
chore: pandas package is now optional
1 parent f6009d1 commit 54c69a2

10 files changed

+48
-41
lines changed

examples/anthropic/csv_scraper_anthropic.py

Lines changed: 5 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -3,9 +3,8 @@
33
"""
44
import os
55
from dotenv import load_dotenv
6-
import pandas as pd
76
from scrapegraphai.graphs import CSVScraperGraph
8-
from scrapegraphai.utils import convert_to_csv, convert_to_json, prettify_exec_info
7+
from scrapegraphai.utils import prettify_exec_info
98

109
load_dotenv()
1110

@@ -17,7 +16,8 @@
1716
curr_dir = os.path.dirname(os.path.realpath(__file__))
1817
file_path = os.path.join(curr_dir, FILE_NAME)
1918

20-
text = pd.read_csv(file_path)
19+
with open(file_path, 'r') as file:
20+
text = file.read()
2121

2222
# ************************************************
2323
# Define the configuration for the graph
@@ -41,7 +41,7 @@
4141

4242
csv_scraper_graph = CSVScraperGraph(
4343
prompt="List me all the last names",
44-
source=str(text), # Pass the content of the file, not the file object
44+
source=text, # Pass the content of the file
4545
config=graph_config
4646
)
4747

@@ -53,8 +53,4 @@
5353
# ************************************************
5454

5555
graph_exec_info = csv_scraper_graph.get_execution_info()
56-
print(prettify_exec_info(graph_exec_info))
57-
58-
# Save to json or csv
59-
convert_to_csv(result, "result")
60-
convert_to_json(result, "result")
56+
print(prettify_exec_info(graph_exec_info))

examples/anthropic/csv_scraper_graph_multi_anthropic.py

Lines changed: 3 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -3,9 +3,8 @@
33
"""
44
import os
55
from dotenv import load_dotenv
6-
import pandas as pd
76
from scrapegraphai.graphs import CSVScraperMultiGraph
8-
from scrapegraphai.utils import convert_to_csv, convert_to_json, prettify_exec_info
7+
from scrapegraphai.utils import prettify_exec_info
98

109
load_dotenv()
1110
# ************************************************
@@ -16,7 +15,8 @@
1615
curr_dir = os.path.dirname(os.path.realpath(__file__))
1716
file_path = os.path.join(curr_dir, FILE_NAME)
1817

19-
text = pd.read_csv(file_path)
18+
with open(file_path, 'r') as file:
19+
text = file.read()
2020

2121
# ************************************************
2222
# Define the configuration for the graph
@@ -48,7 +48,3 @@
4848

4949
graph_exec_info = csv_scraper_graph.get_execution_info()
5050
print(prettify_exec_info(graph_exec_info))
51-
52-
# Save to json or csv
53-
convert_to_csv(result, "result")
54-
convert_to_json(result, "result")

examples/openai/smart_scraper_openai.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,7 @@
2828
# ************************************************
2929

3030
smart_scraper_graph = SmartScraperGraph(
31-
prompt="Extract me all the articles",
31+
prompt="Extract me the first article",
3232
source="https://www.wired.com",
3333
config=graph_config
3434
)

pyproject.toml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,6 @@ dependencies = [
1919
"mistral-common>=1.4.0",
2020
"html2text>=2024.2.26",
2121
"beautifulsoup4>=4.12.3",
22-
"pandas>=2.2.2",
2322
"python-dotenv>=1.0.1",
2423
"tiktoken>=0.7",
2524
"tqdm>=4.66.4",
@@ -28,9 +27,10 @@ dependencies = [
2827
"playwright>=1.43.0",
2928
"undetected-playwright>=0.3.0",
3029
"langchain-ollama>=0.1.3",
30+
"semchunk>=2.2.0",
3131
"qdrant-client>=1.11.3",
3232
"fastembed>=0.3.6",
33-
"semchunk>=2.2.0",
33+
3434
"transformers>=4.44.2",
3535
"googlesearch-python>=1.2.5",
3636
"async-timeout>=4.0.3",

scrapegraphai/nodes/fetch_node.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,6 @@
44
import json
55
from typing import List, Optional
66
from langchain_openai import ChatOpenAI, AzureChatOpenAI
7-
import pandas as pd
87
import requests
98
from langchain_community.document_loaders import PyPDFLoader
109
from langchain_core.documents import Document
@@ -199,6 +198,10 @@ def load_file_content(self, source, input_type):
199198
loader = PyPDFLoader(source)
200199
return loader.load()
201200
elif input_type == "csv":
201+
try:
202+
import pandas as pd
203+
except ImportError:
204+
raise ImportError("pandas is not installed. Please install it using `pip install pandas`.")
202205
return [
203206
Document(
204207
page_content=str(pd.read_csv(source)), metadata={"source": "csv"}
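
scrapegraphai/utils/prettify_exec_info.py  [file header lost in extraction; path inferred from the `scrapegraphai.utils` import — confirm]

Lines changed: 33 additions & 13 deletions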
Original file line numberDiff line numberDiff line change
@@ -1,25 +1,45 @@
11
"""
22
Prettify the execution information of the graph.
33
"""
4-
import pandas as pd
4+
from typing import Union
55

6-
def prettify_exec_info(complete_result: list[dict]) -> pd.DataFrame:
6+
def prettify_exec_info(complete_result: list[dict], as_string: bool = True) -> Union[str, list[dict]]:
77
"""
8-
Transforms the execution information of a graph into a DataFrame for enhanced visualization.
8+
Formats the execution information of a graph showing node statistics.
99
1010
Args:
11-
complete_result (list[dict]): The complete execution information of the graph.
11+
complete_result (list[dict]): The execution information containing node statistics.
12+
as_string (bool, optional): If True, returns a formatted string table.
13+
If False, returns the original list. Defaults to True.
1214
1315
Returns:
14-
pd.DataFrame: A DataFrame that organizes the execution information
15-
for better readability and analysis.
16-
17-
Example:
18-
>>> prettify_exec_info([{'node': 'A', 'status': 'success'},
19-
{'node': 'B', 'status': 'failure'}])
20-
DataFrame with columns 'node' and 'status' showing execution results for each node.
16+
Union[str, list[dict]]: A formatted string table if as_string=True,
17+
otherwise the original list of dictionaries.
2118
"""
19+
if not as_string:
20+
return complete_result
21+
22+
if not complete_result:
23+
return "Empty result"
24+
25+
# Format the table
26+
lines = []
27+
lines.append("Node Statistics:")
28+
lines.append("-" * 100)
29+
lines.append(f"{'Node':<20} {'Tokens':<10} {'Prompt':<10} {'Compl.':<10} {'Requests':<10} {'Cost ($)':<10} {'Time (s)':<10}")
30+
lines.append("-" * 100)
31+
32+
for item in complete_result:
33+
node = item['node_name']
34+
tokens = item['total_tokens']
35+
prompt = item['prompt_tokens']
36+
completion = item['completion_tokens']
37+
requests = item['successful_requests']
38+
cost = f"{item['total_cost_USD']:.4f}"
39+
time = f"{item['exec_time']:.2f}"
2240

23-
df_nodes = pd.DataFrame(complete_result)
41+
lines.append(
42+
f"{node:<20} {tokens:<10} {prompt:<10} {completion:<10} {requests:<10} {cost:<10} {time:<10}"
43+
)
2444

25-
return df_nodes
45+
return "\n".join(lines)

tests/graphs/scrape_graph_test.py

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4,10 +4,8 @@
44

55
import os
66
import pytest
7-
import pandas as pd
87
from dotenv import load_dotenv
98
from scrapegraphai.graphs import ScrapeGraph
10-
from scrapegraphai.utils import prettify_exec_info
119

1210
load_dotenv()
1311

tests/graphs/smart_scraper_fireworks_test.py

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4,10 +4,8 @@
44

55
import os
66
import pytest
7-
import pandas as pd
87
from dotenv import load_dotenv
98
from scrapegraphai.graphs import SmartScraperGraph
10-
from scrapegraphai.utils import prettify_exec_info
119

1210
load_dotenv()
1311

tests/graphs/smart_scraper_multi_lite_graph_openai_test.py

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4,10 +4,8 @@
44

55
import os
66
import pytest
7-
import pandas as pd
87
from dotenv import load_dotenv
98
from scrapegraphai.graphs import SmartScraperMultiLiteGraph
10-
from scrapegraphai.utils import prettify_exec_info
119

1210
load_dotenv()
1311

tests/graphs/smart_scraper_openai_test.py

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4,10 +4,8 @@
44

55
import os
66
import pytest
7-
import pandas as pd
87
from dotenv import load_dotenv
98
from scrapegraphai.graphs import SmartScraperGraph
10-
from scrapegraphai.utils import prettify_exec_info
119

1210
load_dotenv()
1311

0 commit comments

Comments
 (0)