File tree Expand file tree Collapse file tree 2 files changed +7
-10
lines changed Expand file tree Collapse file tree 2 files changed +7
-10
lines changed Original file line number Diff line number Diff line change 51
51
"cell_type" : " markdown" ,
52
52
"metadata" : {},
53
53
"source" : [
54
- " # Step 2: Download the dataset"
54
+ " # Step 2: Load the dataset"
55
55
]
56
56
},
57
57
{
60
60
"metadata" : {},
61
61
"outputs" : [],
62
62
"source" : [
63
- " # You may see a warning upon running this cell. You can ignore it.\n " ,
64
- " import pandas as pd\n " ,
65
- " from datasets import load_dataset"
63
+ " import json"
66
64
]
67
65
},
68
66
{
71
69
"metadata" : {},
72
70
"outputs" : [],
73
71
"source" : [
74
- " # Download the `mongodb-docs` dataset from Hugging Face \n " ,
75
- " data = load_dataset( \" mongodb/mongodb-docs \" , split= \" train \" )\n " ,
76
- " # Convert the dataset into a dataframe first, then into a list of Python objects/dictionaries \n " ,
77
- " docs = pd.DataFrame(data).to_dict( \" records \" )"
72
+ " with open( \" ../data/mongodb_docs.json \" , \" r \" ) as data_file: \n " ,
73
+ " json_data = data_file.read( )\n " ,
74
+ " \n " ,
75
+ " docs = json.loads(json_data )"
78
76
]
79
77
},
80
78
{
177
175
" chunked_data = []\n " ,
178
176
" for chunk in chunks:\n " ,
179
177
" temp = doc.copy()\n " ,
180
- " temp[text_field]= chunk\n " ,
178
+ " temp[text_field] = chunk\n " ,
181
179
" chunked_data.append(temp)\n " ,
182
180
" \n " ,
183
181
" return chunked_data"
Original file line number Diff line number Diff line change 1
1
pymongo == 4.11.3
2
- datasets == 3.6.0
3
2
langchain == 0.3.25
4
3
langchain-aws == 0.2.22
5
4
langchain-google-genai == 2.1.4
You can’t perform that action at this time.
0 commit comments