Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

allow saving of file into directory and clean up the code #9

Merged
merged 2 commits into from
Sep 13, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion llms_txt/_modidx.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,11 +6,11 @@
'git_url': 'https://github.com/AnswerDotAI/llms-txt',
'lib_path': 'llms_txt'},
'syms': { 'llms_txt.core': { 'llms_txt.core._doc': ('core.html#_doc', 'llms_txt/core.py'),
'llms_txt.core._local_docs_pth': ('core.html#_local_docs_pth', 'llms_txt/core.py'),
'llms_txt.core._parse_links': ('core.html#_parse_links', 'llms_txt/core.py'),
'llms_txt.core._parse_llms': ('core.html#_parse_llms', 'llms_txt/core.py'),
'llms_txt.core._section': ('core.html#_section', 'llms_txt/core.py'),
'llms_txt.core.create_ctx': ('core.html#create_ctx', 'llms_txt/core.py'),
'llms_txt.core.find_root_dir': ('core.html#find_root_dir', 'llms_txt/core.py'),
'llms_txt.core.get_doc_content': ('core.html#get_doc_content', 'llms_txt/core.py'),
'llms_txt.core.get_sizes': ('core.html#get_sizes', 'llms_txt/core.py'),
'llms_txt.core.llms_txt2ctx': ('core.html#llms_txt2ctx', 'llms_txt/core.py'),
Expand Down
39 changes: 16 additions & 23 deletions llms_txt/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,8 @@
# AUTOGENERATED! DO NOT EDIT! File to edit: ../nbs/01_core.ipynb.

# %% auto 0
__all__ = ['opt_re', 'named_re', 'search', 'parse_link', 'parse_llms_file', 'find_root_dir', 'get_doc_content', 'mk_ctx',
'get_sizes', 'create_ctx', 'llms_txt2ctx']
__all__ = ['opt_re', 'named_re', 'search', 'parse_link', 'parse_llms_file', 'get_doc_content', 'mk_ctx', 'get_sizes',
'create_ctx', 'llms_txt2ctx']

# %% ../nbs/01_core.ipynb
import re
Expand All @@ -15,6 +15,7 @@
from fastcore.script import *
import httpx
from urllib.parse import urlparse
from nbdev.config import get_config

# %% ../nbs/01_core.ipynb
def opt_re(s):
Expand Down Expand Up @@ -68,28 +69,15 @@ def parse_llms_file(txt):
from fastcore.xml import Sections,Project,Doc

# %% ../nbs/01_core.ipynb
def find_root_dir():
    "Find the root directory of the nbdev project by looking for settings.ini"
    cwd = Path.cwd()
    # Check the current directory first, then every ancestor up to and
    # including the filesystem root (`Path.parents` ends at the root).
    for path in (cwd, *cwd.parents):
        if (path / 'settings.ini').exists(): return path
    # No `settings.ini` anywhere above the current directory.
    return None
def _local_docs_pth(cfg):
    # Local docs folder: `<config_path>/_proc/<final component of doc_path>`.
    proc_root = cfg.config_path / '_proc'
    docs_dir_name = cfg.doc_path.name
    return proc_root / docs_dir_name

# %% ../nbs/01_core.ipynb
def get_doc_content(url):
    "Fetch content for `url`, reading the local nbdev copy when `url` is under the project's `doc_host` and the file exists."
    cfg = get_config()
    # `doc_host` may be unset; `str.startswith(None)` would raise, so guard it.
    doc_host = cfg.get('doc_host')
    if doc_host and url.startswith(doc_host):
        # Map the URL path onto the local docs folder returned by `_local_docs_pth`.
        relative_path = urlparse(url).path.lstrip('/')
        local_path = _local_docs_pth(cfg) / relative_path
        # `is_file` (not `exists`): an empty URL path resolves to the docs
        # directory itself, which cannot be read as text.
        if local_path.is_file(): return local_path.read_text()
    # If not a local file or file doesn't exist, fetch from URL
    return httpx.get(url).text

# %% ../nbs/01_core.ipynb
Expand Down Expand Up @@ -131,7 +119,12 @@ def create_ctx(txt, optional=False, n_workers=None):
def llms_txt2ctx(
    fname:str, # File name to read
    optional:bool_arg=False, # Include 'optional' section?
    n_workers:int=None, # Number of threads to use for parallel downloading
    save_nbdev_fname:str=None # Save output to this file name in nbdev's `_proc/{doc_path}` dir instead of emitting to stdout
):
    "Print (or save, if `save_nbdev_fname` is given) a `Project` with a `Section` for each H2 part in file read from `fname`, optionally skipping the 'optional' section."
    ctx = create_ctx(Path(fname).read_text(), optional=optional, n_workers=n_workers)
    if save_nbdev_fname:
        # Write into the same local docs folder that `get_doc_content` reads from.
        cfg = get_config()
        (_local_docs_pth(cfg) / save_nbdev_fname).mk_write(ctx)
    else: print(ctx)
65 changes: 30 additions & 35 deletions nbs/01_core.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,8 @@
"from fastcore.xml import *\n",
"from fastcore.script import *\n",
"import httpx\n",
"from urllib.parse import urlparse"
"from urllib.parse import urlparse\n",
"from nbdev.config import get_config"
]
},
{
Expand Down Expand Up @@ -185,7 +186,7 @@
{
"data": {
"text/plain": [
"{'title': 'FastHTML quick start'}"
"{'title': 'internal docs - ed'}"
]
},
"execution_count": null,
Expand Down Expand Up @@ -217,8 +218,7 @@
{
"data": {
"text/plain": [
"{'title': 'FastHTML quick start',\n",
" 'url': 'https://docs.fastht.ml/tutorials/quickstart_for_web_devs.html.md'}"
"{'title': 'internal docs - ed', 'url': 'https://llmstxt.org/ed.html'}"
]
},
"execution_count": null,
Expand Down Expand Up @@ -674,42 +674,22 @@
"from fastcore.xml import Sections,Project,Doc"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"#| export\n",
"def find_root_dir():\n",
" \"Find the root directory of the nbdev project by looking for settings.ini\"\n",
" path = Path.cwd()\n",
" while path != path.parent:\n",
" if (path / 'settings.ini').exists(): return path\n",
" path = path.parent\n",
" return None"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"#|export\n",
"def _local_docs_pth(cfg): return cfg.config_path/'_proc'/cfg.doc_path.name\n",
"\n",
"def get_doc_content(url):\n",
" \"Fetch content from local file if in nbdev repo.\"\n",
" root_dir = find_root_dir()\n",
" if root_dir:\n",
" config = Config(root_dir, 'settings.ini')\n",
" doc_host = config.get('doc_host')\n",
" if doc_host and url.startswith(doc_host):\n",
" parsed_url = urlparse(url)\n",
" relative_path = parsed_url.path.lstrip('/')\n",
" local_path = root_dir / '_docs' / relative_path\n",
" if local_path.exists():\n",
" with open(local_path, 'r') as f: return f.read()\n",
" # If not a local file or file doesn't exist, fetch from URL\n",
" cfg = get_config()\n",
" if url.startswith(cfg.doc_host):\n",
" relative_path = urlparse(url).path.lstrip('/')\n",
" local_path = _local_docs_pth(cfg) / relative_path\n",
" if local_path.exists(): return local_path.read_text()\n",
" return httpx.get(url).text"
]
},
Expand Down Expand Up @@ -797,7 +777,8 @@
{
"data": {
"text/plain": [
"{'docs': {'FastHTML quick start': 27376,\n",
"{'docs': {'internal docs - ed': 34464,\n",
" 'FastHTML quick start': 27376,\n",
" 'HTMX reference': 26427,\n",
" 'Starlette quick guide': 7936},\n",
" 'examples': {'Todo list application': 18558},\n",
Expand All @@ -821,7 +802,7 @@
{
"data": {
"text/plain": [
"129814"
"164321"
]
},
"execution_count": null,
Expand Down Expand Up @@ -858,10 +839,24 @@
"def llms_txt2ctx(\n",
" fname:str, # File name to read\n",
" optional:bool_arg=False, # Include 'optional' section?\n",
" n_workers:int=None # Number of threads to use for parallel downloading\n",
" n_workers:int=None, # Number of threads to use for parallel downloading\n",
" save_nbdev_fname:str=None #save output to nbdev `{docs_path}` instead of emitting to stdout\n",
"):\n",
" \"Print a `Project` with a `Section` for each H2 part in file read from `fname`, optionally skipping the 'optional' section.\"\n",
" print(create_ctx(Path(fname).read_text(), optional=optional, n_workers=n_workers))"
" ctx = create_ctx(Path(fname).read_text(), optional=optional, n_workers=n_workers)\n",
" if save_nbdev_fname:\n",
" cfg = get_config()\n",
" (_local_docs_pth(cfg) / save_nbdev_fname).mk_write(ctx)\n",
" else: print(ctx)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"Path('/Users/hamel/github/fastcore/_docs/llms-ctx-full.txt').mk_write('hello')"
]
},
{
Expand Down
2 changes: 1 addition & 1 deletion settings.ini
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ keywords = nbdev jupyter notebook python LLMs NLP
language = English
status = 3
user = AnswerDotAI
requirements = fastcore>=1.7.3 httpx
requirements = fastcore>=1.7.3 httpx nbdev
conda_user = fastai
console_scripts = llms_txt2ctx=llms_txt.core:llms_txt2ctx
readme_nb = index.ipynb
Expand Down