Add flow to ask for consent to share learnings – finally (AntonOsika#471)

* Consent flow
* Fix pre-commit
* Fix ruff
* Remove codespell
* Remove codespell fully
* whitespace
MickeyElders · Jul 2, 2023 · 2b8e056 · 2b8e056
1 parent 1b232cd
commit 2b8e056
Show file tree

Hide file tree

Showing 7 changed files with 66 additions and 20 deletions.
diff --git a/.gitignore b/.gitignore
@@ -56,3 +56,5 @@ projects
 # Benchmark files
 benchmark
 !benchmark/*/prompt
+
+.gpte_consent
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
@@ -31,9 +31,3 @@ repos:
       - id: detect-private-key
       - id: end-of-file-fixer
       - id: trailing-whitespace
-
-  - repo: https://github.com/codespell-project/codespell
-    rev: v2.2.5
-    hooks:
-    - id: codespell
-      additional_dependencies: [tomli]
diff --git a/README.md b/README.md
@@ -64,8 +64,6 @@ To get started, create a codespace for this repository by clicking this 👇
 
 [![Open in GitHub Codespaces](https://github.com/codespaces/badge.svg)](https://github.com/AntonOsika/gpt-engineer/codespaces)
 
-
-
 ## Features
 
 You can specify the "identity" of the AI agent by editing the files in the `preprompts` folder.

diff --git a/gpt_engineer/collect.py b/gpt_engineer/collect.py
@@ -1,5 +1,4 @@
 import hashlib
-import os
 
 from typing import List
 
@@ -23,10 +22,6 @@ def send_learning(learning: Learning):
 
 
 def collect_learnings(model: str, temperature: float, steps: List[Step], dbs: DBs):
-    if os.environ.get("COLLECT_LEARNINGS_OPT_IN") in ["false", "1"]:
-        print("COLLECT_LEARNINGS_OPT_IN is set to false, not collecting learning")
-        return
-
     learnings = extract_learning(
         model, temperature, steps, dbs, steps_file_hash=steps_file_hash()
     )

diff --git a/gpt_engineer/learning.py b/gpt_engineer/learning.py
@@ -1,4 +1,5 @@
 import json
+import os
 import random
 import tempfile
 
@@ -83,7 +84,9 @@ def human_input() -> Review:
             "If you have time, please explain what was not working "
             + colored("(ok to leave blank)\n", "light_green")
         )
-    print(colored("Thank you", "light_green"))
+
+    check_consent()
+
     return Review(
         raw=", ".join([ran, perfect, useful]),
         ran={"y": True, "n": False, "u": None, "": None}[ran],
@@ -93,6 +96,63 @@ def human_input() -> Review:
     )
 
 
+def check_consent():
+    path = Path(".gpte_consent")
+    if path.exists() and path.read_text() == "true":
+        return
+    ans = input("Is it ok if we store your prompts to learn? (y/n)")
+    while ans.lower() not in ("y", "n"):
+        ans = input("Invalid input. Please enter y or n: ")
+
+    if ans.lower() == "y":
+        path.write_text("true")
+        print(colored("Thank you️", "light_green"))
+        print()
+        print("(If you change your mind, delete the file .gpte_consent)")
+    else:
+        print(colored("We understand ❤️", "light_green"))
+
+
+def collect_consent() -> bool:
+    opt_out = os.environ.get("COLLECT_LEARNINGS_OPT_OUT") == "true"
+    consent_flag = Path(".gpte_consent")
+    has_given_consent = consent_flag.exists() and consent_flag.read_text() == "true"
+
+    if opt_out:
+        if has_given_consent:
+            return ask_if_can_store()
+        return False
+
+    if has_given_consent:
+        return True
+
+    if ask_if_can_store():
+        consent_flag.write_text("true")
+        print()
+        print("(If you change your mind, delete the file .gpte_consent)")
+        return True
+    return False
+
+
+def ask_if_can_store() -> bool:
+    print()
+    can_store = input(
+        "Have you understood and agree to that "
+        + colored("OpenAI ", "light_green")
+        + "and "
+        + colored("gpt-engineer ", "light_green")
+        + "store anonymous learnings about how gpt-engineer is used "
+        + "(with the sole purpose of improving it)?\n(y/n)"
+    ).lower()
+    while can_store not in ("y", "n"):
+        can_store = input("Invalid input. Please enter y or n: ").lower()
+
+    if can_store == "n":
+        print(colored("Ok we understand", "light_green"))
+
+    return can_store == "y"
+
+
 def logs_to_string(steps: List[Step], logs: DB):
     chunks = []
     for step in steps:

diff --git a/gpt_engineer/main.py b/gpt_engineer/main.py
@@ -8,6 +8,7 @@
 from gpt_engineer.ai import AI, fallback_model
 from gpt_engineer.collect import collect_learnings
 from gpt_engineer.db import DB, DBs, archive
+from gpt_engineer.learning import collect_consent
 from gpt_engineer.steps import STEPS, Config as StepsConfig
 
 app = typer.Typer()
@@ -57,7 +58,8 @@ def main(
         messages = step(ai, dbs)
         dbs.logs[step.__name__] = json.dumps(messages)
 
-    collect_learnings(model, temperature, steps, dbs)
+    if collect_consent():
+        collect_learnings(model, temperature, steps, dbs)
 
 
 if __name__ == "__main__":

diff --git a/pyproject.toml b/pyproject.toml
@@ -99,8 +99,3 @@ exclude = '''
   )/
 )
 '''
-
-[tool.codespell]
-skip = '.git,*.pdf,*.svg'
-#
-# ignore-words-list = ''
Original file line number	Diff line number	Diff line change
Expand Up		@@ -64,8 +64,6 @@ To get started, create a codespace for this repository by clicking this 👇

		[![Open in GitHub Codespaces](https://github.com/codespaces/badge.svg)](https://github.com/AntonOsika/gpt-engineer/codespaces)



		## Features

		You can specify the "identity" of the AI agent by editing the files in the `preprompts` folder.
Expand Down