-
Notifications
You must be signed in to change notification settings - Fork 71
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Detailed writing instructor (more criteria, longer responses, etc.).
- Loading branch information
Showing
14 changed files
with
295 additions
and
14 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,145 @@ | ||
import asyncio | ||
import glob | ||
import json | ||
import os | ||
import random | ||
from loguru import logger | ||
|
||
|
||
def _load_template(config, key, default_name):
    """Read a prompt template named by ``config[key]`` (or ``default_name``).

    The configured value is first tried as a path as-is; if nothing exists
    there, it is resolved relative to the ``prompts`` directory that sits
    next to this module.
    """
    path = config.get(key, default_name)
    if not os.path.exists(path):
        path = os.path.join(os.path.dirname(os.path.abspath(__file__)), "prompts", path)
    with open(path) as infile:
        return infile.read()


def _load_seeds(config):
    """Read every ``*.txt`` seed task from the configured seed directory.

    ``config["seed_path"]`` (default ``detailed_writing_seeds``) is tried
    as-is first, then relative to this module's ``prompts`` directory.

    Raises:
        Exception: if the directory cannot be found, or contains no
            ``*.txt`` files.
    """
    seed_path = config.get("seed_path", "detailed_writing_seeds")
    if not os.path.isdir(seed_path):
        seed_path = os.path.join(
            os.path.dirname(os.path.abspath(__file__)), "prompts", seed_path
        )
    if not os.path.isdir(seed_path):
        raise Exception("No seeds!")
    seeds = []
    # glob returns paths in arbitrary order; sort so that the seed rotation
    # is reproducible from run to run.
    for path in sorted(glob.glob(os.path.join(seed_path, "*.txt"))):
        with open(path) as infile:
            seeds.append(infile.read())
    if not seeds:
        # An existing-but-empty directory would otherwise only surface later
        # as an IndexError when the first prompt is built.
        raise Exception("No seeds!")
    return seeds


async def generate(instructor):
    """Generator for detailed writing training data.

    Reads the ``detailed_writing`` section of ``instructor.instructors`` and,
    in batches, (1) asks the model for new writing tasks built from a seed
    example plus a topic, (2) asks the model to respond to each task, and
    (3) asks the model to rewrite each response at roughly double the length.

    Args:
        instructor: object providing the configuration attributes read below
            (``instructors``, ``default_count``, ``api_params``,
            ``min_docsearch_score``, ``instructor_counts``,
            ``default_batch_size``, ``language``, ``default_flesch``,
            ``topic_avoidance``) and the ``get_instructor_topics``,
            ``generate_response`` and ``is_too_similar`` methods.

    Yields:
        dict with keys ``instruction``, ``response`` and ``category``
        (always ``"detailed_writing"``).

    Raises:
        Exception: if no seed tasks can be loaded.

    Note:
        This function never increments
        ``instructor.instructor_counts["detailed_writing"]`` itself; the loop
        only terminates if that counter is advanced elsewhere while items are
        being consumed. NOTE(review): presumably the caller does this per
        yielded item -- confirm.
    """
    config = instructor.instructors.get("detailed_writing", {})
    if not config:
        return
    target_count = config.get("count")
    if target_count is None:
        target_count = instructor.default_count
    target_count = int(target_count)
    if not target_count:
        return

    # Load the seed tasks, then the two prompt templates.
    seeds = _load_seeds(config)
    seed_index = 0
    template = _load_template(config, "prompt_path", "detailed_writing.txt")
    response_template = _load_template(
        config, "response_prompt_path", "detailed_writing_response.txt"
    )

    # Load the topics.
    topics = instructor.get_instructor_topics(config)
    random.shuffle(topics)
    topic_index = 0

    # API params, overriding defaults with this instructor's config.
    api_params = {**instructor.api_params, **config.get("api_params", {})}

    # Min similarity score.
    min_score = config.get("min_docsearch_score")
    if min_score is None:
        min_score = instructor.min_docsearch_score
    min_score = float(min_score)

    # Generate the instruction/response pairs until we reach the target count.
    if "detailed_writing" not in instructor.instructor_counts:
        instructor.instructor_counts["detailed_writing"] = 0
    batch_size = config.get("batch_size")
    if batch_size is None:
        batch_size = instructor.default_batch_size
    batch_size = int(batch_size)
    language = config.get("language") or instructor.language
    flesch = config.get("flesch") or instructor.default_flesch

    pending = []
    while instructor.instructor_counts["detailed_writing"] < target_count:
        # Queue one task-generation prompt, cycling through topics and seeds.
        topic = topics[topic_index]
        topic_index = (topic_index + 1) % len(topics)
        prompt = template.format(
            example=seeds[seed_index],
            flesch=flesch,
            language=language,
            topic=json.dumps(topic),
            topic_avoidance=instructor.topic_avoidance,
        )
        seed_index = (seed_index + 1) % len(seeds)
        pending.append(instructor.generate_response(prompt, **api_params))
        if len(pending) < batch_size:
            continue

        # A full batch is queued: resolve it and start the next batch empty.
        generated = await asyncio.gather(*pending)
        pending = []

        # Drop blank results and tasks too close to existing ones.
        instructions = []
        for instruction in generated:
            if not instruction or not instruction.strip():
                continue
            if await instructor.is_too_similar(instruction, min_score=min_score):
                logger.warning("Skipping detailed writing prompt, too similar.")
                continue
            instructions.append(instruction)
        if not instructions:
            continue

        # Generate the responses.
        responses = await asyncio.gather(
            *[
                instructor.generate_response(
                    response_template.format(
                        instruction=instruction, flesch=flesch, language=language
                    ),
                    **api_params,
                )
                for instruction in instructions
            ]
        )

        # Lengthen the responses, keeping each instruction paired with the
        # rewrite request made for its (non-blank) response.
        complete_instructions = []
        lengthen_requests = []
        for instruction, response in zip(instructions, responses):
            if not response or not response.strip():
                continue
            complete_instructions.append(instruction)
            lengthen_requests.append(
                instructor.generate_response(
                    f"Below is some text that seems very low effort and short/boring. Please rewrite it so as to double the length, add significantly more detail, and make it more interesting and colorful.\n\n{response}",
                    **api_params,
                )
            )
        if not lengthen_requests:
            continue
        lengthened = await asyncio.gather(*lengthen_requests)
        for instruction, response in zip(complete_instructions, lengthened):
            if not response or not response.strip():
                continue
            yield {
                "instruction": instruction.strip(),
                "response": response.strip(),
                "category": "detailed_writing",
            }
            # Stop emitting from this batch once the target has been met.
            if instructor.instructor_counts["detailed_writing"] >= target_count:
                break
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,17 @@ | ||
I would like you to help me create another example of a detailed writing task. Don't actually respond to the example task, just create a new task. | ||
This comment has been minimized.
Sorry, something went wrong. |
||
|
||
The example prompt is between "BEGINEXAMPLE" and "ENDEXAMPLE". Any details following "ENDEXAMPLE" are requirements for your output, and must not be included or referenced in any way in the new tasks. | ||
|
||
{example} | ||
|
||
{flesch} | ||
|
||
All output text should be in {language}. | ||
|
||
{topic_avoidance} | ||
|
||
The new task should include some elements related to {topic}, but it need not be the main theme, it can be an extraneous detail. If the example task is fictional, the new task must also be fictional. | ||
|
||
Don't start with "Certainly!" or any intro sentence, just produce the desired output. | ||
|
||
Don't reference these requirements, just produce the desired output, i.e. there should be no reference to the requirements at all. |
13 changes: 13 additions & 0 deletions
13
airoboros/instructors/prompts/detailed_writing_response.txt
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,13 @@ | ||
{instruction} | ||
|
||
The response should be in {language}. | ||
|
||
{flesch} | ||
|
||
Be extremely loquacious, use colorful immersive language, and be sure to follow the requirements closely. | ||
|
||
Don't start with "Certainly!" or any intro sentence, just produce the desired output. | ||
|
||
Don't reference these requirements, just produce the desired output, i.e. there should be no reference to the requirements at all. | ||
|
||
Before you generate a response, think silently to yourself about how you would lengthen the response by including more detail and interesting information, to make sure your response is as comprehensive as possible. Include between 150 and 300 sentences in your response, which would equate to between 3000 and 6000 words. |
44 changes: 44 additions & 0 deletions
44
airoboros/instructors/prompts/detailed_writing_seeds/0.txt
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,44 @@ | ||
BEGINEXAMPLE | ||
Write the opening chapter of a novel with the following requirements: | ||
|
||
Introduction to the Main Protagonist, Dr. Elena Vance: | ||
- Background: Detail Dr. Vance's education in both alchemical and mechanical engineering, her occupation as a researcher, her estranged family, her deep sense of curiosity, and her physical attributes. | ||
- Initial Conflict: Introduce her challenge with an unsolved scientific mystery that ties to an ancient prophecy. | ||
|
||
Setting Development in the City of Aeloria: | ||
- Primary Location: Elaborate on Aeloria, a city where skyscrapers are intertwined with giant mystical trees, and streets filled with autonomous carriages and floating lanterns. | ||
- Atmosphere: Include descriptions of weather, sounds, scents, and cultural nuances to make the city come alive. | ||
- Political Structure: Briefly mention the ruling council that blends technocratic governance with a secret mystical order. | ||
|
||
Secondary Characters: Commander James Thorn and Ava, the Mystical AI: | ||
- Thorn's Background: Describe his military background, his stoic demeanor, his skepticism of magic, and his hidden admiration for Dr. Vance. | ||
- Ava's Nature: Introduce Ava, an AI with an inexplicable connection to ancient wisdom; describe her appearance and her cryptic manner of speaking. | ||
- Relationships: Develop the dynamics between Dr. Vance, Commander Thorn, and Ava, highlighting conflicts, alliances, and underlying tensions. | ||
|
||
Plot Initiation through the Discovery of the Orb: | ||
- Incident: Describe Dr. Vance's discovery of a mysterious orb that reacts to both technology and magic. | ||
- Reaction: Detail her fascination and determination to understand the orb, and Thorn's reluctance and skepticism. | ||
|
||
Thematic Undertones on Humanity and Duality: | ||
- Theme Introduction: Introduce the themes of humanity's relationship with technology and magic, and the duality of logic and faith. | ||
- Symbolism: Utilize the orb, the city, and specific character traits as symbols reflecting the deeper thematic layers. | ||
|
||
Narrative Style through Third-Person Limited Perspective: | ||
- Perspective: Narrate from Dr. Vance's perspective to allow readers insight into her thoughts and emotions. | ||
- Literary Devices: Include well-crafted metaphors comparing technology and magic, foreshadowing through ancient texts, and similes that evoke the atmosphere. | ||
|
||
Integration of Mystical & Technological Elements: | ||
- Magic: Explain how magic is harnessed through ancient runes and controlled by a mystical guild. | ||
- Technology: Describe autonomous machines, airships, and an internet connected through magical ley lines. | ||
- Constraints: Outline the taboo of blending magic and technology, a line Dr. Vance is willing to cross. | ||
|
||
Ending the Chapter with Intrigue: | ||
- Cliffhanger: End with Ava uttering a prophecy connected to the orb, leaving both Dr. Vance and the reader puzzled. | ||
- Reflection: Provide Dr. Vance's inner reflections, her excitement, fear, and a determination that hints at her forthcoming journey. | ||
|
||
Be sure to occasionally include quotes from the characters. | ||
ENDEXAMPLE | ||
|
||
The new task must have completely different requirements, topics, etc. | ||
|
||
This example has 8 criteria sections (intro, setting development, secondary characters, plot initiation, thematic undertones, narrative style, integration of ..., ending the chapter); the new task must have new, unique sections with different headings. |
22 changes: 22 additions & 0 deletions
22
airoboros/instructors/prompts/detailed_writing_seeds/1.txt
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,22 @@ | ||
BEGINEXAMPLE | ||
Write the next section of a post-apocalyptic tale. | ||
|
||
Here's a summary of the previous chapter: | ||
In a desolate world ravaged by an unprecedented catastrophe, Kael, a young scavenger, has stumbled upon a seemingly abandoned facility. He encounters Seraphina, a mysterious woman with knowledge of the Old World. Together they discover hidden secrets, indicating that the world may still harbor pockets of civilization. Their relationship is marked by trust and suspicion, and as the chapter ends, they resolve to venture further into the unknown to find others like themselves. | ||
|
||
Requirements for the next section: | ||
1. Character Development of Kael and Seraphina: | ||
- Kael's Growth: Reveal more about Kael's life before the apocalypse, his fears, dreams, and determination to find his family. Show his developing skills, curiosity, and growing reliance on Seraphina. | ||
- Seraphina's Enigma: Introduce the ambiguity surrounding Seraphina's origins and true intentions. Build tension by hinting at secrets she's hiding, while also portraying her empathy and wisdom. | ||
|
||
2. Exploration of the Wasteland and the Unknown Facility: | ||
- The Wasteland: Describe the perilous journey across the wasteland, including encounters with mutated creatures, extreme weather, and remnants of the Old World. | ||
- The Unknown Facility: Develop the hidden facility they discover, filled with technological marvels, forgotten experiments, and cryptic messages hinting at the survival of humanity. | ||
|
||
3. Emergence of the Subplot: The Cryo Chamber: | ||
- Discovery: Have Kael and Seraphina discover a concealed cryogenic chamber with living humans in suspended animation, triggering new questions about the past and the existence of other survivors. | ||
- Decision: Capture their moral dilemma on whether to wake the sleepers, the potential dangers, and the hopes it brings for rebuilding civilization. | ||
- Clue: End with a clue that leads them to believe there are other facilities, setting the stage for their new quest. | ||
ENDEXAMPLE | ||
|
||
This example has 3 criteria sections (character development, exploration of..., emergence of subplot); the new task must have new, unique sections with different headings with interesting requirements. |
22 changes: 22 additions & 0 deletions
22
airoboros/instructors/prompts/detailed_writing_seeds/2.txt
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,22 @@ | ||
BEGINEXAMPLE | ||
Compose a Research Paper on the Ecological Impacts of Llama Grazing with the following criteria. | ||
|
||
Introduction to Llama Grazing Practices and Their Importance: | ||
- Background: Provide a historical overview of llama grazing in South America, highlighting its cultural and economic significance. Detail the regions where llamas are primarily grazed. | ||
- Relevance: Discuss the importance of llamas in agricultural systems and briefly mention the existing concerns regarding their ecological impacts. | ||
- Thesis Statement: Clearly state the purpose of the research paper, the hypothesis, and the specific ecological aspects that will be investigated. | ||
|
||
Analysis of Ecological Effects of Llama Grazing: | ||
- Soil Quality and Erosion: Analyze how llama grazing affects soil quality, including changes in nutrient content, compaction, and erosion rates. Utilize scientific studies and field observations. | ||
- Vegetation and Biodiversity: Investigate the impacts of llama grazing on native plant species and overall biodiversity. Include the potential for invasive species propagation. | ||
- Water Resources: Evaluate the relationship between llama grazing and local water quality and availability, including effects on streams, ponds, and groundwater. | ||
|
||
Recommendations and Conclusions: | ||
- Best Practices: Propose evidence-based best practices for sustainable llama grazing that balances ecological health with economic needs. | ||
- Policy Implications: Discuss potential policy interventions or regulations to guide sustainable llama grazing. Include an assessment of the roles of local communities and governments. | ||
- Concluding Remarks: Sum up the main findings of the paper and their implications for future research and practice in sustainable agriculture. | ||
ENDEXAMPLE | ||
|
||
The new task must not ask for any output that a language model could not produce, e.g. images, charts, etc. | ||
|
||
This example has 3 criteria sections: introduction, analysis, recommendations and conclusions. The new task must have different, unique criteria sections from the example, with a different output format, i.e. you need not use bullet points for the specifics, it could be a numbered list or plain text paragraph, but be internally consistent with the style you choose. |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,3 @@ | ||
BEGINEXAMPLE | ||
I'm in the mood for a fun story, and I've got a few things in mind that I'd like you to include. Can you write me a tale set in a small coastal village? I'd love it if there was a talking cat who's wise beyond its years, and maybe it's guiding the main character to find an ancient key. The key could have some sort of mystical power or history. All of the characters should be female. Feel free to take it in any direction you want, but make sure to include those elements. | ||
ENDEXAMPLE |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.
"an detailed" typo?
Also two spaces between sentences