Skip to content

Commit

Permalink
Detailed writing instructor (more criteria, longer responses, etc.).
Browse files Browse the repository at this point in the history
  • Loading branch information
j-durbin committed Aug 5, 2023
1 parent 2c6a221 commit c6aa785
Show file tree
Hide file tree
Showing 14 changed files with 295 additions and 14 deletions.
5 changes: 4 additions & 1 deletion airoboros/instructors/contextual.py
Original file line number Diff line number Diff line change
Expand Up @@ -188,6 +188,7 @@ async def generate(instructor):
batch_size = instructor.default_batch_size
batch_size = int(batch_size)
futures = []
language = config.get("language") or instructor.language
flesch = config.get("flesch") or instructor.default_flesch
while instructor.instructor_counts["contextual"] < target_count:
prompt = generate_prompt(instructor, config, template, topic_iter)
Expand All @@ -212,7 +213,9 @@ async def generate(instructor):
# Generate the responses.
futures = [
instructor.generate_response(
response_template.format(instruction=instruction, flesch=flesch),
response_template.format(
instruction=instruction, flesch=flesch, language=language
),
**api_params,
)
for instruction in instructions
Expand Down
145 changes: 145 additions & 0 deletions airoboros/instructors/detailed_writing.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,145 @@
import asyncio
import glob
import json
import os
import random
from loguru import logger


def _read_text(path):
    """Return the full contents of the text file at ``path``, decoded as UTF-8."""
    # Explicit encoding so prompt/seed text is decoded identically on every
    # platform rather than depending on the locale's default codec.
    with open(path, encoding="utf-8") as infile:
        return infile.read()


def _resolve_prompt_path(path):
    """Return ``path`` if it exists, else the same name under the bundled ``prompts`` dir."""
    if os.path.exists(path):
        return path
    return os.path.join(os.path.dirname(os.path.abspath(__file__)), "prompts", path)


def _load_seeds(seed_path):
    """Read every ``*.txt`` seed file in ``seed_path`` and return their contents.

    ``seed_path`` is used as given when it is a directory; otherwise it is
    looked up beneath the ``prompts`` directory next to this module.

    Raises:
        FileNotFoundError: if the directory cannot be found, or if it holds
            no ``*.txt`` files (previously the latter surfaced later as a
            bare ``IndexError`` inside the generation loop).
    """
    if not os.path.isdir(seed_path):
        seed_path = os.path.join(
            os.path.dirname(os.path.abspath(__file__)), "prompts", seed_path
        )
    if not os.path.isdir(seed_path):
        raise FileNotFoundError(f"No seeds! Seed directory not found: {seed_path}")
    # glob returns entries in arbitrary order; sort so the round-robin over
    # the seeds is reproducible between runs and machines.
    seeds = [
        _read_text(path) for path in sorted(glob.glob(os.path.join(seed_path, "*.txt")))
    ]
    if not seeds:
        raise FileNotFoundError(f"No seeds! No *.txt files found in: {seed_path}")
    return seeds


async def generate(instructor):
    """Generator for detailed writing training data.

    Reads its settings from ``instructor.instructors["detailed_writing"]``,
    falling back to the instructor-wide defaults (``default_count``,
    ``default_batch_size``, ``min_docsearch_score``, ``language``,
    ``default_flesch``) for anything not configured.  Each pass of the loop:

    1. builds instruction-generating prompts from the prompt template, cycling
       through the seed examples and the (shuffled) topics,
    2. once ``batch_size`` prompts are queued, generates the instructions and
       drops blank ones and those ``instructor.is_too_similar`` rejects,
    3. generates a response per surviving instruction,
    4. asks for each non-blank response to be rewritten longer, and
    5. yields one item per non-blank rewritten response.

    Yields:
        dict: ``{"instruction": str, "response": str, "category":
        "detailed_writing"}`` with instruction and response stripped.

    Raises:
        FileNotFoundError: if no seed examples can be loaded.

    NOTE(review): this function only *reads*
    ``instructor.instructor_counts["detailed_writing"]`` (after initializing
    it to 0 when absent); it never increments it.  Presumably the consumer
    bumps the counter for each yielded item -- confirm against the caller,
    since the loop cannot terminate otherwise.
    """
    config = instructor.instructors.get("detailed_writing", {})
    if not config:
        return
    target_count = config.get("count")
    if target_count is None:
        target_count = instructor.default_count
    target_count = int(target_count)
    if not target_count:
        return

    # Load the seed tasks.
    seeds = _load_seeds(config.get("seed_path", "detailed_writing_seeds"))
    seed_index = 0

    # Load the prompt template.
    template = _read_text(
        _resolve_prompt_path(config.get("prompt_path", "detailed_writing.txt"))
    )

    # Load the response generating prompt template.
    response_template = _read_text(
        _resolve_prompt_path(
            config.get("response_prompt_path", "detailed_writing_response.txt")
        )
    )

    # Load the topics.
    topics = instructor.get_instructor_topics(config)
    random.shuffle(topics)
    topic_index = 0

    # API params, overriding defaults with this instructor's config.
    api_params = {**instructor.api_params, **config.get("api_params", {})}

    # Min similarity score.
    min_score = config.get("min_docsearch_score")
    if min_score is None:
        min_score = instructor.min_docsearch_score
    min_score = float(min_score)

    # Generate the instruction/response pairs until we reach the target count.
    if "detailed_writing" not in instructor.instructor_counts:
        instructor.instructor_counts["detailed_writing"] = 0
    batch_size = config.get("batch_size")
    if batch_size is None:
        batch_size = instructor.default_batch_size
    batch_size = int(batch_size)
    futures = []
    language = config.get("language") or instructor.language
    flesch = config.get("flesch") or instructor.default_flesch
    while instructor.instructor_counts["detailed_writing"] < target_count:
        # Queue one instruction-generating prompt, rotating topic and seed.
        topic = topics[topic_index]
        topic_index = (topic_index + 1) % len(topics)
        prompt = template.format(
            example=seeds[seed_index],
            flesch=flesch,
            language=language,
            topic=json.dumps(topic),
            topic_avoidance=instructor.topic_avoidance,
        )
        seed_index = (seed_index + 1) % len(seeds)
        futures.append(instructor.generate_response(prompt, **api_params))
        if len(futures) < batch_size:
            continue

        # The batch is full: resolve it, keeping only usable, novel instructions.
        instructions = []
        for instruction in await asyncio.gather(*futures):
            if not instruction or not instruction.strip():
                continue
            if await instructor.is_too_similar(instruction, min_score=min_score):
                logger.warning("Skipping detailed writing prompt, too similar.")
                continue
            instructions.append(instruction)
        # The batch has been consumed either way; start the next one empty.
        futures = []
        if not instructions:
            continue

        # Generate the responses.
        responses = await asyncio.gather(
            *(
                instructor.generate_response(
                    response_template.format(
                        instruction=instruction, flesch=flesch, language=language
                    ),
                    **api_params,
                )
                for instruction in instructions
            )
        )

        # Lengthen the responses, keeping each instruction aligned with the
        # rewrite request made for its (non-blank) response.
        complete_instructions = []
        lengthen_calls = []
        for instruction, response in zip(instructions, responses):
            if not response or not response.strip():
                continue
            complete_instructions.append(instruction)
            lengthen_calls.append(
                instructor.generate_response(
                    "Below is some text that seems very low effort and short/boring. "
                    "Please rewrite it so as to double the length, add significantly "
                    "more detail, and make it more interesting and colorful."
                    f"\n\n{response}",
                    **api_params,
                )
            )
        if not lengthen_calls:
            continue
        lengthened = await asyncio.gather(*lengthen_calls)
        for instruction, response in zip(complete_instructions, lengthened):
            if not response or not response.strip():
                continue
            yield {
                "instruction": instruction.strip(),
                "response": response.strip(),
                "category": "detailed_writing",
            }
            # Stop emitting as soon as the target is met, even mid-batch.
            if instructor.instructor_counts["detailed_writing"] >= target_count:
                break
2 changes: 2 additions & 0 deletions airoboros/instructors/prompts/contextual_response.txt
Original file line number Diff line number Diff line change
Expand Up @@ -32,4 +32,6 @@ If there are multiple context blocks from which the references are extracted, be

If the tasks cannot be answered using only the information provided in the input, do not make up a response.

All output should be in {language}.

{instruction}
17 changes: 17 additions & 0 deletions airoboros/instructors/prompts/detailed_writing.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
I would like you to help me create another example of a detailed writing task. Don't actually respond to the example task, just create a new task.

This comment has been minimized.

Copy link
@IgnacioFDM

IgnacioFDM Aug 6, 2023

"an detailed" typo?

Also two spaces between sentences


The example prompt is between "BEGINEXAMPLE" and "ENDEXAMPLE". Any details following "ENDEXAMPLE" are requirements for your output, and must not be included or referenced in any way in the new tasks.

{example}

{flesch}

All output text should be in {language}.

{topic_avoidance}

The new task should include some elements related to {topic}, but it need not be the main theme, it can be an extraneous detail. If the example task is fictional, the new task must also be fictional.

Don't start with "Certainly!" or any intro sentence, just produce the desired output.

Don't reference these requirements, just produce the desired output, i.e. there should be no reference to the requirements at all.
13 changes: 13 additions & 0 deletions airoboros/instructors/prompts/detailed_writing_response.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
{instruction}

The response should be in {language}.

{flesch}

Be extremely loquacious, use colorful immersive language, and be sure to follow the requirements closely.

Don't start with "Certainly!" or any intro sentence, just produce the desired output.

Don't reference these requirements, just produce the desired output, i.e. there should be no reference to the requirements at all.

Before you generate a response, think silently to yourself about how you would lengthen the response by including more detail and interesting information, to make sure your response is as comprehensive as possible. Include between 150 and 300 sentences in your response, which would equate to between 3000 and 6000 words.
44 changes: 44 additions & 0 deletions airoboros/instructors/prompts/detailed_writing_seeds/0.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
BEGINEXAMPLE
Write the opening chapter of a novel with the following requirements:

Introduction to the Main Protagonist, Dr. Elena Vance:
- Background: Detail Dr. Vance's education in both alchemical and mechanical engineering, her occupation as a researcher, her estranged family, her deep sense of curiosity, and her physical attributes.
- Initial Conflict: Introduce her challenge with an unsolved scientific mystery that ties to an ancient prophecy.

Setting Development in the City of Aeloria:
- Primary Location: Elaborate on Aeloria, a city where skyscrapers are intertwined with giant mystical trees, and streets filled with autonomous carriages and floating lanterns.
- Atmosphere: Include descriptions of weather, sounds, scents, and cultural nuances to make the city come alive.
- Political Structure: Briefly mention the ruling council that blends technocratic governance with a secret mystical order.

Secondary Characters: Commander James Thorn and Ava, the Mystical AI:
- Thorn's Background: Describe his military background, his stoic demeanor, his skepticism of magic, and his hidden admiration for Dr. Vance.
- Ava's Nature: Introduce Ava, an AI with an inexplicable connection to ancient wisdom; describe her appearance and her cryptic manner of speaking.
- Relationships: Develop the dynamics between Dr. Vance, Commander Thorn, and Ava, highlighting conflicts, alliances, and underlying tensions.

Plot Initiation through the Discovery of the Orb:
- Incident: Describe Dr. Vance's discovery of a mysterious orb that reacts to both technology and magic.
- Reaction: Detail her fascination and determination to understand the orb, and Thorn's reluctance and skepticism.

Thematic Undertones on Humanity and Duality:
- Theme Introduction: Introduce the themes of humanity's relationship with technology and magic, and the duality of logic and faith.
- Symbolism: Utilize the orb, the city, and specific character traits as symbols reflecting the deeper thematic layers.

Narrative Style through Third-Person Limited Perspective:
- Perspective: Narrate from Dr. Vance's perspective to allow readers insight into her thoughts and emotions.
- Literary Devices: Include well-crafted metaphors comparing technology and magic, foreshadowing through ancient texts, and similes that evoke the atmosphere.

Integration of Mystical & Technological Elements:
- Magic: Explain how magic is harnessed through ancient runes and controlled by a mystical guild.
- Technology: Describe autonomous machines, airships, and an internet connected through magical ley lines.
- Constraints: Outline the taboo of blending magic and technology, a line Dr. Vance is willing to cross.

Ending the Chapter with Intrigue:
- Cliffhanger: End with Ava uttering a prophecy connected to the orb, leaving both Dr. Vance and the reader puzzled.
- Reflection: Provide Dr. Vance's inner reflections, her excitement, fear, and a determination that hints at her forthcoming journey.

Be sure to occasionally include quotes from the characters.
ENDEXAMPLE

The new task must have completely different requirements, topics, etc.

This example has 8 criteria sections (intro, setting development, secondary characters, plot initiation, thematic undertones, narrative style, integration of ..., ending the chapter); the new task must have new, unique sections with different headings.
22 changes: 22 additions & 0 deletions airoboros/instructors/prompts/detailed_writing_seeds/1.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
BEGINEXAMPLE
Write the next section of a post-apocalyptic tale.

Here's a summary of the previous chapter:
In a desolate world ravaged by an unprecedented catastrophe, Kael, a young scavenger, has stumbled upon a seemingly abandoned facility. He encounters Seraphina, a mysterious woman with knowledge of the Old World. Together they discover hidden secrets, indicating that the world may still harbor pockets of civilization. Their relationship is marked by trust and suspicion, and as the chapter ends, they resolve to venture further into the unknown to find others like themselves.

Requirements for the next section:
1. Character Development of Kael and Seraphina:
- Kael's Growth: Reveal more about Kael's life before the apocalypse, his fears, dreams, and determination to find his family. Show his developing skills, curiosity, and growing reliance on Seraphina.
- Seraphina's Enigma: Introduce the ambiguity surrounding Seraphina's origins and true intentions. Build tension by hinting at secrets she's hiding, while also portraying her empathy and wisdom.

2. Exploration of the Wasteland and the Unknown Facility:
- The Wasteland: Describe the perilous journey across the wasteland, including encounters with mutated creatures, extreme weather, and remnants of the Old World.
- The Unknown Facility: Develop the hidden facility they discover, filled with technological marvels, forgotten experiments, and cryptic messages hinting at the survival of humanity.

3. Emergence of the Subplot: The Cryo Chamber:
- Discovery: Have Kael and Seraphina discover a concealed cryogenic chamber with living humans in suspended animation, triggering new questions about the past and the existence of other survivors.
- Decision: Capture their moral dilemma on whether to wake the sleepers, the potential dangers, and the hopes it brings for rebuilding civilization.
- Clue: End with a clue that leads them to believe there are other facilities, setting the stage for their new quest.
ENDEXAMPLE

This example has 3 criteria sections (character development, exploration of..., emergence of subplot); the new task must have new, unique sections with different headings with interesting requirements.
22 changes: 22 additions & 0 deletions airoboros/instructors/prompts/detailed_writing_seeds/2.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
BEGINEXAMPLE
Compose a Research Paper on the Ecological Impacts of Llama Grazing with the following criteria.

Introduction to Llama Grazing Practices and Their Importance:
- Background: Provide a historical overview of llama grazing in South America, highlighting its cultural and economic significance. Detail the regions where llamas are primarily grazed.
- Relevance: Discuss the importance of llamas in agricultural systems and briefly mention the existing concerns regarding their ecological impacts.
- Thesis Statement: Clearly state the purpose of the research paper, the hypothesis, and the specific ecological aspects that will be investigated.

Analysis of Ecological Effects of Llama Grazing:
- Soil Quality and Erosion: Analyze how llama grazing affects soil quality, including changes in nutrient content, compaction, and erosion rates. Utilize scientific studies and field observations.
- Vegetation and Biodiversity: Investigate the impacts of llama grazing on native plant species and overall biodiversity. Include the potential for invasive species propagation.
- Water Resources: Evaluate the relationship between llama grazing and local water quality and availability, including effects on streams, ponds, and groundwater.

Recommendations and Conclusions:
- Best Practices: Propose evidence-based best practices for sustainable llama grazing that balances ecological health with economic needs.
- Policy Implications: Discuss potential policy interventions or regulations to guide sustainable llama grazing. Include an assessment of the roles of local communities and governments.
- Concluding Remarks: Sum up the main findings of the paper and their implications for future research and practice in sustainable agriculture.
ENDEXAMPLE

The new task must not ask for any output that a language model could not produce, e.g. images, charts, etc.

This example has 3 criteria sections: introduction, analysis, recommendations and conclusions. The new task must have different, unique criteria sections from the example, with a different output format, i.e. you need not use bullet points for the specifics, it could be a numbered list or plain text paragraph, but be internally consistent with the style you choose.
3 changes: 3 additions & 0 deletions airoboros/instructors/prompts/detailed_writing_seeds/3.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
BEGINEXAMPLE
I'm in the mood for a fun story, and I've got a few things in mind that I'd like you to include. Can you write me a tale set in a small coastal village? I'd love it if there was a talking cat who's wise beyond its years, and maybe it's guiding the main character to find an ancient key. The key could have some sort of mystical power or history. All of the characters should be female. Feel free to take it in any direction you want, but make sure to include those elements.
ENDEXAMPLE
10 changes: 4 additions & 6 deletions airoboros/instructors/prompts/writing.txt
Original file line number Diff line number Diff line change
Expand Up @@ -5,16 +5,14 @@ Here are a few examples:
- My name is George. Write an email to my boss, Jim Bob, with a proposal to start using CI/CD in github. Give a list of reasons why using CI/CD would be beneficial to the company, and why a slow regular release cadence is problematic. Include a silly joke about golf in a p.s. and sign off with my name, "Geroge, the Magnificant".
- Write a synopsis of the movie "Armageddon" in the style of Shakespeare.
- As a pirate captain, what would you say to motivate your crew to find buried treasure in the Isle of Goats after an epic loss in battle on the high seas?
- Come up with a story about a man named Dan who decided to start a small business, with a sad ending.
- Come up with a short story about a man named Dan who decided to start a small business, with a sad ending.
- Write a short story about a Llama named Billy, involving torrential downpours, with a tragic ending.
- Write a song about a paraplegic, who against all odds, rose to fame and success.
- Tell me a short story about a man named Tom who was regularly bullied, with a sad ending.
- Create a story somehow involving kayaks and snowy mountain peaks.
- Compose a story based on some of the following guidelines. The main character is David Dorado, who wears an eye patch due to a tragic accident as a child involving a spork. The other character is Cynthia Tulip, who is a quiet, French Canadian who speaks English but not particularly well. She secretly loves David, but David isn't aware and doesn't return her affection. David suddenly realizes he may actually be in love with her while they drink coffee at noon.
- Write an email announcing a new breakthrough in tire technology by your company ("Atobormafi Tire Tech") which increases fuel efficiency by 7% on average. The target of the email is investors, since we are seeking Series B funding. Explain the profit possibilities with such a tire and ask for new investment.
- Write the introduction and methodology sections of a research article in which the use of Psilocybin Cubensis reduced dependency on parmesan cheese by 82%. Dosage was 0.5g/day, split between 3 dosages, administered orally. Double blind, placebo controlled, 1812 participants.

Make sure to include a wide variety of writing tasks, across a wide range of subjects. Be loquacious.
Make sure to include a wide variety of writing tasks with varying level of detail.

{topics}

{flesch}

Expand Down
Loading

0 comments on commit c6aa785

Please sign in to comment.