Project Home Page | Download Page
Detailed Documentation (English) | 详细文档(中文)
Join us | 加入我们
COOTA (Come Out of Thin Air) is a powerful data-generating python library. By supporting generator nesting, it allows you to generate a variety of data that shows great randomness. It also supports making generators conform to a certain distribution and associating two generators.
[TOC]
Requires Python 3.x.
pip install coota
pip3 install coota
Go to 👆🏻download page.
Demo programs can be found under the "tutorial" folder.
Generate a letter using a Generator.
from coota import *
generator = LetterGenerator()
print(generator.generate())
This returns a random letter, for example:
a
Generate data in batches using GeneratorSequence.
from coota import *
string_length = 2
int_range_from = 0
int_range_to = 10 # not included, which means the generator returns an integer x that matches 0 ≤ x < 10
generator_a = StringGenerator(string_length)
generator_b = IntGenerator(start=int_range_from, stop=int_range_to)
generator_sequence = GeneratorSequence("String: ", generator_a, ", Integer: ", generator_b, n=10)
for data in generator_sequence:
print(data)
This returns 10 groups of data.
String: Pf, Integer: 6
String: YL, Integer: 9
String: kl, Integer: 8
String: xC, Integer: 4
String: lo, Integer: 2
String: on, Integer: 4
String: uE, Integer: 0
String: or, Integer: 3
String: gj, Integer: 8
String: vl, Integer: 0
The default arguments can be generators. For example:
from coota import *
string_length = IntGenerator(start=1, stop=4)
int_range_from = 0
int_range_to = 10
generator_a = StringGenerator(string_length)
generator_b = IntGenerator(start=int_range_from, stop=int_range_to)
generator_sequence = GeneratorSequence("String: ", generator_a, ", Integer: ", generator_b, n=10)
for data in generator_sequence:
print(data)
String: I, Integer: 5
String: kaK, Integer: 1
String: aCx, Integer: 3
String: I, Integer: 2
String: Ev, Integer: 4
String: RXc, Integer: 4
String: Gh, Integer: 3
String: z, Integer: 6
String: DO, Integer: 3
String: OL, Integer: 2
The nesting depth is not limited. Any default argument like string_length
can be a generator. Global arguments like int_range_from
and int_range_to
can also be generators, but they won't be parsed.
from coota import *
# generate one letter
generator = LetterGenerator()
letter = generator.generate()
print(type(letter), letter)
<class 'str'> L
from coota import *
# generate a string of 5 letters
string_length = 5
generator = StringGenerator()
string = generator.generate(string_length)
print(type(string), string)
<class 'str'> ieyDW
from coota import *
# generate a 10 digit number
number_digit = 10
generator = NumberGenerator()
number = generator.generate(number_digit) # the first digit is never zero unless the number itself is 0, so values like 012 are not produced
print(type(number), number)
<class 'str'> 4947421207
from coota import *
# generate a string of 6 letters and numbers
string_length = 6
generator = LetterAndNumberGenerator()
string = generator.generate(string_length)
print(type(string), string)
<class 'str'> dPgpJ9
from coota import *
# generate an integer (0 ≤ x < 10)
range_from, range_to = 0, 10
generator = IntGenerator(start=range_from, stop=range_to)
integer = generator.generate()
print(type(integer), integer)
<class 'int'> 9
from coota import *
# create an IntIterable
range_from, range_to = 0, 10
step = 1 # optional, 1 by default
iterable_a = IntIterable(start=range_from, stop=range_to, step=step)
integer_a, integer_b = iterable_a.generate(), iterable_a.generate()
print(type(integer_a), integer_a, integer_b)
print("Example in the for loop:")
iterable_b = IntIterable(start=range_from, stop=range_to, step=step)
for integer in iterable_b:
print(type(integer), integer)
<class 'int'> 0 1
Example in the for loop:
<class 'int'> 0
<class 'int'> 1
<class 'int'> 2
<class 'int'> 3
<class 'int'> 4
<class 'int'> 5
<class 'int'> 6
<class 'int'> 7
<class 'int'> 8
<class 'int'> 9
from coota.preset import *
import datetime
# generate a datetime from April 1, 2022 to June 1, 2022
# can be either str or datetime.datetime object or integer time stamp
range_from, range_to = "2022-4-1", "2022.6.1" # optional, the default range is from now to next year
# range_from, range_to = datetime.datetime(year=2022, month=4, day=1), datetime.datetime(year=2022, month=6, day=1)
# range_from, range_to = 1648791336, 1654061736
generator = TimeGenerator(start=range_from, stop=range_to)
time = generator.generate()
print(type(time), time)
<class 'datetime.datetime'> 2022-05-24 18:12:00
from coota.preset import *
# generate an email address whose name is 7 characters long and whose domain is "outlook.com"
domain, name_length = "outlook.com", 7
generator = EmailGenerator(domain=domain)
email = generator.generate(name_length)
print(type(email), email)
<class 'str'> RUSyUY5@outlook.com
from coota.preset import *
# generate a QQMail address with a 13-digit name
name_length = 13
generator = QQMailGenerator()
qqmail = generator.generate(name_length)
print(type(qqmail), qqmail)
<class 'str'> 7160380273761@qq.com
from coota.preset import *
# generate a name
generator = NameGenerator()
name = generator.generate()
print(type(name), name)
<class 'str'> Nikita
See the documentation section for a detailed explanation.
from coota import *
class MyGenerator(Generator):
def source(self) -> Sequence:
return "option A", "option B", "option C"
def make(self, *args) -> Any:
return self.choice()
my_generator = MyGenerator()
option = my_generator.generate()
print(option)
option C
See the documentation section for a detailed explanation.
from coota import *
class MyIterableGenerator(ItertableGenerator):
def initialize(self) -> None:
self.set_pointer(0)
def step(self) -> bool:
self.set_pointer(self.get_pointer() + 1)
return self.get_pointer() <= len(self.get_source())
def source(self) -> Sequence:
return "A", "B", "C", "D"
def make(self, *args) -> Any:
return self.get_source()[self.get_pointer()]
my_iterable_generator = MyIterableGenerator()
for opt in my_iterable_generator:
print(opt)
A
B
C
D
from coota.preset import *
domain, name_length = "outlook.com", 7
generator = EmailGenerator(domain=domain)
email = generator.generate(name_length)
The above example can be replaced by the following code:
from coota.preset import *
domain, name_length = "outlook.com", 7
generator = EmailGenerator(name_length, domain=domain)
email = generator.generate()
In other words, the arguments given to generate()
can be given to the constructor in the same order, so that no arguments are necessary in generate()
.
However, the global arguments for a generator can only be given to the constructor. If you are not familiar with Python's argument syntax: arguments written in the form name=value
are global arguments; all others are default arguments.
For example, to generate a string of random length:
from coota import *
string_length = IntGenerator(start=1, stop=10)
generator = StringGenerator()
string = generator.generate(string_length)
# or
generator = StringGenerator(string_length)
string = generator.generate()
If you want to pass the generator intact, there are two ways.
from coota import *
generator = LetterGenerator()
generator.set_parseable(False)
output = generator.generate()
print(type(output), output)
<class 'coota.generator.LetterGenerator'> [Generator({})]
from coota import *
generator = LetterGenerator()
output = generator.generate(parse=False)
print(type(output), output)
<class 'coota.generator.LetterGenerator'> [Generator({})]
Here's a use case.
from coota import *
class GeneratorGenerator(Generator):
def source(self) -> Sequence:
g1 = LetterGenerator()
g2 = StringGenerator(5)
return g1, g2
def make(self, *args) -> Any:
return self.choice()
generator = GeneratorGenerator()
generator_output = generator.generate(parse=False)
print(type(generator_output), generator_output)
<class 'coota.generator.LetterGenerator'> [Generator({})]
To make the generator fit a certain distribution, you can change the generator's chooser.
Built-in choosers:
- GaussianChooser
- BinomialChooser
- PoissonChooser
- GeomChooser
- ExponChooser
The following demo programs use GaussianChooser as an example; the other choosers work the same way. Some choosers may require additional arguments. See the documentation section for more information.
from coota import *
loc, size = 0, 100
chooser = GaussianChooser()
show(chooser) # visualize the chooser
from coota import *
chooser = GaussianChooser()
generator = IntGenerator(start=40, stop=140)
generator.set_chooser(chooser)
result = []
for _ in range(100):
result.append(generator.generate())
show(result)
There is no preset Association, so a custom association class must be declared.
For example, the following case shows a demo program that makes the generator fit a random distribution each time.
from coota import *
class DistributionGenerator(Generator):
def source(self) -> Sequence:
return DefaultChooser(), GaussianChooser()
def make(self, *args) -> Any:
return self.choice()
class MyAssociation(Association):
def associate(self, g: Any, the_other_generator_output: Any) -> Any:
g.set_chooser(the_other_generator_output)
generator_a = DistributionGenerator()
generator_b = IntGenerator(start=0, stop=10)
generator_sequence = GeneratorSequence(generator_a, " ", generator_b, n=5)
generator_b.set_association(MyAssociation(generator_a))
for i in generator_sequence:
print(i)
<coota.generator.DefaultChooser object at 0x1085995e0> 9
<coota.generator.DefaultChooser object at 0x1085995e0> 2
<coota.generator.GaussianChooser object at 0x108599790> 4
<coota.generator.DefaultChooser object at 0x1085995e0> 5
<coota.generator.DefaultChooser object at 0x1085995e0> 9
You may have noticed that the same result can be achieved by passing a generator as an argument. An Association is therefore usually used to express a logical relationship between two generators, rather than a random or mathematical one.
The second case shows how an Association works in resolving logical problems.
from coota.preset import *
from coota import *
class GenderGenerator(Generator):
def source(self) -> Sequence:
return "male", "female"
def make(self, *args) -> Any:
return self.choice()
class MyAssociation(Association):
def associate(self, g: Any, the_other_generator_output: Any) -> Any:
if the_other_generator_output == "male":
return g.get_chooser().choice(NAME_MALE)
else:
return g.get_chooser().choice(NAME_FEMALE)
generator_a = GenderGenerator()
generator_b = NameGenerator()
generator_sequence = GeneratorSequence("{'sex': '", generator_a, "', 'name': '", generator_b, "'}", n=5)
generator_b.set_association(MyAssociation(generator_a))
for i in generator_sequence:
print(i)
{'sex': 'female', 'name': 'Dora'}
{'sex': 'female', 'name': 'Joan'}
{'sex': 'male', 'name': 'Benson'}
{'sex': 'male', 'name': 'Garfield'}
{'sex': 'male', 'name': 'Paul'}
In this example, I want to generate five random names containing gender. However, if the two generators are independent of each other, there may be a man with a female name. That's what an Association is for.
A GeneratorSequence provides a convenient way to generate a batch of data.
For example, it can be used to generate some information as a string.
from coota import *
from coota.preset import *
gs = GeneratorSequence("Hello, my name is ", NameGenerator(), ". ", n=10)
for i in gs:
print(i)
Hello, my name is Keith.
Hello, my name is Allison.
Hello, my name is Eugene.
Hello, my name is Helena.
Hello, my name is Sabrina.
Hello, my name is Ian.
Hello, my name is Alina.
Hello, my name is Carry.
Hello, my name is Oliver.
Hello, my name is Adelaide.
It can also be used to generate JSON strings.
The following cases use a Generator as an example; the other supported objects work the same way. See the documentation section for more information.
Supported objects:
- Chooser
- Association
- Generator
- GeneratorOutput
- GeneratorSequence
from coota import *
path = "generator.g"
generator = LetterGenerator()
save(generator, path)
from coota import *
path = "generator.g"
generator = load(path)
# or
generator = GeneratorOperator(path).load()
General flow path:
class Chooser(object):
This method specifies how the chooser selects an item.
@abstractmethod
def choice(self, x: Sequence) -> Any:
Name | Usage |
---|---|
x | The data source from which the chooser chooses. |
return | A single item chosen from x . |
This method specifies how the chooser selects a batch of items.
@abstractmethod
def choices(self, x: Sequence, n: int) -> Sequence:
Name | Usage |
---|---|
x | The data source from which the chooser chooses. |
n | The number of choices. |
return | A list of items chosen from x . |
class Association(object):
def __init__(self, the_other_generator: Generator):
Name | Usage |
---|---|
the_other_generator | The other generator associated with this generator. That generator must generate before this generator. |
def get_the_other_generator(self) -> Generator:
Name | Usage |
---|---|
return | The other generator. |
This method specifies the association between the output of two generators.
@abstractmethod
def associate(self, g: Any, the_other_generator_output: Any) -> Any:
Name | Usage |
---|---|
g | The generator whose set_association() was called with this association. |
the_other_generator_output | The output generated by the other generator. |
return | Anything. If it is not None, this value is returned by generate() ; otherwise the generation process continues. |
class GeneratorOutput(object):
def __init__(self, output: Any):
Name | Usage |
---|---|
output | The output. |
def get_output(self) -> Any:
Name | Usage |
---|---|
return | The output. |
class Generator(object):
def __init__(self, *default_args, **args):
Name | Usage |
---|---|
default_args | Given to make() when no arguments are given to generate() . For example, in a GeneratorSequence, generate() is called with no arguments, then the default_args will be given to make() . Required arguments are listed in the specific generators. |
args | Global arguments for the generator. Required arguments are listed in the specific generators. |
def _get_source_cache(self) -> Union[Sequence, None]:
Name | Usage |
---|---|
return | The cache of the source. |
def _set_source_cache(self, source_cache: Sequence) -> None:
Name | Usage |
---|---|
source_cache | The cache of the source. |
return |
def _get_last(self) -> Any:
Name | Usage |
---|---|
return | Last generated data. |
def _set_last(self, last: Any) -> None:
Name | Usage |
---|---|
last | Last generated data. |
return |
def get_parseable(self) -> bool:
Name | Usage |
---|---|
return | Whether the generator can be parsed as an argument. |
def set_parseable(self, parseable: bool) -> None:
Name | Usage |
---|---|
parseable | Whether the generator can be parsed as an argument. If false, the generator will be recognized as an argument itself instead of being parsed into an actual output. |
return |
def get_uc(self) -> bool:
Name | Usage |
---|---|
return | Whether the generator uses the cache of the source. |
def set_uc(self, use_cache: bool) -> None:
Name | Usage |
---|---|
use_cache | Whether the generator uses the cache of the source. If true, the generator will only call source() once and use the cache instead after that. It's true by default. Set it to false if your source is not always static. |
return |
def get_args(self) -> dict:
Name | Usage |
---|---|
return | The global arguments of the generator. |
def get_arg(self, name: str, required_type: type = object) -> Any:
Name | Usage |
---|---|
name | The argument's name. |
required_type | The type of argument you require. If any type of argument is acceptable, set it to object , which is also the default. |
return | The argument's value. It can be None if the argument does not exist or is not of the same type as required. |
def get_arg_or_default(self, name: str, default: Any, required_type: type = object) -> Any:
Name | Usage |
---|---|
name | The argument's name. |
default | The default value. |
required_type | The type of argument you require. If any type of argument is acceptable, set it to object , which is also the default. |
return | The argument's value. The default value will be returned if the argument does not exist or is not of the same type as required. |
def get_required_arg(self, name: str, required_type: type = object) -> Any:
Name | Usage |
---|---|
name | The argument's name. |
required_type | The type of argument you require. If any type of argument is acceptable, set it to object , which is also the default. |
return | The argument's value. |
Exception | Case |
---|---|
AttributeError | The argument does not exist. |
TypeError | The argument is not of the same type as required. |
def get_default_args(self) -> tuple:
Name | Usage |
---|---|
return | The default arguments to be given to make() . |
def get_chooser(self) -> Chooser:
Name | Usage |
---|---|
return | The generator's chooser. |
def set_chooser(self, chooser: Chooser) -> None:
Name | Usage |
---|---|
chooser | Set the generator's chooser. A DefaultChooser is used by default. If you want to change the behavior of choosing, such as making it fit a certain distribution, you can do so by changing the chooser object. |
return |
def get_association(self) -> Union[Association, None]:
Name | Usage |
---|---|
return | The association with the other generator. |
def set_association(self, association: Association) -> None:
Name | Usage |
---|---|
association | The association with the other generator. |
return |
def choice(self) -> Any:
Name | Usage |
---|---|
return | One single item chosen from the source by the chooser. |
def choices(self, n: int) -> Sequence:
Name | Usage |
---|---|
n | The number of items. |
return | A batch of items chosen from the source by the chooser. |
This method specifies what data the generator may generate.
@abstractmethod
def source(self) -> Sequence:
Name | Usage |
---|---|
return | The original dataset from which the generator selects. |
def get_source(self) -> Sequence:
Name | Usage |
---|---|
return | The same as source() returns. If use_cache is true, returns the source cache instead. |
This method specifies how to generate data.
@abstractmethod
def make(self, *args) -> Any:
Name | Usage |
---|---|
args | Optional arguments. |
return | Anything. |
def generate(self, *args, parse: bool = True) -> Any:
Name | Usage |
---|---|
args | Optional arguments given to make() . |
parse | Whether to resolve the generator in parameters and return values. True: return an output. False: return the generator itself. |
return | Anything. |
Exception | Case |
---|---|
LookupError | This generator was asked to generate before the generator it is associated with had generated. |
def output(self, *args, parse: bool = False) -> GeneratorOutput:
Name | Usage |
---|---|
args | Optional arguments given to make() . |
parse | Whether to resolve the generator in parameters and return values. |
return | The return value of generate() wrapped as a GeneratorOutput. |
An IterableGenerator is an ordered iterator, not a selector. It achieves ordering by adding a pointer to the generator.
class ItertableGenerator(Generator):
def get_pointer(self) -> Any:
Name | Usage |
---|---|
return | The pointer. |
def set_pointer(self, pointer: Any) -> None:
Name | Usage |
---|---|
pointer | The pointer, which is 0 by default. |
return |
def choice(self) -> Any:
Name | Usage |
---|---|
return | self.get_source()[pointer] . The pointer must be an integer when calling this method. |
def choices(self, n: int) -> Sequence:
Name | Usage |
---|---|
n | The number of choices. |
return | self.get_source()[pointer: pointer + n] . The pointer must be an integer when calling this method. |
This is a callback used to set initialization operations such as pointers.
@abstractmethod
def initialize(self) -> None:
Name | Usage |
---|---|
return |
This method specifies how the iterator iterates.
@abstractmethod
def step(self) -> bool:
Name | Usage |
---|---|
return | True: continue iteration. False: stop iteration. |
A GeneratorSequence provides a convenient way to generate a batch of data.
def __init__(self, *sequence, n: Union[int, _g.IntGenerator, _g.IntIterable], t: type = str):