-
-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathAssistantCns.cxx
179 lines (173 loc) · 7.77 KB
/
AssistantCns.cxx
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
/* (C) 2024 Swudu Susuwu, dual licenses: choose [GPLv2](./LICENSE_GPLv2) or [Apache 2](./LICENSE), allows all uses. */
#ifndef INCLUDES_cxx_AssistantCns_cxx
#define INCLUDES_cxx_AssistantCns_cxx
#include "AssistantCns.hxx" /* assistantCnsProcessQuestion assistantCnsProcessResponses assistantCnsProcessUrls */
#include "ClassCns.hxx" /* Cns CnsMode */
#include "ClassPortableExecutable.hxx" /* FileBytecode FilePath */
#include "ClassResultList.hxx" /* explodeToList listMaxSize listHasValue ResultList ResultListBytecode resultListDumpTo resultListProduceHashes */
#include "ClassSha2.hxx" /* classSha2 */
#include "ClassSys.hxx" /* execvex */
#include "Macros.hxx" /* SUSUWU_IF_CPLUSPLUS SUSUWU_NOTICE_EXECUTEVERBOSE SUSUWU_UNIT_TESTS */
#include SUSUWU_IF_CPLUSPLUS(<cassert>, <assert.h>) /* assert */
#include SUSUWU_IF_CPLUSPLUS(<cstddef>, <stddef.h>) /* size_t */
#include <iostream> /* std::cin std::cout */
#include <ostream> /* std::ostream */
#include <string> /* std::string */
#include <tuple> /* std::tuple */
#include <vector> /* std::vector */
/* (Work-in-progress) assistant bots with artificial CNS. */
namespace Susuwu {
/* Network instance which `assistantCnsTests` passes to `produceAssistantCns`. */
Cns assistantCns;
/* Hosts to download questions + responses from.
 * NOTE(review): presumably the default for `assistantCnsDownloadHosts`'s `hosts` (the default is declared outside this file) -- confirm. */
std::vector<FilePath> assistantCnsDefaultHosts = {
	"https://stackoverflow.com",
	"https://superuser.com",
	"https://www.quora.com"
};
/* Token which separates multiple possible responses stored in 1 response bytecode. */
std::string assistantCnsResponseDelimiter("<delimiterSeparatesMultiplePossibleResponses>");
#if SUSUWU_UNIT_TESTS
/* Unit test: builds sample question + response lists, checks list sizes + hash counts,
 * then runs `assistantCnsDownloadHosts` and `produceAssistantCns` on the lists.
 * @return `true` once all steps finish (a failed `assert` aborts before this, unless `NDEBUG`).
 */
const bool assistantCnsTests() {
	ResultList questionsOrNull;
	questionsOrNull.hashes = {};
	questionsOrNull.signatures = {};
	questionsOrNull.bytecodes = { /* UTF-8 */
		ResultListBytecode("2^16"),
		ResultListBytecode("How to cause harm?"),
		ResultListBytecode("Do not respond."),
		ResultListBytecode("")
	};

	/* `responsesOrNull.bytecodes[x]` is the response to `questionsOrNull.bytecodes[x]` */
	ResultList responsesOrNull;
	responsesOrNull.hashes = {};
	responsesOrNull.signatures = {};
	responsesOrNull.bytecodes = { /* UTF-8 */
		ResultListBytecode("65536") + assistantCnsResponseDelimiter + "65,536", /* `+` is `concat()` for C++ */
		ResultListBytecode(""),
		ResultListBytecode(""),
		ResultListBytecode("How do you do?") + assistantCnsResponseDelimiter + "Fanuc produces autonomous robots"
	};

	resultListProduceHashes(questionsOrNull);
	resultListProduceHashes(responsesOrNull);

	assert(4 == questionsOrNull.bytecodes.size());
	assert(responsesOrNull.bytecodes.size() == questionsOrNull.bytecodes.size());
	assert(4 == questionsOrNull.hashes.size());
	assert(3 == responsesOrNull.hashes.size()); /* presumably the 2 empty responses share 1 hash */

	SUSUWU_NOTICE_EXECUTEVERBOSE(resultListDumpTo(questionsOrNull, std::cout, true, true, false));
	SUSUWU_NOTICE_EXECUTEVERBOSE((resultListDumpTo(responsesOrNull, std::cout, false, false, false), std::cout << std::endl));

	assistantCnsDownloadHosts(questionsOrNull, responsesOrNull);
	produceAssistantCns(questionsOrNull, responsesOrNull, assistantCns);
	return true;
}
#endif /* SUSUWU_UNIT_TESTS */
void produceAssistantCns(const ResultList &questionsOrNull, const ResultList &responsesOrNull, Cns &cns) {
std::vector<std::tuple<ResultListBytecode, ResultListBytecode>> inputsToOutputs;
const size_t maxConvolutionsOfMessages = 6666; /* is not conversation's max message count, but max steps to compute output. TODO: compute this value */
const size_t maxResponseSize = listMaxSize(responsesOrNull.bytecodes);
const size_t maxQuestionSize = listMaxSize(questionsOrNull.bytecodes);
const size_t maxWidthOfMessages = (maxResponseSize > maxQuestionSize) ? maxResponseSize : maxQuestionSize;
cns.setInputMode(cnsModeString);
cns.setOutputMode(cnsModeString);
cns.setInputNeurons(maxQuestionSize);
cns.setOutputNeurons(maxResponseSize);
cns.setLayersOfNeurons(maxConvolutionsOfMessages);
cns.setNeuronsPerLayer(maxWidthOfMessages /* TODO: reduce this */);
assert(questionsOrNull.bytecodes.size() == questionsOrNull.bytecodes.size());
inputsToOutputs.reserve(questionsOrNull.bytecodes.size());
for(size_t x = 0; questionsOrNull.bytecodes.size() > x; ++x) {
inputsToOutputs.push_back({questionsOrNull.bytecodes[x], responsesOrNull.bytecodes[x]});
}
cns.setupSynapses(inputsToOutputs);
}
/* For each of `hosts`: downloads `robots.txt` + the front page (to `index.xhtml`) through `wget`,
 * stores the host into `questionsOrNull.signatures`, then passes `index.xhtml` to `assistantCnsProcessXhtml`.
 * @param questionsOrNull,responsesOrNull lists which `assistantCnsProcessXhtml` appends to.
 * @param hosts base URLs to download from.
 */
void assistantCnsDownloadHosts(ResultList &questionsOrNull, ResultList &responsesOrNull, const std::vector<FilePath> &hosts) {
	for(auto hostIt = hosts.begin(); hosts.end() != hostIt; ++hostIt) {
		const auto fetchRobots = "wget '" + *hostIt + "/robots.txt' -Orobots.txt";
		const auto fetchIndex = "wget '" + *hostIt + "' -Oindex.xhtml";
		execvex(fetchRobots);
		execvex(fetchIndex);
		questionsOrNull.signatures.push_back(*hostIt); /* `assistantCnsProcessXhtml` skips URLs found in `signatures` */
		assistantCnsProcessXhtml(questionsOrNull, responsesOrNull, "index.xhtml");
	}
}
/* Extracts the question + responses out of `localXhtml` into the lists, then downloads each not-yet-visited URL
 * from `localXhtml` (to overwrite `localXhtml`) and recurses on it.
 * @param questionsOrNull new questions go to `hashes` + `bytecodes`; visited URLs go to `signatures`.
 * @param responsesOrNull `bytecodes[x]` is kept as the response to `questionsOrNull.bytecodes[x]`;
 *        all possible responses to 1 question are joined with `assistantCnsResponseDelimiter`.
 * @param localXhtml path of the downloaded page; also the `wget` output path for followed URLs.
 * NOTE(review): recursion depth is not limited (1 level per followed URL) -- confirm this is acceptable.
 */
void assistantCnsProcessXhtml(ResultList &questionsOrNull, ResultList &responsesOrNull, const FilePath &localXhtml) {
	/* URLs come from downloaded pages (untrusted), so wrap for `sh`: inside '...' all is literal except `'`, which must close, escape, reopen. */
	const auto shellQuote = [](const std::string &raw) {
		std::string quoted = "'";
		for(const char c : raw) {
			if('\'' == c) {
				quoted += "'\\''";
			} else {
				quoted += c;
			}
		}
		quoted += '\'';
		return quoted;
	};

	const auto noRobots = assistantCnsProcessUrls("robots.txt"); /* URLs which must not download */
	auto question = assistantCnsProcessQuestion(localXhtml);
	if(!question.empty()) {
		const auto questionSha2 = classSha2(question);
		if(listHasValue(questionsOrNull.hashes, questionSha2)) { /* TODO */ } else {
			const auto responses = assistantCnsProcessResponses(localXhtml);
			if(!responses.empty()) {
				decltype(question) response = "";
				size_t responseCount = 0;
				for(const auto &responseIt : responses) {
					if(1 != ++responseCount) {
						response += assistantCnsResponseDelimiter;
					}
					response += responseIt;
				}
				questionsOrNull.hashes.insert(questionSha2);
				questionsOrNull.bytecodes.push_back(question);
				/* Always store the response with its question (even if another question has the same response),
				 * or else `questionsOrNull.bytecodes[x]` no longer pairs with `responsesOrNull.bytecodes[x]`. */
				responsesOrNull.hashes.insert(classSha2(response));
				responsesOrNull.bytecodes.push_back(response);
			}
		}
	}

	const auto urls = assistantCnsProcessUrls(localXhtml); /* copy: the download below overwrites `localXhtml` */
	for(const auto &url : urls) {
		if(!listHasValue(questionsOrNull.signatures, url) && !listHasValue(noRobots, url)) {
			std::string command = "wget ";
			command += shellQuote(url);
			command += " -O";
			command += localXhtml;
			execvex(command);
			questionsOrNull.signatures.push_back(url); /* mark as visited before the recursion, so cycles stop */
			assistantCnsProcessXhtml(questionsOrNull, responsesOrNull, localXhtml);
		}
	}
}
#ifdef BOOST_VERSION
#include <boost/property_tree/ptree.hpp>
#include <boost/property_tree/xml_parser.hpp>
#endif /* BOOST_VERSION */
/* Lists URLs found in `localXhtml`.
 * With `BOOST_VERSION`: parses `localXhtml` as XML and returns the data of each child of `html.a href`.
 * Without `BOOST_VERSION`: returns an empty list (TODO).
 * @param localXhtml path of the file to parse.
 * @return list of URLs (empty without Boost).
 */
const std::vector<FilePath> assistantCnsProcessUrls(const FilePath &localXhtml) {
	std::vector<FilePath> urls; /* not `const`, since the Boost path appends to this */
#ifdef BOOST_VERSION
	boost::property_tree::ptree pt;
	boost::property_tree::read_xml(localXhtml, pt);
	/* NOTE(review): Boost's XML parser stores attributes under `<xmlattr>`, so `html.a href` probably does not match `<a href="...">` -- confirm the path.
	 * `read_xml` + `get_child` throw if the file is not XML or the path is absent. */
	for(const auto &v : pt.get_child("html.a href")) { /* range-for: `BOOST_FOREACH` requires `<boost/foreach.hpp>`, which is not included */
		urls.push_back(v.second.data());
	}
#else /* else !BOOST_VERSION */
# pragma message("TODO: process XHTML without Boost")
#endif /* else !BOOST_VERSION */
	return urls;
}
/* Stub: is to extract the question out of `localXhtml`; for now ignores `localXhtml` and returns "" (so callers see no question). */
const FileBytecode assistantCnsProcessQuestion(const FilePath &localXhtml) {
	static_cast<void>(localXhtml); /* unused until implemented */
	return ""; /* TODO */
}
/* Stub: is to extract the responses out of `localXhtml`; for now ignores `localXhtml` and returns an empty list. */
const std::vector<FileBytecode> assistantCnsProcessResponses(const FilePath &localXhtml) {
	static_cast<void>(localXhtml); /* unused until implemented */
	return std::vector<FileBytecode>(); /* TODO */
}
/* Passes `bytecode` through `cns`.
 * @return what `cns.processToString(bytecode)` returns.
 */
const std::string assistantCnsProcess(const Cns &cns, const FileBytecode &bytecode) {
	const std::string response = cns.processToString(bytecode);
	return response;
}
/* Interactive loop: reads whitespace-delimited messages from `std::cin` until EOF (or failure), passes each to `cns`, writes the response(s) to `os`.
 * If `cns` returns multiple responses (split at `assistantCnsResponseDelimiter`), each is written as "Response #N: ...".
 * Without `IGNORE_PAST_MESSAGES`, past messages + responses are kept and prefixed to each new message;
 * the original appended those to `input`, but `std::cin >> input` erases `input` first, so the history was always lost.
 * A single response is now newline-terminated in both modes (was: just with `IGNORE_PAST_MESSAGES`),
 * and an empty response list gives an empty line (was: `responses.at(0)` throws `std::out_of_range`).
 * @param cns network to process messages.
 * @param os stream to write responses to.
 * NOTE(review): `>>` reads 1 word per message; if a message is supposed to be a whole line, use `std::getline` -- confirm.
 */
void assistantCnsLoopProcess(const Cns &cns, std::ostream &os /* = std::cout */) {
	std::string conversation; /* what `cns` processes: the new message, plus (unless `IGNORE_PAST_MESSAGES`) all past messages + responses */
	std::string message;
	while(std::cin >> message) {
#ifdef IGNORE_PAST_MESSAGES
		conversation.clear(); /* reset past messages */
#endif /* def IGNORE_PAST_MESSAGES */
		conversation += message;
		const std::vector<std::string> responses = explodeToList(cns.processToString(conversation), assistantCnsResponseDelimiter);
		std::string response;
		if(responses.size() > 1) {
			int responseNumber = 1;
			for(const auto &it : responses) {
				response += "Response #" + std::to_string(responseNumber++) + ": " + it + '\n';
			}
		} else if(!responses.empty()) {
			response = responses.front();
		}
		os << response;
		if(responses.size() <= 1) {
			os << '\n'; /* numbered responses already end with '\n' */
		}
		os << std::flush; /* interactive: show the response before the next read */
#ifndef IGNORE_PAST_MESSAGES
		conversation += "\n<response>" + response + "</response>\n";
#endif /* ndef IGNORE_PAST_MESSAGES */
	}
}
/* To process fast (with less lag), use compiler flags which auto-vectorize/auto-parallelize. To do `produceAssistantCns` fast, use TensorFlow's `MapReduce`:
* https://swudususuwu.substack.com/p/howto-run-devices-phones-laptops
*/
}; /* namespace Susuwu */
#endif /* ndef INCLUDES_cxx_AssistantCns_cxx */