diff --git a/build.py b/build.py index 2a92ba563..defdf1492 100644 --- a/build.py +++ b/build.py @@ -140,10 +140,6 @@ def main(chapters=[], epub=False, pdf=False, html=False, mobi=False, pandoc_epub def build_pdf(): os.chdir('tex') run('pdflatex -interaction nonstopmode 500L') - run('bibtex 500L') - run('pdflatex -interaction nonstopmode 500L') - run('pdflatex -interaction nonstopmode 500L') - run('pdflatex -interaction nonstopmode 500L') os.chdir('..') run('mv tex/500L.pdf output/') diff --git a/functionalDB/functionalDB.markdown b/functionalDB/functionalDB.markdown index 7cb2f8ddc..e422ff92b 100644 --- a/functionalDB/functionalDB.markdown +++ b/functionalDB/functionalDB.markdown @@ -1215,7 +1215,7 @@ We now have to remove the items that didn’t pass all of the conditions: (update-in path [2] CS/intersection relevant-items)) ``` -Finally, we remove all of the result clauses that are "empty" (i.e., their last item is empty). We do this in the last line of the `query-index` function. Our example leaves us with the items in \aosatblref{500.functionaldb.filteredqueryresults}. +Finally, we remove all of the result clauses that are "empty" (i.e., their last item is empty). We do this in the last line of the `query-index` function. Our example leaves us with the items in \aosatblref{500l.functionaldb.filteredqueryresults}. diff --git a/minutiae/500L.bib b/minutiae/500L.bib deleted file mode 100644 index fbc3ef036..000000000 --- a/minutiae/500L.bib +++ /dev/null @@ -1,496 +0,0 @@ -@string{ieeecm="IEEE Communications Magazine"} -@string{sigcomm="Proceedings of the Symposium on Communications Architectures and Protocols (SIGCOMM)"} - -@inproceedings{Rashid:86a, - key = "Accetta1986", - AUTHOR = {Mike Accetta and Robert Baron and William Bolosky and David Golub and Richard Rashid and Avadis Tavanian and Michael Young}, - TITLE = "{Mach: A New Kernel Foundation for UNIX Development}", - BOOKTITLE = "{Proceedings of the Summer 1986 USENIX Technical Conference and Exhibition}", - MONTH = jun, - YEAR = 1986, - PAGES = "93--112" -} - -@inproceedings{Schmidt:99w, - key = "Arulanthu2000", - author = "Alexander B. Arulanthu and Carlos O'Ryan and Douglas C. 
Schmidt and Michael Kircher and Jeff Parsons", - title = "{The Design and Performance of a Scalable ORB Architecture for CORBA Asynchronous Messaging}", - BOOKTITLE = "{Proceedings of the Middleware 2000 Conference}", - ORGANIZATION = "ACM/IFIP", - LOCATION = {Pallisades, New York}, - month = apr, - year = 2000 -} - -@inproceedings{Karamcheti:05, - key = "Akkerman2005", - author = {Anatoly Akkerman and Alexander Totok and Vijay Karamcheti}, - title = "{Infrastructure for Automatic Dynamic Deployment of J2EE Applications in Distributed Environments}", - booktitle = "{3rd International Working Conference on Component Deployment (CD 2005)}", - address = {Grenoble, France}, - year = {2005}, - month = nov, - pages = {17--32} -} - -@unpublished{diginorm, - key = "Brown2012", - AUTHOR = {Brown, CT AND Howe, A AND Zhang, Q AND Pyrkosz, A AND Brom, TH}, - TITLE = {A Reference-Free Algorithm for Computational Normalization of Shotgun Sequencing Data}, - YEAR = {2012}, - NOTE = {In review at PLoS One, July 2012; Preprint at http://arxiv.org/abs/1203.4802} -} - -@misc{web:dablooms, - key = "SW/dablooms", - title="dablooms: a scalable, counting {Bloom} filter", - author={bit.ly software developers}, - howpublished={\url{http://github.com/bitly/dablooms}}, -} - -@book{BrownWilson2011ArchOSS, - key = "Brown2011", - author = {Brown, Amy and Wilson, Greg}, - isbn = {1257638017}, - month = jun, - publisher = {lulu.com}, - title = {{The Architecture Of Open Source Applications}}, - url = {http://www.aosabook.org/en/}, - year = {2011} -} - -@article{Clark:89, - key = "Clark1989", - AUTHOR = "David D. Clark and Van Jacobson and John Romkey and Howard Salwen", - TITLE = "{An Analysis of {TCP} Processing Overhead}", - JOURNAL = ieeecm, - MONTH = jun, - YEAR = 1989, - VOLUME = 27, - NUMBER = 6, - PAGES = "23--29" -} - -@inproceedings{Clark:90, - key = "Clark1990", - AUTHOR = "David D. Clark and David L. Tennenhouse", - TITLE = "{Architectural Considerations for a New Generation of Protocols}", - BOOKTITLE = sigcomm, - ORGANIZATION = "ACM", - YEAR = 1990, - MONTH = sep, - PAGES = "200--208" -} - -@inproceedings{Degermark:97, - key = "Degermark1997", - author = {Mikael Degermark and Andrej Brodnik and Svante Carlsson and Stephen Pink}, - title = "{Small Forwarding Tables for Fast Routing Lookups}", - booktitle = {Proceedings of the ACM SIGCOMM '97 Conference on Applications, Technologies, Architectures, and Protocols for Computer Communication}, - year = {1997}, - isbn = {0-89791-905-X}, - pages = {3--14}, - publisher = {ACM Press} - } - -@inproceedings{Schmidt:05y, - key = "Deng2005", - AUTHOR = {Gan Deng and Jaiganesh Balasubramanian and William Otte and Douglas C. Schmidt and Aniruddha Gokhale}, - TITLE = "{DAnCE: A QoS-enabled Component Deployment and Configuration Engine}", - BOOKTITLE = "Proceedings of the 3rd Working Conference on Component Deployment (CD 2005)", - MONTH = nov, - YEAR = 2005, - PAGES = {67--82} -} - -@inproceedings{GokhaleF6Aerospace:12, - key = "Dubey2012", - author = {Abhishek Dubey and William Emfinger and Aniruddha Gokhale and Gabor Karsai and William Otte and Jeffrey Parsons and Csanad Czabo and Alessandro Coglio and Eric Smith and Prasanta Bose}, - title = "{A Software Platform for Fractionated Spacecraft}", - booktitle = {Proceedings of the IEEE Aerospace Conference, 2012}, - publisher = {IEEE}, - month = mar, - year = {2012}, - pages = {1--20} -} - -@inproceedings{Druschel:93, - key = "Druschel1993", - Author = "Peter Druschel and Larry L. 
Peterson", - Title = "{Fbufs: A High-Bandwidth Cross-Domain Transfer Facility}", - Booktitle = "Proceedings of the $14^{th}$ Symposium on Operating System Principles (SOSP)", - Month = dec, - Year = 1993 -} - -@misc{web:TAU, - key = "SW/TAU", - title="{TAU}: {Tuning} and {Analysis Utilities}", - author={A. D. Malony et al.}, - howpublished={\url{http://www.cs.uoregon.edu/Research/tau/home.php}} -} - -@misc{web:khmer, - key = "SW/khmer", - title="khmer: genomic data filtering and partitioning software", - author={C. Titus Brown et al.}, - howpublished={\url{http://github.com/ged-lab/khmer}}, -} - -@misc{web:Valgrind, - key = "SW/valgrind", - title="{Valgrind}", - author={Julian Seward et al.}, - howpublished={\url{http://valgrind.org/}} -} - -@inproceedings{Engler:96, - key = "Engler1996", - AUTHOR = "Dawson R. Engler and M. Frans Kaashoek", - TITLE = "{DPF: Fast, Flexible Message Demultiplexing using Dynamic Code Generation}", - BOOKTITLE = "Proceedings of ACM SIGCOMM '96 Conference in Computer Communication Review", - PUBLISHER = "ACM Press", - YEAR = 1996, - MONTH = aug, - PAGES = "53-59" -} - -@article{Fatland:07, - key = "Fatland2007", - author = {{Fatland}, D.~R. and {Heavner}, M.~J. and {Hood}, E. and {Connor}, C.}, - title = "{The SEAMONSTER Sensor Web: Lessons and Opportunities after One Year}", - journal = {AGU Fall Meeting Abstracts}, - year = 2007, - month = dec -} - -@book{Vlissides:94, - key = "Vlissides1994", - showlabel = {GoF}, - AUTHOR = "Erich Gamma and Richard Helm and Ralph Johnson and John Vlissides", - TITLE = "{Design Patterns: Elements of Reusable Object-Oriented Software}", - PUBLISHER = "Addison-Wesley", - YEAR = 1995 -} - -@inproceedings{Schmidt:02ee, - key = "Schmidt2002", - AUTHOR = {Aniruddha Gokhale and Balachandran Natarajan and Douglas C. Schmidt and Andrey Nechypurenko and Jeff Gray and Nanbor Wang and Sandeep Neema and Ted Bapty and Jeff Parsons}, - TITLE = "{CoSMIC: An MDA Generative Tool for Distributed Real-time and Embedded Component Middleware and Applications}", - BOOKTITLE = "Proceedings of the OOPSLA 2002 Workshop on Generative Techniques in the Context of Model Driven Architecture", - ORGANIZATION = "ACM", - MONTH = nov, - YEAR = 2002 -} - -@book{Heineman:01, - key = "Heineman2001", - AUTHOR = "George T. Heineman and Bill T. Councill", - TITLE = "Component-Based Software Engineering: Putting the Pieces Together", - PUBLISHER = "Addison-Wesley", - YEAR = 2001 -} - -@inproceedings{Hutchinson:88, - key = "Hutchinson1988", - AUTHOR = "Norman C. Hutchinson and Larry L. Peterson", - TITLE = "{Design of the {\it x}-{K}ernel}", - BOOKTITLE = "Proceedings of the SIGCOMM '88 Symposium", - PAGES = "65-75", - MONTH = aug, - YEAR = 1988 -} - -@inproceedings{Hasan:05, - key = "Hasan2005", - author = {Jahangir Hasan and T. N. Vijaykumar}, - title = "{Dynamic pipelining: Making IP-lookup Truly Scalable}", - booktitle = {SIGCOMM '05: Proceedings of the 2005 Conference on Applications, technologies, architectures, and protocols for computer communications}, - year = {2005}, - isbn = {1-59593-009-4}, - pages = {205--216}, - doi = {doi.acm.org/10.1145/1080091.1080116}, - publisher = {ACM Press} -} - -@misc{CIAO, - key = "SW/CIAO", - AUTHOR = "{Institute for Software Integrated Systems}", - TITLE = "{Component-Integrated ACE ORB (CIAO)}", - howpublished = "{www.dre.vanderbilt.edu/CIAO}", - year = "Vanderbilt University" -} - -@techreport{Schmidt:08z, - key = "Kinnebrew2008", - AUTHOR = {John S. Kinnebrew and William R. 
Otte and Nishanth Shankaran and Gautam Biswas and Douglas C. Schmidt}, - TITLE = "{Intelligent Resource Management and Dynamic Adaptation in a Distributed Real-time and Embedded Sensor Web System}", - NUMBER = "ISIS-08-906", - INSTITUTION = "Vanderbilt University", - YEAR = {2008}, - ORGANIZATION = {Department of Electrical Engineering and Computer Science} -} - -@misc{web:OpenMP, - key = "SW/OpenMP", - title="{OpenMP}", - author={OpenMP members}, - howpublished={\url{http://openmp.org}} -} - -@inproceedings{Jacobson:93, - key = "McCanne1993", - AUTHOR = {Steven McCanne and Van Jacobson}, - TITLE = "{The BSD Packet Filter: A New Architecture for User-level Packet Capture}", - BOOKTITLE = "Proceedings of the Winter USENIX Conference", - MONTH = jan, - YEAR = {1993}, - PAGES = {259--270} -} - -@inproceedings{Rashid:87f, - key = "Mogul1987", - AUTHOR = "Jeffrey C. Mogul and Richard F. Rashid and Michal J. Accetta", - TITLE = "{The Packet Filter: an Efficient Mechanism for User-level Network Code}", - BOOKTITLE = "Proceedings of the 11th Symposium on Operating System Principles (SOSP)", - MONTH = nov, - YEAR = 1987 -} - -@inproceedings{Ousterhout:88d, - key = "Nelson1988", - AUTHOR = {M. Nelson and J. Ousterhout}, - TITLE = "{Copy-on-Write For {S}prite}", - BOOKTITLE = {USENIX Summer Conference}, - PUBLISHER = "USENIX Association", - YEAR = {1988}, - PAGES = {187--201}, - MONTH = JUN -} - -@manual{LWCCM-2004, - key = "OMG2004", - TITLE = "{Lightweight CCM FTF Convenience Document}", - ORGANIZATION = "{Object Management Group}", - EDITION = "{ptc/04-06-10}", - MONTH = jun, - YEAR = 2004 -} - -@misc{CARDAMOM:web, - key = "CARDAMOM2006", - author = {{ObjectWeb Consortium}}, - title = {{CARDAMOM - An Enterprise Middleware for Building Mission and Safety Critical Applications}}, - howpublished = {{\url{cardamom.objectweb.org}}}, - year = "2006" -} - -@manual{CORBA:08b, - key = "OMG2008", - Title = "{The Common Object Request Broker: Architecture and Specification Version 3.1, Part 2: CORBA Interoperability}", - ORGANIZATION = "{Object Management Group}", - Month = jan, - Year = 2008, - EDITION = "{OMG Document formal/2008-01-07}" -} - -@inproceedings{Schmidt:11a, - key = "Otte2011", - author = {Otte, William R. and Gokhale, Aniruddha and Schmidt, Douglas C.}, - title = "{Predictable Deployment in Component-based Enterprise Distributed Real-time and Embedded Systems}", - booktitle = {Proceedings of the 14th international ACM Sigsoft Symposium on Component Based Software Engineering}, - series = {CBSE '11}, - year = {2011}, - isbn = {978-1-4503-0723-9}, - pages = {21--30}, - doi = {http://doi.acm.org/10.1145/2000229.2000233}, - publisher = {ACM} -} - -@article{SchmidtDnCIST:13, - key = "Otte2013", - AUTHOR = {William Otte and Aniruddha Gokhale and Douglas Schmidt and Alan Tackett}, - TITLE = "{Efficient and Deterministic Application Deployment in Component-based, Enterprise Distributed, Real-time, and Embedded Systems}", - JOURNAL = "Elsevier Journal of Information and Software Technology (IST)", - VOLUME= {55}, - NUMBER= {2}, - MONTH= feb, - YEAR = 2013, - PAGES = {475--488}, - DOI = "10.1016/j.infsof.2012.08.007" -} - -@manual{DandC:06, - key = "OMG2006", - TITLE = "{Deployment and Configuration of Component-based Distributed Applications, v4.0}", - ORGANIZATION = "{OMG}", - EDITION = "{Document formal/2006-04-02}", - MONTH = apr, - YEAR = 2006 -} - -@article{Pai:00, - key = "Pai2000", - author = {Vivek S. 
Pai and Peter Druschel and Willy Zwaenepoel}, - title = "{IO-Lite: A Unified I/O Buffering and Caching System}", - journal = {ACM Transactions of Computer Systems}, - volume = {18}, - number = {1}, - year = {2000}, - pages = {37--66}, - publisher = {ACM Press} -} - -@unpublished{kmer-percolation, - key = "Pell2012", - AUTHOR = {Pell, J AND Hintze, A AND Canino-Koning, R AND Howe, A AND Tiedje, JM AND Brown, CT}, - TITLE = {Scaling metagenome sequence assembly with probabilistic de Bruijn graphs}, - YEAR = {2012}, - NOTE = {Accepted at PNAS, July 2012; Preprint at http://arxiv.org/abs/1112.4193} -} - -@article{Rekhter:97, - key = "Rekhter1997", - author = {Y. Rekhter and B. Davie and E. Rosen and G. Swallow and D. Farinacci and D. Katz}, - title = "{Tag Switching Architecture Overview}", - journal = {Proceedings of the IEEE}, - volume = 85, - number = 12, - year = 1997, - month = dec, - pages = {1973--1983} -} - -@inproceedings{Schmidt:06d, - key = "Suri2006", - AUTHOR = {Dipa Suri and Adam Howell and Nishanth Shankaran and John Kinnebrew and Will Otte and Douglas C. Schmidt and Gautam Biswas}, - TITLE = "{Onboard Processing using the Adaptive Network Architecture}", - BOOKTITLE = "Proceedings of the Sixth Annual NASA Earth Science Technology Conference", - MONTH = jun, - YEAR = 2006 -} - -@article{Sahni:03, - key = "Sahni2003", - author = {Sartaj Sahni and Kun Suk Kim}, - title = "{Efficient Construction of Multibit Tries for IP Lookup}", - journal = {IEEE/ACM Trans. Netw.}, - volume = {11}, - number = {4}, - year = {2003}, - pages = {650--662}, - doi = {dx.doi.org/10.1109/TNET.2003.815288}, - publisher = {IEEE Press} -} - -@article{Schmidt:02g, - key = "Schmidt2002", - AUTHOR = {Douglas C. Schmidt and Bala Natarajan and Aniruddha Gokhale and Nanbor Wang and Christopher Gill}, - TITLE = "{TAO: A Pattern-Oriented Object Request Broker for Distributed Real-time and Embedded Systems}", - journal = {IEEE Distributed Systems Online}, - YEAR = 2002, - VOLUME = 3, - NUMBER = 2, - MONTH = Feb -} - -@book{Schmidt:00a, - key = "Schmidt2000", - showlabel = {POSA2}, - AUTHOR = "Douglas C. Schmidt and Michael Stal and Hans Rohnert and Frank Buschmann", - TITLE = "{Pattern-Oriented Software Architecture: Patterns for Concurrent and Networked Objects, Volume 2}", - PUBLISHER = "Wiley \& Sons", - ADDRESS = "New York", - YEAR = 2000 -} - -@inproceedings{Varghese:05, - key = "Shreedhar1995", - author = {M. 
Shreedhar and George Varghese}, - title = "{Efficient Fair Queueing using Deficit Round Robin}", - booktitle = {SIGCOMM '95: Proceedings of the conference on Applications, technologies, architectures, and protocols for computer communication}, - year = {1995}, - isbn = {0-89791-711-1}, - pages = {231--242}, - doi = {doi.acm.org/10.1145/217382.217453}, - publisher = {ACM Press} -} - -@misc{web:Amdahl, - key="WP/Amdahl", - title="{Amdahl's Law}", - author={Various}, - howpublished={\url{http://en.wikipedia.org/w/index.php?title=Amdahl%27s_law&oldid=515929929}} -} - -@misc{web:atomics, - key="WP/AO", - title="atomic operations", - author={Various}, - howpublished={\url{http://en.wikipedia.org/w/index.php?title=Linearizability&oldid=511650567}} -} - -@misc{web:bigdata, - key="WP/BD", - title="big data", - author={Various}, - howpublished={\url{http://en.wikipedia.org/w/index.php?title=Big_data&oldid=521018481}} -} - -@misc{web:BloomFilter, - key = "WP/BF", - title="{Bloom} filter", - author={Various}, - howpublished={\url{http://en.wikipedia.org/w/index.php?title=Bloom_filter&oldid=520253067}} -} - -@misc{web:membar, - key = "WP/MB", - title="memory barrier", - author={Various}, - howpublished={\url{http://en.wikipedia.org/w/index.php?title=Memory_barrier&oldid=517642176}} -} - -@misc{web:PGO, - key = "WP/PGO", - title="profile-guided optimization", - author={Various}, - howpublished={\url{http://en.wikipedia.org/w/index.php?title=Profile-guided_optimization&oldid=509056192}} -} - -@Book{Varghesebook:05, - key = "Varghese2005", - author = {George Varghese}, - title = "{Network Algorithmics: An Interdisciplinary Approach to Designing Fast Networked Devices}", - publisher = {Morgan Kaufmann Publishers (Elsevier)}, - address = {San Francisco, CA}, - year = 2005 -} - -@article{Varghese:97, - key = "Varghese1997", - AUTHOR = "George Varghese and Tony Lauck", - TITLE = "{Hashed and Hierarchical Timing Wheels: Data Structures for the Efficient Implementation of a Timer Facility}", - JOURNAL = "IEEE Transactions on Networking", - MONTH = dec, - YEAR = 1997 -} - -@article{SchmidtSpringer:11g, - key = "White2011", - AUTHOR = "Jules White and Brian Dougherty and Richard Schantz and Douglas C. Schmidt and Adam Porter and Angelo Corsaro", - TITLE = "{R\&D Challenges and Solutions for Highly Complex Distributed Systems: a Middleware Perspective}", - JOURNAL = "the Springer Journal of Internet Services and Applications special issue on the Future of Middleware", - VOLUME=2, - NUMBER=3, - MONTH=dec, - YEAR = 2011 -} - -@inproceedings{JulesXML:05, - key = "White2005", - author = {Jules White and Boris Kolpackov and Balachandran Natarajan and Douglas C. 
Schmidt}, - title = "{Reducing Application Code Complexity with Vocabulary-specific XML language Bindings}", - booktitle = {ACM-SE 43: Proceedings of the 43rd annual Southeast regional conference}, - year = {2005}, -} diff --git a/tex/500L.template.tex b/tex/500L.template.tex index 3204b9c24..cd7895807 100644 --- a/tex/500L.template.tex +++ b/tex/500L.template.tex @@ -263,10 +263,6 @@ chapterchapterchapter -\bibliographystyle{alpha} - -\bibliography{500L} - \makeatletter \@openrightfalse %% \renewcommand*\cleardoublepage{\clearpage\if@twoside diff --git a/tex/500L.tex b/tex/500L.tex index c358110ae..0028269b0 100644 --- a/tex/500L.tex +++ b/tex/500L.tex @@ -260,12 +260,50 @@ \mainmatter +\include{image-filters} + +\include{dagoba} + +\include{ocr} + +\include{contingent} + +\include{same-origin-policy} + \include{blockcode} +\include{interpreter} + +\include{web-server} + +\include{static-analysis} + +\include{functionalDB} + +\include{flow-shop} + +\include{template-engine} + +\include{pedometer} + +\include{sampler} + +\include{spreadsheet} + +\include{cluster} + +\include{data-store} + +\include{objmodel} + +\include{ci} + +\include{crawler} + +\include{modeller} -\bibliographystyle{alpha} +\include{event-web-framework} -\bibliography{500L} \makeatletter \@openrightfalse diff --git a/tex/ci.tex b/tex/ci.tex index 6d1632201..86284505d 100644 --- a/tex/ci.tex +++ b/tex/ci.tex @@ -1,12 +1,5 @@ \begin{aosachapter}{A Continuous Integration System}{s:ci}{Malini Das} -\emph{Malini Das is a software engineer who is passionate about -developing quickly (but safely!), and solving cross-functional problems. -She has worked at Mozilla as a tools engineer and is currently honing -her skills at Twitch. Follow Malini on -\href{https://twitter.com/malinidas}{Twitter} or on her -\href{http://malinidas.com/}{blog}.} - \aosasecti{What is a Continuous Integration System?}\label{what-is-a-continuous-integration-system} diff --git a/tex/cluster.tex b/tex/cluster.tex index 20c5a2755..8e324bc1b 100644 --- a/tex/cluster.tex +++ b/tex/cluster.tex @@ -1,12 +1,5 @@ \begin{aosachapter}{Clustering by Consensus}{s:cluster}{Dustin J. Mitchell} -\emph{Dustin is an open source software developer and release engineer -at Mozilla. He has worked on projects as varied as a host configuration -system in Puppet, a Flask-based web framework, unit tests for firewall -configurations, and a continuous integration framework in Twisted -Python. Find him as \href{http://github.com/djmitche}{@djmitche} on -GitHub or at \href{mailto:dustin@mozilla.com}{dustin@mozilla.com}.} - \aosasecti{Introduction}\label{introduction} In this chapter, we'll explore implementation of a network protocol diff --git a/tex/contingent.tex b/tex/contingent.tex index 08568386f..dfc23b24d 100644 --- a/tex/contingent.tex +++ b/tex/contingent.tex @@ -1,24 +1,5 @@ \begin{aosachapter}{Contingent: A Fully Dynamic Build System}{s:contingent}{Brandon Rhodes and Daniel Rocco} -\emph{Brandon Rhodes started using Python in the late 1990s, and for 17 -years has maintained the PyEphem library for amateur astronomers. He -works at Dropbox, has taught Python programming courses for corporate -clients, consulted on projects like the New England Wildflower Society's -``Go Botany'' Django site, and will be the chair of the PyCon conference -in 2016 and 2017. 
Brandon believes that well-written code is a form of -literature, that beautifully formatted code is a work of graphic design, -and that correct code is one of the most transparent forms of thought.} - -\emph{Daniel Rocco loves Python, coffee, craft, stout, object and system -design, bourbon, teaching, trees, and Latin guitar. Thrilled that he -gets to write Python for a living, he is always on the lookout for -opportunities to learn from others in the community, and to contribute -by sharing knowledge. He is a frequent speaker at PyAtl on introductory -topics, testing, design, and shiny things; he loves seeing the spark of -wonder and delight in people's eyes when someone shares a novel, -surprising, or beautiful idea. Daniel lives in Atlanta with a -microbiologist and four aspiring rocketeers.} - \aosasecti{Introduction}\label{introduction} Build systems have long been a standard tool within computer diff --git a/tex/crawler.tex b/tex/crawler.tex index 7675236e8..2ea3e6139 100644 --- a/tex/crawler.tex +++ b/tex/crawler.tex @@ -1,17 +1,5 @@ \begin{aosachapter}{A Web Crawler With asyncio Coroutines}{s:crawler}{A. Jesse Jiryu Davis and Guido van Rossum} -\emph{A. Jesse Jiryu Davis is a staff engineer at MongoDB in New York. -He wrote Motor, the async MongoDB Python driver, and he is the lead -developer of the MongoDB C Driver and a member of the PyMongo team. He -contributes to asyncio and Tornado. He writes at -\url{http://emptysqua.re}.} - -\emph{Guido van Rossum is the creator of Python, one of the major -programming languages on and off the web. The Python community refers to -him as the BDFL (Benevolent Dictator For Life), a title straight from a -Monty Python skit. Guido's home on the web is -\url{http://www.python.org/~guido/}.} - \aosasecti{Introduction}\label{introduction} Classical computer science emphasizes efficient algorithms that complete diff --git a/tex/dagoba.tex b/tex/dagoba.tex index 6f996921c..ca4449d30 100644 --- a/tex/dagoba.tex +++ b/tex/dagoba.tex @@ -1,9 +1,5 @@ \begin{aosachapter}{Dagoba: an in-memory graph database}{s:dagoba}{Dann Toliver} -\emph{\href{https://twitter.com/dann}{Dann} enjoys building things, like -programming languages, databases, distributed systems, communities of -smart friendly humans, and pony castles with his two year old.} - \aosasecti{Prologue}\label{prologue} \begin{quote} diff --git a/tex/data-store.tex b/tex/data-store.tex index 5d25b91c0..17b97ca2e 100644 --- a/tex/data-store.tex +++ b/tex/data-store.tex @@ -1,15 +1,5 @@ \begin{aosachapter}{DBDB: Dog Bed Database}{s:data-store}{Taavi Burns} -\emph{As the newest bass (and sometimes tenor) in -\href{http://www.countermeasuremusic.com}{Countermeasure}, Taavi strives -to break the mould\ldots{} sometimes just by ignoring its existence. -This is certainly true through the diversity of workplaces in his -career: IBM (doing C and Perl), FreshBooks (all the things), Points.com -(doing Python), and now at PagerDuty (doing Scala). Aside from -that---when not gliding along on his Brompton folding bike---you might -find him playing Minecraft with his son or engaging in parkour (or rock -climbing, or other adventures) with his wife. 
He knits continental.}

\aosasecti{Introduction}\label{introduction}

DBDB (Dog Bed Database) is a Python library that implements a simple
diff --git a/tex/event-web-framework.tex b/tex/event-web-framework.tex
new file mode 100644
index 000000000..3a4913b6f
--- /dev/null
+++ b/tex/event-web-framework.tex
@@ -0,0 +1,1451 @@
+\begin{aosachapter}{An Event-driven Web Framework}{s:event-web-framework}{Leo Zovic}

In 2013, I decided to write a
\href{https://github.com/Inaimathi/deal}{web-based game prototyping
tool} for card and board games called \emph{House}. In these types of
games, it is common for one player to wait for another player to make a
move; however, when the other player finally does take action, we would
like the waiting player to be notified of that move quickly thereafter.

This is a problem that turns out to be more complicated than it first
seems. In this chapter, we'll explore the issues with using HTTP to
build this sort of interaction, and then we'll build a \emph{web
framework} in Common Lisp that allows us to solve similar problems in
the future.

\aosasecti{The Basics of HTTP Servers}\label{the-basics-of-http-servers}

At the simplest level, an HTTP exchange is a single request followed by
a single response. A \emph{client} sends a request, which includes a
resource identifier, an HTTP version tag, some headers and some
parameters. The \emph{server} parses that request, figures out what to
do about it, and sends a response which includes the same HTTP version
tag, a response code, some headers and a response body. (For more on
this, see \aosachapref{s:web-server}.)

Notice that, in this description, the server responds to a request from
a specific client. In our case, we want each player to be updated about
\emph{any} moves as soon as they happen, rather than only getting
notifications when their own move is made. This means we need the
server to \emph{push} messages to clients without first receiving a
request for the information\footnote{One solution to this problem is to
  force the clients to \emph{poll} the server. That is, each client
  would periodically send the server a request asking if anything has
  changed. This can work for simple applications, but in this chapter
  we're going to focus on the solutions available to you when this
  model stops working.}.

There are several standard approaches to enabling server push over HTTP.

\aosasectii{Comet/Longpoll}\label{cometlongpoll}

The ``longpoll'' technique has the client send the server a new request
as soon as it receives a response. Instead of fulfilling that request
right away, the server waits on a subsequent event to respond. This is
a bit of a semantic distinction, since the client is still taking
action on the user's behalf on every update.

\aosasectii{Server-sent Events (SSE)}\label{server-sent-events-sse}

Server-sent events require that the client initiate a connection and
then keep it open. The server periodically writes new data to the
connection without closing it, and the client interprets incoming new
messages as they arrive rather than waiting for the response connection
to terminate. This is a bit more efficient than the Comet/Longpoll
approach because each message doesn't have to incur the overhead of new
HTTP headers.

\aosasectii{Websockets}\label{websockets}

Websockets are a communication protocol built on top of HTTP. The
server and client open up an HTTP conversation, then perform a
handshake and protocol escalation.
The end result is that they're still communicating
over TCP/IP, but they're not using HTTP to do it at all. The advantage
this has over SSEs is that you can customize the protocol for
efficiency.

\aosasectii{Long-Lived Connections}\label{long-lived-connections}

These three approaches are quite different from one another, but they
all share an important characteristic: They all depend on long-lived
connections. Longpolling depends on the server keeping requests around
until new data is available, SSEs keep an open stream between client
and server to which data is periodically written, and Websockets change
the protocol a particular connection is speaking, but leave it open.

To see why this might cause problems for your average HTTP server,
let's consider how the underlying implementation might work.

\aosasectii{Traditional HTTP Server
Architecture}\label{traditional-http-server-architecture}

\label{sec.eventsweb.serverarch}

A single HTTP server processes many requests concurrently.
Historically, many HTTP servers have used a \emph{thread-per-request}
architecture. That is, for each incoming request, the server creates a
thread to do the work necessary to respond.

Since each of these connections is intended to be short-lived, we don't
need many threads executing in parallel to handle them all. This model
also simplifies the \emph{implementation} of the server by enabling the
server programmer to write code as if there were only one connection
being handled at any given time. It also gives us the freedom to clean
up failed or ``zombie'' connections and their associated resources by
killing the corresponding thread and letting the garbage collector do
its job.

The key observation is that an HTTP server hosting a ``traditional''
web application that has $N$ concurrent users might only need to handle
a very small fraction of $N$ requests \emph{in parallel} to succeed.
For the type of interactive application that we are trying to build in
our initial problem statement, $N$ users of our application will almost
certainly require the application to maintain at least $N$ connections
in parallel.

The consequence of keeping long-lived connections around is that we're
going to want either:

\begin{aosaitemize}

\item
  A platform where threads are ``cheap'' enough that we can use large
  numbers of them at once
\item
  A server architecture that can handle many connections with a single
  thread
\end{aosaitemize}

There are programming environments such as
\href{http://racket-lang.org/}{Racket},
\href{http://www.erlang.org/}{Erlang}, and
\href{http://hackage.haskell.org/package/base-4.7.0.1/docs/Control-Concurrent.html}{Haskell}
that do provide thread-like constructs that are ``lightweight'' enough
to consider the first option. This approach requires the programmer to
explicitly deal with synchronization issues, which are going to be much
more prevalent in a system where connections are open for a long time
and likely all competing for similar resources. Specifically, if we
have some sort of central data shared by several users simultaneously,
we will need to coordinate reads and writes of that data in some way.
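
To make that coordination burden concrete, here is a minimal sketch
(not House code) of what explicit synchronization looks like in a
thread-per-connection design, assuming the portable
\texttt{bordeaux-threads} library (package nickname \texttt{bt}):

\begin{verbatim}
;; A hypothetical shared message list, guarded by a lock.
(defvar *messages* nil)
(defvar *messages-lock* (bt:make-lock "messages"))

(defun post-message! (msg)
  ;; Every thread that touches *messages* must hold the lock;
  ;; otherwise two concurrent pushes can corrupt the list.
  (bt:with-lock-held (*messages-lock*)
    (push msg *messages*)))
\end{verbatim}

Every handler that reads or writes \texttt{*messages*} has to remember
to take the lock, and a handler that blocks while holding it stalls all
the others; this is exactly the bookkeeping an event-driven design lets
us avoid.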

If we don't have cheap threads at our disposal or we are unwilling to
work with explicit synchronization, we must consider having a single
thread handle many connections\footnote{We could consider a more
  general system that handles $N$ concurrent users with $M$ threads for
  some configurable value of $M$; in this model, the $N$ connections
  are said to be \emph{multiplexed} across the $M$ threads. In this
  chapter, we are going to focus on writing a program where $M$ is
  fixed at 1; however, the lessons learned here should be partially
  applicable to the more general model.}. In this model, our single
thread is going to be handling tiny ``slices'' of many requests all at
once, switching between them as efficiently as it possibly can. This
system architecture pattern is most commonly referred to as
\emph{event-driven} or \emph{event-based}\footnote{This nomenclature is
  a bit confusing, and has its origin in early operating-systems
  research. It refers to how communication is done between multiple
  concurrent processes. In a thread-based system, communication is done
  through a synchronized resource such as shared memory. In an
  event-based system, processes generally communicate through a queue
  where they post items that describe what they have done or what they
  want done, which is maintained by our single thread of execution.
  Since these items generally describe desired or past actions, they
  are referred to as `events'.}.

Since we are only managing a single thread, we don't have to worry as
much about protecting shared resources from simultaneous access.
However, we do have a unique problem of our own in this model. Since
our single thread is working on all in-flight requests at once, we must
make sure that it \textbf{never blocks}. Blocking on any connection
blocks the entire server from making progress on any other request. We
have to be able to move on to another client if the current one can't
be serviced further, and we need to be able to do so in a manner that
doesn't throw out all of the work done so far\footnote{See
  \aosachapref{s:crawler} for another take on this problem.}.

While it is uncommon for a programmer to explicitly tell a thread to
stop working, many common operations carry a risk of blocking. Because
threads are so prevalent and reasoning about asynchronicity is a heavy
burden on the programmer, many languages and their frameworks assume
that blocking on IO is a desirable property. This makes it very easy to
block somewhere \emph{by accident}. Luckily, Common Lisp does provide
us with a minimal set of asynchronous IO primitives which we can build
on top of.

\aosasectii{Architectural decisions}\label{architectural-decisions}

Now that we've studied the background of this problem, we've arrived at
the point where we need to make informed decisions about \emph{what} we
are building.

At the time I started thinking about this project, Common Lisp didn't
have a complete green-thread implementation, and the
\href{http://common-lisp.net/project/bordeaux-threads/}{standard
portable threading library} doesn't qualify as ``really REALLY cheap''.
The options amounted to either picking a different language, or
building an event-driven web server for my purpose. I chose the latter.

In addition to the server architecture, we also need to choose which of
the three server-push approaches to use.
The use-case we are considering (an +interactive multiplayer board game) requires frequent updates to each +client, but relatively sparse requests \emph{from} each client, which +fits the SSE approach to pushing updates, so we'll go with this. + +Now that we've motivated our architectural decision and decided on a +mechanism for simulating bidirectional communication between clients and +server, let's get started on building our web framework. We'll start by +building a relatively ``dumb'' server first, and then we'll extend it +into a web-application framework that lets us focus on \emph{what} our +heavily-interactive program needs to do, and not \emph{how} it is doing +it. + +\aosasecti{Building an Event-Driven Web +Server}\label{building-an-event-driven-web-server} + +\aosasectii{The Event Loop}\label{the-event-loop} + +The core of every event-driven program is the \emph{event loop}, which +looks something like this: + +\begin{verbatim} +(defmethod start ((port integer)) + (let ((server (socket-listen + usocket:*wildcard-host* port + :reuse-address t + :element-type 'octet)) + (conns (make-hash-table))) + (unwind-protect + (loop (loop for ready + in (wait-for-input + (cons server (alexandria:hash-table-keys conns)) + :ready-only t) + do (process-ready ready conns))) + (loop for c being the hash-keys of conns + do (loop while (socket-close c))) + (loop while (socket-close server))))) +\end{verbatim} + +In this loop, we have: + +\begin{aosaitemize} + +\item + a server socket that listens for incoming connections; +\item + a structure to store connections/buffers; +\item + an infinite loop waiting for new handshakes or incoming data on an + existing connection; +\item + cleanup clauses to prevent dangling sockets that are unexpectedly + killed (e.g.~by an interrupt) +\end{aosaitemize} + +If you haven't written a Common Lisp program before, this code block +requires some explanation. What we have written here is a \emph{method +definition}. While Lisp is popularly known as a functional language, it +also has its own system for object-oriented programming called ``The +Common Lisp Object System'', which is usually abbreviated as +``CLOS''\footnote{Pronounced ``kloss'', ``see-loss'' or ``see-lows'', + depending on who you talk to.}. + +\aosasectii{CLOS and Generic +Functions}\label{clos-and-generic-functions} + +In CLOS, instead of focusing on classes and methods, we instead write +\href{http://www.gigamonkeys.com/book/object-reorientation-generic-functions.html}{\emph{generic +functions}} that are implemented as collections of \emph{methods}. In +this model, methods don't \emph{belong} to classes, they +\emph{specialize on} types\footnote{The Julia programming language takes + a similar approach to object-oriented programming; you can learn more + about it in \aosachapref{s:static-analysis}.}. The \texttt{start} +method we just wrote is a unary method where the argument \texttt{port} +is \emph{specialized on} the type \texttt{integer}. This means that we +could have several implementations of \texttt{start} where \texttt{port} +varies in type, and the runtime will select which implementation to use +depending on the type of \texttt{port} when \texttt{start} is called. + +More generally, methods can specialize on more than one argument. When a +\texttt{method} is called, the runtime: + +\begin{aosaitemize} + +\item + dispatches on the type of its arguments to figure out which method + body should be run, and +\item + runs the appropriate function. 
\end{aosaitemize}

\aosasectii{Processing sockets}\label{processing-sockets}

We'll see another generic function at work in \texttt{process-ready},
which was called earlier from our event loop. It processes a ready
socket with one of two methods, depending on the type of socket we are
handling.

The two types we're concerned with are the \texttt{stream-usocket},
which represents a client socket that will make a request and expect to
be sent some data back, and the \texttt{stream-server-usocket}, which
represents our local TCP listener that will have new client connections
for us to deal with.

If a \texttt{stream-server-usocket} is \texttt{ready}, that means
there's a new client socket waiting to start a conversation. We call
\texttt{socket-accept} to accept the connection, and then put the
result in our connection table so that our event loop can begin
processing it with the others.

\begin{verbatim}
(defmethod process-ready ((ready stream-server-usocket) (conns hash-table))
  (setf (gethash (socket-accept ready :element-type 'octet) conns) nil))
\end{verbatim}

When a \texttt{stream-usocket} is \texttt{ready}, that means that it
has some bytes ready for us to read. (It's also possible that the other
party has terminated the connection.)

\begin{verbatim}
(defmethod process-ready ((ready stream-usocket) (conns hash-table))
  (let ((buf (or (gethash ready conns)
                 (setf (gethash ready conns)
                       (make-instance 'buffer :bi-stream (flex-stream ready))))))
    (if (eq :eof (buffer! buf))
        (ignore-errors
          (remhash ready conns)
          (socket-close ready))
        (let ((too-big?
               (> (total-buffered buf)
                  +max-request-size+))
              (too-old?
               (> (- (get-universal-time) (started buf))
                  +max-request-age+))
              (too-needy?
               (> (tries buf)
                  +max-buffer-tries+)))
          (cond (too-big?
                 (error! +413+ ready)
                 (remhash ready conns))
                ((or too-old? too-needy?)
                 (error! +400+ ready)
                 (remhash ready conns))
                ((and (request buf) (zerop (expecting buf)))
                 (remhash ready conns)
                 (when (contents buf)
                   (setf (parameters (request buf))
                         (nconc (parse buf) (parameters (request buf)))))
                 (handler-case
                     (handle-request ready (request buf))
                   (http-assertion-error () (error! +400+ ready))
                   ((and (not warning)
                         (not simple-error)) (e)
                    (error! +500+ ready e))))
                (t
                 (setf (contents buf) nil)))))))
\end{verbatim}

This is more involved than the first case. We:

\begin{aosaenumerate}
\def\labelenumi{\arabic{enumi}.}

\item
  Get the buffer associated with this socket, or create it if it
  doesn't exist yet;
\item
  Read the waiting input into that buffer, which happens in the call to
  \texttt{buffer!};
\item
  If that read got us an \texttt{:eof}, it means the other side hung
  up, so we discard the socket \emph{and} its buffer;
\item
  Otherwise, we check if the buffer is one of \texttt{complete?},
  \texttt{too-big?}, \texttt{too-old?} or \texttt{too-needy?}. If it's
  any of them, we remove it from the connections table and return the
  appropriate HTTP response.
\end{aosaenumerate}

This is the first time we're seeing I/O in our event loop. In our
discussion in \aosasecref{sec.eventsweb.serverarch}, we mentioned that
we have to be very careful about I/O in an event-driven system, because
we could accidentally block our single thread. So, what do we do here
to ensure that this doesn't happen? We have to explore our
implementation of \texttt{buffer!} to find out exactly how this works.

\aosasectii{Processing Connections Without
Blocking}\label{processing-connections-without-blocking}

The basis of our approach to processing connections without blocking is
the library function
\href{http://clhs.lisp.se/Body/f_rd_c_1.htm}{\texttt{read-char-no-hang}},
which immediately returns \texttt{nil} when called on a stream that has
no available data. Where there is data to be read, we use a buffer to
store intermediate input for this connection.

\begin{verbatim}
(defmethod buffer! ((buffer buffer))
  (handler-case
      (let ((stream (bi-stream buffer)))
        (incf (tries buffer))
        (loop for char = (read-char-no-hang stream) until (null char)
           do (push char (contents buffer))
           do (incf (total-buffered buffer))
           when (request buffer) do (decf (expecting buffer))
           when (line-terminated? (contents buffer))
           do (multiple-value-bind (parsed expecting) (parse buffer)
                (setf (request buffer) parsed
                      (expecting buffer) expecting)
                (return char))
           when (> (total-buffered buffer) +max-request-size+) return char
           finally (return char)))
    (error () :eof)))
\end{verbatim}

When \texttt{buffer!} is called on a \texttt{buffer}, it:

\begin{aosaitemize}

\item
  increments the \texttt{tries} count, so that we can evict ``needy''
  buffers in \texttt{process-ready};
\item
  loops to read characters from the input stream, and returns the last
  character it read once it has consumed all of the available input;
\item
  tracks any
  \texttt{\textbackslash{}r\textbackslash{}n\textbackslash{}r\textbackslash{}n}
  sequences so that we can later detect complete requests;
\item
  finally, if any error results, it returns \texttt{:eof} to signal
  that \texttt{process-ready} should discard this particular
  connection.
\end{aosaitemize}

The \texttt{buffer} type is a CLOS
\href{http://www.gigamonkeys.com/book/object-reorientation-classes.html}{\emph{class}}.
Classes in CLOS let us define a type with fields called \texttt{slots}.
We don't see the behaviours associated with \texttt{buffer} on the
class definition because, as we've already learned, we do that using
generic functions like \texttt{buffer!}.

\texttt{defclass} does allow us to specify getters/setters
(\texttt{reader}s/\texttt{accessor}s), and slot initializers;
\texttt{:initform} specifies a default value, while \texttt{:initarg}
identifies a hook that the caller of \texttt{make-instance} can use to
provide an initial value.
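
As a quick aside for readers new to CLOS, here is a sketch (not House
code) of what those slot options buy us: \texttt{:accessor} generates
both a reader and a \texttt{setf}-able writer, \texttt{:reader}
generates only the reader, and \texttt{:initarg} names the keyword that
callers of \texttt{make-instance} use to supply a value.

\begin{verbatim}
;; A hypothetical class illustrating the slot options used below:
(defclass counter ()
  ((count :accessor count-of :initform 0)   ; read/write; defaults to 0
   (label :reader label :initarg :label)))  ; read-only; set at creation

(let ((c (make-instance 'counter :label "requests")))
  (incf (count-of c))            ; works because :accessor is setf-able
  (list (label c) (count-of c))) ; => ("requests" 1)
\end{verbatim}

With those options in mind, here is the \texttt{buffer} class itself: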

\begin{verbatim}
(defclass buffer ()
  ((tries :accessor tries :initform 0)
   (contents :accessor contents :initform nil)
   (bi-stream :reader bi-stream :initarg :bi-stream)
   (total-buffered :accessor total-buffered :initform 0)
   (started :reader started :initform (get-universal-time))
   (request :accessor request :initform nil)
   (expecting :accessor expecting :initform 0)))
\end{verbatim}

Our \texttt{buffer} class has seven slots:

\begin{aosaitemize}

\item
  \texttt{tries}, which keeps count of how many times we've tried
  reading into this buffer
\item
  \texttt{contents}, which contains what we've read so far
\item
  \texttt{bi-stream}, which is a hack around some of those Common
  Lisp-specific, non-blocking-IO annoyances I mentioned earlier
\item
  \texttt{total-buffered}, which is a count of chars we've read so far
\item
  \texttt{started}, which is a timestamp that tells us when we created
  this buffer
\item
  \texttt{request}, which will eventually contain the request we
  construct from buffered data
\item
  \texttt{expecting}, which will signal how many more chars we're
  expecting (if any) after we buffer the request headers
\end{aosaitemize}

\aosasectii{Interpreting Requests}\label{interpreting-requests}

\label{sec.eventsweb.handlerfunc} Now that we've seen how we
incrementally assemble full requests from bits of data that are pooled
into our buffers, what happens when we have a full request ready for
handling? This happens in the method \texttt{handle-request}.

\begin{verbatim}
(defmethod handle-request ((socket usocket) (req request))
  (aif (lookup (resource req) *handlers*)
       (funcall it socket (parameters req))
       (error! +404+ socket)))
\end{verbatim}

This method adds another layer of error handling so that if the request
is old, big, or needy, we can send a \texttt{400} response to indicate
that the client provided us with some bad or slow data. However, if any
\emph{other} error happens here, it's because the programmer made a
mistake defining a \emph{handler}, which should be treated as a
\texttt{500} error. This will inform the client that something went
wrong on the server as a result of their legitimate request.

If the request is well-formed, we do the tiny and obvious job of
looking up the requested resource in the \texttt{*handlers*} table. If
we find one, we \texttt{funcall} \texttt{it}, passing along the client
\texttt{socket} as well as the parsed request parameters. If there's no
matching handler in the \texttt{*handlers*} table, we instead send
along a \texttt{404} error. The handler system will be part of our
full-fledged \emph{web framework}, which we'll discuss in a later
section.

We still haven't seen how requests are parsed and interpreted from one
of our buffers, though. Let's look at that next:

\begin{verbatim}
(defmethod parse ((buf buffer))
  (let ((str (coerce (reverse (contents buf)) 'string)))
    (if (request buf)
        (parse-params str)
        (parse str))))
\end{verbatim}

This high-level method delegates to a specialization of \texttt{parse}
that works with plain strings or to \texttt{parse-params} that
interprets the buffer contents as HTTP parameters. These are called
depending on how much of the request we've already processed; the final
\texttt{parse} happens when we already have a partial \texttt{request}
saved in the given \texttt{buffer}, at which point we're only looking
to parse the request body.
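
To ground the next bit of code, it helps to see the sort of raw text it
has to pick apart. A minimal (hypothetical) \texttt{POST} to the
\texttt{send-message} handler we'll define later might arrive looking
like this:

\begin{verbatim}
POST /send-message HTTP/1.1
Host: localhost:4242
Content-Type: application/x-www-form-urlencoded
Content-Length: 31

room=1&name=alice&message=hello
\end{verbatim}

The \texttt{string} specialization of \texttt{parse} handles the
request line and headers, while \texttt{parse-params} handles query
strings and bodies of the \texttt{name=value\&name=value} form: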

\begin{verbatim}
(defmethod parse ((str string))
  (let ((lines (split "\\r?\\n" str)))
    (destructuring-bind (req-type path http-version) (split " " (pop lines))
      (declare (ignore req-type))
      (assert-http (string= http-version "HTTP/1.1"))
      (let* ((path-pieces (split "\\?" path))
             (resource (first path-pieces))
             (parameters (second path-pieces))
             (req (make-instance 'request :resource resource)))
        (loop
           for header = (pop lines)
           for (name value) = (split ": " header)
           until (null name)
           do (push (cons (->keyword name) value) (headers req)))
        (setf (parameters req) (parse-params parameters))
        req))))

(defmethod parse-params ((params null)) nil)

(defmethod parse-params ((params string))
  (loop for pair in (split "&" params)
     for (name val) = (split "=" pair)
     collect (cons (->keyword name) (or val ""))))
\end{verbatim}

In the \texttt{parse} method specializing on \texttt{string}, we
transform the content into usable pieces. We do this on strings instead
of working directly with buffers because this makes it easier to test
the actual parsing code in an environment like an interpreter or REPL.

The parsing process is:

\begin{aosaenumerate}
\def\labelenumi{\arabic{enumi}.}

\item
  split on
  \texttt{"\textbackslash{}\textbackslash{}r?\textbackslash{}\textbackslash{}n"}
\item
  split the first line of that on \texttt{" "} to get the request type
  (\texttt{POST}, \texttt{GET}, etc)/URI path/http-version
\item
  assert that we're dealing with an \texttt{HTTP/1.1} request
\item
  split the URI path on \texttt{"?"}, which gives us a plain resource
  separate from any potential \texttt{GET} parameters
\item
  make a new \texttt{request} instance with the resource in place
\item
  populate that \texttt{request} instance with each split header line
\item
  set that \texttt{request}'s parameters to the result of parsing our
  \texttt{GET} parameters
\end{aosaenumerate}

As you might expect by now, \texttt{request} is a CLOS class:

\begin{verbatim}
(defclass request ()
  ((resource :accessor resource :initarg :resource)
   (headers :accessor headers :initarg :headers :initform nil)
   (parameters :accessor parameters :initarg :parameters :initform nil)))
\end{verbatim}

We've now seen how our clients can send requests and have them
interpreted and handled by our server. The last thing we have to
implement as part of our core server interface is the capability to
write responses back to the client.

\aosasectii{Rendering Responses}\label{rendering-responses}

Before we discuss rendering responses, we have to consider that there
are two kinds of responses that we may be returning to our clients. The
first is a ``normal'' HTTP response, complete with HTTP headers and
body. We represent these kinds of responses with instances of the
\texttt{response} class:

\begin{verbatim}
(defclass response ()
  ((content-type
    :accessor content-type :initform "text/html" :initarg :content-type)
   (charset
    :accessor charset :initform "utf-8")
   (response-code
    :accessor response-code :initform "200 OK" :initarg :response-code)
   (keep-alive?
    :accessor keep-alive? :initform nil :initarg :keep-alive?)
   (body
    :accessor body :initform nil :initarg :body)))
\end{verbatim}

The second is an \href{http://www.w3.org/TR/eventsource/}{SSE message},
which we will use to send an incremental update to our clients.
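
Before looking at how we represent these messages, it helps to see the
wire format we're aiming at. An SSE message is plain text: up to three
optional \texttt{field: value} lines, a \texttt{data} line, and a
terminating blank line. A hypothetical message with every field present
would look like this:

\begin{verbatim}
id: 42
event: chat-message
retry: 5000
data: {"name":"alice","message":"hi there"}

\end{verbatim}

We represent these messages with instances of an \texttt{sse} class,
one slot per field: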

\begin{verbatim}
(defclass sse ()
  ((id :reader id :initarg :id :initform nil)
   (event :reader event :initarg :event :initform nil)
   (retry :reader retry :initarg :retry :initform nil)
   (data :reader data :initarg :data)))
\end{verbatim}

We'll send an HTTP response whenever we receive a full HTTP request;
however, how do we know when and where to send SSE messages without an
originating client request?

A simple solution is to register \emph{channels}\footnote{We're
  incidentally introducing some new syntax here. This is our way of
  declaring a mutable variable. It has the form
  \texttt{(defparameter \textless{}name\textgreater{} \textless{}value\textgreater{} \textless{}optional docstring\textgreater{})}.},
to which we'll subscribe \texttt{socket}s as necessary.

\begin{verbatim}
(defparameter *channels* (make-hash-table))

(defmethod subscribe! ((channel symbol) (sock usocket))
  (push sock (gethash channel *channels*))
  nil)
\end{verbatim}

We can then \texttt{publish!} notifications to said channels as soon as
they become available.

\begin{verbatim}
(defmethod publish! ((channel symbol) (message string))
  (awhen (gethash channel *channels*)
    (setf (gethash channel *channels*)
          (loop with msg = (make-instance 'sse :data message)
             for sock in it
             when (ignore-errors
                    (write! msg sock)
                    (force-output (socket-stream sock))
                    sock)
             collect it))))
\end{verbatim}

In \texttt{publish!}, we call \texttt{write!} to actually write an
\texttt{sse} to a socket. We'll also need a specialization of
\texttt{write!} on \texttt{response}s to write full HTTP responses as
well. Let's handle the HTTP case first.

\begin{verbatim}
(defmethod write! ((res response) (socket usocket))
  (handler-case
      (with-timeout (.2)
        (let ((stream (flex-stream socket)))
          (flet ((write-ln (&rest sequences)
                   (mapc (lambda (seq) (write-sequence seq stream)) sequences)
                   (crlf stream)))
            (write-ln "HTTP/1.1 " (response-code res))
            (write-ln
             "Content-Type: " (content-type res) "; charset=" (charset res))
            (write-ln "Cache-Control: no-cache, no-store, must-revalidate")
            (when (keep-alive? res)
              (write-ln "Connection: keep-alive")
              (write-ln "Expires: Thu, 01 Jan 1970 00:00:01 GMT"))
            (awhen (body res)
              (write-ln "Content-Length: " (write-to-string (length it)))
              (crlf stream)
              (write-ln it))
            (values))))
    (trivial-timeout:timeout-error ()
      (values))))
\end{verbatim}

This version of \texttt{write!} takes a \texttt{response} and a
\texttt{usocket} named \texttt{socket}, and writes content to a stream
provided by \texttt{socket}. We locally define the function
\texttt{write-ln}, which takes some number of sequences and writes them
out to the stream followed by a \texttt{crlf}. This is for readability;
we could instead have called \texttt{write-sequence}/\texttt{crlf}
directly.

Note that we're doing the ``Must. Not. BLOCK.'' thing again. While
writes are likely to be buffered and are at lower risk of blocking than
reads, we still don't want our server to grind to a halt if something
goes wrong here. If the write takes more than \texttt{.2}
seconds\footnote{\texttt{with-timeout} has different implementations on
  different Lisps. In some environments, it may create another thread
  or process to monitor the one that invoked it. While we'd only be
  creating at most one of these at a time, it is a relatively
  heavyweight operation to be performing per-write.
We'd potentially
  want to consider an alternative approach in those environments.}, we
just move on (throwing out the current socket) rather than waiting any
longer.

Writing an \texttt{SSE} out is conceptually similar to, but
mechanically different from, writing out a \texttt{response}:

\begin{verbatim}
(defmethod write! ((res sse) (socket usocket))
  (let ((stream (flex-stream socket)))
    (handler-case
        (with-timeout (.2)
          (format
           stream "~@[id: ~a~%~]~@[event: ~a~%~]~@[retry: ~a~%~]data: ~a~%~%"
           (id res) (event res) (retry res) (data res)))
      (trivial-timeout:timeout-error ()
        (values)))))
\end{verbatim}

This is simpler than working with full HTTP responses since the SSE
message standard doesn't specify \texttt{CRLF} line-endings, so we can
get away with a single \texttt{format} call. The
\texttt{\textasciitilde{}@{[}...\textasciitilde{}{]}} blocks are
\emph{conditional directives}, which allow us to gracefully handle
\texttt{nil} slots. For example, if \texttt{(id res)} is non-nil, we'll
output \texttt{id: \textless{}the id here\textgreater{}}; otherwise we
will ignore the directive entirely. The payload of our incremental
update, \texttt{data}, is the only required slot of \texttt{sse}, so we
can include it without worrying about it being \texttt{nil}. And again,
we're not waiting around for \emph{too} long. After \texttt{.2}
seconds, we'll time out and move on to the next thing if the write
hasn't completed by then.

\aosasectii{Error Responses}\label{error-responses}

Our treatment of the request/response cycle so far hasn't covered what
happens when something goes wrong. Specifically, we used the
\texttt{error!} function in \texttt{handle-request} and
\texttt{process-ready} without describing what it does.

\begin{verbatim}
(define-condition http-assertion-error (error)
  ((assertion :initarg :assertion :initform nil :reader assertion))
  (:report (lambda (condition stream)
             (format stream "Failed assertions '~s'"
                     (assertion condition)))))
\end{verbatim}

\texttt{define-condition} creates new error classes in Common Lisp. In
this case, we are defining an HTTP assertion error, and stating that it
will specifically need to know the actual assertion it's acting on, and
a way to output itself to a stream. In other languages, you'd call this
a method. Here, it's a function that happens to be the slot value of a
class.

How do we represent errors to the client? Let's define the \texttt{4xx}
and \texttt{5xx}-class HTTP errors that we'll be using often:

\begin{verbatim}
(defparameter +404+
  (make-instance
   'response :response-code "404 Not Found"
   :content-type "text/plain"
   :body "Resource not found..."))

(defparameter +400+
  (make-instance
   'response :response-code "400 Bad Request"
   :content-type "text/plain"
   :body "Malformed, or slow HTTP request..."))

(defparameter +413+
  (make-instance
   'response :response-code "413 Request Entity Too Large"
   :content-type "text/plain"
   :body "Your request is too long..."))

(defparameter +500+
  (make-instance
   'response :response-code "500 Internal Server Error"
   :content-type "text/plain"
   :body "Something went wrong on our end..."))
\end{verbatim}

Now we can see what \texttt{error!} does:

\begin{verbatim}
(defmethod error! ((err response) (sock usocket) &optional instance)
  (declare (ignorable instance))
  (ignore-errors
+    (write! err sock)
+    (socket-close sock)))
+\end{verbatim}
+
+It takes an error response and a socket, writes the response to the
+socket and closes it (ignoring errors, in case the other end has already
+disconnected). The \texttt{instance} argument here is for
+logging/debugging purposes.
+
+And with that, we have an event-driven web server that can respond to
+HTTP requests or send SSE messages, complete with error handling!
+
+\aosasecti{Extending the Server Into a Web
+Framework}\label{extending-the-server-into-a-web-framework}
+
+We have now built a reasonably functional web server that will move
+requests, responses, and messages to and from clients. The actual work
+of any web application hosted by this server is done by delegating to
+handler functions, which were introduced in
+\aosasecref{sec.eventsweb.handlerfunc} but left underspecified there.
+
+The interface between our server and the hosted application is an
+important one, because it dictates how easily application programmers
+can work with our infrastructure. Ideally, our handler interface would
+map parameters from a request to a function that does the real work:
+
+\begin{verbatim}
+(define-handler (source :close-socket? nil) (room)
+  (subscribe! (intern room :keyword) sock))
+
+(define-handler (send-message) (room name message)
+  (publish! (intern room :keyword)
+            (encode-json-to-string
+             `((:name . ,name) (:message . ,message)))))
+
+(define-handler (index) ()
+  (with-html-output-to-string (s nil :prologue t :indent t)
+    (:html
+     (:head (:script
+             :type "text/javascript"
+             :src "/static/js/interface.js"))
+     (:body (:div :id "messages")
+            (:textarea :id "input")
+            (:button :id "send" "Send")))))
+\end{verbatim}
+
+One of the concerns I had in mind when writing House was that, like any
+application open to the greater internet, it would be processing
+requests from untrusted clients. It would be nice to be able to say
+specifically what \emph{type} of data each request should contain by
+providing a small \emph{schema} that describes the data. Our previous
+list of handlers would then look like this:
+
+\begin{verbatim}
+(defun len-between (min thing max)
+  (>= max (length thing) min))
+
+(define-handler (source :close-socket? nil)
+    ((room :string (len-between 0 room 16)))
+  (subscribe! (intern room :keyword) sock))
+
+(define-handler (send-message)
+    ((room :string (len-between 0 room 16))
+     (name :string (len-between 1 name 64))
+     (message :string (len-between 5 message 256)))
+  (publish! (intern room :keyword)
+            (encode-json-to-string
+             `((:name . ,name) (:message . ,message)))))
+
+(define-handler (index) ()
+  (with-html-output-to-string (s nil :prologue t :indent t)
+    (:html
+     (:head (:script
+             :type "text/javascript"
+             :src "/static/js/interface.js"))
+     (:body (:div :id "messages")
+            (:textarea :id "input")
+            (:button :id "send" "Send")))))
+\end{verbatim}
+
+While we are still working with Lisp code, this interface is starting to
+look almost like a \emph{declarative language}, in which we state
+\emph{what} we want our handlers to validate without thinking too much
+about \emph{how} they are going to do it. What we are doing is building
+a \emph{domain-specific language} (DSL) for handler functions; that is,
+we are creating a specific convention and syntax that allows us to
+concisely express exactly what we want our handlers to validate. This
+approach of building a small language to solve your problem at hand is
+frequently used by Lisp programmers, and it is a useful technique that
+can be used in other programming languages.
+
+\aosasectii{A DSL for Handlers}\label{a-dsl-for-handlers}
+
+Now that we have a loose specification for how we want our handler DSL
+to look, how do we implement it? That is, what specifically do we expect
+to happen when we call \texttt{define-handler}? Let's consider the
+definition for \texttt{send-message} from above:
+
+\begin{verbatim}
+(define-handler (send-message)
+    ((room :string (len-between 0 room 16))
+     (name :string (len-between 1 name 64))
+     (message :string (len-between 5 message 256)))
+  (publish! (intern room :keyword)
+            (encode-json-to-string
+             `((:name . ,name) (:message . ,message)))))
+\end{verbatim}
+
+What we would like \texttt{define-handler} to do here is:
+
+\begin{aosaenumerate}
+\def\labelenumi{\arabic{enumi}.}
+
+\item
+  Bind the action \texttt{(publish! ...)} to the URI
+  \texttt{/send-message} in the handlers table.
+\item
+  When a request to this URI is made, ensure that the HTTP parameters
+  \texttt{room}, \texttt{name} and \texttt{message} were included, and
+\item
+  validate that \texttt{room} is a string no longer than 16 characters,
+  \texttt{name} is a string of between 1 and 64 characters (inclusive)
+  and finally that \texttt{message} is a string of between 5 and 256
+  characters (also inclusive).
+\item
+  After the response has been returned, close the connection.
+\end{aosaenumerate}
+
+While we could write Lisp functions to do all of these things, and then
+manually assemble the pieces ourselves, a more common approach is to use
+a Lisp facility called \emph{macros} to \emph{generate} the Lisp code
+for us. This allows us to concisely express what we want our DSL to do,
+without having to maintain a lot of code to do it. You can think of a
+macro as an ``executable template'' that is expanded into Lisp code at
+macro-expansion time, before the program actually runs.
+
+Here's our \texttt{define-handler} macro\footnote{I should note that the
+  code block below uses VERY unconventional indentation for Common Lisp.
+  Arglists are typically not broken up over multiple lines, and are
+  usually kept on the same line as the macro/function name. I had to do
+  it to stick to the line-width guidelines for this book, but would
+  otherwise prefer to have longer lines that break naturally at places
+  dictated by the content of the code.}:
+
+\begin{verbatim}
+(defmacro define-handler
+    ((name &key (close-socket? t) (content-type "text/html"))
+     (&rest args) &body body)
+  (if close-socket?
+      `(bind-handler
+        ,name (make-closing-handler
+               (:content-type ,content-type)
+               ,args ,@body))
+      `(bind-handler
+        ,name (make-stream-handler ,args ,@body))))
+\end{verbatim}
+
+It delegates to three other macros (\texttt{bind-handler},
+\texttt{make-closing-handler}, \texttt{make-stream-handler}) that we
+will define later. \texttt{make-closing-handler} will create a handler
+for a full HTTP request/response cycle; \texttt{make-stream-handler}
+will instead handle an SSE message. The predicate \texttt{close-socket?}
+distinguishes between these cases for us. The backtick and comma are
+macro-specific operators that we can use to ``cut holes'' in our code
+that will be filled out by values specified in our Lisp code when we
+actually use \texttt{define-handler}.
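+
+To make the ``cut holes'' metaphor concrete, here's a quick REPL sketch
+(\texttt{process} is a made-up stand-in, not part of House) showing how
+the backtick, the comma, and the splicing variant \texttt{,@} behave:
+
+\begin{verbatim}
+CL-USER> (let ((args '(room name message)))
+           `(lambda ,args (process ,@args)))
+(LAMBDA (ROOM NAME MESSAGE) (PROCESS ROOM NAME MESSAGE))
+\end{verbatim}
+
+A comma drops a single evaluated value into the quoted template, while
+\texttt{,@} splices a list of values into place.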
+
+Notice how closely our macro conforms to our specification of what we
+wanted \texttt{define-handler} to do: If we were to write a series of
+Lisp functions to do all of these things, the intent of the code would
+be much more difficult to discern by inspection.
+
+\aosasectii{Expanding a Handler}\label{expanding-a-handler}
+
+Let's step through the expansion for the \texttt{send-message} handler
+so that we better understand what is actually going on when Lisp
+``expands'' our macro for us. We'll use the macro expansion feature from
+the \href{https://common-lisp.net/project/slime/}{SLIME} Emacs mode to
+do this. Calling \texttt{macroexpand-1} on our \texttt{define-handler}
+form will expand our macro by one ``level'', leaving our helper macros
+in their still-condensed form:
+
+\begin{verbatim}
+(BIND-HANDLER
+ SEND-MESSAGE
+ (MAKE-CLOSING-HANDLER
+  (:CONTENT-TYPE "text/html")
+  ((ROOM :STRING (LEN-BETWEEN 0 ROOM 16))
+   (NAME :STRING (LEN-BETWEEN 1 NAME 64))
+   (MESSAGE :STRING (LEN-BETWEEN 5 MESSAGE 256)))
+  (PUBLISH! (INTERN ROOM :KEYWORD)
+            (ENCODE-JSON-TO-STRING
+             `((:NAME ,@NAME) (:MESSAGE ,@MESSAGE))))))
+\end{verbatim}
+
+Our macro has already saved us a bit of typing by substituting our
+\texttt{send-message} specific code into our handler template.
+\texttt{bind-handler} is another macro which maps a URI to a handler
+function on our handlers table; since it's now at the root of our
+expansion, let's see how it is defined before expanding this further.
+
+\begin{verbatim}
+(defmacro bind-handler (name handler)
+  (assert (symbolp name) nil "`name` must be a symbol")
+  (let ((uri (if (eq name 'root) "/" (format nil "/~(~a~)" name))))
+    `(progn
+       (when (gethash ,uri *handlers*)
+         (warn ,(format nil "Redefining handler '~a'" uri)))
+       (setf (gethash ,uri *handlers*) ,handler))))
+\end{verbatim}
+
+The binding happens in the last line,
+\texttt{(setf (gethash ,uri *handlers*) ,handler)}, which is what
+hash-table assignments look like in Common Lisp (modulo the commas,
+which are part of our macro). Note that the \texttt{assert} is outside
+of the quoted area, which means that it'll be run as soon as the macro
+is \emph{called} rather than when its result is evaluated.
+
+When we further expand our expansion of the \texttt{send-message}
+\texttt{define-handler} above, we get:
+
+\begin{verbatim}
+(PROGN
+ (WHEN (GETHASH "/send-message" *HANDLERS*)
+   (WARN "Redefining handler '/send-message'"))
+ (SETF (GETHASH "/send-message" *HANDLERS*)
+       (MAKE-CLOSING-HANDLER
+        (:CONTENT-TYPE "text/html")
+        ((ROOM :STRING (LEN-BETWEEN 0 ROOM 16))
+         (NAME :STRING (LEN-BETWEEN 1 NAME 64))
+         (MESSAGE :STRING (LEN-BETWEEN 5 MESSAGE 256)))
+        (PUBLISH! (INTERN ROOM :KEYWORD)
+                  (ENCODE-JSON-TO-STRING
+                   `((:NAME ,@NAME) (:MESSAGE ,@MESSAGE)))))))
+\end{verbatim}
+
+This is starting to look more like a custom implementation of what we
+would have written to marshal a request from a URI to a handler function
+had we written it all ourselves. But we didn't have to!
+
+We still have \texttt{make-closing-handler} left to go in our expansion.
+Here is its definition:
+
+\begin{verbatim}
+(defmacro make-closing-handler
+    ((&key (content-type "text/html")) (&rest args) &body body)
+  `(lambda (sock parameters)
+     (declare (ignorable parameters))
+     ,(arguments
+       args
+       `(let ((res (make-instance
+                    'response
+                    :content-type ,content-type
+                    :body (progn ,@body))))
+          (write! res sock)
+          (socket-close sock)))))
+\end{verbatim}
+
+So making a closing-handler involves making a \texttt{lambda}, which is
+what anonymous functions are called in Common Lisp. We also set up an
+interior scope that makes a \texttt{response} out of the \texttt{body}
+argument we're passing in, \texttt{write!}s that to the requesting
+socket, then closes it. The remaining question is, what is
+\texttt{arguments}?
+
+\begin{verbatim}
+(defun arguments (args body)
+  (loop with res = body
+     for arg in args
+     do (match arg
+          ((guard arg-sym (symbolp arg-sym))
+           (setf res `(let ((,arg-sym ,(arg-exp arg-sym))) ,res)))
+          ((list* arg-sym type restrictions)
+           (setf res
+                 (let ((sym (or (type-expression
+                                 (arg-exp arg-sym)
+                                 type restrictions)
+                                (arg-exp arg-sym))))
+                   `(let ((,arg-sym ,sym))
+                      ,@(awhen (type-assertion arg-sym type restrictions)
+                          `((assert-http ,it)))
+                      ,res)))))
+     finally (return res)))
+\end{verbatim}
+
+Welcome to the hard part. \texttt{arguments} turns the validators we
+registered with our handler into a tree of parse attempts and
+assertions. \texttt{type-expression}, \texttt{arg-exp}, and
+\texttt{type-assertion} are used to implement and enforce a ``type
+system'' for the kinds of data we're expecting in our requests; we'll
+discuss them in \aosasecref{sec.eventsweb.types}. Used together with
+\texttt{make-closing-handler}, \texttt{arguments} turns the validation
+rules we wrote here:
+
+\begin{verbatim}
+(define-handler (send-message)
+    ((room :string (>= 16 (length room)))
+     (name :string (>= 64 (length name) 1))
+     (message :string (>= 256 (length message) 5)))
+  (publish! (intern room :keyword)
+            (encode-json-to-string
+             `((:name . ,name) (:message . ,message)))))
+\end{verbatim}
+
+into an ``unrolled'' sequence of checks needed to validate the request:
+
+\begin{verbatim}
+(LAMBDA (SOCK PARAMETERS)
+  (DECLARE (IGNORABLE PARAMETERS))
+  (LET ((ROOM (AIF (CDR (ASSOC :ROOM PARAMETERS))
+                   (URI-DECODE IT)
+                   (ERROR (MAKE-INSTANCE
+                           'HTTP-ASSERTION-ERROR
+                           :ASSERTION 'ROOM)))))
+    (ASSERT-HTTP (>= 16 (LENGTH ROOM)))
+    (LET ((NAME (AIF (CDR (ASSOC :NAME PARAMETERS))
+                     (URI-DECODE IT)
+                     (ERROR (MAKE-INSTANCE
+                             'HTTP-ASSERTION-ERROR
+                             :ASSERTION 'NAME)))))
+      (ASSERT-HTTP (>= 64 (LENGTH NAME) 1))
+      (LET ((MESSAGE (AIF (CDR (ASSOC :MESSAGE PARAMETERS))
+                          (URI-DECODE IT)
+                          (ERROR (MAKE-INSTANCE
+                                  'HTTP-ASSERTION-ERROR
+                                  :ASSERTION 'MESSAGE)))))
+        (ASSERT-HTTP (>= 256 (LENGTH MESSAGE) 5))
+        (LET ((RES (MAKE-INSTANCE
+                    'RESPONSE :CONTENT-TYPE "text/html"
+                    :BODY (PROGN
+                            (PUBLISH!
+                             (INTERN ROOM :KEYWORD)
+                             (ENCODE-JSON-TO-STRING
+                              `((:NAME ,@NAME)
+                                (:MESSAGE ,@MESSAGE))))))))
+          (WRITE! RES SOCK)
+          (SOCKET-CLOSE SOCK))))))
+\end{verbatim}
+
+This gets us the validation we need for full HTTP request/response
+cycles. What about our SSEs? \texttt{make-stream-handler} does the same
+basic thing as \texttt{make-closing-handler}, except that it writes an
+\texttt{SSE} rather than a \texttt{RESPONSE}, and it calls
+\texttt{force-output} instead of \texttt{socket-close} because we want
+to flush data over the connection without closing it:
+
+\begin{verbatim}
+(defmacro make-stream-handler ((&rest args) &body body)
+  `(lambda (sock parameters)
+     (declare (ignorable parameters))
+     ,(arguments
+       args
+       `(let ((res (progn ,@body)))
+          (write! (make-instance
+                   'response
+                   :keep-alive? t
+                   :content-type "text/event-stream")
+                  sock)
+          (write!
+           (make-instance 'sse :data (or res "Listening..."))
+           sock)
+          (force-output
+           (socket-stream sock))))))
+\end{verbatim}
+
+\texttt{assert-http} is a macro that creates the boilerplate code we
+need in error cases. It expands into a check of the given assertion,
+throws an \texttt{http-assertion-error} if it fails, and packs the
+original assertion along in that event.
+
+\begin{verbatim}
+(defmacro assert-http (assertion)
+  `(unless ,assertion
+     (error (make-instance
+             'http-assertion-error
+             :assertion ',assertion))))
+\end{verbatim}
+
+\aosasectii{HTTP ``Types''}\label{http-types}
+
+\label{sec.eventsweb.types}
+
+In the previous section, we briefly touched on three expressions that
+we're using to implement our HTTP type validation system:
+\texttt{arg-exp}, \texttt{type-expression} and \texttt{type-assertion}.
+Once you understand those, there will be no magic left in our framework.
+We'll start with the easy one first.
+
+\aosasectiii{arg-exp}\label{arg-exp}
+
+\texttt{arg-exp} takes an argument symbol and creates an \texttt{aif}
+expression that checks for the presence of a parameter.
+
+\begin{verbatim}
+(defun arg-exp (arg-sym)
+  `(aif (cdr (assoc ,(->keyword arg-sym) parameters))
+        (uri-decode it)
+        (error (make-instance
+                'http-assertion-error
+                :assertion ',arg-sym))))
+\end{verbatim}
+
+Evaluating \texttt{arg-exp} on a symbol looks like:
+
+\begin{verbatim}
+HOUSE> (arg-exp 'room)
+(AIF (CDR (ASSOC :ROOM PARAMETERS))
+     (URI-DECODE IT)
+     (ERROR (MAKE-INSTANCE
+             'HTTP-ASSERTION-ERROR
+             :ASSERTION 'ROOM)))
+HOUSE>
+\end{verbatim}
+
+We've been using forms like \texttt{aif} and \texttt{awhen} without
+understanding how they work, so let's take some time to explore them
+now.
+
+Recall that Lisp code is itself represented as a tree. That's what the
+parentheses are for; they show us how leaves and branches fit together.
+If we step back to what we were doing in the previous section,
+\texttt{make-closing-handler} calls a function called \texttt{arguments}
+to generate part of the Lisp tree it's constructing, which in turn calls
+some tree-manipulating helper functions, including \texttt{arg-exp}, to
+generate its return value.
+
+That is, we've built a small system that takes a Lisp expression as
+input and produces a different Lisp expression as output. Possibly the
+simplest way of conceptualizing this is as a small Common Lisp to
+Common Lisp compiler that is specialized to the problem at hand.
+
+One widely used class of such mini-compilers is the \emph{anaphoric
+macro}. This term comes from the linguistic concept of
+an \emph{anaphor}, which is the use of one word as a substitute for a
+group of words that preceded it. \texttt{aif} and \texttt{awhen} are
+anaphoric macros, and they're the ones that I tend to use most often.
+There are many more available in the
+\href{http://www.cliki.net/Anaphora}{\texttt{anaphora} package}.
+
+As far as I know, anaphoric macros were first defined by Paul Graham in
+a \href{http://dunsmor.com/lisp/onlisp/onlisp_18.html}{chapter of
+\emph{On Lisp}}. The use case he gives is a situation where you want to
+do some sort of expensive or semi-expensive check, then do something
+conditionally on the result. In the above context, we're using
+\texttt{aif} to check the result of an \texttt{alist} traversal:
+
+\begin{verbatim}
+(aif (cdr (assoc :room parameters))
+     (uri-decode it)
+     (error (make-instance
+             'http-assertion-error
+             :assertion 'room)))
+\end{verbatim}
+
+This takes the \texttt{cdr} of looking up the symbol \texttt{:room} in
+the association list \texttt{parameters}. If that returns a non-nil
+value, we \texttt{uri-decode} it; otherwise we throw an error of the
+type \texttt{http-assertion-error}.
+
+In other words, the above is equivalent to:
+
+\begin{verbatim}
+(let ((it (cdr (assoc :room parameters))))
+  (if it
+      (uri-decode it)
+      (error (make-instance
+              'http-assertion-error
+              :assertion 'room))))
+\end{verbatim}
+
+Strongly-typed functional languages like Haskell often use a
+\texttt{Maybe} type in this situation. In Common Lisp, we capture the
+symbol \texttt{it} in the expansion as the name for the result of the
+check.
+
+Understanding this, we should be able to see that \texttt{arg-exp} is
+generating a specific, repetitive piece of the code tree that we
+eventually want to evaluate: in this case, the piece that checks for the
+presence of the given parameter among the handler's \texttt{parameters}.
+Now, let's move on to\ldots{}
+
+\aosasectiii{type-expression}\label{type-expression}
+
+\begin{verbatim}
+(defgeneric type-expression (parameter type)
+  (:documentation
+   "A type-expression will tell the server
+how to convert a parameter from a string to
+a particular, necessary type."))
+...
+(defmethod type-expression (parameter type) nil)
+\end{verbatim}
+
+This is a generic function, rather than a plain function, that
+generates new tree structures (coincidentally, Lisp code). The only
+thing the above tells you is that by default, a \texttt{type-expression}
+is \texttt{NIL}. Which is to say, we don't have one. If we encounter a
+\texttt{NIL}, we use the raw output of \texttt{arg-exp}, but that
+doesn't tell us much about the most common case. To see that, let's take
+a look at a built-in (to \texttt{:house}) \texttt{define-http-type}
+expression.
+
+\begin{verbatim}
+(define-http-type (:integer)
+  :type-expression `(parse-integer ,parameter :junk-allowed t)
+  :type-assertion `(numberp ,parameter))
+\end{verbatim}
+
+An \texttt{:integer} is something we're making from a \texttt{parameter}
+by using \texttt{(parse-integer parameter :junk-allowed t)}.
+\texttt{junk-allowed} tells \texttt{parse-integer} that we're not
+confident the data we're giving it is actually parseable; if it isn't,
+\texttt{parse-integer} will return \texttt{nil} rather than raising an
+error, and the \texttt{type-assertion} will catch that. Evaluating the
+\texttt{type-expression} for an \texttt{:integer} looks like this:
+
+\begin{verbatim}
+HOUSE> (type-expression 'blah :integer)
+(PARSE-INTEGER BLAH :JUNK-ALLOWED T)
+HOUSE>
+\end{verbatim}
+
+\texttt{define-http-type}\footnote{This macro is difficult to read
+  because it tries hard to make its output human-readable, by expanding
+  \texttt{NIL}s away using \texttt{,@} where possible.} is one of the
+exported symbols for our framework. This lets our application
+programmers define their own types to simplify parsing above the handful
+of ``builtins'' that we give them (\texttt{:string}, \texttt{:integer},
+\texttt{:keyword}, \texttt{:json}, \texttt{:list-of-keyword} and
+\texttt{:list-of-integer}).
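+
+For example, an application could define its own type on top of these
+built-ins. A hypothetical \texttt{:positive-integer} (a sketch, not one
+of House's built-ins) might look like this:
+
+\begin{verbatim}
+(define-http-type (:positive-integer)
+  :type-expression `(parse-integer ,parameter :junk-allowed t)
+  :type-assertion `(and (numberp ,parameter) (plusp ,parameter)))
+\end{verbatim}
+
+Here is the implementation of \texttt{define-http-type} itself: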
+
+\begin{verbatim}
+(defmacro define-http-type ((type) &key type-expression type-assertion)
+  (with-gensyms (tp)
+    `(let ((,tp ,type))
+       ,@(when type-expression
+           `((defmethod type-expression (parameter (type (eql ,tp)))
+               ,type-expression)))
+       ,@(when type-assertion
+           `((defmethod type-assertion (parameter (type (eql ,tp)))
+               ,type-assertion))))))
+\end{verbatim}
+
+It works by creating \texttt{type-expression} and
+\texttt{type-assertion} method definitions for the type being defined.
+We could let users of our framework do this manually without much
+trouble; however, adding this extra level of indirection gives us, the
+framework programmers, the freedom to change \emph{how} types are
+implemented without forcing our users to re-write their specifications.
+This isn't just an academic consideration; I've personally made radical
+changes to this part of the system when first building it, and was
+pleased to find that I had to make very few edits to the applications
+that depended on it.
+
+Let's take a look at the expansion of that integer definition to see how
+it works in detail:
+
+\begin{verbatim}
+(LET ((#:TP1288 :INTEGER))
+  (DEFMETHOD TYPE-EXPRESSION (PARAMETER (TYPE (EQL #:TP1288)))
+    `(PARSE-INTEGER ,PARAMETER :JUNK-ALLOWED T))
+  (DEFMETHOD TYPE-ASSERTION (PARAMETER (TYPE (EQL #:TP1288)))
+    `(NUMBERP ,PARAMETER)))
+\end{verbatim}
+
+As we said, it doesn't reduce code size by much, but it does prevent us
+from needing to care what the specific parameters of those methods are,
+or even that they're methods at all.
+
+\aosasectiii{type-assertion}\label{type-assertion}
+
+Now that we can define types, let's look at how we use
+\texttt{type-assertion} to validate that a parse satisfies our
+requirements. It, too, takes the form of a complementary
+\texttt{defgeneric}/\texttt{defmethod} pair just like
+\texttt{type-expression}:
+
+\begin{verbatim}
+(defgeneric type-assertion (parameter type)
+  (:documentation
+   "A lookup assertion is run on a parameter
+immediately after conversion. Use it to restrict
+ the space of a particular parameter."))
+...
+(defmethod type-assertion (parameter type) nil)
+\end{verbatim}
+
+Here's what this one outputs:
+
+\begin{verbatim}
+HOUSE> (type-assertion 'blah :integer)
+(NUMBERP BLAH)
+HOUSE>
+\end{verbatim}
+
+There are cases where \texttt{type-assertion} won't need to do anything.
+For example, since HTTP parameters are given to us as strings, our
+\texttt{:string} type assertion has nothing to validate:
+
+\begin{verbatim}
+HOUSE> (type-assertion 'blah :string)
+NIL
+HOUSE>
+\end{verbatim}
+
+\aosasectii{All Together Now}\label{all-together-now}
+
+We did it! We built a web framework on top of an event-driven webserver
+implementation. Our framework (and handler DSL) defines new applications
+by:
+
+\begin{aosaitemize}
+
+\item
+  Mapping URLs to handlers;
+\item
+  Defining handlers to enforce the type safety and validation rules on
+  requests;
+\item
+  Optionally specifying new types for handlers as required.
+\end{aosaitemize}
+
+Now we can describe our application like this:
+
+\begin{verbatim}
+(defun len-between (min thing max)
+  (>= max (length thing) min))
+
+(define-handler (source :close-socket? nil)
+    ((room :string (len-between 0 room 16)))
+  (subscribe! (intern room :keyword) sock))
+
+(define-handler (send-message)
+    ((room :string (len-between 0 room 16))
+     (name :string (len-between 1 name 64))
+     (message :string (len-between 5 message 256)))
+  (publish! (intern room :keyword)
+            (encode-json-to-string
+             `((:name . ,name) (:message . ,message)))))
+
+(define-handler (index) ()
+  (with-html-output-to-string (s nil :prologue t :indent t)
+    (:html
+     (:head (:script
+             :type "text/javascript"
+             :src "/static/js/interface.js"))
+     (:body (:div :id "messages")
+            (:textarea :id "input")
+            (:button :id "send" "Send")))))
+
+(start 4242)
+\end{verbatim}
+
+Once we write \texttt{interface.js} to provide the client-side
+interactivity, this will start an HTTP chat server on port \texttt{4242}
+and listen for incoming connections.
+
+\end{aosachapter}
diff --git a/tex/flow-shop.tex b/tex/flow-shop.tex
index 0d70defca..c5bfc51ea 100644
--- a/tex/flow-shop.tex
+++ b/tex/flow-shop.tex
@@ -1,12 +1,5 @@
\begin{aosachapter}{A Flow Shop Scheduler}{s:flow-shop}{Dr. Christian Muise}

-\emph{\href{http://haz.ca}{Dr.~Christian Muise} is a Research Fellow
-with the \href{http://groups.csail.mit.edu/mers/}{MERS group} at
-\href{http://www.csail.mit.edu/}{MIT's CSAIL}. He is interested in a
-variety of topics including AI, data-driven projects, mapping, graph
-theory, and data visualization, as well as celtic music, carving,
-soccer, and coffee.}
-
\aosasecti{A Flow Shop Scheduler}\label{a-flow-shop-scheduler}

\emph{Flow shop scheduling} is one of the most challenging and
diff --git a/tex/functionalDB.tex b/tex/functionalDB.tex
index 2a340d894..66154cd6c 100644
--- a/tex/functionalDB.tex
+++ b/tex/functionalDB.tex
@@ -1,14 +1,5 @@
\begin{aosachapter}{An Archaeology-Inspired Database}{s:functionalDB}{Yoav Rubin}

-\emph{Yoav Rubin is a Senior Software Engineer at Microsoft, and prior
-to that was a Research Staff Member and a Master Inventor at IBM
-Research. He works now in the domain of data security in the cloud, and
-in the past his work focused on developing cloud or web based
-development environments. Yoav holds an M.Sc. in Medical Research in the
-field of Neuroscience and B.Sc in Information Systems Engineering. He
-goes by \href{https://twitter.com/yoavrubin}{@yoavrubin} on Twitter, and
-occasionally blogs at \url{http://yoavrubin.blogspot.com}.}
-
\aosasecti{Introduction}\label{introduction}

Software development is often viewed as a rigorous process, where the
diff --git a/tex/image-filters.tex b/tex/image-filters.tex
index aecac2eaf..613aa518f 100644
--- a/tex/image-filters.tex
+++ b/tex/image-filters.tex
@@ -1,168 +1,164 @@
\begin{aosachapter}{Making Your Own Image Filters}{s:image-filters}{Cate Huston}

-\emph{Cate Huston is a developer and entrepreneur focused on mobile.
-She's lived and worked in the UK, Australia, Canada, China and the
-United States, as an engineer at Google, an Extreme Blue intern at IBM,
-and a ski instructor. Cate speaks internationally on mobile development
-and her writing has been published on sites as varied as Lifehacker, The
-Daily Beast, The Eloquent Woman and Model View Culture. She co-curates
-Technically Speaking, blogs at Accidentally in Code and is
+\emph{Cate left the tech industry and spent a year finding her way back
+whilst building her passion project Show \& Hide. She is Director of
+Mobile Engineering at Ride, speaks internationally on mobile development
+and engineering culture, co-curates Technically Speaking and is an
+advisor at Glowforge. Cate doesn't exactly live in Colombia but she
+spends a lot of time there, and has lived and worked in the UK,
+Australia, Canada, China, and the United States, previously as an
+engineer at Google, an Extreme Blue intern at IBM, and a ski instructor. 
Cate blogs +at \href{http://www.catehuston.com/blog/}{Accidentally in Code} and is \href{https://twitter.com/catehstn}{@catehstn} on Twitter.} -\aosasecti{A Story of a Brilliant Idea That Wasn't All That -Brilliant}\label{a-story-of-a-brilliant-idea-that-wasnt-all-that-brilliant} +\aosasecti{A Brilliant Idea (That Wasn't All That +Brilliant)}\label{a-brilliant-idea-that-wasnt-all-that-brilliant} -In Chinese art, there is often a series of four paintings showing the -same place in different seasons. Color - the cool whites of winter, pale -hues of spring, lush greens of summer, and red and yellows of fall is -the differentiation. Sometime around 2011, I had what I thought was a -brilliant idea. I wanted to be able to visualize a photo series, as a -series of colors. I thought it would show travel, and progression -through the seasons. +When I was traveling in China I often saw series of four paintings +showing the same place in different seasons. Color --- the cool whites +of winter, pale hues of spring, lush greens of summer, and reds and +yellows of fall --- is what visually differentiates the seasons. Around +2011, I had what I thought was a brilliant idea: I wanted to be able to +visualize a photo series as a series of colors. I thought it would show +travel, and progression through the seasons. -I didn't know how to calculate the dominant color from an image, and I +But I didn't know how to calculate the dominant color from an image. I thought about scaling the image down to a 1x1 square and seeing what was -left, but that seemed like cheating. I knew how I wanted to display it -though, in a layout called the +left, but that seemed like cheating. I knew how I wanted to display the +images, though: in a layout called the \href{http://www.catehuston.com/applets/Sunflower/index.html}{Sunflower -layout}. It's the most efficient way to layout circles. +layout}. It's the most efficient way to lay out circles. I left this project for years, distracted by work, life, travel, talks. -Eventually I returned to it, and figured out how to calculate the -dominant color and finally +Eventually I returned to it, figured out how to calculate the dominant +color, and finally \href{http://www.catehuston.com/blog/2013/09/02/visualising-a-photo-series/}{finished my visualization}. And that is when I discovered that this idea wasn't, -in fact, brilliant. Because the progression wasn't as clear as I hoped, -the dominant color extracted wasn't generally the most appealing shade, -the creation took a long time (a couple of seconds per image), and -required hundreds of images to make something cool -\aosafigref{500.imagefilters.sunflower}. +in fact, brilliant. The progression wasn't as clear as I hoped, the +dominant color extracted wasn't generally the most appealing shade, the +creation took a long time (a couple of seconds per image), and it took +hundreds of images to make something cool +(\aosafigref{500l.imagefilters.sunflower}). \aosafigure[240pt]{image-filters-images/sunflower.jpg}{Sunflower layout}{500l.imagefilters.sunflower} -You might think this would be discouraging, but by the time I had got to -this point I had learned so many things that hadn't come my way before - -about color spaces, pixel manipulation, and I had started making these -cool partially colored images, of the kind you find on postcards of -London - the red bus, or phone booth, but everything else is in -gray-scale. 
+You might think this would be discouraging, but by the time I got to +this point I had learned many things that hadn't come my way before --- +about color spaces and pixel manipulation --- and I had started making +those cool partially colored images, the kind you find on postcards of +London with a red bus or phone booth and everything else in grayscale. I used a framework called \href{https://processing.org/}{Processing} because I was familiar with it from developing programming curricula, -and because I knew it made it really easy to create visual applications. -It's a tool originally designed for artists, so it abstracts away much -of the boilerplate. It allowed me to play, and to experiment. - -University, and later work, had filled up my time with other people's -ideas and priorities. Part of ``finishing'' this project was learning -how to carve out time to make progress on my own ideas in the time that -was left to me. I calculated this to be about 4 hours of good mental -time a week. A tool that allowed me to move faster was therefore really -helpful, even necessary. Although it came with it's own set of problems, -especially around writing tests. I felt thorough tests were especially -important for validating how it was working, and for making it easier to -pick up and resume a project that was often on ice for weeks, even -months at a time. Tests (and blogposts!) formed the documentation for -this project. I could leave failing tests to document what should happen -that I hadn't quite figured out yet, make changes with more confidence -that if I changed something that I had forgotten was critical, the tests -would remind me. - -This chapter will cover some details about Processing, talk you through -color spaces, decomposing an image into pixels and manipulating them, -and unit testing something that wasn't designed with testing in mind. -But I hope it will also prompt you to go and make some progress on -whatever idea you haven't made time for lately, because even if your -idea turns out to be as terrible as mine was, you may make something -cool, and learn something fascinating in the process. +and because I knew it made it easy to create visual applications. It's a +tool originally designed for artists, so it abstracts away much of the +boilerplate. It allowed me to play and experiment. + +University, and later work, filled up my time with other people's ideas +and priorities. Part of finishing this project was learning how to carve +out time to make progress on my own ideas; I required about four hours +of good mental time a week. A tool that allowed me to move faster was +therefore really helpful, even necessary --- although it came with its +own set of problems, especially around writing tests. + +I felt that thorough tests were especially important for validating how +the project was working, and for making it easier to pick up and resume +a project that was often on ice for weeks, even months at a time. Tests +(and blogposts!) formed the documentation for this project. I could +leave failing tests to document what should happen that I hadn't figured +out yet, and make changes with confidence that if I changed something +that I had forgotten was critical, the tests would remind me. + +This chapter will cover some details about Processing and talk you +through color spaces, decomposing an image into pixels and manipulating +them, and unit testing something that wasn't designed with testing in +mind. 
But I hope it will also prompt you to make some progress on +whatever idea you haven't made time for lately; even if your idea turns +out to be as terrible as mine was, you may make something cool and learn +something fascinating in the process. \aosasecti{The App}\label{the-app} -This chapter will show you how to create your own image filter -application using Processing (a programming language and development -environment built on Java, used as a tool for artists to create with), -where you can load in your digital images and manipulate them yourself -using filters that you create. We'll cover aspects of color, setting up -the application in Processing, some of the features of Processing, how -to create color filters (mimicking what used to be used in old-fashioned -photography) and also a special kind of filter that can only be done -digitally - extracting the dominant hue from an image, and showing or -hiding it, to create eerie partially colored images. +This chapter will show you how to create an image filter application +that you can use to manipulate your digital images using filters that +you create. We'll use Processing, a programming language and development +environment built in Java. We'll cover setting up the application in +Processing, some of the features of Processing, aspects of color +representation, and how to create color filters (mimicking what was used +in old-fashioned photography). We'll also create a special kind of +filter that can only be done digitally: determining the dominant hue of +an image and showing or hiding it, to create eerie partially colored +images. -We'll also be adding a thorough test suite, and covering how to handle -some of the limitations of Processing when it comes to testability. +Finally, we'll add a thorough test suite, and cover how to handle some +of the limitations of Processing when it comes to testability. \aosasecti{Background}\label{background} Today we can take a photo, manipulate it, and share it with all our friends in a matter of seconds. However, a long long time ago (in -digital terms anyway), it used to be a process that would take weeks. +digital terms), it was a process that took weeks. -We would take the picture, then when we had used a whole roll of film, -we would take it in to be developed (often at the pharmacy), pick it up -some days later, and then discover that there was something wrong with -many of the pictures - hand not steady enough? Random person/thing that -we didn't remember seeing at the time. Of course by then it was too late -to remedy it. +In the old days, we would take the picture, then when we had used a +whole roll of film, we would take it in to be developed (often at the +pharmacy). We'd pick up the developed pictures some days later --- and +discover that there was something wrong with many of them. Hand not +steady enough? Random person or thing that we didn't notice at the time? +Overexposed? Underexposed? Of course by then it was too late to remedy +the problem. -Then next time we had friends over, we could show them our carefully -curated album of that trip we took, or alternatively, just tip the -pictures out of the shoebox we were keeping them in, onto the coffee -table. +The process that turned the film into pictures was one that most people +didn't understand. Light was a problem, so you had to be careful with +the film. There was a process, involving darkened rooms and chemicals, +that they sometimes showed in films or on TV. 
-
-The process that would turn the film into the picture was one that most
-people didn't understand. Light was a problem, so you had to be careful
-with the film. There was some process featuring darkened rooms and
-chemicals that they sometimes showed bits of in films or on TV.
-
-Which actually sounds familiar. Few people understand how we get from
-the point and click on our smartphone camera to an image on instagram.
-But actually there are many similarities.
+But probably even fewer people understand how we get from the
+point-and-click on our smartphone camera to an image on Instagram. There
+are actually many similarities.

\aosasectii{Photographs, the Old Way}\label{photographs-the-old-way}

-Photographs are created by the effect of light on a light sensitive
-surface. Photographic film is covered in silver halide crystals (extra
-layers are used to create color photographs - for simplicity let's just
-stick to black-and-white photography here.)
+Photographs are created by the effect of light on a light-sensitive
+surface. Photographic film is covered in silver halide crystals. (Extra
+layers are used to create color photographs --- for simplicity let's
+just stick to black-and-white photography here.)

-When talking an old fashioned style photograph - with film - the light
+When taking an old-fashioned photograph --- with film --- the light
hits the film according to what you're pointing at, and the crystals at
-those points are changed in varying degrees - according to the amount of
+those points are changed in varying degrees, according to the amount of
light. Then, the
\href{http://photography.tutsplus.com/tutorials/step-by-step-guide-to-developing-black-and-white-t-max-film-{}-photo-2580}{development
process} converts the silver salts to metallic silver, creating the
-negative. The negative has light and dark areas of the image inverted to
-their opposite. Once the negatives have been developed, there is another
-series of steps that reverse the image back and print it.
+negative. The negative has the light and dark areas of the image
+inverted. Once the negatives have been developed, there is another
+series of steps to reverse the image and print it.

\aosasectii{Photographs, the Digital
Way}\label{photographs-the-digital-way}

When taking pictures using our smartphones or digital cameras, there is
-no film. There is something called an Active Pixel Sensor which
+no film. There is something called an \emph{active-pixel sensor} which
functions in a similar way. Where we used to have silver crystals, now
-we have pixels - tiny squares (in fact, pixel is short for ``picture
-element''). Digital images are made up of pixels, and the higher the
+we have pixels --- tiny squares. (In fact, pixel is short for ``picture
+element''.) Digital images are made up of pixels, and the higher the
resolution the more pixels there are. This is why low-resolution images
-are described as ``pixelated'' - you can start to see the squares. These
-pixels are just stored in an array, which the number in each array
+are described as ``pixelated'' --- you can start to see the squares.
+These pixels are stored in an array, with the number in each array
``box'' containing the color.

-In \aosafigref{500l.imagefilters.animals}, we see some blow up animals
-taken at MoMA in NYC at high resolution.
+In \aosafigref{500l.imagefilters.animals}, we see a high-resolution
+picture of some blow-up animals taken at MoMA in NYC. 
+\aosafigref{500l.imagefilters.pixelanimals} is the same image blown up, but with just 24 x 32 pixels. \aosafigure[240pt]{image-filters-images/animals.jpg}{Blow-up animals at MoMA NY}{500l.imagefilters.animals} -\aosafigure[240pt]{image-filters-images/pixelanimals.jpg}{Blow-up animals, blown-up}{500l.imagefilters.pixelanimals} +\aosafigure[240pt]{image-filters-images/pixelanimals.jpg}{Blow-up animals, blown up}{500l.imagefilters.pixelanimals} -See how it is so blurry? The lines of the animals aren't as smooth? We -call that \emph{pixelation}, which means the image is too big for the -number of pixels it contains, and the squares become visible. Here we -can use it to get a better sense of an image made up of squares of -color. +See how it's so blurry? We call that \emph{pixelation}, which means the +image is too big for the number of pixels it contains and the squares +become visible. Here we can use it to get a better sense of an image +being made up of squares of color. What do these pixels look like? If we print out the colors of some of the pixels in the middle (10,10 to 10,14) using the handy @@ -176,7 +172,7 @@ FFF5B7 \end{verbatim} -Hex colors are 6 characters long. The first two are the red value, the +Hex colors are six characters long. The first two are the red value, the second two the green value, and the third two the blue value. Sometimes there are an extra two characters which are the alpha value. In this case \texttt{FFFAC4} means: @@ -191,12 +187,12 @@ blue = C4 (hex) = 196 (base 10) \end{aosaitemize} -\aosasecti{Running The App}\label{running-the-app} +\aosasecti{Running the App}\label{running-the-app} In \aosafigref{500l.imagefilters.app}, we have a picture of our app running. It's very much developer-designed, I know, but we only have 500 -lines of Java here so something had to suffer! You can see the list of -commands on the right. Some things we can do: +lines of Java to work with so something had to suffer! You can see the +list of commands on the right. Some things we can do: \begin{aosaitemize} @@ -217,21 +213,17 @@ \aosafigure[240pt]{image-filters-images/app.jpg}{The App}{500l.imagefilters.app} -We're using Processing, as it makes it really simple to create a little -application, and do image manipulation. Processing is an IDE (Integrated -Development Environment) and a set of libraries to make visual -applications. It was originally created as a way for designers and -artists to create digital apps, so it has a very visual focus. - -We'll focus on the Java-based version although Processing has now been -ported to other languages, including Javascript, which is awesome if you -want to upload your apps to the internet. +Processing makes it simple to create a little application and do image +manipulation; it has a very visual focus. We'll work with the Java-based +version, although Processing has now been ported to other languages +(including Javascript which is awesome if you want to upload your apps +to the Internet). -For this tutorial, I use it in Eclipse by adding \texttt{core.jar} to my -build path. If you want, you can use the Processing IDE, which removes -the need for a lot of boilerplate Java code. If you later want to port -it over to Processing.js and upload it online, you need to replace the -file chooser with something else. +For this tutorial, I use Processing in Eclipse by adding +\texttt{core.jar} to my build path. If you want, you can use the +Processing IDE, which removes the need for a lot of boilerplate Java +code. 
If you later want to port it over to Processing.js and upload it
+online, you need to replace the file chooser with something else.

There are detailed instructions with screenshots in the project's
\href{https://github.com/aosabook/500lines/blob/master/image-filters/SETUP.MD}{repository}.
@@ -242,24 +234,23 @@

\aosasectii{Size and Color}\label{size-and-color}

We don't want our app to be a tiny grey window, so the two essential
-methods that we will start by overriding (implementing in our own class,
-instead of using the default implementation in the superclass, in this
-case \texttt{PApplet}) are
+methods that we will start by overriding are
\href{http://processing.org/reference/setup_.html}{\texttt{setup()}},
and
\href{http://processing.org/reference/draw_.html}{\texttt{draw()}}.

-\texttt{setup()} is only called when the app starts, and is where we do
-things like set the size. \texttt{draw()} is called for every animation,
-or after some action can be triggered by calling \texttt{redraw()} (as
-covered in the Processing Documentation, \texttt{draw()} should not be
-called explicitly).
+The \texttt{setup()} method is only called when the app starts, and is
+where we do things like set the size of the app window. The
+\texttt{draw()} method is called for every frame of animation, or can
+be triggered after some action by calling \texttt{redraw()}. (As covered
+in the Processing Documentation, \texttt{draw()} should not be called
+explicitly.)

Processing is designed to work nicely to create animated sketches, but
in this case we don't want animation\footnote{If we wanted to create an
  animated sketch we would not call \texttt{noLoop()} (or, if we wanted
  to start animating later, we would call \texttt{loop()}). The
  frequency of the animation is determined by \texttt{frameRate()}.}, we
-want to respond to key presses. To prevent animation (this would be a
-drag on performance) we will want to call
+want to respond to key presses. To prevent animation (which would be a
+drag on performance) we will call
\href{http://www.processing.org/reference/noLoop_.html}{\texttt{noLoop()}}
from setup. This means that \texttt{draw()} will only be called
immediately after \texttt{setup()}, and whenever we call
@@ -282,12 +273,13 @@
}
\end{verbatim}

-These don't really do much yet, but run the app again adjusting the
-constants in \texttt{WIDTH} and \texttt{HEIGHT} to see different sizes.
+These don't really do much yet, but try running the app again, adjusting
+the constants in \texttt{WIDTH} and \texttt{HEIGHT}, to see different
+sizes.

\texttt{background(0)} specifies a black background. Try changing the
-number passed into \texttt{background()} and see what happens - it's the
-alpha value, and so if you only pass one number in, it is always
+number passed into \texttt{background()} and see what happens --- a
+single argument is treated as a grey value, so the result is always
greyscale. Alternatively, you can call
\texttt{background(int r, int g, int b)}.
@@ -337,24 +329,25 @@

\aosasectii{File Chooser}\label{file-chooser}

-Processing handles most of this, we just need to call
+Processing handles most of the file choosing process; we just need to
+call
\href{http://www.processing.org/reference/selectInput_.html}{\texttt{selectInput()}},
and implement a callback (which must be public).

-To people familiar with Java this might seem odd, a listener or a lambda
-expression might make more sense. 
However as Processing was developed as -a tool for artists, for the most part the necessity for these things has -been abstracted away by the language to keep it unintimidating. This is -a choice the designers made - to prioritize simplicity and being -unintimidating over power and flexibility. If you use the stripped down -Processing editor, rather than Processing as a library in Eclipse you -don't even need to define class names! +To people familiar with Java this might seem odd; a listener or a lambda +expression might make more sense. However, as Processing was developed +as a tool for artists, for the most part these things have been +abstracted away by the language to keep it unintimidating. This is a +choice the designers made: to prioritize simplicity and approachability +over power and flexibility. If you use the stripped-down Processing +editor, rather than Processing as a library in Eclipse, you don't even +need to define class names. Other language designers with different target audiences make different -choices, as they should. For example if we consider Haskell, a purely -functional language, that purity of functional language paradigms is -prioritised over everything else. This makes it a better tool for -mathematical problems than anything requiring IO. +choices, as they should. For example, in Haskell, a purely functional +language, purity of functional language paradigms is prioritised over +everything else. This makes it a better tool for mathematical problems +than for anything requiring IO. \begin{verbatim} // Called on key press. @@ -373,11 +366,11 @@ } \end{verbatim} -\aosasectii{Responding To Key Presses}\label{responding-to-key-presses} +\aosasectii{Responding to Key Presses}\label{responding-to-key-presses} -Normally in Java doing this requires adding listeners and implementing -anonymous functions. However like the file chooser, Processing handles a -lot of this for us. We just need to implement +Normally in Java, responding to key presses requires adding listeners +and implementing anonymous functions. However, as with the file chooser, +Processing handles a lot of this for us. We just need to implement \href{https://www.processing.org/reference/keyPressed_.html}{\texttt{keyPressed()}}. \begin{verbatim} @@ -388,52 +381,51 @@ If you run the app again, every time you press a key it will output it to the console. Later, you'll want to do different things depending on -what key was pressed, and to do this you just switch on the key value -(this exists in the \texttt{PApplet} superclass, and contains the last -key pressed). +what key was pressed, and to do this you just switch on the key value. +(This exists in the \texttt{PApplet} superclass, and contains the last +key pressed.) \aosasecti{Writing Tests}\label{writing-tests} This app doesn't do a lot yet, but we can already see number of places -where things can go wrong, for example triggering the wrong action with +where things can go wrong; for example, triggering the wrong action with key presses. As we add complexity, we add more potential problems, such -as updating the image state incorrectly, or miscalculations of the pixel -colors after applying a filter. I also just (some think weirdly) enjoy -writing unit tests. Whilst some people seem to think of testing as a -thing that delays checking code in, I see tests as my \#1 debugging -tool, and an opportunity to deeply understand what is going on in my -code. 
- -I adore Processing, but as covered above it's designed as a tool for -artists to create visual applications, and in this maybe unit testing -isn't a huge concern. It's clear it isn't written for testability, in -fact it's written in such a way that makes it untestable, as is. Part of -this is because it hides complexity, some of that hidden complexity is -really useful in writing unit tests. The use of static and final methods -make it much harder to use mocks (objects that record interaction and -allow you to fake part of your system to verify another part is behaving -correctly), which rely on the ability to subclass. +as updating the image state incorrectly, or miscalculating pixel colors +after applying a filter. I also just enjoy (some think weirdly) writing +unit tests. Whilst some people seem to think of testing as a thing that +delays checking code in, I see tests as my \#1 debugging tool, and as an +opportunity to deeply understand what is going on in my code. + +I adore Processing, but it's designed to create visual applications, and +in this area maybe unit testing isn't a huge concern. It's clear it +isn't written for testability; in fact it's written in such a way that +makes it untestable, as is. Part of this is because it hides complexity, +and some of that hidden complexity is really useful in writing unit +tests. The use of static and final methods make it much harder to use +mocks (objects that record interaction and allow you to fake part of +your system to verify another part is behaving correctly), which rely on +the ability to subclass. We might start a greenfield project with great intentions to do Test Driven Development (TDD) and achieve perfect test coverage, but in reality we are usually looking at a mass of code written by various and assorted people and trying to figure out what it is supposed to be doing, and how and why it is going wrong. Then maybe we don't write -perfect tests, but writing tests at all will help us navigate this +perfect tests, but writing tests at all will help us navigate the situation, document what is happening and move forward. -To do that we create ``seams'' that will allow us to break something up -from it's amorphous mass of tangled pieces and verify. To do this, we -will sometimes create wrapper classes that can be mocked. These do -nothing more than hold a collection of similar methods, or forward calls -on to another object that can not be mocked (due to final or static +We create ``seams'' that allow us to break something up from its +amorphous mass of tangled pieces and verify it in parts. To do this, we +will sometimes create wrapper classes that can be mocked. These classes +do nothing more than hold a collection of similar methods, or forward +calls on to another object that cannot be mocked (due to final or static methods), and as such they are very dull to write, but key to creating seams and making the code testable. -For tests, as I was working in Java with Processing as a library, I used -JUnit. For mocking, I used Mockito. You can download -\href{https://code.google.com/p/mockito/downloads/list}{mockito} and add -the jar to your buildpath in the same way you added \texttt{core.jar}. I +I used JUnit for tests, as I was working in Java with Processing as a +library. For mocking I used Mockito. You can download +\href{https://code.google.com/p/mockito/downloads/list}{Mockito} and add +the JAR to your buildpath in the same way you added \texttt{core.jar}. 
I
+created two helper classes that make it possible to mock and test the
+app (otherwise we can't test behavior involving \texttt{PImage} or
+\texttt{PApplet} methods).
@@ -441,42 +433,42 @@
\texttt{IFAImage} is a thin wrapper around PImage.
\texttt{PixelColorHelper} is a wrapper around applet pixel color
methods. These wrappers call the final, and static methods, but the
-caller methods are neither final nor static themselves - this allows
+caller methods are neither final nor static themselves --- this allows
them to be mocked. These are deliberately lightweight, and we could have
-gone further, however this was sufficient to address the major problem
-of testability when using Processing - static, and final methods. The
-goal here was to make an app after all - not a unit testing framework
-for Processing!
+gone further, but this was sufficient to address the major problem of
+testability when using Processing --- static and final methods. The
+goal was to make an app, after all --- not a unit testing framework for
+Processing!

A class called \texttt{ImageState} forms the ``model'' of this
application, removing as much logic from the class extending
\texttt{PApplet} as possible, for better testability. It also makes for
-a cleaner design and separation of concerns - the \texttt{App} controls
+a cleaner design and separation of concerns: the \texttt{App} controls
the interactions and the UI, not the details of the image manipulation.

-\aosasecti{Do It Yourself Filters}\label{do-it-yourself-filters}
+\aosasecti{Do-It-Yourself Filters}\label{do-it-yourself-filters}

\aosasectii{RGB Filters}\label{rgb-filters}

Before we start writing more complicated pixel processing, we can start
with a short exercise that will get us comfortable doing pixel
-manipulation . We'll create standard (red, green, blue) color filters
-that will allow us to create the same effect as a colored plate over the
-lens of a camera, only letting through light with enough red (or green,
-or blue).
+manipulation. We'll create standard (red, green, blue) color filters
+that will allow us to create the same effect as placing a colored plate
+over the lens of a camera, only letting through light with enough red
+(or green, or blue).

By applying different RGB filters to an image we can make it almost seem
-like the seasons are different (remember the Chinese four seasons
-paintings mentioned earlier?), depending on which colors are filtered
-out and which are emphasized.
+like the seasons are different depending on which colors are filtered
+out and which are emphasized. (Remember the four-seasons paintings we
+imagined earlier?)

How do we do it?

\begin{aosaitemize}

\item
-  Set the filter (you can combine red, green and blue filters as in the
-  image earlier, I haven't in these examples so that the effect is
-  clearer).
+  Set the filter. (You can combine red, green and blue filters as in the
+  image earlier; I haven't in these examples so that the effect is
+  clearer.)
\item
  For each pixel in the image, check its RGB value.
\item
@@ -490,9 +482,9 @@
\end{aosaitemize}

Although our image is 2-dimensional, the pixels live in a 1-dimensional
-array starting top left moving
+array starting top-left and moving
\href{https://processing.org/tutorials/pixels/}{left to right, top to
-bottom}. The array indices for a 4x4 image are shown in .
+bottom}. The array indices for a 4x4 image are shown here:

\begin{table}
\centering
@@ -539,10 +531,9 @@
how our filters work. 
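+
+As a rough sketch of the filter steps listed above (illustrative only,
+not the app's actual filter code; \texttt{img} is assumed to be a
+loaded \texttt{PImage} and \texttt{redFilter} a threshold value we
+picked), a red filter over the pixel array of a Processing sketch might
+look like this:
+
+\begin{verbatim}
+img.loadPixels();
+for (int i = 0; i < img.pixels.length; i++) {
+  int c = img.pixels[i];
+  // Zero out the red channel wherever it doesn't pass the filter.
+  float r = red(c) >= redFilter ? red(c) : 0;
+  img.pixels[i] = color(r, green(c), blue(c));
+}
+img.updatePixels();
+\end{verbatim}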
To prepare ourselves for working on our
next filter, let's explore the concept of color a bit more.

-You may not have known it at the time, but we were unwittingly using a
-concept in the previous section called a ``color space'', which is way
-of representing color digitally. Kids mixing paints learn that all
-colors can be made from other colors, but things work slightly
+We were using a concept in the previous section called ``color space'',
+which is a way of representing color digitally. Kids mixing paints learn
+that colors can be made from other colors; things work slightly
differently in digital (less risk of being covered in paint!) but
similar. Processing makes it really easy to work with whatever color
space you want, but you need to know which one to pick, so it's
@@ -550,75 +541,69 @@

\aosasectiii{RGB colors}\label{rgb-colors}

-The color space that most programmers are familiar with is RGBA - red,
-green, blue and alpha. In hexadecimal (base 16) the first two digits are
-the amount of red the second two blue, the third two green, and the
-final two (if they are there) are the alpha value. These values range
-from 00 in base 16, 0 in base 10, through to FF, the equivalent of 255
-in base 10. The alpha represents the opacity, where 0 is transparent,
-and 100\% opaque.
+The color space that most programmers are familiar with is RGBA: red,
+green, blue and alpha; it's what we were using above. In hexadecimal
+(base 16), the first two digits are the amount of red, the second two
+green, the third two blue, and the final two (if they are there) are the
+alpha value. The values range from 00 in base 16 (0 in base 10) through
+to FF (255 in base 10). The alpha represents opacity, where 0 is
+transparent and 100\% is opaque.

\aosasectiii{HSB or HSV colors}\label{hsb-or-hsv-colors}

-This color space is not quite as well known as RGB, the first number
-represents the hue, the second the saturation (how intense the color
-is), and the third number the brightness. The image at the top was
-created by manipulating in this color space. The HSB color space can be
-represented by drawing a cone. The hue is the position around the cone,
+This color space is not quite as well known as RGB. The first number
+represents the hue, the second number the saturation (how intense the
+color is), and the third number the brightness. The HSB color space can
+be represented by a cone: the hue is the position around the cone,
saturation the distance from the centre, and brightness the height (0
brightness is black).

-\aosasectii{Extracting The Dominant Hue From an
+\aosasectii{Extracting the Dominant Hue from an
Image}\label{extracting-the-dominant-hue-from-an-image}

-Now we're comfortable with pixel manipulation, let's do something that
-we could only do digitally. Digitally, we can manipulate it in a way
-that isn't so uniform.
-
-When I look through my stream of pictures, say on Flickr, I can see
-themes emerging. The nighttime series from the boat I took at sunset
-around Hong Kong harbour, the grey of North Korea, the lush greens of
-Bali, the icy whites and pale blues of an Icelandic winter. Can we take
-a picture and pull out that main color that dominates the scene?
-
-It makes sense to use the HSB color space for this - we are interested
-in the hue, not the saturation or brightness, when figuring out what the
-main color is. It's possible to do this using RGB values, but more
-difficult (we would have to compare all three values) and it would be
-more sensitive to darkness. We can change to this colorspace using
-\href{http://processing.org/reference/colorMode_.html}{colorMode}.
+Now that we're comfortable with pixel manipulation, let's do something
+that we could only do digitally: manipulate the image in a way that
+isn't so uniform.
+
+When I look through my stream of pictures I can see themes emerging. The
+nighttime series I took at sunset from a boat on Hong Kong harbour, the
+grey of North Korea, the lush greens of Bali, the icy whites and pale
+blues of an Icelandic winter. Can we take a picture and pull out that
+main color that dominates the scene?
+
+It makes sense to use the HSB color space for this --- we are interested
+in the hue when figuring out what the main color is. It's possible to do
+this using RGB values, but more difficult (we would have to compare all
+three values) and it would be more sensitive to darkness. We can change
+to the HSB color space using
+\href{http://processing.org/reference/colorMode_.html}{colorMode}.

Having settled on this color space, it's simpler than it would have been
using RGB. We need to find the hue of each pixel, and figure out which
-is most ``popular''. We probably don't want to be exact, we want to
+is most ``popular''. We probably don't want to be exact --- we want to
group very similar hues together, and we can handle this using two
strategies.

Firstly we will round the decimals that come back to whole numbers, as
this makes it simple to determine which ``bucket'' we put each pixel in.

-Secondly we can change the range of the hues. If we think back up to the
-diagram above, we might think of hues as having 360 degrees (like a
-circle). Processing uses 255 by default, which is the same as is typical
-for RGB (255 is FF in hexadecimal). The higher the range we use, the
-more distinct the hues in the picture will be. Using a smaller range
+Secondly we can change the range of the hues. If we think back to the
+cone representation above, we might think of hues as having 360 degrees
+(like a circle). Processing uses 255 by default, which is the same as is
+typical for RGB (255 is FF in hexadecimal). The higher the range we use,
+the more distinct the hues in the picture will be. Using a smaller range
will allow us to group together similar hues. Using a 360 degree range,
it's unlikely that we will be able to tell the difference between a hue
of 224 and a hue of 225, as the difference is very small. If we make the
range one-third of that instead, 120, both these hues become 75 after
rounding.

-We can change the range of hues using \texttt{colorMode}. If we call:
-
-\begin{verbatim}
-  colorMode(HSB, 120);
-\end{verbatim}
-
-We have just made our hue detection a bit less than half as exact as if
-we used the 255 range. We also know that our hues will fall into 120
-``buckets'', so we can simply go through our image, get the hue for a
-pixel, and add one to the corresponding count in an array. This will be
-order of $O(n)$, where $n$ is the number of pixels, as it requires
-action on each one.
+We can change the range of hues using \texttt{colorMode}. If we call
+\texttt{colorMode(HSB, 120)}, we have just made our hue detection a bit
+less than half as exact as if we used the 255 range. We also know that
+our hues will fall into 120 ``buckets'', so we can simply go through our
+image, get the hue for a pixel, and add one to the corresponding count
+in an array. This will be $O(n)$, where $n$ is the number of pixels, as
+it requires action on each one.
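+
+As a quick, illustrative check of that arithmetic (the hue values are
+just the two from the example above):
+
+\begin{verbatim}
+// After colorMode(HSB, 120), a hue that a 360-degree wheel would
+// call 224 comes back on a 0..120 scale instead.
+float scaled1 = 224 * (120 / 360.0f); // 74.67
+float scaled2 = 225 * (120 / 360.0f); // 75.0
+int bucket1 = Math.round(scaled1);    // rounds to 75
+int bucket2 = Math.round(scaled2);    // also 75: the same bucket
+\end{verbatim}
+
+The counting pass over the pixels then looks like this: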
\begin{verbatim} for(int px in pixels) { @@ -633,8 +618,10 @@ Once we've extracted the ``dominant'' hue, we can choose to either show or hide it in the image. We can show the dominant hue with varying tolerance (ranges around it that we will accept). Pixels that don't fall -into this range can be changed to grayscale, by setting the value based -on the brightness. Alternatively, we can hide the dominant hue. +into this range can be changed to grayscale by setting the value based +on the brightness. Alternatively, we can hide the dominant hue by +setting the color for pixels with that hue to greyscale, and leaving +other pixels as they are. Each image requires a double pass (looking at each pixel twice), so on images with a large number of pixels it can take a noticeable amount of @@ -700,18 +687,18 @@ With the UI as it is, the user can combine the red, green, and blue filters together. If they combine the dominant hue filters with the red, -green, and blue filters the results can sometimes be a little unexpected -- because of changing the color spaces. +green, and blue filters the results can sometimes be a little +unexpected, because of changing the color spaces. Processing has some -\href{https://www.processing.org/reference/filter_.html}{built in -methods} that support the manipulation of images, for example +\href{https://www.processing.org/reference/filter_.html}{built-in +methods} that support the manipulation of images; for example, \texttt{invert} and \texttt{blur}. -To achieve effects like sharpening, or blurring, or sepia ourselves we -apply matrices. For every pixel of the image, take the sum of products -where each product is the color value of the current pixel or a neighbor -of it, with the corresponding value of the +To achieve effects like sharpening, blurring, or sepia we apply +matrices. For every pixel of the image, take the sum of products where +each product is the color value of the current pixel or a neighbor of +it, with the corresponding value of the \href{http://lodev.org/cgtutor/filtering.html}{filter matrix}. There are some special matrices of specific values that sharpen images. @@ -722,30 +709,30 @@ \aosasectii{The App}\label{the-app-1} -Consists of one file: \texttt{ImageFilterApp.java}. This extends +The app consists of one file: \texttt{ImageFilterApp.java}. This extends \texttt{PApplet} (the Processing app superclass) and handles layout, -user interaction etc. This class is the hardest to test, so we want to +user interaction, etc. This class is the hardest to test, so we want to keep it as small as possible. \aosasectii{Model}\label{model} -Consists of three files. \texttt{HSBColor.java} which is a simple +Model consists of three files: \texttt{HSBColor.java} is a simple container for HSB colors (consisting of hue, saturation, and -brightness). \texttt{IFAImage} which is a wrapper around \texttt{PImage} -for testability (\texttt{PImage} contains a number of final methods -which cannot be mocked). Finally \texttt{ImageState.java} is the object -describing the state of the image - what level of filters should be -applied, and which, and handles loading the image (note: the image needs -to be reloaded whenever color filters are adjusted down, and whenever -the dominant hue is recalculated. For clarity, we just reload each time -the image is processed). +brightness). \texttt{IFAImage} is a wrapper around \texttt{PImage} for +testability. (\texttt{PImage} contains a number of final methods which +cannot be mocked.) 
Finally, \texttt{ImageState.java} is the object which +describes the state of the image --- what level of filters should be +applied, and which filters --- and handles loading the image. (Note: The +image needs to be reloaded whenever color filters are adjusted down, and +whenever the dominant hue is recalculated. For clarity, we just reload +each time the image is processed.) \aosasectii{Color}\label{color-1} -Consists of two files: \texttt{ColorHelper.java}, which is where all the -image processing and filtering takes place. And -\texttt{PixelColorHelper.java} which abstracts out final -\texttt{PApplet} methods for pixel colors for testability. +Color consists of two files: \texttt{ColorHelper.java} is where all the +image processing and filtering takes place, and +\texttt{PixelColorHelper.java} abstracts out final \texttt{PApplet} +methods for pixel colors for testability. \aosafigure[240pt]{image-filters-images/architecture.jpg}{Architecture diagram}{500l.imagefilters.architecture} @@ -753,13 +740,13 @@ Briefly mentioned above, there are two wrapper classes (\texttt{IFAImage} and \texttt{PixelColorHelper}) that wrap library -methods for testability. This is because in Java, final methods are -methods that cannot be overridden or hidden by subclasses, which means -they cannot be mocked. +methods for testability. This is because, in Java, the keyword ``final'' +indicates a method that cannot be overridden or hidden by subclasses, +which means they cannot be mocked. \texttt{PixelColorHelper} wraps methods on the applet. This means we -need to pass the applet in to each method call (we could alternatively -make it a field and set it on initialization). +need to pass the applet in to each method call. (Alternatively, we could +make it a field and set it on initialization.) \begin{verbatim} package com.catehuston.imagefilter.color; @@ -808,9 +795,9 @@ \end{verbatim} \texttt{IFAImage} is a wrapper around \texttt{PImage}, so in our app we -don't initialize a \texttt{PImage}, but rather an \texttt{IFAImage} -instead. Although we do have to expose the \texttt{PImage} so that it -can be rendered. +don't initialize a \texttt{PImage}, but rather an \texttt{IFAImage} --- +although we do have to expose the \texttt{PImage} so that it can be +rendered. \begin{verbatim} package com.catehuston.imagefilter.model; @@ -888,12 +875,12 @@ \href{https://docs.oracle.com/javase/7/docs/api/java/awt/Color.html}{Java itself}. Without going too much into the details of these, both of them are more focused on RGB color, and the Java class in particular adds way -more complexity than we need. We would probably be OK if we did want to -use Java's awt.Color, however +more complexity than we need. We would probably be okay if we did want +to use Java's \texttt{awt.Color}; however \href{http://processing.org/reference/javadoc/core/processing/core/PApplet.html}{awt -GUI components cannot be used in Processing} so for our purposes -creating this simple container class to just hold these bits of data we -need is easiest. +GUI components cannot be used in Processing}, so for our purposes +creating this simple container class to hold these bits of data we need +is easiest. \begin{verbatim} package com.catehuston.imagefilter.model; @@ -917,8 +904,8 @@ \texttt{ColorHelper} is where all the image manipulation lives. The methods in this class could be static if not for needing a -\texttt{PixelColorHelper}. Although we won't get into the debate as to -the merits of static methods here! +\texttt{PixelColorHelper}. 
(Although we won't get into the debate about +the merits of static methods here.) \begin{verbatim} package com.catehuston.imagefilter.color; @@ -1026,10 +1013,12 @@ } \end{verbatim} -Clearly we can't test this with whole images. Instead we can mock the -images and make them return an array of pixels - in this case, 5. This -allows us to verify that the behavior is as expected. Earlier we covered -the concept of mock objects, and here we see their use. We are using +We don't want to test this with whole images, because we want images +that we know the properties of and reason about. We approximate this by +mocking the images and making them return an array of pixels --- in this +case, 5. This allows us to verify that the behavior is as expected. +Earlier we covered the concept of mock objects, and here we see their +use. We are using \href{http://docs.mockito.googlecode.com/hg/org/mockito/Mockito.html}{Mockito} as our mock object framework. @@ -1150,27 +1139,27 @@ We mock \texttt{PApplet}, \texttt{IFAImage} (created for expressly this purpose), and \texttt{ImageColorHelper}. \item - Test methods are annotated with \texttt{@Test} \footnote{Method names - in tests need not start with \texttt{test} as of JUnit 4 but habits - are hard to break.}. If you want to ignore a test (e.g.~whilst + Test methods are annotated with \texttt{@Test}\footnote{Method names + in tests need not start with \texttt{test} as of JUnit 4, but habits + are hard to break.}. If you want to ignore a test (e.g., whilst debugging) you can add the annotation \texttt{@Ignore}. \item In \texttt{setup()}, we create the pixel array and have the mock image always return it. \item - Helper methods make it easier to set expectations for reoccurring - tasks (e.g. \texttt{setHsbValuesForPixel()}, - \texttt{setRgbValuesForPixel()}.) + Helper methods make it easier to set expectations for recurring tasks + (e.g., \texttt{setHsbValuesForPixel()}, + \texttt{setRgbValuesForPixel()}). \end{aosaitemize} \aosasectii{Image State and Associated Tests}\label{image-state-and-associated-tests} -\texttt{ImageState} holds the current ``state'' of the image - the image -itself, and the settings and filters that will be applied. We'll omit -the full implementation of ImageState here, but we'll show how it can be -tested. You can the source repository for this project to see the -implementation details of \texttt{ImageState}. +\texttt{ImageState} holds the current ``state'' of the image --- the +image itself, and the settings and filters that will be applied. We'll +omit the full implementation of \texttt{ImageState} here, but we'll show +how it can be tested. You can visit the source repository for this +project to see the full implementation details. \begin{verbatim} package com.catehuston.imagefilter.model; @@ -1221,7 +1210,7 @@ \end{verbatim} Here we can test that the appropriate actions happen for the given -state, that fields are incremented and decremented appropriately. +state; that fields are incremented and decremented appropriately. \begin{verbatim} package com.catehuston.imagefilter.model; @@ -1369,16 +1358,16 @@ \item We exposed a protected initialization method \texttt{set} for testing - that helps us quickly get the system under test into a specific state + that helps us quickly get the system under test into a specific state. \item We mock \texttt{PApplet}, \texttt{ColorHelper}, and \texttt{IFAImage} (created expressly for this purpose). 
\item - This time we use a helper method (\texttt{assertState()} to simplify + This time we use a helper method (\texttt{assertState()}) to simplify asserting the state of the image. \end{aosaitemize} -\aosasectiii{Measuring Test Coverage}\label{measuring-test-coverage} +\aosasectiii{Measuring test coverage}\label{measuring-test-coverage} I use \href{http://www.eclemma.org/installation.html\#marketplace}{EclEmma} to @@ -1392,14 +1381,14 @@ \aosasectii{ImageFilterApp}\label{imagefilterapp} This is where everything is tied together, but we want as little as -possible there. The App is hard to unit test (much of it is layout), but +possible here. The App is hard to unit test (much of it is layout), but because we've pushed so much of the app's functionality into our own tested classes, we're able to assure ourselves that the important parts are working as intended. -We set the size of the app, and do the layout (these things are verified -by running the app and making sure it looks OK - no matter how good test -coverage this step should not be skipped!) +We set the size of the app, and do the layout. (These things are +verified by running the app and making sure it looks okay --- no matter +how good the test coverage, this step should not be skipped!) \begin{verbatim} package com.catehuston.imagefilter.app; @@ -1555,25 +1544,25 @@ \aosasecti{The Value of Prototyping}\label{the-value-of-prototyping} In real world programming, we spend a lot of time on productionisation -work. Making things look just so. Making them fail over. Maintaining -that 99.9\% uptime. We spend more time hunting down corner cases than -refining algorithms. +work. Making things look just so. Maintaining that 99.9\% uptime. We +spend more time hunting down corner cases than refining algorithms. These constraints and requirements are important for our users. However -there's also a space for freeing ourselves from them to play, and -explore. +there's also space for freeing ourselves from them to play and explore. Eventually, I decided to port this to a native mobile app. Processing has an Android library, but as many mobile developers do, I opted to go iOS first. I had years of iOS experience, although I'd done little with CoreGraphics, but I don't think even if I had had this idea initially, I would have been able to build it straight away on iOS. The platform -forced me to operate in the RGB colorspace, and made it hard to extract +forced me to operate in the RGB color space, and made it hard to extract the pixels from the image (hello, C). Memory and waiting was a major -risk factor. There were exhilarating moments, when it worked for the -first time. When it first ran on my device\ldots{} without crashing. -When I optimized memory usage by 66\% and cut seconds off the runtime. -And there were large periods of time locked away in a dark room, cursing +risk factor. + +There were exhilarating moments, when it worked for the first time. When +it first ran on my device\ldots{} without crashing. When I optimized +memory usage by 66\% and cut seconds off the runtime. And there were +large periods of time locked away in a dark room, cursing intermittently. Because I had my prototype, I could explain to my business partner and diff --git a/tex/modeller.tex b/tex/modeller.tex index 44cba2ab7..d28345253 100644 --- a/tex/modeller.tex +++ b/tex/modeller.tex @@ -1,11 +1,5 @@ \begin{aosachapter}{A 3D Modeller}{s:modeller}{Erick Dransch} -\emph{Erick is a software developer and 2D and 3D computer graphics -enthusiast. 
He has worked on video games, 3D special effects software, -and computer aided design tools. If it involves simulating reality, -chances are he'd like to learn more about it. You can find him online at -\href{http://erickdransch.com}{erickdransch.com}.} - \aosasecti{Introduction}\label{introduction} Humans are innately creative. We continuously design and build novel, diff --git a/tex/objmodel.tex b/tex/objmodel.tex index 8e574e658..9288b1263 100644 --- a/tex/objmodel.tex +++ b/tex/objmodel.tex @@ -1,12 +1,5 @@ \begin{aosachapter}{A Simple Object Model}{s:objmodel}{Carl Friedrich Bolz} -\emph{Carl Friedrich Bolz is a researcher at King's College London and -is broadly interested in the implementation and optimization of all -kinds of dynamic languages. He is one of the core authors of -PyPy/RPython and has worked on implementations of Prolog, Racket, -Smalltalk, PHP and Ruby. He's \href{https://twitter.com/cfbolz}{@cfbolz} -on Twitter.} - \aosasecti{Introduction}\label{introduction} Object-oriented programming is one of the major programming paradigms in diff --git a/tex/pedometer.tex b/tex/pedometer.tex index b5c6b26ea..c80f8d285 100644 --- a/tex/pedometer.tex +++ b/tex/pedometer.tex @@ -1,13 +1,5 @@ \begin{aosachapter}{A Pedometer in the Real World}{s:pedometer}{Dessy Daskalov} -\emph{Dessy is an engineer by trade, an entrepreneur by passion, and a -developer at heart. She's currently the CTO and co-founder of -\href{http://nudgerewards.com/.}{Nudge Rewards} When she's not busy -building product with her team, she can be found teaching others to -code, attending or hosting a Toronto tech event, and online at -\href{http://www.dessydaskalov.com/}{dessydaskalov.com} and -\href{https://twitter.com/dess_e}{@dess\_e}.} - \aosasecti{A Perfect World}\label{a-perfect-world} Many software engineers reflecting on their training will remember diff --git a/tex/same-origin-policy.tex b/tex/same-origin-policy.tex index 2a3b70e85..bc784afb0 100644 --- a/tex/same-origin-policy.tex +++ b/tex/same-origin-policy.tex @@ -1,30 +1,5 @@ \begin{aosachapter}{The Same-Origin Policy}{s:same-origin-policy}{Eunsuk Kang, Santiago Perez De Rosso, and Daniel Jackson} -\emph{Eunsuk Kang is a PhD candidate and a member of the Software Design -Group at MIT. He received his SM (Master of Science) in Computer Science -from MIT (2010), and a Bachelor of Software Engineering from the -University of Waterloo (2007). His research projects have focused on -developing tools and techniques for software modeling and verification, -with applications to security and safety-critical systems.} - -\emph{Santiago Perez De Rosso is a PhD student in the Software Design -Group at MIT. He received his SM in Computer Science from MIT (2015), -and an undergraduate degree from ITBA (2011). He used to work at Google, -developing frameworks and tools to make engineers more productive -(2012). He currently spends most of his time thinking about design and -version control.} - -\emph{Daniel Jackson is a professor in the Department of Electrical -Engineering and Computer Science at MIT, and leads the Software Design -Group in the Computer Science and Artificial Intelligence Laboratory. He -received an MA from Oxford University (1984) in Physics, and his SM -(1988) and PhD (1992) in Computer Science from MIT. He was a software -engineer for Logica UK Ltd. (1984-1986), Assistant Professor of Computer -Science at Carnegie Mellon University (1992-1997), and has been at MIT -since 1997. 
He has broad interests in software engineering, especially -in development methods, design and specification, formal methods, and -safety critical systems.} - \aosasecti{Introduction}\label{introduction} The same-origin policy (SOP) is an important part of the security @@ -159,11 +134,11 @@ sig Resource {} \end{verbatim} -The keyword ``sig'' identifies this as an Alloy signature declaration. -This introduces a set of resource objects; think of these, just like the -objects of a class with no instance variables, as blobs that have -identity but no content. When the analysis runs, this set will be -determined, just as a class in an object-oriented language comes to +The keyword \texttt{sig} identifies this as an Alloy \emph{signature} +declaration. This introduces a set of resource objects; think of these, +just like the objects of a class with no instance variables, as blobs +that have identity but no content. When the analysis runs, this set will +be determined, just as a class in an object-oriented language comes to denote a set of objects when the program executes. Resources are named by URLs (\emph{uniform resource locators}): @@ -175,27 +150,28 @@ port: lone Port, path: Path } -sig Protocol, Domain, Port, Path {} +sig Protocol, Port, Path {} +sig Domain { subsumes: set Domain } \end{verbatim} Here we have five signature declarations, introducing a set of URLs and four additional sets for each of the basic kinds of objects they -comprise. Within the URL declaration, we have four fields. Fields are -like instance variables in a class; if \texttt{u} is a URL, for example, -then \texttt{u.protocol} would represent the protocol of that URL (just -like dot in Java). But in fact, as we'll see later, these fields are -relations. You can think of each one as if it were a two-column database -table. Thus \texttt{protocol} is a table with the first column -containing URLs and the second column containing protocols. And the -innocuous looking dot operator is in fact a rather general kind of -relational join, so that you could also write \texttt{protocol.p} for +comprise. Within the URL declaration, we have four \emph{fields}. Fields +are like instance variables in a class; if \texttt{u} is a URL, for +example, then \texttt{u.protocol} would represent the protocol of that +URL (just like dot in Java). But in fact, as we'll see later, these +fields are relations. You can think of each one as if it were a +two-column database table. Thus \texttt{protocol} is a table with the +first column containing URLs and the second column containing protocols. +And the innocuous looking dot operator is in fact a rather general kind +of relational join, so that you could also write \texttt{protocol.p} for all the URLs with a protocol \texttt{p} --- but more on that later. -Note that domains and paths, unlike URLs, are treated as if they have no -structure --- a simplification. The keyword \texttt{lone} (which can be -read ``less than or equal to one'') says that each URL has at most one -port. The path is the string that follows the host name in the URL, and -which (for a simple static server) corresponds to the file path of the +Note that paths, unlike URLs, are treated as if they have no structure +--- a simplification. The keyword \texttt{lone} (which can be read +``less than or equal to one'') says that each URL has at most one port. 
+The path is the string that follows the host name in the URL, and which +(for a simple static server) corresponds to the file path of the resource; we're assuming that it's always present, but can be an empty path. @@ -286,7 +262,7 @@ It's a polymorphic module, so it's instantiated with \texttt{Endpoint}, the set of things calls are from and to. (The module appears in full in -the Appendix.) +\aosasecref{500l.sop.appendix}.) Following the field declarations in \texttt{HttpRequest} is a collection of constraints. Each of these constraints applies to all members of the @@ -304,7 +280,7 @@ \end{verbatim} As soon as the analyzer finds a possible instance of the system, it -displays it graphically, like in +automatically produces a diagram of the instance, like in \aosafigref{500l.same-origin-policy.fig-http-1}. \aosafigure[240pt]{same-origin-policy-images/fig-http-1.png}{A possible instance}{500l.same-origin-policy.fig-http-1} @@ -364,7 +340,7 @@ Note that while the DNS maps \texttt{Domain} to both \texttt{Server0} and \texttt{Server1} (in reality, this is a common practice for load balancing), only \texttt{Server1} maps \texttt{Path} to a resource -object, causing \texttt{HttpRequest1} to result in empty response: +object, causing \texttt{HttpRequest1} to result in an empty response: another error in our model. To fix this, we add an Alloy \emph{fact} recording the assumption that any two servers to which DNS maps a single host provide the same set of resources: @@ -381,12 +357,18 @@ doesn't mean the property has been proven to be true, since there might be a counterexample in a larger scope. But it is unlikely that the property is false, since the analyzer has tested all possible instances -involving 3 objects of each type. In this scope, the \texttt{map} field -alone (representing the DNS mapping) has 512 possible values. +involving 3 objects of each type. + +If desired, however, we can re-run the analysis with a larger scope for +increased confidence. For example, running the above check with the +scope of 10 still does not produce any counterexample, suggesting that +the property is likely to be valid. However, keep in mind that given a +larger scope, the analyzer needs to test a greater number of instances, +and so it will likely take longer to complete. \aosasectii{Browser}\label{browser} -Let's introduce browsers: +Let's now introduce browsers into our model: \begin{verbatim} sig Browser extends Client { @@ -404,7 +386,7 @@ browser \texttt{b} at a particular time \texttt{t}. Likewise, the \texttt{documents} field associates a set of documents with each browser at a given time. (For more details about how we model the dynamic -behavior, see the Appendix.) +behavior, see \aosasecref{500l.sop.appendix}.) Documents are created from a response to an HTTP request. They can also be destroyed if, for example, the user closes a tab or the browser, but @@ -474,16 +456,16 @@ except for a new entry in the table mapping \texttt{from} to \texttt{doc}. -Some constraints use the \texttt{++} relational override operator: -\texttt{e1 ++ e2} contains all tuples of \texttt{e2}, and additionally, -any tuples of \texttt{e1} whose first element is not the first element -of a tuple in \texttt{e2}. For example, the constraint +Some constraints use the \texttt{++} relational \emph{override} +operator: \texttt{e1 ++ e2} contains all tuples of \texttt{e2}, and +additionally, any tuples of \texttt{e1} whose first element is not the +first element of a tuple in \texttt{e2}. 
For example, the constraint \texttt{content.end = content.start ++ doc -\textgreater{} response} says that after the call, the \texttt{content} mapping will be updated -to map \texttt{doc} to \texttt{response} (clobbering any previous +to map \texttt{doc} to \texttt{response} (overriding any previous mapping of \texttt{doc}). If we were to use the union operator \texttt{+} instead, then the same document might (incorrectly) be mapped -in the after state to multiple resources. +to multiple resources in the after state. \aosasectii{Script}\label{script} @@ -792,7 +774,7 @@ } \end{verbatim} -The integrity property is the double of confidentiality: +The integrity property is the dual of confidentiality: \begin{verbatim} // No malicious data should ever flow into a trusted module @@ -849,7 +831,7 @@ \aosafigure[240pt]{same-origin-policy-images/fig-attack-1b.png}{Confidentiality counterexample at time 1}{500l.same-origin-policy.fig-attack-1b} This counterexample involves two steps. In the first step -(\aosafigref{500l.same-origin-policy-fig-attack-1a}), +(\aosafigref{500l.same-origin-policy.fig-attack-1a}), \texttt{EvilScript}, executing inside \texttt{AdBanner} from \texttt{EvilDomain}, reads the content of \texttt{InboxPage}, which originates from \texttt{EmailDomain}. In the next step @@ -888,8 +870,8 @@ \aosasecti{Same-Origin Policy}\label{same-origin-policy} Before we can state the SOP, the first thing we should do is to -introduce the notion of an origin, which is composed of a protocol, -host, and optional port: +introduce the notion of an \emph{origin}, which is composed of a +protocol, host, and optional port: \begin{verbatim} sig Origin { @@ -899,8 +881,8 @@ } \end{verbatim} -We define a function that, given a URL, returns the corresponding -origin: +For convenience, let us define a function that, given a URL, returns the +corresponding origin: \begin{verbatim} fun origin[u: Url] : Origin { @@ -914,15 +896,16 @@ that comes from the same origin as the script: \begin{verbatim} -pred domSop { +fact domSop { all o: ReadDom + WriteDom | let target = o.doc, caller = o.from.context | origin[target] = origin[caller] } \end{verbatim} -An instance such as the first script scenario is not possible under -\texttt{domSop}, since \texttt{Script} is not allowed to invoke -\texttt{ReadDom} on a document from a different origin. +An instance such as the first script scenario (from the previous +section) is not possible under \texttt{domSop}, since \texttt{Script} is +not allowed to invoke \texttt{ReadDom} on a document from a different +origin. The second part of the policy says that a script cannot send an HTTP request to a server unless its context has the same origin as the target @@ -930,7 +913,7 @@ scenario. \begin{verbatim} -pred xmlHttpReqSop { +fact xmlHttpReqSop { all x: XmlHttpRequest | origin[x.url] = origin[x.from.context.src] } \end{verbatim} @@ -962,8 +945,8 @@ security; we want to make sure our sites are robust and functional, but the mechanism for securing it can sometimes get in the way. Indeed, when the SOP was initially introduced, developers ran into trouble building -sites that made legitimate uses of cross-domain communication --- for -example, for mashups. +sites that made legitimate uses of cross-domain communication (e.g., +mashups). In this section, we will discuss four techniques that have been devised and frequently used by web developers to bypass the restrictions imposed @@ -1043,7 +1026,7 @@ other's DOM). 
\begin{verbatim} -pred domSop { +fact domSop { -- For every successful read/write DOM operation, all o: ReadDom + WriteDom | let target = o.doc, caller = o.from.context | -- (1) target and caller documents are from the same origin, or @@ -1128,14 +1111,14 @@ executes the \texttt{SetDomain} operation to modify the domain property of \texttt{BlogPage} to \texttt{ExampleDomain}. -\aosafigure[240pt]{same-origin-policy-images/fig-setdomain-2a.png}{Cross-origin counterexample at time 4}{500l.same-origin-policy.fig-setdomain-2a} +\aosafigure[240pt]{same-origin-policy-images/fig-setdomain-2a.png}{Cross-origin counterexample at time 3}{500l.same-origin-policy.fig-setdomain-2a} Now that \texttt{BlogPage} has the same domain property as the other two documents, it can successfully execute the \texttt{ReadDOM} operation to access their content (\aosafigref{500l.same-origin-policy.fig-setdomain-2b}.) -\aosafigure[240pt]{same-origin-policy-images/fig-setdomain-2b.png}{Cross-origin counterexample at time 5}{500l.same-origin-policy.fig-setdomain-2b} +\aosafigure[240pt]{same-origin-policy-images/fig-setdomain-2b.png}{Cross-origin counterexample at time 4}{500l.same-origin-policy.fig-setdomain-2b} This attack points out one crucial weakness of the domain property method for cross-origin communication: The security of an application @@ -1225,6 +1208,11 @@ } \end{verbatim} +(\texttt{EventHandler} is a special type of call that must take place +sometime after another call, which is denoted by \texttt{causedBy}; we +will use event handlers to model actions that are performed by scripts +in response to browser events.) + Note that the callback function executed is the same as the one that's included in the response (\texttt{cb = resp.@cb}), but \emph{not} necessarily the same as \texttt{padding} in the original JSONP request. @@ -1359,8 +1347,10 @@ \emph{additionally} check the \texttt{srcOrigin} parameter to ensure that the message is coming from a trustworthy document. Unfortunately, in practice, many sites omit this check, enabling a malicious document -to inject bad content as part of a \texttt{PostMessage} {[}cite -PostMessage study{]}. +to inject bad content as part of a \texttt{PostMessage} \footnote{Sooel + Son and Vitaly Shmatikov. \emph{The Postman Always Rings Twice: + Attacking and Defending postMessage in HTML5 Websites}. Network and + Distributed System Security Symposium (NDSS), 2013.}. However, the omission of the origin check may not simply be the result of programmer ignorance. Implementing an appropriate check on an @@ -1446,7 +1436,9 @@ resource is considered public and accessible to anyone. However, it turns out that many sites use ``*'' as the default value even for private resources, inadvertently allowing malicious scripts to access -them through CORS requests {[}cite CORS study{]}. +them through CORS requests \footnote{Sebastian Lekies, Martin Johns, and + Walter Tighzert. \emph{The State of the Cross-Domain Nation}. Web 2.0 + Security and Privacy (W2SP), 2011.}. Why would a developer ever use the wildcard? It turns out that specifying the allowed origins can be tricky, since it may not be clear @@ -1484,14 +1476,15 @@ \aosasecti{Appendix: Reusing Modules in Alloy}\label{appendix-reusing-modules-in-alloy} -As mentioned earlier in this chapter, Alloy makes no assumptions about -the behavior of the system being modeled. The lack of a built-in -paradigm allows the user to encode a wide range of modeling idioms using -a small core of the basic language constructs. 
We could, for example, -specify a system as a state machine, a data model with complex -invariants, a distributed event model with a global clock, or whatever -idiom is most suitable for the problem at hand. Commonly used idioms can -be captured as a generic module and reused across multiple systems. +\label{500l.sop.appendix} As mentioned earlier in this chapter, Alloy +makes no assumptions about the behavior of the system being modeled. The +lack of a built-in paradigm allows the user to encode a wide range of +modeling idioms using a small core of the basic language constructs. We +could, for example, specify a system as a state machine, a data model +with complex invariants, a distributed event model with a global clock, +or whatever idiom is most suitable for the problem at hand. Commonly +used idioms can be captured as a generic module and reused across +multiple systems. In our model of the SOP, we model the system as a set of endpoints that communicate with each other by making one or more \emph{calls}. Since diff --git a/tex/sampler.tex b/tex/sampler.tex index 73a4c2e54..0fd500ea1 100644 --- a/tex/sampler.tex +++ b/tex/sampler.tex @@ -1,11 +1,5 @@ \begin{aosachapter}{A Rejection Sampler}{s:sampler}{Jessica B. Hamrick} -\emph{Jess is a Ph.D.~student at UC Berkeley where she studies human -cognition by combining probabilistic models from machine learning with -behavioral experiments from cognitive science. In her spare time, Jess -is a core contributor to IPython and Jupyter. She also holds a B.S. and -M.Eng. in Computer Science from MIT.} - \aosasecti{Introduction}\label{introduction} Frequently, in computer science and engineering, we run into problems diff --git a/tex/spreadsheet.tex b/tex/spreadsheet.tex index d6da388a8..49ded8dd7 100644 --- a/tex/spreadsheet.tex +++ b/tex/spreadsheet.tex @@ -1,13 +1,5 @@ \begin{aosachapter}{Web Spreadsheet}{s:spreadsheet}{Audrey Tang} -\emph{A self-educated programmer and translator, Audrey works with Apple -as an independent contractor on cloud service localization and natural -language technologies. Audrey has previously designed and led the first -working Perl 6 implementation, and served in computer language design -committees for Haskell, Perl 5, and Perl 6. Currently Audrey is a -full-time g0v contributor and leads Taiwan's first e-Rulemaking -project.} - This chapter introduces a \href{http://audreyt.github.io/500lines/spreadsheet/}{web spreadsheet} written in diff --git a/tex/static-analysis.tex b/tex/static-analysis.tex index bcd4fd773..532d7e2cc 100644 --- a/tex/static-analysis.tex +++ b/tex/static-analysis.tex @@ -1,9 +1,5 @@ \begin{aosachapter}{Static Analysis}{s:static-analysis}{Leah Hanson} -\emph{Leah Hanson is a proud alumni of Hacker School and loves helping -people learn about Julia. She blogs at \url{http://blog.leahhanson.us/} -and tweets at \href{https://twitter.com/astrieanna}{@astrieanna}.} - \aosasecti{Introduction}\label{introduction} You may be familiar with a fancy IDE that draws red underlines under diff --git a/tex/web-server.tex b/tex/web-server.tex index 5add5e926..e48fee3e3 100644 --- a/tex/web-server.tex +++ b/tex/web-server.tex @@ -1,13 +1,5 @@ \begin{aosachapter}{A Simple Web Server}{s:web-server}{Greg Wilson} -\emph{\href{https://twitter.com/gvwilson}{Greg Wilson} is the founder of -Software Carpentry, a crash course in computing skills for scientists -and engineers. 
He has worked for 30 years in both industry and academia, -and is the author or editor of several books on computing, including the -2008 Jolt Award winner \emph{Beautiful Code} and the first two volumes -of \emph{The Architecture of Open Source Applications}. Greg received a -PhD in Computer Science from the University of Edinburgh in 1993.} - \aosasecti{Introduction}\label{introduction} The web has changed society in countless ways over the last two decades,