Check-in [06ec941e60]
Not logged in

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:Delete Idris source, add Literate Programming document
Downloads: Tarball | ZIP archive
Timelines: family | ancestors | descendants | both | trunk
Files: files | file ages | folders
SHA3-256: 06ec941e6024f56b388889a9cab41ad3018064b553d87a650824f3da11aed5a7
User & Date: sehqlr 2017-12-17 21:56:51.465
Context
2017-12-21
02:00
Renamed the org file, and finally got the GET requests to work check-in: fdaa20eb53 user: sehqlr tags: trunk
2017-12-17
21:56
Delete Idris source, add Literate Programming document check-in: 06ec941e60 user: sehqlr tags: trunk
2017-12-05
17:19
more changes to org file check-in: 87058bb2cc user: sehqlr tags: trunk
Changes
Unified Diff Ignore Whitespace Patch
Deleted DataStore.idr.
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
module Main

import Data.Vect

infixr 5 .+.

||| Shape of a single entry in the data store: a string, an int, or two
||| sub-schemas combined with the right-associative `.+.` operator.
data Schema : Type where
     SString : Schema
     SInt : Schema
     (.+.) : Schema -> Schema -> Schema

||| Compute the Idris type of the values described by a schema.
||| A combined schema maps to a pair of the two component types.
SchemaType : Schema -> Type
SchemaType (lhs .+. rhs) = Pair (SchemaType lhs) (SchemaType rhs)
SchemaType SInt = Int
SchemaType SString = String

||| The in-memory store: a schema plus a length-indexed vector of entries
||| whose element type is computed from that schema.
record DataStore where
       constructor MkData
       -- Describes the type of every entry held in `items`.
       schema : Schema
       -- Number of entries; also the length index of `items`.
       size : Nat
       -- The stored entries, each of type `SchemaType schema`.
       items : Vect size (SchemaType schema)

||| Return a store with `newitem` appended after all existing entries.
||| The schema is unchanged and the size grows by exactly one.
addToStore : (store : DataStore) -> SchemaType (schema store) -> DataStore
addToStore (MkData schema size elems) newitem
  = MkData schema (S size) (appendItem elems)
  where
    -- Walk to the end of the vector and place `newitem` there.
    appendItem : Vect n (SchemaType schema) ->
                 Vect (S n) (SchemaType schema)
    appendItem [] = [newitem]
    appendItem (x :: xs) = x :: appendItem xs

||| Commands understood by the interactive loop, indexed by the schema
||| that is current when the command is parsed.
data Command : Schema -> Type where
     ||| Carries a replacement schema for the store.
     SetSchema : (newschema : Schema) -> Command schema
     ||| Carries a new entry whose type matches the indexing schema.
     Add : SchemaType schema -> Command schema
     ||| Carries the position of the entry to look up.
     Get : Integer -> Command schema
     ||| Ends the session.
     Quit : Command schema

||| Parse one value described by `schema` from the front of the input.
|||
||| On success returns the parsed value together with the unconsumed rest of
||| the input (with leading whitespace trimmed); returns `Nothing` when the
||| input does not start with a value of the required shape.
|||
||| * `SString` expects a double-quoted string with no embedded quotes.
||| * `SInt` expects one or more decimal digits.
||| * `l .+. r` parses `l`, then parses `r` from whatever `l` left behind.
parsePrefix : (schema : Schema) -> String -> Maybe (SchemaType schema, String)
parsePrefix SString input = getQuoted (unpack input)
  where
    getQuoted : List Char -> Maybe (String, String)
    getQuoted ('"' :: xs)
      = case span (/= '"') xs of
        (quoted, '"' :: rest) => Just (pack quoted, ltrim (pack rest))
        _ => Nothing
    getQuoted _ = Nothing
parsePrefix SInt input = case span isDigit input of
                              ("", rest) => Nothing
                              (num, rest) => Just (cast num, ltrim rest)
parsePrefix (schemal .+. schemar) input = do
    (l_val, input') <- parsePrefix schemal input
    -- The right-hand value starts where the left-hand value stopped.
    (r_val, input'') <- parsePrefix schemar input'
    -- Hand back what is left after BOTH values. Returning input' here would
    -- report the right-hand value's text as unconsumed, so parseBySchema
    -- would reject every entry of a combined schema.
    Just ((l_val, r_val), input'')

||| Parse a complete entry for `schema`. Succeeds only when `parsePrefix`
||| succeeds and leaves no unconsumed input behind.
parseBySchema : (schema : Schema) -> String -> Maybe (SchemaType schema)
parseBySchema schema input = do
    (res, leftover) <- parsePrefix schema input
    if leftover == "" then Just res else Nothing

||| Turn a non-empty list of type names ("String" or "Int") into a schema,
||| combining them right-to-left with `.+.`. Any other name, or an empty
||| list, yields `Nothing`.
parseSchema : List String -> Maybe Schema
parseSchema [] = Nothing
parseSchema (name :: rest)
  = case columnFor name of
         Nothing => Nothing
         Just col => case rest of
                          [] => Just col
                          _ => map (\tl => col .+. tl) (parseSchema rest)
  where
    -- Map a single type name onto its primitive schema.
    columnFor : String -> Maybe Schema
    columnFor "String" = Just SString
    columnFor "Int" = Just SInt
    columnFor _ = Nothing

||| Build a command from an already-split command word and argument text.
||| Recognised words are "add", "get", "quit" (with no arguments) and
||| "schema"; everything else, or arguments that fail to parse, is `Nothing`.
parseCommand : (schema : Schema) -> String -> String -> Maybe (Command schema)
parseCommand schema "add" rest = do
    item <- parseBySchema schema rest
    Just (Add item)
parseCommand schema "get" val
    = if all isDigit (unpack val)
         then Just (Get (cast val))
         else Nothing
parseCommand schema "quit" "" = Just Quit
parseCommand schema "schema" rest = do
    newschema <- parseSchema (words rest)
    Just (SetSchema newschema)
parseCommand _ _ _ = Nothing

||| Split a line of input at its first space into a command word and its
||| arguments, then delegate to `parseCommand`.
parse : (schema : Schema) ->
        (input : String) -> Maybe (Command schema)
parse schema input
  = let (cmd, args) = span (/= ' ') input in
        parseCommand schema cmd (ltrim args)

||| Render an entry as text. Primitive values use `show`; a combined entry
||| renders both halves separated by ", ".
display : SchemaType schema -> String
display {schema = SString} str = show str
display {schema = SInt} num = show num
display {schema = (l .+. r)} (lhs, rhs)
        = concat [display {schema = l} lhs, ", ", display {schema = r} rhs]

||| Look up the entry at `pos`. Always returns `Just`: either the rendered
||| entry or the text "Out of range", each followed by a newline, paired
||| with the unchanged store.
getEntry : (pos : Integer) -> (store : DataStore) ->
           Maybe (String, DataStore)
getEntry pos store
  = case integerToFin pos (size store) of
         Nothing => Just ("Out of range\n", store)
         Just idx => Just (display (index idx (items store)) ++ "\n", store)

||| Replace the schema of an empty store. A store that already holds
||| entries cannot change schema, so `Nothing` is returned for it.
setSchema : (store : DataStore) -> Schema -> Maybe DataStore
setSchema (MkData _ Z _) newschema = Just (MkData newschema Z [])
setSchema (MkData _ (S _) _) _ = Nothing

||| Handle one line of user input against the current store.
||| Returns the text to print together with the next store, or `Nothing`
||| for the Quit command.
processInput : DataStore -> String -> Maybe (String, DataStore)
processInput store input
  = case parse (schema store) input of
         -- Unparseable input leaves the store untouched.
         Nothing => Just ("Invalid command\n", store)
         Just (Add item) =>
           -- The reported ID is the size before the item is appended.
           Just ("ID " ++ show (size store) ++ "\n", addToStore store item)
         Just (Get pos) => getEntry pos store
         -- setSchema refuses when the store already holds entries.
         Just (SetSchema schema') => case setSchema store schema' of
                                           Nothing => Just ("Can't update schema\n", store)
                                           Just store' => Just ("OK\n", store')
         Just Quit => Nothing

||| Run the interactive loop, starting from an empty store of strings.
main : IO ()
main = replWith emptyStore "Command: " processInput
  where
    emptyStore : DataStore
    emptyStore = MkData SString Z []
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<


























































































































































































































































Changes to Project.org.
1




2
3
4
5

6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32


33
34
35
36
37
38
39
40
41



42
43
44
45

46
47

48








49
50
51



52
53
54



55
56
57
58
59




60
61

62
63

64
65
66
67
68



69
70
71
72




73
74
75
76
77



78
79
80

81
82
83
84




85
86
87


88
89
90
91

92
93
94
95
96


97


98


99
100
101
102
103
104
105
106
107

108
109









110



111





112
113





114
115

116
117
118
119


120
121




122
123
124

125




126

127

128
129
130

131

132

133
134
135
136
137
138
139
140
#+TITLE: Static: a semi-dynamic documents-as-database web content management system





This is a literate programming document for this project. Version control will
be Fossil SCM.


* What does the tagline mean?
  The tagline for this project is, "a semi-dynamic database-as-documents web
  content management system". Since these are pre-buzzwords, no one can guess
  the meaning.
  
** "semi-dynamic" means blending the best parts of static and dynamic sites
   Web sites and applications are either static files only or
   static template files injected with data, AKA "dynamic web
   pages". This system will be a hybrid of the two. The first
   iterations of this project will only support the static site
   half of the equation first, since the dynamic site features
   depend on them.
** "database-as-documents" means whole documents are stored in a database
   HTTP revolves around documents, which are typically stored within a web
   server's file system. Dynamic sites combine static template files with
   queries to a data store. What I want to create is a system where all of the
   documents on a server are wholly within a database system, and HTTP
   requests are mapped to SQL statements. There are a lot of technical
   implications with this kind of system, especially when it comes to
   security, which we'll explore.
** Web Content Management System means an integrated tool
   I want an integrated tool that handles the web server, document
   versioning, static site generation, and everything else that developers
   know how to do today, but can also be used by normal folks. I also want
   a tool that can run locally and remotely the same, like Fossil SCM.

* What this document is
  This is going to be a Literate Programming document that will contain a few things:


  1. an OpenAPI Specification (swagger) document
  2. Implementations
  3. Tests
  4. TODO lists to write all of these

  I plan on using Python as the language of the prototype, since I've worked
  with Python the longest. However, I also want to implement this in Haskell,
  since I love Haskell.




* Functional Description
  This is the beginnings of a specification that will be implemented as a runnable specification later.
** Interface
   The main interface will be a RESTful web service defined with an

   OpenAPI Specification document. It will behave like a standard web server
   serving up static files.










   #+NAME: base sketch
   #+BEGIN_SRC yaml :exports none :tangle swagger.yaml :noweb yes
     openapi: 3.0.0



     info:
       title: Static
       description: >-



         a semi-dynamic database-as-documents web content management system
       version: 0.0.1

     paths:
       /documents:




         get:
           summary: Returns paths for all documents currently in the database

           description: >-
             This returns an 

   #+END_SRC

   I will leverage code generation tools to create the clients. According to the
   swagger code-gen docs, you can generate a Bash client, so that could cover a
   basic CLI as well.




   If front end or other graphical interfaces would benefit from a secondary
   protocol, we can add that later. The prototypes and alpha/beta versions will
   target the RESTful API only.





** Components
   These are the main components of static, starting from the HTTP service to
   the DB service. The components will start as Idris modules that come together
   in Main.main.




   Each of these components will be Literate Idris files that describe the
   implementation. Therefore, in the long run, this section will be peeled off

   into those documents.
*** HTTP connection
    This service sends and receives HTTP requests. The main requirement for this
    service is to be responsive. This service could be internal to the app, or




    it could be provided by an external service, like Nginx or Apache. The
    internal version will be developed first, and that will be what runs locally
    when you run =static= to view your site. The option to link 



    The first HTTP verb that will be supported is GET, which will form the basis
    of the "documents-as-database" paradigm. Each document path available from
    GET will be mapped to a row in a SQLite database by the other components.


    Other verbs will be needed for the web content management system part, which
    will be one or more front ends to the database. My eventual goal is to build
    an interface that can generate a rudimentary front end that ships with the
    main project, and guides the creation of better front ends.





    This component would encapsulate the effects of interacting with HTTP


    clients.
*** HTTP/SQL Transformer
    This service revolves around one function, =fetch=, a sketch of which is here:
    #+BEGIN_EXAMPLE
      fetch : HTTP -> HTTP
      fetch req = toHTTP (toSQL req)
        where
          toSQL : HTTP -> SQL
          toHTTP : SQL -> HTTP

    #+END_EXAMPLE
    The function may have to be split into separate functions, we'll see what









    makes the most sense. It really depends on how complex it is to parse HTTP



    and SQL and transform the two into each other. I anticipate complexity.






    This module will be pure, with all the transformations being total functions.





*** sqlite database connection
    This component will connect to the sqlite database where the documents are

    kept. It sends queries, and extracts the document from the query results.
    This component should also do some auditing of the requests to try to
    mitigate attacks, but if that gets involved, that should be made into its
    own component.



    As for the database schema, the naive one is this:




    #+BEGIN_SRC sql
      CREATE TABLE documents (
      id PRIMARY KEY,

      fpath TEXT,




      content BLOB

      )

    #+END_SRC
    The idea here is that each row has a file path and the content. The query to
    get the document would look like this:

    #+BEGIN_SRC sql

      SELECT content FROM documents WHERE fpath = $FPATH

    #+END_SRC
    The =$FPATH= variable would be safely injected by the =fetch= function
    mentioned above. Then, once the DB returns the results of that query, the
    document is serialized into an HTTP response by the transform layer. 

    This component would encapsulate the effects of interfacing with the DB.
*** Main
    =Main.main= would wire up the connections between all the components.

>
>
>
>




>
|
|
|

|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|

|
|
>
>
|
|
<
<

|
|
|

>
>
>
|
|
|
<
>
|
|
>

>
>
>
>
>
>
>
>
|
|
<
>
>
>
|
<
<
>
>
>
|
|
|
|
|
>
>
>
>
|
|
>
|
<
>
|
|
<
<
<
>
>
>

<
<
<
>
>
>
>

<
<
|
|
>
>
>

<
<
>
|
|
|
<
>
>
>
>
|
|
<
>
>

|
|
|
>

<
|
|
|
>
>

>
>
|
>
>
|
<
|
|
<
<
|
<
<
>
|
|
>
>
>
>
>
>
>
>
>
|
>
>
>
|
>
>
>
>
>

<
>
>
>
>
>
|
|
>
|
|
|
<
>
>

<
>
>
>
>
|
<
<
>
|
>
>
>
>
|
>
|
>
|
<
<
>
|
>
|
>
|
|
<
<

<
<
<
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41


42
43
44
45
46
47
48
49
50
51
52

53
54
55
56
57
58
59
60
61
62
63
64
65
66
67

68
69
70
71


72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87

88
89
90



91
92
93
94



95
96
97
98
99


100
101
102
103
104
105


106
107
108
109

110
111
112
113
114
115

116
117
118
119
120
121
122
123

124
125
126
127
128
129
130
131
132
133
134
135

136
137


138


139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161

162
163
164
165
166
167
168
169
170
171
172

173
174
175

176
177
178
179
180


181
182
183
184
185
186
187
188
189
190
191


192
193
194
195
196
197
198


199



#+TITLE: Static: a semi-dynamic documents-as-database web content management system

# Local Variables:
# org-src-preserve-indentation: t
# End:

This is a literate programming document for this project. Version control will
be Fossil SCM.

* Overview
** What does the tagline mean?
   The tagline for this project is, "a semi-dynamic database-as-documents web
   content management system". Since these are pre-buzzwords, no one can guess
   the meaning.
  
*** "semi-dynamic" means blending the best parts of static and dynamic sites
    Web sites and applications are either static files only or
    static template files injected with data, AKA "dynamic web
    pages". This system will be a hybrid of the two. The first
    iterations of this project will only support the static site
    half of the equation first, since the dynamic site features
    depend on them.
*** "database-as-documents" means whole documents are stored in a database
    HTTP revolves around documents, which are typically stored within a web
    server's file system. Dynamic sites combine static template files with
    queries to a data store. What I want to create is a system where all of the
    documents on a server are wholly within a database system, and HTTP
    requests are mapped to SQL statements. There are a lot of technical
    implications with this kind of system, especially when it comes to
    security, which we'll explore.
*** Web Content Management System means an integrated tool
    I want an integrated tool that handles the web server, document
    versioning, static site generation, and everything else that developers
    know how to do today, but can also be used by normal folks. I also want
    a tool that can run locally and remotely the same, like Fossil SCM.

** What this document is
   This is going to be a Literate Programming document, where I sketch out the
   system from a high level and then drill down on each part. Each part should
   include any testing that verifies the code. Because =org-mode= makes it really
   easy to tangle one document into multiple files, the code base will conform to
   a conventional shape for whatever language it is in.



   I plan on using Python as the language of the first prototype, since I've
   worked with Python the longest. However, once I understand that
   implementation, I plan on rewriting it in Haskell.

* The Code
  In this section, I specify and then implement the different components of this
  application, and define some tests for them as well. Each component will have
  its own section.

** Document schema

   All of the code snippets below assume this schema for the =documents= table.
   #+NAME: documents schema
   #+BEGIN_SRC sql
     -- Recreate the documents table from scratch.
     DROP TABLE IF EXISTS documents;

     -- One row per stored document.
     --   id      : declared INTEGER PRIMARY KEY so that SQLite treats it as the
     --             rowid alias: never NULL and assigned automatically on
     --             insert. An untyped "id PRIMARY KEY" gets neither guarantee.
     --   fpath   : path the document is addressed by.
     --   content : the document body.
     CREATE TABLE documents (
     id INTEGER PRIMARY KEY,
     fpath TEXT,
     content BLOB
     );
   #+END_SRC
** Running the server
   This is lifted directly from the [[https://docs.python.org/3/library/http.server.html][=http.server= documentation on python.org]].
   #+NAME: run the server
   #+BEGIN_SRC python 

     def run(server_class=HTTPServer, handler_class=BaseHTTPRequestHandler):
         """Build a server on address ('', 8000) and serve until interrupted.

         server_class is called with the address tuple and handler_class;
         the resulting object's serve_forever() is then invoked.
         """
         server_class(('', 8000), handler_class).serve_forever()


   #+END_SRC
** Make file
   Since I'm going to be dealing with a server and a database, I'm going to
   write a =Makefile= to manage it.
*** =website.db= target
    Create a fresh DB file when we change the schema
    #+NAME: website make target
    #+BEGIN_SRC makefile
      # Rebuild the database from scratch whenever schema.sql changes:
      # remove the old file, then feed the schema to sqlite3.
      website.db: schema.sql
        $(RM) website.db
        sqlite3 website.db < schema.sql
    #+END_SRC
*** =all= target
    #+NAME: all make target
    #+BEGIN_SRC makefile
    # Default entry point. It must name a target that actually has a rule;
    # the database file is built by the website.db rule.
    .PHONY: all

    all: website.db
    #+END_SRC
** =StaticRequestHandler= class



   The next thing I need to do is define =StaticRequestHandler=, the class that
   I'll pass into the =run= function. I'll start by defining a new class that
   inherits from =BaseHTTPRequestHandler=.




   #+BEGIN_SRC python
     # Empty placeholder subclass; it adds nothing to the base handler yet.
     class StaticRequestHandler(BaseHTTPRequestHandler):
         pass
   #+END_SRC



   #+NAME: StaticRequestHandler class
   #+BEGIN_SRC python :exports none :noweb yes
     # Tangled form of the handler: the noweb reference below is replaced by
     # the source block named "GET method" when this block is expanded.
     class StaticRequestHandler(BaseHTTPRequestHandler):
         <<GET method>>
   #+END_SRC



   The =StaticRequestHandler= class will match the URL of the request to a SQL
   query, send that query, get the document, and return the document to the
   HTTP client.


*** Get a document by ID
    If you know what document you need by ID, fetch that from the DB directly.
    I believe that this will be the easiest to implement, and it'll be what is
    used for accessing a page long term. In this way, the URL for these will be
    like a citation link.


    According to the docs, my handler needs to define a method named =do_GET= to
    process =GET= requests. I'll add that method first.

    #+BEGIN_SRC python
      # Placeholder: the method exists but does nothing yet.
      def do_GET(self):
          pass
    #+END_SRC


    #+NAME: GET method
    #+BEGIN_SRC python
      def do_GET(self):
          """Answer a GET request by echoing the requested path as plain text.

          Placeholder until paths are resolved against the database. The
          response is written *to* self.wfile; rebinding self.wfile to a
          string would discard the output stream and send nothing back.
          """
          body = self.path.encode("utf-8")
          self.send_response(200)
          self.send_header("Content-Type", "text/plain; charset=utf-8")
          self.send_header("Content-Length", str(len(body)))
          # Headers must be terminated before any body bytes are written.
          self.end_headers()
          self.wfile.write(body)
    #+END_SRC

    According to the docs, the path of the request is stored in =self.path=. At
    first, I was going to write a test/REPL command to take a look at the type
    interactively, but I forgot that Python has cOOPling making it really hard
    to test in the small. I guess I'll have to write a make target to run the
    server and test it in the shell.


    #+NAME: server make target
    #+BEGIN_SRC makefile


      # Make the tangled script executable and start it. Declared phony, like
      # the all target, so a stray file named "server" cannot mask the rule.
      .PHONY: server
      server:
        chmod 755 static.py && ./static.py
    #+END_SRC

*** Get a document by path
    This is the original vision for this project. The =static= web server
    should behave exactly like a regular web server, where paths correspond
    with documents.
*** Error pages, especially 404
    If the document doesn't exist in the database, the class should be able to
    return a 404 page, preferably from the database as well. That leads me to
    conclude that the database should have error pages in a separate table
    where they are indexed solely by HTTP status code. 

    #+NAME: error pages schema
    #+BEGIN_SRC sql
      -- Recreate the table of error pages, keyed only by HTTP status code.
      DROP TABLE IF EXISTS error_pages;

      CREATE TABLE error_pages (
        status_code INT PRIMARY KEY,  -- HTTP status code the page is served for
        page        BLOB              -- body returned for that status
      );
    #+END_SRC


* Files
** =static.py=
   #+BEGIN_SRC python :tangle static.py :noweb yes
     #!/usr/bin/env python3
     from http.server import HTTPServer, BaseHTTPRequestHandler
     import sqlite3

     <<run the server>>

     <<StaticRequestHandler class>>


     def main():
         """Start the server with StaticRequestHandler handling requests."""
         handler = StaticRequestHandler
         run(handler_class=handler)


     if __name__ == "__main__":
         main()
   #+END_SRC
** =test.py=
   #+BEGIN_SRC python :tangle test.py :noweb yes


   import unittest
   # Plain absolute import: "import .static" is not valid Python syntax.
   import static
   #+END_SRC
** =Makefile=
   #+BEGIN_SRC makefile :tangle Makefile :noweb yes
     <<all make target>>

     <<website make target>>

     <<server make target>>
   #+END_SRC


** =schema.sql=
   #+BEGIN_SRC sql :tangle schema.sql :noweb yes
     <<documents schema>>

     <<error pages schema>>
   #+END_SRC
   






Deleted main.idr.
1
2
module Main

<
<