# setup
import sqlite3
import comp521

check, report = comp521.start('A5')

import json
books = json.load(file('books.json'))


# I'll create an in-memory database for you. Do all your work in here
import sqlite3
db = sqlite3.Connection(':memory:')
cursor = db.cursor()


# Write code here to create the tables
# use the db variable from above

#Apparently primary key INTs are autoincremented automatically in SQLite

cursor.execute("CREATE TABLE authors (\
email VARCHAR DEFAULT NULL, \
first_name VARCHAR DEFAULT NULL, \
last_name VARCHAR DEFAULT NULL, \
login VARCHAR DEFAULT NULL, \
PRIMARY KEY(login))")

cursor.execute("CREATE TABLE language (\
languageCode VARCHAR DEFAULT NULL, \
PRIMARY KEY(languageCode))")


cursor.execute("CREATE TABLE images (\
url VARCHAR DEFAULT NULL, \
width VARCHAR DEFAULT NULL, \
height INT,\
PRIMARY KEY(url)\
)")


cursor.execute("CREATE TABLE books (\
title VARCHAR DEFAULT NULL, \
created VARCHAR DEFAULT NULL, \
bid INTEGER,\
languageCode VARCHAR DEFAULT NULL, \
login VARCHAR DEFAULT NULL,\
PRIMARY KEY(bid),\
FOREIGN KEY(languageCode) REFERENCES language(languageCode),\
FOREIGN KEY(login) REFERENCES authors(login)\
)")

cursor.execute("CREATE TABLE category (\
categoryTitle VARCHAR DEFAULT NULL, \
cid INTEGER,\
PRIMARY KEY(cid)\
)")

cursor.execute("CREATE TABLE categoryRelations (\
crid INTEGER,\
bid INT,\
cid INT,\
PRIMARY KEY(crid)\
FOREIGN KEY(bid) REFERENCES books(bid),\
FOREIGN KEY(cid) REFERENCES category(cid)\
)")

cursor.execute("CREATE TABLE pages (\
caption VARCHAR DEFAULT NULL, \
pid INTEGER,\
bid INT,\
url VARCHAR DEFAULT NULL, \
PRIMARY KEY(pid),\
FOREIGN KEY(bid) REFERENCES books(bid),\
FOREIGN KEY(url) REFERENCES images(url)\
)")

<sqlite3.Cursor at 0x107c64dc0>


# Write your code here, use the same db as above
# the source data is already in the variable books
# you'll, of course, need to iterate over it.

for book in books:
    #check if language code exists
    languageDiscovery = cursor.execute("SELECT languageCode FROM language WHERE languageCode=?",(book['language'],)).fetchone() 
    if languageDiscovery != None:
        languageIDToInsert = languageDiscovery[0]
    else: 
        cursor.execute("INSERT INTO language (languageCode) VALUES (?)",(book['language'],))
        languageIDToInsert = cursor.execute("SELECT languageCode FROM language WHERE languageCode=?",(book['language'],)).fetchone()[0] 
    
    #check if author/login exists
    authorDiscovery = cursor.execute("SELECT login FROM authors WHERE login=?",(book['login'],)).fetchone()
    if authorDiscovery != None:
        authorLoginToInsert = authorDiscovery[0]
    else: 
        cursor.execute("INSERT INTO authors (login,email,first_name,last_name) VALUES (?,?,?,?)",(book['login'],book['email'],book['first_name'],book['last_name']))
        authorLoginToInsert = cursor.execute("SELECT login FROM authors WHERE login=?",(book['login'],)).fetchone()[0] 
    
    #actually make the book object with these discovered ids
    cursor.execute("INSERT INTO books (title,created,languageCode,login) VALUES (?,?,?,?)",(book['title'],book['created'],languageIDToInsert,authorLoginToInsert))
    
    #get the bookid which we'll need for tying in the other tables
    bookID = cursor.lastrowid
    #^^this could be a problem, not sure if it's really selecting what we want it to. 
    
    #make the new categories if and add a row to our relations table (indicating the many-to-many relation)
    for category in book['categories']:
        categoryDiscovery = cursor.execute("SELECT cid FROM category WHERE categoryTitle=?",(category,)).fetchone()
        if categoryDiscovery != None:
            categoryIDToInsert = categoryDiscovery[0]
        else: 
            cursor.execute("INSERT INTO category (categoryTitle) VALUES (?)",(category,))
            categoryIDToInsert = cursor.execute("SELECT cid FROM category WHERE categoryTitle=?",(category,)).fetchone()[0] 

        cursor.execute("INSERT INTO categoryRelations (bid,cid) VALUES (?,?)",(bookID,categoryIDToInsert))
    
    #go through each page and add it
    for page in book['pages']:
        #check to see if the image for that page has been created
        imageDiscovery = cursor.execute("SELECT url FROM images WHERE url=?",(page['url'],)).fetchone() 
        if imageDiscovery != None:
            imageURLToInsert = imageDiscovery[0]
        else: 
            cursor.execute("INSERT INTO images (url,width,height) VALUES (?,?,?)",(page['url'],page['width'],page['height']))
            imageURLToInsert = cursor.execute("SELECT url FROM images WHERE url=?",(page['url'],)).fetchone()[0] 
        
        cursor.execute("INSERT INTO pages (caption,bid,url) VALUES (?,?,?)",(page['caption'],bookID,imageURLToInsert))
   
    
'''
cursor.execute("SELECT * FROM books")
print cursor.fetchone()
cursor.execute("SELECT * FROM category")
print cursor.fetchone()
cursor.execute("SELECT * FROM categoryRelations")
print cursor.fetchone()
cursor.execute("SELECT * FROM pages")
print cursor.fetchone()
cursor.execute("SELECT * FROM images")
print cursor.fetchone()
'''

'\ncursor.execute("SELECT * FROM books")\nprint cursor.fetchone()\ncursor.execute("SELECT * FROM category")\nprint cursor.fetchone()\ncursor.execute("SELECT * FROM categoryRelations")\nprint cursor.fetchone()\ncursor.execute("SELECT * FROM pages")\nprint cursor.fetchone()\ncursor.execute("SELECT * FROM images")\nprint cursor.fetchone()\n'


# a helper for the questions below
def getTableSize(name):
    return cursor.execute('''select count(*) from %s''' % name).fetchone()[0]

# How many entries are in your authors table?
NumberOfAuthors = getTableSize("authors")
check("Part 2 Number Of Authors", NumberOfAuthors, points=5)

# How many images are in your table?
NumberOfImages = getTableSize("images")
check("Part 2 Number Of Images", NumberOfImages, points=5)

# How many pages?
NumberOfPages = getTableSize("pages")
check("Part 2 Number of Pages", NumberOfPages, points=5)

Part 2 Number Of Authors correct
Part 2 Number Of Images correct
Part 2 Number of Pages correct


def listCategoryCounts(language):
    cursor.execute("SELECT C.categoryTitle,COUNT(B.bid) FROM books B, category C, categoryRelations CR WHERE \
    B.languageCode = ? \
    AND \
    B.bid = CR.bid \
    AND \
    CR.cid = C.cid \
    GROUP BY C.categoryTitle \
    ORDER BY C.categoryTitle ASC",(language,))
    
    return cursor.fetchall()

r = listCategoryCounts('en')

check('Part 3', r, points=20)

Part 3 correct


#The "first image" part of this problem was really annoying. 

def listBooks(language, category, chunk):
    # language is the 2-letter language code
    # category is a category name
    # chunk is an integer. 0 means the first 12 books, 1 means the next 12, etc.
    # write your code here
    cursor.execute("SELECT B.bid,B.title,A.last_name,COUNT(P.pid),I2.URL\
    FROM books B, category C, categoryRelations CR, authors A, pages P, images I\
    LEFT OUTER JOIN images I2 WHERE \
    B.languageCode = ? \
    AND \
    C.categoryTitle = ? \
    AND \
    B.bid = CR.bid \
    AND \
    CR.cid = C.cid \
    AND \
    B.login = A.login \
    AND \
    P.url = I.url \
    AND \
    P.bid = B.bid \
    AND \
    I2.URL = (SELECT P2.url FROM pages P2 WHERE P2.bid = B.bid ORDER BY P2.pid)\
    GROUP BY B.bid \
    ORDER BY B.created DESC \
    LIMIT ?*12,12",(language,category,chunk))
    
    return cursor.fetchall()

r1 = listBooks('en', 'Animals and Nature', 0)

# I'm stripping off the ID before testing
check('Part 4 page 1', [ r[1:] for r in r1 ], points=10)

r2 = listBooks('en', 'Animals and Nature', 1)
toRead = r2[-1][0] # get the id of the last book on the second page, we'll read it below
# I'm stripping off the ID before testing
check('Part 4 page 2', [ r[1:] for r in r2], points=10)

[(999, u'Seaside Aquarium', u'Simpson', 8, u'/cache/images/21/122430121_a653d2f8ab.jpg'), (998, u'The Funny Crab', u'Bennett', 5, u'/cache/images/49/5638860949_d89877e19d.jpg'), (994, u'Polar Bears', u'Rogers', 10, u'/cache/images/70/3582475670_df8e8e8974.jpg'), (993, u'Horse', u'Rice', 6, u'/cache/images/79/57632579_c7482974fe.jpg'), (990, u'The Mouse', u'Kelly', 13, u'/cache/images/39/8093539_473996654f.jpg'), (936, u'Cats', u'Patterson', 8, u'/cache/images/70/5110834170_0797f39278.jpg'), (987, u'The Tired Lion', u'Bennett', 4, u'/cache/images/57/5657669257_d4be2a3f98.jpg'), (977, u'My book on how to help the Earth.', u'Harper', 8, u'/cache/images/48/2435883148_0eba86944f.jpg'), (976, u'Help the Earth', u'Harper', 7, u'/cache/images/77/3717759677_4a520a1dbb.jpg'), (975, u'My book about how to save the Earth', u'Harper', 8, u'/cache/images/77/3717759677_4a520a1dbb.jpg'), (986, u'Farm Animals', u'Reynolds', 8, u'/cache/images/89/291005289_a05ae73053.jpg'), (982, u"Sharp's Survival Chance", u'Bennett', 5, u'/cache/images/09/14539630909_c269d1423e.jpg')]

---------------------------------------------------------------------------
TypeError                                 Traceback (most recent call last)
<ipython-input-8-6bb4e19381b8> in <module>()
     13 
     14 # I'm stripping off the ID before testing
---> 15 check('Part 4 page 1', [ r[1:] for r in r1 ], points=10)
     16 
     17 r2 = listBooks('en', 'Animals and Nature', 1)

TypeError: 'NoneType' object is not iterable


def fetchBook(id):
    book = {} # fill in the values here
    bookRow = cursor.execute("SELECT title,created,languageCode,B.login,email,first_name,last_name FROM books B,authors A WHERE B.bid = ? AND B.login = A.login",(id,)).fetchone()
    book['title'] = bookRow[0]
    book['created'] = bookRow[1]
    book['language'] = bookRow[2]
    book['login'] = bookRow[3]
    book['email'] = bookRow[4]
    book['first_name'] = bookRow[5]
    book['last_name'] = bookRow[6]
    categoriesForBook = cursor.execute("SELECT categoryTitle FROM books B,categoryRelations CR,category C WHERE B.bid = ? AND CR.cid = C.cid AND CR.bid = B.bid",(id,)).fetchall()
    book['categories'] = [category[0] for category in categoriesForBook]
    pagesForBook = cursor.execute("SELECT I.url,width,height,caption FROM images I,pages P,books B WHERE B.bid = ? AND P.bid = B.bid AND I.url = P.url",(id,)).fetchall()
    jsonPagesForBook = []
    for page in pagesForBook:
        jsonPage = {}
        jsonPage['url'] = page[0]
        jsonPage['width'] = int(page[1])
        jsonPage['height'] = int(page[2])
        jsonPage['caption'] = page[3]
        jsonPagesForBook.append(jsonPage)
    book['pages'] = jsonPagesForBook
    
    #Write your code here
    
    return json.dumps(book, sort_keys=True)
    
b = fetchBook(toRead)
check('Part 5', b, points=30)

Part 5 correct


def listAuthor(login):
    # login is the unique login for an author
    cursor.execute("SELECT B.bid,B.title,A.last_name,COUNT(DISTINCT(P.pid)),I2.URL\
    FROM books B, category C, categoryRelations CR, authors A, pages P, images I\
    LEFT OUTER JOIN images I2 WHERE \
    B.bid = CR.bid \
    AND \
    CR.cid = C.cid \
    AND \
    B.login = A.login \
    AND \
    P.url = I.url \
    AND \
    P.bid = B.bid \
    AND \
    I2.URL = (SELECT P2.url FROM pages P2 WHERE P2.bid = B.bid ORDER BY P2.pid)\
    AND \
    A.login = ? \
    GROUP BY B.bid \
    ORDER BY B.created DESC \
    ",(login,))
    return cursor.fetchall()

r1 = listAuthor('jhansenrr')
# I'm stripping off the ID before testing
check('Part 6', [ r[1:] for r in r1 ], points=15)

Part 6 correct


onyen = 'jpuccio'
collaborators = ['Zen Yang',]
report(onyen, collaborators)

Part 2 Number Of Authors
Part 2 Number Of Images
Part 2 Number of Pages
Part 3
Part 4 page 1
Part 4 page 2
Part 5
Part 6
Report for jpuccio
  Collaborators: ['Zen Yang']
  8 of 8 correct, 100 of 100 points

Assignment 5¶

Your mission¶

1. Design the Database¶

2. Load the data into your database.¶

Verify your tables¶

3. List the categories with the number of books in each for a given language.¶

4. List the books in a category.¶

5. Fetch a single book¶

6. List the books by an author.¶

Done!¶