After scraping and analyzing Facebook as a graph (looking at it as a network and seeing how many hops you had to take to get between any two nodes, just like the six degrees of separation hypothesis), I became interested in other things that could be seen as a network. I tried analyzing words via their synonyms by scraping a thesaurus, hoping that I'd be able to show that two words that clearly don't mean the same thing are technically synonyms of synonyms, and so on. I also did the same thing with YouTube videos, scraping them and trying to map out the network of "related videos". The main question was: how many steps would it take to get from a video of your choosing to some fixed target video, like, say, "Shake It Off" by Taylor Swift? I don't think any of these projects yielded any interesting results.
read more
#! /usr/bin/env python
import csv
import re

import gdata.youtube
import gdata.youtube.service
# --- Setup: connect to the YouTube GData API and load the crawl state. ---
client = gdata.youtube.service.YouTubeService()
# NOTE(review): this feed result is never used below; presumably a warm-up /
# connectivity check. Kept so startup behavior is unchanged.
feed = client.GetRecentlyFeaturedVideoFeed()
startingID = 'Fjlw1CGn5qM'  # crawl seed: video ID to start (or resume) from
# youtube.csv holds one row per crawled video: [videoID, related1, related2, ...]
reader = csv.reader(open('youtube.csv', 'rb'), delimiter=',')
writer = csv.writer(open('youtube.csv', 'a'), delimiter=',')
# IDs whose related-video lists have already been fetched and written out.
# (Original copied each row with row[0::1] into a variable shadowing the
# builtin `list`; the copy was pointless, so we read row[0] directly.)
completedNodes = []
for row in reader:
    completedNodes.append(row[0])
print(completedNodes)
def parser(links):
    """Extract the 11-character YouTube video ID from each watch-page URL.

    Parameters
    ----------
    links : list of str
        Watch-page URLs, e.g. ``http://www.youtube.com/watch?v=<ID>&...``.

    Returns
    -------
    list of str
        One video ID per input URL, in order.
    """
    filteredLinks = []
    for link in links:
        # Parse the ?v= query parameter instead of the original hard-coded
        # slice link[32:43], which silently returned garbage whenever the
        # URL prefix length differed (http vs https, extra path, etc.).
        match = re.search(r'[?&]v=([0-9A-Za-z_-]{11})', link)
        if match:
            filteredLinks.append(match.group(1))
        else:
            # Fallback for unexpected URL shapes: the original fixed offsets.
            filteredLinks.append(link[32:43])
    return filteredLinks
def storeNextIDs(currentID, doOne, linksToDo1, linksToDo2, completedNodes, exception):
    """Recursively crawl the "related videos" graph breadth-first-ish.

    Fetches the related-video feed for ``currentID``, writes a CSV row
    ``[currentID, related1, related2, ...]`` via the module-level ``writer``,
    and queues the related IDs onto one of two alternating frontier lists.
    When the *other* frontier list is empty, the roles flip (``doOne``
    toggles) and the function recurses over a snapshot of the current
    frontier, draining it as it goes.

    Parameters (all shared, mutated in place across recursive calls):
      currentID      -- video ID to expand next.
      doOne          -- True: append discoveries to linksToDo1; False: linksToDo2.
      linksToDo1/2   -- the two alternating frontier lists.
      completedNodes -- IDs already written to the CSV; used to skip re-fetching.
      exception      -- resume flag: when True, re-expand ``currentID`` even
                        though it is already in ``completedNodes`` (its row is
                        NOT re-written), so an interrupted crawl can continue.

    NOTE(review): there is no recursion-depth guard; a long crawl will hit
    Python's recursion limit. Also relies on module globals ``client``,
    ``writer`` and ``parser``.
    """
    # Normal path: only expand nodes we have never written out before.
    if (currentID not in completedNodes):
        unfilteredLinks = []
        # Network call: fetch the related-videos feed for this ID.
        relatedVideos = client.GetYouTubeRelatedVideoFeed(video_id=currentID)
        for entry in relatedVideos.entry:
            unfilteredLinks.append(entry.media.player.url)
        # Row layout: current ID first, then its related IDs.
        filteredLinks = []
        filteredLinks.append(currentID)
        filteredLinks.extend(parser(unfilteredLinks))
        writer.writerow(filteredLinks)
        ##print(filteredLinks)
        # Everything after the first element is the newly discovered frontier.
        nextLinks = filteredLinks[1::1]
        if doOne == True:
            linksToDo1.extend(nextLinks)
            # Other list drained: flip roles and recurse over a snapshot,
            # removing each link from the live list before expanding it.
            if len(linksToDo2) == 0:
                doOne = False
                tempLinks = linksToDo1[:]
                for link in tempLinks:
                    linksToDo1.remove(link)
                    storeNextIDs(link, doOne, linksToDo1, linksToDo2, completedNodes, False)
        else:
            # Mirror image of the branch above with the lists swapped.
            linksToDo2.extend(nextLinks)
            if len(linksToDo1) == 0:
                doOne = True
                tempLinks1 = linksToDo2[:]
                for link in tempLinks1:
                    linksToDo2.remove(link)
                    storeNextIDs(link, doOne, linksToDo1, linksToDo2, completedNodes, False)
    # Resume path: duplicate of the block above except the CSV row is not
    # re-written (the node was already recorded in a previous run).
    if exception:
        unfilteredLinks = []
        relatedVideos = client.GetYouTubeRelatedVideoFeed(video_id=currentID)
        for entry in relatedVideos.entry:
            unfilteredLinks.append(entry.media.player.url)
        filteredLinks = []
        filteredLinks.append(currentID)
        filteredLinks.extend(parser(unfilteredLinks))
        ##writer.writerow(filteredLinks)
        ##print(filteredLinks)
        nextLinks = filteredLinks[1::1]
        if doOne == True:
            linksToDo1.extend(nextLinks)
            if len(linksToDo2) == 0:
                doOne = False
                tempLinks = linksToDo1[:]
                for link in tempLinks:
                    linksToDo1.remove(link)
                    storeNextIDs(link, doOne, linksToDo1, linksToDo2, completedNodes, False)
        else:
            linksToDo2.extend(nextLinks)
            if len(linksToDo1) == 0:
                doOne = True
                tempLinks1 = linksToDo2[:]
                for link in tempLinks1:
                    linksToDo2.remove(link)
                    storeNextIDs(link, doOne, linksToDo1, linksToDo2, completedNodes, False)
# --- Driver: start a fresh crawl, or resume an interrupted one. ---
linksToDo1 = []
linksToDo2 = []
doOne = True
shouldBreak = False
if startingID in completedNodes:
    # Resume: re-scan the CSV for the first recorded node that still has an
    # unexpanded related video, and restart the crawl there with the
    # ``exception`` resume flag set. Re-open the file since the module-level
    # reader has already been consumed.
    reader = csv.reader(open('youtube.csv', 'rb'), delimiter=',')
    for row in reader:
        if shouldBreak:
            break
        # (Original copied `row` with row[0::1]; the copy was pointless.)
        print(row)
        items = row[1:]
        print(items)
        whoWeDo = row[0]
        for anItem in items:
            if anItem not in completedNodes:
                storeNextIDs(whoWeDo, doOne, linksToDo1, linksToDo2, completedNodes, True)
                print(whoWeDo)
                shouldBreak = True
                break
else:
    # Fresh crawl from the seed ID.
    print("sup")
    storeNextIDs(startingID, doOne, linksToDo1, linksToDo2, completedNodes, False)
##works fine if we stop when exactly all of the startingID's guys are in the list.
##works fine if we stop when all or more of the startingID's guys are in the list.
##DOES NOT WORK IF WE STOP WHEN NOT ALL OF THE STARTING ID'S guys ARE IN THE LIST
##I'm thinking about checking