After scraping and analyzing Facebook as a graph (looking at it as a network and seeing how many hops you had to take to get between any two nodes, just like the six degrees of separation hypothesis), I became interested in other things that could be seen as a network. I tried analyzing words via their synonyms by scraping a thesaurus, hoping that I'd be able to show that two words that clearly don't mean the same thing are technically synonyms of synonyms, and so on. I also did the same thing with YouTube videos, scraping them and trying to map out the network of "related videos". The main question was: how many steps would it take to get from a video of your choosing to some fixed target video, like, say, "Shake It Off" by Taylor Swift? I don't think any of these projects yielded any interesting results.
read more
#! /usr/bin/env python
import csv
import re

import gdata.youtube
import gdata.youtube.service
# --- Setup: connect to the YouTube GData API and load the crawl state. ---
client = gdata.youtube.service.YouTubeService()
# NOTE(review): this feed result is never used below; presumably a warm-up /
# connectivity check. Kept so startup behavior is unchanged.
feed = client.GetRecentlyFeaturedVideoFeed()
startingID = 'Fjlw1CGn5qM'  # crawl seed: video ID to start (or resume) from
# youtube.csv holds one row per crawled video: [videoID, related1, related2, ...]
reader = csv.reader(open('youtube.csv', 'rb'), delimiter=',')
writer = csv.writer(open('youtube.csv', 'a'), delimiter=',')
# IDs whose related-video lists have already been fetched and written out.
# (Original copied each row with row[0::1] into a variable shadowing the
# builtin `list`; the copy was pointless, so we read row[0] directly.)
completedNodes = []
for row in reader:
    completedNodes.append(row[0])
print(completedNodes)
def parser(links):
    """Extract the 11-character YouTube video ID from each watch-page URL.

    Parameters
    ----------
    links : list of str
        Watch-page URLs, e.g. ``http://www.youtube.com/watch?v=<ID>&...``.

    Returns
    -------
    list of str
        One video ID per input URL, in order.
    """
    filteredLinks = []
    for link in links:
        # Parse the ?v= query parameter instead of the original hard-coded
        # slice link[32:43], which silently returned garbage whenever the
        # URL prefix length differed (http vs https, extra path, etc.).
        match = re.search(r'[?&]v=([0-9A-Za-z_-]{11})', link)
        if match:
            filteredLinks.append(match.group(1))
        else:
            # Fallback for unexpected URL shapes: the original fixed offsets.
            filteredLinks.append(link[32:43])
    return filteredLinks
def storeNextIDs(currentID, doOne, linksToDo1, linksToDo2, completedNodes, exception):
    """Recursively crawl the "related videos" graph breadth-first-ish.

    Fetches the related-video feed for ``currentID``, writes a CSV row
    ``[currentID, related1, related2, ...]`` via the module-level ``writer``,
    and queues the related IDs onto one of two alternating frontier lists.
    When the *other* frontier list is empty, the roles flip (``doOne``
    toggles) and the function recurses over a snapshot of the current
    frontier, draining it as it goes.

    Parameters (all shared, mutated in place across recursive calls):
      currentID      -- video ID to expand next.
      doOne          -- True: append discoveries to linksToDo1; False: linksToDo2.
      linksToDo1/2   -- the two alternating frontier lists.
      completedNodes -- IDs already written to the CSV; used to skip re-fetching.
      exception      -- resume flag: when True, re-expand ``currentID`` even
                        though it is already in ``completedNodes`` (its row is
                        NOT re-written), so an interrupted crawl can continue.

    NOTE(review): there is no recursion-depth guard; a long crawl will hit
    Python's recursion limit. Also relies on module globals ``client``,
    ``writer`` and ``parser``.
    """
    # Normal path: only expand nodes we have never written out before.
    if (currentID not in completedNodes):
        unfilteredLinks = []
        # Network call: fetch the related-videos feed for this ID.
        relatedVideos = client.GetYouTubeRelatedVideoFeed(video_id=currentID)
        for entry in relatedVideos.entry:
            unfilteredLinks.append(entry.media.player.url)
        # Row layout: current ID first, then its related IDs.
        filteredLinks = []
        filteredLinks.append(currentID)
        filteredLinks.extend(parser(unfilteredLinks))
        writer.writerow(filteredLinks)
        ##print(filteredLinks)
        # Everything after the first element is the newly discovered frontier.
        nextLinks = filteredLinks[1::1]
        if doOne == True:
            linksToDo1.extend(nextLinks)
            # Other list drained: flip roles and recurse over a snapshot,
            # removing each link from the live list before expanding it.
            if len(linksToDo2) == 0:
                doOne = False
                tempLinks = linksToDo1[:]
                for link in tempLinks:
                    linksToDo1.remove(link)
                    storeNextIDs(link, doOne, linksToDo1, linksToDo2, completedNodes, False)
        else:
            # Mirror image of the branch above with the lists swapped.
            linksToDo2.extend(nextLinks)
            if len(linksToDo1) == 0:
                doOne = True
                tempLinks1 = linksToDo2[:]
                for link in tempLinks1:
                    linksToDo2.remove(link)
                    storeNextIDs(link, doOne, linksToDo1, linksToDo2, completedNodes, False)
    # Resume path: duplicate of the block above except the CSV row is not
    # re-written (the node was already recorded in a previous run).
    if exception:
        unfilteredLinks = []
        relatedVideos = client.GetYouTubeRelatedVideoFeed(video_id=currentID)
        for entry in relatedVideos.entry:
            unfilteredLinks.append(entry.media.player.url)
        filteredLinks = []
        filteredLinks.append(currentID)
        filteredLinks.extend(parser(unfilteredLinks))
        ##writer.writerow(filteredLinks)
        ##print(filteredLinks)
        nextLinks = filteredLinks[1::1]
        if doOne == True:
            linksToDo1.extend(nextLinks)
            if len(linksToDo2) == 0:
                doOne = False
                tempLinks = linksToDo1[:]
                for link in tempLinks:
                    linksToDo1.remove(link)
                    storeNextIDs(link, doOne, linksToDo1, linksToDo2, completedNodes, False)
        else:
            linksToDo2.extend(nextLinks)
            if len(linksToDo1) == 0:
                doOne = True
                tempLinks1 = linksToDo2[:]
                for link in tempLinks1:
                    linksToDo2.remove(link)
                    storeNextIDs(link, doOne, linksToDo1, linksToDo2, completedNodes, False)
# --- Driver: start a fresh crawl, or resume an interrupted one. ---
linksToDo1 = []
linksToDo2 = []
doOne = True
shouldBreak = False
if startingID in completedNodes:
    # Resume: re-scan the CSV for the first recorded node that still has an
    # unexpanded related video, and restart the crawl there with the
    # ``exception`` resume flag set. Re-open the file since the module-level
    # reader has already been consumed.
    reader = csv.reader(open('youtube.csv', 'rb'), delimiter=',')
    for row in reader:
        if shouldBreak:
            break
        # (Original copied `row` with row[0::1]; the copy was pointless.)
        print(row)
        items = row[1:]
        print(items)
        whoWeDo = row[0]
        for anItem in items:
            if anItem not in completedNodes:
                storeNextIDs(whoWeDo, doOne, linksToDo1, linksToDo2, completedNodes, True)
                print(whoWeDo)
                shouldBreak = True
                break
else:
    # Fresh crawl from the seed ID.
    print("sup")
    storeNextIDs(startingID, doOne, linksToDo1, linksToDo2, completedNodes, False)
##works fine if we stop when exactly all of the startingID's guys are in the list.
##works fine if we stop when all or more of the startingID's guys are in the list.
##DOES NOT WORK IF WE STOP WHEN NOT ALL OF THE STARTING ID'S guys ARE IN THE LIST
##I'm thinking about checking