Browse Source

Added code to scrape iFixit smartphone website and generate lists of each metric

pull/1/head
Ravi Shah 5 years ago
parent
commit
8690994b93
  1. 52
      scraper.py

52
scraper.py

@ -1 +1,53 @@
import time
from bs4 import BeautifulSoup
import requests
import re
def get_device_manufacturers(device_names):
    """Extract the manufacturer name from each device-name cell.

    Every item is converted with ``str()`` and treated as serialized HTML.
    The manufacturer is the text that sits between the cell's opening
    ``<div ...>`` tag and the next tag, with surrounding whitespace removed.

    Args:
        device_names: Iterable of elements (or plain strings) whose string
            form is the markup of a device-name cell.

    Returns:
        A list of manufacturer strings, one per input item, in input order.
        An item with no text before its first nested tag yields ``''``.
    """
    # Match the opening <div> regardless of its attributes or of the
    # whitespace/newline style that follows it; matching one exact literal
    # silently produced '' whenever the serialization differed slightly.
    opening_div = re.compile(r'\A\s*<div\b[^>]*>')

    manufacturers = []
    for cell in device_names:
        markup = opening_div.sub('', str(cell))
        # Everything up to the next tag is the manufacturer text.
        manufacturers.append(markup.partition('<')[0].strip())
    return manufacturers
def get_device_models(device_names, soup):
    """Return the text of every ``<span class="selected">`` in the page.

    Args:
        device_names: Unused; kept so existing callers keep working.
        soup: Parsed document exposing BeautifulSoup's ``find_all``.

    Returns:
        A list of strings, one per matching span, in document order.
    """
    # get_text() returns the element's text whatever extra classes,
    # attributes or nested tags the span carries; replacing the exact
    # '<span class="selected">' literal left markup behind in those cases.
    return [
        span.get_text()
        for span in soup.find_all("span", {"class": "selected"})
    ]
def format_device_scores(device_scores, soup):
    """Return the text of every ``<h3>`` element in the page.

    Args:
        device_scores: Unused; kept so existing callers keep working.
        soup: Parsed document exposing BeautifulSoup's ``find_all``.

    Returns:
        A list of strings, one per ``<h3>``, in document order.
    """
    # NOTE(review): the search is page-wide and ignores device_scores, so any
    # unrelated <h3> heading would also be returned -- confirm against the
    # live page that only score headings use <h3>.
    # get_text() copes with attributes on the tag, which the previous
    # exact-literal '<h3>' replacement did not.
    return [heading.get_text() for heading in soup.find_all("h3")]
def data_from_each(list_of_manufacturers_without_tags, list_of_models_without_tags, list_of_scores_without_tag):
    """Print one ``manufacturer model score`` line per device.

    The three lists are walked in lockstep. If they differ in length, output
    stops at the shortest one instead of raising ``IndexError`` part-way
    through printing.

    Args:
        list_of_manufacturers_without_tags: Manufacturer names.
        list_of_models_without_tags: Model names, index-aligned with the
            manufacturers.
        list_of_scores_without_tag: Scores, index-aligned with the
            manufacturers.
    """
    rows = zip(
        list_of_manufacturers_without_tags,
        list_of_models_without_tags,
        list_of_scores_without_tag,
    )
    for manufacturer, model, score in rows:
        print(manufacturer, model, score)
def main():
    """Fetch the iFixit smartphone repairability page and print its data.

    Downloads the page, collects the device-name and device-score cells,
    derives the manufacturer, model and score lists from them, and prints
    one line per device.

    Raises:
        requests.RequestException: If the request times out, fails, or the
            server answers with an HTTP error status.
    """
    link = "https://www.ifixit.com/smartphone-repairability"
    # requests applies no timeout by default, so an unresponsive server
    # would otherwise block the script forever.
    page = requests.get(link, timeout=30)
    # Fail loudly on 4xx/5xx rather than parsing an error page into
    # empty lists.
    page.raise_for_status()
    soup = BeautifulSoup(page.content, 'html.parser')

    device_names = soup.find_all("div", {"class": "cell device-name"})
    device_scores = soup.find_all("div", {"class": "cell device-score"})

    list_of_manufacturers_without_tags = get_device_manufacturers(device_names)
    list_of_models_without_tags = get_device_models(device_names, soup)
    list_of_scores_without_tag = format_device_scores(device_scores, soup)
    data_from_each(
        list_of_manufacturers_without_tags,
        list_of_models_without_tags,
        list_of_scores_without_tag,
    )
# Run the scraper only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
Loading…
Cancel
Save