diff options
author | Calvin Morrison <calvin@fastmailteam.com> | 2023-03-14 13:39:22 -0400 |
---|---|---|
committer | Calvin Morrison <calvin@fastmailteam.com> | 2023-03-14 13:39:22 -0400 |
commit | 1041db2cd2040db777bea22a309c200c21c0ebb5 (patch) | |
tree | efdc881764f15ead9b137835658e66d401ba9382 /scrape_technical.py |
initial commit
Diffstat (limited to 'scrape_technical.py')
-rw-r--r-- | scrape_technical.py | 26 |
1 files changed, 26 insertions, 0 deletions
"""Scrape the "technical" table from product pages into one JSON document.

Reads page URLs from standard input (one per line), loads each in a Chrome
WebDriver session, and collects the rows of the page's technical table keyed
by the product's SKU element.  Each SKU is printed as its page is processed,
and the accumulated mapping is printed as JSON once all URLs are done.
"""

import csv
import sys
import json
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.common.by import By


def _inner_html(parent, selector):
    """Return the innerHTML of the first element under *parent* matching *selector*."""
    return parent.find_element(By.CSS_SELECTOR, selector).get_attribute("innerHTML")


def _read_technical_rows(driver):
    """Return a ``{header: data}`` dict for the technical table on the loaded page."""
    rows = driver.find_elements(
        By.CSS_SELECTOR, ".productView-table.technical .productView-table-row"
    )
    specs = {}
    for row in rows:
        header = _inner_html(row, ".productView-table-header")
        specs[header] = _inner_html(row, ".productView-table-data")
    return specs


def main():
    """Scrape every URL given on stdin and print the results as JSON."""
    driver = webdriver.Chrome()
    products = {}
    try:
        # Iterate stdin directly; there is no need to hold every line in memory.
        for line in sys.stdin:
            url = line.strip()
            if not url:
                # A blank line is not a URL; asking the browser to load it
                # would abort the whole run.
                continue
            driver.get(url)

            pname = _inner_html(driver, ".productView-info-value--sku")
            # Progress output; kept on stdout as in the original script, so it
            # precedes the final JSON line in the same stream.
            print(pname)
            products[pname] = _read_technical_rows(driver)
    finally:
        # Always shut the browser down, even when a page fails to scrape, so
        # no Chrome/chromedriver process is left running.
        driver.quit()

    print(json.dumps(products))


if __name__ == "__main__":
    main()