import csv

from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.common.exceptions import NoSuchElementException
from tqdm import tqdm
def get_href(url, href_list):
    """
    url: the URL from which to collect all hyperlinks
    href_list: the list in which to store the collected hyperlinks
    """
    driver = webdriver.Chrome()
    driver.get(url)
    item_data_div_list = driver.find_elements(By.CSS_SELECTOR, '.list-item .item-data')
    for item_data_div in item_data_div_list:
        hyperlink = item_data_div.find_element(By.CSS_SELECTOR, 'a.title')
        href = hyperlink.get_attribute('href')
        href_list.append(href)
    driver.quit()
def write_href(file_name):
    """
    Save all car hyperlinks to a file, one per line.
    """
    href_list = []
    for page_number in range(36):
        url_pro_page = f'https://ev-database.org/#sort:path~type~order=.rank~number~desc|rs-price:prev~next=10000~100000|rs-range:prev~next=0~1000|rs-fastcharge:prev~next=0~1500|rs-acceleration:prev~next=2~23|rs-topspeed:prev~next=110~350|rs-battery:prev~next=10~200|rs-towweight:prev~next=0~2500|rs-eff:prev~next=100~350|rs-safety:prev~next=-1~5|paging:currentPage={page_number}|paging:number=10'
        get_href(url_pro_page, href_list=href_list)
    with open(file_name, mode="w+") as f:
        for href in href_list:
            f.write("%s\n" % href)
def read_href(file_name):
    with open(file_name) as f:
        hrefs = [line.strip() for line in f]
    return hrefs
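# A minimal usage sketch for the two helpers above; the path is only an example
# and assumes the ..\output directory already exists. Crawl the listing pages
# once to persist every detail-page link, then reload them in later sessions
# without re-crawling:
#
#     write_href(r'..\output\href.txt')
#     href_list = read_href(r'..\output\href.txt')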
################# Get Cell Information in Table ############################
def get_all_information(url, charging_file_name, information_file_name):
    driver = webdriver.Chrome()
    driver.get(url)
    model_name = url.split('/')[-1]
    # Scrape the two charging tables and append them to the charging CSV.
    table_classes = ['charging-table-standard', 'charging-table-fast']
    for table_class in table_classes:
        table = driver.find_element(by=By.CLASS_NAME, value=table_class)
        tablerows = table.find_elements(by=By.XPATH, value=".//tr")
        table_name = 'Home and Destination' if table_class == 'charging-table-standard' else 'Fast Charging'
        with open(charging_file_name, mode='a', newline="", encoding="utf-8") as file:
            writer = csv.writer(file)
            writer.writerow([table_name])
            writer.writerow(['Model_name', 'Charging Point', 'Max. Power', 'Power', 'Time Rate'])
            for tr in tablerows:
                cells = tr.find_elements(by=By.XPATH, value=".//td")
                row_data = [model_name] + [cell.get_attribute("textContent").strip() for cell in cells]
                writer.writerow(row_data)
    # Scrape every data table (attribute/value pairs) and append it to the information CSV.
    with open(information_file_name, "a", newline="", encoding="utf-8") as csvfile:
        writer = csv.writer(csvfile)
        writer.writerow(['Model_name', 'Category', 'Attribute', 'Value'])
        data_tables = driver.find_elements(by=By.CLASS_NAME, value="data-table")
        for idx, data_table in enumerate(data_tables):
            try:
                category = data_table.find_element(by=By.XPATH, value=".//h2")
                category_text = category.get_attribute("textContent").strip()
                tables = data_table.find_elements(by=By.TAG_NAME, value="table")
                for table in tables:
                    rows = table.find_elements(by=By.XPATH, value=".//tr")
                    for tr in rows:
                        cells = tr.find_elements(by=By.XPATH, value=".//td")
                        if len(cells) == 2:
                            attribute = cells[0]
                            attribute_text = attribute.get_attribute("textContent").strip()
                            value = cells[1]
                            value_text = value.get_attribute("textContent").strip()
                            writer.writerow([model_name, category_text, attribute_text, value_text])
            except NoSuchElementException:
                print("No category found in this data table")
    driver.quit()
"""
Get the cell contect in the table 'Home and Destination Charging' and
'Fast Charging'
"""
driver = webdriver.Chrome()
driver.get(url)
model_name = url.split('/')[-1]
table_classes = ['charging-table-standard', 'charging-table-fast']
for table_class in table_classes:
table = driver.find_element(by=By.CLASS_NAME, value=table_class)
tablerows = table.find_elements(by=By.XPATH, value=".//tr")
table_name = 'Home and Destination' if table_class == 'charging-table-standard' else 'Fast Charging'
with open(file_name, mode='a', newline="", encoding="utf-8") as file:
writer = csv.writer(file)
writer.writerow([table_name])
writer.writerow(['Model_name','Charging Point', 'Max. Power', 'Power', 'Time Rate'])
for tr in tablerows:
cells = tr.find_elements(by=By.XPATH, value=".//td")
row_data = [model_name] + [cell.get_attribute("textContent").strip() for cell in cells]
writer.writerow(row_data)
driver.quit()
def get_cell_in_data_table_save(url, file_name):
    driver = webdriver.Chrome()
    driver.get(url)
    model_name = url.split('/')[-1]
    with open(file_name, "a", newline="", encoding="utf-8") as csvfile:
        writer = csv.writer(csvfile)
        writer.writerow(['Model_name', 'Category', 'Attribute', 'Value'])
        data_tables = driver.find_elements(by=By.CLASS_NAME, value="data-table")
        for idx, data_table in enumerate(data_tables):
            try:
                category = data_table.find_element(by=By.XPATH, value=".//h2")
                category_text = category.get_attribute("textContent").strip()
                tables = data_table.find_elements(by=By.TAG_NAME, value="table")
                for table in tables:
                    rows = table.find_elements(by=By.XPATH, value=".//tr")
                    for tr in rows:
                        cells = tr.find_elements(by=By.XPATH, value=".//td")
                        if len(cells) == 2:
                            attribute = cells[0]
                            attribute_text = attribute.get_attribute("textContent").strip()
                            value = cells[1]
                            value_text = value.get_attribute("textContent").strip()
                            writer.writerow([model_name, category_text, attribute_text, value_text])
            except NoSuchElementException:
                print("No category found in this data table")
    driver.quit()
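# A minimal usage sketch for the two single-table helpers, assuming href_list
# has already been loaded via read_href; the file paths are only examples:
#
#     example_url = href_list[0]
#     get_cell_in_charging_table_save(example_url, r'..\output\charging_table.csv')
#     get_cell_in_data_table_save(example_url, r'..\output\information.csv')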
#%%
href_list = read_href(r'..\output\href.txt')
# %%
charging_file = r'..\output\charging_table.csv'
information_file = r'..\output\information.csv'
# %%
for url in tqdm(href_list[20:25]):
    get_all_information(url, charging_file, information_file)
# %%
# run after the meeting
for url in tqdm(href_list[25:26]):
    get_all_information(url, charging_file, information_file)