From fbee7a1136652fab4c15aa6f94799b6d3f778ef2 Mon Sep 17 00:00:00 2001 From: jcheng <jiaying.cheng@iee.fraunhofer.de> Date: Mon, 6 May 2024 10:39:30 +0200 Subject: [PATCH] add changes --- basic/utils.py | 57 +++++++++++++++++++++++++++++++------------------- 1 file changed, 35 insertions(+), 22 deletions(-) diff --git a/basic/utils.py b/basic/utils.py index 2fd8596..464b996 100644 --- a/basic/utils.py +++ b/basic/utils.py @@ -1,6 +1,8 @@ +#%% from selenium import webdriver from selenium.webdriver.common.by import By import csv +from tqdm import tqdm ################# Get Hyperlink ############################ @@ -37,12 +39,12 @@ def write_href(file_name): def read_href(file_name): with open(file_name) as f: - hrefs = f.read.splitlines() + hrefs = [line.strip() for line in f] return hrefs ################# Get Cell Infomation in Table ############################ -def get_cell_in_charging_table(url, table_class, file_name): +def get_cell_in_charging_tables(url, file_name): """ Get the cell contect in the table 'Home and Destination Charging' and 'Fast Charging' @@ -51,35 +53,33 @@ def get_cell_in_charging_table(url, table_class, file_name): driver = webdriver.Chrome() driver.get(url) model_name = url.split('/')[-1] - - table = driver.find_element(by=By.CLASS_NAME, value=table_class) - - tablerows = table.find_elements(by=By.XPATH, value=".//tr") - table_name = 'Home and Destination' if table_class == 'charging-table-standard' else 'Fast Charging' - - with open(file_name, mode='a', newline="", encoding="utf-8") as file: - writer = csv.writer(file) - writer.writerow([table_class]) - writer.writerow(['Model_name','Charging Point', 'Max. Power', 'Power', 'Time Rate']) - - for tr in tablerows: - cells = tr.find_elements(by=By.XPATH, value=".//td") - row_data = [model_name] + [cell.get_attribute("textContent").strip() for cell in cells] - writer.writerow(row_data) - driver.quit() - -def get_cell_in_charging_tables(url): table_classes = ['charging-table-standard', 'charging-table-fast'] for table_class in table_classes: - get_cell_in_charging_table(url, table_class) + + table = driver.find_element(by=By.CLASS_NAME, value=table_class) + + tablerows = table.find_elements(by=By.XPATH, value=".//tr") + table_name = 'Home and Destination' if table_class == 'charging-table-standard' else 'Fast Charging' + + with open(file_name, mode='a', newline="", encoding="utf-8") as file: + writer = csv.writer(file) + + writer.writerow([table_name]) + writer.writerow(['Model_name','Charging Point', 'Max. Power', 'Power', 'Time Rate']) + + for tr in tablerows: + cells = tr.find_elements(by=By.XPATH, value=".//td") + row_data = [model_name] + [cell.get_attribute("textContent").strip() for cell in cells] + writer.writerow(row_data) + driver.quit() def get_cell_in_data_table_save(url, file_name): driver = webdriver.Chrome() driver.get(url) model_name = url.split('/')[-1] - with open(file_name, "w", newline="", encoding="utf-8") as csvfile: + with open(file_name, "a", newline="", encoding="utf-8") as csvfile: writer = csv.writer(csvfile) writer.writerow(['Model_name''Category', 'Attribute', 'Value']) @@ -106,3 +106,16 @@ def get_cell_in_data_table_save(url, file_name): print("There aren't category") driver.quit() + +#%% +href_list = read_href('..\output\href.txt') + +# %% +charging_file = '..\output\charging_table.csv' +for url in tqdm(href_list[20:30]): + get_cell_in_charging_tables(url, charging_file) +# %% +information_file = '..\output\information.csv' +for url in tqdm(href_list[0:10]): + get_cell_in_data_table_save(url, information_file) +# %% -- GitLab