From fbee7a1136652fab4c15aa6f94799b6d3f778ef2 Mon Sep 17 00:00:00 2001
From: jcheng <jiaying.cheng@iee.fraunhofer.de>
Date: Mon, 6 May 2024 10:39:30 +0200
Subject: [PATCH] Fix read_href, merge charging-table scraping into one function, add run cells

---
 basic/utils.py | 57 +++++++++++++++++++++++++++++++-------------------
 1 file changed, 35 insertions(+), 22 deletions(-)

diff --git a/basic/utils.py b/basic/utils.py
index 2fd8596..464b996 100644
--- a/basic/utils.py
+++ b/basic/utils.py
@@ -1,6 +1,8 @@
+#%%
 from selenium import webdriver
 from selenium.webdriver.common.by import By
 import csv
+from tqdm import tqdm
 
 ################# Get Hyperlink ############################
 
@@ -37,12 +39,12 @@ def write_href(file_name):
 
 def read_href(file_name):
     with open(file_name) as f:
-        hrefs = f.read.splitlines()
+        hrefs = [line.strip() for line in f]  # one list entry per line of the file, with surrounding whitespace/newline stripped
     return hrefs
 
 ################# Get Cell Infomation in Table ############################
 
-def get_cell_in_charging_table(url, table_class, file_name):
+def get_cell_in_charging_tables(url, file_name):
     """
     Get the cell contect in the table 'Home and Destination Charging' and 
     'Fast Charging'
@@ -51,35 +53,33 @@ def get_cell_in_charging_table(url, table_class, file_name):
     driver = webdriver.Chrome()
     driver.get(url)
     model_name = url.split('/')[-1]
-    
-    table = driver.find_element(by=By.CLASS_NAME, value=table_class)
-    
-    tablerows = table.find_elements(by=By.XPATH, value=".//tr")
-    table_name = 'Home and Destination' if table_class == 'charging-table-standard' else 'Fast Charging'
-
-    with open(file_name, mode='a', newline="", encoding="utf-8") as file:
-        writer = csv.writer(file)
 
-        writer.writerow([table_class])
-        writer.writerow(['Model_name','Charging Point', 'Max. Power', 'Power', 'Time Rate'])
-    
-        for tr in tablerows:
-            cells = tr.find_elements(by=By.XPATH, value=".//td")
-            row_data = [model_name] + [cell.get_attribute("textContent").strip() for cell in cells]
-            writer.writerow(row_data)
-    driver.quit()
-
-def get_cell_in_charging_tables(url):
     table_classes = ['charging-table-standard', 'charging-table-fast']
     for table_class in table_classes:
-        get_cell_in_charging_table(url, table_class)
+
+        table = driver.find_element(by=By.CLASS_NAME, value=table_class)
+        
+        tablerows = table.find_elements(by=By.XPATH, value=".//tr")
+        table_name = 'Home and Destination' if table_class == 'charging-table-standard' else 'Fast Charging'
+
+        with open(file_name, mode='a', newline="", encoding="utf-8") as file:
+            writer = csv.writer(file)
+
+            writer.writerow([table_name])
+            writer.writerow(['Model_name','Charging Point', 'Max. Power', 'Power', 'Time Rate'])
+        
+            for tr in tablerows:
+                cells = tr.find_elements(by=By.XPATH, value=".//td")
+                row_data = [model_name] + [cell.get_attribute("textContent").strip() for cell in cells]
+                writer.writerow(row_data)
+    driver.quit()
 
 def get_cell_in_data_table_save(url, file_name):
     driver = webdriver.Chrome()
     driver.get(url)
     model_name = url.split('/')[-1]
 
-    with open(file_name, "w", newline="", encoding="utf-8") as csvfile:
+    with open(file_name, "a", newline="", encoding="utf-8") as csvfile:
         writer = csv.writer(csvfile)
 
         writer.writerow(['Model_name''Category', 'Attribute', 'Value'])
@@ -106,3 +106,16 @@ def get_cell_in_data_table_save(url, file_name):
                 print("There aren't category")
             
     driver.quit()
+
+#%% Load the list of hyperlinks from the text file (one entry per line)
+href_list = read_href(r'..\output\href.txt')  # raw string: '\o' and '\h' are not valid escape sequences
+
+# %% Append the charging tables of entries 20..29 of href_list to one CSV file
+charging_file = r'..\output\charging_table.csv'  # raw string keeps the backslashes literal
+for url in tqdm(href_list[20:30]):
+    get_cell_in_charging_tables(url, charging_file)
+# %% Append the data tables of entries 0..9 of href_list to one CSV file
+information_file = r'..\output\information.csv'  # raw string keeps the backslashes literal
+for url in tqdm(href_list[0:10]):
+    get_cell_in_data_table_save(url, information_file)
+# %%
-- 
GitLab