Skip to content
Snippets Groups Projects
utils.py 6.92 KiB
Newer Older
Jiaying Cheng's avatar
Jiaying Cheng committed
#%%
Jiaying Cheng's avatar
Jiaying Cheng committed
from selenium import webdriver
from selenium.webdriver.common.by import By
Jiaying Cheng's avatar
Jiaying Cheng committed
import csv
Jiaying Cheng's avatar
Jiaying Cheng committed
from tqdm import tqdm
Jiaying Cheng's avatar
Jiaying Cheng committed

Jiaying Cheng's avatar
Jiaying Cheng committed
################# Get Hyperlink ############################
Jiaying Cheng's avatar
Jiaying Cheng committed

Jiaying Cheng's avatar
Jiaying Cheng committed
def get_href(url, href_list):
    """
    Collect the hyperlink of every listed item on one page.

    url: The url that you want to get all the possible hyperlinks
    href_list: The list where you want to save all the hyperlink. The
        links found on the page are appended to this list in place, so
        the caller sees them after the call.
    """
    driver = webdriver.Chrome()
    try:
        driver.get(url)

        # Bug fix: the list passed in by the caller is extended directly.
        # It used to be rebound to a fresh local list, which meant the
        # caller never received any of the collected links.
        item_data_div_list = driver.find_elements(By.CSS_SELECTOR, '.list-item .item-data')
        for item_data_div in item_data_div_list:
            hyperlink = item_data_div.find_element(By.CSS_SELECTOR, 'a.title')
            href_list.append(hyperlink.get_attribute('href'))
    finally:
        # Always close the browser, even when a lookup above raises.
        driver.quit()
Jiaying Cheng's avatar
Jiaying Cheng committed

Jiaying Cheng's avatar
Jiaying Cheng committed
def write_href(file_name):
    """
    Save all the hyperlink of cars in a file.

    Calls get_href once for each of the page numbers 0..35 of the
    ev-database.org listing, passing the same list every time, and then
    writes each entry of that list to file_name on its own line.
    """
    page_urls = [
        f'https://ev-database.org/#sort:path~type~order=.rank~number~desc|rs-price:prev~next=10000~100000|rs-range:prev~next=0~1000|rs-fastcharge:prev~next=0~1500|rs-acceleration:prev~next=2~23|rs-topspeed:prev~next=110~350|rs-battery:prev~next=10~200|rs-towweight:prev~next=0~2500|rs-eff:prev~next=100~350|rs-safety:prev~next=-1~5|paging:currentPage={page_number}|paging:number=10'
        for page_number in range(36)
    ]

    collected = []
    for page_url in page_urls:
        get_href(page_url, href_list=collected)

    with open(file_name, mode="w+") as out_file:
        out_file.writelines("%s\n" % link for link in collected)

def read_href(file_name):
    """
    Load the hyperlinks stored in a text file.

    file_name: Path of the file to read.
    Returns a list with one entry per line of the file, each stripped of
    leading and trailing whitespace (blank lines become empty strings).
    """
    links = []
    with open(file_name) as href_file:
        for raw_line in href_file:
            links.append(raw_line.strip())
    return links

################# Get Cell Information in Table ############################

Jiaying Cheng's avatar
Jiaying Cheng committed
def get_all_information(url, charging_file_name, information_file_name):
    """
    Scrape one car page in a single browser session and append both its
    charging tables and its data tables to CSV files.

    url: Page of the car. The part after the last '/' is used as the
        model name written in the first column of every row.
    charging_file_name: CSV file (opened in append mode) that receives
        the rows of the tables with class 'charging-table-standard' and
        'charging-table-fast'.
    information_file_name: CSV file (opened in append mode) that receives
        one (model, category, attribute, value) row for every table row
        with exactly two cells inside the 'data-table' elements.

    If one of the charging tables cannot be found, the exception raised
    by find_element propagates; the browser is closed in every case.
    """
    driver = webdriver.Chrome()
    try:
        driver.get(url)
        model_name = url.split('/')[-1]

        # CSS class of each charging table paired with the title row
        # written in front of its data; order matches the page.
        charging_tables = (
            ('charging-table-standard', 'Home and Destination'),
            ('charging-table-fast', 'Fast Charging'),
        )
        for table_class, table_name in charging_tables:
            table = driver.find_element(by=By.CLASS_NAME, value=table_class)
            tablerows = table.find_elements(by=By.XPATH, value=".//tr")

            with open(charging_file_name, mode='a', newline="", encoding="utf-8") as file:
                writer = csv.writer(file)

                writer.writerow([table_name])
                writer.writerow(['Model_name', 'Charging Point', 'Max. Power', 'Power', 'Time Rate'])

                for tr in tablerows:
                    cells = tr.find_elements(by=By.XPATH, value=".//td")
                    row_data = [model_name] + [cell.get_attribute("textContent").strip() for cell in cells]
                    writer.writerow(row_data)

        with open(information_file_name, "a", newline="", encoding="utf-8") as csvfile:
            writer = csv.writer(csvfile)

            # Bug fix: a missing comma used to merge the first two column
            # names into 'Model_nameCategory', giving a three-column
            # header above four-column data rows.
            writer.writerow(['Model_name', 'Category', 'Attribute', 'Value'])

            for data_table in driver.find_elements(by=By.CLASS_NAME, value="data-table"):
                # find_elements returns an empty list instead of raising,
                # so a section without a heading is detected explicitly
                # rather than through a catch-all except.
                headings = data_table.find_elements(by=By.XPATH, value=".//h2")
                if not headings:
                    print("There aren't category")
                    continue
                category_text = headings[0].get_attribute("textContent").strip()

                for table in data_table.find_elements(by=By.TAG_NAME, value="table"):
                    for tr in table.find_elements(by=By.XPATH, value=".//tr"):
                        cells = tr.find_elements(by=By.XPATH, value=".//td")
                        # Only attribute/value pairs are exported.
                        if len(cells) == 2:
                            attribute_text = cells[0].get_attribute("textContent").strip()
                            value_text = cells[1].get_attribute("textContent").strip()
                            writer.writerow([model_name, category_text, attribute_text, value_text])
    finally:
        # Always close the browser, even when scraping fails part-way.
        driver.quit()

Jiaying Cheng's avatar
Jiaying Cheng committed
def get_cell_in_charging_tables(url, file_name):
    """
    Get the cell content in the tables 'Home and Destination Charging' and
    'Fast Charging' and append it to a CSV file.

    url: Page of the car. The part after the last '/' is used as the
        model name written in the first column of every data row.
    file_name: CSV file, opened in append mode. For each table a title
        row and a header row are written, followed by one row per <tr>.

    If one of the tables cannot be found, the exception raised by
    find_element propagates; the browser is closed in every case.
    """
    driver = webdriver.Chrome()
    try:
        driver.get(url)
        model_name = url.split('/')[-1]

        # CSS class of each charging table paired with the title row
        # written in front of its data; order matches the page.
        charging_tables = (
            ('charging-table-standard', 'Home and Destination'),
            ('charging-table-fast', 'Fast Charging'),
        )
        for table_class, table_name in charging_tables:
            table = driver.find_element(by=By.CLASS_NAME, value=table_class)
            tablerows = table.find_elements(by=By.XPATH, value=".//tr")

            with open(file_name, mode='a', newline="", encoding="utf-8") as file:
                writer = csv.writer(file)

                writer.writerow([table_name])
                writer.writerow(['Model_name', 'Charging Point', 'Max. Power', 'Power', 'Time Rate'])

                for tr in tablerows:
                    cells = tr.find_elements(by=By.XPATH, value=".//td")
                    row_data = [model_name] + [cell.get_attribute("textContent").strip() for cell in cells]
                    writer.writerow(row_data)
    finally:
        # Bug fix: the browser used to stay open whenever a lookup above
        # raised, e.g. for a page without one of the charging tables.
        driver.quit()
Jiaying Cheng's avatar
Jiaying Cheng committed

def get_cell_in_data_table_save(url, file_name):
    """
    Append the attribute/value pairs of all data tables of one car page
    to a CSV file.

    url: Page of the car. The part after the last '/' is used as the
        model name written in the first column of every data row.
    file_name: CSV file, opened in append mode. A header row is written
        first, then one (model, category, attribute, value) row for every
        table row with exactly two cells. The category is the text of the
        <h2> inside the enclosing 'data-table' element; sections without
        an <h2> are reported on stdout and skipped.
    """
    driver = webdriver.Chrome()
    try:
        driver.get(url)
        model_name = url.split('/')[-1]

        with open(file_name, "a", newline="", encoding="utf-8") as csvfile:
            writer = csv.writer(csvfile)

            # Bug fix: a missing comma used to merge the first two column
            # names into 'Model_nameCategory', giving a three-column
            # header above four-column data rows.
            writer.writerow(['Model_name', 'Category', 'Attribute', 'Value'])

            for data_table in driver.find_elements(by=By.CLASS_NAME, value="data-table"):
                # find_elements returns an empty list instead of raising,
                # so a section without a heading is detected explicitly
                # rather than through a catch-all except.
                headings = data_table.find_elements(by=By.XPATH, value=".//h2")
                if not headings:
                    print("There aren't category")
                    continue
                category_text = headings[0].get_attribute("textContent").strip()

                for table in data_table.find_elements(by=By.TAG_NAME, value="table"):
                    for tr in table.find_elements(by=By.XPATH, value=".//tr"):
                        cells = tr.find_elements(by=By.XPATH, value=".//td")
                        # Only attribute/value pairs are exported.
                        if len(cells) == 2:
                            attribute_text = cells[0].get_attribute("textContent").strip()
                            value_text = cells[1].get_attribute("textContent").strip()
                            writer.writerow([model_name, category_text, attribute_text, value_text])
    finally:
        # Always close the browser, even when scraping fails part-way.
        driver.quit()
Jiaying Cheng's avatar
Jiaying Cheng committed

#%%
# Load the previously saved car page links.
# Raw strings keep the Windows path separators literal; '\o', '\c' and
# '\i' are not valid escape sequences in a normal string literal.
href_list = read_href(r'..\output\href.txt')

# %%
# Output files shared by the cells below.
charging_file = r'..\output\charging_table.csv'
information_file = r'..\output\information.csv'
# %%
# Charging tables for the links at positions 20-24.
for url in tqdm(href_list[20:25]):
    get_cell_in_charging_tables(url, charging_file)
# %%
# Data tables for the same links.
for url in tqdm(href_list[20:25]):
    get_cell_in_data_table_save(url, information_file)
# %%
# run after the meeting
for url in tqdm(href_list[25:26]):
    get_all_information(url, charging_file, information_file)