Skip to content
Snippets Groups Projects
utils.py 4.16 KiB
Newer Older
Jiaying Cheng's avatar
Jiaying Cheng committed
from selenium import webdriver
from selenium.webdriver.common.by import By
Jiaying Cheng's avatar
Jiaying Cheng committed
import csv
Jiaying Cheng's avatar
Jiaying Cheng committed

Jiaying Cheng's avatar
Jiaying Cheng committed
################# Get Hyperlink ############################
Jiaying Cheng's avatar
Jiaying Cheng committed

Jiaying Cheng's avatar
Jiaying Cheng committed
def get_href(url, href_list):
    """
    Collect the hyperlinks of all items listed on one page.

    url: The url that you want to get all the possible hyperlinks
    href_list: The list where you want to save all the hyperlink. The links
        found on the page are appended to this list in place, so one list
        can be passed to several calls to accumulate results.

    Returns the same ``href_list`` object that was passed in.
    """
    driver = webdriver.Chrome()
    try:
        driver.get(url)

        item_data_div_list = driver.find_elements(By.CSS_SELECTOR, '.list-item .item-data')
        for item_data_div in item_data_div_list:
            hyperlink = item_data_div.find_element(By.CSS_SELECTOR, 'a.title')
            # Append to the caller's list; rebinding the name here would
            # silently discard every link once the function returns.
            href_list.append(hyperlink.get_attribute('href'))
    finally:
        # Always close the browser, even when a lookup above raises.
        driver.quit()

    return href_list
Jiaying Cheng's avatar
Jiaying Cheng committed

Jiaying Cheng's avatar
Jiaying Cheng committed
def write_href(file_name, page_count=36):
    """
    Save all the hyperlink of cars in a file.

    file_name: Path of the text file to (over)write, one hyperlink per line.
    page_count: Number of result pages to visit, starting at page 0.
        Defaults to 36, the value that used to be hard-coded.
    """
    href_list = []
    for page_number in range(page_count):
        url_pro_page = f'https://ev-database.org/#sort:path~type~order=.rank~number~desc|rs-price:prev~next=10000~100000|rs-range:prev~next=0~1000|rs-fastcharge:prev~next=0~1500|rs-acceleration:prev~next=2~23|rs-topspeed:prev~next=110~350|rs-battery:prev~next=10~200|rs-towweight:prev~next=0~2500|rs-eff:prev~next=100~350|rs-safety:prev~next=-1~5|paging:currentPage={page_number}|paging:number=10'
        # get_href is expected to append this page's links to href_list.
        get_href(url_pro_page, href_list=href_list)

    # Plain "w" is enough: the file is only written, never read back here.
    with open(file_name, mode="w", encoding="utf-8") as f:
        f.writelines(f"{href}\n" for href in href_list)

def read_href(file_name):
    """
    Read back the hyperlinks stored one per line in a file.

    file_name: Path of the text file to read.

    Returns a list with one string per line, without the line endings.
    An empty file yields an empty list.
    """
    with open(file_name, encoding="utf-8") as f:
        # read must be *called*; `f.read.splitlines()` raises AttributeError.
        hrefs = f.read().splitlines()
    return hrefs

################# Get Cell Information in Table ############################

def get_cell_in_charging_table(url, table_class, file_name):
    """
    Get the cell content in the table 'Home and Destination Charging' or
    'Fast Charging' and append it to a CSV file.

    url: Page to open. The text after the last '/' is used as the model
        name and written as the first column of every data row.
    table_class: Class name of the table element to read, e.g.
        'charging-table-standard' for the 'Home and Destination' table.
    file_name: CSV file the rows are appended to (opened in append mode).

    Each call appends a one-cell row holding ``table_class``, then a header
    row, then one row per <tr> of the table.
    """
    model_name = url.split('/')[-1]

    driver = webdriver.Chrome()
    try:
        driver.get(url)

        table = driver.find_element(by=By.CLASS_NAME, value=table_class)
        tablerows = table.find_elements(by=By.XPATH, value=".//tr")

        with open(file_name, mode='a', newline="", encoding="utf-8") as file:
            writer = csv.writer(file)

            writer.writerow([table_class])
            writer.writerow(['Model_name','Charging Point', 'Max. Power', 'Power', 'Time Rate'])

            for tr in tablerows:
                cells = tr.find_elements(by=By.XPATH, value=".//td")
                # textContent also returns text of cells that are not
                # currently rendered, unlike the element's visible text.
                row_data = [model_name] + [cell.get_attribute("textContent").strip() for cell in cells]
                writer.writerow(row_data)
    finally:
        # Close the browser even if the table is missing or a read fails.
        driver.quit()

def get_cell_in_charging_tables(url, file_name="charging_tables.csv"):
    """
    Save both charging tables of one page to a CSV file.

    url: Page to read the tables from.
    file_name: CSV file the rows of both tables are appended to. It is
        forwarded to get_cell_in_charging_table, which requires it.
    """
    table_classes = ('charging-table-standard', 'charging-table-fast')
    for table_class in table_classes:
        get_cell_in_charging_table(url, table_class, file_name)

def get_cell_in_data_table_save(url, file_name):
    """
    Save the attribute/value pairs of every 'data-table' block to a CSV file.

    url: Page to open. The text after the last '/' is used as the model
        name and written as the first column of every data row.
    file_name: CSV file to (over)write.

    The file starts with the header row
    ``Model_name, Category, Attribute, Value``. For each element with class
    'data-table', the text of its <h2> is the category; every table row with
    exactly two <td> cells becomes one CSV row. Blocks without an <h2> are
    skipped after printing a message.
    """
    # Imported here so the block does not depend on the file's import header.
    from selenium.common.exceptions import NoSuchElementException

    model_name = url.split('/')[-1]

    driver = webdriver.Chrome()
    try:
        driver.get(url)

        with open(file_name, "w", newline="", encoding="utf-8") as csvfile:
            writer = csv.writer(csvfile)

            # Four separate columns; a missing comma would fuse the first two
            # names into one cell via implicit string concatenation.
            writer.writerow(['Model_name', 'Category', 'Attribute', 'Value'])
            data_tables = driver.find_elements(by=By.CLASS_NAME, value="data-table")

            for data_table in data_tables:
                try:
                    category = data_table.find_element(by=By.XPATH, value=".//h2")
                except NoSuchElementException:
                    print("There aren't category")
                    continue
                category_text = category.get_attribute("textContent").strip()

                for table in data_table.find_elements(by=By.TAG_NAME, value="table"):
                    for tr in table.find_elements(by=By.XPATH, value=".//tr"):
                        cells = tr.find_elements(by=By.XPATH, value=".//td")
                        # Only attribute/value pairs are kept; header rows
                        # and other layouts are ignored.
                        if len(cells) != 2:
                            continue
                        attribute_text = cells[0].get_attribute("textContent").strip()
                        value_text = cells[1].get_attribute("textContent").strip()
                        writer.writerow([model_name, category_text, attribute_text, value_text])
    finally:
        # Close the browser even if opening the file or a lookup fails.
        driver.quit()