#!/bin/python3
import requests
from bs4 import BeautifulSoup
#from lxml import html
# Site root; relative links scraped from the pages are appended to it.
MAIN_URL = 'https://www.almacom.kz'
# Catalogue page the scrape starts from.
URL = 'https://www.almacom.kz/products/196/'
# Request headers sent with every download (browser-like User-Agent).
HEADERS = {
    'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.9; rv:45.0) Gecko/20100101 Firefox/45.0'
}
# Air-conditioner types grouped by purpose; get_content() appends one dict per category.
destination = []
# Air-conditioner types grouped by brand; target list for get_brands().
brands = []
def get_html(url, params=None, timeout=10):
    """Download *url* using the module-level ``HEADERS``.

    Args:
        url: Address to request.
        params: Optional query-string parameters passed to ``requests.get``.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` waits indefinitely on a stalled connection.

    Returns:
        The ``requests.Response`` object. The status code is not checked
        here; callers inspect ``status_code`` themselves.

    Raises:
        requests.exceptions.Timeout: If the server does not respond within
            *timeout* seconds.
    """
    return requests.get(url, headers=HEADERS, params=params, timeout=timeout)
def get_content(html):
    """Collect the air-conditioner categories (by purpose) found in *html*.

    Every ``div`` with class ``category-item`` becomes one dict appended to
    the module-level ``destination`` list, with the keys ``count`` (0-based
    position on the page), ``category``, ``link`` and ``image_link``. The
    accumulated list is printed once the page has been processed.

    Args:
        html: Markup of the catalogue page as a string.
    """
    # Categories by purpose (semi-industrial, household, etc.).
    page = BeautifulSoup(html, 'html.parser')
    cards = page.findAll('div', class_='category-item')
    for position, card in enumerate(cards):
        title_anchor = card.find('a', title=True)
        href_anchor = card.find('a', href=True)
        image = card.find('img', src=True)
        # Links on the page are site-relative, so prefix them with the site root.
        entry = {
            'count': position,
            'category': title_anchor.get_text().strip('\n'),
            'link': MAIN_URL + href_anchor.get('href'),
            'image_link': MAIN_URL + image.get('src'),
        }
        destination.append(entry)
    print(destination)
def get_brands():
    """Collect the brand sub-categories of every entry in ``destination``.

    For each category gathered by ``get_content`` the linked page is
    downloaded and every ``div`` with class ``category-item`` on it is
    appended to the module-level ``brands`` list as a dict with the keys
    ``count`` (0-based position on that page), ``brand``, ``link`` and
    ``image_link``. Pages that do not answer with status 200 are reported
    and skipped. The accumulated list is printed at the end.
    """
    # Report how many categories will be visited.
    print('Размер списка ', len(destination))
    for category in destination:
        # Each category links to a page listing its brand sub-categories.
        print(category['link'])
        # Download the page.
        response = get_html(category['link'])
        if response.status_code != 200:
            print('ERROR. Status code not equal 200')
            continue
        # BeautifulSoup needs the markup text, not the Response object itself.
        soup = BeautifulSoup(response.text, 'html.parser')
        brand_cards = soup.find_all('div', class_='category-item')
        for position, card in enumerate(brand_cards):
            # brands is a list: entries are appended, not passed to a call.
            brands.append({
                'count': position,
                'brand': card.find('a', title=True).get_text().strip('\n'),
                'link': MAIN_URL + card.find('a', href=True).get('href'),
                'image_link': MAIN_URL + card.find('img', src=True).get('src'),
            })
    print(brands)
def parse():
    """Download the start page and run both scraping stages.

    When the start page answers with status 200 its markup is handed to
    ``get_content`` and then ``get_brands`` is run; otherwise an error
    message is printed and nothing is scraped.
    """
    response = get_html(URL)
    if response.status_code != 200:
        print('ERROR. Status code not equal 200')
        return
    get_content(response.text)
    get_brands()
# Start the scrape as soon as this file is executed (this also runs on import).
parse()