Parsing a JS dynamic page with PyQt5


I am trying to parse a JavaScript-rendered (dynamic) page with PyQt5. After execution I get the following error: Process finished with exit code -1073741819 (0xC0000005). The code is here:

import sys
import requests
from bs4 import BeautifulSoup
from PyQt5.QtWebEngineWidgets import QWebEnginePage
from PyQt5.QtWidgets import QApplication
from PyQt5.QtCore import QUrl
import pandas as pd

class Page(QWebEnginePage):
    """Load *url* in a headless web engine page and capture the rendered HTML.

    Constructing a ``Page`` blocks until the page has finished loading and
    its HTML (after JavaScript has run) has been delivered; the result is
    then available as the ``html`` attribute.

    Args:
        url: Address of the page to render.
    """

    def __init__(self, url):
        # Only one QApplication may exist per process.  Creating a second one
        # (e.g. when Page is instantiated in a loop) crashes the interpreter,
        # so reuse the running instance when there is one.  The application
        # must exist before the QWebEnginePage base class is initialised.
        app = QApplication.instance() or QApplication(sys.argv)
        super().__init__()
        self.app = app
        self.html = ''
        self.loadFinished.connect(self._on_load_finished)
        self.load(QUrl(url))
        # Block here until Callable() has received the HTML and quit the loop.
        self.app.exec_()

    def _on_load_finished(self):
        """Request the rendered HTML once the page has finished loading."""
        # toHtml() is asynchronous: it returns nothing and hands the markup
        # to the callback later, so its return value must not be stored.
        self.toHtml(self.Callable)
        print('Load finished')

    def Callable(self, html_str):
        """Store the HTML delivered by ``toHtml`` and stop the event loop.

        Args:
            html_str: The page's HTML as a string.
        """
        self.html = html_str
        # Leave the event loop so the constructor returns to the caller.
        self.app.quit()

# Base URL of the site to scrape; every href found below is appended to it.
url = ''

# Render the landing page with Page, then walk course -> runner -> horse page.
page = Page(url)
# Guard against a missing HTML payload so BeautifulSoup always gets a string.
soup = BeautifulSoup(page.html or '', 'html.parser')
courses = soup.find_all('a', {'class': 'hidden-sm-down rh-cardsMatrix__courseTitle ui-link'})
for course in courses:
    course_href = course.get('href')
    if not course_href:
        # An anchor without an href cannot be followed (and None + str fails).
        continue
    CurrentCourse = course.text.strip()
    Courses_URL = url + course_href
    # A timeout keeps one stalled connection from hanging the whole run.
    response1 = requests.get(Courses_URL, timeout=30)
    soup1 = BeautifulSoup(response1.text, 'html.parser')
    horses = soup1.find_all('a', {'class': 'RC-runnerName'})
    for horse in horses:
        horse_href = horse.get('href')
        if not horse_href:
            continue
        horses_url = url + horse_href
        page1 = Page(horses_url)
        soup2 = BeautifulSoup(page1.html or '', 'html.parser')
        data = soup2.find_all('a', {'class': 'ui-link ui-link_table js-popupLink'})

Could you please help me rework or tweak this code so that I get the information I want?


0 Answers

Nobody has answered this question yet.

User contributions licensed under CC BY-SA 3.0