I'm trying to use this code to collect reviews from the Consumer Affairs review site. But I kept getting errors, specifically in the dateElements & jsonData section. Could someone help me fix this code to be compatible with the site I'm going to web scrape?
from bs4 import BeautifulSoup
import requests
import pandas as pd
import json
print ('all imported successfuly')
# Initialize an empty dataframe
df = pd.DataFrame()
for x in range(1, 5):
names = []
headers = []
bodies = []
ratings = []
published = []
updated = []
reported = []
link = (f'https://www.consumeraffairs.com/online/allure-beauty-box.html?page={x}')
print (link)
req = requests.get(link)
content = req.content
soup = BeautifulSoup(content, "lxml")
articles = soup.find_all('div', {'class':'rvw js-rvw'})
for article in articles:
names.append(article.find('strong', attrs={'class': 'rvw-aut__inf-nm'}).text.strip())
try:
bodies.append(article.find('p', attrs={'class':'rvw-bd'}).text.strip())
except:
bodies.append('')
try:
ratings.append(article.find('div', attrs={'class':'stars-rtg stars-rtg--sm'}).text.strip())
except:
ratings.append('')
dateElements = article.find('span', attrs={'class':'ca-txt-cpt'}).text.strip()
jsonData = json.loads(dateElements)
published.append(jsonData['publishedDate'])
updated.append(jsonData['updatedDate'])
reported.append(jsonData['reportedDate'])
# Create your temporary dataframe of the first iteration, then append that into your "final" dataframe
temp_df = pd.DataFrame({'User Name': names, 'Body': bodies, 'Rating': ratings, 'Published Date': published, 'Updated Date':updated, 'Reported Date':reported})
df = df.append(temp_df, sort=False).reset_index(drop=True)
print ('pass1')
df.to_csv('AllureReviews.csv', index=False, encoding='utf-8')
print ('excel done')
This is the error I'm getting
Traceback (most recent call last): File "C:/Users/Sara Jitkresorn/PycharmProjects/untitled/venv/Caffairs.py", line 37, in jsonData = json.loads(dateElements) File "C:\Users\Sara Jitkresorn\AppData\Local\Programs\Python\Python37\lib\json__init__.py", line 348, in loads return _default_decoder.decode(s) File "C:\Users\Sara Jitkresorn\AppData\Local\Programs\Python\Python37\lib\json\decoder.py", line 337, in decode obj, end = self.raw_decode(s, idx=_w(s, 0).end()) File "C:\Users\Sara Jitkresorn\AppData\Local\Programs\Python\Python37\lib\json\decoder.py", line 355, in raw_decode raise JSONDecodeError("Expecting value", s, err.value) from None json.decoder.JSONDecodeError: Expecting value: line 1 column 1 (char 0)