
Using BeautifulSoup and Selenium in Python, I am trying to scroll down the list of songs in a Spotify playlist to parse the song names. However, the code will not get past the first 30 songs or scroll down any further. Why is that?

from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
import time
import csv
from bs4 import BeautifulSoup

# Initialize the WebDriver
driver = webdriver.Chrome()

# Open the website
driver.get("SPOTIFY PLAYLIST URL")

# Scroll to the bottom of the page using PAGE_DOWN key
body = driver.find_element(By.TAG_NAME, "body")
for _ in range(30):  # Adjust the range as needed
    body.send_keys(Keys.PAGE_DOWN)
    time.sleep(1)  # Adjust the sleep time as needed

# Parse the page source with BeautifulSoup
soup = BeautifulSoup(driver.page_source, 'html.parser')

# Find song elements
songs = soup.find_all("div", class_="btE2c3IKaOXZ4VNAb8WQ")

# Check if songs are found
if not songs:
    print("No songs found. Please check the class name or the page structure.")
else:
    print(f"Found {len(songs)} songs.")

# Write to CSV
with open('songs.csv', 'w', newline='', encoding='utf-8') as file:
    writer = csv.writer(file)
    writer.writerow(['SONGS'])
    for song in songs:
        song_text = song.get_text(strip=True)
        print(song_text)
        writer.writerow([song_text])

# Close the WebDriver
driver.quit()

1 Answer


You are sending the PAGE_DOWN keystrokes to the wrong part of the web page. To load all the songs, you have to scroll the playlist grid itself, not the page body.

Instead of using

body = driver.find_element(By.TAG_NAME, "body")

try this:

playlist = driver.find_element(By.CSS_SELECTOR, "div[data-testid='playlist-tracklist']")

So your code becomes:

from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
import time
import csv
from bs4 import BeautifulSoup

# Initialize the WebDriver
driver = webdriver.Chrome()

# Open the website
driver.get("SPOTIFY PLAYLIST URL")

# Scroll the playlist container using the PAGE_DOWN key
playlist = driver.find_element(By.CSS_SELECTOR, "div[data-testid='playlist-tracklist']")
for _ in range(30):  # Adjust the range as needed
    playlist.send_keys(Keys.PAGE_DOWN)
    time.sleep(1)  # Adjust the sleep time as needed

# Parse the page source with BeautifulSoup
soup = BeautifulSoup(driver.page_source, 'html.parser')

# Find song elements
songs = soup.find_all("div", class_="btE2c3IKaOXZ4VNAb8WQ")

# Check if songs are found
if not songs:
    print("No songs found. Please check the class name or the page structure.")
else:
    print(f"Found {len(songs)} songs.")

# Write to CSV
with open('songs.csv', 'w', newline='', encoding='utf-8') as file:
    writer = csv.writer(file)
    writer.writerow(['SONGS'])
    for song in songs:
        song_text = song.get_text(strip=True)
        print(song_text)
        writer.writerow([song_text])

# Close the WebDriver
driver.quit()
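
One caveat worth adding: the Spotify web player appears to render the tracklist as a virtualized grid, so rows that scroll out of view may be dropped from the DOM, and a single parse at the end can still miss songs. Below is a minimal sketch that collects names while scrolling instead. It assumes the same div[data-testid='playlist-tracklist'] container scrolls via scrollTop and that track names sit in a[data-testid='internal-track-link'] links; both selectors are assumptions, so verify them in your browser's dev tools.

from selenium import webdriver
from selenium.webdriver.common.by import By
import time

driver = webdriver.Chrome()
driver.get("SPOTIFY PLAYLIST URL")
time.sleep(5)  # give the playlist time to render

# Assumed scroll container -- if scrollTop has no effect here,
# the scrollable element may be an ancestor of the tracklist.
container = driver.find_element(By.CSS_SELECTOR, "div[data-testid='playlist-tracklist']")

seen = []  # keeps playlist order and skips duplicates
last_count = -1
while len(seen) != last_count:
    last_count = len(seen)
    # Grab whatever rows are currently rendered (assumed selector).
    for link in driver.find_elements(By.CSS_SELECTOR, "a[data-testid='internal-track-link']"):
        name = link.text.strip()
        if name and name not in seen:
            seen.append(name)
    # Scroll the container itself, not the page body.
    driver.execute_script("arguments[0].scrollTop += arguments[0].clientHeight;", container)
    time.sleep(1)

print(f"Collected {len(seen)} songs.")
driver.quit()

The loop stops once a full pass adds no new names, so if either selector guess is wrong it simply exits after the first visible batch rather than hanging.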