- import time, traceback, sys
- import requests
- import speech_recognition as sr
- from random import uniform
- from moviepy.editor import AudioFileClip
- from selenium.webdriver.support.ui import WebDriverWait
- from selenium.webdriver.support import expected_conditions as EC
- from selenium.webdriver.common.by import By
- """
- The following dependencies must be installed:
- /* pip install SpeechRecognition moviepy undetected-chromedriver */ (undetected-chromedriver is a Selenium-based framework with a few anti-detect features)
- """
def solve_via_audio(self, wait: WebDriverWait) -> None:
    """Solve a reCAPTCHA challenge through its audio alternative.

    The function walks the nested reCAPTCHA iframes, requests the audio
    challenge, downloads the audio clip, transcribes it with
    ``speech_recognition`` (``Recognizer.recognize_google``) and submits the
    transcription in the challenge form.

    Args:
        self: Object that exposes ``driver`` (the Selenium WebDriver used for
            frame switching and element lookup) and ``ua`` (the value sent as
            the ``user-agent`` header when downloading the audio clip).
        wait: ``WebDriverWait`` used to wait for each element to be present.

    Raises:
        RuntimeError: If the audio clip download does not return HTTP 200.
        SystemExit: If reading or transcribing the audio fails (the traceback
            is printed first; exit status is 1).

    Exceptions raised by ``wait.until``, ``find_element`` or ``requests.get``
    propagate to the caller unchanged.

    Side effects:
        Writes ``audio.mp3`` and ``audio.wav`` to the current working
        directory, and leaves the driver focused on the challenge iframe.
    """
    mp3_path = "audio.mp3"
    wav_path = "audio.wav"
    download_timeout = 30  # seconds; without it a stalled download blocks forever

    def pause() -> None:
        # Short randomized delay between UI actions.
        time.sleep(uniform(0.231, 0.836))

    # 1. Locate the outer reCAPTCHA iframe and enter it.
    main_frame = wait.until(
        EC.presence_of_element_located((By.ID, "recaptcha-iframe"))
    )
    self.driver.switch_to.frame(main_frame)
    print("RECAPTCHA_MAIN_FRAME OK")

    # 2. Enter the nested iframe that holds the checkbox widget.
    checkbox_frame = wait.until(
        EC.presence_of_element_located((By.CSS_SELECTOR, 'iframe[title="reCAPTCHA"]'))
    )
    self.driver.switch_to.frame(checkbox_frame)
    print("RECAPTCHA_CHILD_FRAME OK")

    # 3. Click the checkbox so the challenge dialog opens.
    anchor = wait.until(
        EC.presence_of_element_located((By.ID, "recaptcha-anchor"))
    )
    anchor.click()
    print("recaptcha_anchor OK")
    pause()

    # 4. Go back to the outer reCAPTCHA iframe; the challenge lives in a
    #    sibling of the checkbox iframe, not inside it.
    self.driver.switch_to.default_content()
    self.driver.switch_to.frame(main_frame)

    # 5. Enter the iframe that hosts the challenge dialog.
    challenge_frame = wait.until(
        EC.presence_of_element_located(
            (By.CSS_SELECTOR, 'iframe[src*="https://www.google.com/recaptcha/api2/bframe"]')
        )
    )
    self.driver.switch_to.frame(challenge_frame)
    print("RECAPTCHA_CHILD_2_FRAME OK")

    # 6. Switch from the image puzzle to the audio challenge.
    audio_button = wait.until(
        EC.presence_of_element_located((By.ID, "recaptcha-audio-button"))
    )
    audio_button.click()
    pause()

    # 7. Read the URL of the challenge audio clip.
    download_link = wait.until(
        EC.presence_of_element_located((By.CSS_SELECTOR, 'a.rc-audiochallenge-tdownload-link'))
    )
    audio_url = download_link.get_attribute('href')

    # 8. Download the clip, sending the same user agent as the browser session.
    response = requests.get(
        audio_url,
        headers={"user-agent": self.ua},
        timeout=download_timeout,
    )
    if response.status_code != 200:
        # Stop here: there is nothing to transcribe, and continuing would
        # fail later on an unbound transcription variable.
        print('Failed to download the audio file.')
        raise RuntimeError(
            f"Failed to download the audio file (HTTP {response.status_code})."
        )

    # 9. Save the clip as MP3.
    with open(mp3_path, "wb") as mp3_file:
        mp3_file.write(response.content)

    # 10. Convert MP3 to WAV, the format read by sr.AudioFile below.
    with AudioFileClip(mp3_path, fps=44100) as clip:
        clip.write_audiofile(wav_path)

    # 11. Transcribe the WAV file.
    recognizer = sr.Recognizer()
    try:
        with sr.AudioFile(wav_path) as source:
            audio_data = recognizer.record(source)
        text = recognizer.recognize_google(audio_data)
        print('Transcribed text: ', text)
    except Exception as e:
        traceback.print_exc()
        print(e)
        # Exit with a non-zero status so the failure is visible to the shell.
        sys.exit(1)

    # 12. Type the transcription into the challenge form.
    answer_input = self.driver.find_element(By.ID, "audio-response")
    answer_input.send_keys(text)
    pause()

    # 13. Submit the answer.
    verify_button = self.driver.find_element(By.ID, "recaptcha-verify-button")
    verify_button.click()
    pause()