使用Python & Selenium 截图网页 – 梁工微型SaaS

1. 使用Selenium进行网站截图，使用save_screenshot方法。

from selenium import webdriver

# NOTE(review): `driver` is not created in this excerpt; it is presumably a
# WebDriver instance built earlier (e.g. via webdriver.Chrome()) - confirm.
# Writes the screenshot to the given file name.
driver.save_screenshot("screenshot.png")

# from selenium import webdriver driver.save_screenshot("screenshot.png") #

#

from selenium import webdriver

# NOTE(review): `driver` is not created in this excerpt; it is presumably a
# WebDriver instance built earlier (e.g. via webdriver.Chrome()) - confirm.
# Writes the screenshot to the given file name.
driver.save_screenshot("screenshot.png")
#

完整代码，参考：https://github.com/hassancs91/WebsiteScreenshotPython/blob/main/captureWebPage.py

#start
# Take a screenshot of a web page with headless Chrome and print how long it took.

from optparse import OptParseError  # NOTE(review): not used by this script; kept from the original listing

from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.chrome.service import Service

import time

start_time = time.time()

options = Options()
options.add_argument('--headless')

# Raw string: the backslashes in the Windows path must not be read as escape sequences.
# The driver path goes through Service and the options through `options=`.
driver = webdriver.Chrome(
    service=Service(r"E:\TechVideos\Python\chromedriver.exe"), options=options)

url = "https://learnwithhasan.com"

try:
    # Open the URL.
    driver.get(url)

    # Save the screenshot under the file name passed as the argument.
    driver.save_screenshot("test.png")  # the screenshot is saved as test.png

    # Time elapsed for the overall process: the time now minus the start time.
    elapsed = "%s seconds" % (time.time() - start_time)
finally:
    # Close the browser even if loading the page or saving the screenshot fails.
    driver.quit()

# Print the elapsed time on the screen.
print("Done in " + elapsed)

#end

#start from optparse import OptParseError from selenium import webdriver from selenium.webdriver.chrome.options import Options import time start_time = time.time() options = Options() options.add_argument('--headless') driver = webdriver.Chrome( "E:\TechVideos\Python\chromedriver.exe" , chrome_options=options) url = "https://learnwithhasan.com" #Now for opening the URL driver.get(url) #After Opening we’ll take the screenshot with the following method. #Remember to pass your name, with which you want your screenshot to be saved, in the arguments of the following method. driver.save_screenshot("test.png") # i want my screenshot to be saved as test.png #Then we want to calculate the time elapsed for the overall process of taking the screenshot. elapsed = "%s seconds" % (time.time() - start_time) # the time now minus the start time #Now let’s print the elapsed time on the screen print("Done in" + elapsed) #end

#start
# Take a screenshot of a web page with headless Chrome and print how long it took.
from optparse import OptParseError  # NOTE(review): not used by this script; kept from the original listing
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.chrome.service import Service
import time

start_time = time.time()

options = Options()
options.add_argument('--headless')

# Raw string: the backslashes in the Windows path must not be read as escape sequences.
# The driver path goes through Service and the options through `options=`.
driver = webdriver.Chrome(
    service=Service(r"E:\TechVideos\Python\chromedriver.exe"), options=options)

url = "https://learnwithhasan.com"

try:
    # Open the URL.
    driver.get(url)

    # Save the screenshot under the file name passed as the argument.
    driver.save_screenshot("test.png")  # the screenshot is saved as test.png

    # Time elapsed for the overall process: the time now minus the start time.
    elapsed = "%s seconds" % (time.time() - start_time)
finally:
    # Close the browser even if loading the page or saving the screenshot fails.
    driver.quit()

# Print the elapsed time on the screen.
print("Done in " + elapsed)

#end

2. 浏览器模拟访问网页会遇到屏蔽和验证：

解决：加上 user-agent 和 referer 可以避开很多验证

# Override the browser's User-Agent string. The switch name and its value are
# separated by "="; `options` is the Options object built in the script above.
options.add_argument(
    '--user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/74.0.3729.169 Safari/537.36'
)

# options.add_argument( '--user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/74.0.3729.169 Safari/537.36' ) #

#
# Override the browser's User-Agent string. The switch name and its value are
# separated by "="; `options` is the Options object built in the script above.
options.add_argument(
    '--user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/74.0.3729.169 Safari/537.36'
)
#

加上 referer 避开验证：

以下是修改后的Python代码，添加了 referer 避开验证的功能：

```python

# Take a screenshot with headless Chrome while sending a Referer header, and print how long it took.

from optparse import OptParseError  # NOTE(review): not used by this script; kept from the original listing

from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By  # NOTE(review): not used by this script; kept from the original listing
from selenium.webdriver.chrome.service import Service

import time

start_time = time.time()

options = Options()
options.add_argument('--headless')

# Raw string: the backslashes in the Windows path must not be read as escape sequences.
driver = webdriver.Chrome(service=Service(r"E:\TechVideos\Python\chromedriver.exe"), options=options)

url = "https://learnwithhasan.com"

try:
    # Set the referer to avoid validation. Chrome has no "referer" command-line
    # switch, so the header is added to outgoing requests through the DevTools protocol.
    driver.execute_cdp_cmd("Network.enable", {})
    driver.execute_cdp_cmd(
        "Network.setExtraHTTPHeaders",
        {"headers": {"Referer": "https://www.google.com/"}},
    )

    # Open the URL.
    driver.get(url)

    # Save the screenshot under the file name passed as the argument.
    driver.save_screenshot("test.png")  # the screenshot is saved as test.png

    # Time elapsed for the overall process: the time now minus the start time.
    elapsed = "%s seconds" % (time.time() - start_time)
finally:
    # Close the browser even if loading the page or saving the screenshot fails.
    driver.quit()

# Print the elapsed time on the screen.
print("Done in " + elapsed)

```

此代码在使用 Selenium 进行网页截图时，添加了 referer 头信息来避开网站验证。您可以部署这段代码并执行，以获取带有 referer 的网站截图并打印出整个过程所花费的时间。

以下是修改后的Python代码，添加了 referer 避开验证的功能： ```python from optparse import OptParseError from selenium import webdriver from selenium.webdriver.chrome.options import Options from selenium.webdriver.common.by import By from selenium.webdriver.chrome.service import Service import time start_time = time.time() options = Options() options.add_argument('--headless') #Setting referer to avoid validation options.add_argument("referer=https://www.google.com/") driver = webdriver.Chrome(service=Service("E:\TechVideos\Python\chromedriver.exe"), options=options) url = "https://learnwithhasan.com" #Now for opening the URL driver.get(url) #After Opening we’ll take the screenshot with the following method. #Remember to pass your name, with which you want your screenshot to be saved, in the arguments of the following method. driver.save_screenshot("test.png") # i want my screenshot to be saved as test.png #Then we want to calculate the time elapsed for the overall process of taking the screenshot. elapsed = "%s seconds" % (time.time() - start_time) # the time now minus the start time #Now let’s print the elapsed time on the screen print("Done in" + elapsed) ``` 此代码在使用 Selenium 进行网页截图时，添加了 referer 头信息来避开网站验证。您可以部署这段代码并执行，以获取带有 referer 的网站截图并打印出整个过程所花费的时间。

以下是修改后的Python代码，添加了 referer 避开验证的功能：

```python
# Take a screenshot with headless Chrome while sending a Referer header, and print how long it took.
from optparse import OptParseError  # NOTE(review): not used by this script; kept from the original listing
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By  # NOTE(review): not used by this script; kept from the original listing
from selenium.webdriver.chrome.service import Service
import time

start_time = time.time()

options = Options()
options.add_argument('--headless')

# Raw string: the backslashes in the Windows path must not be read as escape sequences.
driver = webdriver.Chrome(service=Service(r"E:\TechVideos\Python\chromedriver.exe"), options=options)

url = "https://learnwithhasan.com"

try:
    # Set the referer to avoid validation. Chrome has no "referer" command-line
    # switch, so the header is added to outgoing requests through the DevTools protocol.
    driver.execute_cdp_cmd("Network.enable", {})
    driver.execute_cdp_cmd(
        "Network.setExtraHTTPHeaders",
        {"headers": {"Referer": "https://www.google.com/"}},
    )

    # Open the URL.
    driver.get(url)

    # Save the screenshot under the file name passed as the argument.
    driver.save_screenshot("test.png")  # the screenshot is saved as test.png

    # Time elapsed for the overall process: the time now minus the start time.
    elapsed = "%s seconds" % (time.time() - start_time)
finally:
    # Close the browser even if loading the page or saving the screenshot fails.
    driver.quit()

# Print the elapsed time on the screen.
print("Done in " + elapsed)
```

此代码在使用 Selenium 进行网页截图时，添加了 referer 头信息来避开网站验证。您可以部署这段代码并执行，以获取带有 referer 的网站截图并打印出整个过程所花费的时间。

3. 更强大的避开反爬虫方法：Selenium Stealth

Selenium Stealth 是一个 Python 库，可以帮助用户在使用 Selenium 自动化测试时避开网站的反爬虫策略。该库的主要目的是使 Selenium 操作尽可能接近人类用户的行为，从而降低被检测出的风险。Selenium Stealth 提供了一系列功能，包括更改用户代理、模拟人类的鼠标移动轨迹、模拟键盘输入、在请求中添加 referrer 等功能，这些功能可以模拟真实用户的行为，并增加通过自动化脚本访问网站的成功率。

使用 Selenium Stealth 可以帮助用户避免被网站检测出是自动化操作，并提高自动化测试的成功率。通过模拟人类用户的操作行为，可以降低被封禁或限制访问的风险，有效地完成自动化测试任务。要使用 Selenium Stealth，用户可以将其集成到他们的 Selenium 脚本中，并根据需要配置各种选项来模拟更真实的用户行为。

具体代码示例如下：

#start
# Take a screenshot with headless Chrome after passing the driver through selenium-stealth.

from optparse import OptParseError  # NOTE(review): not used by this script; kept from the original listing

from selenium import webdriver
from selenium.webdriver.chrome.options import Options  # NOTE(review): unused; webdriver.ChromeOptions() is used below
from selenium.webdriver.chrome.service import Service

import time

from selenium_stealth import stealth

start_time = time.time()

options = webdriver.ChromeOptions()
options.add_argument("start-maximized")
options.add_argument("--headless")
options.add_experimental_option("excludeSwitches", ["enable-automation"])
options.add_experimental_option('useAutomationExtension', False)

# Raw string: the backslashes in the Windows path must not be read as escape sequences.
# The driver path goes through Service rather than the `executable_path` keyword.
driver = webdriver.Chrome(
    service=Service(r"E:\TechVideos\Python\chromedriver.exe"), options=options)

try:
    # Hand the driver to selenium-stealth together with the values it should report.
    stealth(driver,
            languages=["en-US", "en"],
            vendor="Google Inc.",
            platform="Win32",
            webgl_vendor="Intel Inc.",
            renderer="Intel Iris OpenGL Engine",
            fix_hairline=True,
            )

    url = "https://www.neilpatel.com"

    # Open the URL.
    driver.get(url)

    # Save the screenshot under the file name passed as the argument.
    driver.save_screenshot("test.png")  # the screenshot is saved as test.png

    # Time elapsed for the overall process: the time now minus the start time.
    elapsed = "%s seconds" % (time.time() - start_time)
finally:
    # Close the browser even if a step above fails.
    driver.quit()

# Print the elapsed time on the screen.
print("Done in " + elapsed)

#end

#start from optparse import OptParseError from selenium import webdriver from selenium.webdriver.chrome.options import Options import time from selenium_stealth import stealth start_time = time.time() options = webdriver.ChromeOptions() options.add_argument("start-maximized") options.add_argument("--headless") options.add_experimental_option("excludeSwitches", ["enable-automation"]) options.add_experimental_option('useAutomationExtension', False) driver = webdriver.Chrome( options=options, executable_path="E:\TechVideos\Python\chromedriver.exe") stealth(driver, languages=["en-US", "en"], vendor="Google Inc.", platform="Win32", webgl_vendor="Intel Inc.", renderer="Intel Iris OpenGL Engine", fix_hairline=True, ) url = "https://www.neilpatel.com" #Now for opening the URL driver.get(url) #After Opening we’ll take the screenshot with the following method. #Remember to pass your name, with which you want your screenshot to be saved, in the arguments of the following method. driver.save_screenshot("test.png") # i want my screenshot to be saved as test.png #Then we want to calculate the time elapsed for the overall process of taking the screenshot. elapsed = "%s seconds" % (time.time() - start_time) # the time now minus the start time #Now let’s print the elapsed time on the screen print("Done in" + elapsed) #end

#start
# Take a screenshot with headless Chrome after passing the driver through selenium-stealth.
from optparse import OptParseError  # NOTE(review): not used by this script; kept from the original listing
from selenium import webdriver
from selenium.webdriver.chrome.options import Options  # NOTE(review): unused; webdriver.ChromeOptions() is used below
from selenium.webdriver.chrome.service import Service
import time
from selenium_stealth import stealth

start_time = time.time()

options = webdriver.ChromeOptions()
options.add_argument("start-maximized")
options.add_argument("--headless")
options.add_experimental_option("excludeSwitches", ["enable-automation"])
options.add_experimental_option('useAutomationExtension', False)

# Raw string: the backslashes in the Windows path must not be read as escape sequences.
# The driver path goes through Service rather than the `executable_path` keyword.
driver = webdriver.Chrome(
    service=Service(r"E:\TechVideos\Python\chromedriver.exe"), options=options)

try:
    # Hand the driver to selenium-stealth together with the values it should report.
    stealth(driver,
            languages=["en-US", "en"],
            vendor="Google Inc.",
            platform="Win32",
            webgl_vendor="Intel Inc.",
            renderer="Intel Iris OpenGL Engine",
            fix_hairline=True,
            )

    url = "https://www.neilpatel.com"

    # Open the URL.
    driver.get(url)

    # Save the screenshot under the file name passed as the argument.
    driver.save_screenshot("test.png")  # the screenshot is saved as test.png

    # Time elapsed for the overall process: the time now minus the start time.
    elapsed = "%s seconds" % (time.time() - start_time)
finally:
    # Close the browser even if a step above fails.
    driver.quit()

# Print the elapsed time on the screen.
print("Done in " + elapsed)

#end