https://render-html.com/render
. It expects request parameters
in the query string and some of the request headers are forwarded to the scraped URL. An endpoint
to test the integration is https://render-html.com/test
.
The examples below require the environment variable RENDER_HTML_API_KEY to
be set.
#!/usr/bin/env sh
# Render the test page as an 800x800 screenshot via the render-html.com API
# and open the result. Requires RENDER_HTML_API_KEY in the environment.

url='https://render-html.com/test?redirect'

# Each option is written with a leading '&'; the backslash-newlines join
# them into a single query string.
options="\
&render_mode=screenshot\
&format=content\
&viewport_width=800\
&viewport_height=800\
"

# url_raw must be the last parameter: everything after it is taken as part
# of the target URL.
api_url="https://render-html.com/render?$options&url_raw=$url"

# Quote the URL so that '?' and '&' are never subject to globbing or word
# splitting. On failure, show the error body and exit with a non-zero status.
curl --fail-with-body --no-progress-meter "$api_url" \
    -H "authorization:Bearer $RENDER_HTML_API_KEY" \
    > curl_output || { cat curl_output; exit 1; }

xdg-open curl_output
#!/usr/bin/env python
"""Scrapy example that routes requests through the render-html.com API."""
import re
import os
import scrapy
from dataclasses import dataclass
from scrapy.crawler import CrawlerProcess


@dataclass
class RenderHtmlResponse:
    """Fields of an API response body in the ``key_value`` format.

    All values are kept as raw bytes, exactly as found in the body.
    """

    status: bytes
    url: bytes
    original_url: None | bytes = None
    content_type: None | bytes = None
    raw_data: None | bytes = None
    browser_html: None | bytes = None
    screenshot: None | bytes = None

    @staticmethod
    def deserialize(body: bytes):
        """Parse a ``key_value`` response body into a RenderHtmlResponse.

        The body is a sequence of ``key=value`` entries. Newlines inside a
        value are escaped by a following space, which is removed again here.

        Raises:
            TypeError: if the body contains a key that is not a field of
                this dataclass, or lacks ``status`` or ``url``.
        """
        entries = re.findall(
            rb'(.*?)(?:=|\n )([\s\S]*?)(?:\n(?! )|$)', body
        )
        return RenderHtmlResponse(**{
            key.decode(): value.replace(b'\n ', b'\n')
            for key, value in entries
        })


class RenderHtmlMiddleware:
    """Downloader middleware that sends requests via the render API.

    Set ``request.meta['render_html_options']`` to a query-string fragment
    to pass API options, or to ``False`` to bypass the API for a request.
    """

    API_BASE_URL = 'https://render-html.com/render?'

    def process_request(self, request, spider):
        """Rewrite *request* so that it targets the render API.

        Returns None (leave the request untouched) when the API is disabled
        for this request or the request already points at the API;
        otherwise returns the rewritten request.
        """
        options = request.meta.get('render_html_options', '')
        if options is False:
            return None
        if request.url.startswith(self.API_BASE_URL):
            # Already rewritten on a previous pass through the middleware.
            return None
        api_key = spider.settings['RENDER_HTML_API_KEY']
        return request.replace(
            # url_raw must come last; the target URL is appended unencoded.
            url=self.API_BASE_URL + f'{options}&url_raw={request.url}',
            headers={**request.headers, 'authorization': b'Bearer ' + api_key},
        )

    def process_response(self, response, request, spider):
        """Unwrap an API response into a response for the target URL.

        Responses to requests that did not go through the API are returned
        unchanged.

        Raises:
            Exception: if the API answered with a status other than 200;
                the exception carries the response body.
        """
        if not request.url.startswith(self.API_BASE_URL):
            # Scrapy requires process_response to return a Response (or
            # Request); returning None here would be an error.
            return response
        if response.status != 200:
            raise Exception(response.body)
        rhr = RenderHtmlResponse.deserialize(response.body)
        response.headers.update({'content-type': rhr.content_type})
        response = response.replace(
            url=rhr.url.decode(),
            body=rhr.raw_data,
            status=rhr.status,
            request=request.replace(url=rhr.original_url.decode()),
        )
        # Keep the parsed API response available to spider callbacks.
        response.render_html_response = rhr
        return response


class ExampleSpider(scrapy.Spider):
    """Minimal spider that fetches the API test page and prints the result."""

    name = 'ExamplesSpider'
    custom_settings = {
        'DOWNLOADER_MIDDLEWARES': {RenderHtmlMiddleware: 0}
    }

    def start_requests(self):
        """Yield the single request for the API test endpoint."""
        yield scrapy.Request('https://render-html.com/test?redirect')

    def parse(self, response):
        """Print the unwrapped request, response, headers and body."""
        print(response.request)
        print(response)
        print(response.headers)
        print(response.body.decode())


def main():
    """Run ExampleSpider with the API key taken from the environment.

    Raises:
        KeyError: if RENDER_HTML_API_KEY is not set.
    """
    render_html_api_key = os.environ['RENDER_HTML_API_KEY'].encode()
    process = CrawlerProcess({
        'LOG_LEVEL': 'ERROR',
        'RENDER_HTML_API_KEY': render_html_api_key,
    })
    process.crawl(ExampleSpider)
    process.start()


if __name__ == '__main__':
    main()
#!/usr/bin/env php
<?php
declare(strict_types=1);

/** Thrown when the render API answers with a status other than 200. */
class RenderHtmlException extends Exception {}

/** Fields of an API response body in the key_value format. */
class RenderHtmlResponse {
    /**
     * Keys that match no declared property are collected by $args and
     * ignored.
     */
    function __construct(
        public ?string $original_url = null,
        public ?string $url = null,
        public ?string $status = null,
        public ?string $content_type = null,
        public ?string $raw_data = null,
        public ?string $browser_html = null,
        public ?string $screenshot = null,
        ...$args
    ) {}

    /**
     * Parse a key_value response body.
     *
     * The body is a sequence of key=value entries. Newlines inside a value
     * are escaped by a following space, which is removed again here.
     */
    static function deserialize(string $body): RenderHtmlResponse {
        preg_match_all('/(.*?)(?:=|\n )([\s\S]*?)(?:\n(?! )|$)/', $body, $matches);
        $values = array_map(
            fn ($x) => str_replace("\n ", "\n", $x),
            $matches[2]
        );
        // Spread the key => value map as named constructor arguments.
        return new RenderHtmlResponse(...array_combine($matches[1], $values));
    }
}

/**
 * Fetch $url through the render API.
 *
 * @param string $api_key API key sent as a Bearer token.
 * @param string $url     Target URL; appended unencoded as url_raw.
 * @param string $options Query-string fragment with API options.
 *
 * @throws RenderHtmlException if the API answers with a status other than 200.
 * @throws Exception           if the transfer itself fails.
 */
function make_render_html_request(string $api_key, string $url, string $options=''): RenderHtmlResponse {
    // url_raw must be the last parameter of the API URL.
    $url = "https://render-html.com/render?{$options}&url_raw=$url";
    $curl = curl_init();
    try {
        curl_setopt($curl, CURLOPT_URL, $url);
        curl_setopt($curl, CURLOPT_HTTPHEADER, ["authorization:Bearer $api_key", 'accept:']);
        curl_setopt($curl, CURLOPT_RETURNTRANSFER, true);
        $body = curl_exec($curl);
        if ($body === false) {
            throw new Exception(curl_error($curl));
        }
        $status = curl_getinfo($curl, CURLINFO_HTTP_CODE);
        if ($status !== 200) {
            throw new RenderHtmlException($body);
        }
        return RenderHtmlResponse::deserialize($body);
    } finally {
        curl_close($curl);
    }
}

/** Fetch example.org as raw data plus screenshot and print every field. */
function main() {
    $render_html_api_key = getenv('RENDER_HTML_API_KEY');
    if ($render_html_api_key === false) {
        // getenv() returns false for an unset variable; passing that on
        // would raise a TypeError under strict_types.
        fwrite(STDERR, "The environment variable RENDER_HTML_API_KEY is not set.\n");
        exit(1);
    }
    try {
        $response = make_render_html_request(
            $render_html_api_key,
            'http://example.org',
            'render_mode=raw_data,screenshot&viewport_width=200&viewport_height=800'
        );
    } catch (RenderHtmlException $e) {
        // Do not use the message as a printf format string: it comes from
        // the server and may contain '%'.
        print("Failed to fetch the response: " . $e->getMessage() . "\n");
        return;
    }
    foreach ($response as $key => $value) {
        $value = $value ?? '(null)';
        print("\e[1m$key:\e[0m\n$value\n\n");
    }
}

main();
#!/usr/bin/env node const crawlee = require('crawlee'); class RenderHtmlCrawler extends crawlee.BasicCrawler { constructor({render_html_api_key, requestHandler}) { super({requestHandler}); this.render_html_api_key = render_html_api_key; } async _runRequestHandler(ctx) { ctx.request.headers['user-agent'] = 'Crawlee'; ctx.request.url = `https://render-html.com/render?` + `render_mode=screenshot&viewport_width=500&url_raw=${ctx.request.url}`; ctx.request.headers['authorization'] = 'Bearer ' + this.render_html_api_key; await super._runRequestHandler(ctx); } } async function requestHandler({request, sendRequest}) { const response = await sendRequest(); if (response.statusCode !== 200) { throw response.body; } let data = Object.fromEntries( response.rawBody.toString('binary').matchAll(/(.*?)(?:=|\n )([\s\S]*?)(?:\n(?! )|$)/g) .map(x => [x[1], x[2].replaceAll('\n ', '\n')]) ); const fs = require('node:fs'); fs.writeFileSync('screenshot.png', Buffer.from(data['screenshot'], 'binary')); } function main() { const crawler = new RenderHtmlCrawler({ render_html_api_key: process.env.RENDER_HTML_API_KEY, requestHandler }); crawler.run([ new Request('https://render-html.com/test?redirect') ]); } main();
url_raw
option, which, if used, must come last and is not encoded.
user-agent
, referer
,
cookie
, authorization
, proxy-authorization
.
render_mode:
A comma-separated list of one or more of the values raw_data, browser_html, screenshot.
The default value is raw_data; the other modes are only
supported for the GET request method. Our unblocking capabilities are the same
for all render modes.
The loading of third-party iframes is disabled except for captcha solving purposes.
url
or url_raw
:
The target URL. url_raw
expects no additional percent-encoding, so it
can be conveniently copied directly from the browser’s address bar, and it must be the last
parameter, because everything that follows is treated as part of the target URL.
viewport_width:
The width of the screenshot in pixels. Mandatory when using the screenshot render mode.
viewport_height:
The height of the screenshot in pixels. When omitted, a full-page screenshot is taken,
i.e. the height of the scrollable area is used.
format:
Can be key_value
(default) or content
.
With content
one obtains the data of a single render mode in the
response body.
With key_value
, the content is a repetition of the sequence
key=value
where newlines in the value are escaped by inserting a space after them.
The values for the keys raw_data
and screenshot
are in general binary,
whereas the value for browser_html
is always UTF-8 encoded.
raw_data
: 2.00 € per 1000
successful requests.
browser_html
or screenshot
or multiple render
modes combined: 6.00 € per 1000 successful requests.
contact@render-html.com
.