Skip to content

Commit 453f13b

Browse files
anoadragon453 authored and phil-flex committed
Allow specifying the value of Accept-Language header for URL previews (matrix-org#7265)
1 parent 3801bbc commit 453f13b

File tree

5 files changed

+116
-2
lines changed

5 files changed

+116
-2
lines changed

changelog.d/7265.feature

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
Add a config option for specifying the value of the Accept-Language HTTP header when generating URL previews.

docs/sample_config.yaml

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -859,6 +859,31 @@ media_store_path: "DATADIR/media_store"
859859
#
860860
#max_spider_size: 10M
861861

862+
# A list of values for the Accept-Language HTTP header used when
863+
# downloading webpages during URL preview generation. This allows
864+
# Synapse to specify the preferred languages that URL previews should
865+
# be in when communicating with remote servers.
866+
#
867+
# Each value is an IETF language tag; a 2-3 letter identifier for a
868+
# language, optionally followed by subtags separated by '-', specifying
869+
# a country or region variant.
870+
#
871+
# Multiple values can be provided, and a weight can be added to each by
872+
# using quality value syntax (;q=). '*' translates to any language.
873+
#
874+
# Defaults to "en".
875+
#
876+
# Example:
877+
#
878+
# url_preview_accept_language:
879+
# - en-GB
880+
# - en-US;q=0.9
881+
# - fr;q=0.8
882+
# - '*;q=0.7'
883+
#
884+
url_preview_accept_language:
885+
# - en
886+
862887

863888
## Captcha ##
864889
# See docs/CAPTCHA_SETUP for full details of configuring this.

synapse/config/repository.py

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -192,6 +192,10 @@ def read_config(self, config, **kwargs):
192192

193193
self.url_preview_url_blacklist = config.get("url_preview_url_blacklist", ())
194194

195+
self.url_preview_accept_language = config.get(
196+
"url_preview_accept_language"
197+
) or ["en"]
198+
195199
def generate_config_section(self, data_dir_path, **kwargs):
196200
media_store = os.path.join(data_dir_path, "media_store")
197201
uploads_path = os.path.join(data_dir_path, "uploads")
@@ -329,6 +333,31 @@ def generate_config_section(self, data_dir_path, **kwargs):
329333
# The largest allowed URL preview spidering size in bytes
330334
#
331335
#max_spider_size: 10M
336+
337+
# A list of values for the Accept-Language HTTP header used when
338+
# downloading webpages during URL preview generation. This allows
339+
# Synapse to specify the preferred languages that URL previews should
340+
# be in when communicating with remote servers.
341+
#
342+
# Each value is an IETF language tag; a 2-3 letter identifier for a
343+
# language, optionally followed by subtags separated by '-', specifying
344+
# a country or region variant.
345+
#
346+
# Multiple values can be provided, and a weight can be added to each by
347+
# using quality value syntax (;q=). '*' translates to any language.
348+
#
349+
# Defaults to "en".
350+
#
351+
# Example:
352+
#
353+
# url_preview_accept_language:
354+
# - en-GB
355+
# - en-US;q=0.9
356+
# - fr;q=0.8
357+
# - '*;q=0.7'
358+
#
359+
url_preview_accept_language:
360+
# - en
332361
"""
333362
% locals()
334363
)

synapse/rest/media/v1/preview_url_resource.py

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -86,6 +86,7 @@ def __init__(self, hs, media_repo, media_storage):
8686
self.media_storage = media_storage
8787

8888
self.url_preview_url_blacklist = hs.config.url_preview_url_blacklist
89+
self.url_preview_accept_language = hs.config.url_preview_accept_language
8990

9091
# memory cache mapping urls to an ObservableDeferred returning
9192
# JSON-encoded OG metadata
@@ -315,9 +316,12 @@ async def _download_url(self, url, user):
315316

316317
with self.media_storage.store_into_file(file_info) as (f, fname, finish):
317318
try:
318-
logger.debug("Trying to get url '%s'", url)
319+
logger.debug("Trying to get preview for url '%s'", url)
319320
length, headers, uri, code = await self.client.get_file(
320-
url, output_stream=f, max_size=self.max_spider_size
321+
url,
322+
output_stream=f,
323+
max_size=self.max_spider_size,
324+
headers={"Accept-Language": self.url_preview_accept_language},
321325
)
322326
except SynapseError:
323327
# Pass SynapseErrors through directly, so that the servlet

tests/rest/media/v1/test_url_preview.py

Lines changed: 55 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -74,6 +74,12 @@ def make_homeserver(self, reactor, clock):
7474
)
7575
config["url_preview_ip_range_whitelist"] = ("1.1.1.1",)
7676
config["url_preview_url_blacklist"] = []
77+
config["url_preview_accept_language"] = [
78+
"en-UK",
79+
"en-US;q=0.9",
80+
"fr;q=0.8",
81+
"*;q=0.7",
82+
]
7783

7884
self.storage_path = self.mktemp()
7985
self.media_store_path = self.mktemp()
@@ -507,3 +513,52 @@ def test_OPTIONS(self):
507513
self.pump()
508514
self.assertEqual(channel.code, 200)
509515
self.assertEqual(channel.json_body, {})
516+
517+
def test_accept_language_config_option(self):
518+
"""
519+
Accept-Language header is sent to the remote server
520+
"""
521+
self.lookups["example.com"] = [(IPv4Address, "8.8.8.8")]
522+
523+
# Build and make a request to the server
524+
request, channel = self.make_request(
525+
"GET", "url_preview?url=http://example.com", shorthand=False
526+
)
527+
request.render(self.preview_url)
528+
self.pump()
529+
530+
# Extract Synapse's tcp client
531+
client = self.reactor.tcpClients[0][2].buildProtocol(None)
532+
533+
# Build a fake remote server to reply with
534+
server = AccumulatingProtocol()
535+
536+
# Connect the two together
537+
server.makeConnection(FakeTransport(client, self.reactor))
538+
client.makeConnection(FakeTransport(server, self.reactor))
539+
540+
# Tell Synapse that it has received some data from the remote server
541+
client.dataReceived(
542+
b"HTTP/1.0 200 OK\r\nContent-Length: %d\r\nContent-Type: text/html\r\n\r\n"
543+
% (len(self.end_content),)
544+
+ self.end_content
545+
)
546+
547+
# Move the reactor along until we get a response on our original channel
548+
self.pump()
549+
self.assertEqual(channel.code, 200)
550+
self.assertEqual(
551+
channel.json_body, {"og:title": "~matrix~", "og:description": "hi"}
552+
)
553+
554+
# Check that the server received the Accept-Language header as part
555+
# of the request from Synapse
556+
self.assertIn(
557+
(
558+
b"Accept-Language: en-UK\r\n"
559+
b"Accept-Language: en-US;q=0.9\r\n"
560+
b"Accept-Language: fr;q=0.8\r\n"
561+
b"Accept-Language: *;q=0.7"
562+
),
563+
server.data,
564+
)

0 commit comments

Comments
 (0)