summary | refs | log | tree | commit | diff
path: root/CVE-2023-38745.patch
blob: dd4354f255820a35fee2a8c57c848c5cc800fa22 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
From eddedbfc14916aa06fc01ff04b38aeb30ae2e625 Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Thu, 20 Jul 2023 09:26:38 -0700
Subject: [PATCH] Fix new variant of the vulnerability in CVE-2023-35936.

Guilhem Moulin noticed that the fix to CVE-2023-35936 was incomplete.
An attacker could get around it by double-encoding the malicious
extension to create or overwrite arbitrary files.

    $ echo '![](data://image/png;base64,cHJpbnQgImhlbGxvIgo=;.lua+%252f%252e%252e%252f%252e%252e%252fb%252elua)' >b.md
    $ .cabal/bin/pandoc b.md --extract-media=bar
    <p><img
    src="bar/2a0eaa89f43fada3e6c577beea4f2f8f53ab6a1d.lua+%2f%2e%2e%2f%2e%2e%2fb%2elua" /></p>
    $ cat b.lua
    print "hello"
    $ find bar
    bar/
    bar/2a0eaa89f43fada3e6c577beea4f2f8f53ab6a1d.lua+

This commit adds a test case for this more complex attack and fixes
the vulnerability.  (The fix is quite simple: if the URL-unescaped
filename or extension contains a '%', we just use the sha1 hash of the
contents as the canonical name, just as we do if the filename contains
'..'.)
---
 src/Text/Pandoc/Class/IO.hs |  2 ++
 src/Text/Pandoc/MediaBag.hs |  7 ++++---
 test/Tests/MediaBag.hs      | 12 +++++++++++-
 3 files changed, 17 insertions(+), 4 deletions(-)

Index: pandoc-3.1.3/src/Text/Pandoc/Class/IO.hs
===================================================================
--- pandoc-3.1.3.orig/src/Text/Pandoc/Class/IO.hs	2023-09-21 09:24:23.311539088 +0000
+++ pandoc-3.1.3/src/Text/Pandoc/Class/IO.hs	2023-09-21 09:27:24.005959930 +0000
@@ -224,6 +224,8 @@ writeMedia :: (PandocMonad m, MonadIO m)
            -> m ()
 writeMedia dir (fp, _mt, bs) = do
   -- we normalize to get proper path separators for the platform
+  -- NOTE: fp is not URI-unescaped here; given how insertMedia
+  -- is written, we shouldn't have any % in a canonical media name...
   let fullpath = normalise $ dir </> fp
   liftIOError (createDirectoryIfMissing True) (takeDirectory fullpath)
   logIOError $ BL.writeFile fullpath bs
Index: pandoc-3.1.3/src/Text/Pandoc/MediaBag.hs
===================================================================
--- pandoc-3.1.3.orig/src/Text/Pandoc/MediaBag.hs	2023-09-21 09:24:23.311539088 +0000
+++ pandoc-3.1.3/src/Text/Pandoc/MediaBag.hs	2023-09-21 09:27:24.006959920 +0000
@@ -89,16 +89,17 @@ insertMedia fp mbMime contents (MediaBag
                        && Windows.isRelative fp''
                        && isNothing uri
                        && not (".." `T.isInfixOf` fp')
+                       && '%' `notElem` fp''
                      then fp''
-                     else showDigest (sha1 contents) <> "." <> ext
+                     else showDigest (sha1 contents) <> ext
         fallback = case takeExtension fp'' of
                         ".gz" -> getMimeTypeDef $ dropExtension fp''
                         _     -> getMimeTypeDef fp''
         mt = fromMaybe fallback mbMime
         path = maybe fp'' (unEscapeString . uriPath) uri
         ext = case takeExtension path of
-                '.':e -> e
-                _ -> maybe "" T.unpack $ extensionFromMimeType mt
+                '.':e | '%' `notElem` e -> '.':e
+                _ -> maybe "" (\x -> '.':T.unpack x) $ extensionFromMimeType mt
 
 -- | Lookup a media item in a 'MediaBag', returning mime type and contents.
 lookupMedia :: FilePath