-- Documentation for this module may be created at Module:Delink/doc

  1. -- This module de-links most wikitext.
  2.  
  3. require('Module:No globals')
  4.  
  5. local p = {}
  6.  
  7. local getArgs
  8.  
  -- Handles a link written with the reverse pipe trick, i.e. "[[|display]]".
  --
  -- s: the candidate link text.
  -- Returns s unchanged when it starts with "[[|" and a further pipe or a
  -- newline follows; otherwise returns whatever lies between "[[|" and the
  -- last "]]" (nil if s contains no such span).
  local function delinkReversePipeTrick(s)
    -- A second pipe or a line break after "[[|" means this is left untouched.
    local hasExtraPipeOrNewline = s:match("^%[%[|.*[|\n]") ~= nil
    if hasExtraPipeOrNewline then
      return s
    end
    return s:match("%[%[|(.*)%]%]")
  end
  16.  
  -- Computes the display text of a link that uses the [[Help:Pipe trick]],
  -- i.e. a link of the form "[[Title|]]".
  --
  -- s: the link text, including the square brackets and the trailing pipe.
  -- Returns the derived display text. If s does not have the "[[...|]]"
  -- shape it is returned unchanged rather than raising an error.
  local function delinkPipeTrick(s)
    -- We need to deal with colons, brackets, and commas.
    local title
    if s:match(":") then
      -- Drop everything up to and including the first colon.
      title = s:match("%[%[.-:(.*)|%]%]")
    else
      -- No colon: take all of the text between "[[" and "|]]".
      title = s:match("%[%[(.*)|%]%]")
    end
    if not title then
      -- Not a pipe-trick link after all; leave the text alone.
      return s
    end
    if title:match("%(.-%)$") then
      -- Brackets trump commas: strip a trailing parenthetical and the
      -- optional space in front of it.
      title = title:match("(.-) ?%(.-%)$")
    elseif title:match(",") then
      -- No brackets: keep only the text before the first comma.
      title = title:match("(.-),.*$")
    end
    return title
  end
  35.  
  -- De-links a single internal wikilink.
  --
  -- s: text of the form "[[...]]" (as matched by the caller's pattern).
  -- Returns:
  --   * the display text of the link;
  --   * "" when the prefix before the first colon is a known language tag or
  --     names the File or Category namespace;
  --   * s unchanged when the title area contains characters that are not
  --     allowed in titles.
  local function delinkWikilink(s)
    -- Deal with the reverse pipe trick. The check is anchored so that a
    -- "[[|" appearing later in the text does not hijack a link that merely
    -- contains one.
    if s:match("^%[%[|") then
      return delinkReversePipeTrick(s)
    end

    local result = mw.uri.decode(s, "PATH") -- decode percent-encoded entities. Leave underscores and plus signs.
    result = mw.text.decode(result, true) -- decode HTML entities.

    -- Check for bad titles. To do this we need to find the
    -- title area of the link, i.e. the part before any pipes.
    local titlearea
    if result:match("|") then -- piped link
      titlearea = result:match("^%[%[(.-)|.*%]%]")
    else
      titlearea = result:match("^%[%[(.-)%]%]")
    end
    -- Bail out on an unparseable link or on bad characters in the title.
    if not titlearea or mw.ustring.match(titlearea, "[%[%]<>{}%%%c\n]") then
      return s
    end

    -- Check for categories, interwikis, and files.
    local colonprefix = result:match("^%[%[(.-):.*%]%]") or "" -- text before the first colon
    local ns = mw.site.namespaces[colonprefix] -- see if this is a known namespace
    if mw.language.isKnownLanguageTag(colonprefix)
      or ( ns and ( ns.canonicalName == "File" or ns.canonicalName == "Category" ) ) then
      return ""
    end

    -- Remove the colon if the link is using the [[Help:Colon trick]].
    -- Anchored: only a colon directly after the opening brackets counts.
    if result:match("^%[%[:") then
      result = "[[" .. result:match("^%[%[:(.*%]%])")
    end

    -- Deal with links using the [[Help:Pipe trick]].
    if mw.ustring.match(result, "^%[%[[^|]*|%]%]") then
      return delinkPipeTrick(result)
    end

    -- Find the display area of the wikilink.
    if result:match("|") then -- piped link
      result = result:match("^%[%[.-|(.+)%]%]")
      -- Remove new lines from the display of multiline piped links,
      -- where the pipe is before the first new line.
      result = result:gsub("\n", "")
    else
      result = result:match("^%[%[(.-)%]%]")
    end

    return result
  end
  83.  
  -- De-links a single external link.
  --
  -- Assumes internal wikilinks have already been de-linked and that s is
  -- some text between two single square brackets, e.g. "[foo]".
  --
  -- Returns s unchanged when it contains a line break or does not start with
  -- one of the recognised URL prefixes; otherwise returns the link's display
  -- text (the empty string when there is none).
  local function delinkURL(s)
    -- Text containing a line break is never formatted as a URL.
    if s:match("\n") then
      return s
    end

    -- Look for a valid URL prefix followed by at least one valid URL character.
    local prefixes = {"//", "http://", "https://", "ftp://", "gopher://", "mailto:", "news:", "irc://"}
    local matchedPrefix
    for index = 1, #prefixes do
      local candidate = prefixes[index]
      if mw.ustring.match(s, '^%[' .. candidate ..'[^"%s].*%]' ) then
        matchedPrefix = candidate
        break
      end
    end
    if matchedPrefix == nil then
      return s
    end

    -- Everything after the URL prefix and before the final square bracket.
    local afterPrefix = s:match("^%[" .. matchedPrefix .. "(.*)%]")
    -- Everything from the first URL separator character ("<> ) onwards.
    local display = afterPrefix:match('^.-(["<> ].*)') or ""
    -- If the separating character was a space, trim it off.
    display = mw.ustring.match(display, "^%s*(%S.*)$") or ""

    -- Prefer the entity-decoded text unless decoding yields control characters.
    local decoded = mw.text.decode(display, true)
    if mw.ustring.match(decoded, "%c") then
      return display
    end
    return decoded
  end
  114.  
  -- Applies delinkFunction to every piece of s that matches pattern.
  --
  -- s:              the text to process; must be a string.
  -- pattern:        a pattern string that must begin with "^".
  -- delinkFunction: replacement callback passed to mw.ustring.gsub.
  -- Returns the processed text.
  -- Raises an error (reported at the caller's level) when s is not a string
  -- or when pattern is not a string beginning with "^".
  local function delinkLinkClass(s, pattern, delinkFunction)
    -- Note: "not type(s) == 'string'" would parse as "(not type(s)) == 'string'"
    -- and never be true, so the comparison is written with ~= instead.
    if type(s) ~= "string" then
      error("Attempt to de-link non-string input.", 2)
    end
    -- "^" is a single byte, so a byte-level sub is enough for this check.
    if type(pattern) ~= "string" or pattern:sub(1, 1) ~= "^" then
      error('Invalid pattern detected. Patterns must begin with "^".', 2)
    end
    -- Iterate over the text string, and replace any matched text using the
    -- delink function. We need to iterate character by character rather
    -- than just use gsub, otherwise nested links aren't detected properly.
    -- Characters are collected in a table and joined once at the end to
    -- avoid building the result by repeated concatenation.
    local pieces = {}
    while s ~= "" do
      -- Replace text at the start of s using one iteration of gsub.
      s = mw.ustring.gsub(s, pattern, delinkFunction, 1)
      -- Move the left-most character over to the output.
      pieces[#pieces + 1] = mw.ustring.sub(s, 1, 1)
      s = mw.ustring.sub(s, 2, -1)
    end
    return table.concat(pieces)
  end
  135.  
  -- Main entry point for other Lua modules.
  --
  -- args: table of options.
  --   args[1]         the text to de-link (defaults to "").
  --   args.refs       "yes" to also strip ref strip markers.
  --   args.comments   "no" to keep HTML comments.
  --   args.wikilinks  "no" to leave internal wikilinks alone.
  --   args.urls       "no" to leave external links alone.
  --   args.whitespace "no" to leave whitespace alone.
  -- Returns the processed text.
  function p._delink(args)
    local output = args[1] or ""

    if args.refs == "yes" then
      -- Remove any [[Help:Strip markers]] representing ref tags. In most
      -- situations this is not a good idea - only use it if you know what
      -- you are doing!
      -- NOTE(review): confirm this pattern still matches the strip marker
      -- format produced by the MediaWiki version in use.
      output = mw.ustring.gsub(output, "UNIQ%w*%-ref%-%d*%-QINU", "")
    end

    if args.comments ~= "no" then
      -- Remove html comments.
      output = output:gsub("<!%-%-.-%-%->", "")
    end

    if args.wikilinks ~= "no" then
      -- De-link wikilinks.
      output = delinkLinkClass(output, "^%[%[.-%]%]", delinkWikilink)
    end

    if args.urls ~= "no" then
      -- De-link URLs.
      output = delinkLinkClass(output, "^%[.-%]", delinkURL)
    end

    if args.whitespace ~= "no" then
      -- Replace single new lines with a single space, but leave double new
      -- lines and new lines only containing spaces or tabs before a second
      -- new line.
      local joined = mw.ustring.gsub(output, "([^\n \t][ \t]*)\n([ \t]*[^\n \t])", "%1 %2")
      -- Remove extra tabs and spaces.
      output = joined:gsub("[ \t]+", " ")
    end

    return output
  end
  160.  
  -- Entry point for #invoke. Reads the arguments from the frame (treating
  -- Template:Delink as a wrapper template) and hands them to p._delink.
  function p.delink(frame)
    -- Module:Arguments is only loaded the first time it is needed.
    getArgs = getArgs or require('Module:Arguments').getArgs
    local args = getArgs(frame, {wrappers = 'Template:Delink'})
    return p._delink(args)
  end
  167.  
  168. return p