I wrote some code that parses Lua 5.2 string literals in Lua 5.1, using patterns. It's a bit too slow, but it works fine as far as I have tested.
-- The two delimiters a short string literal may be wrapped in.
local squote, dquote = "'", '"'
-- escape "sequences"
-- Maps the character that follows a backslash to the character it stands for.
-- Only single-character escapes live here; \z, \x and decimal escapes are
-- handled separately by the parser.
local escapeSequences = {
  a = '\a',
  b = '\b',
  f = '\f',
  r = '\r',
  n = '\n',
  t = '\t',
  v = '\v',
  ['"'] = '"',
  ["'"] = "'",
  ['\\'] = '\\',
  -- a backslash followed by a real line break yields a newline in the string
  ['\n'] = '\n'
}
-- Lookup used to rewrite an escape character into its "padded" form.
-- \z, \x and the decimal digits keep their own backslash form; every other
-- character falls through to the __index handler, which wraps it as \v<char>/.
local pads = { z = "\\z", x = "\\x" }
for digit = 0, 9 do
  local key = tostring(digit)
  pads[key] = "\\" .. key
end
setmetatable(pads, {
  __index = function(_, key)
    return "\\v" .. key .. "/"
  end
})
--- Parse a quoted string literal using Lua 5.2 escape rules.
-- Every escape is first rewritten into a fixed-width "padded" form (a
-- backslash, a tag character and exactly two more characters) so that one
-- gsub pass can decode all of them without re-reading its own output.
-- @param s the literal, including its surrounding single or double quotes
-- @return the decoded string
-- Raises an error when the quotes are missing or mismatched, when the
-- delimiter appears unescaped, on an unknown escape, on a malformed \x escape
-- and on a decimal escape above 255.
local function parseString52(s)
  -- "validate" string; a lone quote character is not a complete literal
  local startChar = string.sub(s, 1, 1)
  assert(startChar == squote or startChar == dquote)
  assert(#s >= 2 and string.sub(s, -1, -1) == startChar)
  -- remove quotes
  local str = string.sub(s, 2, -2)

  -- Replace "normal" escapes with a padded escape. The delimiter and the
  -- invalid escape "m" trade places so that, afterwards, any delimiter still
  -- present in the text must have been unescaped.
  str = string.gsub(str, "\\(.)", function(c)
    if c == startChar then
      c = "m"
    elseif c == "m" then
      c = startChar
    end
    return pads[c]
  end)
  -- a padded delimiter is really the invalid escape \m (see the swap above)
  assert(not string.find(str, "\\v" .. startChar .. "/", 1, true),
    "invalid escape sequence near '\\m'")
  -- then check for non-escaped startChar
  assert(not string.find(str, startChar, 1, true), "unfinished string")
  -- every generated escape ends in something other than a backslash, so a
  -- backslash in last position was unpaired and escaped the closing quote
  assert(string.sub(str, -1) ~= "\\", "unfinished string")

  -- Pad 1- and 2-digit decimal escapes to three digits. The frontier %f[^0-9]
  -- is zero-width: the character after the digits is kept, and the end of the
  -- string counts as a boundary too.
  -- NOTE(review): %f is documented from Lua 5.2 on; confirm the target 5.1
  -- interpreter accepts it.
  str = string.gsub(str, "\\([0-9])%f[^0-9]", "\\00%1")
  str = string.gsub(str, "\\([0-9][0-9])%f[^0-9]", "\\0%1")

  -- split on \z
  local segments = {}
  local last = 1
  while true do
    local from, to = string.find(str, "\\z", last, true)
    if not from then
      break
    end
    segments[#segments + 1] = string.sub(str, last, from - 1)
    last = to + 1
  end
  segments[#segments + 1] = string.sub(str, last)

  -- Decodes one padded escape: a = the three characters after the backslash,
  -- b = the tag, c = the two characters after the tag, d = the first of those.
  local function decode(a, b, c, d)
    if b == "v" then
      local plain = escapeSequences[d]
      if plain then
        return plain
      end
      if d == "m" then
        return startChar
      end
      error("invalid escape sequence near '\\" .. d .. "'", 0)
    elseif b == "x" then
      -- tonumber alone would also accept padding or a sign, so check the
      -- two characters explicitly
      if not string.find(c, "^%x%x$") then
        error("hexadecimal digit expected near '\\x" .. c .. "'", 0)
      end
      return string.char(tonumber(c, 16))
    else
      -- after padding, a decimal escape always carries three digits
      local n = tonumber(a)
      if n >= 256 then
        error("decimal escape too large near '\\" .. a .. "'", 0)
      end
      return string.char(n)
    end
  end

  -- parse results
  local decoded = {}
  for index = 1, #segments do
    local segment = segments[index]
    if index > 1 then
      -- handle \z: drop the whitespace that followed it. This must happen
      -- before decoding so whitespace produced by an escape is kept.
      segment = string.gsub(segment, "^%s+", "")
    end
    -- a \x with fewer than two characters left would not match the decode
    -- pattern and would otherwise leak into the result verbatim
    local truncated = string.match(segment, "\\x.?$")
    if truncated then
      error("hexadecimal digit expected near '" .. truncated .. "'", 0)
    end
    decoded[index] = (string.gsub(segment, "\\(([vx0-9])((.).))", decode))
  end
  -- merge
  return table.concat(decoded, "")
end
-- Module export: the parser is published under the key parse52.
return { parse52 = parseString52 }
(For the test cases, see here; I decided to keep them separate so as not to bloat the post.)
__index
for the cases you need to handle specially). – Etan Reisner Dec 12 '14 at 15:31