Take the 2-minute tour ×
Code Review Stack Exchange is a question and answer site for peer programmer code reviews. It's 100% free, no registration required.

I wrote some code that parses Lua 5.2 string literals in Lua 5.1 using patterns. It's a bit too slow, but it works fine as far as I have tested.

-- The two delimiters a quoted string literal may start and end with.
local squote, dquote = "'", '"'

-- escape "sequences": maps the character that follows a backslash to the
-- character the escape denotes
local escapeSequences = {
  a = '\a',
  b = '\b',
  f = '\f',
  r = '\r',
  n = '\n',
  t = '\t',
  v = '\v',
  ['"'] = '"',
  ["'"] = "'",
  ['\\'] = '\\',
  -- line continuation: a backslash followed by a real newline results in a
  -- newline in the string
  ['\n'] = '\n'
}

-- Lookup used to rewrite the character after a backslash into its "padded"
-- form. `z`, `x` and the decimal digits map back to backslash + themselves;
-- any other character k falls through to __index and becomes "\v" .. k .. "/".
local pads = setmetatable({ z = "\\z", x = "\\x" }, {
  __index = function(_, key)
    return "\\v" .. key .. "/"
  end
})

-- the ten digit entries all have the same shape, so generate them
for digit in string.gmatch("0123456789", ".") do
  pads[digit] = "\\" .. digit
end

--- Parse a quoted string literal using Lua 5.2 escape rules.
--
-- Works in stages: every backslash escape is first rewritten into a padded
-- form (through `pads`), decimal escapes are widened to three digits, the
-- text is split on `\z`, and each piece is decoded in one gsub pass.
--
-- @param s the literal, including its surrounding single or double quotes
-- @return the decoded string
-- Raises an error when the literal is not properly quoted, is unfinished,
-- or contains an invalid, malformed or too-large escape.
local function parseString52(s)
  -- "validate" string
  local startChar = string.sub(s, 1, 1)
  assert(startChar == squote or startChar == dquote)
  assert(string.sub(s, -1, -1) == startChar)
  -- a lone quote character is both the first and the last character
  assert(#s >= 2, "unfinished string")

  -- remove quotes
  local str = string.sub(s, 2, -2)

  -- replace "normal" escapes with a padded escape
  str = string.gsub(str, "\\(.)", function(c)
    -- swap startChar with some invalid escape
    if c == startChar then
      c = "m"
    -- swap the invalid escape with startChar
    elseif c == "m" then
      c = startChar
    end
    return pads[c]
  end)

  -- check for a padded escape for startChar - remember this is actually our invalid escape
  assert(not string.find(str, "\\v" .. startChar .. "/", 1, true), "invalid escape sequence near '\\m'")

  -- then check for non-escaped startChar
  assert(not string.find(str, startChar, 1, true), "unfinished string")

  -- No padded escape ends in a backslash, so a trailing one was left unpaired
  -- by the gsub above: it escaped the quote that was taken as the closing one.
  assert(string.sub(str, -1) ~= "\\", "unfinished string")

  -- Widen 1- and 2-digit decimal escapes to 3 digits. The match is greedy, so
  -- at most three digits belong to the escape, and nothing after the digits
  -- is consumed (this also covers an escape at the very end of the string).
  str = string.gsub(str, "\\(%d%d?%d?)", function(digits)
    return "\\" .. string.rep("0", 3 - #digits) .. digits
  end)

  -- Decodes one padded escape: a is everything after the backslash, b the
  -- kind (v, x or a digit), c the two characters after b, d the first of them.
  -- Errors are raised with level 0 so the message carries no position prefix.
  local function decode(a, b, c, d)
    if b == "v" then
      local plain = escapeSequences[d]
      if plain then
        return plain
      elseif d == "m" then
        -- "m" stands in for the escaped quote character (see the swap above)
        return startChar
      end
      error("invalid escape sequence near '\\" .. d .. "'", 0)
    elseif b == "x" then
      -- require exactly two hex digits; c is shorter when the piece ends early
      if not string.find(c, "^%x%x$") then
        error("hexadecimal digit expected near '\\x" .. c .. "'", 0)
      end
      return string.char(tonumber(c, 16))
    else
      local n = tonumber(a, 10)
      if n >= 256 then
        error("decimal escape too large near '\\" .. a .. "'", 0)
      end
      return string.char(n)
    end
  end

  -- split on \z and decode each piece
  local out = {}
  local count = 0
  local pos = 1
  repeat
    local zFrom, zTo = string.find(str, "\\z", pos, true)
    local piece = string.sub(str, pos, zFrom and zFrom - 1 or -1)
    if count > 0 then
      -- handle \z: skip the whitespace that follows it. This must happen
      -- before decoding so whitespace produced by an escape is kept.
      piece = string.gsub(piece, "^%s+", "")
    end
    count = count + 1
    -- the trailing characters are optional so a truncated \x escape reaches
    -- decode() and is reported instead of being left in the output
    out[count] = string.gsub(piece, "\\(([vx0-9])((.?).?))", decode)
    pos = zTo and zTo + 1
  until not pos

  -- merge
  return table.concat(out)
end

-- Module interface: the parser is exposed under the name `parse52`.
local exports = {}
exports.parse52 = parseString52
return exports

(For test cases, see here; I decided to keep them separate so as not to bloat the post.)

share|improve this question
    
Have you run a profiler (etc.) on this to see where the time is being taken up? I imagine (though I don't know) that a function gsub is slower than a table gsub. So it might make sense to see about converting that first gsub pass to a table (with __index for the cases you need to handle specially). –  Etan Reisner Dec 12 '14 at 15:31
    
See string.gsub in lua.org/manual/5.0/manual.html#5.3 (let's just say that Lua 5.0 compatibility is a bonus) –  SoniEx2 Dec 12 '14 at 16:10

Your Answer

 
discard

By posting your answer, you agree to the privacy policy and terms of service.

Browse other questions tagged or ask your own question.