# Scan a Puppet manifest and convert it into a stream of tokens.
#
# Walks `code` left to right. At each position it first tries every
# regex in KNOWN_TOKENS (promoting :NAME matches that are KEYWORDS to
# their own token type, e.g. :CLASS, :IF), then falls back to
# hand-rolled matchers for variables, quoted strings, comments,
# regexes, newlines and whitespace. Branch order matters: e.g. the
# regex and comment branches must be tried before the bare :DIV '/'.
#
# @param code [String] the manifest source to lex
# @return [Array] the accumulated `tokens` array
# @raise [PuppetLint::LexerError] when no rule matches the current
#   position, or when a quoted string or regex is unterminated
def tokenise(code)
  i = 0
  while i < code.size
    chunk = code[i..-1]

    found = false
    KNOWN_TOKENS.each do |type, regex|
      if value = chunk[regex, 1]
        length = value.size
        if type == :NAME
          if KEYWORDS.include? value
            # Keywords get their own token type rather than generic :NAME.
            tokens << new_token(value.upcase.to_sym, value, length)
          else
            tokens << new_token(type, value, length)
          end
        else
          tokens << new_token(type, value, length)
        end
        i += length
        found = true
        break
      end
    end

    unless found
      if var_name = chunk[/\A\$((::)?([\w-]+::)*[\w-]+(\[.+?\])*)/, 1]
        # $variable — the leading '$' is consumed but not stored in the
        # token value, hence the +1 on the length.
        length = var_name.size + 1
        tokens << new_token(:VARIABLE, var_name, length)
      elsif chunk.match(/\A'(.*?)'/m)
        # Single-quoted string: scan from just past the opening quote
        # up to (and including) the first unescaped closing quote.
        str_content = StringScanner.new(code[i+1..-1]).scan_until(/(\A|[^\\])(\\\\)*'/m)
        # scan_until returns nil for an unterminated string; raise the
        # lexer's own error instead of crashing with NoMethodError.
        raise PuppetLint::LexerError.new(@line_no, @column) if str_content.nil?
        length = str_content.size + 1
        tokens << new_token(:SSTRING, str_content[0..-2], length)
      elsif chunk.match(/\A"/)
        # Double-quoted string: may contain interpolation, so hand it
        # off to interpolate_string — which presumably emits the tokens
        # itself (nothing is pushed here). TODO confirm against its
        # definition.
        str_contents = StringScanner.new(code[i+1..-1]).scan_until(/(\A|[^\\])(\\\\)*"/m)
        raise PuppetLint::LexerError.new(@line_no, @column) if str_contents.nil?
        # Pass the current line number and column so interpolated
        # tokens can be positioned correctly.
        lines = code[0..i].split("\n")
        interpolate_string(str_contents, lines.count, lines.last.length)
        length = str_contents.size + 1
      elsif comment = chunk[/\A(#.*)/, 1]
        length = comment.size
        # Strip the leading '#' from the stored value; length still
        # covers the full source text.
        comment.sub!(/#/, '')
        tokens << new_token(:COMMENT, comment, length)
      elsif slash_comment = chunk[/\A(\/\/.*)/, 1]
        length = slash_comment.size
        slash_comment.sub!(/\/\//, '')
        tokens << new_token(:SLASH_COMMENT, slash_comment, length)
      elsif mlcomment = chunk[/\A(\/\*.*?\*\/)/m, 1]
        # Multi-line comment: strip the /* */ delimiters and any
        # leading '*' decoration on continuation lines.
        length = mlcomment.size
        mlcomment.sub!(/\A\/\* ?/, '')
        mlcomment.sub!(/ ?\*\/\Z/, '')
        mlcomment.gsub!(/ *\* ?/, '')
        mlcomment.strip!
        tokens << new_token(:MLCOMMENT, mlcomment, length)
      elsif chunk.match(/\A\/.*?\//) && possible_regex?
        # Only treat /.../ as a regex literal where the grammar allows
        # one — possible_regex? presumably inspects preceding tokens;
        # TODO confirm. Otherwise '/' falls through to :DIV below.
        str_content = StringScanner.new(code[i+1..-1]).scan_until(/(\A|[^\\])(\\\\)*\//m)
        raise PuppetLint::LexerError.new(@line_no, @column) if str_content.nil?
        length = str_content.size + 1
        tokens << new_token(:REGEX, str_content[0..-2], length)
      elsif eolindent = chunk[/\A((\r\n|\r|\n)[ \t]+)/m, 1]
        # Newline followed by indentation: emit separate :NEWLINE and
        # :INDENT tokens so indentation checks can see both.
        eol = eolindent[/\A([\r\n]+)/m, 1]
        indent = eolindent[/\A[\r\n]+([ \t]+)/m, 1]
        tokens << new_token(:NEWLINE, eol, eol.size)
        tokens << new_token(:INDENT, indent, indent.size)
        length = indent.size + eol.size
      elsif whitespace = chunk[/\A([ \t]+)/, 1]
        length = whitespace.size
        tokens << new_token(:WHITESPACE, whitespace, length)
      elsif eol = chunk[/\A(\r\n|\r|\n)/, 1]
        length = eol.size
        tokens << new_token(:NEWLINE, eol, length)
      elsif chunk.match(/\A\//)
        # A bare '/' that is neither comment nor regex is division.
        length = 1
        tokens << new_token(:DIV, '/', length)
      else
        raise PuppetLint::LexerError.new(@line_no, @column)
      end

      i += length
    end
  end

  tokens
end