# Splits source text into lexer tokens.
#
# Scans `code` from left to right. At each offset the KNOWN_TOKENS patterns are
# tried first, in order; if none of them matches, the hand-written rules below
# handle variables, quoted strings, comments, regex literals, whitespace,
# newlines and a bare "/". Every token is built with `new_token` and appended
# to `tokens` (both are defined outside this method).
#
# code - The String to tokenise. A trailing line terminator is ignored. The
#        caller's string is not modified, so frozen strings are accepted.
#
# Returns whatever `tokens` evaluates to once the whole input is consumed.
# Raises PuppetLint::LexerError if no rule matches at some offset, or if a
# quoted string or regex literal has no unescaped closing delimiter.
def tokenise(code)
  # Work on a chomped copy: chomp! would alter the caller's string and raises
  # on frozen input.
  code = code.chomp

  # Reads the text following the opening delimiter at `offset`, up to and
  # including the first `terminator` match (a delimiter preceded by an even
  # number of backslashes). scan_until returns nil when there is no such
  # delimiter; report that as a lexer error instead of failing later on nil.
  read_delimited = lambda do |offset, terminator|
    StringScanner.new(code[(offset + 1)..-1]).scan_until(terminator) ||
      raise(PuppetLint::LexerError.new(code, offset))
  end

  i = 0
  while i < code.size
    chunk = code[i..-1]
    found = false

    KNOWN_TOKENS.each do |type, regex|
      value = chunk[regex, 1]
      next unless value

      # A NAME listed in KEYWORDS is emitted with its upper-cased value as type.
      token_type = type
      token_type = value.upcase.to_sym if type == :NAME && KEYWORDS.include?(value)

      tokens << new_token(token_type, value, :chunk => code[0..i])
      i += value.size
      found = true
      break
    end
    next if found

    if var_name = chunk[/\A\$((::)?([\w-]+::)*[\w-]+)/, 1]
      tokens << new_token(:VARIABLE, var_name, :chunk => code[0..i])
      # +1 for the leading "$", which is not part of the captured name.
      i += var_name.size + 1
    elsif chunk.match(/\A'(.*?)'/m)
      str_content = read_delimited.call(i, /(\A|[^\\])(\\\\)*'/m)
      # str_content includes the closing quote; drop it from the token value.
      tokens << new_token(:SSTRING, str_content[0..-2], :chunk => code[0..i])
      # +1 for the opening quote.
      i += str_content.size + 1
    elsif chunk.match(/\A"/)
      str_contents = read_delimited.call(i, /(\A|[^\\])(\\\\)*"/m)
      # Line count and length of the last line of everything up to and
      # including the opening quote; handed to interpolate_string, which is
      # defined elsewhere and is expected to emit the string's tokens.
      preceding_lines = code[0..i].split("\n")
      interpolate_string(str_contents, preceding_lines.count, preceding_lines.last.length)
      i += str_contents.size + 1
    elsif comment = chunk[/\A(#.*)/, 1]
      # Remember the raw length before the marker is stripped in place.
      comment_size = comment.size
      comment.sub!(/# ?/, '')
      tokens << new_token(:COMMENT, comment, :chunk => code[0..i])
      i += comment_size
    elsif slash_comment = chunk[/\A(\/\/.*)/, 1]
      slash_comment_size = slash_comment.size
      slash_comment.sub!(/\/\/ ?/, '')
      tokens << new_token(:SLASH_COMMENT, slash_comment, :chunk => code[0..i])
      i += slash_comment_size
    elsif mlcomment = chunk[/\A(\/\*.*?\*\/)/m, 1]
      mlcomment_size = mlcomment.size
      # Strip the opening and closing markers and any leading "*" on
      # continuation lines, then collapse the comment onto a single line.
      mlcomment.sub!(/\A\/\* ?/, '')
      mlcomment.sub!(/ ?\*\/\Z/, '')
      mlcomment.gsub!(/^ ?\* ?/, '')
      mlcomment.gsub!(/\n/, ' ')
      mlcomment.strip!
      tokens << new_token(:MLCOMMENT, mlcomment, :chunk => code[0..i])
      i += mlcomment_size
    elsif chunk.match(/\A\/.*?\//) && possible_regex?
      str_content = read_delimited.call(i, /(\A|[^\\])(\\\\)*\//m)
      tokens << new_token(:REGEX, str_content[0..-2], :chunk => code[0..i])
      i += str_content.size + 1
    elsif indent = chunk[/\A\n([ \t]+)/m, 1]
      # NOTE: '\n' is single-quoted, so the token value is a backslash followed
      # by "n", not a newline character. Left unchanged because consumers of
      # the tokens may depend on it.
      tokens << new_token(:NEWLINE, '\n', :chunk => code[0..i])
      tokens << new_token(:INDENT, indent, :chunk => code[0..i+1])
      i += indent.size + 1
    elsif whitespace = chunk[/\A([ \t]+)/, 1]
      tokens << new_token(:WHITESPACE, whitespace, :chunk => code[0..i])
      i += whitespace.size
    elsif chunk.match(/\A\n/)
      tokens << new_token(:NEWLINE, '\n', :chunk => code[0..i])
      i += 1
    elsif chunk.match(/\A\//)
      tokens << new_token(:DIV, '/', :chunk => code[0..i])
      i += 1
    else
      raise PuppetLint::LexerError.new(code, i)
    end
  end

  tokens
end