5
Note that this requires os.clock(), debug.sethook(),
6
and debug.getinfo() or your equivalent replacements to
7
be available if this is an embedded application.
11
profiler = newProfiler()
14
< call some functions that take time >
18
local outfile = io.open( "profile.txt", "w+" )
19
profiler:report( outfile )
22
== Optionally choosing profiling method ==
24
The rest of this comment can be ignored if you merely want a good profiler.
26
newProfiler(method, sampledelay):
28
If method is omitted or "time", will profile based on real performance.
29
optionally, frequency can be provided to control the number of opcodes
30
per profiling tick. By default this is 100000, which (on my system) provides
31
one tick approximately every 2ms and reduces system performance by about 10%.
32
This can be reduced to increase accuracy at the cost of performance, or
33
increased for the opposite effect.
35
If method is "call", will profile based on function calls. Frequency is
39
"time" may bias profiling somewhat towards large areas with "simple opcodes",
40
as the profiling function (which introduces a certain amount of unavoidable
41
overhead) will be called more often. This can be minimized by using a larger
42
sample delay - the default should leave any error largely overshadowed by
43
statistical noise. With a delay of 1000 I was able to achieve inaccuray of
44
approximately 25%. Increasing the delay to 100000 left inaccuracy below my
47
"call" may bias profiling heavily towards areas with many function calls.
48
Testing found a degenerate case giving a figure inaccurate by approximately
49
20,000%. (Yes, a multiple of 200.) This is, however, more directly comparable
50
to common profilers (such as gprof) and also gives accurate function call
51
counts, which cannot be retrieved from "time".
53
I strongly recommend "time" mode, and it is now the default.
57
2008-09-16 - Time-based profiling and conversion to Lua 5.1
58
by Ben Wilhelm ( zorba-pepperfish@pavlovian.net ).
59
Added the ability to optionally choose profiling methods, along with a new
62
Converted to Lua 5, a few improvements, and
63
additional documentation by Tom Spilman ( tom@sickheadgames.com )
65
Additional corrections and tidying by original author
66
Daniel Silverstone ( dsilvers@pepperfish.net )
70
Daniel Silverstone is no longer using this code, and judging by how long it's
71
been waiting for Lua 5.1 support, I don't think Tom Spilman is either. I'm
72
perfectly willing to take on maintenance, so if you have problems or
73
questions, go ahead and email me :)
77
Lua profiler - Copyright Pepperfish 2002,2003,2004
79
Permission is hereby granted, free of charge, to any person obtaining a copy
80
of this software and associated documentation files (the "Software"), to
81
deal in the Software without restriction, including without limitation the
82
rights to use, copy, modify, merge, publish, distribute, and/or sell copies
83
of the Software, and to permit persons to whom the Software is furnished to
84
do so, subject to the following conditions:
86
The above copyright notice and this permission notice shall be included in
87
all copies or substantial portions of the Software.
89
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
90
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
91
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
92
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
93
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
94
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
107
function newProfiler(variant, sampledelay)
108
if _profiler.running then
109
print("Profiler already running.")
113
variant = variant or "time"
115
if variant ~= "time" and variant ~= "call" then
116
print("Profiler method must be 'time' or 'call'.")
121
for k,v in pairs(_profiler) do
124
newprof.variant = variant
125
newprof.sampledelay = sampledelay or 100000
132
function _profiler.start(self)
133
if _profiler.running then
136
-- Start the profiler. This begins by setting up internal profiler state
137
_profiler.running = self
140
if self.variant == "time" then
141
self.lastclock = os.clock()
142
debug.sethook( _profiler_hook_wrapper_by_time, "", self.sampledelay )
143
elseif self.variant == "call" then
144
debug.sethook( _profiler_hook_wrapper_by_call, "cr" )
146
print("Profiler method must be 'time' or 'call'.")
154
function _profiler.stop(self)
155
if _profiler.running ~= self then
158
-- Stop the profiler.
160
_profiler.running = nil
166
function _profiler_hook_wrapper_by_call(action)
167
if _profiler.running == nil then
170
_profiler.running:_internal_profile_by_call(action)
172
function _profiler_hook_wrapper_by_time(action)
173
if _profiler.running == nil then
176
_profiler.running:_internal_profile_by_time(action)
182
function _profiler._internal_profile_by_call(self,action)
183
-- Since we can obtain the 'function' for the item we've had call us, we
185
local caller_info = debug.getinfo( 3 )
186
if caller_info == nil then
187
print "No caller_info"
191
--SHG_LOG("[_profiler._internal_profile] "..(caller_info.name or "<nil>"))
193
-- Retrieve the most recent activation record...
194
local latest_ar = nil
195
if table.getn(self.callstack) > 0 then
196
latest_ar = self.callstack[table.getn(self.callstack)]
199
-- Are we allowed to profile this function?
200
local should_not_profile = 0
201
for k,v in pairs(self.prevented_functions) do
202
if k == caller_info.func then
203
should_not_profile = v
206
-- Also check the top activation record...
208
if latest_ar.should_not_profile == 2 then
209
should_not_profile = 2
213
-- Now then, are we in 'call' or 'return' ?
214
-- print("Profile:", caller_info.name, "SNP:", should_not_profile,
215
-- "Action:", action )
216
if action == "call" then
219
this_ar.should_not_profile = should_not_profile
220
this_ar.parent_ar = latest_ar
221
this_ar.anon_child = 0
222
this_ar.name_child = 0
223
this_ar.children = {}
224
this_ar.children_time = {}
225
this_ar.clock_start = os.clock()
226
-- Last thing to do on a call is to insert this onto the ar stack...
227
table.insert( self.callstack, this_ar )
229
local this_ar = latest_ar
230
if this_ar == nil then
231
return -- No point in doing anything if no upper activation record
234
-- Right, calculate the time in this function...
235
this_ar.clock_end = os.clock()
236
this_ar.this_time = this_ar.clock_end - this_ar.clock_start
238
-- Now, if we have a parent, update its call info...
239
if this_ar.parent_ar then
240
this_ar.parent_ar.children[caller_info.func] =
241
(this_ar.parent_ar.children[caller_info.func] or 0) + 1
242
this_ar.parent_ar.children_time[caller_info.func] =
243
(this_ar.parent_ar.children_time[caller_info.func] or 0 ) +
245
if caller_info.name == nil then
246
this_ar.parent_ar.anon_child =
247
this_ar.parent_ar.anon_child + this_ar.this_time
249
this_ar.parent_ar.name_child =
250
this_ar.parent_ar.name_child + this_ar.this_time
253
-- Now if we're meant to record information about ourselves, do so...
254
if this_ar.should_not_profile == 0 then
255
local inforec = self:_get_func_rec(caller_info.func,1)
256
inforec.count = inforec.count + 1
257
inforec.time = inforec.time + this_ar.this_time
258
inforec.anon_child_time = inforec.anon_child_time + this_ar.anon_child
259
inforec.name_child_time = inforec.name_child_time + this_ar.name_child
260
inforec.func_info = caller_info
261
for k,v in pairs(this_ar.children) do
262
inforec.children[k] = (inforec.children[k] or 0) + v
263
inforec.children_time[k] =
264
(inforec.children_time[k] or 0) + this_ar.children_time[k]
268
-- Last thing to do on return is to drop the last activation record...
269
table.remove( self.callstack, table.getn( self.callstack ) )
276
function _profiler._internal_profile_by_time(self,action)
277
-- we do this first so we add the minimum amount of extra time to this call
278
local timetaken = os.clock() - self.lastclock
283
local caller = debug.getinfo(depth)
285
if not caller.func then caller.func = "(tail call)" end
286
if self.prevented_functions[caller.func] == nil then
287
local info = self:_get_func_rec(caller.func, 1, caller)
288
info.count = info.count + 1
289
info.time = info.time + timetaken
291
-- we're not the head, so update the "children" times also
292
if last_caller.name then
293
info.name_child_time = info.name_child_time + timetaken
295
info.anon_child_time = info.anon_child_time + timetaken
297
info.children[last_caller.func] =
298
(info.children[last_caller.func] or 0) + 1
299
info.children_time[last_caller.func] =
300
(info.children_time[last_caller.func] or 0) + timetaken
305
caller = debug.getinfo(depth)
308
self.lastclock = os.clock()
314
function _profiler._get_func_rec(self,func,force,info)
315
-- Find the function ref for 'func' (if force and not present, create one)
316
local ret = self.rawstats[func]
317
if ret == nil and force ~= 1 then
321
-- Build a new function statistics table
326
ret.anon_child_time = 0
327
ret.name_child_time = 0
329
ret.children_time = {}
331
self.rawstats[func] = ret
339
function _profiler.report( self, outfile, sort_by_total_time )
342
[[Lua Profile output created by profiler.lua. Copyright Pepperfish 2002+
346
-- This is pretty awful.
348
if self.variant == "time" then
349
terms.capitalized = "Sample"
350
terms.single = "sample"
351
terms.pastverb = "sampled"
352
elseif self.variant == "call" then
353
terms.capitalized = "Call"
354
terms.single = "call"
355
terms.pastverb = "called"
362
for func,record in pairs(self.rawstats) do
363
table.insert(ordering, func)
366
if sort_by_total_time then
367
table.sort( ordering,
368
function(a,b) return self.rawstats[a].time > self.rawstats[b].time end
371
table.sort( ordering,
373
local arec = self.rawstats[a]
374
local brec = self.rawstats[b]
375
local atime = arec.time - (arec.anon_child_time + arec.name_child_time)
376
local btime = brec.time - (brec.anon_child_time + brec.name_child_time)
382
for i=1,table.getn(ordering) do
383
local func = ordering[i]
384
local record = self.rawstats[func]
385
local thisfuncname = " " .. self:_pretty_name(func) .. " "
386
if string.len( thisfuncname ) < 42 then
388
string.rep( "-", (42 - string.len(thisfuncname))/2 ) .. thisfuncname
390
thisfuncname .. string.rep( "-", 42 - string.len(thisfuncname) )
393
total_time = total_time + ( record.time - ( record.anon_child_time +
394
record.name_child_time ) )
395
outfile:write( string.rep( "-", 19 ) .. thisfuncname ..
396
string.rep( "-", 19 ) .. "\n" )
397
outfile:write( terms.capitalized.." count: " ..
398
string.format( "%4d", record.count ) .. "\n" )
399
outfile:write( "Time spend total: " ..
400
string.format( "%4.3f", record.time ) .. "s\n" )
401
outfile:write( "Time spent in children: " ..
402
string.format("%4.3f",record.anon_child_time+record.name_child_time) ..
405
record.time - (record.anon_child_time + record.name_child_time)
406
outfile:write( "Time spent in self: " ..
407
string.format("%4.3f", timeinself) .. "s\n" )
408
outfile:write( "Time spent per " .. terms.single .. ": " ..
409
string.format("%4.5f", record.time/record.count) ..
410
"s/" .. terms.single .. "\n" )
411
outfile:write( "Time spent in self per "..terms.single..": " ..
412
string.format( "%4.5f", timeinself/record.count ) .. "s/" ..
415
-- Report on each child in the form
416
-- Child <funcname> called n times and took a.bs
417
local added_blank = 0
418
for k,v in pairs(record.children) do
419
if self.prevented_functions[k] == nil or
420
self.prevented_functions[k] == 0
422
if added_blank == 0 then
423
outfile:write( "\n" ) -- extra separation line
426
outfile:write( "Child " .. self:_pretty_name(k) ..
427
string.rep( " ", 41-string.len(self:_pretty_name(k)) ) .. " " ..
428
terms.pastverb.." " .. string.format("%6d", v) )
429
outfile:write( " times. Took " ..
430
string.format("%4.2f", record.children_time[k] ) .. "s\n" )
434
outfile:write( "\n" ) -- extra separation line
437
outfile:write( "\n\n" )
438
outfile:write( "Total time spent in profiled functions: " ..
439
string.format("%5.3g",total_time) .. "s\n" )
450
function _profiler.lua_report(self,outfile)
451
-- Purpose: Write out the entire raw state in a cross-referenceable form.
454
for func,record in pairs(self.rawstats) do
455
table.insert(ordering, func)
456
functonum[func] = table.getn(ordering)
460
"-- Profile generated by profiler.lua Copyright Pepperfish 2002+\n\n" )
461
outfile:write( "-- Function names\nfuncnames = {}\n" )
462
for i=1,table.getn(ordering) do
463
local thisfunc = ordering[i]
464
outfile:write( "funcnames[" .. i .. "] = " ..
465
string.format("%q", self:_pretty_name(thisfunc)) .. "\n" )
467
outfile:write( "\n" )
468
outfile:write( "-- Function times\nfunctimes = {}\n" )
469
for i=1,table.getn(ordering) do
470
local thisfunc = ordering[i]
471
local record = self.rawstats[thisfunc]
472
outfile:write( "functimes[" .. i .. "] = { " )
473
outfile:write( "tot=" .. record.time .. ", " )
474
outfile:write( "achild=" .. record.anon_child_time .. ", " )
475
outfile:write( "nchild=" .. record.name_child_time .. ", " )
476
outfile:write( "count=" .. record.count .. " }\n" )
478
outfile:write( "\n" )
479
outfile:write( "-- Child links\nchildren = {}\n" )
480
for i=1,table.getn(ordering) do
481
local thisfunc = ordering[i]
482
local record = self.rawstats[thisfunc]
483
outfile:write( "children[" .. i .. "] = { " )
484
for k,v in pairs(record.children) do
485
if functonum[k] then -- non-recorded functions will be ignored now
486
outfile:write( functonum[k] .. ", " )
489
outfile:write( "}\n" )
491
outfile:write( "\n" )
492
outfile:write( "-- Child call counts\nchildcounts = {}\n" )
493
for i=1,table.getn(ordering) do
494
local thisfunc = ordering[i]
495
local record = self.rawstats[thisfunc]
496
outfile:write( "children[" .. i .. "] = { " )
497
for k,v in record.children do
498
if functonum[k] then -- non-recorded functions will be ignored now
499
outfile:write( v .. ", " )
502
outfile:write( "}\n" )
504
outfile:write( "\n" )
505
outfile:write( "-- Child call time\nchildtimes = {}\n" )
506
for i=1,table.getn(ordering) do
507
local thisfunc = ordering[i]
508
local record = self.rawstats[thisfunc];
509
outfile:write( "children[" .. i .. "] = { " )
510
for k,v in pairs(record.children) do
511
if functonum[k] then -- non-recorded functions will be ignored now
512
outfile:write( record.children_time[k] .. ", " )
515
outfile:write( "}\n" )
517
outfile:write( "\n\n-- That is all.\n\n" )
521
function _profiler._pretty_name(self,func)
523
-- Only the data collected during the actual
524
-- run seems to be correct.... why?
525
local info = self.rawstats[ func ].func_info
526
-- local info = debug.getinfo( func )
529
if info.what == "Lua" then
532
if info.what == "C" then
535
if info.what == "main" then
539
if info.name == nil then
540
name = name .. "<"..tostring(func) .. ">"
542
name = name .. info.name
546
name = name .. "@" .. info.source
548
if info.what == "C" then
551
name = name .. "@<string>"
555
if info.what == "C" then
558
name = name .. info.linedefined
568
function _profiler.prevent(self, func, level)
569
self.prevented_functions[func] = (level or 1)
573
_profiler.prevented_functions = {
574
[_profiler.start] = 2,
575
[_profiler.stop] = 2,
576
[_profiler._internal_profile_by_time] = 2,
577
[_profiler._internal_profile_by_call] = 2,
578
[_profiler_hook_wrapper_by_time] = 2,
579
[_profiler_hook_wrapper_by_call] = 2,
580
[_profiler.prevent] = 2,
581
[_profiler._get_func_rec] = 2,
582
[_profiler.report] = 2,
583
[_profiler.lua_report] = 2,
584
[_profiler._pretty_name] = 2