5
Note that this requires os.clock(), debug.sethook(),
6
and debug.getinfo() or your equivalent replacements to
7
be available if this is an embedded application.
11
profiler = newProfiler()
14
< call some functions that take time >
18
local outfile = io.open( "profile.txt", "w+" )
19
profiler:report( outfile )
22
== Optionally choosing profiling method ==
24
The rest of this comment can be ignored if you merely want a good profiler.
26
newProfiler(method, sampledelay):
28
If method is omitted or "time", will profile based on real performance.
29
optionally, frequency can be provided to control the number of opcodes
30
per profiling tick. By default this is 100000, which (on my system) provides
31
one tick approximately every 2ms and reduces system performance by about 10%.
32
This can be reduced to increase accuracy at the cost of performance, or
33
increased for the opposite effect.
35
If method is "call", will profile based on function calls. Frequency is
39
"time" may bias profiling somewhat towards large areas with "simple opcodes",
40
as the profiling function (which introduces a certain amount of unavoidable
41
overhead) will be called more often. This can be minimized by using a larger
42
sample delay - the default should leave any error largely overshadowed by
43
statistical noise. With a delay of 1000 I was able to achieve inaccuray of
44
approximately 25%. Increasing the delay to 100000 left inaccuracy below my
47
"call" may bias profiling heavily towards areas with many function calls.
48
Testing found a degenerate case giving a figure inaccurate by approximately
49
20,000%. (Yes, a multiple of 200.) This is, however, more directly comparable
50
to common profilers (such as gprof) and also gives accurate function call
51
counts, which cannot be retrieved from "time".
53
I strongly recommend "time" mode, and it is now the default.
57
2008-09-16 - Time-based profiling and conversion to Lua 5.1
58
by Ben Wilhelm ( zorba-pepperfish@pavlovian.net ).
59
Added the ability to optionally choose profiling methods, along with a new
62
Converted to Lua 5, a few improvements, and
63
additional documentation by Tom Spilman ( tom@sickheadgames.com )
65
Additional corrections and tidying by original author
66
Daniel Silverstone ( dsilvers@pepperfish.net )
70
Daniel Silverstone is no longer using this code, and judging by how long it's
71
been waiting for Lua 5.1 support, I don't think Tom Spilman is either. I'm
72
perfectly willing to take on maintenance, so if you have problems or
73
questions, go ahead and email me :)
74
-- Ben Wilhelm ( zorba-pepperfish@pavlovian.net ) '
78
Lua profiler - Copyright Pepperfish 2002,2003,2004
80
Permission is hereby granted, free of charge, to any person obtaining a copy
81
of this software and associated documentation files (the "Software"), to
82
deal in the Software without restriction, including without limitation the
83
rights to use, copy, modify, merge, publish, distribute, and/or sell copies
84
of the Software, and to permit persons to whom the Software is furnished to
85
do so, subject to the following conditions:
87
The above copyright notice and this permission notice shall be included in
88
all copies or substantial portions of the Software.
90
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
91
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
92
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
93
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
94
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
95
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
102
-- All profiler related stuff is stored in the top level table '_profiler'
108
-- newProfiler() creates a new profiler object for managing
109
-- the profiler and storing state. Note that only one profiler
110
-- object can be executing at one time.
112
function newProfiler(variant, sampledelay)
113
if _profiler.running then
114
print("Profiler already running.")
118
variant = variant or "time"
120
if variant ~= "time" and variant ~= "call" then
121
print("Profiler method must be 'time' or 'call'.")
126
for k,v in pairs(_profiler) do
129
newprof.variant = variant
130
newprof.sampledelay = sampledelay or 100000
136
-- This function starts the profiler. It will do nothing
137
-- if this (or any other) profiler is already running.
139
function _profiler.start(self)
140
if _profiler.running then
143
-- Start the profiler. This begins by setting up internal profiler state
144
_profiler.running = self
147
if self.variant == "time" then
148
self.lastclock = os.clock()
149
debug.sethook( _profiler_hook_wrapper_by_time, "", self.sampledelay )
150
elseif self.variant == "call" then
151
debug.sethook( _profiler_hook_wrapper_by_call, "cr" )
153
print("Profiler method must be 'time' or 'call'.")
160
-- This function stops the profiler. It will do nothing
161
-- if a profiler is not running, and nothing if it isn't
162
-- the currently running profiler.
164
function _profiler.stop(self)
165
if _profiler.running ~= self then
168
-- Stop the profiler.
170
_profiler.running = nil
175
-- Simple wrapper to handle the hook. You should not
176
-- be calling this directly. Duplicated to reduce overhead.
178
function _profiler_hook_wrapper_by_call(action)
179
if _profiler.running == nil then
182
_profiler.running:_internal_profile_by_call(action)
184
function _profiler_hook_wrapper_by_time(action)
185
if _profiler.running == nil then
188
_profiler.running:_internal_profile_by_time(action)
193
-- This is the main by-function-call function of the profiler and should not
194
-- be called except by the hook wrapper
196
function _profiler._internal_profile_by_call(self,action)
197
-- Since we can obtain the 'function' for the item we've had call us, we
199
local caller_info = debug.getinfo( 3 )
200
if caller_info == nil then
201
print "No caller_info"
205
--SHG_LOG("[_profiler._internal_profile] "..(caller_info.name or "<nil>"))
207
-- Retrieve the most recent activation record...
208
local latest_ar = nil
209
if table.getn(self.callstack) > 0 then
210
latest_ar = self.callstack[table.getn(self.callstack)]
213
-- Are we allowed to profile this function?
214
local should_not_profile = 0
215
for k,v in pairs(self.prevented_functions) do
216
if k == caller_info.func then
217
should_not_profile = v
220
-- Also check the top activation record...
222
if latest_ar.should_not_profile == 2 then
223
should_not_profile = 2
227
-- Now then, are we in 'call' or 'return' ?
228
-- print("Profile:", caller_info.name, "SNP:", should_not_profile,
229
-- "Action:", action )
230
if action == "call" then
233
this_ar.should_not_profile = should_not_profile
234
this_ar.parent_ar = latest_ar
235
this_ar.anon_child = 0
236
this_ar.name_child = 0
237
this_ar.children = {}
238
this_ar.children_time = {}
239
this_ar.clock_start = os.clock()
240
-- Last thing to do on a call is to insert this onto the ar stack...
241
table.insert( self.callstack, this_ar )
243
local this_ar = latest_ar
244
if this_ar == nil then
245
return -- No point in doing anything if no upper activation record
248
-- Right, calculate the time in this function...
249
this_ar.clock_end = os.clock()
250
this_ar.this_time = this_ar.clock_end - this_ar.clock_start
252
-- Now, if we have a parent, update its call info...
253
if this_ar.parent_ar then
254
this_ar.parent_ar.children[caller_info.func] =
255
(this_ar.parent_ar.children[caller_info.func] or 0) + 1
256
this_ar.parent_ar.children_time[caller_info.func] =
257
(this_ar.parent_ar.children_time[caller_info.func] or 0 ) +
259
if caller_info.name == nil then
260
this_ar.parent_ar.anon_child =
261
this_ar.parent_ar.anon_child + this_ar.this_time
263
this_ar.parent_ar.name_child =
264
this_ar.parent_ar.name_child + this_ar.this_time
267
-- Now if we're meant to record information about ourselves, do so...
268
if this_ar.should_not_profile == 0 then
269
local inforec = self:_get_func_rec(caller_info.func,1)
270
inforec.count = inforec.count + 1
271
inforec.time = inforec.time + this_ar.this_time
272
inforec.anon_child_time = inforec.anon_child_time + this_ar.anon_child
273
inforec.name_child_time = inforec.name_child_time + this_ar.name_child
274
inforec.func_info = caller_info
275
for k,v in pairs(this_ar.children) do
276
inforec.children[k] = (inforec.children[k] or 0) + v
277
inforec.children_time[k] =
278
(inforec.children_time[k] or 0) + this_ar.children_time[k]
282
-- Last thing to do on return is to drop the last activation record...
283
table.remove( self.callstack, table.getn( self.callstack ) )
289
-- This is the main by-time internal function of the profiler and should not
290
-- be called except by the hook wrapper
292
function _profiler._internal_profile_by_time(self,action)
293
-- we do this first so we add the minimum amount of extra time to this call
294
local timetaken = os.clock() - self.lastclock
299
local caller = debug.getinfo(depth)
301
if not caller.func then caller.func = "(tail call)" end
302
if self.prevented_functions[caller.func] == nil then
303
local info = self:_get_func_rec(caller.func, 1, caller)
304
info.count = info.count + 1
305
info.time = info.time + timetaken
307
-- we're not the head, so update the "children" times also
308
if last_caller.name then
309
info.name_child_time = info.name_child_time + timetaken
311
info.anon_child_time = info.anon_child_time + timetaken
313
info.children[last_caller.func] =
314
(info.children[last_caller.func] or 0) + 1
315
info.children_time[last_caller.func] =
316
(info.children_time[last_caller.func] or 0) + timetaken
321
caller = debug.getinfo(depth)
324
self.lastclock = os.clock()
329
-- This returns a (possibly empty) function record for
330
-- the specified function. It is for internal profiler use.
332
function _profiler._get_func_rec(self,func,force,info)
333
-- Find the function ref for 'func' (if force and not present, create one)
334
local ret = self.rawstats[func]
335
if ret == nil and force ~= 1 then
339
-- Build a new function statistics table
344
ret.anon_child_time = 0
345
ret.name_child_time = 0
347
ret.children_time = {}
349
self.rawstats[func] = ret
356
-- This writes a profile report to the output file object. If
357
-- sort_by_total_time is nil or false the output is sorted by
358
-- the function time minus the time in it's children.
360
function _profiler.report( self, outfile, sort_by_total_time )
363
[[Lua Profile output created by profiler.lua. Copyright Pepperfish 2002+
367
-- This is pretty awful.
369
if self.variant == "time" then
370
terms.capitalized = "Sample"
371
terms.single = "sample"
372
terms.pastverb = "sampled"
373
elseif self.variant == "call" then
374
terms.capitalized = "Call"
375
terms.single = "call"
376
terms.pastverb = "called"
383
for func,record in pairs(self.rawstats) do
384
table.insert(ordering, func)
387
if sort_by_total_time then
388
table.sort( ordering,
389
function(a,b) return self.rawstats[a].time > self.rawstats[b].time end
392
table.sort( ordering,
394
local arec = self.rawstats[a]
395
local brec = self.rawstats[b]
396
local atime = arec.time - (arec.anon_child_time + arec.name_child_time)
397
local btime = brec.time - (brec.anon_child_time + brec.name_child_time)
403
for i=1,table.getn(ordering) do
404
local func = ordering[i]
405
local record = self.rawstats[func]
406
local thisfuncname = " " .. self:_pretty_name(func) .. " "
407
if string.len( thisfuncname ) < 42 then
409
string.rep( "-", (42 - string.len(thisfuncname))/2 ) .. thisfuncname
411
thisfuncname .. string.rep( "-", 42 - string.len(thisfuncname) )
414
total_time = total_time + ( record.time - ( record.anon_child_time +
415
record.name_child_time ) )
416
outfile:write( string.rep( "-", 19 ) .. thisfuncname ..
417
string.rep( "-", 19 ) .. "\n" )
418
outfile:write( terms.capitalized.." count: " ..
419
string.format( "%4d", record.count ) .. "\n" )
420
outfile:write( "Time spend total: " ..
421
string.format( "%4.3f", record.time ) .. "s\n" )
422
outfile:write( "Time spent in children: " ..
423
string.format("%4.3f",record.anon_child_time+record.name_child_time) ..
426
record.time - (record.anon_child_time + record.name_child_time)
427
outfile:write( "Time spent in self: " ..
428
string.format("%4.3f", timeinself) .. "s\n" )
429
outfile:write( "Time spent per " .. terms.single .. ": " ..
430
string.format("%4.5f", record.time/record.count) ..
431
"s/" .. terms.single .. "\n" )
432
outfile:write( "Time spent in self per "..terms.single..": " ..
433
string.format( "%4.5f", timeinself/record.count ) .. "s/" ..
436
-- Report on each child in the form
437
-- Child <funcname> called n times and took a.bs
438
local added_blank = 0
439
for k,v in pairs(record.children) do
440
if self.prevented_functions[k] == nil or
441
self.prevented_functions[k] == 0
443
if added_blank == 0 then
444
outfile:write( "\n" ) -- extra separation line
447
outfile:write( "Child " .. self:_pretty_name(k) ..
448
string.rep( " ", 41-string.len(self:_pretty_name(k)) ) .. " " ..
449
terms.pastverb.." " .. string.format("%6d", v) )
450
outfile:write( " times. Took " ..
451
string.format("%4.2f", record.children_time[k] ) .. "s\n" )
455
outfile:write( "\n" ) -- extra separation line
458
outfile:write( "\n\n" )
459
outfile:write( "Total time spent in profiled functions: " ..
460
string.format("%5.3g",total_time) .. "s\n" )
470
-- This writes the profile to the output file object as
471
-- loadable Lua source.
473
function _profiler.lua_report(self,outfile)
474
-- Purpose: Write out the entire raw state in a cross-referenceable form.
477
for func,record in pairs(self.rawstats) do
478
table.insert(ordering, func)
479
functonum[func] = table.getn(ordering)
483
"-- Profile generated by profiler.lua Copyright Pepperfish 2002+\n\n" )
484
outfile:write( "-- Function names\nfuncnames = {}\n" )
485
for i=1,table.getn(ordering) do
486
local thisfunc = ordering[i]
487
outfile:write( "funcnames[" .. i .. "] = " ..
488
string.format("%q", self:_pretty_name(thisfunc)) .. "\n" )
490
outfile:write( "\n" )
491
outfile:write( "-- Function times\nfunctimes = {}\n" )
492
for i=1,table.getn(ordering) do
493
local thisfunc = ordering[i]
494
local record = self.rawstats[thisfunc]
495
outfile:write( "functimes[" .. i .. "] = { " )
496
outfile:write( "tot=" .. record.time .. ", " )
497
outfile:write( "achild=" .. record.anon_child_time .. ", " )
498
outfile:write( "nchild=" .. record.name_child_time .. ", " )
499
outfile:write( "count=" .. record.count .. " }\n" )
501
outfile:write( "\n" )
502
outfile:write( "-- Child links\nchildren = {}\n" )
503
for i=1,table.getn(ordering) do
504
local thisfunc = ordering[i]
505
local record = self.rawstats[thisfunc]
506
outfile:write( "children[" .. i .. "] = { " )
507
for k,v in pairs(record.children) do
508
if functonum[k] then -- non-recorded functions will be ignored now
509
outfile:write( functonum[k] .. ", " )
512
outfile:write( "}\n" )
514
outfile:write( "\n" )
515
outfile:write( "-- Child call counts\nchildcounts = {}\n" )
516
for i=1,table.getn(ordering) do
517
local thisfunc = ordering[i]
518
local record = self.rawstats[thisfunc]
519
outfile:write( "children[" .. i .. "] = { " )
520
for k,v in record.children do
521
if functonum[k] then -- non-recorded functions will be ignored now
522
outfile:write( v .. ", " )
525
outfile:write( "}\n" )
527
outfile:write( "\n" )
528
outfile:write( "-- Child call time\nchildtimes = {}\n" )
529
for i=1,table.getn(ordering) do
530
local thisfunc = ordering[i]
531
local record = self.rawstats[thisfunc];
532
outfile:write( "children[" .. i .. "] = { " )
533
for k,v in pairs(record.children) do
534
if functonum[k] then -- non-recorded functions will be ignored now
535
outfile:write( record.children_time[k] .. ", " )
538
outfile:write( "}\n" )
540
outfile:write( "\n\n-- That is all.\n\n" )
544
-- Internal function to calculate a pretty name for the profile output
545
function _profiler._pretty_name(self,func)
547
-- Only the data collected during the actual
548
-- run seems to be correct.... why?
549
local info = self.rawstats[ func ].func_info
550
-- local info = debug.getinfo( func )
553
if info.what == "Lua" then
556
if info.what == "C" then
559
if info.what == "main" then
563
if info.name == nil then
564
name = name .. "<"..tostring(func) .. ">"
566
name = name .. info.name
570
name = name .. "@" .. info.source
572
if info.what == "C" then
575
name = name .. "@<string>"
579
if info.what == "C" then
582
name = name .. info.linedefined
590
-- This allows you to specify functions which you do
591
-- not want profiled. Setting level to 1 keeps the
592
-- function from being profiled. Setting level to 2
593
-- keeps both the function and its children from
596
-- BUG: 2 will probably act exactly like 1 in "time" mode.
597
-- If anyone cares, let me (zorba) know and it can be fixed.
599
function _profiler.prevent(self, func, level)
600
self.prevented_functions[func] = (level or 1)
604
_profiler.prevented_functions = {
605
[_profiler.start] = 2,
606
[_profiler.stop] = 2,
607
[_profiler._internal_profile_by_time] = 2,
608
[_profiler._internal_profile_by_call] = 2,
609
[_profiler_hook_wrapper_by_time] = 2,
610
[_profiler_hook_wrapper_by_call] = 2,
611
[_profiler.prevent] = 2,
612
[_profiler._get_func_rec] = 2,
613
[_profiler.report] = 2,
614
[_profiler.lua_report] = 2,
615
[_profiler._pretty_name] = 2