/zoeplat

To get this branch, use:
bzr branch /bzr/zoeplat
24 by Josh C
profiling and analysis
1
--[[
2
3
== Introduction ==
4
5
  Note that this requires os.clock(), debug.sethook(),
6
  and debug.getinfo() or your equivalent replacements to
7
  be available if this is an embedded application.
8
9
  Example usage:
10
11
    profiler = newProfiler()
12
    profiler:start()
13
14
    < call some functions that take time >
15
16
    profiler:stop()
17
18
    local outfile = io.open( "profile.txt", "w+" )
19
    profiler:report( outfile )
20
    outfile:close()
21
22
== Optionally choosing profiling method ==
23
24
The rest of this comment can be ignored if you merely want a good profiler.
25
26
  newProfiler(method, sampledelay):
27
28
If method is omitted or "time", will profile based on real performance.
29
optionally, frequency can be provided to control the number of opcodes
30
per profiling tick. By default this is 100000, which (on my system) provides
31
one tick approximately every 2ms and reduces system performance by about 10%.
32
This can be reduced to increase accuracy at the cost of performance, or
33
increased for the opposite effect.
34
35
If method is "call", will profile based on function calls. Frequency is
36
ignored.
37
38
39
"time" may bias profiling somewhat towards large areas with "simple opcodes",
40
as the profiling function (which introduces a certain amount of unavoidable
41
overhead) will be called more often. This can be minimized by using a larger
42
sample delay - the default should leave any error largely overshadowed by
43
statistical noise. With a delay of 1000 I was able to achieve inaccuray of
44
approximately 25%. Increasing the delay to 100000 left inaccuracy below my
45
testing error.
46
47
"call" may bias profiling heavily towards areas with many function calls.
48
Testing found a degenerate case giving a figure inaccurate by approximately
49
20,000%.  (Yes, a multiple of 200.) This is, however, more directly comparable
50
to common profilers (such as gprof) and also gives accurate function call
51
counts, which cannot be retrieved from "time".
52
53
I strongly recommend "time" mode, and it is now the default.
54
55
== History ==
56
57
2008-09-16 - Time-based profiling and conversion to Lua 5.1
58
  by Ben Wilhelm ( zorba-pepperfish@pavlovian.net ).
59
  Added the ability to optionally choose profiling methods, along with a new
60
  profiling method.
61
62
Converted to Lua 5, a few improvements, and 
63
additional documentation by Tom Spilman ( tom@sickheadgames.com )
64
65
Additional corrections and tidying by original author
66
Daniel Silverstone ( dsilvers@pepperfish.net )
67
68
== Status ==
69
70
Daniel Silverstone is no longer using this code, and judging by how long it's
71
been waiting for Lua 5.1 support, I don't think Tom Spilman is either. I'm
72
perfectly willing to take on maintenance, so if you have problems or
73
questions, go ahead and email me :)
74
-- Ben Wilhelm ( zorba-pepperfish@pavlovian.net ) '
75
76
== Copyright ==
77
78
Lua profiler - Copyright Pepperfish 2002,2003,2004
79
80
Permission is hereby granted, free of charge, to any person obtaining a copy
81
of this software and associated documentation files (the "Software"), to
82
deal in the Software without restriction, including without limitation the
83
rights to use, copy, modify, merge, publish, distribute, and/or sell copies
84
of the Software, and to permit persons to whom the Software is furnished to
85
do so, subject to the following conditions:
86
87
The above copyright notice and this permission notice shall be included in
88
all copies or substantial portions of the Software.
89
90
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
91
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
92
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
93
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
94
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
95
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
96
IN THE SOFTWARE.
97
98
--]]
99
100
101
--
102
-- All profiler related stuff is stored in the top level table '_profiler'
103
--
104
_profiler = {}
105
106
107
--
108
-- newProfiler() creates a new profiler object for managing 
109
-- the profiler and storing state.  Note that only one profiler 
110
-- object can be executing at one time.
111
--
112
function newProfiler(variant, sampledelay)
113
  if _profiler.running then
114
    print("Profiler already running.")
115
    return
116
  end
117
118
  variant = variant or "time"
119
120
  if variant ~= "time" and variant ~= "call" then
121
    print("Profiler method must be 'time' or 'call'.")
122
    return
123
  end
124
  
125
  local newprof = {}
126
  for k,v in pairs(_profiler) do
127
    newprof[k] = v
128
  end
129
  newprof.variant = variant
130
  newprof.sampledelay = sampledelay or 100000
131
  return newprof
132
end
133
134
135
--
136
-- This function starts the profiler.  It will do nothing
137
-- if this (or any other) profiler is already running.
138
--
139
function _profiler.start(self)
140
  if _profiler.running then
141
    return
142
  end
143
  -- Start the profiler. This begins by setting up internal profiler state
144
  _profiler.running = self
145
  self.rawstats = {}
146
  self.callstack = {}
147
  if self.variant == "time" then
148
    self.lastclock = os.clock()
149
    debug.sethook( _profiler_hook_wrapper_by_time, "", self.sampledelay )
150
  elseif self.variant == "call" then
151
    debug.sethook( _profiler_hook_wrapper_by_call, "cr" )
152
  else
153
    print("Profiler method must be 'time' or 'call'.")
154
    sys.exit(1)
155
  end
156
end
157
158
159
--
160
-- This function stops the profiler.  It will do nothing 
161
-- if a profiler is not running, and nothing if it isn't 
162
-- the currently running profiler.
163
--
164
function _profiler.stop(self)
165
  if _profiler.running ~= self then
166
    return
167
  end
168
  -- Stop the profiler.
169
  debug.sethook( nil )
170
  _profiler.running = nil
171
end
172
173
174
--
175
-- Simple wrapper to handle the hook.  You should not
176
-- be calling this directly. Duplicated to reduce overhead.
177
--
178
function _profiler_hook_wrapper_by_call(action)
179
  if _profiler.running == nil then
180
    debug.sethook( nil )
181
  end
182
  _profiler.running:_internal_profile_by_call(action)
183
end
184
function _profiler_hook_wrapper_by_time(action)
185
  if _profiler.running == nil then
186
    debug.sethook( nil )
187
  end
188
  _profiler.running:_internal_profile_by_time(action)
189
end
190
191
192
--
193
-- This is the main by-function-call function of the profiler and should not
194
-- be called except by the hook wrapper
195
--
196
function _profiler._internal_profile_by_call(self,action)
197
  -- Since we can obtain the 'function' for the item we've had call us, we
198
  -- can use that...
199
  local caller_info = debug.getinfo( 3 )
200
  if caller_info == nil then
201
    print "No caller_info"
202
    return
203
  end
204
  
205
  --SHG_LOG("[_profiler._internal_profile] "..(caller_info.name or "<nil>"))
206
207
  -- Retrieve the most recent activation record...
208
  local latest_ar = nil
209
  if table.getn(self.callstack) > 0 then
210
    latest_ar = self.callstack[table.getn(self.callstack)]
211
  end
212
  
213
  -- Are we allowed to profile this function?
214
  local should_not_profile = 0
215
  for k,v in pairs(self.prevented_functions) do
216
    if k == caller_info.func then
217
      should_not_profile = v
218
    end
219
  end
220
  -- Also check the top activation record...
221
  if latest_ar then
222
    if latest_ar.should_not_profile == 2 then
223
      should_not_profile = 2
224
    end
225
  end
226
227
  -- Now then, are we in 'call' or 'return' ?
228
  -- print("Profile:", caller_info.name, "SNP:", should_not_profile,
229
  --       "Action:", action )
230
  if action == "call" then
231
    -- Making a call...
232
    local this_ar = {}
233
    this_ar.should_not_profile = should_not_profile
234
    this_ar.parent_ar = latest_ar
235
    this_ar.anon_child = 0
236
    this_ar.name_child = 0
237
    this_ar.children = {}
238
    this_ar.children_time = {}
239
    this_ar.clock_start = os.clock()
240
    -- Last thing to do on a call is to insert this onto the ar stack...
241
    table.insert( self.callstack, this_ar )
242
  else
243
    local this_ar = latest_ar
244
    if this_ar == nil then
245
      return -- No point in doing anything if no upper activation record
246
    end
247
248
    -- Right, calculate the time in this function...
249
    this_ar.clock_end = os.clock()
250
    this_ar.this_time = this_ar.clock_end - this_ar.clock_start
251
252
    -- Now, if we have a parent, update its call info...
253
    if this_ar.parent_ar then
254
      this_ar.parent_ar.children[caller_info.func] =
255
        (this_ar.parent_ar.children[caller_info.func] or 0) + 1
256
      this_ar.parent_ar.children_time[caller_info.func] =
257
        (this_ar.parent_ar.children_time[caller_info.func] or 0 ) +
258
        this_ar.this_time
259
      if caller_info.name == nil then
260
        this_ar.parent_ar.anon_child =
261
          this_ar.parent_ar.anon_child + this_ar.this_time
262
      else
263
        this_ar.parent_ar.name_child =
264
          this_ar.parent_ar.name_child + this_ar.this_time
265
      end
266
    end
267
    -- Now if we're meant to record information about ourselves, do so...
268
    if this_ar.should_not_profile == 0 then
269
      local inforec = self:_get_func_rec(caller_info.func,1)
270
      inforec.count = inforec.count + 1
271
      inforec.time = inforec.time + this_ar.this_time
272
      inforec.anon_child_time = inforec.anon_child_time + this_ar.anon_child
273
      inforec.name_child_time = inforec.name_child_time + this_ar.name_child
274
      inforec.func_info = caller_info
275
      for k,v in pairs(this_ar.children) do
276
        inforec.children[k] = (inforec.children[k] or 0) + v
277
        inforec.children_time[k] =
278
          (inforec.children_time[k] or 0) + this_ar.children_time[k]
279
      end
280
    end
281
282
    -- Last thing to do on return is to drop the last activation record...
283
    table.remove( self.callstack, table.getn( self.callstack ) )
284
  end
285
end
286
287
288
--
289
-- This is the main by-time internal function of the profiler and should not
290
-- be called except by the hook wrapper
291
--
292
function _profiler._internal_profile_by_time(self,action)
293
  -- we do this first so we add the minimum amount of extra time to this call
294
  local timetaken = os.clock() - self.lastclock
295
  
296
  local depth = 3
297
  local at_top = true
298
  local last_caller
299
  local caller = debug.getinfo(depth)
300
  while caller do
301
    if not caller.func then caller.func = "(tail call)" end
302
    if self.prevented_functions[caller.func] == nil then
303
      local info = self:_get_func_rec(caller.func, 1, caller)
304
      info.count = info.count + 1
305
      info.time = info.time + timetaken
306
      if last_caller then
307
        -- we're not the head, so update the "children" times also
308
        if last_caller.name then
309
          info.name_child_time = info.name_child_time + timetaken
310
        else
311
          info.anon_child_time = info.anon_child_time + timetaken
312
        end
313
        info.children[last_caller.func] =
314
          (info.children[last_caller.func] or 0) + 1
315
        info.children_time[last_caller.func] =
316
          (info.children_time[last_caller.func] or 0) + timetaken
317
      end
318
    end
319
    depth = depth + 1
320
    last_caller = caller
321
    caller = debug.getinfo(depth)
322
  end
323
  
324
  self.lastclock = os.clock()
325
end
326
327
328
--
329
-- This returns a (possibly empty) function record for 
330
-- the specified function. It is for internal profiler use.
331
--
332
function _profiler._get_func_rec(self,func,force,info)
333
  -- Find the function ref for 'func' (if force and not present, create one)
334
  local ret = self.rawstats[func]
335
  if ret == nil and force ~= 1 then
336
    return nil
337
  end
338
  if ret == nil then
339
    -- Build a new function statistics table
340
    ret = {}
341
    ret.func = func
342
    ret.count = 0
343
    ret.time = 0
344
    ret.anon_child_time = 0
345
    ret.name_child_time = 0
346
    ret.children = {}
347
    ret.children_time = {}
348
    ret.func_info = info
349
    self.rawstats[func] = ret
350
  end
351
  return ret
352
end
353
354
355
--
356
-- This writes a profile report to the output file object.  If
357
-- sort_by_total_time is nil or false the output is sorted by
358
-- the function time minus the time in it's children.
359
--
360
function _profiler.report( self, outfile, sort_by_total_time )
361
362
  outfile:write
363
    [[Lua Profile output created by profiler.lua. Copyright Pepperfish 2002+
364
365
]]
366
367
  -- This is pretty awful.
368
  local terms = {}
369
  if self.variant == "time" then
370
    terms.capitalized = "Sample"
371
    terms.single = "sample"
372
    terms.pastverb = "sampled"
373
  elseif self.variant == "call" then
374
    terms.capitalized = "Call"
375
    terms.single = "call"
376
    terms.pastverb = "called"
377
  else
378
    assert(false)
379
  end
380
  
381
  local total_time = 0
382
  local ordering = {}
383
  for func,record in pairs(self.rawstats) do
384
    table.insert(ordering, func)
385
  end
386
387
  if sort_by_total_time then
388
    table.sort( ordering, 
389
      function(a,b) return self.rawstats[a].time > self.rawstats[b].time end
390
    )
391
  else
392
    table.sort( ordering, 
393
      function(a,b)
394
        local arec = self.rawstats[a] 
395
        local brec = self.rawstats[b]
396
        local atime = arec.time - (arec.anon_child_time + arec.name_child_time)
397
        local btime = brec.time - (brec.anon_child_time + brec.name_child_time)
398
        return atime > btime
399
      end
400
    )
401
  end
402
403
  for i=1,table.getn(ordering) do
404
    local func = ordering[i]
405
    local record = self.rawstats[func]
406
    local thisfuncname = " " .. self:_pretty_name(func) .. " "
407
    if string.len( thisfuncname ) < 42 then
408
      thisfuncname =
409
        string.rep( "-", (42 - string.len(thisfuncname))/2 ) .. thisfuncname
410
      thisfuncname =
411
        thisfuncname .. string.rep( "-", 42 - string.len(thisfuncname) )
412
    end
413
414
    total_time = total_time + ( record.time - ( record.anon_child_time +
415
      record.name_child_time ) )
416
    outfile:write( string.rep( "-", 19 ) .. thisfuncname ..
417
      string.rep( "-", 19 ) .. "\n" )
418
    outfile:write( terms.capitalized.." count:         " ..
419
      string.format( "%4d", record.count ) .. "\n" )
420
    outfile:write( "Time spend total:       " ..
421
      string.format( "%4.3f", record.time ) .. "s\n" )
422
    outfile:write( "Time spent in children: " ..
423
      string.format("%4.3f",record.anon_child_time+record.name_child_time) ..
424
      "s\n" )
425
    local timeinself =
426
      record.time - (record.anon_child_time + record.name_child_time)
427
    outfile:write( "Time spent in self:     " ..
428
      string.format("%4.3f", timeinself) .. "s\n" )
429
    outfile:write( "Time spent per " .. terms.single .. ":  " ..
430
      string.format("%4.5f", record.time/record.count) ..
431
      "s/" .. terms.single .. "\n" )
432
    outfile:write( "Time spent in self per "..terms.single..": " ..
433
      string.format( "%4.5f", timeinself/record.count ) .. "s/" ..
434
      terms.single.."\n" )
435
436
    -- Report on each child in the form
437
    -- Child  <funcname> called n times and took a.bs
438
    local added_blank = 0
439
    for k,v in pairs(record.children) do
440
      if self.prevented_functions[k] == nil or
441
         self.prevented_functions[k] == 0
442
      then
443
        if added_blank == 0 then
444
          outfile:write( "\n" ) -- extra separation line
445
          added_blank = 1
446
        end
447
        outfile:write( "Child " .. self:_pretty_name(k) ..
448
          string.rep( " ", 41-string.len(self:_pretty_name(k)) ) .. " " ..
449
          terms.pastverb.." " .. string.format("%6d", v) )
450
        outfile:write( " times. Took " ..
451
          string.format("%4.2f", record.children_time[k] ) .. "s\n" )
452
      end
453
    end
454
455
    outfile:write( "\n" ) -- extra separation line
456
    outfile:flush()
457
  end
458
  outfile:write( "\n\n" )
459
  outfile:write( "Total time spent in profiled functions: " ..
460
                 string.format("%5.3g",total_time) .. "s\n" )
461
  outfile:write( [[
462
463
END
464
]] )
465
  outfile:flush()
466
end
467
468
469
--
470
-- This writes the profile to the output file object as 
471
-- loadable Lua source.
472
--
473
function _profiler.lua_report(self,outfile)
474
  -- Purpose: Write out the entire raw state in a cross-referenceable form.
475
  local ordering = {}
476
  local functonum = {}
477
  for func,record in pairs(self.rawstats) do
478
    table.insert(ordering, func)
479
    functonum[func] = table.getn(ordering)
480
  end
481
482
  outfile:write(
483
    "-- Profile generated by profiler.lua Copyright Pepperfish 2002+\n\n" )
484
  outfile:write( "-- Function names\nfuncnames = {}\n" )
485
  for i=1,table.getn(ordering) do
486
    local thisfunc = ordering[i]
487
    outfile:write( "funcnames[" .. i .. "] = " ..
488
      string.format("%q", self:_pretty_name(thisfunc)) .. "\n" )
489
  end
490
  outfile:write( "\n" )
491
  outfile:write( "-- Function times\nfunctimes = {}\n" )
492
  for i=1,table.getn(ordering) do
493
    local thisfunc = ordering[i]
494
    local record = self.rawstats[thisfunc]
495
    outfile:write( "functimes[" .. i .. "] = { " )
496
    outfile:write( "tot=" .. record.time .. ", " )
497
    outfile:write( "achild=" .. record.anon_child_time .. ", " )
498
    outfile:write( "nchild=" .. record.name_child_time .. ", " )
499
    outfile:write( "count=" .. record.count .. " }\n" )
500
  end
501
  outfile:write( "\n" )
502
  outfile:write( "-- Child links\nchildren = {}\n" )
503
  for i=1,table.getn(ordering) do
504
    local thisfunc = ordering[i]
505
    local record = self.rawstats[thisfunc]
506
    outfile:write( "children[" .. i .. "] = { " )
507
    for k,v in pairs(record.children) do
508
      if functonum[k] then -- non-recorded functions will be ignored now
509
        outfile:write( functonum[k] .. ", " )
510
      end
511
    end
512
    outfile:write( "}\n" )
513
  end
514
  outfile:write( "\n" )
515
  outfile:write( "-- Child call counts\nchildcounts = {}\n" )
516
  for i=1,table.getn(ordering) do
517
    local thisfunc = ordering[i]
518
    local record = self.rawstats[thisfunc]
519
    outfile:write( "children[" .. i .. "] = { " )
520
    for k,v in record.children do
521
      if functonum[k] then -- non-recorded functions will be ignored now
522
        outfile:write( v .. ", " )
523
      end
524
    end
525
    outfile:write( "}\n" )
526
  end
527
  outfile:write( "\n" )
528
  outfile:write( "-- Child call time\nchildtimes = {}\n" )
529
  for i=1,table.getn(ordering) do
530
    local thisfunc = ordering[i]
531
    local record = self.rawstats[thisfunc];
532
    outfile:write( "children[" .. i .. "] = { " )
533
    for k,v in pairs(record.children) do
534
      if functonum[k] then -- non-recorded functions will be ignored now
535
        outfile:write( record.children_time[k] .. ", " )
536
      end
537
    end
538
    outfile:write( "}\n" )
539
  end
540
  outfile:write( "\n\n-- That is all.\n\n" )
541
  outfile:flush()
542
end
543
544
-- Internal function to calculate a pretty name for the profile output
545
function _profiler._pretty_name(self,func)
546
547
  -- Only the data collected during the actual
548
  -- run seems to be correct.... why?
549
  local info = self.rawstats[ func ].func_info
550
  -- local info = debug.getinfo( func )
551
552
  local name = ""
553
  if info.what == "Lua" then
554
    name = "L:"
555
  end
556
  if info.what == "C" then
557
    name = "C:"
558
  end
559
  if info.what == "main" then
560
    name = " :"
561
  end
562
  
563
  if info.name == nil then
564
    name = name .. "<"..tostring(func) .. ">"
565
  else
566
    name = name .. info.name
567
  end
568
569
  if info.source then
570
    name = name .. "@" .. info.source
571
  else
572
    if info.what == "C" then
573
      name = name .. "@?"
574
    else
575
      name = name .. "@<string>"
576
    end
577
  end
578
  name = name .. ":"
579
  if info.what == "C" then
580
    name = name .. "?"
581
  else
582
    name = name .. info.linedefined
583
  end
584
585
  return name
586
end
587
588
589
--
590
-- This allows you to specify functions which you do
591
-- not want profiled.  Setting level to 1 keeps the
592
-- function from being profiled.  Setting level to 2
593
-- keeps both the function and its children from
594
-- being profiled.
595
--
596
-- BUG: 2 will probably act exactly like 1 in "time" mode.
597
-- If anyone cares, let me (zorba) know and it can be fixed.
598
--
599
function _profiler.prevent(self, func, level)
600
  self.prevented_functions[func] = (level or 1)
601
end
602
603
604
_profiler.prevented_functions = {
605
  [_profiler.start] = 2,
606
  [_profiler.stop] = 2,
607
  [_profiler._internal_profile_by_time] = 2,
608
  [_profiler._internal_profile_by_call] = 2,
609
  [_profiler_hook_wrapper_by_time] = 2,
610
  [_profiler_hook_wrapper_by_call] = 2,
611
  [_profiler.prevent] = 2,
612
  [_profiler._get_func_rec] = 2,
613
  [_profiler.report] = 2,
614
  [_profiler.lua_report] = 2,
615
  [_profiler._pretty_name] = 2
616
}