Mail Archives: djgpp/1998/08/21/12:00:30
From: | DougEleveld <deleveld AT dds DOT nl>
|
Newsgroups: | comp.os.msdos.djgpp
|
Subject: | Re: DJGPPv2.02 & Profiling in a Windoze 98 Dos Box
|
Date: | Fri, 21 Aug 1998 17:52:16 +0200
|
Organization: | Rijksuniversiteit Groningen
|
Lines: | 236
|
Message-ID: | <35DD97B0.91B557B6@dds.nl>
|
References: | <e#uUvNMz9GA DOT 274 AT upnetnews03>
|
NNTP-Posting-Host: | client36-53.oprit.rug.nl
|
Mime-Version: | 1.0
|
To: | djgpp AT delorie DOT com
|
DJ-Gateway: | from newsgroup comp.os.msdos.djgpp
|
Shelby Cain wrote:
> I am having a problem getting profiling to work under DJGPPv2.02...
>
> Basically, after I have compiled/linked my program with the '-pg'
> switch, it
> appears as if only one sample is taken and the profiling code stops
> collected data (as reported by gprof). The end result is the
> implication
> that the total execution time was around 0.06 seconds on my system
> with 100%
> of it taken up by __dpmi_int.
>
> The above example is simply ex35.c from the Allegro gaming library...
> but I
> want to be able to use this for a more serious optimization project...
>
> Any ideas???
Well, I never use -pg for profiling anymore. I use some macros that
count cycles on a pentium. Much better resolution than using -pg.
Here is the header file that I use. I hope it's useful. If you
don't know how to use it, then send me an e-mail.
//----------------------------------------------------------------------------
//
// DETL - A template library 2.0 beta
//
// Douglas Eleveld (D DOT J DOT Eleveld AT anest DOT azg DOT nl or deleveld AT dds DOT nl)
//
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
// Pentium timer class and macros
//
// C++ class wrapper around some macros that I got from
// comp.os.msdos.djgpp
//
//----------------------------------------------------------------------------
#ifndef PENTIUM_TIMER_HEADER
#define PENTIUM_TIMER_HEADER
//----------------------------------------------------------------------------
//#include "detl.h"
//----------------------------------------------------------------------------
// What I found on comp.os.msdos.djgpp through dejanews:
// Subject: DJGPP RDTSC demo (Pentium-only, ~100 lines)
// From: Tom Burgess <Tom_Burgess AT bc DOT sympatico DOT ca>
// Date: 1997/04/20
// Message-Id: <3359E27B DOT 6C9A AT bc DOT sympatico DOT ca>
// Newsgroups: comp.os.msdos.djgpp
/* rdtsc.c: DJGPP inline asm demo of Pentium cycle counter usage */
/* Reference: Agner Fog's "How to optimize for the Pentium" */
/* also thanks to Leath Muller for earlier posted RDTSC code */
// Hi, here's some code that might be useful to some for low-level
// Pentium optimization. If you get weird results, look carefully at
// what is known to be in cache when the code executes, code & data
// alignment, cache line conflicts, AGIs etc. Agner Fog warns that
// RDTSC doesn't work with virtual 86 mode but I've noted no problems
// with win95 dos shell, RHIDE or whatever. He also points out
// special Pentium Pro considerations which I have not addressed.
// Check out: http://announce.com/agner/assem/assem.html
#include <stdio.h>
#include <stdlib.h>
#include <math.h>
/* RDTSC1 and RDTSC2 are macros to get Pentium RDTSC cycle count */
/* This returns 64 bits in EAX and EDX. */
/* If dest is address of a GNU long long, the 64 bit subtraction */
/* needed for interval measurement can be done directly */
/* RDTSC1 generates code for the initial timestamp read - the cld
and nops are included for repeatable pairing and to eliminate
shadowing effects from previous instructions */
/* RDTSC1(dest): take the initial timestamp.
 *
 * dest must point to a 64-bit object (e.g. unsigned long long). The raw
 * opcode bytes 0x0F 0x31 are the RDTSC instruction, which leaves the
 * counter in EDX:EAX; the low word is stored at dest and the high word at
 * dest+4. The trailing cld/nop padding is kept exactly as in the original
 * sequence.
 *
 * The pointer is addressed through operand %0 (still pinned to the DI
 * register by the "D" constraint) instead of a literal %edi, so the full
 * pointer width is used on 64-bit targets while 32-bit output is unchanged.
 * The "memory" clobber tells the compiler that the asm writes through dest,
 * so it must not keep a stale copy of *dest in a register.
 */
#define RDTSC1(dest) \
__asm__ __volatile__(".byte 0x0F, 0x31\n\t"\
"movl %%eax, (%0)\n\t"\
"movl %%edx, 4(%0)\n\t"\
"cld \n\t"\
"nop \n\t nop \n\t nop \n\t"\
"nop \n\t nop \n\t nop \n\t"\
"nop \n\t nop \n\t"\
: : "D" (dest) : "eax", "edx", "memory")
// I added here the extra nops that were mentioned in a later posting
/* use RDTSC2 immediately after the code under test. The clc is a
non-pairable filler that also eliminates potential shadow effects */
/* RDTSC2(dest): take the closing timestamp.
 *
 * dest must point to a 64-bit object (e.g. unsigned long long). A clc is
 * issued first, then the raw RDTSC opcode (0x0F 0x31); the low word of the
 * counter (EAX) is stored at dest and the high word (EDX) at dest+4.
 *
 * The pointer is addressed through operand %0 (still pinned to the DI
 * register by the "D" constraint) instead of a literal %edi, so the full
 * pointer width is used on 64-bit targets while 32-bit output is unchanged.
 * The "memory" clobber tells the compiler that the asm writes through dest,
 * so it must not keep a stale copy of *dest in a register.
 */
#define RDTSC2(dest) \
__asm__ __volatile__("clc \n\t"\
".byte 0x0F, 0x31\n\t"\
"movl %%eax, (%0)\n\t"\
"movl %%edx, 4(%0)\n\t"\
: : "D" (dest) : "eax", "edx", "memory")
//----------------------------------------------------------------------------
// C access to the timer and profiler
#ifndef __cplusplus
/* Bookkeeping for one profiled region when compiling as plain C.
 * All fields are written by the PROFILER_* macros. */
typedef struct
{
    unsigned long long _overhead; /* _end - _start of back-to-back reads, set by PROFILER_RESET */
    unsigned long long _start;    /* timestamp stored by RDTSC1 */
    unsigned long long _end;      /* timestamp stored by RDTSC2 */
    unsigned long long _runs;     /* incremented by each PROFILER_STOP */
    unsigned long long _total;    /* sum of overhead-corrected intervals */
} c_pentium_profiler;
/* PROFILER_RESET(x): calibrate and clear profiler x (a c_pentium_profiler
 * lvalue). The first RDTSC1/RDTSC2 pair is a throw-away pass; the second
 * pair, with no code in between, gives the measurement overhead. The run
 * counter and accumulated total are then zeroed. */
#define PROFILER_RESET(x) { \
    RDTSC1(&(x)._start); RDTSC2(&(x)._end); \
    RDTSC1(&(x)._start); RDTSC2(&(x)._end); \
    (x)._overhead = (x)._end - (x)._start; \
    (x)._runs = 0; \
    (x)._total = 0; \
}
/* PROFILER_START(x): record the opening timestamp of a measured region
 * in x._start. */
#define PROFILER_START(x) { \
    RDTSC1(&(x)._start); \
}
/* PROFILER_STOP(x): record the closing timestamp in x._end, count one more
 * run, and add the interval since PROFILER_START, minus the calibrated
 * overhead, to x._total.
 * Every line of the body ends in a continuation backslash; the accumulation
 * expression must stay on one logical line or the macro is cut short. */
#define PROFILER_STOP(x) { RDTSC2(&(x)._end); \
(x)._runs++; \
(x)._total += ((x)._end - (x)._start) - (x)._overhead; \
}
/* PROFILER_CYCLES(x): average overhead-corrected cycle count per run, as a
 * double. Yields 0.0 when no run has been recorded yet, mirroring the
 * zero-run check in the C++ pentium_profiler::cycles(). */
#define PROFILER_CYCLES(x) \
((x)._runs ? (((double)((x)._total))/((double)((x)._runs))) : 0.0)
/* PROFILER_OVERHEAD(x): the calibrated measurement overhead stored in
 * x._overhead, converted to double. */
#define PROFILER_OVERHEAD(x) \
    ((double)(x)._overhead)
//----------------------------------------------------------------------------
// C++ access to the timer and profiler
#else
// Pentium timer class for cycle counts
// Cycle-count stopwatch built on the RDTSC1/RDTSC2 macros.
// The constructor calibrates the cost of an empty start/stop pair; cycles()
// reports the last measured interval with that cost removed.
class pentium_timer
{
private:
    unsigned long long _overhead;   // _end - _start of an empty RDTSC1/RDTSC2 pair
    unsigned long long _start;      // timestamp written by RDTSC1
    unsigned long long _end;        // timestamp written by RDTSC2

public:
    // Basic constructor
    pentium_timer (void)
    {
        /* Just want to get stuff into L1 cache */
        RDTSC1(&_start);
        RDTSC2(&_end);
        /* Measure overhead */
        RDTSC1(&_start);
        RDTSC2(&_end);
        _overhead = _end - _start;
    }

    // Start and stop the timer
    inline void start (void) { RDTSC1(&_start); }
    inline void stop (void) { RDTSC2(&_end); }

    // Info functions

    // Calibrated cost, in cycles, of an empty start()/stop() pair.
    inline unsigned long long overhead (void) const { return _overhead; }

    // Cycles between the last start() and stop(), minus the overhead.
    // The subtraction is unsigned, so an interval that comes out shorter
    // than the calibrated overhead would wrap to a huge value; report 0
    // for that case instead.
    inline unsigned long long cycles (void) const
    {
        const unsigned long long elapsed = _end - _start;
        return (elapsed > _overhead) ? (elapsed - _overhead) : 0;
    }
};
//----------------------------------------------------------------------------
// Pentium profiler class: averages cycle counts over multiple runs
// Accumulates pentium_timer measurements over repeated start()/stop() pairs
// and reports the average cycle count per run.
class pentium_profiler
{
private:
    // The internal timer
    pentium_timer timer;

    // Stats info
    unsigned long long _runs;    // number of completed start()/stop() pairs
    unsigned long long _total;   // sum of timer.cycles() over all runs

public:
    // Basic constructor
    pentium_profiler (void)
        : _runs(0),
          _total(0)
    { }

    // Start and stop the timer
    inline void start (void)
    {
        timer.start();
    }

    inline void stop (void)
    {
        timer.stop();
        _runs++;
        _total += timer.cycles();
    }

    // Info functions

    // Average cycles per run; 0 if nothing has been measured yet.
    // timer.cycles() already has the measurement overhead removed from
    // every sample added in stop(), so the overhead must not be subtracted
    // a second time here.
    inline unsigned long long cycles (void) const
    {
        if (_runs == 0) return 0;
        return _total / _runs;
    }

    // Number of completed runs.
    inline unsigned long long runs (void) const
    {
        return _runs;
    }
};
#endif
//----------------------------------------------------------------------------
#endif
- Raw text -