--- linux/arch/i386/kernel/smp.c.orig	Fri May 14 16:04:53 1999
+++ linux/arch/i386/kernel/smp.c	Fri May 14 17:43:06 1999
@@ -100,6 +100,7 @@
 static int smp_b_stepping = 0;	/* Set if we find a B stepping CPU */
 
 static int max_cpus = -1;	/* Setup configured maximum number of CPUs to activate */
+static int forced_cpus = 0;	/* Setup configured number of CPUs to activate, even if no SMP config is found */
 int smp_found_config=0;	/* Have we found an SMP box */
 
 unsigned long cpu_present_map = 0;	/* Bitmask of physically existing CPUs */
@@ -149,6 +150,13 @@
  */
 #define APIC_DEFAULT_PHYS_BASE 0xfee00000
 
+/*
+ * Reads and clears the Pentium Timestamp-Counter
+ */
+#define READ_TSC(x) __asm__ __volatile__ ( "rdtsc" \
+		:"=a" (((unsigned long*)&(x))[0]), \
+		 "=d" (((unsigned long*)&(x))[1]))
+
 #define CLEAR_TSC wrmsr(0x10, 0x00001000, 0x00001000)
 
 /*
@@ -170,6 +178,38 @@
  */
 	max_cpus = 0;
 }
 
+void __init smp_force_setup(char *str, int *ints)
+{
+	int i;
+
+	if (ints && ints[0] > 0)
+		forced_cpus = ints[1];
+	else {
+		forced_cpus = 0;
+		return;
+	}
+	if (forced_cpus > NR_CPUS) {
+		printk("warning: can force only %d CPUs\n", NR_CPUS);
+		forced_cpus = NR_CPUS;
+	}
+	/*
+	 * forced config takes precedence.
+	 */
+	smp_found_config = 1;
+
+	cpu_present_map = (1<<forced_cpus)-1;
+}
+
+#ifdef CONFIG_X86_TSC
+/*
+ * Boot-time TSC synchronization of the APs with the BP:
+ */
+#define NR_LOOPS 5
+
+extern unsigned long fast_gettimeoffset_quotient;
+
+static atomic_t tsc_start_flag = ATOMIC_INIT(0);
+static atomic_t tsc_count_start = ATOMIC_INIT(0);
+static atomic_t tsc_count_stop = ATOMIC_INIT(0);
+static unsigned long long tsc_values[NR_CPUS];
+
+/*
+ * Approximate 64-bit/32-bit division built from 32-bit divisions,
+ * good enough for the boot-time averaging below.
+ */
+static unsigned long long __init div64 (unsigned long long a, unsigned long long b)
+{
+	unsigned int a1, a2, b0;
+	unsigned long long res;
+
+	if (b > 0x00000000ffffffffULL)
+		return 0;
+	if (!b)
+		panic("huh?\n");
+
+	b0 = (unsigned int) b;
+	a1 = ((unsigned int*)&a)[0];
+	a2 = ((unsigned int*)&a)[1];
+
+	res = a1/b0 +
+		(unsigned long long)a2 * (unsigned long long)(0xffffffff/b0) +
+		a2 / b0 +
+		(a2 * (0xffffffff % b0)) / b0;
+
+	return res;
+}
+
+
+static void __init synchronize_tsc_bp (void)
+{
+	int i;
+	unsigned long long t0;
+	unsigned long long sum, avg;
+	long long delta;
+	unsigned long one_usec;
+	int buggy = 0;
+
+	printk("checking TSC synchronization across CPUs: ");
+
+	one_usec = ((1<<30)/fast_gettimeoffset_quotient)*(1<<2);
+
+	atomic_set(&tsc_start_flag, 1);
+	wmb();
+
+	/*
+	 * We loop a few times to get a primed instruction cache,
+	 * then the last pass is more or less synchronized and
+	 * the BP and APs set their cycle counters to zero all at
+	 * once. This reduces the chance of having random offsets
+	 * between the processors, and guarantees that the maximum
+	 * delay between the cycle counters is never bigger than
+	 * the latency of information-passing (cachelines) between
+	 * two CPUs.
+	 */
+	for (i = 0; i < NR_LOOPS; i++) {
+		/*
+		 * all APs synchronize but they loop on '== num_cpus'
+		 */
+		while (atomic_read(&tsc_count_start) != smp_num_cpus-1) mb();
+		atomic_set(&tsc_count_stop, 0);
+		wmb();
+		/*
+		 * this lets the APs save their current TSC:
+		 */
+		atomic_inc(&tsc_count_start);
+
+		READ_TSC(tsc_values[smp_processor_id()]);
+		/*
+		 * We clear the TSC in the last loop:
+		 */
+		if (i == NR_LOOPS-1)
+			CLEAR_TSC;
+
+		/*
+		 * Wait for all APs to leave the synchronization point:
+		 */
+		while (atomic_read(&tsc_count_stop) != smp_num_cpus-1) mb();
+		atomic_set(&tsc_count_start, 0);
+		wmb();
+		atomic_inc(&tsc_count_stop);
+	}
+
+	sum = 0;
+	for (i = 0; i < NR_CPUS; i++) {
+		if (!(cpu_online_map & (1 << i)))
+			continue;
+
+		t0 = tsc_values[i];
+		sum += t0;
+	}
+	avg = div64(sum, smp_num_cpus);
+
+	sum = 0;
+	for (i = 0; i < NR_CPUS; i++) {
+		if (!(cpu_online_map & (1 << i)))
+			continue;
+
+		delta = tsc_values[i] - avg;
+		if (delta < 0)
+			delta = -delta;
+		/*
+		 * We report clock differences bigger than 2 microseconds.
+		 */
+		if (delta > 2*one_usec) {
+			long realdelta;
+			if (!buggy) {
+				buggy = 1;
+				printk("\n");
+			}
+			realdelta = div64(delta, one_usec);
+			if (tsc_values[i] < avg)
+				realdelta = -realdelta;
+
+			printk("BIOS BUG: CPU#%d improperly initialized, has %ld usecs TSC skew! FIXED.\n",
+				i, realdelta);
+		}
+
+		sum += delta;
+	}
+	if (!buggy)
+		printk("passed.\n");
+}
+
+static void __init synchronize_tsc_ap (void)
+{
+	int i;
+
+	/*
+	 * smp_num_cpus is not necessarily known at the time
+	 * this gets called, so we first wait for the BP to
+	 * finish SMP initialization:
+	 */
+	while (!atomic_read(&tsc_start_flag)) mb();
+
+	for (i = 0; i < NR_LOOPS; i++) {
+		atomic_inc(&tsc_count_start);
+		while (atomic_read(&tsc_count_start) != smp_num_cpus) mb();
+
+		READ_TSC(tsc_values[smp_processor_id()]);
+		if (i == NR_LOOPS-1)
+			CLEAR_TSC;
+
+		atomic_inc(&tsc_count_stop);
+		while (atomic_read(&tsc_count_stop) != smp_num_cpus) mb();
+	}
+}
+#undef NR_LOOPS
+
+#endif
+
 extern void calibrate_delay(void);
 
 void __init smp_callin(void)
@@ -866,6 +1084,13 @@
 	 * Allow the master to continue.
 	 */
 	set_bit(cpuid, (unsigned long *)&cpu_callin_map[0]);
+
+#ifdef CONFIG_X86_TSC
+	/*
+	 * Synchronize the TSC with the BP
+	 */
+	synchronize_tsc_ap ();
+#endif
 }
 
 int cpucount = 0;
@@ -1390,6 +1615,13 @@
 #endif
 
 smp_done:
+#ifdef CONFIG_X86_TSC
+	/*
+	 * Synchronize the TSC with the AP
+	 */
+	if (cpucount)
+		synchronize_tsc_bp();
+#endif
 }
 
--- linux/arch/i386/kernel/time.c.orig	Fri May 14 16:09:06 1999
+++ linux/arch/i386/kernel/time.c	Fri May 14 16:09:33 1999
@@ -74,7 +74,7 @@
  * Equal to 2^32 * (1 / (clocks per usec) ).
  * Initialized in time_init.
  */
-static unsigned long fast_gettimeoffset_quotient=0;
+unsigned long fast_gettimeoffset_quotient=0;
 
 extern rwlock_t xtime_lock;
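
A side note on the div64() helper used above: it approximates a 64-bit by 32-bit
division with nothing but 32-bit divides, by splitting the dividend into halves
(a = a1 + a2*2^32) and expanding a2*2^32/b as
a2*(0xffffffff/b) + a2/b + (a2*(0xffffffff % b))/b. The user-space sketch below
is not part of the patch; the function name and test values are made up for
illustration only. It shows the same decomposition and can be compared against a
native 64-bit division.

#include <stdio.h>

/*
 * Same decomposition as the patch's div64(), in user space:
 * a/b == a1/b + a2*(2^32)/b
 *     ~= a1/b + a2*(0xffffffff/b) + a2/b + (a2*(0xffffffff % b))/b
 * Dropping the partial remainders can make the result a few counts
 * too small, which is harmless for boot-time TSC averaging.
 */
static unsigned long long approx_div64(unsigned long long a, unsigned int b0)
{
	unsigned int a1 = (unsigned int)(a & 0xffffffffULL);	/* low 32 bits */
	unsigned int a2 = (unsigned int)(a >> 32);		/* high 32 bits */

	return a1 / b0 +
		(unsigned long long)a2 * (0xffffffffU / b0) +
		a2 / b0 +
		(unsigned long long)a2 * (0xffffffffU % b0) / b0;
}

int main(void)
{
	unsigned long long a = 123456789012345ULL;
	unsigned int b = 1000;

	printf("approx: %llu  exact: %llu\n", approx_div64(a, b), a / b);
	return 0;
}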