Subject: New EST undervolting patch
To: None <current-users@netbsd.org>
From: Juraj Hercek <nbsd@hck.sk>
List: tech-kern
Date: 04/14/2007 19:25:34
This is a multi-part message in MIME format.
--------------010304040002010301090802
Content-Type: text/plain; charset=ISO-8859-1; format=flowed
Content-Transfer-Encoding: 7bit

Hello people,

There was some interest in having undervolting feature in current 
kernel. I did a patch some time ago which was merely a rewrite of est 
(http://mail-index.netbsd.org/tech-kern/2006/06/10/0006.html). It wasn't 
a way to go because of many changes needed to adjust it when something 
changed in original est. New patch is completely different in a such way 
that it tries to make *minimal* changes to the original est.

Because of the minimal changes "requirement", there are some things 
which are *ugly* (line numbers correspond to attached diff file):

a) some parts were copied from est, namely fqlist + some defines (lines 
89 - 112) + code which sets the frequency (reset_operating_point() 
function, lines 177 - 195). This could be solved by having an est.h 
header file and an implementation of setting operating point in est.c 
accessible from outside.

b) the __UNCONST dance... This is needed because of const table member 
in fqlist and the const of the fqlist pointer itself 
(est_uv_setup_fqlist(), lines 422-459 and assign_undervolted_value(), 
lines 198-208). This could be solved by un-const-ing relevant members.

If you think attached undervolting patch would be worth including in est 
driver in current kernel, I can adjust the est and undervolting part so 
it's not so ugly.

After patching your sources, you have to add EST_UNDERVOLTING option 
into your kernel config file in order to enable undervolting feature in 
est. This is what I use in my config:
...
# Enhanced SpeedStep Technology in the Pentium M
options         ENHANCED_SPEEDSTEP
options         EST_FREQ_USERWRITE      # any user can set frequency
options         EST_UNDERVOLTING
...

And just for completness, this is relevant part from my /etc/sysctl.conf 
(note: you probably want to use different values unless you have Dothan 
755 VID#A CPU):
...
# Set the proper voltage ramp
machdep.est.voltage.available=1164 1100 1036 972 908 844 780 700
...

I have tested it with estd during couple of last netbsd release builds. 
It helped me to prolong battery life for about 30 - 45 minutes (might be 
different with other Pentium M CPU types).

-- Juraj

PS: Any comments are welcome.


--------------010304040002010301090802
Content-Type: text/x-patch;
 name="est_uv_patch.diff"
Content-Transfer-Encoding: 7bit
Content-Disposition: inline;
 filename="est_uv_patch.diff"

Index: sys/arch/i386/conf/files.i386
===================================================================
RCS file: /cvsroot/src/sys/arch/i386/conf/files.i386,v
retrieving revision 1.304
diff -u -r1.304 files.i386
--- sys/arch/i386/conf/files.i386	10 Apr 2007 02:40:16 -0000	1.304
+++ sys/arch/i386/conf/files.i386	14 Apr 2007 16:10:14 -0000
@@ -537,7 +537,8 @@
 
 # Enhanced SpeedStep
 file	arch/i386/i386/est.c		enhanced_speedstep
-defflag	opt_est.h	EST_FREQ_USERWRITE
+file	arch/i386/i386/est_uv.c		enhanced_speedstep & est_undervolting
+defflag	opt_est.h	EST_FREQ_USERWRITE EST_UNDERVOLTING
 
 # AMD PowerNow K7
 file   arch/i386/i386/powernow_k7.c	powernow_k7
Index: sys/arch/i386/i386/est.c
===================================================================
RCS file: /cvsroot/src/sys/arch/i386/i386/est.c,v
retrieving revision 1.37
diff -u -r1.37 est.c
--- sys/arch/i386/i386/est.c	25 Mar 2007 17:12:32 -0000	1.37
+++ sys/arch/i386/i386/est.c	14 Apr 2007 16:10:14 -0000
@@ -106,6 +106,10 @@
 #define	EST_TARGET_CTLFLAG	CTLFLAG_READWRITE
 #endif
 
+#ifdef EST_UNDERVOLTING
+#include "est_uv.h"
+#endif
+
 /* Convert MHz and mV into IDs for passing to the MSR. */
 #define ID16(MHz, mV, bus_clk) \
 	((((MHz * 100 + 50) / bus_clk) << 8) | ((mV ? mV - 700 : 0) >> 4))
@@ -1007,6 +1011,10 @@
 		est_fqlist = &fake_fqlist;
 	}
 
+#ifdef EST_UNDERVOLTING
+	est_fqlist = est_uv_setup_fqlist(est_fqlist, cpuname);
+#endif
+
 	/*
 	 * OK, tell the user the available frequencies.
 	 */
@@ -1057,6 +1065,12 @@
 	    NULL, 0, freq_names, freq_len, CTL_CREATE, CTL_EOL)) != 0)
 		goto err;
 
+#ifdef EST_UNDERVOLTING
+	/* Setup undervolting entries */
+	if (est_uv_setup_sysctl_entries(estnode, cpuname) != 0)
+		goto err;
+#endif
+
 	return;
 
  err:
Index: sys/arch/i386/i386/est_uv.c
===================================================================
RCS file: sys/arch/i386/i386/est_uv.c
diff -N sys/arch/i386/i386/est_uv.c
--- /dev/null	1 Jan 1970 00:00:00 -0000
+++ sys/arch/i386/i386/est_uv.c	14 Apr 2007 16:10:14 -0000
@@ -0,0 +1,450 @@
+/*
+ * Copyright (c) 2007 Juraj Hercek.
+ * All rights reserved.
+ *
+ * Proper BSD license should go here...
+ * 
+ */
+
+#include <sys/cdefs.h>
+#include <sys/param.h>
+
+/* aprint_normal, memcpy */
+#include <sys/systm.h>
+/* malloc */
+#include <sys/malloc.h>
+/* sysctl stuff */
+#include <sys/sysctl.h>
+/* msr operations */
+#include <x86/cpu_msr.h>
+/* est undervolting interface */
+#include "est_uv.h"
+
+/*******************************************************************************
+ * Should this be in est.h ?
+ * This part contains data copied from est.c file
+ ******************************************************************************/
+extern int bus_clock;
+
+struct fqlist {
+	int vendor;
+	unsigned bus_clk;
+	unsigned n;
+	const uint16_t *table;
+};
+
+#define MSR2FREQINC(msr)	(((int) (msr) >> 8) & 0xff)
+#define MSR2VOLTINC(msr)	((int) (msr) & 0xff)
+
+#define MSR2MHZ(msr, bus)	((MSR2FREQINC((msr)) * (bus) + 50) / 100)
+#define MSR2MV(msr)		(MSR2VOLTINC(msr) * 16 + 700)
+
+/* Convert MHz and mV into IDs for passing to the MSR. */
+#define ID16(MHz, mV, bus_clk) \
+	((((MHz * 100 + 50) / bus_clk) << 8) | ((mV ? mV - 700 : 0) >> 4))
+
+/*******************************************************************************
+ * Private data
+ ******************************************************************************/
+#define EST_UNDERVOLT_CTLFLAG           CTLFLAG_READWRITE
+#define EST_VOLTAGE_AVAILABLE_CTLFLAG   CTLFLAG_READWRITE
+#define EST_VOLTAGE_CURRENT_CTLFLAG     CTLFLAG_READWRITE
+
+/* description of the drivers */
+static const char	est_uv_desc[] = "Enhanced SpeedStep UnderVolting";
+/* not NULL if functional */
+static const struct fqlist	*est_fqlist_original;
+/* not NULL if functional */
+static const struct fqlist	*est_fqlist_undervolt;
+
+/* undervolt enabled/disabled node */
+static int  est_node_volt_undervolt;
+/* undervolt available voltages node */
+static int  est_node_volt_available; 
+/* current undervolt voltage */
+static int  est_node_volt_current; 
+
+/* flag specifying the possibility of changing undervolt values */
+static int  undervolt_enabled = 0;	/* disabled by default */
+
+/*******************************************************************************
+ * Helper functions for sysctl helpers :-)
+ ******************************************************************************/
+static int
+get_integer_value(const int sysctl_number, int *integer)
+{
+	if (sysctl_number == est_node_volt_undervolt) {
+		*integer = undervolt_enabled;
+		return 1;
+	} else if (sysctl_number == est_node_volt_current) {
+		*integer = MSR2MV(rdmsr(MSR_PERF_CTL)); /* target voltage */
+		return 1;
+	} else
+		return 0;
+}
+
+/* uses frequency to find out current operating point */
+static int
+find_operating_point_index(const int fq)
+{
+	int	i;
+
+	for (i = est_fqlist_undervolt->n - 1; i > 0; i--)
+		if (MSR2MHZ(est_fqlist_undervolt->table[i], bus_clock) >= fq)
+			break;
+	return i;
+}
+
+static int
+get_target_frequency(void)
+{
+	return MSR2MHZ(rdmsr(MSR_PERF_CTL), bus_clock);
+}
+
+static int
+get_current_operating_point_index(void)
+{
+	return find_operating_point_index(get_target_frequency());
+}
+
+static void
+reset_operating_point(void)
+{
+	int				i;
+	struct msr_cpu_broadcast	mcb;
+
+	i = get_current_operating_point_index();
+	mcb.msr_read = true;
+	mcb.msr_type = MSR_PERF_CTL;
+	mcb.msr_mask = 0xffffULL;
+	mcb.msr_value = est_fqlist_undervolt->table[i];
+	msr_cpu_broadcast(&mcb);
+
+/* Old code applicable before Juan's changes to est.c in rev. 1.35
+	msr = rdmsr(MSR_PERF_CTL);
+	msr &= ~0xffffULL;
+	msr |= est_fqlist_undervolt->table[i];
+	wrmsr(MSR_PERF_CTL, msr);
+*/
+}
+
+static void
+assign_undervolted_value(int index, int uv_value)
+{
+	const struct fqlist	*list = est_fqlist_undervolt; /*just an alias*/
+	int		frequency = MSR2MHZ(list->table[index], bus_clock);
+	uint16_t	*id = __UNCONST(&(list->table[index]));
+
+	/*
+	aprint_debug("DBG: Frequency: %d - Value: %d\n", frequency, uv_value);
+	*/
+	*id = ID16(frequency , uv_value, bus_clock);
+}
+
+static int
+set_integer_value(const int sysctl_number, const int integer)
+{
+	if (sysctl_number == est_node_volt_undervolt) {
+		/* accept only 0 (disabled) or 1 (enabled) values */
+		if (integer < 0 || integer > 1) {
+			return EINVAL;
+		} else {
+			/* copy data from user into our local storage */
+			undervolt_enabled = integer;
+		}
+	} else if (sysctl_number == est_node_volt_current) {
+		/* make sure the voltage is not under 700 and not above
+		 * the limit specified by default voltage value */
+		int	max_voltage, index;
+
+		if (!undervolt_enabled)
+			return EPERM;
+
+		index = get_current_operating_point_index();
+		max_voltage = MSR2MV(est_fqlist_original->table[index]);
+		if (integer < 700 || integer > max_voltage)
+			return EINVAL;
+		else {
+			assign_undervolted_value(index, integer);
+		}
+	}
+	reset_operating_point();
+	return 0;
+}
+
+static int set_voltages(const int *buffer)
+{
+	int	i;
+
+	/* first, verify numbers */
+	for (i = 0; i < est_fqlist_undervolt->n; ++i)
+		if (buffer[i] > MSR2MV(est_fqlist_original->table[i]) ||
+				buffer[i] < 700)
+			return EINVAL;
+
+	/* second, set undervolted values */
+	for (i = 0; i < est_fqlist_undervolt->n; ++i)
+		assign_undervolted_value(i, buffer[i]);
+
+	/* also re-set current frequency so the voltage
+	 * comes into effect immediately */
+	reset_operating_point();
+	return 0;
+}
+
+static void
+build_voltage_string(char **string, int *length)
+{
+	char*		data;
+	int		i, tmp;
+	const int	size = est_fqlist_undervolt->n * (sizeof("9999 ")-1) + 1;
+
+	MALLOC(data, char*, size, M_SYSCTLDATA, M_ZERO);
+	data[0] = '\0';
+	tmp = 0;
+	for (i = 0; i < est_fqlist_undervolt->n; i++) {
+		tmp += snprintf(data + tmp, size - tmp, "%d%s",
+				MSR2MV(est_fqlist_undervolt->table[i]),
+				i < est_fqlist_undervolt->n - 1 ? " " : "");
+	}
+
+	*string = data;
+	*length = size;
+}
+
+static int
+convert_string_to_number(const char *string, const size_t max_size, int *number)
+{
+	size_t		size;
+	const int	base = 10;
+	int		unit, i;
+	char	c;
+
+	/* get size of the string */
+	for (size = 0, c = string[0];
+			c >= '0' && c <= '9' && size < max_size;
+			c = string[++size]);
+	/* perform conversion */
+	for (i = size - 1, unit = 1; i >= 0; --i, unit *= base)
+		*number += (string[i] - '0') * unit;
+
+	return size;
+}
+
+static int
+parse_data_string(const char* string, const size_t length)
+{
+	const size_t	max_size = sizeof("9999") - 1;
+	int		i, size, converted, integer, result, *buffer;
+	const char	*from;
+
+	MALLOC(buffer, int*, est_fqlist_undervolt->n * sizeof(int),
+			M_SYSCTLDATA, M_ZERO);
+	
+	for(i = 0, size = 0, from = string;
+			i < est_fqlist_undervolt->n && size <= length;
+			++i, from += converted + 1, size += converted) {
+		integer = 0;
+		converted = convert_string_to_number(from, max_size, &integer);
+		if(converted <= 0)
+			break; /* exit from for when there's no work to do */
+		else
+			buffer[i] = integer;
+	}
+
+	result = set_voltages(buffer);
+
+	free(buffer, M_SYSCTLDATA);
+	
+	return result;
+}
+
+static int
+get_string_value(const int sysctl_number, char **string, int *length)
+{
+	if (sysctl_number == est_node_volt_available) {
+		build_voltage_string(string, length);
+		return 0;
+	}
+	return 1;
+}
+
+static int
+set_string_value(const int sysctl_number, const char *string, const int length)
+{
+	if (sysctl_number == est_node_volt_available) {
+		if (undervolt_enabled)
+			return parse_data_string(string, length);
+		else
+			return EPERM;
+	}
+	return 0;
+}
+
+/*******************************************************************************
+ * Sysctl helper functions
+ ******************************************************************************/
+static int
+est_sysctl_integer_helper(SYSCTLFN_ARGS)
+{
+	struct sysctlnode	node;
+	int			integer, error, original;
+
+	if (est_fqlist_undervolt == NULL)
+		return EOPNOTSUPP;
+
+	if (!get_integer_value(rnode->sysctl_num, &integer))
+		return EOPNOTSUPP;
+
+	/* keep original copy */
+	original = integer;
+
+	/* setup parameters for sysctl_lookup */
+	node = *rnode;
+	node.sysctl_data = &integer;
+
+	/* now copy in / out data depending whether we were called in read
+	 * or write intention (sysctl / sysctl -w) */
+	error = sysctl_lookup(SYSCTLFN_CALL(&node));
+
+	if (error || newp == NULL)
+		return error;
+
+	if (original != integer)
+		return set_integer_value(rnode->sysctl_num, integer);
+	else
+		return 0;	/* nothing to be set, return "ok" */
+}
+
+static int
+est_sysctl_string_helper(SYSCTLFN_ARGS)
+{
+	struct sysctlnode	node;
+	char			*string;
+	int			error, length, result;
+
+
+	if (est_fqlist_undervolt == NULL)
+		return EOPNOTSUPP;
+
+	if (get_string_value(rnode->sysctl_num, &string, &length))
+		return EOPNOTSUPP;
+
+	node = *rnode;
+	node.sysctl_data = string;
+	node.sysctl_size = length; /* includes also trailing 0 */
+
+	/* now copy in / out data depending whether we were called in read
+	 * or write intention (sysctl / sysctl -w) */
+	error = sysctl_lookup(SYSCTLFN_CALL(&node));
+
+	if (error || newp == NULL) {
+		free(string, M_SYSCTLDATA);
+		return error;
+	}
+
+	result = set_string_value(rnode->sysctl_num, string, newlen);
+
+	free(string, M_SYSCTLDATA);
+	return result;
+}
+
+/*******************************************************************************
+ * Interface implementation
+ ******************************************************************************/
+extern const struct fqlist*
+est_uv_setup_fqlist(const struct fqlist *original, const char* cpuname)
+{
+	const struct fqlist *result = NULL;
+
+	est_fqlist_original = original;
+
+	/* verify input parameters */
+	if (est_fqlist_original == NULL) {
+		/* this should actually never happend thanks to fake list */
+		aprint_normal("%s: Undervolting disabled - no fqlist available",
+				cpuname);
+		est_fqlist_undervolt = NULL;
+		result = NULL;
+	} else {
+		uint16_t **table_ptr = 0;
+		/* allocate new undervolting data */
+		int len = sizeof(struct fqlist) +
+			sizeof(uint16_t) * est_fqlist_original->n;
+		/* est_fqlist_undervolt =
+			(fqlist*) malloc(len, M_SYSCTLDATA, M_ZERO); */
+		MALLOC(est_fqlist_undervolt, struct fqlist*, len,
+				M_SYSCTLDATA, M_ZERO);
+		/* copy fqlist */
+		memcpy(__UNCONST(est_fqlist_undervolt),
+				est_fqlist_original,
+				sizeof(struct fqlist) - sizeof(uint16_t*));
+		table_ptr = __UNCONST(&est_fqlist_undervolt->table);
+		*table_ptr = (uint16_t*)((unsigned char*)
+				__UNCONST(est_fqlist_undervolt) +
+				sizeof(struct fqlist));
+		memcpy(__UNCONST(est_fqlist_undervolt->table),
+				est_fqlist_original->table,
+				sizeof(uint16_t) * est_fqlist_original->n);
+		/* return pointer to an undervolting list */
+		result = est_fqlist_undervolt;
+	}
+	return result;
+}
+
+extern int
+est_uv_setup_sysctl_entries(const struct sysctlnode *estnode,
+		const char* cpuname)
+{
+	/* result = 0 -> ok, result != 0 -> problem */
+	int	result = 0;
+	/* sysctl_createv return code */
+	int	rc = 0;
+	/* generic node to get node data from sysctl_createv */
+	const struct	sysctlnode	*node;
+	/* voltage node to keep node data from sysctl_createv */
+	const struct	sysctlnode *voltnode;
+	/* voltage string data */
+	char	*voltage_names;
+	int	voltage_length;
+
+	/* machdep.est.voltage */
+	if (0 != (rc = sysctl_createv(NULL, 0, &estnode, &voltnode,
+	    0, CTLTYPE_NODE, "voltage", "Voltage related nodes",
+	    NULL, 0, NULL, 0, CTL_CREATE, CTL_EOL)))
+		return 1;
+
+	/* machdep.est.voltage.undervolt */
+	if (0 != (rc = sysctl_createv(NULL, 0, &voltnode, &node,
+	    EST_UNDERVOLT_CTLFLAG, CTLTYPE_INT, "undervolt",
+	    "Enable/Disable undervolting feature [0|1]",
+	    est_sysctl_integer_helper, 0, NULL, 0, CTL_CREATE, CTL_EOL)))
+		return 2;
+	est_node_volt_undervolt = node->sysctl_num;
+
+	/* machdep.est.voltage.current */
+	if (0 != (rc = sysctl_createv(NULL, 0, &voltnode, &node,
+	    EST_VOLTAGE_CURRENT_CTLFLAG, CTLTYPE_INT, "current",
+	    "Current opearating point voltage in mV",
+	    est_sysctl_integer_helper, 0, NULL, 0, CTL_CREATE, CTL_EOL)))
+		return 3;
+	est_node_volt_current = node->sysctl_num;
+
+	/* machdep.est.voltage.available */
+	build_voltage_string(&voltage_names, &voltage_length);
+	if (0 != (rc = sysctl_createv(NULL, 0, &voltnode, &node,
+	    EST_VOLTAGE_AVAILABLE_CTLFLAG, CTLTYPE_STRING, "available",
+	    "Available voltages for given operating points in mV",
+	    est_sysctl_string_helper, 0, voltage_names, voltage_length,
+	    CTL_CREATE, CTL_EOL))) {
+		free(voltage_names, M_SYSCTLDATA);
+		return 4;
+	}
+	est_node_volt_available = node->sysctl_num;
+
+	aprint_normal("%s: %s default voltages (mV): %s\n",
+			cpuname, est_uv_desc, voltage_names);
+
+	return result;
+}
+
Index: sys/arch/i386/i386/est_uv.h
===================================================================
RCS file: sys/arch/i386/i386/est_uv.h
diff -N sys/arch/i386/i386/est_uv.h
--- /dev/null	1 Jan 1970 00:00:00 -0000
+++ sys/arch/i386/i386/est_uv.h	14 Apr 2007 16:10:14 -0000
@@ -0,0 +1,18 @@
+/*
+ * Copyright (c) 2007 Juraj Hercek.
+ * All rights reserved.
+ *
+ * Proper BSD license should go here...
+ * 
+ */
+
+/* Function sets up undervolted fqlist from the original one.
+ * original holds the original fqlist list which is going to be a blueprint for
+ * undervolted fqlist. It returns pointer to fqlist used for undervolting.
+ */
+extern const struct fqlist*	est_uv_setup_fqlist(const struct fqlist *original, const char* cpuname);
+/* Function sets up sysctl entries for undervolting feature
+ * estnode represents the node for est driver
+ */
+extern int		est_uv_setup_sysctl_entries(const struct sysctlnode *estnode, const char* cpuname);
+

--------------010304040002010301090802--