Thursday, December 3, 2009

Moving from vs6 to vs9 breaks Latin1 support

I have a database that contains latin1 data and is accessed by several applications. One application runs in the msdos console. We used to build it with vs6 (msdev98) but recently moved to vs9 (microsoft visual studio 2008). We call setlocale(LC_CTYPE,"") near startup.

On a default Windows XP installation the OEM code page is 437. That means that not all Latin-1 characters can be displayed in the console. Consequently, we detect these characters and display plain-ASCII escape sequences instead.

On a default Windows XP installation, setlocale(LC_CTYPE,"") selects English_United States.1252. In vs6 that didn't matter, but in vs9, when using printf, a magic last-minute best-fit mapping occurs. This is very painful for me.

I put together the following app so that I could see what's going on. As far as I can tell, the only way to defeat vs9's best-fit mapping is to instead call setlocale(LC_CTYPE,".OCP").

// Note: In order to ditch the UNICODE define you have to do the following:
// Configuration Properties >> C/C++ >> Preprocessor >> Preprocessor Definitions
// >> [uncheck] Inherit from parent or project defaults
// If your product is 14 years old, switching to UNICODE just isn't an option.

#include <locale.h>
#include <stdio.h>

#include <iostream>

#include <windows.h>

void info()
{
  // GetCPInfoEx vars
  BOOL res;
  CPINFOEX CPInfoEx;
  
  // Registry vars
  #define CHAR_CMD_REG_DATA_SIZE 1024
  DWORD ret;
  HKEY hKey = 0;
  BYTE data[CHAR_CMD_REG_DATA_SIZE];
  DWORD dataSize = CHAR_CMD_REG_DATA_SIZE;
  bool regIsOpen = false;
  
  // GetLocaleInfo vars
  int status;
  #define LOCALE_DATA_SIZE 1024
  char localeData[LOCALE_DATA_SIZE];
  int localeDataSize = LOCALE_DATA_SIZE;
  
  // Open the registry
  ret = RegOpenKeyEx( HKEY_LOCAL_MACHINE, "SYSTEM\\CurrentControlSet\\Control\\Nls\\CodePage"
                    , 0, KEY_EXECUTE, &hKey );
  if (ret) { printf("\n RegOpenKeyEx Failed"); }
  else { regIsOpen = true; }
  
  // -----------------------------------------------------
  
  // http://codesnipers.com/?q=strange-case-of-two-system-locale-ansi-charsets#comment-53272
  // http://codesnipers.com/?q=node/46
  // http://blogs.msdn.com/michkap/archive/2005/02/01/364707.aspx
  //
  // LOCALE_USER_DEFAULT : Control Panel >> Regional and Language Options 
  // >> Regional Options [tab] >> Standards and Formats
  // - is per user and does not require reboot
  //
  // LOCALE_SYSTEM_DEFAULT : Control Panel >> Regional and Language Options 
  // >> Advanced [tab] >> Language for non-Unicode Programs
  // - is system wide and requires reboot
  // - specifies the default ANSI and OEM code pages, and some of the font 
  // linking preferences
  
  // -----------------------------------------------------
  
  printf("\n GetConsoleCP() %d -- Note: effected by the chcp command.\n",GetConsoleCP());
  
  printf("\n GetOEMCP() %d",GetOEMCP());
  
  res = GetCPInfoEx( CP_OEMCP, 0, &CPInfoEx );
  if (res==0) { printf("\n GetCPInfoEx Failed"); }
  else { printf("\n GetCPInfoEx(CP_OEMCP) %d",CPInfoEx.CodePage); }
  
  status = GetLocaleInfo( LOCALE_USER_DEFAULT, LOCALE_IDEFAULTCODEPAGE, localeData, localeDataSize );
  if (status==0) { printf("\n GetLocaleInfo Failed"); }
  else { printf("\n GetLocaleInfo(LOCALE_USER_DEFAULT,OEM) %s",localeData); }
  
  status = GetLocaleInfo( LOCALE_SYSTEM_DEFAULT, LOCALE_IDEFAULTCODEPAGE, localeData, localeDataSize );
  if (status==0) { printf("\n GetLocaleInfo Failed"); }
  else { printf("\n GetLocaleInfo(LOCALE_SYSTEM_DEFAULT,OEM) %s",localeData); }
  
  if (regIsOpen)
  {
    dataSize = CHAR_CMD_REG_DATA_SIZE;
    ret = RegQueryValueEx( hKey, "OEMCP", NULL, NULL, data, &dataSize );
    if (ret) { printf("\n RegQueryValueEx Failed"); }
    else { printf("\n Registry...\\CodePage\\OEMCP %s\n",(char*)data); }
  }
  
  // -----------------------------------------------------
  
  printf("\n GetACP() %d",GetACP());
  
  res = GetCPInfoEx( CP_ACP, 0, &CPInfoEx );
  if (res==0) { printf("\n GetCPInfoEx Failed"); }
  else { printf("\n GetCPInfoEx(CP_ACP) %d",CPInfoEx.CodePage); }
  
  status = GetLocaleInfo( LOCALE_USER_DEFAULT, LOCALE_IDEFAULTANSICODEPAGE, localeData, localeDataSize );
  if (status==0) { printf("\n GetLocaleInfo Failed"); }
  else { printf("\n GetLocaleInfo(LOCALE_USER_DEFAULT,ANSI) %s",localeData); }
  
  status = GetLocaleInfo( LOCALE_SYSTEM_DEFAULT, LOCALE_IDEFAULTANSICODEPAGE, localeData, localeDataSize );
  if (status==0) { printf("\n GetLocaleInfo Failed"); }
  else { printf("\n GetLocaleInfo(LOCALE_SYSTEM_DEFAULT,ANSI) %s",localeData); }
  
  if (regIsOpen)
  {
    dataSize = CHAR_CMD_REG_DATA_SIZE;
    ret = RegQueryValueEx( hKey, "ACP", NULL, NULL, data, &dataSize );
    if (ret) { printf("\n RegQueryValueEx Failed"); }
    else { printf("\n Registry...\\CodePage\\ACP %s\n",(char*)data); }
  }
  
  // -----------------------------------------------------
  
  char * str;
  
  str = setlocale(LC_ALL     , NULL); if (str == NULL) { printf("\n LC_ALL      = unknown"); } else { printf("\n LC_ALL      = %s",str); }
  str = setlocale(LC_COLLATE , NULL); if (str == NULL) { printf("\n LC_COLLATE  = unknown"); } else { printf("\n LC_COLLATE  = %s",str); }
  str = setlocale(LC_CTYPE   , NULL); if (str == NULL) { printf("\n LC_CTYPE    = unknown"); } else { printf("\n LC_CTYPE    = %s",str); }
  str = setlocale(LC_MONETARY, NULL); if (str == NULL) { printf("\n LC_MONETARY = unknown"); } else { printf("\n LC_MONETARY = %s",str); }
  str = setlocale(LC_NUMERIC , NULL); if (str == NULL) { printf("\n LC_NUMERIC  = unknown"); } else { printf("\n LC_NUMERIC  = %s",str); }
  str = setlocale(LC_TIME    , NULL); if (str == NULL) { printf("\n LC_TIME     = unknown"); } else { printf("\n LC_TIME     = %s",str); }
  
  printf("\n\n"
  " 0 1 2 3 4 5 6 7 8 9 A B C D E F\n\n"
  " 2 \x20 \x21 \x22 \x23 \x24 \x25 \x26 \x27 \x28 \x29 \x2a \x2b \x2c \x2d \x2e \x2f\n"
  " 3 \x30 \x31 \x32 \x33 \x34 \x35 \x36 \x37 \x38 \x39 \x3a \x3b \x3c \x3d \x3e \x3f\n"
  " 4 \x40 \x41 \x42 \x43 \x44 \x45 \x46 \x47 \x48 \x49 \x4a \x4b \x4c \x4d \x4e \x4f\n"
  " 5 \x50 \x51 \x52 \x53 \x54 \x55 \x56 \x57 \x58 \x59 \x5a \x5b \x5c \x5d \x5e \x5f\n"
  " 6 \x60 \x61 \x62 \x63 \x64 \x65 \x66 \x67 \x68 \x69 \x6a \x6b \x6c \x6d \x6e \x6f\n"
  " 7 \x70 \x71 \x72 \x73 \x74 \x75 \x76 \x77 \x78 \x79 \x7a \x7b \x7c \x7d \x7e \x7f\n"
  " 8 \x80 \x81 \x82 \x83 \x84 \x85 \x86 \x87 \x88 \x89 \x8a \x8b \x8c \x8d \x8e \x8f\n"
  " 9 \x90 \x91 \x92 \x93 \x94 \x95 \x96 \x97 \x98 \x99 \x9a \x9b \x9c \x9d \x9e \x9f\n"
  " a \xa0 \xa1 \xa2 \xa3 \xa4 \xa5 \xa6 \xa7 \xa8 \xa9 \xaa \xab \xac \xad \xae \xaf\n"
  " b \xb0 \xb1 \xb2 \xb3 \xb4 \xb5 \xb6 \xb7 \xb8 \xb9 \xba \xbb \xbc \xbd \xbe \xbf\n"
  " c \xc0 \xc1 \xc2 \xc3 \xc4 \xc5 \xc6 \xc7 \xc8 \xc9 \xca \xcb \xcc \xcd \xce \xcf\n"
  " d \xd0 \xd1 \xd2 \xd3 \xd4 \xd5 \xd6 \xd7 \xd8 \xd9 \xda \xdb \xdc \xdd \xde \xdf\n"
  " e \xe0 \xe1 \xe2 \xe3 \xe4 \xe5 \xe6 \xe7 \xe8 \xe9 \xea \xeb \xec \xed \xee \xef\n"
  " f \xf0 \xf1 \xf2 \xf3 \xf4 \xf5 \xf6 \xf7 \xf8 \xf9 \xfa \xfb \xfc \xfd \xfe \xff\n"
  "\n");
}
  
// Runs the diagnostic dump twice: once before any locale change, and once
// after LC_CTYPE has been switched to the environment's default locale, so
// the two outputs can be compared side by side.
int main(int argc, char** argv)
{
  for (int pass = 0; pass < 2; ++pass)
  {
    // Only the second pass runs with the user-default LC_CTYPE in effect.
    if (pass == 1) { setlocale(LC_CTYPE,""); }
    info();
  }
  return 0;
}
{ "loggedin": false, "owner": false, "avatar": "", "render": "nothing", "trackingID": "UA-36983794-1", "description": "Changes introduced in msdev 2008, Visual Studio 9 mean that in order to print special characters in a dos console you must setlocale \u0022.OCP\u0022.", "page": { "blogIds": [ 21 ] }, "domain": "holtstrom.com", "base": "\/michael", "url": "https:\/\/holtstrom.com\/michael\/", "frameworkFiles": "https:\/\/holtstrom.com\/michael\/_framework\/_files.4\/", "commonFiles": "https:\/\/holtstrom.com\/michael\/_common\/_files.3\/", "mediaFiles": "https:\/\/holtstrom.com\/michael\/media\/_files.3\/", "tmdbUrl": "http:\/\/www.themoviedb.org\/", "tmdbPoster": "http:\/\/image.tmdb.org\/t\/p\/w342" }