mirror of
				https://github.com/python/cpython.git
				synced 2025-10-25 07:48:51 +00:00 
			
		
		
		
	
		
			
				
	
	
		
			468 lines
		
	
	
	
		
			18 KiB
		
	
	
	
		
			TeX
		
	
	
	
	
	
			
		
		
	
	
			468 lines
		
	
	
	
		
			18 KiB
		
	
	
	
		
			TeX
		
	
	
	
	
	
| \section{\module{locale} ---
 | |
|          Internationalization services}
 | |
| 
 | |
| \declaremodule{standard}{locale}
 | |
| \modulesynopsis{Internationalization services.}
 | |
| \moduleauthor{Martin von L\"owis}{loewis@informatik.hu-berlin.de}
 | |
| \sectionauthor{Martin von L\"owis}{loewis@informatik.hu-berlin.de}
 | |
| 
 | |
| 
 | |
| The \module{locale} module opens access to the \POSIX{} locale
 | |
| database and functionality. The \POSIX{} locale mechanism allows
 | |
| programmers to deal with certain cultural issues in an application,
 | |
| without requiring the programmer to know all the specifics of each
 | |
| country where the software is executed.
 | |
| 
 | |
| The \module{locale} module is implemented on top of the
 | |
| \module{_locale}\refbimodindex{_locale} module, which in turn uses an
 | |
| ANSI C locale implementation if available.
 | |
| 
 | |
| The \module{locale} module defines the following exception and
 | |
| functions:
 | |
| 
 | |
| 
 | |
| \begin{excdesc}{Error}
 | |
|   Exception raised when \function{setlocale()} fails.
 | |
| \end{excdesc}
 | |
| 
 | |
| \begin{funcdesc}{setlocale}{category\optional{, locale}}
 | |
|   If \var{locale} is specified, it may be a string, a tuple of the
 | |
|   form \code{(\var{language code}, \var{encoding})}, or \code{None}.
 | |
|   If it is a tuple, it is converted to a string using the locale
 | |
|   aliasing engine.  If \var{locale} is given and not \code{None},
 | |
|   \function{setlocale()} modifies the locale setting for the
 | |
|   \var{category}.  The available categories are listed in the data
 | |
|   description below.  The value is the name of a locale.  An empty
 | |
|   string specifies the user's default settings. If the modification of
 | |
|   the locale fails, the exception \exception{Error} is raised.  If
 | |
|   successful, the new locale setting is returned.
 | |
| 
 | |
|   If \var{locale} is omitted or \code{None}, the current setting for
 | |
|   \var{category} is returned.
 | |
| 
 | |
|   \function{setlocale()} is not thread safe on most systems.
 | |
|   Applications typically start with a call of
 | |
| 
 | |
| \begin{verbatim}
 | |
| import locale
 | |
| locale.setlocale(locale.LC_ALL, '')
 | |
| \end{verbatim}
 | |
| 
 | |
|   This sets the locale for all categories to the user's default
 | |
|   setting (typically specified in the \envvar{LANG} environment
 | |
|   variable).  If the locale is not changed thereafter, using
 | |
|   multithreading should not cause problems.
 | |
| 
 | |
|   \versionchanged[Added support for tuple values of the \var{locale}
 | |
|                   parameter]{2.0}
 | |
| \end{funcdesc}
 | |
| 
 | |
| \begin{funcdesc}{localeconv}{}
 | |
|   Returns the database of the local conventions as a dictionary.
 | |
|   This dictionary has the following strings as keys:
 | |
| 
 | |
|   \begin{tableiii}{l|l|p{3in}}{constant}{Key}{Category}{Meaning}
 | |
|     \lineiii{LC_NUMERIC}{\code{'decimal_point'}}
 | |
|             {Decimal point character.}
 | |
|     \lineiii{}{\code{'grouping'}}
 | |
|             {Sequence of numbers specifying at which relative positions
 | |
|              the \code{'thousands_sep'} is expected.  If the sequence is
 | |
|              terminated with \constant{CHAR_MAX}, no further grouping
 | |
|              is performed. If the sequence terminates with a \code{0}, 
 | |
|              the last group size is repeatedly used.}
 | |
|     \lineiii{}{\code{'thousands_sep'}}
 | |
|             {Character used between groups.}\hline
 | |
|     \lineiii{LC_MONETARY}{\code{'int_curr_symbol'}}
 | |
|             {International currency symbol.}
 | |
|     \lineiii{}{\code{'currency_symbol'}}
 | |
|             {Local currency symbol.}
 | |
|     \lineiii{}{\code{'mon_decimal_point'}}
 | |
|             {Decimal point used for monetary values.}
 | |
|     \lineiii{}{\code{'mon_thousands_sep'}}
 | |
|             {Group separator used for monetary values.}
 | |
|     \lineiii{}{\code{'mon_grouping'}}
 | |
|             {Equivalent to \code{'grouping'}, used for monetary
 | |
|              values.}
 | |
|     \lineiii{}{\code{'positive_sign'}}
 | |
|             {Symbol used to annotate a positive monetary value.}
 | |
|     \lineiii{}{\code{'negative_sign'}}
 | |
|             {Symbol used to annotate a negative monetary value.}
 | |
|     \lineiii{}{\code{'frac_digits'}}
 | |
|             {Number of fractional digits used in local formatting
 | |
|              of monetary values.}
 | |
|     \lineiii{}{\code{'int_frac_digits'}}
 | |
|             {Number of fractional digits used in international
 | |
|              formatting of monetary values.}
 | |
|   \end{tableiii}
 | |
| 
 | |
|   The possible values for \code{'p_sign_posn'} and
 | |
|   \code{'n_sign_posn'} are given below.
 | |
| 
 | |
|   \begin{tableii}{c|l}{code}{Value}{Explanation}
 | |
|     \lineii{0}{Currency and value are surrounded by parentheses.}
 | |
|     \lineii{1}{The sign should precede the value and currency symbol.}
 | |
|     \lineii{2}{The sign should follow the value and currency symbol.}
 | |
|     \lineii{3}{The sign should immediately precede the value.}
 | |
|     \lineii{4}{The sign should immediately follow the value.}
 | |
|     \lineii{\constant{CHAR_MAX}}{Nothing is specified in this locale.}
 | |
|   \end{tableii}
 | |
| \end{funcdesc}
 | |
| 
 | |
| \begin{funcdesc}{nl_langinfo}{option}
 | |
| 
 | |
| Return some locale-specific information as a string. This function is
 | |
| not available on all systems, and the set of possible options might
 | |
| also vary across platforms. The possible argument values are numbers,
 | |
| for which symbolic constants are available in the locale module.
 | |
| 
 | |
| \end{funcdesc}
 | |
| 
 | |
| \begin{funcdesc}{getdefaultlocale}{\optional{envvars}}
 | |
|   Tries to determine the default locale settings and returns
 | |
|   them as a tuple of the form \code{(\var{language code},
 | |
|   \var{encoding})}.
 | |
| 
 | |
|   According to \POSIX, a program which has not called
 | |
|   \code{setlocale(LC_ALL, '')} runs using the portable \code{'C'}
 | |
|   locale.  Calling \code{setlocale(LC_ALL, '')} lets it use the
 | |
|   default locale as defined by the \envvar{LANG} variable.  Since we
 | |
|   do not want to interfere with the current locale setting we thus
 | |
|   emulate the behavior in the way described above.
 | |
| 
 | |
|   To maintain compatibility with other platforms, not only the
 | |
|   \envvar{LANG} variable is tested, but a list of variables given as the
 | |
|   \var{envvars} parameter.  The first found to be defined will be
 | |
|   used.  \var{envvars} defaults to the search path used in GNU gettext;
 | |
|   it must always contain the variable name \samp{LANG}.  The GNU
 | |
|   gettext search path contains \code{'LANGUAGE'}, \code{'LC_ALL'},
 | |
|   \code{'LC_CTYPE'}, and \code{'LANG'}, in that order.
 | |
| 
 | |
|   Except for the code \code{'C'}, the language code corresponds to
 | |
|   \rfc{1766}.  \var{language code} and \var{encoding} may be
 | |
|   \code{None} if their values cannot be determined.
 | |
|   \versionadded{2.0}
 | |
| \end{funcdesc}
 | |
| 
 | |
| \begin{funcdesc}{getlocale}{\optional{category}}
 | |
|   Returns the current setting for the given locale category as a
 | |
|   tuple (language code, encoding).  \var{category} may be one of the
 | |
|   \constant{LC_*} values except \constant{LC_ALL}.  It defaults to
 | |
|   \constant{LC_CTYPE}.
 | |
| 
 | |
|   Except for the code \code{'C'}, the language code corresponds to
 | |
|   \rfc{1766}.  \var{language code} and \var{encoding} may be
 | |
|   \code{None} if their values cannot be determined.
 | |
|   \versionadded{2.0}
 | |
| \end{funcdesc}
 | |
| 
 | |
| \begin{funcdesc}{normalize}{localename}
 | |
|   Returns a normalized locale code for the given locale name.  The
 | |
|   returned locale code is formatted for use with
 | |
|   \function{setlocale()}.  If normalization fails, the original name
 | |
|   is returned unchanged.
 | |
| 
 | |
|   If the given encoding is not known, the function defaults to
 | |
|   the default encoding for the locale code just like
 | |
|   \function{setlocale()}.
 | |
|   \versionadded{2.0}
 | |
| \end{funcdesc}
 | |
| 
 | |
| \begin{funcdesc}{resetlocale}{\optional{category}}
 | |
|   Sets the locale for \var{category} to the default setting.
 | |
| 
 | |
|   The default setting is determined by calling
 | |
|   \function{getdefaultlocale()}.  \var{category} defaults to
 | |
|   \constant{LC_ALL}.
 | |
|   \versionadded{2.0}
 | |
| \end{funcdesc}
 | |
| 
 | |
| \begin{funcdesc}{strcoll}{string1, string2}
 | |
|   Compares two strings according to the current
 | |
|   \constant{LC_COLLATE} setting.  Like any other compare function, it
 | |
|   returns a negative value, a positive value, or \code{0}, depending on
 | |
|   whether \var{string1} collates before or after \var{string2} or is
 | |
|   equal to it.
 | |
| \end{funcdesc}
 | |
| 
 | |
| \begin{funcdesc}{strxfrm}{string}
 | |
|   Transforms a string to one that can be used for the built-in
 | |
|   function \function{cmp()}\bifuncindex{cmp}, and still returns
 | |
|   locale-aware results.  This function can be used when the same
 | |
|   string is compared repeatedly, e.g. when collating a sequence of
 | |
|   strings.
 | |
| \end{funcdesc}
 | |
| 
 | |
| \begin{funcdesc}{format}{format, val\optional{, grouping}}
 | |
|   Formats a number \var{val} according to the current
 | |
|   \constant{LC_NUMERIC} setting.  The format follows the conventions
 | |
|   of the \code{\%} operator.  For floating point values, the decimal
 | |
|   point is modified if appropriate.  If \var{grouping} is true, also
 | |
|   takes the grouping into account.
 | |
| \end{funcdesc}
 | |
| 
 | |
| \begin{funcdesc}{str}{float}
 | |
|   Formats a floating point number using the same format as the
 | |
|   built-in function \code{str(\var{float})}, but takes the decimal
 | |
|   point into account.
 | |
| \end{funcdesc}
 | |
| 
 | |
| \begin{funcdesc}{atof}{string}
 | |
|   Converts a string to a floating point number, following the
 | |
|   \constant{LC_NUMERIC} settings.
 | |
| \end{funcdesc}
 | |
| 
 | |
| \begin{funcdesc}{atoi}{string}
 | |
|   Converts a string to an integer, following the
 | |
|   \constant{LC_NUMERIC} conventions.
 | |
| \end{funcdesc}
 | |
| 
 | |
| \begin{datadesc}{LC_CTYPE}
 | |
| \refstmodindex{string}
 | |
|   Locale category for the character type functions.  Depending on the
 | |
|   settings of this category, the functions of module
 | |
|   \refmodule{string} dealing with case change their behaviour.
 | |
| \end{datadesc}
 | |
| 
 | |
| \begin{datadesc}{LC_COLLATE}
 | |
|   Locale category for sorting strings.  The functions
 | |
|   \function{strcoll()} and \function{strxfrm()} of the
 | |
|   \module{locale} module are affected.
 | |
| \end{datadesc}
 | |
| 
 | |
| \begin{datadesc}{LC_TIME}
 | |
|   Locale category for the formatting of time.  The function
 | |
|   \function{time.strftime()} follows these conventions.
 | |
| \end{datadesc}
 | |
| 
 | |
| \begin{datadesc}{LC_MONETARY}
 | |
|   Locale category for formatting of monetary values.  The available
 | |
|   options can be obtained from the \function{localeconv()} function.
 | |
| \end{datadesc}
 | |
| 
 | |
| \begin{datadesc}{LC_MESSAGES}
 | |
|   Locale category for message display. Python currently does not
 | |
|   support application specific locale-aware messages.  Messages
 | |
|   displayed by the operating system, like those returned by
 | |
|   \function{os.strerror()} might be affected by this category.
 | |
| \end{datadesc}
 | |
| 
 | |
| \begin{datadesc}{LC_NUMERIC}
 | |
|   Locale category for formatting numbers.  The functions
 | |
|   \function{format()}, \function{atoi()}, \function{atof()} and
 | |
|   \function{str()} of the \module{locale} module are affected by that
 | |
|   category.  All other numeric formatting operations are not
 | |
|   affected.
 | |
| \end{datadesc}
 | |
| 
 | |
| \begin{datadesc}{LC_ALL}
 | |
|   Combination of all locale settings.  If this flag is used when the
 | |
|   locale is changed, setting the locale for all categories is
 | |
|   attempted. If that fails for any category, no category is changed at
 | |
|   all.  When the locale is retrieved using this flag, a string
 | |
|   indicating the setting for all categories is returned. This string
 | |
|   can be later used to restore the settings.
 | |
| \end{datadesc}
 | |
| 
 | |
| \begin{datadesc}{CHAR_MAX}
 | |
|   This is a symbolic constant used for different values returned by
 | |
|   \function{localeconv()}.
 | |
| \end{datadesc}
 | |
| 
 | |
| The \function{nl_langinfo()} function accepts one of the following keys.
 | |
| Most descriptions are taken from the corresponding description in the
 | |
| GNU C library.
 | |
| 
 | |
| \begin{datadesc}{CODESET}
 | |
| Return a string with the name of the character encoding used in the
 | |
| selected locale.
 | |
| \end{datadesc}
 | |
| 
 | |
| \begin{datadesc}{D_T_FMT}
 | |
| Return a string that can be used as a format string for strftime(3) to
 | |
| represent time and date in a locale-specific way.
 | |
| \end{datadesc}
 | |
| 
 | |
| \begin{datadesc}{D_FMT}
 | |
| Return a string that can be used as a format string for strftime(3) to
 | |
| represent a date in a locale-specific way.
 | |
| \end{datadesc}
 | |
| 
 | |
| \begin{datadesc}{T_FMT}
 | |
| Return a string that can be used as a format string for strftime(3) to
 | |
| represent a time in a locale-specific way.
 | |
| \end{datadesc}
 | |
| 
 | |
| \begin{datadesc}{T_FMT_AMPM}
 | |
| The return value can be used as a format string for `strftime' to
 | |
| represent time in the am/pm format.
 | |
| \end{datadesc}
 | |
| 
 | |
| \begin{datadesc}{DAY_1 ... DAY_7}
 | |
| Return name of the n-th day of the week.  [Warning: this follows the US
 | |
| convention \constant{DAY_1} = Sunday, not the international convention (ISO 8601)
 | |
| that Monday is the first day of the week.]
 | |
| \end{datadesc}
 | |
| 
 | |
| \begin{datadesc}{ABDAY_1 ... ABDAY_7}
 | |
| Return abbreviated name of the n-th day of the week.
 | |
| \end{datadesc}
 | |
| 
 | |
| \begin{datadesc}{MON_1 ... MON_12}
 | |
| Return name of the n-th month.
 | |
| \end{datadesc}
 | |
| 
 | |
| \begin{datadesc}{ABMON_1 ... ABMON_12}
 | |
| Return abbreviated name of the n-th month.
 | |
| \end{datadesc}
 | |
| 
 | |
| \begin{datadesc}{RADIXCHAR}
 | |
| Return radix character (decimal dot, decimal comma, etc.).
 | |
| \end{datadesc}
 | |
| 
 | |
| \begin{datadesc}{THOUSEP}
 | |
| Return separator character for thousands (groups of three digits).
 | |
| \end{datadesc}
 | |
| 
 | |
| \begin{datadesc}{YESEXPR}
 | |
| Return a regular expression that can be used with the regex
 | |
| function to recognize a positive response to a yes/no question.
 | |
| [Warning: the expression is in the syntax suitable for the
 | |
|   regex C library function, which might differ from the syntax
 | |
|   used in \module{re}.]
 | |
| \end{datadesc}
 | |
| 
 | |
| \begin{datadesc}{NOEXPR}
 | |
| Return a regular expression that can be used with the regex(3)
 | |
| function to recognize a negative response to a yes/no question.
 | |
| \end{datadesc}
 | |
| 
 | |
| \begin{datadesc}{CRNCYSTR}
 | |
| Return the currency symbol, preceded by \samp{-} if the symbol should
 | |
| appear before the value, \samp{+} if the symbol should appear after the
 | |
| value, or \samp{.} if the symbol should replace the radix character.
 | |
| \end{datadesc}
 | |
| 
 | |
| \begin{datadesc}{ERA}
 | |
| The return value represents the era used in the current locale.
 | |
| 
 | |
| Most locales do not define this value.  An example of a locale which
 | |
| does define this value is the Japanese one.  In Japan, the traditional
 | |
| representation of dates includes the name of the era corresponding to
 | |
| the then-emperor's reign.
 | |
| 
 | |
| Normally it should not be necessary to use this value directly.
 | |
| Specifying the \code{E} modifier in their format strings causes the
 | |
| \function{strftime} function to use this information.  The format of the
 | |
| returned string is not specified, and therefore you should not assume
 | |
| knowledge of it on different systems.
 | |
| \end{datadesc}
 | |
| 
 | |
| \begin{datadesc}{ERA_YEAR}
 | |
| The return value gives the year in the relevant era of the locale.
 | |
| \end{datadesc}
 | |
| 
 | |
| \begin{datadesc}{ERA_D_T_FMT}
 | |
| This return value can be used as a format string for
 | |
| \function{strftime} to represent dates and times in a locale-specific
 | |
| era-based way.
 | |
| \end{datadesc}
 | |
| 
 | |
| \begin{datadesc}{ERA_D_FMT}
 | |
| This return value can be used as a format string for
 | |
| \function{strftime} to represent a date in a locale-specific era-based
 | |
| way.
 | |
| \end{datadesc}
 | |
| 
 | |
| \begin{datadesc}{ALT_DIGITS}
 | |
| The return value is a representation of up to 100 values used to
 | |
| represent the values 0 to 99.
 | |
| \end{datadesc}
 | |
| 
 | |
| Example:
 | |
| 
 | |
| \begin{verbatim}
 | |
| >>> import locale
 | |
| >>> loc = locale.setlocale(locale.LC_ALL) # get current locale
 | |
| >>> locale.setlocale(locale.LC_ALL, 'de') # use German locale
 | |
| >>> locale.strcoll('f\xe4n', 'foo') # compare a string containing an umlaut 
 | |
| >>> locale.setlocale(locale.LC_ALL, '') # use user's preferred locale
 | |
| >>> locale.setlocale(locale.LC_ALL, 'C') # use default (C) locale
 | |
| >>> locale.setlocale(locale.LC_ALL, loc) # restore saved locale
 | |
| \end{verbatim}
 | |
| 
 | |
| 
 | |
| \subsection{Background, details, hints, tips and caveats}
 | |
| 
 | |
| The C standard defines the locale as a program-wide property that may
 | |
| be relatively expensive to change.  On top of that, some
 | |
| implementations are broken in such a way that frequent locale changes
 | |
| may cause core dumps.  This makes the locale somewhat painful to use
 | |
| correctly.
 | |
| 
 | |
| Initially, when a program is started, the locale is the \samp{C} locale, no
 | |
| matter what the user's preferred locale is.  The program must
 | |
| explicitly say that it wants the user's preferred locale settings by
 | |
| calling \code{setlocale(LC_ALL, '')}.
 | |
| 
 | |
| It is generally a bad idea to call \function{setlocale()} in some library
 | |
| routine, since as a side effect it affects the entire program.  Saving
 | |
| and restoring it is almost as bad: it is expensive and affects other
 | |
| threads that happen to run before the settings have been restored.
 | |
| 
 | |
| If, when coding a module for general use, you need a locale
 | |
| independent version of an operation that is affected by the locale
 | |
| (e.g. \function{string.lower()}, or certain formats used with
 | |
| \function{time.strftime()}), you will have to find a way to do it
 | |
| without using the standard library routine.  Even better is convincing
 | |
| yourself that using locale settings is okay.  Only as a last resort
 | |
| should you document that your module is not compatible with
 | |
| non-\samp{C} locale settings.
 | |
| 
 | |
| The case conversion functions in the
 | |
| \refmodule{string}\refstmodindex{string} module are affected by the
 | |
| locale settings.  When a call to the \function{setlocale()} function
 | |
| changes the \constant{LC_CTYPE} settings, the variables
 | |
| \code{string.lowercase}, \code{string.uppercase} and
 | |
| \code{string.letters} are recalculated.  Note that code that uses
 | |
| these variables through `\keyword{from} ... \keyword{import} ...',
 | |
| e.g.\ \code{from string import letters}, is not affected by subsequent
 | |
| \function{setlocale()} calls.
 | |
| 
 | |
| The only way to perform numeric operations according to the locale
 | |
| is to use the special functions defined by this module:
 | |
| \function{atof()}, \function{atoi()}, \function{format()},
 | |
| \function{str()}.
 | |
| 
 | |
| \subsection{For extension writers and programs that embed Python
 | |
|             \label{embedding-locale}}
 | |
| 
 | |
| Extension modules should never call \function{setlocale()}, except to
 | |
| find out what the current locale is.  But since the return value can
 | |
| only be used portably to restore it, that is not very useful (except
 | |
| perhaps to find out whether or not the locale is \samp{C}).
 | |
| 
 | |
| When Python is embedded in an application, if the application sets the
 | |
| locale to something specific before initializing Python, that is
 | |
| generally okay, and Python will use whatever locale is set,
 | |
| \emph{except} that the \constant{LC_NUMERIC} locale should always be
 | |
| \samp{C}.
 | |
| 
 | |
| The \function{setlocale()} function in the \module{locale} module
 | |
| gives the Python programmer the impression that you can manipulate the
 | |
| \constant{LC_NUMERIC} locale setting, but this is not the case at the C
 | |
| level: C code will always find that the \constant{LC_NUMERIC} locale
 | |
| setting is \samp{C}.  This is because too much would break when the
 | |
| decimal point character is set to something other than a period
 | |
| (e.g. the Python parser would break).  Caveat: threads that run
 | |
| without holding Python's global interpreter lock may occasionally find
 | |
| that the numeric locale setting differs; this is because the only
 | |
| portable way to implement this feature is to set the numeric locale
 | |
| settings to what the user requests, extract the relevant
 | |
| characteristics, and then restore the \samp{C} numeric locale.
 | |
| 
 | |
| When Python code uses the \module{locale} module to change the locale,
 | |
| this also affects the embedding application.  If the embedding
 | |
| application doesn't want this to happen, it should remove the
 | |
| \module{_locale} extension module (which does all the work) from the
 | |
| table of built-in modules in the \file{config.c} file, and make sure
 | |
| that the \module{_locale} module is not accessible as a shared library.
 | 
