mirror of
				https://github.com/python/cpython.git
				synced 2025-11-03 11:23:31 +00:00 
			
		
		
		
	
		
			
				
	
	
		
			351 lines
		
	
	
	
		
			15 KiB
		
	
	
	
		
			TeX
		
	
	
	
	
	
			
		
		
	
	
			351 lines
		
	
	
	
		
			15 KiB
		
	
	
	
		
			TeX
		
	
	
	
	
	
\section{\module{locale} ---
 | 
						|
         Internationalization services}
 | 
						|
 | 
						|
\declaremodule{standard}{locale}
 | 
						|
\modulesynopsis{Internationalization services.}
 | 
						|
\moduleauthor{Martin von L\"owis}{loewis@informatik.hu-berlin.de}
 | 
						|
\sectionauthor{Martin von L\"owis}{loewis@informatik.hu-berlin.de}
 | 
						|
 | 
						|
 | 
						|
The \module{locale} module opens access to the \POSIX{} locale
 | 
						|
database and functionality. The \POSIX{} locale mechanism allows
 | 
						|
programmers to deal with certain cultural issues in an application,
 | 
						|
without requiring the programmer to know all the specifics of each
 | 
						|
country where the software is executed.
 | 
						|
 | 
						|
The \module{locale} module is implemented on top of the
 | 
						|
\module{_locale}\refbimodindex{_locale} module, which in turn uses an
 | 
						|
ANSI C locale implementation if available.
 | 
						|
 | 
						|
The \module{locale} module defines the following exception and
 | 
						|
functions:
 | 
						|
 | 
						|
 | 
						|
\begin{excdesc}{Error}
 | 
						|
  Exception raised when \function{setlocale()} fails.
 | 
						|
\end{excdesc}
 | 
						|
 | 
						|
\begin{funcdesc}{setlocale}{category\optional{, locale}}
 | 
						|
  If \var{locale} is specified, it may be a string, a tuple of the
 | 
						|
  form \code{(\var{language code}, \var{encoding})}, or \code{None}.
 | 
						|
  If it is a tuple, it is converted to a string using the locale
 | 
						|
  aliasing engine.  If \var{locale} is given and not \code{None},
 | 
						|
  \function{setlocale()} modifies the locale setting for the
 | 
						|
  \var{category}.  The available categories are listed in the data
 | 
						|
  description below.  The value is the name of a locale.  An empty
 | 
						|
  string specifies the user's default settings. If the modification of
 | 
						|
  the locale fails, the exception \exception{Error} is raised.  If
 | 
						|
  successful, the new locale setting is returned.
 | 
						|
 | 
						|
  If \var{locale} is omitted or \code{None}, the current setting for
 | 
						|
  \var{category} is returned.
 | 
						|
 | 
						|
  \function{setlocale()} is not thread safe on most systems.
 | 
						|
  Applications typically start with a call of
 | 
						|
 | 
						|
\begin{verbatim}
 | 
						|
import locale
 | 
						|
locale.setlocale(locale.LC_ALL, '')
 | 
						|
\end{verbatim}
 | 
						|
 | 
						|
  This sets the locale for all categories to the user's default
 | 
						|
  setting (typically specified in the \envvar{LANG} environment
 | 
						|
  variable).  If the locale is not changed thereafter, using
 | 
						|
  multithreading should not cause problems.
 | 
						|
 | 
						|
  \versionchanged[Added support for tuple values of the \var{locale}
 | 
						|
                  parameter]{2.0}
 | 
						|
\end{funcdesc}
 | 
						|
 | 
						|
\begin{funcdesc}{localeconv}{}
 | 
						|
  Returns the database of the local conventions as a dictionary.
 | 
						|
  This dictionary has the following strings as keys:
 | 
						|
 | 
						|
  \begin{tableiii}{l|l|p{3in}}{constant}{Key}{Category}{Meaning}
 | 
						|
    \lineiii{LC_NUMERIC}{\code{'decimal_point'}}
 | 
						|
            {Decimal point character.}
 | 
						|
    \lineiii{}{\code{'grouping'}}
 | 
						|
            {Sequence of numbers specifying at which relative positions
 | 
						|
             the \code{'thousands_sep'} is expected.  If the sequence is
 | 
						|
             terminated with \constant{CHAR_MAX}, no further grouping
 | 
						|
             is performed. If the sequence terminates with a \code{0}, 
 | 
						|
             the last group size is repeatedly used.}
 | 
						|
    \lineiii{}{\code{'thousands_sep'}}
 | 
						|
            {Character used between groups.}\hline
 | 
						|
    \lineiii{LC_MONETARY}{\code{'int_curr_symbol'}}
 | 
						|
            {International currency symbol.}
 | 
						|
    \lineiii{}{\code{'currency_symbol'}}
 | 
						|
            {Local currency symbol.}
 | 
						|
    \lineiii{}{\code{'mon_decimal_point'}}
 | 
						|
            {Decimal point used for monetary values.}
 | 
						|
    \lineiii{}{\code{'mon_thousands_sep'}}
 | 
						|
            {Group separator used for monetary values.}
 | 
						|
    \lineiii{}{\code{'mon_grouping'}}
 | 
						|
            {Equivalent to \code{'grouping'}, used for monetary
 | 
						|
             values.}
 | 
						|
    \lineiii{}{\code{'positive_sign'}}
 | 
						|
            {Symbol used to annotate a positive monetary value.}
 | 
						|
    \lineiii{}{\code{'negative_sign'}}
 | 
						|
            {Symbol used to annotate a negative monetary value.}
 | 
						|
    \lineiii{}{\code{'frac_digits'}}
 | 
						|
            {Number of fractional digits used in local formatting
 | 
						|
             of monetary values.}
 | 
						|
    \lineiii{}{\code{'int_frac_digits'}}
 | 
						|
            {Number of fractional digits used in international
 | 
						|
             formatting of monetary values.}
 | 
						|
  \end{tableiii}
 | 
						|
 | 
						|
  The possible values for \code{'p_sign_posn'} and
 | 
						|
  \code{'n_sign_posn'} are given below.
 | 
						|
 | 
						|
  \begin{tableii}{c|l}{code}{Value}{Explanation}
 | 
						|
    \lineii{0}{Currency and value are surrounded by parentheses.}
 | 
						|
    \lineii{1}{The sign should precede the value and currency symbol.}
 | 
						|
    \lineii{2}{The sign should follow the value and currency symbol.}
 | 
						|
    \lineii{3}{The sign should immediately precede the value.}
 | 
						|
    \lineii{4}{The sign should immediately follow the value.}
 | 
						|
    \lineii{\constant{CHAR_MAX}}{Nothing is specified in this locale.}
 | 
						|
  \end{tableii}
 | 
						|
\end{funcdesc}
 | 
						|
 | 
						|
\begin{funcdesc}{getdefaultlocale}{\optional{envvars}}
 | 
						|
  Tries to determine the default locale settings and returns
 | 
						|
  them as a tuple of the form \code{(\var{language code},
 | 
						|
  \var{encoding})}.
 | 
						|
 | 
						|
  According to \POSIX, a program which has not called
 | 
						|
  \code{setlocale(LC_ALL, '')} runs using the portable \code{'C'}
 | 
						|
  locale.  Calling \code{setlocale(LC_ALL, '')} lets it use the
 | 
						|
  default locale as defined by the \envvar{LANG} variable.  Since we
 | 
						|
  do not want to interfere with the current locale setting we thus
 | 
						|
  emulate the behavior in the way described above.
 | 
						|
 | 
						|
  To maintain compatibility with other platforms, not only the
 | 
						|
  \envvar{LANG} variable is tested, but a list of variables given as
 | 
						|
  the \var{envvars} parameter.  The first found to be defined will be
 | 
						|
  used.  \var{envvars} defaults to the search path used in GNU gettext;
 | 
						|
  it must always contain the variable name \samp{LANG}.  The GNU
 | 
						|
  gettext search path contains \code{'LANGUAGE'}, \code{'LC_ALL'},
 | 
						|
  \code{'LC_CTYPE'}, and \code{'LANG'}, in that order.
 | 
						|
 | 
						|
  Except for the code \code{'C'}, the language code corresponds to
 | 
						|
  \rfc{1766}.  \var{language code} and \var{encoding} may be
 | 
						|
  \code{None} if their values cannot be determined.
 | 
						|
  \versionadded{2.0}
 | 
						|
\end{funcdesc}
 | 
						|
 | 
						|
\begin{funcdesc}{getlocale}{\optional{category}}
 | 
						|
  Returns the current setting for the given locale category as
 | 
						|
  a tuple \code{(\var{language code}, \var{encoding})}.  \var{category} may be one of the
 | 
						|
  \constant{LC_*} values except \constant{LC_ALL}.  It defaults to
 | 
						|
  \constant{LC_CTYPE}.
 | 
						|
 | 
						|
  Except for the code \code{'C'}, the language code corresponds to
 | 
						|
  \rfc{1766}.  \var{language code} and \var{encoding} may be
 | 
						|
  \code{None} if their values cannot be determined.
 | 
						|
  \versionadded{2.0}
 | 
						|
\end{funcdesc}
 | 
						|
 | 
						|
\begin{funcdesc}{normalize}{localename}
 | 
						|
  Returns a normalized locale code for the given locale name.  The
 | 
						|
  returned locale code is formatted for use with
 | 
						|
  \function{setlocale()}.  If normalization fails, the original name
 | 
						|
  is returned unchanged.
 | 
						|
 | 
						|
  If the given encoding is not known, the function defaults to
 | 
						|
  the default encoding for the locale code just like
 | 
						|
  \function{setlocale()}.
 | 
						|
  \versionadded{2.0}
 | 
						|
\end{funcdesc}
 | 
						|
 | 
						|
\begin{funcdesc}{resetlocale}{\optional{category}}
 | 
						|
  Sets the locale for \var{category} to the default setting.
 | 
						|
 | 
						|
  The default setting is determined by calling
 | 
						|
  \function{getdefaultlocale()}.  \var{category} defaults to
 | 
						|
  \constant{LC_ALL}.
 | 
						|
  \versionadded{2.0}
 | 
						|
\end{funcdesc}
 | 
						|
 | 
						|
\begin{funcdesc}{strcoll}{string1, string2}
 | 
						|
  Compares two strings according to the current
 | 
						|
  \constant{LC_COLLATE} setting. Like any other compare function,
 | 
						|
  returns a negative, or a positive value, or \code{0}, depending on
 | 
						|
  whether \var{string1} collates before or after \var{string2} or is
 | 
						|
  equal to it.
 | 
						|
\end{funcdesc}
 | 
						|
 | 
						|
\begin{funcdesc}{strxfrm}{string}
 | 
						|
  Transforms a string to one that can be used for the built-in
 | 
						|
  function \function{cmp()}\bifuncindex{cmp}, and still returns
 | 
						|
  locale-aware results.  This function can be used when the same
 | 
						|
  string is compared repeatedly, e.g. when collating a sequence of
 | 
						|
  strings.
 | 
						|
\end{funcdesc}
 | 
						|
 | 
						|
\begin{funcdesc}{format}{format, val\optional{, grouping}}
 | 
						|
  Formats a number \var{val} according to the current
 | 
						|
  \constant{LC_NUMERIC} setting.  The format follows the conventions
 | 
						|
  of the \code{\%} operator.  For floating point values, the decimal
 | 
						|
  point is modified if appropriate.  If \var{grouping} is true, also
 | 
						|
  takes the grouping into account.
 | 
						|
\end{funcdesc}
 | 
						|
 | 
						|
\begin{funcdesc}{str}{float}
 | 
						|
  Formats a floating point number using the same format as the
 | 
						|
  built-in function \code{str(\var{float})}, but takes the decimal
 | 
						|
  point into account.
 | 
						|
\end{funcdesc}
 | 
						|
 | 
						|
\begin{funcdesc}{atof}{string}
 | 
						|
  Converts a string to a floating point number, following the
 | 
						|
  \constant{LC_NUMERIC} settings.
 | 
						|
\end{funcdesc}
 | 
						|
 | 
						|
\begin{funcdesc}{atoi}{string}
 | 
						|
  Converts a string to an integer, following the
 | 
						|
  \constant{LC_NUMERIC} conventions.
 | 
						|
\end{funcdesc}
 | 
						|
 | 
						|
\begin{datadesc}{LC_CTYPE}
 | 
						|
\refstmodindex{string}
 | 
						|
  Locale category for the character type functions.  Depending on the
 | 
						|
  settings of this category, the functions of module
 | 
						|
  \refmodule{string} dealing with case change their behaviour.
 | 
						|
\end{datadesc}
 | 
						|
 | 
						|
\begin{datadesc}{LC_COLLATE}
 | 
						|
  Locale category for sorting strings.  The functions
 | 
						|
  \function{strcoll()} and \function{strxfrm()} of the
 | 
						|
  \module{locale} module are affected.
 | 
						|
\end{datadesc}
 | 
						|
 | 
						|
\begin{datadesc}{LC_TIME}
 | 
						|
  Locale category for the formatting of time.  The function
 | 
						|
  \function{time.strftime()} follows these conventions.
 | 
						|
\end{datadesc}
 | 
						|
 | 
						|
\begin{datadesc}{LC_MONETARY}
 | 
						|
  Locale category for formatting of monetary values.  The available
 | 
						|
  options can be obtained from the \function{localeconv()} function.
 | 
						|
\end{datadesc}
 | 
						|
 | 
						|
\begin{datadesc}{LC_MESSAGES}
 | 
						|
  Locale category for message display. Python currently does not
 | 
						|
  support application specific locale-aware messages.  Messages
 | 
						|
  displayed by the operating system, like those returned by
 | 
						|
  \function{os.strerror()} might be affected by this category.
 | 
						|
\end{datadesc}
 | 
						|
 | 
						|
\begin{datadesc}{LC_NUMERIC}
 | 
						|
  Locale category for formatting numbers.  The functions
 | 
						|
  \function{format()}, \function{atoi()}, \function{atof()} and
 | 
						|
  \function{str()} of the \module{locale} module are affected by that
 | 
						|
  category.  All other numeric formatting operations are not
 | 
						|
  affected.
 | 
						|
\end{datadesc}
 | 
						|
 | 
						|
\begin{datadesc}{LC_ALL}
 | 
						|
  Combination of all locale settings.  If this flag is used when the
 | 
						|
  locale is changed, setting the locale for all categories is
 | 
						|
  attempted. If that fails for any category, no category is changed at
 | 
						|
  all.  When the locale is retrieved using this flag, a string
 | 
						|
  indicating the setting for all categories is returned. This string
 | 
						|
  can be later used to restore the settings.
 | 
						|
\end{datadesc}
 | 
						|
 | 
						|
\begin{datadesc}{CHAR_MAX}
 | 
						|
  This is a symbolic constant used for different values returned by
 | 
						|
  \function{localeconv()}.
 | 
						|
\end{datadesc}
 | 
						|
 | 
						|
Example:
 | 
						|
 | 
						|
\begin{verbatim}
 | 
						|
>>> import locale
 | 
						|
>>> loc = locale.setlocale(locale.LC_ALL) # get current locale
 | 
						|
>>> locale.setlocale(locale.LC_ALL, 'de') # use German locale
 | 
						|
>>> locale.strcoll('f\xe4n', 'foo') # compare a string containing an umlaut 
 | 
						|
>>> locale.setlocale(locale.LC_ALL, '') # use user's preferred locale
 | 
						|
>>> locale.setlocale(locale.LC_ALL, 'C') # use default (C) locale
 | 
						|
>>> locale.setlocale(locale.LC_ALL, loc) # restore saved locale
 | 
						|
\end{verbatim}
 | 
						|
 | 
						|
 | 
						|
\subsection{Background, details, hints, tips and caveats}
 | 
						|
 | 
						|
The C standard defines the locale as a program-wide property that may
 | 
						|
be relatively expensive to change.  On top of that, some
 | 
						|
implementations are broken in such a way that frequent locale changes
 | 
						|
may cause core dumps.  This makes the locale somewhat painful to use
 | 
						|
correctly.
 | 
						|
 | 
						|
Initially, when a program is started, the locale is the \samp{C} locale, no
 | 
						|
matter what the user's preferred locale is.  The program must
 | 
						|
explicitly say that it wants the user's preferred locale settings by
 | 
						|
calling \code{setlocale(LC_ALL, '')}.
 | 
						|
 | 
						|
It is generally a bad idea to call \function{setlocale()} in some library
 | 
						|
routine, since as a side effect it affects the entire program.  Saving
 | 
						|
and restoring it is almost as bad: it is expensive and affects other
 | 
						|
threads that happen to run before the settings have been restored.
 | 
						|
 | 
						|
If, when coding a module for general use, you need a locale
 | 
						|
independent version of an operation that is affected by the locale
 | 
						|
(e.g. \function{string.lower()}, or certain formats used with
 | 
						|
\function{time.strftime()}), you will have to find a way to do it
 | 
						|
without using the standard library routine.  Even better is convincing
 | 
						|
yourself that using locale settings is okay.  Only as a last resort
 | 
						|
should you document that your module is not compatible with
 | 
						|
non-\samp{C} locale settings.
 | 
						|
 | 
						|
The case conversion functions in the
 | 
						|
\refmodule{string}\refstmodindex{string} and
 | 
						|
\module{strop}\refbimodindex{strop} modules are affected by the locale
 | 
						|
settings.  When a call to the \function{setlocale()} function changes
 | 
						|
the \constant{LC_CTYPE} settings, the variables
 | 
						|
\code{string.lowercase}, \code{string.uppercase} and
 | 
						|
\code{string.letters} (and their counterparts in \module{strop}) are
 | 
						|
recalculated.  Note that code that uses these variables through
 | 
						|
`\keyword{from} ... \keyword{import} ...', e.g. \code{from string
 | 
						|
import letters}, is not affected by subsequent \function{setlocale()}
 | 
						|
calls.
 | 
						|
 | 
						|
The only way to perform numeric operations according to the locale
 | 
						|
is to use the special functions defined by this module:
 | 
						|
\function{atof()}, \function{atoi()}, \function{format()},
 | 
						|
\function{str()}.
 | 
						|
 | 
						|
\subsection{For extension writers and programs that embed Python
 | 
						|
            \label{embedding-locale}}
 | 
						|
 | 
						|
Extension modules should never call \function{setlocale()}, except to
 | 
						|
find out what the current locale is.  But since the return value can
 | 
						|
only be used portably to restore it, that is not very useful (except
 | 
						|
perhaps to find out whether or not the locale is \samp{C}).
 | 
						|
 | 
						|
When Python is embedded in an application, if the application sets the
 | 
						|
locale to something specific before initializing Python, that is
 | 
						|
generally okay, and Python will use whatever locale is set,
 | 
						|
\emph{except} that the \constant{LC_NUMERIC} locale should always be
 | 
						|
\samp{C}.
 | 
						|
 | 
						|
The \function{setlocale()} function in the \module{locale} module
 | 
						|
gives the Python programmer the impression that you can manipulate the
 | 
						|
\constant{LC_NUMERIC} locale setting, but this is not the case at the C
 | 
						|
level: C code will always find that the \constant{LC_NUMERIC} locale
 | 
						|
setting is \samp{C}.  This is because too much would break when the
 | 
						|
decimal point character is set to something other than a period
 | 
						|
(e.g. the Python parser would break).  Caveat: threads that run
 | 
						|
without holding Python's global interpreter lock may occasionally find
 | 
						|
that the numeric locale setting differs; this is because the only
 | 
						|
portable way to implement this feature is to set the numeric locale
 | 
						|
settings to what the user requests, extract the relevant
 | 
						|
characteristics, and then restore the \samp{C} numeric locale.
 | 
						|
 | 
						|
When Python code uses the \module{locale} module to change the locale,
 | 
						|
this also affects the embedding application.  If the embedding
 | 
						|
application doesn't want this to happen, it should remove the
 | 
						|
\module{_locale} extension module (which does all the work) from the
 | 
						|
table of built-in modules in the \file{config.c} file, and make sure
 | 
						|
that the \module{_locale} module is not accessible as a shared library.
 |