aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorMarcin Kuzminski <marcin@python-works.com>2012-11-15 00:57:52 +0100
committerMarcin Kuzminski <marcin@python-works.com>2012-11-15 00:57:52 +0100
commit3b831505abba789e21caf813f487d744ba8f3a9b (patch)
treef9a858fec93f7c8b8a4cd69bd3e587ceef2ac1ad
parentfaef847ffb39e5291b1d123e1d20ca385412357d (diff)
Implemented #647, option to pass list of default encoding used to encode to/decode from unicode
--HG-- branch : beta
-rw-r--r--development.ini2
-rw-r--r--production.ini2
-rw-r--r--rhodecode/config/deployment.ini_tmpl2
-rw-r--r--rhodecode/lib/utils2.py60
-rw-r--r--rhodecode/lib/vcs/utils/__init__.py15
5 files changed, 59 insertions, 22 deletions
diff --git a/development.ini b/development.ini
index 76b23046..66c3c8ad 100644
--- a/development.ini
+++ b/development.ini
@@ -76,6 +76,8 @@ use_gravatar = true
container_auth_enabled = false
proxypass_auth_enabled = false
+## default encoding used to convert from and to unicode
+## can also be a comma separated list of encodings in case of mixed encodings
default_encoding = utf8
## overwrite schema of clone url
diff --git a/production.ini b/production.ini
index 799b7a2d..e3bae76d 100644
--- a/production.ini
+++ b/production.ini
@@ -76,6 +76,8 @@ use_gravatar = true
container_auth_enabled = false
proxypass_auth_enabled = false
+## default encoding used to convert from and to unicode
+## can also be a comma separated list of encodings in case of mixed encodings
default_encoding = utf8
## overwrite schema of clone url
diff --git a/rhodecode/config/deployment.ini_tmpl b/rhodecode/config/deployment.ini_tmpl
index 270adcc3..3aaaceb2 100644
--- a/rhodecode/config/deployment.ini_tmpl
+++ b/rhodecode/config/deployment.ini_tmpl
@@ -76,6 +76,8 @@ use_gravatar = true
container_auth_enabled = false
proxypass_auth_enabled = false
+## default encoding used to convert from and to unicode
+## can also be a comma separated list of encodings in case of mixed encodings
default_encoding = utf8
## overwrite schema of clone url
diff --git a/rhodecode/lib/utils2.py b/rhodecode/lib/utils2.py
index 4c1f1fbe..5e36e670 100644
--- a/rhodecode/lib/utils2.py
+++ b/rhodecode/lib/utils2.py
@@ -66,6 +66,7 @@ def __get_lem():
return dict(d)
+
def str2bool(_str):
"""
returs True/False value from given string, it tries to translate the
@@ -83,6 +84,27 @@ def str2bool(_str):
return _str in ('t', 'true', 'y', 'yes', 'on', '1')
+def aslist(obj, sep=None, strip=True):
+ """
+ Returns given string separated by sep as list
+
+ :param obj:
+ :param sep:
+ :param strip:
+ """
+ if isinstance(obj, (basestring)):
+ lst = obj.split(sep)
+ if strip:
+ lst = [v.strip() for v in lst]
+ return lst
+ elif isinstance(obj, (list, tuple)):
+ return obj
+ elif obj is None:
+ return []
+ else:
+ return [obj]
+
+
def convert_line_endings(line, mode):
"""
Converts a given line "line end" accordingly to given mode
@@ -182,18 +204,23 @@ def safe_unicode(str_, from_encoding=None):
if not from_encoding:
import rhodecode
- DEFAULT_ENCODING = rhodecode.CONFIG.get('default_encoding','utf8')
- from_encoding = DEFAULT_ENCODING
+ DEFAULT_ENCODINGS = aslist(rhodecode.CONFIG.get('default_encoding',
+ 'utf8'), sep=',')
+ from_encoding = DEFAULT_ENCODINGS
+
+ if not isinstance(from_encoding, (list, tuple)):
+ from_encoding = [from_encoding]
try:
return unicode(str_)
except UnicodeDecodeError:
pass
- try:
- return unicode(str_, from_encoding)
- except UnicodeDecodeError:
- pass
+ for enc in from_encoding:
+ try:
+ return unicode(str_, enc)
+ except UnicodeDecodeError:
+ pass
try:
import chardet
@@ -202,7 +229,7 @@ def safe_unicode(str_, from_encoding=None):
raise Exception()
return str_.decode(encoding)
except (ImportError, UnicodeDecodeError, Exception):
- return unicode(str_, from_encoding, 'replace')
+ return unicode(str_, from_encoding[0], 'replace')
def safe_str(unicode_, to_encoding=None):
@@ -226,13 +253,18 @@ def safe_str(unicode_, to_encoding=None):
if not to_encoding:
import rhodecode
- DEFAULT_ENCODING = rhodecode.CONFIG.get('default_encoding','utf8')
- to_encoding = DEFAULT_ENCODING
+ DEFAULT_ENCODINGS = aslist(rhodecode.CONFIG.get('default_encoding',
+ 'utf8'), sep=',')
+ to_encoding = DEFAULT_ENCODINGS
- try:
- return unicode_.encode(to_encoding)
- except UnicodeEncodeError:
- pass
+ if not isinstance(to_encoding, (list, tuple)):
+ to_encoding = [to_encoding]
+
+ for enc in to_encoding:
+ try:
+ return unicode_.encode(enc)
+ except UnicodeEncodeError:
+ pass
try:
import chardet
@@ -242,7 +274,7 @@ def safe_str(unicode_, to_encoding=None):
return unicode_.encode(encoding)
except (ImportError, UnicodeEncodeError):
- return unicode_.encode(to_encoding, 'replace')
+ return unicode_.encode(to_encoding[0], 'replace')
return safe_str
diff --git a/rhodecode/lib/vcs/utils/__init__.py b/rhodecode/lib/vcs/utils/__init__.py
index 2af9a561..bd82ffa4 100644
--- a/rhodecode/lib/vcs/utils/__init__.py
+++ b/rhodecode/lib/vcs/utils/__init__.py
@@ -38,12 +38,12 @@ def safe_unicode(str_, from_encoding=None):
:rtype: unicode
:returns: unicode object
"""
+ from rhodecode.lib.utils2 import safe_unicode
+ return safe_unicode(str_, from_encoding)
+
if isinstance(str_, unicode):
return str_
- if not from_encoding:
- import rhodecode
- DEFAULT_ENCODING = rhodecode.CONFIG.get('default_encoding', 'utf8')
- from_encoding = DEFAULT_ENCODING
+
try:
return unicode(str_)
except UnicodeDecodeError:
@@ -75,13 +75,12 @@ def safe_str(unicode_, to_encoding=None):
:rtype: str
:returns: str object
"""
+ from rhodecode.lib.utils2 import safe_str
+ return safe_str(unicode_, to_encoding)
if isinstance(unicode_, str):
return unicode_
- if not to_encoding:
- import rhodecode
- DEFAULT_ENCODING = rhodecode.CONFIG.get('default_encoding', 'utf8')
- to_encoding = DEFAULT_ENCODING
+
try:
return unicode_.encode(to_encoding)
except UnicodeEncodeError: