Implemented #647, option to pass a list of default encodings used to encode to/decode from unicode

--HG-- branch : beta
author: Marcin Kuzminski <marcin@python-works.com> 2012-11-15 00:57:52 +0100
committer: Marcin Kuzminski <marcin@python-works.com> 2012-11-15 00:57:52 +0100
commit: 3b831505abba789e21caf813f487d744ba8f3a9b (patch)
tree: f9a858fec93f7c8b8a4cd69bd3e587ceef2ac1ad
parent: faef847ffb39e5291b1d123e1d20ca385412357d (diff)
5 files changed, 59 insertions, 22 deletions
diff --git a/development.ini b/development.ini
index 76b23046..66c3c8ad 100644
--- a/development.ini
+++ b/development.ini
@@ -76,6 +76,8 @@ use_gravatar = true
 
 container_auth_enabled = false
 proxypass_auth_enabled = false
+## default encoding used to convert from and to unicode
+## can also be a comma separated list of encodings in case of mixed encodings
 default_encoding = utf8
 
 ## overwrite schema of clone url
diff --git a/production.ini b/production.ini
index 799b7a2d..e3bae76d 100644
--- a/production.ini
+++ b/production.ini
@@ -76,6 +76,8 @@ use_gravatar = true
 
 container_auth_enabled = false
 proxypass_auth_enabled = false
+## default encoding used to convert from and to unicode
+## can also be a comma separated list of encodings in case of mixed encodings
 default_encoding = utf8
 
 ## overwrite schema of clone url
diff --git a/rhodecode/config/deployment.ini_tmpl b/rhodecode/config/deployment.ini_tmpl
index 270adcc3..3aaaceb2 100644
--- a/rhodecode/config/deployment.ini_tmpl
+++ b/rhodecode/config/deployment.ini_tmpl
@@ -76,6 +76,8 @@ use_gravatar = true
 
 container_auth_enabled = false
 proxypass_auth_enabled = false
+## default encoding used to convert from and to unicode
+## can also be a comma separated list of encodings in case of mixed encodings
 default_encoding = utf8
 
 ## overwrite schema of clone url
diff --git a/rhodecode/lib/utils2.py b/rhodecode/lib/utils2.py
index 4c1f1fbe..5e36e670 100644
--- a/rhodecode/lib/utils2.py
+++ b/rhodecode/lib/utils2.py
@@ -66,6 +66,7 @@ def __get_lem():
 
     return dict(d)
 
+
 def str2bool(_str):
     """
     returs True/False value from given string, it tries to translate the
@@ -83,6 +84,27 @@ def str2bool(_str):
     return _str in ('t', 'true', 'y', 'yes', 'on', '1')
 
 
+def aslist(obj, sep=None, strip=True):
+    """
+    Returns given string separated by sep as list
+
+    :param obj: string to split; list/tuple is returned as is, None gives []
+    :param sep: separator passed to split() (None splits on whitespace)
+    :param strip: when True, strip whitespace from each split item
+    """
+    if isinstance(obj, (basestring)):
+        lst = obj.split(sep)
+        if strip:
+            lst = [v.strip() for v in lst]
+        return lst
+    elif isinstance(obj, (list, tuple)):
+        return obj
+    elif obj is None:
+        return []
+    else:
+        return [obj]
+
+
 def convert_line_endings(line, mode):
     """
     Converts a given line  "line end" accordingly to given mode
@@ -182,18 +204,23 @@ def safe_unicode(str_, from_encoding=None):
 
     if not from_encoding:
         import rhodecode
-        DEFAULT_ENCODING = rhodecode.CONFIG.get('default_encoding','utf8')
-        from_encoding = DEFAULT_ENCODING
+        DEFAULT_ENCODINGS = aslist(rhodecode.CONFIG.get('default_encoding',
+                                                        'utf8'), sep=',')
+        from_encoding = DEFAULT_ENCODINGS
+
+    if not isinstance(from_encoding, (list, tuple)):
+        from_encoding = [from_encoding]
 
     try:
         return unicode(str_)
     except UnicodeDecodeError:
         pass
 
-    try:
-        return unicode(str_, from_encoding)
-    except UnicodeDecodeError:
-        pass
+    for enc in from_encoding:
+        try:
+            return unicode(str_, enc)
+        except UnicodeDecodeError:
+            pass
 
     try:
         import chardet
@@ -202,7 +229,7 @@ def safe_unicode(str_, from_encoding=None):
             raise Exception()
         return str_.decode(encoding)
     except (ImportError, UnicodeDecodeError, Exception):
-        return unicode(str_, from_encoding, 'replace')
+        return unicode(str_, from_encoding[0], 'replace')
 
 
 def safe_str(unicode_, to_encoding=None):
@@ -226,13 +253,18 @@ def safe_str(unicode_, to_encoding=None):
 
     if not to_encoding:
         import rhodecode
-        DEFAULT_ENCODING = rhodecode.CONFIG.get('default_encoding','utf8')
-        to_encoding = DEFAULT_ENCODING
+        DEFAULT_ENCODINGS = aslist(rhodecode.CONFIG.get('default_encoding',
+                                                        'utf8'), sep=',')
+        to_encoding = DEFAULT_ENCODINGS
 
-    try:
-        return unicode_.encode(to_encoding)
-    except UnicodeEncodeError:
-        pass
+    if not isinstance(to_encoding, (list, tuple)):
+        to_encoding = [to_encoding]
+
+    for enc in to_encoding:
+        try:
+            return unicode_.encode(enc)
+        except UnicodeEncodeError:
+            pass
 
     try:
         import chardet
@@ -242,7 +274,7 @@ def safe_str(unicode_, to_encoding=None):
 
         return unicode_.encode(encoding)
     except (ImportError, UnicodeEncodeError):
-        return unicode_.encode(to_encoding, 'replace')
+        return unicode_.encode(to_encoding[0], 'replace')
 
     return safe_str
 
diff --git a/rhodecode/lib/vcs/utils/__init__.py b/rhodecode/lib/vcs/utils/__init__.py
index 2af9a561..bd82ffa4 100644
--- a/rhodecode/lib/vcs/utils/__init__.py
+++ b/rhodecode/lib/vcs/utils/__init__.py
@@ -38,12 +38,12 @@ def safe_unicode(str_, from_encoding=None):
     :rtype: unicode
     :returns: unicode object
     """
+    from rhodecode.lib.utils2 import safe_unicode
+    return safe_unicode(str_, from_encoding)
+
     if isinstance(str_, unicode):
         return str_
-    if not from_encoding:
-        import rhodecode
-        DEFAULT_ENCODING = rhodecode.CONFIG.get('default_encoding', 'utf8')
-        from_encoding = DEFAULT_ENCODING
+
     try:
         return unicode(str_)
     except UnicodeDecodeError:
@@ -75,13 +75,12 @@ def safe_str(unicode_, to_encoding=None):
     :rtype: str
     :returns: str object
     """
+    from rhodecode.lib.utils2 import safe_str
+    return safe_str(unicode_, to_encoding)
 
     if isinstance(unicode_, str):
         return unicode_
-    if not to_encoding:
-        import rhodecode
-        DEFAULT_ENCODING = rhodecode.CONFIG.get('default_encoding', 'utf8')
-        to_encoding = DEFAULT_ENCODING
+
     try:
         return unicode_.encode(to_encoding)
     except UnicodeEncodeError:
author	Marcin Kuzminski <marcin@python-works.com>	2012-11-15 00:57:52 +0100
committer	Marcin Kuzminski <marcin@python-works.com>	2012-11-15 00:57:52 +0100
commit	3b831505abba789e21caf813f487d744ba8f3a9b (patch)
tree	f9a858fec93f7c8b8a4cd69bd3e587ceef2ac1ad
parent	faef847ffb39e5291b1d123e1d20ca385412357d (diff)