如果您需要在响应用户操作时检测语言,则可以使用 Google AJAX 语言 API:
#!/usr/bin/env python
"""Detect the language of a text via the Google AJAX Language API."""
import json
from contextlib import closing

try:  # Python 2 module layout
    from urllib import urlencode
    from urllib2 import Request, urlopen
except ImportError:  # Python 3 module layout
    from urllib.parse import urlencode
    from urllib.request import Request, urlopen


def detect_language(text, userip=None,
                    referrer="http://stackoverflow.com/q/4545977/4279",
                    api_key=None):
    """Return the language code the service reports for *text*.

    text: byte string or unicode string; unicode is sent UTF-8 encoded
    userip: optional value for the ``userip`` query parameter
    referrer: value sent in the ``Referer`` request header
    api_key: optional value for the ``key`` query parameter

    Raise IOError (carrying the decoded response) if ``responseStatus``
    is not 200 or ``responseData`` contains an ``error`` entry.

    NOTE(review): this AJAX endpoint has reportedly been retired --
    confirm it is still reachable before relying on it.
    """
    text_type = type(u"")  # ``unicode`` on Python 2, ``str`` on Python 3
    query = {
        'q': text.encode('utf-8') if isinstance(text, text_type) else text,
    }
    if userip:
        query.update(userip=userip)
    if api_key:
        query.update(key=api_key)
    url = ('https://ajax.googleapis.com/ajax/services/language/detect'
           '?v=1.0&%s' % urlencode(query))
    request = Request(url, None, headers=dict(Referer=referrer))
    # closing() releases the connection on both Python 2 and Python 3.
    with closing(urlopen(request)) as response:
        d = json.load(response)
    if d['responseStatus'] != 200 or u'error' in d['responseData']:
        raise IOError(d)
    return d['responseData']['language']


if __name__ == "__main__":
    print(detect_language(
        "Python - can I detect unicode string language code?"))
输出:`en`

[Google 翻译 API v2](http://code.google.com/apis/language/translate/v2/using_rest.html)
默认限制为每天100000个字符(一次不超过5000个)。
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""Detect languages as a by-product of a Google Translate API v2 request."""
import json
from contextlib import closing
from operator import itemgetter

try:  # Python 2 module layout
    from urllib import urlencode
    from urllib2 import Request, urlopen
except ImportError:  # Python 3 module layout
    from urllib.parse import urlencode
    from urllib.request import Request, urlopen


def detect_language_v2(chunks, api_key):
    """Return the detected source language of each chunk.

    chunks: either string or sequence of strings
    api_key: value sent as the ``key`` request parameter

    Return list of corresponding language codes, read from the
    ``detectedSourceLanguage`` field of each returned translation.

    Raise ValueError if the encoded request is longer than 5000
    characters (checked before anything is sent), and IOError
    (carrying the decoded response) if the response contains ``error``.
    """
    text_type = type(u"")  # ``unicode`` on Python 2, ``str`` on Python 3
    if isinstance(chunks, (bytes, text_type)):
        chunks = [chunks]
    url = 'https://www.googleapis.com/language/translate/v2'
    data = urlencode(dict(
        q=[t.encode('utf-8') if isinstance(t, text_type) else t
           for t in chunks],
        key=api_key, target="en"), doseq=True)

    # the request length MUST be < 5000
    if len(data) > 5000:
        raise ValueError(
            "request is too long, see "
            "http://code.google.com/apis/language/translate/terms.html")

    # NOTE: use POST to allow more than 2K characters
    # urlencode() output is pure ASCII; Python 3 requires a bytes body.
    request = Request(url, data.encode('ascii'),
                      headers={'X-HTTP-Method-Override': 'GET'})
    with closing(urlopen(request)) as response:
        d = json.load(response)
    if u'error' in d:
        raise IOError(d)
    # list() keeps the return type a list on Python 3, where map is lazy.
    return list(map(itemgetter('detectedSourceLanguage'),
                    d['data']['translations']))
现在,您可以请求显式检测语言:
import json
from contextlib import closing
from operator import itemgetter

try:  # Python 2 module layout
    from urllib import urlencode
    from urllib2 import Request, urlopen
except ImportError:  # Python 3 module layout
    from urllib.parse import urlencode
    from urllib.request import Request, urlopen


def detect_language_v2(chunks, api_key):
    """Return the most confident detected language of each chunk.

    chunks: either string or sequence of strings
    api_key: value sent as the ``key`` request parameter

    Return list of corresponding language codes: for every entry of
    ``detections`` in the response, the ``language`` of the candidate
    with the highest ``confidence``.

    Raise ValueError if the encoded request is longer than 5000
    characters (checked before anything is sent). The response is not
    checked for an ``error`` entry; a payload without ``data`` surfaces
    as KeyError.
    """
    text_type = type(u"")  # ``unicode`` on Python 2, ``str`` on Python 3
    if isinstance(chunks, (bytes, text_type)):
        chunks = [chunks]
    url = 'https://www.googleapis.com/language/translate/v2/detect'
    data = urlencode(dict(
        q=[t.encode('utf-8') if isinstance(t, text_type) else t
           for t in chunks],
        key=api_key), doseq=True)

    # the request length MUST be < 5000
    if len(data) > 5000:
        raise ValueError(
            "request is too long, see "
            "http://code.google.com/apis/language/translate/terms.html")

    # NOTE: use POST to allow more than 2K characters
    # urlencode() output is pure ASCII; Python 3 requires a bytes body.
    request = Request(url, data.encode('ascii'),
                      headers={'X-HTTP-Method-Override': 'GET'})
    with closing(urlopen(request)) as response:
        d = json.load(response)
    # Ascending sort + [-1] picks the top-confidence candidate (the last
    # one among ties, since sorted() is stable).
    return [sorted(candidates, key=itemgetter('confidence'))[-1]['language']
            for candidates in d['data']['detections']]
例:
# Read the API key with a context manager so the file handle is closed.
with open('api_key.txt') as key_file:
    api_key = key_file.read().strip()

print(detect_language_v2(
    ["Python - can I detect unicode string language code?",
     u"матрёшка", u"打水"],
    api_key=api_key))
# Output:
[u'en', u'ru', u'zh-CN']
欢迎分享,转载请注明来源:内存溢出
评论列表(0条)