UPDATE
The Google Translate API has been officially deprecated; an alternative is the Microsoft Translator V2. Check this article for more details.
In a previous post we used the Google Translate API v2 to translate from one language to another. Today I will show how to use the current version of this API (v1), which includes a nice feature to detect the language of a given text.
This API can be accessed via JavaScript in a web page or using an HTTP request. In the next sample I will use the second option.
Before continuing, you must be aware of this warning from Google.
Note: The Google Translate API must be used for user-generated translations. Automated or batched queries of any kind are strictly prohibited.
Here you can find the Terms of Service of this API.
The next code is for educational purposes only.
To detect the language of a text you must make a request to this URI
https://ajax.googleapis.com/ajax/services/language/detect?v=1.0&q=Hello+World
As you can see, the only required parameter is the encoded text to detect.
The JSON response to this request will be something like this
{ "responseData" : { "confidence" : 0.11489271400000001,
"isReliable" : false,
"language" : "en"
},
"responseDetails" : null,
"responseStatus" : 200
}
The responseStatus member contains the result of the operation; the value 200 indicates that the language of the text was successfully detected.
So now we need to define a few types, functions and constants to make the work easier.
type
//The languages handled by the routines below. Autodetect is a pseudo-entry that the
//translate routines treat specially (the source language is left out of the request);
//Unknow is the value the detection routines return when no language could be matched.
TGoogleLanguages=
(Autodetect,Afrikaans,Albanian,Arabic,Basque,Belarusian,Bulgarian,Catalan,Chinese,Chinese_Traditional,
Croatian,Czech,Danish,Dutch,English,Estonian,Filipino,Finnish,French,Galician,German,Greek,
Haitian_Creole,Hebrew,Hindi,Hungarian,Icelandic,Indonesian,Irish,Italian,Japanese,Latvian,
Lithuanian,Macedonian,Malay,Maltese,Norwegian,Persian,Polish,Portuguese,Romanian,Russian,
Serbian,Slovak,Slovenian,Spanish,Swahili,Swedish,Thai,Turkish,Ukrainian,Vietnamese,Welsh,Yiddish,Unknow);
//Display name of every TGoogleLanguages value, indexed by the enumeration itself
const
GoogleLanguagesStr : array[TGoogleLanguages] of string =
('Autodetect','Afrikaans','Albanian','Arabic','Basque','Belarusian','Bulgarian','Catalan','Chinese','Chinese_Traditional',
'Croatian','Czech','Danish','Dutch','English','Estonian','Filipino','Finnish','French','Galician','German','Greek',
'Haitian_Creole','Hebrew','Hindi','Hungarian','Icelandic','Indonesian','Irish','Italian','Japanese','Latvian',
'Lithuanian','Macedonian','Malay','Maltese','Norwegian','Persian','Polish','Portuguese','Romanian','Russian',
'Serbian','Slovak','Slovenian','Spanish','Swahili','Swedish','Thai','Turkish','Ukrainian','Vietnamese','Welsh','Yiddish','Unknow');
//Language code of every TGoogleLanguages value, as placed in the HTTP request and as
//compared against the "language" member of the detect response.
//The first and last entries ('Autodetect', 'Unknow') are placeholder strings, not language codes.
GoogleLanguagesArr : array[TGoogleLanguages] of string =
( 'Autodetect','af','sq','ar','eu','be','bg','ca','zh-CN','zh-TW','hr','cs','da','nl','en','et','tl','fi','fr','gl',
'de','el','ht','iw','hi','hu','is','id','ga','it','ja','lv','lt','mk','ms','mt','no','fa','pl','pt',
'ro','ru','sr','sk','sl','es','sw','sv','th','tr','uk','vi','cy','yi','Unknow');
//URI template to translate a text using v1 of the API.
//Format arguments: first %s = encoded text, second %s = language pair
GoogleTranslateUrl='https://ajax.googleapis.com/ajax/services/language/translate?v=1.0&q=%s&langpair=%s';
//URI template to detect the language of a text. Format argument: %s = encoded text
GoogleLngDetectUrl='https://ajax.googleapis.com/ajax/services/language/detect?v=1.0&q=%s';
//Perform an HTTP GET on Url through WinInet and write the raw response bytes to Stream.
//  Url    : complete address to request (the query string must already be encoded)
//  Stream : receives the response body, written from position 0; it is not touched
//           when the WinInet session cannot be opened
//When the session or the URL cannot be opened, or a read fails, the procedure returns
//silently with whatever has been written so far.
procedure WinInet_HttpGet(const Url: string;Stream:TStream);overload;
const
  BuffSize = 1024*1024;
var
  hInter   : HINTERNET;
  UrlHandle: HINTERNET;
  BytesRead: DWORD;
  Buffer   : Pointer;
begin
  hInter := InternetOpen('', INTERNET_OPEN_TYPE_PRECONFIG, nil, nil, 0);
  if not Assigned(hInter) then
    Exit;
  try
    Stream.Seek(0,0);
    GetMem(Buffer,BuffSize);
    try
      UrlHandle := InternetOpenUrl(hInter, PChar(Url), nil, 0, INTERNET_FLAG_RELOAD, 0);
      if Assigned(UrlHandle) then
      try
        repeat
          //reset on every pass: a failed call is not guaranteed to update BytesRead
          BytesRead := 0;
          //stop on a read error instead of looping on an undefined byte count
          if not InternetReadFile(UrlHandle, Buffer, BuffSize, BytesRead) then
            Break;
          if BytesRead>0 then
            Stream.WriteBuffer(Buffer^,BytesRead);
        until BytesRead = 0;
      finally
        //released even when Stream.WriteBuffer raises
        InternetCloseHandle(UrlHandle);
      end;
    finally
      FreeMem(Buffer);
    end;
  finally
    InternetCloseHandle(hInter);
  end;
end;
//Request Url with an HTTP GET and return the response body decoded as UTF-8 text.
//The result is an empty string when no bytes were received.
function WinInet_HttpGet(const Url: string): string;overload;
var
  Body : TStringStream;
begin
  Body:=TStringStream.Create('',TEncoding.UTF8);
  try
    //the stream overload fills Body with the raw response bytes
    WinInet_HttpGet(Url,Body);
    if Body.Size=0 then
      Result:=''
    else
    begin
      //rewind before reading the whole content back as a string
      Body.Seek(0,0);
      Result:=Body.ReadString(Body.Size);
    end;
  finally
    Body.Free;
  end;
end;
And now to process the response using the DBXJSON unit
//Detect the language of Text with the language-detect service, reading the reply with DBXJSON.
//Returns the TGoogleLanguages value whose code in GoogleLanguagesArr equals the reported
//language, or Unknow when nothing was received, the reply is not a JSON object,
//responseStatus is not 200, or the reported code is not in the table.
function DetectLanguage_DBXJSON(const Text:string):TGoogleLanguages;
var
  EncodedRequest: string;
  Parsed   : TJSONValue;
  jPair    : TJSONPair;
  Response : string;
  Lng      : TGoogleLanguages;
  LngStr   : string;

  //Find a member by name rather than by position, so the result does not depend on
  //the order in which the members were serialised. Returns nil when the name is absent.
  function FindPair(Obj: TJSONObject; const Name: string): TJSONPair;
  var
    i: Integer;
  begin
    Result:=nil;
    for i:=0 to Obj.Size-1 do
      if Obj.Get(i).JsonString.Value=Name then
      begin
        Result:=Obj.Get(i);
        Exit;
      end;
  end;

begin
  Result:=Unknow;
  EncodedRequest:=Format(GoogleLngDetectUrl,[HTTPEncode(Text)]);
  Response:=WinInet_HttpGet(EncodedRequest);
  if Response='' then
    Exit;
  //ParseJSONValue returns nil on malformed input; keep the value as TJSONValue so it
  //can be freed even when it turns out not to be an object
  Parsed := TJSONObject.ParseJSONValue(TEncoding.UTF8.GetBytes(Response),0);
  if Parsed=nil then
    Exit;
  try
    if not (Parsed is TJSONObject) then
      Exit;
    jPair := FindPair(TJSONObject(Parsed),'responseStatus');
    if (jPair=nil) or (jPair.JsonValue.ToString<>'200') then //200 is all ok
      Exit;
    jPair := FindPair(TJSONObject(Parsed),'responseData');
    if (jPair=nil) or not (jPair.JsonValue is TJSONObject) then
      Exit;
    jPair := FindPair(TJSONObject(jPair.JsonValue),'language');
    if jPair=nil then
      Exit;
    LngStr := jPair.JsonValue.Value;
    for Lng:=Low(TGoogleLanguages) to High(TGoogleLanguages) do
      if GoogleLanguagesArr[Lng]=LngStr then
      begin
        Result:=Lng;
        Exit;
      end;
  finally
    Parsed.Free;
  end;
end;
Another alternative using the JSON superobject library
//Detect the language of Text with the language-detect service, reading the reply with superobject.
//Returns the TGoogleLanguages value whose code in GoogleLanguagesArr equals the reported
//language, or Unknow when nothing was received, the reply cannot be parsed,
//responseStatus is not 200, or the reported code is not in the table.
function DetectLanguage_JSONsuperobject(const Text:string):TGoogleLanguages;
var
  EncodedRequest: string;
  Response : string;
  Json     : ISuperObject;
  Lng      : TGoogleLanguages;
  LngStr   : string;
begin
  Result:=Unknow;
  EncodedRequest:=Format(GoogleLngDetectUrl,[HTTPEncode(Text)]);
  Response:=WinInet_HttpGet(EncodedRequest);
  if Response='' then
    Exit;
  //parse once and keep the object; SO() yields nil when the text is not valid JSON
  Json:=SO(Response);
  if Json=nil then
    Exit;
  //the typed accessors (I[], S[]) tolerate a missing member, unlike calling
  //AsInteger/AsString on the possibly-nil result of the default [] property
  if Json.I['responseStatus']<>200 then //if responseStatus<>200 there is an error in the response
    Exit;
  LngStr:=Json.S['responseData.language'];
  for Lng:=Low(TGoogleLanguages) to High(TGoogleLanguages) do
    if GoogleLanguagesArr[Lng]=LngStr then
    begin
      Result:=Lng;
      Exit;
    end;
end;
And finally, an option without JSON
//Detect the language of Text without a JSON parser: the language code is cut out of the
//raw response text using fixed marker strings.
//Returns Unknow when nothing was received, when the response starts with the error
//marker, or when the extracted code is not found in GoogleLanguagesArr.
function DetectLanguage_JSONLess(const Text:string):TGoogleLanguages;
const
  TagErr='{"responseData": null,';
  TagIOk='{"responseData": {"language":"';
  TagFOk='","isReliable":';
var
  RequestUrl : string;
  Reply      : string;
  Code       : string;
  Candidate  : TGoogleLanguages;
begin
  Result:=Unknow;
  RequestUrl:=Format(GoogleLngDetectUrl,[HTTPEncode(Text)]);
  Reply:=WinInet_HttpGet(RequestUrl);
  //nothing received, or the service answered with an error
  if (Reply='') or StartsStr(TagErr,Reply) then
    Exit;
  //drop the leading marker, then keep everything in front of the trailing marker
  Code:=StringReplace(Reply,TagIOk,'',[rfReplaceAll]);
  Code:=Copy(Code,1,Pos(TagFOk,Code)-1);
  for Candidate:=Low(TGoogleLanguages) to High(TGoogleLanguages) do
    if GoogleLanguagesArr[Candidate]=Code then
    begin
      Result:=Candidate;
      Break;
    end;
end;
To finish, here is the full source code of a console application which shows all the alternatives for decoding the JSON response and, as an extra, includes the routines to translate a text using the Google Translate API v1.
program GoogleAPITranslateV1;
{$APPTYPE CONSOLE}
{$DEFINE USE_SUPER_OBJECT}
{$DEFINE USE_DBXJSON}
{$DEFINE USE_JSONLess}
uses
Windows
,Classes
,WinInet
,Activex
,HTTPApp
,SysUtils
{$IFDEF USE_JSONLess}
,StrUtils
{$ENDIF}
{$IFDEF USE_SUPER_OBJECT}
,superobject
{$ENDIF}
{$IFDEF USE_DBXJSON}
,DBXJSON
{$ENDIF}
;
type
//The languages handled by the routines below. Autodetect is a pseudo-entry that the
//translate routines treat specially (the source language is left out of the request);
//Unknow is the value the detection routines return when no language could be matched.
TGoogleLanguages=
(Autodetect,Afrikaans,Albanian,Arabic,Basque,Belarusian,Bulgarian,Catalan,Chinese,Chinese_Traditional,
Croatian,Czech,Danish,Dutch,English,Estonian,Filipino,Finnish,French,Galician,German,Greek,
Haitian_Creole,Hebrew,Hindi,Hungarian,Icelandic,Indonesian,Irish,Italian,Japanese,Latvian,
Lithuanian,Macedonian,Malay,Maltese,Norwegian,Persian,Polish,Portuguese,Romanian,Russian,
Serbian,Slovak,Slovenian,Spanish,Swahili,Swedish,Thai,Turkish,Ukrainian,Vietnamese,Welsh,Yiddish,Unknow);
const
//Display name of every TGoogleLanguages value, indexed by the enumeration itself
GoogleLanguagesStr : array[TGoogleLanguages] of string =
('Autodetect','Afrikaans','Albanian','Arabic','Basque','Belarusian','Bulgarian','Catalan','Chinese','Chinese_Traditional',
'Croatian','Czech','Danish','Dutch','English','Estonian','Filipino','Finnish','French','Galician','German','Greek',
'Haitian_Creole','Hebrew','Hindi','Hungarian','Icelandic','Indonesian','Irish','Italian','Japanese','Latvian',
'Lithuanian','Macedonian','Malay','Maltese','Norwegian','Persian','Polish','Portuguese','Romanian','Russian',
'Serbian','Slovak','Slovenian','Spanish','Swahili','Swedish','Thai','Turkish','Ukrainian','Vietnamese','Welsh','Yiddish','Unknow');
//Language code of every TGoogleLanguages value, as placed in the HTTP request and as
//compared against the "language" member of the detect response.
//The first and last entries ('Autodetect', 'Unknow') are placeholder strings, not language codes.
GoogleLanguagesArr : array[TGoogleLanguages] of string =
( 'Autodetect','af','sq','ar','eu','be','bg','ca','zh-CN','zh-TW','hr','cs','da','nl','en','et','tl','fi','fr','gl',
'de','el','ht','iw','hi','hu','is','id','ga','it','ja','lv','lt','mk','ms','mt','no','fa','pl','pt',
'ro','ru','sr','sk','sl','es','sw','sv','th','tr','uk','vi','cy','yi','Unknow');
//URI template to translate a text using v1 of the API.
//Format arguments: first %s = encoded text, second %s = language pair
GoogleTranslateUrl='https://ajax.googleapis.com/ajax/services/language/translate?v=1.0&q=%s&langpair=%s';
//http://code.google.com/apis/language/translate/v1/using_rest_translate.html
//URI template to detect the language of a text. Format argument: %s = encoded text
GoogleLngDetectUrl='https://ajax.googleapis.com/ajax/services/language/detect?v=1.0&q=%s';
//Perform an HTTP GET on Url through WinInet and write the raw response bytes to Stream.
//  Url    : complete address to request (the query string must already be encoded)
//  Stream : receives the response body, written from position 0; it is not touched
//           when the WinInet session cannot be opened
//When the session or the URL cannot be opened, or a read fails, the procedure returns
//silently with whatever has been written so far.
procedure WinInet_HttpGet(const Url: string;Stream:TStream);overload;
const
  BuffSize = 1024*1024;
var
  hInter   : HINTERNET;
  UrlHandle: HINTERNET;
  BytesRead: DWORD;
  Buffer   : Pointer;
begin
  hInter := InternetOpen('Mozilla/3.0', INTERNET_OPEN_TYPE_PRECONFIG, nil, nil, 0);
  if not Assigned(hInter) then
    Exit;
  try
    Stream.Seek(0,0);
    GetMem(Buffer,BuffSize);
    try
      UrlHandle := InternetOpenUrl(hInter, PChar(Url), nil, 0, INTERNET_FLAG_RELOAD, 0);
      if Assigned(UrlHandle) then
      try
        repeat
          //reset on every pass: a failed call is not guaranteed to update BytesRead
          BytesRead := 0;
          //stop on a read error instead of looping on an undefined byte count
          if not InternetReadFile(UrlHandle, Buffer, BuffSize, BytesRead) then
            Break;
          if BytesRead>0 then
            Stream.WriteBuffer(Buffer^,BytesRead);
        until BytesRead = 0;
      finally
        //released even when Stream.WriteBuffer raises
        InternetCloseHandle(UrlHandle);
      end;
    finally
      FreeMem(Buffer);
    end;
  finally
    InternetCloseHandle(hInter);
  end;
end;
//Request Url with an HTTP GET and return the response body decoded as UTF-8 text.
//The result is an empty string when no bytes were received.
function WinInet_HttpGet(const Url: string): string;overload;
var
  Body : TStringStream;
begin
  Body:=TStringStream.Create('',TEncoding.UTF8);
  try
    //the stream overload fills Body with the raw response bytes
    WinInet_HttpGet(Url,Body);
    if Body.Size=0 then
      Result:=''
    else
    begin
      //rewind before reading the whole content back as a string
      Body.Seek(0,0);
      Result:=Body.ReadString(Body.Size);
    end;
  finally
    Body.Free;
  end;
end;
{$IFDEF USE_SUPER_OBJECT}
//Detect the language of Text with the language-detect service, reading the reply with superobject.
//Returns the TGoogleLanguages value whose code in GoogleLanguagesArr equals the reported
//language, or Unknow when nothing was received, the reply cannot be parsed,
//responseStatus is not 200, or the reported code is not in the table.
function DetectLanguage_JSONsuperobject(const Text:string):TGoogleLanguages;
var
  EncodedRequest: string;
  Response : string;
  Json     : ISuperObject;
  Lng      : TGoogleLanguages;
  LngStr   : string;
begin
  Result:=Unknow;
  EncodedRequest:=Format(GoogleLngDetectUrl,[HTTPEncode(Text)]);
  Response:=WinInet_HttpGet(EncodedRequest);
  if Response='' then
    Exit;
  //parse once and keep the object; SO() yields nil when the text is not valid JSON
  Json:=SO(Response);
  if Json=nil then
    Exit;
  //the typed accessors (I[], S[]) tolerate a missing member, unlike calling
  //AsInteger/AsString on the possibly-nil result of the default [] property
  if Json.I['responseStatus']<>200 then //if responseStatus<>200 there is an error in the response
    Exit;
  LngStr:=Json.S['responseData.language'];
  for Lng:=Low(TGoogleLanguages) to High(TGoogleLanguages) do
    if GoogleLanguagesArr[Lng]=LngStr then
    begin
      Result:=Lng;
      Exit;
    end;
end;
//Translate Text from Source to Dest with the translate service, reading the reply with superobject.
//  Source : language of Text; Autodetect leaves the source side of the language pair empty
//  Dest   : language to translate into
//Returns the translated text; an 'Error Code <status> <details>' message when the reply
//carries no responseData object; or an empty string when nothing was received or the
//reply is not valid JSON.
function Translate_JSONsuperobject(const Text:string;Source,Dest:TGoogleLanguages):string;
var
  EncodedRequest: string;
  Response : string;
  Json     : ISuperObject;
begin
  Result:='';
  if Source=Autodetect then
    EncodedRequest:=Format(GoogleTranslateUrl,[HTTPEncode(Text),'%7C'+GoogleLanguagesArr[Dest]])
  else
    EncodedRequest:=Format(GoogleTranslateUrl,[HTTPEncode(Text),GoogleLanguagesArr[Source]+'%7C'+GoogleLanguagesArr[Dest]]);
  Response:=WinInet_HttpGet(EncodedRequest);
  if Response='' then
    Exit;
  //parse once and keep the object; SO() yields nil when the text is not valid JSON
  Json:=SO(Response);
  if Json=nil then
    Exit;
  //ObjectIsType copes with a nil member, so a null/missing responseData takes the
  //error branch instead of dereferencing a nil interface
  if not ObjectIsType(Json.O['responseData'],stObject) then
    Result:=Format('Error Code %d %s',[Json.I['responseStatus'],Json.S['responseDetails']])
  else
    Result:=Json.S['responseData.translatedText'];
end;
{$ENDIF}
{$IFDEF USE_DBXJSON}
//Detect the language of Text with the language-detect service, reading the reply with DBXJSON.
//Returns the TGoogleLanguages value whose code in GoogleLanguagesArr equals the reported
//language, or Unknow when nothing was received, the reply is not a JSON object,
//responseStatus is not 200, or the reported code is not in the table.
function DetectLanguage_DBXJSON(const Text:string):TGoogleLanguages;
var
  EncodedRequest: string;
  Parsed   : TJSONValue;
  jPair    : TJSONPair;
  Response : string;
  Lng      : TGoogleLanguages;
  LngStr   : string;

  //Find a member by name rather than by position, so the result does not depend on
  //the order in which the members were serialised. Returns nil when the name is absent.
  function FindPair(Obj: TJSONObject; const Name: string): TJSONPair;
  var
    i: Integer;
  begin
    Result:=nil;
    for i:=0 to Obj.Size-1 do
      if Obj.Get(i).JsonString.Value=Name then
      begin
        Result:=Obj.Get(i);
        Exit;
      end;
  end;

begin
  Result:=Unknow;
  EncodedRequest:=Format(GoogleLngDetectUrl,[HTTPEncode(Text)]);
  Response:=WinInet_HttpGet(EncodedRequest);
  if Response='' then
    Exit;
  //ParseJSONValue returns nil on malformed input; keep the value as TJSONValue so it
  //can be freed even when it turns out not to be an object
  Parsed := TJSONObject.ParseJSONValue(TEncoding.UTF8.GetBytes(Response),0);
  if Parsed=nil then
    Exit;
  try
    if not (Parsed is TJSONObject) then
      Exit;
    jPair := FindPair(TJSONObject(Parsed),'responseStatus');
    if (jPair=nil) or (jPair.JsonValue.ToString<>'200') then //200 is all ok
      Exit;
    jPair := FindPair(TJSONObject(Parsed),'responseData');
    if (jPair=nil) or not (jPair.JsonValue is TJSONObject) then
      Exit;
    jPair := FindPair(TJSONObject(jPair.JsonValue),'language');
    if jPair=nil then
      Exit;
    LngStr := jPair.JsonValue.Value;
    for Lng:=Low(TGoogleLanguages) to High(TGoogleLanguages) do
      if GoogleLanguagesArr[Lng]=LngStr then
      begin
        Result:=Lng;
        Exit;
      end;
  finally
    Parsed.Free;
  end;
end;
//Translate Text from Source to Dest with the translate service, reading the reply with DBXJSON.
//  Source : language of Text; Autodetect leaves the source side of the language pair empty
//  Dest   : language to translate into
//Returns the HTML-decoded translated text; an 'Error Code <status> message <details>'
//string when responseStatus is not 200; or an empty string when nothing was received or
//the reply is not a JSON object with the expected members.
function Translate_DBXJSON(const Text:string;Source,Dest:TGoogleLanguages):string;
var
  EncodedRequest: string;
  Parsed     : TJSONValue;
  Root       : TJSONObject;
  jPair      : TJSONPair;
  Response   : string;
  StatusText : string;
  DetailText : string;

  //Find a member by name rather than by position, so the result does not depend on
  //the order in which the members were serialised. Returns nil when the name is absent.
  function FindPair(Obj: TJSONObject; const Name: string): TJSONPair;
  var
    i: Integer;
  begin
    Result:=nil;
    for i:=0 to Obj.Size-1 do
      if Obj.Get(i).JsonString.Value=Name then
      begin
        Result:=Obj.Get(i);
        Exit;
      end;
  end;

begin
  Result:='';
  if Source=Autodetect then
    EncodedRequest:=Format(GoogleTranslateUrl,[HTTPEncode(Text),'%7C'+GoogleLanguagesArr[Dest]])
  else
    EncodedRequest:=Format(GoogleTranslateUrl,[HTTPEncode(Text),GoogleLanguagesArr[Source]+'%7C'+GoogleLanguagesArr[Dest]]);
  Response:=WinInet_HttpGet(EncodedRequest);
  if Response='' then
    Exit;
  //UTF-8 rather than ASCII: ASCII encoding replaces every non-ASCII character of the
  //translation (e.g. Chinese text) with '?' before the parser ever sees it
  Parsed := TJSONObject.ParseJSONValue(TEncoding.UTF8.GetBytes(Response),0);
  //nil means the reply was not valid JSON
  if Parsed=nil then
    Exit;
  try
    if Parsed is TJSONObject then
    begin
      Root := TJSONObject(Parsed);
      jPair := FindPair(Root,'responseStatus');
      if jPair<>nil then
        StatusText := jPair.JsonValue.ToString
      else
        StatusText := '';
      if StatusText<>'200' then //200 is all ok
      begin
        //{"responseData": null, "responseDetails": "invalid translation language pair", "responseStatus": 400}
        jPair := FindPair(Root,'responseDetails');
        if jPair<>nil then
          DetailText := jPair.JsonValue.ToString
        else
          DetailText := '';
        Result := Format('Error Code %s message %s',[StatusText,DetailText]);
      end
      else
      begin
        jPair := FindPair(Root,'responseData');
        if (jPair<>nil) and (jPair.JsonValue is TJSONObject) then
        begin
          jPair := FindPair(TJSONObject(jPair.JsonValue),'translatedText');
          //Value gives the string content; ToString would return it JSON-quoted
          if jPair<>nil then
            Result := jPair.JsonValue.Value;
        end;
      end;
    end;
  finally
    Parsed.Free;
  end;
  Result:=HTMLDecode(Result);
end;
{$ENDIF}
{$IFDEF USE_JSONLess}
//Detect the language of Text without a JSON parser: the language code is cut out of the
//raw response text using fixed marker strings.
//Returns Unknow when nothing was received, when the response starts with the error
//marker, or when the extracted code is not found in GoogleLanguagesArr.
function DetectLanguage_JSONLess(const Text:string):TGoogleLanguages;
const
  TagErr='{"responseData": null,';
  TagIOk='{"responseData": {"language":"';
  TagFOk='","isReliable":';
var
  RequestUrl : string;
  Reply      : string;
  Code       : string;
  Candidate  : TGoogleLanguages;
begin
  Result:=Unknow;
  RequestUrl:=Format(GoogleLngDetectUrl,[HTTPEncode(Text)]);
  //sample reply: {"responseData": {"language":"en","isReliable":false,"confidence":0.114892714},"responseDetails": null, "responseStatus": 200}
  Reply:=WinInet_HttpGet(RequestUrl);
  //nothing received, or the service answered with an error
  if (Reply='') or StartsStr(TagErr,Reply) then
    Exit;
  //drop the leading marker, then keep everything in front of the trailing marker
  Code:=StringReplace(Reply,TagIOk,'',[rfReplaceAll]);
  Code:=Copy(Code,1,Pos(TagFOk,Code)-1);
  for Candidate:=Low(TGoogleLanguages) to High(TGoogleLanguages) do
    if GoogleLanguagesArr[Candidate]=Code then
    begin
      Result:=Candidate;
      Break;
    end;
end;
//Translate Text from Source to Dest without a JSON parser: the translated text is cut out
//of the raw response using fixed marker strings.
//  Source : language of Text; Autodetect leaves the source side of the language pair empty
//  Dest   : language to translate into
//Returns the HTML-decoded translation, 'Error' when the response starts with the error
//marker, or an empty string when nothing was received.
function Translate_JSONLess(const Text:string;Source,Dest:TGoogleLanguages):string;
const
  TagErr='{"responseData": null,';
  TagIOk='{"responseData": {"translatedText":"';
  TagAut=',"detectedSourceLanguage":"';
  TagFOk='"}, "responseDetails":';
var
  LangPair   : string;
  RequestUrl : string;
  Reply      : string;
  Tail       : string;
begin
  Result:='';
  //"<source>|<dest>" with the separator pre-encoded; the source side stays empty for Autodetect
  LangPair:='%7C'+GoogleLanguagesArr[Dest];
  if Source<>Autodetect then
    LangPair:=GoogleLanguagesArr[Source]+LangPair;
  RequestUrl:=Format(GoogleTranslateUrl,[HTTPEncode(Text),LangPair]);
  Reply:=WinInet_HttpGet(RequestUrl);
  if Reply='' then
    Exit;
  if StartsStr(TagErr,Reply) then //Response Error
    Result:='Error'
  else
  begin //Response Ok
    Tail:=StringReplace(Reply,TagIOk,'',[rfReplaceAll]);
    if Source=Autodetect then
      //the text ends two characters before the marker (closing quote precedes the comma)
      Result:=Copy(Tail,1,Pos(TagAut,Tail)-2)
    else
      Result:=Copy(Tail,1,Pos(TagFOk,Tail)-1);
  end;
  Result:=HTMLDecode(Result);
end;
{$ENDIF}
type
  //signatures shared by the Translate_* and DetectLanguage_* implementations
  TTranslateFunc = function(const Text:string;Source,Dest:TGoogleLanguages):string;
  TDetectFunc    = function(const Text:string):TGoogleLanguages;

Const
  Text   ='Hello World';
  TextEn ='Hello World';
  TextEs ='Hola Mundo';
  //language pairs exercised by every demo section, in output order
  DemoSource : array[0..4] of TGoogleLanguages = (Autodetect,English,English,English,English);
  DemoDest   : array[0..4] of TGoogleLanguages = (Spanish,Chinese_Traditional,German,Danish,Portuguese);

//Print one demo section: the five sample translations of Text, followed by the
//language detected for the English and the Spanish sample sentence.
procedure RunDemo(const TranslateTitle,DetectTitle:string;TranslateFn:TTranslateFunc;DetectFn:TDetectFunc);
var
  i : Integer;
begin
  Writeln(TranslateTitle);
  Writeln('');
  for i:=Low(DemoDest) to High(DemoDest) do
    Writeln(TranslateFn(Text,DemoSource[i],DemoDest[i]));
  Writeln('');
  Writeln(DetectTitle);
  Writeln('');
  Writeln(Format('language detected for "%s" : %s',[TextEn,GoogleLanguagesStr[DetectFn(TextEn)]]));
  Writeln(Format('language detected for "%s" : %s',[TextEs,GoogleLanguagesStr[DetectFn(TextEs)]]));
end;

//Program entry point: runs every enabled implementation in turn, reports any exception
//on the console and waits for Enter before closing.
begin
  try
    CoInitialize(nil);
    try
      {$IFDEF USE_JSONLess}
      RunDemo('Without JSON (very ugly)','Detecting language Without JSON',
              Translate_JSONLess,DetectLanguage_JSONLess);
      {$ENDIF}
      {$IFDEF USE_SUPER_OBJECT}
      RunDemo('Using the superobject library','Detecting language using the superobject library',
              Translate_JSONsuperobject,DetectLanguage_JSONsuperobject);
      {$ENDIF}
      {$IFDEF USE_DBXJSON}
      RunDemo('Using the DBXJSON unit','Detecting language using the DBXJSON unit',
              Translate_DBXJSON,DetectLanguage_DBXJSON);
      {$ENDIF}
    finally
      CoUninitialize;
    end;
  except
    on E: Exception do
      Writeln(E.ClassName, ': ', E.Message);
  end;
  Readln;
end.
-33.636934
-70.679350