50_TelegramBot: Unicode / Umlaute handling reworked

git-svn-id: https://svn.fhem.de/fhem/trunk@11713 2b470e98-0d58-463d-a4d8-8e2adae1ed80
This commit is contained in:
viegener
2016-06-25 14:33:43 +00:00
parent 10b166bb28
commit 6c75455e8d
2 changed files with 119 additions and 25 deletions

View File

@@ -1,5 +1,6 @@
# Add changes at the top of the list. Keep it in ASCII, and 80-char wide.
# Do not insert empty lines here, update check depends on it.
- bugfix: 50_TelegramBot: Unicode / Umlaute handling reworked
- changed: 30_MilightBridge: Specify port in define
- changed: 31_HUEDevice: allow toggle command for unreachble devices
new ignoreReachable attribute

View File

@@ -136,11 +136,22 @@
# fix for addPar (Caption) on photos in SendIt
# fix for contact list UTF8 encoding on restart
# fix: encoding problem in some environments leading to wrong length calc in httputils (msg457443)
#
# fix: Favorite description without alias name was not parsed correctly
# fix: Favorite alias only handled if really contains more than the /
#
# Complete rework of JSON/UTF8 code to solve timeout and encoding issues
# Add \t for messages texts - will be a single space in the message
# 1.8 2016-05-05 UNicode / Umlaute handling changed, \t added
#
#
##############################################################################
# TASKS
#
# allow literals in msges: U+27F2 - \xe2\x9f\xb2 / Forum msg458794
#
# Look for solution on space at beginning of line --> checked that data is sent correctly to telegram but does not end up in the message
#
# allow keyboards in the device api
@@ -158,7 +169,11 @@ use strict;
use warnings;
#use HttpUtils;
use utf8;
use Encode;
# JSON:XS is used here normally
use JSON;
use File::Basename;
@@ -356,7 +371,6 @@ sub TelegramBot_State($$$$) {
# Log3 $hash->{NAME}, 4, "TelegramBot_State called with :$name: value :$value:";
if ($name eq 'Contacts') {
$value = TelegramBot_PutToUTF8( $value );
TelegramBot_CalcContactsHash( $hash, $value );
Log3 $hash->{NAME}, 4, "TelegramBot_State Contacts hash has now :".scalar(keys %{$hash->{Contacts}}).":";
}
@@ -464,6 +478,7 @@ sub TelegramBot_Set($@)
# for internal testing only
Log3 $name, 5, "TelegramBot_Set $name: start debug option ";
# delete $hash->{sentMsgPeer};
$ret = TelegramBot_SendIt( $hash, AttrVal($name,'defaultPeer',undef), "abc def\n def ghi", undef, 0, undef );
# BOTONLY
@@ -478,7 +493,6 @@ sub TelegramBot_Set($@)
my $arg = join(" ", @args );
Log3 $name, 3, "TelegramBot_Set $name: set new contacts to :$arg: ";
# first set the hash accordingly
$arg = TelegramBot_PutToUTF8( $arg );
TelegramBot_CalcContactsHash($hash, $arg);
# then calculate correct string reading and put this into the reading
@@ -548,7 +562,7 @@ sub TelegramBot_Get($@)
if ( ( defined($guret) ) && ( ref($guret) eq "HASH" ) ) {
if ( defined($guret->{file_path} ) ) {
# URL is https://api.telegram.org/file/bot<token>/<file_path>
my $filePath = TelegramBot_GetUTF8Back( $guret->{file_path} );
my $filePath = $guret->{file_path};
$hash->{fileUrl} = "https://api.telegram.org/file/bot".$hash->{Token}."/".$filePath;
$ret = $hash->{fileUrl};
} else {
@@ -722,8 +736,9 @@ sub TelegramBot_SplitFavoriteDef($$) {
my ( $alias, $desc, $parsecmd, $confirm );
if ( $cmd =~ /^\s*((\/[^\[=]+)?(\[([^\]]+)\])?=)?(\??)(.*)$/ ) {
if ( $cmd =~ /^\s*((\/[^\[=]*)?(\[([^\]]+)\])?=)?(\??)(.*?)$/ ) {
$alias = $2;
$alias = undef if ( $alias && ( $alias eq "/" ) );
$desc = $4;
$confirm = $5;
$parsecmd = $6;
@@ -766,9 +781,11 @@ sub TelegramBot_SentFavorites($$$$) {
my $ecmd = $clist[$cmdId];
my ( $alias, $desc, $parsecmd, $needsConfirm ) = TelegramBot_SplitFavoriteDef( $hash, $ecmd );
$ecmd = $parsecmd;
return "Alias could not be parsed :$ecmd:" if ( ! $parsecmd );
# Debug "Needsconfirm: ". $needsConfirm;
$ecmd = $parsecmd;
# Debug "Needsconfirm: ". $needsConfirm;
if ( ( ! $isConfirm ) && ( $needsConfirm ) ) {
# ask first for confirmation
@@ -776,7 +793,16 @@ sub TelegramBot_SentFavorites($$$$) {
my @tmparr;
my @keys = ();
my @tmparr1 = ( TelegramBot_PutToUTF8( $fcmd.$cmd."? = ".(($desc)?$desc:$parsecmd)." ausführen?" ) );
# my @tmparr1 = ( TelegramBot_PutToUTF8( $fcmd.$cmd."? = ".(($desc)?$desc:$parsecmd)." ausführen?" ) );
# my @tmparr1 = ( $fcmd.$cmd."? = ".(($desc)?$desc:$parsecmd)." ausführen?" );
# my $tmptxt = encode_utf8( $fcmd.$cmd."? = ".(($desc)?$desc:$parsecmd)." ausführen?" );
my $tmptxt = $fcmd.$cmd."? = ".(($desc)?$desc:$parsecmd).encode_utf8( " ausführen?" );
# my $tmptxt = $fcmd.$cmd."? = ".(($desc)?$desc:$parsecmd);
# Debug "tmptxt :$tmptxt:";
# utf8::upgrade($tmptxt);
# $tmptxt = TelegramBot_PutToUTF8($tmptxt);
# $tmptxt = decode_utf8($tmptxt);
my @tmparr1 = ( $tmptxt );
push( @keys, \@tmparr1 );
my @tmparr2 = ( "Abbruch" );
push( @keys, \@tmparr2 );
@@ -784,7 +810,7 @@ sub TelegramBot_SentFavorites($$$$) {
my $jsonkb = TelegramBot_MakeKeyboard( $hash, 1, @keys );
# LOCAL: External message
$ret = AttrVal( $name, 'textResponseConfirm', 'TelegramBot FHEM : $peer\n Bestätigung \n');
$ret = encode_utf8( AttrVal( $name, 'textResponseConfirm', 'TelegramBot FHEM : $peer\n Bestätigung \n') );
$ret =~ s/\$peer/$mpeernorm/g;
# $ret = "TelegramBot FHEM : ($mpeernorm)\n Bestätigung \n";
@@ -809,10 +835,12 @@ sub TelegramBot_SentFavorites($$$$) {
foreach my $cs ( @clist ) {
$cnt += 1;
my ( $alias, $desc, $parsecmd, $needsConfirm ) = TelegramBot_SplitFavoriteDef( $hash, $cs );
my @tmparr = ( TelegramBot_PutToUTF8( $fcmd.$cnt." = ".($alias?$alias." = ":"").(($desc)?$desc:$parsecmd) ) );
push( @keys, \@tmparr );
if ( defined($parsecmd) ) {
my @tmparr = ( $fcmd.$cnt." = ".($alias?$alias." = ":"").(($desc)?$desc:$parsecmd) );
push( @keys, \@tmparr );
}
}
# my @tmparr = ( TelegramBot_PutToUTF8( $fcmd."0 = Abbruch" ) );
# my @tmparr = ( $fcmd."0 = Abbruch" );
# push( @keys, \@tmparr );
my $jsonkb = TelegramBot_MakeKeyboard( $hash, 1, @keys );
@@ -851,7 +879,7 @@ sub TelegramBot_SentLastCommand($$$) {
my @keys = ();
foreach my $cs ( @cmds ) {
my @tmparr = ( TelegramBot_PutToUTF8( $cs ) );
my @tmparr = ( $cs );
push( @keys, \@tmparr );
}
# my @tmparr = ( $fcmd."0 = Abbruch" );
@@ -1241,14 +1269,20 @@ sub TelegramBot_SendIt($$$$$;$$)
# $TelegramBot_hu_do_params{url} = "http://requestb.in";
## JVI
# Debug "send org msg :".$msg.":";
if ( length($msg) > 1000 ) {
$hash->{sentMsgText} = substr($msg,0, 1000)."...";
} else {
$hash->{sentMsgText} = $msg;
}
my $c = chr(10);
$msg =~ s/([^\\])\\n/$1$c/g;
$msg =~ s/(?<![\\])\\n/\x0A/g;
$msg =~ s/(?<![\\])\\t/\x09/g;
## JVI
# Debug "send conv msg :".$msg.":";
# add msg (no file)
$ret = TelegramBot_AddMultipart($hash, \%TelegramBot_hu_do_params, "text", undef, $msg, 0 ) if ( ! defined( $ret ) );
@@ -1304,12 +1338,20 @@ sub TelegramBot_SendIt($$$$$;$$)
}
## JVI
# Debug "send command :".$TelegramBot_hu_do_params{data}.":";
if ( defined( $ret ) ) {
Log3 $name, 3, "TelegramBot_SendIt $name: Failed with :$ret:";
TelegramBot_Callback( \%TelegramBot_hu_do_params, $ret, "");
} else {
$TelegramBot_hu_do_params{args} = \@args;
# reset UTF8 flag for ensuring length in httputils is correctly handling lenght (as bytes)
# Debug "send a command :".$TelegramBot_hu_do_params{data}.":";
# $TelegramBot_hu_do_params{data} = encode_utf8(decode_utf8($TelegramBot_hu_do_params{data}));
# Debug "send b command :".$TelegramBot_hu_do_params{data}.":";
HttpUtils_NonblockingGet( \%TelegramBot_hu_do_params);
}
@@ -1413,7 +1455,19 @@ sub TelegramBot_MakeKeyboard($$@)
my $refkb = \%par;
$ret = encode_json( $refkb );
# $refkb = TelegramBot_Deepencode( $name, $refkb );
# $ret = encode_json( $refkb );
my $json = JSON->new->utf8;
$ret = $json->utf8(0)->encode( $refkb );
Log3 $name, 4, "TelegramBot_MakeKeyboard $name: json :$ret: is utf8? ".(utf8::is_utf8($ret)?"yes":"no");
if ( utf8::is_utf8($ret) ) {
utf8::downgrade($ret);
Log3 $name, 4, "TelegramBot_MakeKeyboard $name: json downgraded :$ret: is utf8? ".(utf8::is_utf8($ret)?"yes":"no");
}
# Debug "json_keyboard :$ret:";
return $ret;
}
@@ -1489,11 +1543,46 @@ sub TelegramBot_RetrySend($)
my $ref = $param->{args};
Log3 $name, 4, "TelegramBot_Retrysend $name: reply @$ref[4] retry @$ref[5] :@$ref[0]: -:@$ref[1]: ";
Log3 $name, 4, "TelegramBot_Retrysend $name: reply ".(defined( @$ref[4] )?@$ref[4]:"<undef>")." retry @$ref[5] :@$ref[0]: -:@$ref[1]: ";
TelegramBot_SendIt( $hash, @$ref[0], @$ref[1], @$ref[2], @$ref[3], @$ref[4], @$ref[5] );
}
sub TelegramBot_Deepencode
{
my @result;
my $name = shift( @_ );
# Debug "TelegramBot_Deepencode with :".(@_).":";
for (@_) {
my $reftype= ref $_;
if( $reftype eq "ARRAY" ) {
Log3 $name, 4, "TelegramBot_Deepencode $name: found an ARRAY";
push @result, [ TelegramBot_Deepencode($name, @$_) ];
}
elsif( $reftype eq "HASH" ) {
my %h;
@h{keys %$_}= TelegramBot_Deepencode($name, values %$_);
Log3 $name, 4, "TelegramBot_Deepencode $name: found a HASH";
push @result, \%h;
}
else {
my $us = $_ ;
if ( utf8::is_utf8($us) ) {
$us = encode_utf8( $_ );
}
Log3 $name, 4, "TelegramBot_Deepencode $name: encoded a String from :".$_.": to :".$us.":";
push @result, $us;
}
}
return @_ == 1 ? $result[0] : @result;
}
#####################################
# INTERNAL: Callback is the callback for any nonblocking call to the bot api (e.g. the long poll on update call)
@@ -1535,9 +1624,12 @@ sub TelegramBot_Callback($$$)
### mark as latin1 to ensure no conversion is happening (this works surprisingly)
eval {
$data = encode( 'latin1', $data );
# $data = encode( 'latin1', $data );
$data = encode_utf8( $data );
# $data = decode_utf8( $data );
# Debug "-----AFTER------\n".$data."\n-------UC=".${^UNICODE} ."-----\n";
$jo = decode_json( $data );
$jo = TelegramBot_Deepencode( $name, $jo );
};
@@ -1549,7 +1641,7 @@ sub TelegramBot_Callback($$$)
$ret = "Callback returned no valid JSON !";
} elsif ( ! $jo->{ok} ) {
if ( defined( $jo->{description} ) ) {
$ret = "Callback returned error:".TelegramBot_GetUTF8Back( $jo->{description} ).":";
$ret = "Callback returned error:".$jo->{description}.":";
} else {
$ret = "Callback returned error without description";
}
@@ -1802,8 +1894,6 @@ sub TelegramBot_ParseMsg($$$)
if ( defined( $mtext ) ) {
my $mtext = TelegramBot_GetUTF8Back( $mtext );
Log3 $name, 4, "TelegramBot_ParseMsg $name: text :$mtext:";
my $mpeernorm = $mpeer;
@@ -2268,8 +2358,7 @@ sub TelegramBot_encodeContactString($) {
$str =~ s/^\s+|\s+$//g;
$str =~ s/ /_/g;
return TelegramBot_GetUTF8Back( $str );
# return $str;
return $str;
}
#####################################
@@ -2328,7 +2417,9 @@ sub TelegramBot_checkAllowedPeer($$$) {
sub TelegramBot_GetUTF8Back( $ ) {
my ( $data ) = @_;
return encode('utf8', $data);
return $data;
#JVI
# return encode('utf8', $data);
}
@@ -2338,7 +2429,9 @@ sub TelegramBot_GetUTF8Back( $ ) {
sub TelegramBot_PutToUTF8( $ ) {
my ( $data ) = @_;
return decode('utf8', $data);
return $data;
#JVI
# return decode('utf8', $data);
}