Oops, forgot the scripts;-)
Pjotr Kourzanov wrote:
Dear all,
Having been dissatisfied with the performance and features of xmltv2vdr (too slow, no credits/category information in epg.data), I have created a new script (based on AWK), which can be found in the attachment.
If you want to try it, put your channels.conf.xmltv in one folder with these two scripts, and then run cat *.xml | ./xmltv2epg > epg.
My mileage: 54MB XML -> 18MB EPG in 44 minutes (xmltv2vdr used to take 9 hours).
Pjotr
vdr mailing list vdr@linuxtv.org http://www.linuxtv.org/cgi-bin/mailman/listinfo/vdr
#!/bin/sh
# xmltv2epg - read XMLTV data on stdin and write the converted EPG on stdout.
#
# Usage: cat *.xml | ./xmltv2epg > epg
#
# channels.conf.xmltv and xmltv2epg.awk are looked up in the directory that
# holds this script.

set -eu

# external dependencies
date="/bin/date"
sed="/bin/sed"
grep="/bin/grep"
recode="/usr/bin/recode"

# input data
dir=$(dirname -- "$0")
channels="$dir/channels.conf.xmltv"

# miscellaneous
# Put every tag on a line of its own so the awk script can work record by
# record. NOTE: "\n" in the replacement text relies on GNU sed.
htmlnorm='s,<[^<>]*>,\n&\n,g'

in_tz="Europe/Amsterdam"
export in_charset=utf8
export charset=utf8
export LC_ALL=ru_RU.UTF-8
export TZ="$in_tz"

xmltv2epg_awk="$dir/xmltv2epg.awk"

# Blank lines are dropped with a POSIX character class: inside a bracket
# expression "\t", "\r" and "\n" are literal characters, so the former
# "^[ \t\r\n]*$" also discarded text lines made up only of t, r, n,
# backslash or space.
"$sed" -e "$htmlnorm" \
  | "$grep" -v '^[[:space:]]*$' \
  | "$xmltv2epg_awk" -v channels="$channels" -v bindate="$date" \
  | "$recode" -f "${in_charset}..${charset}"
#!/usr/bin/awk -f
# xmltv2epg.awk - convert XMLTV input that has been split into one tag or one
# text chunk per line into C/E/T/S/D/e/c records on stdout.
#
# Variables expected via -v:
#   channels  path of the channel map read by load_channels()
#   bindate   path of the date(1) binary used to convert timestamps
#
# The script relies on gawk extensions (gensub, "delete array").

BEGIN {
    stderr = "/dev/stderr"

    # Fail early when the channel map cannot be opened; nothing useful can be
    # produced without it.
    if ((getline probe < channels) < 0) {
        print "cannot read channel map: " channels > stderr
        exit 1
    }
    close(channels)    # rewind so that load_channels() sees the first line again

    load_channels(channels)

    # Joiners used when several text chunks are collected for the same tag.
    # Tags without an entry here are joined with a single space by append().
    separator["desc"] = "|"
    separator["display-name"] = separator["title"] = separator["category"] = " / "
    separator["adapter"] = separator["presenter"] = separator["director"] = separator["actor"] = ", "
}
# Debug aid: echo every input record to stderr.
#{ print > stderr }

# Every record starts out unhandled. Without this reset the flag stays set
# after the first <channel> record and none of the "!handled" rules can run
# again.
# NOTE(review): the reset was commented out in the copy this was restored
# from; confirm against the original script.
{ handled = 0 }

# <rating system="...">: remember the rating system. The record is left
# unhandled, so the generic tag rules still see the <rating> tag.
/^<rating/ {
    rating_system = ($0 ~ /system="[^"]*"/) ? gensub(/.*system="([^"]*)".*/, "\\1", "g") : ""
}

# <subtitles type="...">: keep only the type attribute (empty when absent).
/^<subtitles/ {
    handled = 1
    data["subtitles"] = ($0 ~ /type="[^"]*"/) ? gensub(/.*type="([^"]*)".*/, "\\1", "g") : ""
}

# <channel id="...">: start collecting display names for this channel id.
/^<channel/ {
    handled = 1
    id = gensub(/.*id="([^"]*)".*/, "\\1", "g")
    data["display-name"] = ""
}

# </channel>: store the collected names. Only channels that have an entry in
# channel_src[] get events[id] set; programmes of other channels are skipped.
/^<\/channel>/ {
    handled = 1
    names[id] = data["display-name"]
    if (!channel_src[id]) {
        print "NO SRC for", id > stderr
        next
    }
    events[id] = 1
}
# Return the timestamp stored in attribute `attr` of the current record,
# rewritten from "YYYYMMDDhhmm[ss] +ZZZZ" to "YYYY-MM-DD hh:mm[:ss] +ZZZZ",
# or "" when the attribute is missing or has another shape. The attribute
# name must follow whitespace. The zone part must start with "+" or "-" and
# may not contain a single quote, so the result can be placed between single
# quotes on a shell command line.
function xmltv_time(attr,    m) {
    if (!match($0, "[ \t]" attr "=\"([0-9][0-9][0-9][0-9])([0-9][0-9])([0-9][0-9])([0-9][0-9])([0-9][0-9])([0-9][0-9])? ([-+][^\"']*)\"", m))
        return ""
    return m[1] "-" m[2] "-" m[3] " " m[4] ":" m[5] (m[6] != "" ? ":" m[6] : "") " " m[7]
}

# <programme start="..." stop="..." channel="...">: open a new event.
/^<programme/ {
    handled = 1
    id = gensub(/.*channel="([^"]*)".*/, "\\1", "g")
    if (!events[id])
        next

    # First event of this channel: close the previous channel block, if any,
    # and open a new one.
    if (events[id] == 1) {
        if (in_channel)
            print "c"
        print "C", channel_src[id], names[id]
        in_channel = 1
    }

    start = xmltv_time("start")
    stop = xmltv_time("stop")
    if (start == "") {
        print "unparsable start time, event skipped:", $0 > stderr
        id = ""    # events[""] is never set, so the </programme> rule skips this event too
        next
    }
    if (stop == "")
        stop = start

    # Let date(1) turn both timestamps into epoch seconds, one per output line.
    cmd = sprintf("%s -d '%s' +'%%s';%s -d '%s' +'%%s'", bindate, start, bindate, stop)
    delete dates    # never reuse the values of the previous programme
    i = 1
    # getline returns -1 on error, which a bare "while (cmd | getline)" would
    # treat as true forever.
    while ((cmd | getline line) > 0)
        dates[i++] = line
    close(cmd)
    if (i < 3) {
        print "date conversion failed, event skipped:", $0 > stderr
        id = ""
        next
    }

    # No usable stop time: assume a duration of three hours.
    if (dates[2] == dates[1])
        dates[2] = dates[1] + 60*60*3

    print "E", events[id], dates[1], dates[2] - dates[1]

    # Start collecting text and flag tags for this programme from scratch.
    delete data
    delete curtag
}
# </programme>: emit the title (T), category (S) and description (D) lines
# collected for the event opened by the <programme> rule, then close the
# event with "e".
/^<\/programme/ {
    handled = 1
    if (!events[id])
        next
    events[id]++    # the next event of this channel gets the next number

    if (data["title"])
        print "T", data["title"]
    if (data["category"])
        print "S", data["category"]

    # Names of the self-closing tags recorded in curtag[] for this programme
    # (for-in order is unspecified in awk).
    ext = ""
    for (t in curtag)
        ext = append(ext, t)

    # Field 1: sub-title, episode, country, date, premiere.
    d_head = "D " (data["sub-title"] ? data["sub-title"] : "")
    if (data["episode-num"])
        d_head = d_head (data["sub-title"] ? ", " : "") "part " gensub(/ . (.*) . /, "\\1", "g", data["episode-num"])
    if (data["country"])
        d_head = d_head data["country"] " "
    if (data["date"])
        d_head = d_head data["date"] " "
    if (data["premiere"])
        d_head = d_head "(premiere in " data["premiere"] ")"

    # Field 2: description text.
    d_desc = (data["desc"] ? data["desc"] : "")

    # Field 4: flag tags plus audio/video/colour/subtitle details.
    d_tech = (ext ? ext " " : "")
    if (data["audio"])
        d_tech = d_tech data["audio"] " "
    if (data["video"])
        d_tech = d_tech data["video"] " "
    if (data["colour"] == "no")
        d_tech = d_tech " BW "
    if (data["subtitles"])
        d_tech = d_tech data["subtitles"] " "

    # Field 5: credits.
    d_credits = ""
    if (data["adapter"])
        d_credits = d_credits "|Adapter: " data["adapter"]
    if (data["presenter"])
        d_credits = d_credits "|Presenter: " data["presenter"]
    if (data["director"])
        d_credits = d_credits "|Director: " data["director"]
    if (data["actor"])
        d_credits = d_credits "|Actor(s): " data["actor"]
    if (data["writer"])
        d_credits = d_credits "|Writer(s): " data["writer"]

    # Field 6: ratings.
    d_rating = ""
    if (data["rating"])
        d_rating = d_rating "|rating " data["rating"] (rating_system ? "(" rating_system ")" : "")
    if (data["star-rating"])
        d_rating = d_rating "|" data["star-rating"]

    # All fields go on one D line joined by "|"; the third field is itself a
    # literal "|".
    oOFS = OFS
    OFS = "|"
    print d_head, d_desc, "|", d_tech, d_credits, d_rating
    OFS = oOFS

    print "e"
}
# Self-closing tag (<name ... />): remember it as a flag for the current
# programme.
!handled && /^<[^ \t\r\n]+.*\/>/ {
    tag = gensub(/^<([^ \t\r\n]+).*>/, "\\1", "g")
    sub(/\/$/, "", tag)    # "<name/>" without attributes would otherwise yield "name/"
    #print "bool tag:",tag
    curtag[tag] = 1
    next
}

# Opening or closing tag: maintain the set of currently open tags.
!handled && /^<[^ \t\r\n]+.*>/ {
    tag = gensub(/^<([^ \t\r\n]+).*>/, "\\1", "g")
    if (tag ~ /^\//) {
        tag = substr(tag, 2)
        value = 0    # not read anywhere in the visible script
        delete active[tag]
    } else
        active[tag] = 1
    #print "tag:",tag,value
    next
}

# Text record: append it to the data of every tag that is currently open,
# using that tag's separator. The conversion does not depend on the tag, so
# it is done once per record.
!handled {
    text = txtconv($0)
    for (t in active)
        data[t] = append(data[t], text, separator[t])
}
# Close the last channel block, if one was opened.
END {
    if (in_channel)
        print "c"
}

# Read the channel map `channels` and fill
#   channel_src[xmltv_id]  channel key(s) the id maps to
#   channel_ch[key]        xmltv ids mapped to that key (space separated)
# Lines are ":"-separated; lines starting with ":" are skipped. The key is
# built as field4 "-" field11 "-" field12 "-" field10, and field 14 holds a
# ";"-separated list of mappings. Each mapping is either "vsetv=id,id,..."
# (keys are accumulated per id) or a plain "id,id,..." list (the key replaces
# any earlier one). Every mapping is reported on stderr.
# NOTE(review): the field layout looks like VDR's channels.conf with an extra
# 14th field - confirm.
function load_channels(channels,    a, i, n, chsrc, b, c, j, m, line, f, rc) {
    # getline returns -1 when the file cannot be read; testing "> 0" keeps
    # that from turning into an endless loop.
    while ((rc = (getline line < channels)) > 0) {
        if (line ~ /^:/)
            continue
        split(line, f, ":")
        chsrc = f[4] "-" f[11] "-" f[12] "-" f[10]
        n = split(f[14], a, ";")
        for (i = 1; i <= n; i++) {
            if (a[i] ~ /^vsetv=/) {
                split(a[i], b, "=")
                m = split(b[2], c, ",")
                for (j = 1; j <= m; j++) {
                    print "vsetv:", chsrc, c[j] > stderr
                    channel_src[c[j]] = append(channel_src[c[j]], chsrc)
                    channel_ch[chsrc] = append(channel_ch[chsrc], c[j])
                }
            } else {
                m = split(a[i], c, ",")
                for (j = 1; j <= m; j++) {
                    print "xmltv:", chsrc, c[j] > stderr
                    channel_src[c[j]] = chsrc
                    channel_ch[chsrc] = append(channel_ch[chsrc], c[j])
                }
            }
        }
    }
    if (rc < 0) {
        print "cannot read channel map: " channels > stderr
        exit 1
    }
    close(channels)
}

# Return a and b joined by c, or by a single space when c is empty; when a
# is empty the result is just b.
function append(a, b, c) {
    return a (a ? (c ? c : " ") : "") b
}

# Replace character entities in `a` by the characters they stand for.
# NOTE(review): the entity names were lost in the copy this was restored from
# (every call replaced a character by itself); the list below is a
# reconstruction - confirm against the original script.
# "&amp;" is decoded last so that text such as "&amp;lt;" becomes "&lt;" and
# is not decoded a second time.
function txtconv(a) {
    gsub(/&quot;/, "\"", a)
    gsub(/&apos;/, "'", a)
    gsub(/&lt;/, "<", a)
    gsub(/&gt;/, ">", a)
    gsub(/&#58;|&colon;/, ":", a)
    gsub(/&ouml;/, "ö", a)
    gsub(/&rsquo;|&#8217;/, "'", a)
    gsub(/&amp;/, "\\&", a)    # "\\&" is a literal ampersand in a gsub replacement
    return a
}