SatIndex grabber now working.
3 using System.Collections.Generic;
4 using System.Diagnostics;
8 using System.Text.RegularExpressions;
9 using System.Threading.Tasks;
/// <summary>
/// Downloads a single channel detail page from <paramref name="aUrl"/> and fills a new
/// <c>Channel</c> from the label/value table cells found on that page.
/// </summary>
/// <param name="aUrl">Absolute URL of the channel detail page to download.</param>
// NOTE(review): this listing omits lines that hold only braces/blank lines and a few statements;
// the declaration of 'dom' and the final return are not visible here. Presumably 'dom' is a CQ
// document built from 'satIndex' and the method returns 'channel' -- confirm against the full file.
// Every field below is read with the same pattern: locate the 'td.chdet1' cell whose text contains
// the (German) label, take the element returned by Next(), read its text and HTML-decode it.
15 public static Channel ParseChannel(string aUrl)
17 Channel channel = new Channel();
// Fetch the raw HTML of the page synchronously.
19 string satIndex = new WebClient().DownloadString(aUrl);
20 //Debug.Write(satIndex);
23 channel.Name = WebUtility.HtmlDecode(dom.Find("td.chdet1:contains('Name')").Next().Text());
24 //Convert from default encoding to UTF8
25 //We spent a lot of time trying to get this right until we found our answer in the following thread.
26 //http://stackoverflow.com/questions/14057434/how-can-i-transform-string-to-utf-8-in-c
27 //byte[] bytes = Encoding.Default.GetBytes(channel.Name);
28 //channel.Name = Encoding.UTF8.GetString(bytes);
30 channel.Satellite = WebUtility.HtmlDecode(dom.Find("td.chdet1:contains('Satellit')").Next().Text());
31 channel.OrbitalPosition = WebUtility.HtmlDecode(dom.Find("td.chdet1:contains('Position')").Next().Text());
32 // Frequency, remove dots and unit
33 channel.Frequency = WebUtility.HtmlDecode(dom.Find("td.chdet1:contains('Frequenz')").Next().Text());
34 channel.Frequency = channel.Frequency.Replace(" MHz", "").Replace(".", "");
35 // Just get 'H' or 'V' I guess
// NOTE(review): Substring(0,1) throws if the decoded text is empty (e.g. label not found) -- confirm this cannot happen.
36 channel.Polarisation = WebUtility.HtmlDecode(dom.Find("td.chdet1:contains('Polarisation')").Next().Text()).Substring(0,1);
37 channel.Transponder = WebUtility.HtmlDecode(dom.Find("td.chdet1:contains('Transponder:')").Next().Text());
// Dots are removed from the transponder ID text.
38 channel.TransponderID = WebUtility.HtmlDecode(dom.Find("td.chdet1:contains('Transponder ID')").Next().Text()).Replace(".", "");
// Beam is not read from the page; it is always set to this fixed value.
39 channel.Beam = "Astra";
40 channel.Standard = WebUtility.HtmlDecode(dom.Find("td.chdet1:contains('Art')").Next().Text());
41 channel.Modulation = WebUtility.HtmlDecode(dom.Find("td.chdet1:contains('Modulation')").Next().Text());
// Symbol rate: strip the unit suffix and the dots.
42 channel.SymbolRate = WebUtility.HtmlDecode(dom.Find("td.chdet1:contains('Symbolrate')").Next().Text());
43 channel.SymbolRate = channel.SymbolRate.Replace(" kSym/s", "").Replace(".", "");
44 channel.FEC = WebUtility.HtmlDecode(dom.Find("td.chdet1:contains('FEC')").Next().Text());
45 channel.Provider = WebUtility.HtmlDecode(dom.Find("td.chdet1:contains('Provider')").Next().Text());
47 channel.Bitrate = WebUtility.HtmlDecode(dom.Find("td.chdet1:contains('Video Bitrate')").Next().Text());
48 channel.NetworkID = WebUtility.HtmlDecode(dom.Find("td.chdet1:contains('Netzwerk ID')").Next().Text());
50 channel.SID = WebUtility.HtmlDecode(dom.Find("td.chdet1:contains('Service ID')").Next().Text());
51 channel.VPID = WebUtility.HtmlDecode(dom.Find("td.chdet1:contains('Video Pid')").Next().Text());
52 channel.PCR = WebUtility.HtmlDecode(dom.Find("td.chdet1:contains('PCR Pid')").Next().Text());
53 channel.PMT = WebUtility.HtmlDecode(dom.Find("td.chdet1:contains('PMT Pid')").Next().Text());
// The " (kein Videotext)" marker is removed from the teletext PID text.
54 channel.TXT = WebUtility.HtmlDecode(dom.Find("td.chdet1:contains('Videotext Pid')").Next().Text()).Replace(" (kein Videotext)","");
56 // We should get 4 entries:
// Only the first two '.standart2' matches are used: index 0 as category, index 1 as country.
// NOTE(review): indexing is unguarded; fewer than two matches would fail here -- confirm page layout.
61 CQ properties = dom.Find(".standart2");
62 channel.Category = WebUtility.HtmlDecode(properties[0].InnerText).Trim();
63 channel.Country = WebUtility.HtmlDecode(properties[1].InnerText).Trim();
69 //*[@id="container"]/div[2]/table[3]/tbody/tr/td/table[2]/tbody/tr/td
/// <summary>
/// Downloads the channel list page at <paramref name="aUrl"/> and builds a list of
/// <c>Channel</c> objects from it, reporting progress through <paramref name="aProgress"/>.
/// </summary>
/// <param name="aProgress">Receives the <c>ProgressReport</c> created by this method.</param>
/// <param name="aChannels">Already known channels; searched by name in the per-row loop below.</param>
/// <param name="aUrl">URL of the list page to download.</param>
/// <param name="aOrbitalPosition">Only referenced by a commented-out line in the visible code.</param>
/// <param name="aUseChannelIdForName">When true and the name element has a "title" attribute, that attribute is used as the channel name.</param>
/// <param name="aCategoryOverride">When not empty, replaces the category parsed for each row.</param>
// NOTE(review): this listing omits lines that hold only braces, 'else', 'continue', 'return' and some
// statements (e.g. the declaration of 'dom' and any update of the progress value). The visible code
// contains two parsing passes: one over '.freq1' elements that follows each link via ParseChannel, and
// one over nested frequency tables. Whether both passes are reachable cannot be told from here -- confirm.
70 public static List<Channel> Parse(IProgress<ProgressReport> aProgress, List<Channel> aChannels, string aUrl, string aOrbitalPosition, bool aUseChannelIdForName = false, string aCategoryOverride = "")
72 //Create our list of channels
73 List<Channel> channels = new List<Channel>();
74 //To avoid duplicated name
// Maps a cleaned channel name to the number of times it has been seen so far.
75 Dictionary<string, int> names = new Dictionary<string, int>();
77 string satIndex = new WebClient().DownloadString(aUrl);
78 //Debug.Write(satIndex);
// One '.freq1' element per channel entry; its count is used as the progress maximum.
82 CQ channelsTd = dom.Find(".freq1");
84 ProgressReport report = new ProgressReport();
85 report.Max = channelsTd.Count();
87 aProgress.Report(report);
89 foreach ( IDomObject td in channelsTd)
// The href of the element's first child is appended to the site root to get the detail page URL.
91 string channelUrl = "https://www.satindex.de" + td.FirstChild.GetAttribute("href");
93 Channel channel = ParseChannel(channelUrl);
95 //Make sure our channel name looks decent
96 channel.Name = CleanChannelName(channel.Name);
97 //Make sure the resulting name is unique to avoid having multiple tuning detail for a single channel
// Name seen before: bump its counter and append the new count to the name.
98 if (names.ContainsKey(channel.Name))
100 names[channel.Name]++;
101 channel.Name += " " + names[channel.Name];
// Presumably the 'else' branch (the keyword is not visible): first occurrence registers the name with count 1.
105 names.Add(channel.Name, 1);
108 // Add it to our collection
109 channels.Add(channel);
112 aProgress.Report(report);
118 //Get all the Frequency tables in our page
119 // Why is this not working?
120 //CQ sats = dom["#container > div:nth-child(2) > table:nth-child(16) > tbody > tr > td > table:nth-child(8) > tbody > tr > td > table"];
121 // As a workaround we did the following
122 CQ sats = dom["#container"]["div:nth-child(2)"]["table:nth-child(16)"]["tbody"]["tr"]["td"]["table:nth-child(8)"]["tbody"]["tr"]["td"]["table"];
124 List<IDomObject> transponders = sats.ToList();
128 foreach (IDomObject frq in transponders)
// 'common' collects the values read once per frequency table; each row's channel is initialized from it.
130 Channel common = new Channel();
132 //Parse channel details
133 //common.OrbitalPosition = aOrbitalPosition;
134 //string wsm1 = WebUtility.HtmlDecode(frq.Cq().Find(".wsm1").Get(0).InnerText).Trim();
// Satellite is a fixed value here; it is not read from the page.
137 common.Satellite = "Astra 19.2° East";
// Each value below comes from the first element matching the given column selector inside this table.
138 common.Frequency = WebUtility.HtmlDecode(frq.Cq().Find("tbody > tr > td:nth-child(3)").Get(0).InnerText);
139 common.Polarisation = WebUtility.HtmlDecode(frq.Cq().Find("tbody > tr > td:nth-child(4)").Get(0).InnerText);
140 common.Transponder = WebUtility.HtmlDecode(frq.Cq().Find("tbody > tr > td:nth-child(5) > a").Get(0).InnerText);
141 common.Beam = WebUtility.HtmlDecode(frq.Cq().Find("tbody > tr > td:nth-child(6) > a").Get(0).InnerText);
142 common.Standard = WebUtility.HtmlDecode(frq.Cq().Find("tbody > tr > td:nth-child(7)").Get(0).InnerText);
143 common.Modulation = WebUtility.HtmlDecode(frq.Cq().Find("tbody > tr > td:nth-child(8)").Get(0).InnerText);
144 common.SymbolRate = WebUtility.HtmlDecode(frq.Cq().Find("tbody > tr > td:nth-child(9) > a").Get(0).InnerText);
145 common.FEC = WebUtility.HtmlDecode(frq.Cq().Find("tbody > tr > td:nth-child(9) > a:nth-child(2)").Get(0).InnerText);
148 common.Provider = WebUtility.HtmlDecode(frq.Cq().Find("tbody > tr > td:nth-child(10) > b").Get(0).InnerText);
// Bitrate shares the tenth cell with the provider; a leading ", " separator is stripped when present.
// NOTE(review): Substring(0, 2) throws when the text is shorter than two characters -- confirm the cell is never that short.
154 common.Bitrate = WebUtility.HtmlDecode(frq.Cq().Find("tbody > tr > td:nth-child(10)").Get(0).InnerText);
155 if (common.Bitrate.Substring(0, ", ".Length) == ", ")
157 common.Bitrate = common.Bitrate.Substring(", ".Length, common.Bitrate.Length - ", ".Length);
160 common.NetworkID = WebUtility.HtmlDecode(frq.Cq().Find("tbody > tr > td:nth-child(11)").Get(0).InnerText);
161 //common.NetworkID = common.NetworkID.Substring("NID:".Length, common.NetworkID.Length - "NID:".Length);
163 common.TransponderID = WebUtility.HtmlDecode(frq.Cq().Find("tbody > tr > td:nth-child(12)").Get(0).InnerText);
164 //common.TransponderID = common.TransponderID.Substring("TID:".Length, common.TransponderID.Length - "TID:".Length);
166 //We got common properties for the coming channels
167 //Debug.Write(common.ToString());
169 //Now get all the channels for that frequency
170 //Channel common = new Channel();
173 CQ channelsTableRows = frq.Cq().Find("tbody").Children("tr");
176 //CQ channelsDiv = frq.Cq().Next("div");
177 //CQ channelsTableRows = channelsDiv.Find("table.fl > tbody").Children("tr");
179 foreach (IDomObject row in channelsTableRows)
181 Channel channel = new Channel();
182 //Initialize this channel with common properties on this frequency
183 channel.Copy(common);
185 //Try and parse channel name
// The name is looked up as a link in the third cell first, then as an <i> element in the same cell.
186 CQ cqChannelName = row.Cq().Find("td:nth-child(3) > a");
187 if (cqChannelName.Length == 0)
189 cqChannelName = row.Cq().Find("td:nth-child(3) > i");
190 if (cqChannelName.Length == 0)
192 //Can't get channel name
// Presumably followed by a 'continue' that is not visible in this listing.
193 Debug.Write("WARNING: Can't find channel name! Skipping this channel");
198 string channelName = "";
199 if (cqChannelName.Get(0).HasAttribute("title") && aUseChannelIdForName)
201 //We want to use the channel ID
202 channelName = cqChannelName.Get(0).GetAttribute("title");
// Presumably the 'else' branch (keyword not visible): fall back to the element's inner text.
206 channelName = cqChannelName.Get(0).InnerText;
210 channel.Name = WebUtility.HtmlDecode(channelName);
211 //Convert from default encoding to UTF8
212 //We spent a lot of time trying to get this right until we found our answer in the following thread.
213 //http://stackoverflow.com/questions/14057434/how-can-i-transform-string-to-utf-8-in-c
// The name is turned into bytes with the system default encoding and those bytes are decoded as UTF-8.
214 byte[] bytes = Encoding.Default.GetBytes(channel.Name);
215 channel.Name = Encoding.UTF8.GetString(bytes);
219 if (channel.Name == "Name" || channel.Name == "Sorted by name")
221 //Skipping header rows
225 //Make sure our channel name looks decent
226 channel.Name = CleanChannelName(channel.Name);
227 //Make sure the resulting name is unique to avoid having multiple tuning detail for a single channel
// Same scheme as in the first pass: repeated names get their occurrence count appended.
228 if (names.ContainsKey(channel.Name))
230 names[channel.Name]++;
231 channel.Name += " " + names[channel.Name];
235 names.Add(channel.Name, 1);
239 //We don't want channels we already have
// Linear search of the caller's list by exact name; the action taken on a match is not visible here.
240 Channel existingChannel = aChannels.Find(c => c.Name == channel.Name);
241 if (existingChannel != null)
247 //So we have a channel name get the other properties then
248 channel.Country = WebUtility.HtmlDecode(row.Cq().Find("td:nth-child(4)").Get(0).InnerText).Trim();
249 channel.Category = WebUtility.HtmlDecode(row.Cq().Find("td:nth-child(5)").Get(0).InnerText).Trim();
// An empty category is replaced with "Other".
250 if (channel.Category == "")
252 channel.Category = "Other";
255 //Override category if needed
256 if (aCategoryOverride != "")
258 channel.Category = aCategoryOverride;
262 //Skip the encryptions
263 channel.SID = WebUtility.HtmlDecode(row.Cq().Find("td:nth-child(8)").Get(0).InnerText).Trim();
264 channel.VPID = WebUtility.HtmlDecode(row.Cq().Find("td:nth-child(9)").Get(0).InnerText).Trim();
// NOTE(review): PMT, PCR and TXT all read the same cell (td:nth-child(11)); this looks like a
// copy-paste leftover -- confirm the intended column for each value.
266 channel.PMT = WebUtility.HtmlDecode(row.Cq().Find("td:nth-child(11)").Get(0).InnerText).Trim();
267 channel.PCR = WebUtility.HtmlDecode(row.Cq().Find("td:nth-child(11)").Get(0).InnerText).Trim();
268 channel.TXT = WebUtility.HtmlDecode(row.Cq().Find("td:nth-child(11)").Get(0).InnerText).Trim();
270 //Append that new channel to our list
271 channels.Add(channel);
273 //Show it in debug output
274 Debug.Write(channel);
276 } //For each frequency
/// <summary>
/// Normalizes a channel name: trims it, cuts off a country marker such as " Germany" or
/// " (Deutschland)", removes a leading "Id: " prefix and trims again.
/// </summary>
/// <param name="aName">Raw channel name; it is dereferenced immediately, so it must not be null.</param>
// NOTE(review): lines holding only braces and the return statement are omitted from this listing;
// presumably the cleaned 'aName' is returned -- confirm against the full file.
282 public static string CleanChannelName(string aName)
284 aName = aName.Trim();
// Markers are tested in this order and only the first one found is applied.
285 string[] remove = { " Germany", " Deutschland", " (Germany)", " (Deutschland)" };
287 foreach (string item in remove)
289 //if (aName.EndsWith(item))
// Contains (not EndsWith) is used, so the marker may appear anywhere in the name.
290 if (aName.Contains(item))
// Keeps only the text before the LAST occurrence of the marker; anything after it is dropped as well.
292 aName = aName.Substring(0, aName.LastIndexOf(item));
293 break; //only allow one match at most
297 string[] removePrefix = { "Id: " };
299 foreach (string item in removePrefix)
301 if (aName.StartsWith(item))
// Drops the prefix and keeps the remainder of the name.
303 aName = aName.Substring(item.Length, aName.Length - item.Length);
304 break; //only allow one match at most
// Final trim removes whitespace left over after the removals above.
310 aName = aName.Trim();
/// <summary>
/// Builds a filtered copy of <paramref name="aChannels"/>: a channel is kept only when no channel
/// named "&lt;name&gt; HD" exists in the list and it is not a non-HD Bundesliga or non-HD Sky Sport
/// channel. Kept channels get their category (and some names) rewritten by the rules below.
/// </summary>
/// <param name="aChannels">Channels to filter. Kept Channel objects are modified in place (Name and Category are assigned).</param>
// NOTE(review): lines holding only braces, 'else' and the return statement are omitted from this
// listing, and the end of the method is not visible; presumably 'channels' is returned -- confirm.
// The category rules appear to be independent 'if' statements evaluated in order, so a later rule can
// overwrite the result of an earlier one (e.g. "Series" set below is later mapped to "Movies & Series").
315 public static List<Channel> CleanChannelList(List<Channel> aChannels)
317 //Create our list of channels
318 List<Channel> channels = new List<Channel>();
320 foreach (Channel channel in aChannels)
// Linear search over the whole input list for an HD variant of this channel, once per channel.
322 Channel hdChannel = aChannels.Find(c => c.Name == channel.Name + " HD");
323 if (hdChannel == null
324 && !(channel.Name.Contains("Bundesliga") && !channel.Name.Contains("HD")) //We don't want non HD bundesliga
325 && !(channel.Name.StartsWith("Sky Sport") && !channel.Name.Contains("HD")) //We don't want non HD Sky Sport
// Sky channels filed under the generic "Allgemein" category are treated as movie/series channels.
329 if (channel.Category == "Allgemein"
330 && channel.Name.Contains("Sky"))
332 channel.Category = "Movies & Series";
335 if (channel.Name == "SYFY HD")
337 channel.Category = "Movies & Series";
340 // Patch Bundesliga channel names by removing Sport, cause they are way too long names
341 if (channel.Name.Contains("Bundesliga"))
343 channel.Name = channel.Name.Replace("Sport ", "");
346 //Patch some missing or bad categories
347 if (channel.Name.Contains("Bundesliga")
348 || channel.Name.Contains("Sport"))
350 channel.Category = "Sport";
353 if (channel.Name.Contains("Sky Select"))
355 channel.Category = "Pay per view";
358 if (channel.Name.Contains("TNT")
359 || channel.Name.Contains("13th"))
361 channel.Category = "Movies & Series";
// German category names are translated to their English equivalents.
364 if (channel.Category =="Kinderprogramm")
366 channel.Category = "Kids";
369 if (channel.Category == "Hinweistafel")
371 channel.Category = "General";
// "Series" assigned here is rewritten to "Movies & Series" by a later rule in this method.
374 if (channel.Name.StartsWith("Sky Atlantic")
375 || channel.Name.StartsWith("SyFy")
376 || channel.Name.StartsWith("Fox"))
378 channel.Category = "Series";
381 //Collapse some categories
382 if (channel.Category == "Entertainment"
383 || channel.Category == "Kultur"
384 || channel.Category == "Verschiedenes")
386 channel.Category = "General";
389 if (channel.Category == "Musik"
390 || channel.Name.Contains("Music")
391 || channel.Name.Contains("Musik"))
393 channel.Category = "Music";
// The patterns below are unanchored, case-insensitive substring matches on the channel name.
// NOTE(review): short patterns such as "date" or "Eros" also match inside unrelated words
// (e.g. "Update") -- confirm that this breadth is intended.
398 if (channel.Category == "Porn"
399 || channel.Category == "Erotik"
400 || channel.Name.Contains("Blue Movie")
401 || Regex.IsMatch(channel.Name,"Sex", RegexOptions.IgnoreCase)
402 || Regex.IsMatch(channel.Name, "Erotik", RegexOptions.IgnoreCase)
403 || Regex.IsMatch(channel.Name, "Girl", RegexOptions.IgnoreCase)
404 || Regex.IsMatch(channel.Name, "Eros", RegexOptions.IgnoreCase)
405 || Regex.IsMatch(channel.Name, "Gay", RegexOptions.IgnoreCase)
406 || Regex.IsMatch(channel.Name, "frauen", RegexOptions.IgnoreCase)
407 || Regex.IsMatch(channel.Name, "Maenner", RegexOptions.IgnoreCase)
408 || Regex.IsMatch(channel.Name, "bunny", RegexOptions.IgnoreCase)
409 || Regex.IsMatch(channel.Name, "date", RegexOptions.IgnoreCase)
412 channel.Category = "Erotic";
415 if (channel.Category == "Presentations"
416 || channel.Category == "Nachrichten")
418 channel.Category = "News";
421 if (channel.Category == "History"
422 || channel.Category == "Dokus / Reportagen")
424 channel.Category = "Documentaries";
// Travel channels are folded into "Documentaries" as well.
427 if (channel.Category == "Travel"
428 || channel.Category == "Urlaub / Reisen")
430 channel.Category = "Documentaries";
434 if (channel.Category == "Lifestyle"
435 || channel.Category == "Allgemein"
436 || channel.Category == "Other"
437 || channel.Category == "Cultural")
439 channel.Category = "General";
442 if (channel.Category == "Movies"
443 || channel.Category == "Spielfilme")
445 channel.Category = "Movies & Series";
448 if (channel.Category == "Series")
450 channel.Category = "Movies & Series";
453 if (channel.Category == "Regional Programm")
455 channel.Category = "Regional";
458 //No corresponding HD channel, keep it then
459 channels.Add(channel);
// Presumably the 'else' branch of the keep/discard test (keyword not visible). If so, the message
// is also written for non-HD Bundesliga / Sky Sport channels that have no HD twin -- confirm.
463 Debug.Write("WARNING: Found HD channel for " + channel.Name + ". Discarding it!\n");