1.1 --- a/KingOfSat.cs Fri May 15 22:41:33 2015 +0200
1.2 +++ b/KingOfSat.cs Sat May 16 01:31:32 2015 +0200
1.3 @@ -9,148 +9,274 @@
1.4
1.5 namespace SatChanGen
1.6 {
1.7 - class KingOfSat
1.8 - {
1.9 - //
1.10 - // Summary:
1.11 - // Create a new CQ object wrapping a single element.
1.12 - //
1.13 - // Parameters:
1.14 - // aUrl:
1.15 - // URL to a KingOfSat channel list. Typically a package list.
1.16 - //
1.17 - // Return:
1.18 - // List of channels parsed.
1.19 - public static List<Channel> Parse(string aUrl, string aOrbitalPosition)
1.20 - {
1.21 - string kos = new WebClient().DownloadString(aUrl);
1.22 - //Debug.Write(kos);
1.23 + class KingOfSat
1.24 + {
1.25 + //
1.26 + // Summary:
1.27 + // Download a KingOfSat channel list page and parse the channels it contains.
1.28 + //
1.29 + // Parameters:
1.30 + // aUrl:
1.31 + // URL to a KingOfSat channel list. Typically a package list.
1.32 + //
1.33 + // Return:
1.34 + // List of channels parsed.
1.35 + public static List<Channel> Parse(List<Channel> aChannels, string aUrl, string aOrbitalPosition)
1.36 + {
1.37 + //Used to avoid duplicate channel names
1.38 + Dictionary<string, int> names = new Dictionary<string, int>();
1.39
1.40 - CQ dom = kos;
1.41 + string kos = new WebClient().DownloadString(aUrl);
1.42 + //Debug.Write(kos);
1.43
1.44 - //Get all the Frequency elements in our page
1.45 - CQ sats = dom[".frq"];
1.46 + CQ dom = kos;
1.47
1.48 - //Create our list of channels
1.49 - List<Channel> channels = new List<Channel>();
1.50 + //Get all the Frequency elements in our page
1.51 + CQ sats = dom[".frq"];
1.52
1.53 - foreach (IDomObject frq in sats.ToList())
1.54 - {
1.55 - Channel common = new Channel();
1.56 + //Create our list of channels
1.57 + List<Channel> channels = new List<Channel>();
1.58
1.59 - //Parse channel details
1.60 - common.OrbitalPosition = WebUtility.HtmlDecode(frq.Cq().Find("tbody > tr > td > a > font").Get(0).InnerText).Trim();
1.61 - if (common.OrbitalPosition != aOrbitalPosition)
1.62 - {
1.63 + foreach (IDomObject frq in sats.ToList())
1.64 + {
1.65 + Channel common = new Channel();
1.66 +
1.67 + //Parse channel details
1.68 + common.OrbitalPosition = WebUtility.HtmlDecode(frq.Cq().Find("tbody > tr > td > a > font").Get(0).InnerText).Trim();
1.69 + if (common.OrbitalPosition != aOrbitalPosition)
1.70 + {
1.71 //Wrong sat, skip
1.72 - continue;
1.73 - }
1.74 - common.Satellite = WebUtility.HtmlDecode(frq.Cq().Find("tbody > tr > td:nth-child(2) > a").Get(0).InnerText);
1.75 - common.Frequency = WebUtility.HtmlDecode(frq.Cq().Find("tbody > tr > td:nth-child(3)").Get(0).InnerText);
1.76 - common.Polarisation = WebUtility.HtmlDecode(frq.Cq().Find("tbody > tr > td:nth-child(4)").Get(0).InnerText);
1.77 - common.Transponder = WebUtility.HtmlDecode(frq.Cq().Find("tbody > tr > td:nth-child(5) > a").Get(0).InnerText);
1.78 - common.Beam = WebUtility.HtmlDecode(frq.Cq().Find("tbody > tr > td:nth-child(6) > a").Get(0).InnerText);
1.79 - common.Standard = WebUtility.HtmlDecode(frq.Cq().Find("tbody > tr > td:nth-child(7)").Get(0).InnerText);
1.80 - common.Modulation = WebUtility.HtmlDecode(frq.Cq().Find("tbody > tr > td:nth-child(8)").Get(0).InnerText);
1.81 - common.SymbolRate = WebUtility.HtmlDecode(frq.Cq().Find("tbody > tr > td:nth-child(9) > a").Get(0).InnerText);
1.82 - common.FEC = WebUtility.HtmlDecode(frq.Cq().Find("tbody > tr > td:nth-child(9) > a:nth-child(2)").Get(0).InnerText);
1.83 - try
1.84 - {
1.85 - common.Provider = WebUtility.HtmlDecode(frq.Cq().Find("tbody > tr > td:nth-child(10) > b").Get(0).InnerText);
1.86 - }
1.87 - catch (Exception)
1.88 - {
1.89 - }
1.90 + continue;
1.91 + }
1.92 + common.Satellite = WebUtility.HtmlDecode(frq.Cq().Find("tbody > tr > td:nth-child(2) > a").Get(0).InnerText);
1.93 + common.Frequency = WebUtility.HtmlDecode(frq.Cq().Find("tbody > tr > td:nth-child(3)").Get(0).InnerText);
1.94 + common.Polarisation = WebUtility.HtmlDecode(frq.Cq().Find("tbody > tr > td:nth-child(4)").Get(0).InnerText);
1.95 + common.Transponder = WebUtility.HtmlDecode(frq.Cq().Find("tbody > tr > td:nth-child(5) > a").Get(0).InnerText);
1.96 + common.Beam = WebUtility.HtmlDecode(frq.Cq().Find("tbody > tr > td:nth-child(6) > a").Get(0).InnerText);
1.97 + common.Standard = WebUtility.HtmlDecode(frq.Cq().Find("tbody > tr > td:nth-child(7)").Get(0).InnerText);
1.98 + common.Modulation = WebUtility.HtmlDecode(frq.Cq().Find("tbody > tr > td:nth-child(8)").Get(0).InnerText);
1.99 + common.SymbolRate = WebUtility.HtmlDecode(frq.Cq().Find("tbody > tr > td:nth-child(9) > a").Get(0).InnerText);
1.100 + common.FEC = WebUtility.HtmlDecode(frq.Cq().Find("tbody > tr > td:nth-child(9) > a:nth-child(2)").Get(0).InnerText);
1.101 + try
1.102 + {
1.103 + common.Provider = WebUtility.HtmlDecode(frq.Cq().Find("tbody > tr > td:nth-child(10) > b").Get(0).InnerText);
1.104 + }
1.105 + catch (Exception)
1.106 + {
1.107 + }
1.108
1.109 - common.Bitrate = WebUtility.HtmlDecode(frq.Cq().Find("tbody > tr > td:nth-child(10)").Get(0).InnerText);
1.110 - if (common.Bitrate.Substring(0, ", ".Length) == ", ")
1.111 - {
1.112 - common.Bitrate = common.Bitrate.Substring(", ".Length, common.Bitrate.Length - ", ".Length);
1.113 - }
1.114 - //
1.115 - common.NetworkID = WebUtility.HtmlDecode(frq.Cq().Find("tbody > tr > td:nth-child(11)").Get(0).InnerText);
1.116 - common.NetworkID = common.NetworkID.Substring("NID:".Length, common.NetworkID.Length - "NID:".Length);
1.117 - //
1.118 - common.TransponderID = WebUtility.HtmlDecode(frq.Cq().Find("tbody > tr > td:nth-child(12)").Get(0).InnerText);
1.119 - common.TransponderID = common.TransponderID.Substring("TID:".Length, common.TransponderID.Length - "TID:".Length);
1.120 + common.Bitrate = WebUtility.HtmlDecode(frq.Cq().Find("tbody > tr > td:nth-child(10)").Get(0).InnerText);
1.121 + if (common.Bitrate.Substring(0, ", ".Length) == ", ")
1.122 + {
1.123 + common.Bitrate = common.Bitrate.Substring(", ".Length, common.Bitrate.Length - ", ".Length);
1.124 + }
1.125 + //
1.126 + common.NetworkID = WebUtility.HtmlDecode(frq.Cq().Find("tbody > tr > td:nth-child(11)").Get(0).InnerText);
1.127 + common.NetworkID = common.NetworkID.Substring("NID:".Length, common.NetworkID.Length - "NID:".Length);
1.128 + //
1.129 + common.TransponderID = WebUtility.HtmlDecode(frq.Cq().Find("tbody > tr > td:nth-child(12)").Get(0).InnerText);
1.130 + common.TransponderID = common.TransponderID.Substring("TID:".Length, common.TransponderID.Length - "TID:".Length);
1.131
1.132 - //We got common properties for the coming channels
1.133 - //Debug.Write(common.ToString());
1.134 + //We got common properties for the coming channels
1.135 + //Debug.Write(common.ToString());
1.136
1.137 - //Now get all the channels for that frequency
1.138 - //Channel common = new Channel();
1.139 + //Now get all the channels for that frequency
1.140 + //Channel common = new Channel();
1.141
1.142 - CQ channelsDiv = frq.Cq().Next("div");
1.143 - CQ channelsTableRows = channelsDiv.Find("table.fl > tbody").Children("tr");
1.144 + CQ channelsDiv = frq.Cq().Next("div");
1.145 + CQ channelsTableRows = channelsDiv.Find("table.fl > tbody").Children("tr");
1.146
1.147 - foreach (IDomObject row in channelsTableRows)
1.148 - {
1.149 - Channel channel = new Channel();
1.150 - //Initialize this channel with common properties on this frequency
1.151 + foreach (IDomObject row in channelsTableRows)
1.152 + {
1.153 + Channel channel = new Channel();
1.154 + //Initialize this channel with common properties on this frequency
1.155 channel.Copy(common);
1.156
1.157 - //Try and parse channel name
1.158 - CQ cqChannelName = row.Cq().Find("td:nth-child(3) > a");
1.159 - if (cqChannelName.Length == 0)
1.160 - {
1.161 - cqChannelName = row.Cq().Find("td:nth-child(3) > i");
1.162 - if (cqChannelName.Length == 0)
1.163 - {
1.164 - //Can't get channel name
1.165 - Debug.Write("WARNING: Can't find channel name! Skipping this channel");
1.166 - continue;
1.167 - }
1.168 - }
1.169 + //Try and parse channel name
1.170 + CQ cqChannelName = row.Cq().Find("td:nth-child(3) > a");
1.171 + if (cqChannelName.Length == 0)
1.172 + {
1.173 + cqChannelName = row.Cq().Find("td:nth-child(3) > i");
1.174 + if (cqChannelName.Length == 0)
1.175 + {
1.176 + //Can't get channel name
1.177 + Debug.Write("WARNING: Can't find channel name! Skipping this channel");
1.178 + continue;
1.179 + }
1.180 + }
1.181
1.182 - channel.Name = WebUtility.HtmlDecode(cqChannelName.Get(0).InnerText).Trim();
1.183 - if (channel.Name == "Name")
1.184 - {
1.185 - //Skipping header rows
1.186 - continue;
1.187 - }
1.188 + string channelNameInnerText = cqChannelName.Get(0).InnerText;
1.189 + channel.Name = WebUtility.HtmlDecode(channelNameInnerText).Trim();
1.190 + //Encoding decoder = Encoding.UTF8;
1.191 + //channel.Name = decoder.GetString(channel.Name);
1.192 + //channel.Name = channelNameInnerText.Trim(); //Make up your mind :)
1.193 + if (channel.Name == "Name")
1.194 + {
1.195 + //Skipping header rows
1.196 + continue;
1.197 + }
1.198
1.199 - //Make sure our channel name looks descent
1.200 - channel.Name = CleanChannelName(channel.Name);
1.201 + //Make sure our channel name looks decent
1.202 + channel.Name = CleanChannelName(channel.Name);
1.203 + //Make sure the resulting name is unique to avoid having multiple tuning details for a single channel
1.204 + if (names.ContainsKey(channel.Name))
1.205 + {
1.206 + names[channel.Name]++;
1.207 + channel.Name += " " + names[channel.Name];
1.208 + }
1.209 + else
1.210 + {
1.211 + names.Add(channel.Name, 1);
1.212 + }
1.213
1.214 - //So we have a channel name get the other properties then
1.215 - channel.Country = WebUtility.HtmlDecode(row.Cq().Find("td:nth-child(4)").Get(0).InnerText).Trim();
1.216 - channel.Category = WebUtility.HtmlDecode(row.Cq().Find("td:nth-child(5)").Get(0).InnerText).Trim();
1.217 - //Skip the packages
1.218 - //Skip the encryptions
1.219 - channel.SID = WebUtility.HtmlDecode(row.Cq().Find("td:nth-child(8)").Get(0).InnerText).Trim();
1.220 - channel.VPID = WebUtility.HtmlDecode(row.Cq().Find("td:nth-child(9)").Get(0).InnerText).Trim();
1.221 - //Skip audios
1.222 - channel.PMT = WebUtility.HtmlDecode(row.Cq().Find("td:nth-child(11)").Get(0).InnerText).Trim();
1.223 - channel.PCR = WebUtility.HtmlDecode(row.Cq().Find("td:nth-child(11)").Get(0).InnerText).Trim();
1.224 - channel.TXT = WebUtility.HtmlDecode(row.Cq().Find("td:nth-child(11)").Get(0).InnerText).Trim();
1.225 + //
1.226 + //We don't want channels we already have
1.227 + Channel existingChannel = aChannels.Find(c => c.Name == channel.Name);
1.228 + if (existingChannel!=null)
1.229 + {
1.230 + continue;
1.231 + }
1.232
1.233 - //Append that new channel to our list
1.234 - channels.Add(channel);
1.235
1.236 - //Show it in debug output
1.237 - Debug.Write(channel);
1.238 - } //For each channel
1.239 - } //For each frequency
1.240 + //So we have a channel name get the other properties then
1.241 + channel.Country = WebUtility.HtmlDecode(row.Cq().Find("td:nth-child(4)").Get(0).InnerText).Trim();
1.242 + channel.Category = WebUtility.HtmlDecode(row.Cq().Find("td:nth-child(5)").Get(0).InnerText).Trim();
1.243 + if (channel.Category=="")
1.244 + {
1.245 + channel.Category = "Other";
1.246 + }
1.247 + //Skip the packages
1.248 + //Skip the encryptions
1.249 + channel.SID = WebUtility.HtmlDecode(row.Cq().Find("td:nth-child(8)").Get(0).InnerText).Trim();
1.250 + channel.VPID = WebUtility.HtmlDecode(row.Cq().Find("td:nth-child(9)").Get(0).InnerText).Trim();
1.251 + //Skip audios
1.252 + channel.PMT = WebUtility.HtmlDecode(row.Cq().Find("td:nth-child(11)").Get(0).InnerText).Trim();
1.253 + channel.PCR = WebUtility.HtmlDecode(row.Cq().Find("td:nth-child(11)").Get(0).InnerText).Trim();
1.254 + channel.TXT = WebUtility.HtmlDecode(row.Cq().Find("td:nth-child(11)").Get(0).InnerText).Trim();
1.255
1.256 - return channels;
1.257 - }
1.258 + //Append that new channel to our list
1.259 + channels.Add(channel);
1.260
1.261 - //
1.262 - public static string CleanChannelName(string aName)
1.263 - {
1.264 - aName.Trim();
1.265 - string[] remove = { "Germany", "Deutschland", "(Germany)", "(Deutschland)" };
1.266 + //Show it in debug output
1.267 + Debug.Write(channel);
1.268 + } //For each channel
1.269 + } //For each frequency
1.270
1.271 - foreach (string item in remove)
1.272 - {
1.273 - if (aName.EndsWith(item))
1.274 + return channels;
1.275 + }
1.276 +
1.277 + //
1.278 + public static string CleanChannelName(string aName)
1.279 {
1.280 - aName = aName.Substring(0, aName.LastIndexOf(item));
1.281 - break; //only allow one match at most
1.282 + aName = aName.Trim();
1.283 + string[] remove = { " Germany", " Deutschland", " (Germany)", " (Deutschland)" };
1.284 +
1.285 + foreach (string item in remove)
1.286 + {
1.287 + //if (aName.EndsWith(item))
1.288 + if (aName.Contains(item))
1.289 + {
1.290 + aName = aName.Substring(0, aName.LastIndexOf(item));
1.291 + break; //only allow one match at most
1.292 + }
1.293 + }
1.294 + aName = aName.Trim();
1.295 + return aName;
1.296 }
1.297 - }
1.298 - aName.Trim();
1.299 - return aName;
1.300 +
1.301 + //
1.302 + public static List<Channel> CleanChannelList(List<Channel> aChannels)
1.303 + {
1.304 + //Create our list of channels
1.305 + List<Channel> channels = new List<Channel>();
1.306 +
1.307 + foreach (Channel channel in aChannels)
1.308 + {
1.309 + Channel hdChannel = aChannels.Find(c => c.Name == channel.Name + " HD");
1.310 + if (hdChannel==null
1.311 + && !(channel.Name.Contains("Bundesliga") && !channel.Name.Contains("HD")) //We don't want non HD bundesliga
1.312 + && !(channel.Name.StartsWith("Sky Sport") && !channel.Name.Contains("HD")) //We don't want non HD Sky Sport
1.313 + )
1.314 + {
1.315 + //Patch some missing or bad categories
1.316 + if (channel.Name.Contains("Bundesliga")
1.317 + || channel.Name.Contains("Sport"))
1.318 + {
1.319 + channel.Category = "Sport";
1.320 + }
1.321 +
1.322 + if (channel.Name.Contains("Sky Select"))
1.323 + {
1.324 + channel.Category = "Pay per view";
1.325 + }
1.326 +
1.327 +
1.328 + if (channel.Name.StartsWith("Sky Atlantic")
1.329 + || channel.Name.StartsWith("SyFy")
1.330 + || channel.Name.StartsWith("Fox"))
1.331 + {
1.332 + channel.Category = "Series";
1.333 + }
1.334 +
1.335 + if (channel.Name.StartsWith("Sky 3D"))
1.336 + {
1.337 + channel.Category = "Movies";
1.338 + }
1.339 +
1.340 + //Collapse some categories
1.341 + if (channel.Category == "Entertainment"
1.342 + || channel.Category == "Music"
1.343 + || channel.Name.Contains("Music"))
1.344 + {
1.345 + channel.Category = "General";
1.346 + }
1.347 +
1.348 + if (channel.Category == "Porn")
1.349 + {
1.350 + channel.Category = "Erotic";
1.351 + }
1.352 +
1.353 + if (channel.Category == "Presentations")
1.354 + {
1.355 + channel.Category = "News";
1.356 + }
1.357 +
1.358 + if (channel.Category == "History")
1.359 + {
1.360 + channel.Category = "Documentaries";
1.361 + }
1.362 +
1.363 + if (channel.Category == "Lifestyle")
1.364 + {
1.365 + channel.Category = "General";
1.366 + }
1.367 +
1.368 + //if (channel.Category == "Regional")
1.369 + //{
1.370 + // channel.Category = "General";
1.371 + //}
1.372 +
1.373 + if (channel.Category == "Other")
1.374 + {
1.375 + channel.Category = "General";
1.376 + }
1.377 +
1.378 + if (channel.Category == "Cultural")
1.379 + {
1.380 + channel.Category = "General";
1.381 + }
1.382 +
1.383 +
1.384 + //No corresponding HD channel, keep it then
1.385 + channels.Add(channel);
1.386 + }
1.387 + else
1.388 + {
1.389 + Debug.Write("WARNING: Found HD channel for " + channel.Name + ". Discarding it!\n");
1.390 + }
1.391 + }
1.392 +
1.393 + return channels;
1.394 + }
1.395 }
1.396 - }
1.397 }