KingOfSat.cs
author StephaneLenclud
Sun, 17 May 2015 20:35:37 +0200
changeset 7 bf908f6c7758
parent 6 3061de2306d9
child 8 adff2dec03a0
permissions -rw-r--r--
Adding category override option, ID usage for name, hiding channels starting
with '.'. Folding Movies and Series in a single category. Adding channel prefix
removal.
     1 using System;
     2 using System.Collections.Generic;
     3 using System.Linq;
     4 using System.Text;
     5 using CsQuery;
     6 using System.Diagnostics;
     7 using System.Net;
     8 
     9 namespace SatChanGen
    10 {
    11     class KingOfSat
    12     {
    13         //
    14         // Summary:
    15         //     Create a new CQ object wrapping a single element.
    16         //
    17         // Parameters:
    18         //   aUrl:
    19         //     URL to a KingOfSat channel list. Typically a package list.
    20         //
    21         // Return:
    22         //   List of channels parsed.		
    23         public static List<Channel> Parse(List<Channel> aChannels, string aUrl, string aOrbitalPosition, bool aUseChannelIdForName=false, string aCategoryOverride="")
    24         {
    25             //To avoid duplicated name
    26             Dictionary<string, int> names = new Dictionary<string, int>();
    27 
    28             string kos = new WebClient().DownloadString(aUrl);
    29             //Debug.Write(kos);
    30 
    31             CQ dom = kos;
    32 
    33             //Get all the Frequency elements in our page
    34             CQ sats = dom[".frq"];
    35 
    36             //Create our list of channels
    37             List<Channel> channels = new List<Channel>();
    38 
    39             foreach (IDomObject frq in sats.ToList())
    40             {
    41                 Channel common = new Channel();
    42 
    43                 //Parse channel details
    44                 common.OrbitalPosition = WebUtility.HtmlDecode(frq.Cq().Find("tbody > tr > td > a > font").Get(0).InnerText).Trim();
    45                 if (common.OrbitalPosition != aOrbitalPosition)
    46                 {
    47                     //Wrong sat, skip
    48                     continue;
    49                 }
    50                 common.Satellite = WebUtility.HtmlDecode(frq.Cq().Find("tbody > tr > td:nth-child(2) > a").Get(0).InnerText);
    51                 common.Frequency = WebUtility.HtmlDecode(frq.Cq().Find("tbody > tr > td:nth-child(3)").Get(0).InnerText);
    52                 common.Polarisation = WebUtility.HtmlDecode(frq.Cq().Find("tbody > tr > td:nth-child(4)").Get(0).InnerText);
    53                 common.Transponder = WebUtility.HtmlDecode(frq.Cq().Find("tbody > tr > td:nth-child(5) > a").Get(0).InnerText);
    54                 common.Beam = WebUtility.HtmlDecode(frq.Cq().Find("tbody > tr > td:nth-child(6) > a").Get(0).InnerText);
    55                 common.Standard = WebUtility.HtmlDecode(frq.Cq().Find("tbody > tr > td:nth-child(7)").Get(0).InnerText);
    56                 common.Modulation = WebUtility.HtmlDecode(frq.Cq().Find("tbody > tr > td:nth-child(8)").Get(0).InnerText);
    57                 common.SymbolRate = WebUtility.HtmlDecode(frq.Cq().Find("tbody > tr > td:nth-child(9) > a").Get(0).InnerText);
    58                 common.FEC = WebUtility.HtmlDecode(frq.Cq().Find("tbody > tr > td:nth-child(9) > a:nth-child(2)").Get(0).InnerText);
    59                 try
    60                 {
    61                     common.Provider = WebUtility.HtmlDecode(frq.Cq().Find("tbody > tr > td:nth-child(10) > b").Get(0).InnerText);
    62                 }
    63                 catch (Exception)
    64                 {
    65                 }
    66 
    67                 common.Bitrate = WebUtility.HtmlDecode(frq.Cq().Find("tbody > tr > td:nth-child(10)").Get(0).InnerText);
    68                 if (common.Bitrate.Substring(0, ", ".Length) == ", ")
    69                 {
    70                     common.Bitrate = common.Bitrate.Substring(", ".Length, common.Bitrate.Length - ", ".Length);
    71                 }
    72                 //
    73                 common.NetworkID = WebUtility.HtmlDecode(frq.Cq().Find("tbody > tr > td:nth-child(11)").Get(0).InnerText);
    74                 common.NetworkID = common.NetworkID.Substring("NID:".Length, common.NetworkID.Length - "NID:".Length);
    75                 //
    76                 common.TransponderID = WebUtility.HtmlDecode(frq.Cq().Find("tbody > tr > td:nth-child(12)").Get(0).InnerText);
    77                 common.TransponderID = common.TransponderID.Substring("TID:".Length, common.TransponderID.Length - "TID:".Length);
    78 
    79                 //We got common properties for the coming channels
    80                 //Debug.Write(common.ToString());
    81 
    82                 //Now get all the channels for that frequency
    83                 //Channel common = new Channel();
    84 
    85                 CQ channelsDiv = frq.Cq().Next("div");
    86                 CQ channelsTableRows = channelsDiv.Find("table.fl > tbody").Children("tr");
    87 
    88                 foreach (IDomObject row in channelsTableRows)
    89                 {
    90                     Channel channel = new Channel();
    91                     //Initialize this channel with common properties on this frequency
    92                     channel.Copy(common);
    93 
    94                     //Try and parse channel name
    95                     CQ cqChannelName = row.Cq().Find("td:nth-child(3) > a");
    96                     if (cqChannelName.Length == 0)
    97                     {
    98                         cqChannelName = row.Cq().Find("td:nth-child(3) > i");
    99                         if (cqChannelName.Length == 0)
   100                         {
   101                             //Can't get channel name
   102                             Debug.Write("WARNING: Can't find channel name! Skipping this channel");
   103                             continue;
   104                         }
   105                     }
   106 
   107                     string channelName = "";
   108                     if (cqChannelName.Get(0).HasAttribute("title") && aUseChannelIdForName)
   109                     {
   110                         //We want to use the channel ID
   111                         channelName = cqChannelName.Get(0).GetAttribute("title");
   112                     }
   113                     else
   114                     {
   115                         channelName = cqChannelName.Get(0).InnerText;
   116                     }
   117 
   118                     //We spend a lot of time trying to get this right until we found our answer in the following thread.
   119                     //http://stackoverflow.com/questions/14057434/how-can-i-transform-string-to-utf-8-in-c
   120                     //Decode HTML
   121                     channel.Name = WebUtility.HtmlDecode(channelName);
   122                     //Convert from default encoding to UTF8
   123                     byte[] bytes = Encoding.Default.GetBytes(channel.Name);
   124                     channel.Name = Encoding.UTF8.GetString(bytes);
   125 
   126 
   127 
   128                     if (channel.Name == "Name" || channel.Name == "Sorted by name")
   129                     {
   130                         //Skipping header rows
   131                         continue;
   132                     }
   133 
   134                     //Make sure our channel name looks descent
   135                     channel.Name = CleanChannelName(channel.Name);
   136                     //Make sure the resulting name is unique to avoid having multiple tuning detail for a single channel
   137                     if (names.ContainsKey(channel.Name))
   138                     {
   139                         names[channel.Name]++;
   140                         channel.Name += " " + names[channel.Name];
   141                     }
   142                     else
   143                     {
   144                         names.Add(channel.Name, 1);
   145                     }
   146 
   147                     //
   148                     //We don't want channels we already have
   149                     Channel existingChannel = aChannels.Find(c => c.Name == channel.Name);
   150                     if (existingChannel != null)
   151                     {
   152                         continue;
   153                     }
   154 
   155 
   156                     //So we have a channel name get the other properties then
   157                     channel.Country = WebUtility.HtmlDecode(row.Cq().Find("td:nth-child(4)").Get(0).InnerText).Trim();
   158                     channel.Category = WebUtility.HtmlDecode(row.Cq().Find("td:nth-child(5)").Get(0).InnerText).Trim();
   159                     if (channel.Category == "")
   160                     {
   161                         channel.Category = "Other";
   162                     }
   163 
   164                     //Override category if needed
   165                     if (aCategoryOverride != "")
   166                     {
   167                         channel.Category = aCategoryOverride;
   168                     }
   169 
   170                     //Skip the packages
   171                     //Skip the encryptions
   172                     channel.SID = WebUtility.HtmlDecode(row.Cq().Find("td:nth-child(8)").Get(0).InnerText).Trim();
   173                     channel.VPID = WebUtility.HtmlDecode(row.Cq().Find("td:nth-child(9)").Get(0).InnerText).Trim();
   174                     //Skip audios
   175                     channel.PMT = WebUtility.HtmlDecode(row.Cq().Find("td:nth-child(11)").Get(0).InnerText).Trim();
   176                     channel.PCR = WebUtility.HtmlDecode(row.Cq().Find("td:nth-child(11)").Get(0).InnerText).Trim();
   177                     channel.TXT = WebUtility.HtmlDecode(row.Cq().Find("td:nth-child(11)").Get(0).InnerText).Trim();
   178 
   179                     //Append that new channel to our list
   180                     channels.Add(channel);
   181 
   182                     //Show it in debug output
   183                     Debug.Write(channel);
   184                 } //For each channel
   185             } //For each frequency
   186 
   187             return channels;
   188         }
   189 
   190         //
   191         public static string CleanChannelName(string aName)
   192         {
   193             aName = aName.Trim();
   194             string[] remove = { " Germany", " Deutschland", " (Germany)", " (Deutschland)" };
   195 
   196             foreach (string item in remove)
   197             {
   198                 //if (aName.EndsWith(item))
   199                 if (aName.Contains(item))
   200                 {
   201                     aName = aName.Substring(0, aName.LastIndexOf(item));
   202                     break; //only allow one match at most
   203                 }
   204             }
   205 
   206             string[] removePrefix = { "Id: " };
   207 
   208             foreach (string item in removePrefix)
   209             {
   210                 if (aName.StartsWith(item))
   211                 {
   212                     aName = aName.Substring(item.Length, aName.Length - item.Length);
   213                     break; //only allow one match at most
   214                 }
   215             }
   216 
   217 
   218 
   219             aName = aName.Trim();
   220             return aName;
   221         }
   222 
   223         //
   224         public static List<Channel> CleanChannelList(List<Channel> aChannels)
   225         {
   226             //Create our list of channels
   227             List<Channel> channels = new List<Channel>();
   228 
   229             foreach (Channel channel in aChannels)
   230             {
   231                 Channel hdChannel = aChannels.Find(c => c.Name == channel.Name + " HD");
   232                 if (hdChannel == null
   233                     && !(channel.Name.Contains("Bundesliga") && !channel.Name.Contains("HD")) //We don't want non HD bundesliga
   234                     && !(channel.Name.StartsWith("Sky Sport") && !channel.Name.Contains("HD")) //We don't want non HD Sky Sport
   235                     )
   236                 {
   237                     //Patch some missing or bad categories
   238                     if (channel.Name.Contains("Bundesliga")
   239                         || channel.Name.Contains("Sport"))
   240                     {
   241                         channel.Category = "Sport";
   242                     }
   243 
   244                     if (channel.Name.Contains("Sky Select"))
   245                     {
   246                         channel.Category = "Pay per view";
   247                     }
   248 
   249 
   250                     if (channel.Name.StartsWith("Sky Atlantic")
   251                         || channel.Name.StartsWith("SyFy")
   252                         || channel.Name.StartsWith("Fox"))
   253                     {
   254                         channel.Category = "Series";
   255                     }
   256 
   257                     if (channel.Name.StartsWith("Sky 3D"))
   258                     {
   259                         channel.Category = "Movies";
   260                     }
   261 
   262                     //Collapse some categories
   263                     if (channel.Category == "Entertainment"
   264                         || channel.Category == "Music"
   265                         || channel.Name.Contains("Music"))
   266                     {
   267                         channel.Category = "General";
   268                     }
   269 
   270                     if (channel.Category == "Porn")
   271                     {
   272                         channel.Category = "Erotic";
   273                     }
   274 
   275                     if (channel.Category == "Presentations")
   276                     {
   277                         channel.Category = "News";
   278                     }
   279 
   280                     if (channel.Category == "History")
   281                     {
   282                         channel.Category = "Documentaries";
   283                     }
   284 
   285                     if (channel.Category == "Lifestyle")
   286                     {
   287                         channel.Category = "General";
   288                     }
   289 
   290                     //if (channel.Category == "Regional")
   291                     //{
   292                     //    channel.Category = "General";
   293                     //}
   294 
   295                     if (channel.Category == "Other")
   296                     {
   297                         channel.Category = "General";
   298                     }
   299 
   300                     if (channel.Category == "Cultural")
   301                     {
   302                         channel.Category = "General";
   303                     }
   304 
   305                     if (channel.Category == "Movies")
   306                     {
   307                         channel.Category = "Movies & Series";
   308                     }
   309 
   310                     if (channel.Category == "Series")
   311                     {
   312                         channel.Category = "Movies & Series";
   313                     }
   314 
   315 
   316                     //No corresponding HD channel, keep it then
   317                     channels.Add(channel);
   318                 }
   319                 else
   320                 {
   321                     Debug.Write("WARNING: Found HD channel for " + channel.Name + ". Discarding it!\n");
   322                 }
   323             }
   324 
   325             return channels;
   326         }
   327     }
   328 }