KingOfSat.cs
author StephaneLenclud
Sat, 06 Oct 2018 14:07:31 +0200
changeset 9 b77b09f680e7
parent 8 adff2dec03a0
permissions -rw-r--r--
SatIndex grabber now working.
     1 using System;
     2 using System.Collections.Generic;
     3 using System.Linq;
     4 using System.Text;
     5 using CsQuery;
     6 using System.Diagnostics;
     7 using System.Net;
     8 
     9 namespace SatChanGen
    10 {
    11     class KingOfSat
    12     {
    13         //
    14         // Summary:
    15         //     Create a new CQ object wrapping a single element.
    16         //
    17         // Parameters:
    18         //   aUrl:
    19         //     URL to a KingOfSat channel list. Typically a package list.
    20         //
    21         // Return:
    22         //   List of channels parsed.		
    23         public static List<Channel> Parse(List<Channel> aChannels, string aUrl, string aOrbitalPosition, bool aUseChannelIdForName=false, string aCategoryOverride="")
    24         {
    25             //To avoid duplicated name
    26             Dictionary<string, int> names = new Dictionary<string, int>();
    27 
    28             string kos = new WebClient().DownloadString(aUrl);
    29             //Debug.Write(kos);
    30 
    31             CQ dom = kos;
    32 
    33             //Get all the Frequency elements in our page
    34             CQ sats = dom[".frq"];
    35 
    36             //Create our list of channels
    37             List<Channel> channels = new List<Channel>();
    38 
    39             foreach (IDomObject frq in sats.ToList())
    40             {
    41                 Channel common = new Channel();
    42 
    43                 //Parse channel details
    44                 common.OrbitalPosition = WebUtility.HtmlDecode(frq.Cq().Find("tbody > tr > td")?.Get(0).InnerText).Trim();
    45                 if (common.OrbitalPosition != aOrbitalPosition)
    46                 {
    47                     //Wrong sat, skip
    48                     continue;
    49                 }
    50                 common.Satellite = WebUtility.HtmlDecode(frq.Cq().Find("tbody > tr > td:nth-child(2) > a").Get(0).InnerText);
    51                 common.Frequency = WebUtility.HtmlDecode(frq.Cq().Find("tbody > tr > td:nth-child(3)").Get(0).InnerText);
    52                 common.Polarisation = WebUtility.HtmlDecode(frq.Cq().Find("tbody > tr > td:nth-child(4)").Get(0).InnerText);
    53                 common.Transponder = WebUtility.HtmlDecode(frq.Cq().Find("tbody > tr > td:nth-child(5) > a").Get(0).InnerText);
    54                 common.Beam = WebUtility.HtmlDecode(frq.Cq().Find("tbody > tr > td:nth-child(6) > a").Get(0).InnerText);
    55                 common.Standard = WebUtility.HtmlDecode(frq.Cq().Find("tbody > tr > td:nth-child(7)").Get(0).InnerText);
    56                 common.Modulation = WebUtility.HtmlDecode(frq.Cq().Find("tbody > tr > td:nth-child(8)").Get(0).InnerText);
    57                 common.SymbolRate = WebUtility.HtmlDecode(frq.Cq().Find("tbody > tr > td:nth-child(9) > a").Get(0).InnerText);
    58                 common.FEC = WebUtility.HtmlDecode(frq.Cq().Find("tbody > tr > td:nth-child(9) > a:nth-child(2)").Get(0).InnerText);
    59                 try
    60                 {
    61                     common.Provider = WebUtility.HtmlDecode(frq.Cq().Find("tbody > tr > td:nth-child(10) > b").Get(0).InnerText);
    62                 }
    63                 catch (Exception)
    64                 {
    65                 }
    66 
    67                 common.Bitrate = WebUtility.HtmlDecode(frq.Cq().Find("tbody > tr > td:nth-child(10)").Get(0).InnerText);
    68                 if (common.Bitrate.Substring(0, ", ".Length) == ", ")
    69                 {
    70                     common.Bitrate = common.Bitrate.Substring(", ".Length, common.Bitrate.Length - ", ".Length);
    71                 }
    72                 //
    73                 common.NetworkID = WebUtility.HtmlDecode(frq.Cq().Find("tbody > tr > td:nth-child(11)").Get(0).InnerText);
    74                 //common.NetworkID = common.NetworkID.Substring("NID:".Length, common.NetworkID.Length - "NID:".Length);
    75                 //
    76                 common.TransponderID = WebUtility.HtmlDecode(frq.Cq().Find("tbody > tr > td:nth-child(12)").Get(0).InnerText);
    77                 //common.TransponderID = common.TransponderID.Substring("TID:".Length, common.TransponderID.Length - "TID:".Length);
    78 
    79                 //We got common properties for the coming channels
    80                 //Debug.Write(common.ToString());
    81 
    82                 //Now get all the channels for that frequency
    83                 //Channel common = new Channel();
    84 
    85                 CQ channelsDiv = frq.Cq().Next("div");
    86                 CQ channelsTableRows = channelsDiv.Find("table.fl > tbody").Children("tr");
    87 
    88                 foreach (IDomObject row in channelsTableRows)
    89                 {
    90                     Channel channel = new Channel();
    91                     //Initialize this channel with common properties on this frequency
    92                     channel.Copy(common);
    93 
    94                     //Try and parse channel name
    95                     CQ cqChannelName = row.Cq().Find("td:nth-child(3) > a");
    96                     if (cqChannelName.Length == 0)
    97                     {
    98                         cqChannelName = row.Cq().Find("td:nth-child(3) > i");
    99                         if (cqChannelName.Length == 0)
   100                         {
   101                             //Can't get channel name
   102                             Debug.Write("WARNING: Can't find channel name! Skipping this channel");
   103                             continue;
   104                         }
   105                     }
   106 
   107                     string channelName = "";
   108                     if (cqChannelName.Get(0).HasAttribute("title") && aUseChannelIdForName)
   109                     {
   110                         //We want to use the channel ID
   111                         channelName = cqChannelName.Get(0).GetAttribute("title");
   112                     }
   113                     else
   114                     {
   115                         channelName = cqChannelName.Get(0).InnerText;
   116                     }
   117 
   118                     //Decode HTML
   119                     channel.Name = WebUtility.HtmlDecode(channelName);
   120                     //Convert from default encoding to UTF8
   121                     //We spend a lot of time trying to get this right until we found our answer in the following thread.
   122                     //http://stackoverflow.com/questions/14057434/how-can-i-transform-string-to-utf-8-in-c
   123                     byte[] bytes = Encoding.Default.GetBytes(channel.Name);
   124                     channel.Name = Encoding.UTF8.GetString(bytes);
   125 
   126 
   127 
   128                     if (channel.Name == "Name" || channel.Name == "Sorted by name")
   129                     {
   130                         //Skipping header rows
   131                         continue;
   132                     }
   133 
   134                     //Make sure our channel name looks descent
   135                     channel.Name = CleanChannelName(channel.Name);
   136                     //Make sure the resulting name is unique to avoid having multiple tuning detail for a single channel
   137                     if (names.ContainsKey(channel.Name))
   138                     {
   139                         names[channel.Name]++;
   140                         channel.Name += " " + names[channel.Name];
   141                     }
   142                     else
   143                     {
   144                         names.Add(channel.Name, 1);
   145                     }
   146 
   147                     //
   148                     //We don't want channels we already have
   149                     Channel existingChannel = aChannels.Find(c => c.Name == channel.Name);
   150                     if (existingChannel != null)
   151                     {
   152                         continue;
   153                     }
   154 
   155 
   156                     //So we have a channel name get the other properties then
   157                     channel.Country = WebUtility.HtmlDecode(row.Cq().Find("td:nth-child(4)").Get(0).InnerText).Trim();
   158                     channel.Category = WebUtility.HtmlDecode(row.Cq().Find("td:nth-child(5)").Get(0).InnerText).Trim();
   159                     if (channel.Category == "")
   160                     {
   161                         channel.Category = "Other";
   162                     }
   163 
   164                     //Override category if needed
   165                     if (aCategoryOverride != "")
   166                     {
   167                         channel.Category = aCategoryOverride;
   168                     }
   169 
   170                     //Skip the packages
   171                     //Skip the encryptions
   172                     channel.SID = WebUtility.HtmlDecode(row.Cq().Find("td:nth-child(8)").Get(0).InnerText).Trim();
   173                     channel.VPID = WebUtility.HtmlDecode(row.Cq().Find("td:nth-child(9)").Get(0).InnerText).Trim();
   174                     //Skip audios
   175                     channel.PMT = WebUtility.HtmlDecode(row.Cq().Find("td:nth-child(11)").Get(0).InnerText).Trim();
   176                     channel.PCR = WebUtility.HtmlDecode(row.Cq().Find("td:nth-child(11)").Get(0).InnerText).Trim();
   177                     channel.TXT = WebUtility.HtmlDecode(row.Cq().Find("td:nth-child(11)").Get(0).InnerText).Trim();
   178 
   179                     //Append that new channel to our list
   180                     channels.Add(channel);
   181 
   182                     //Show it in debug output
   183                     Debug.Write(channel);
   184                 } //For each channel
   185             } //For each frequency
   186 
   187             return channels;
   188         }
   189 
   190         //
   191         public static string CleanChannelName(string aName)
   192         {
   193             aName = aName.Trim();
   194             string[] remove = { " Germany", " Deutschland", " (Germany)", " (Deutschland)" };
   195 
   196             foreach (string item in remove)
   197             {
   198                 //if (aName.EndsWith(item))
   199                 if (aName.Contains(item))
   200                 {
   201                     aName = aName.Substring(0, aName.LastIndexOf(item));
   202                     break; //only allow one match at most
   203                 }
   204             }
   205 
   206             string[] removePrefix = { "Id: " };
   207 
   208             foreach (string item in removePrefix)
   209             {
   210                 if (aName.StartsWith(item))
   211                 {
   212                     aName = aName.Substring(item.Length, aName.Length - item.Length);
   213                     break; //only allow one match at most
   214                 }
   215             }
   216 
   217 
   218 
   219             aName = aName.Trim();
   220             return aName;
   221         }
   222 
   223         //
   224         public static List<Channel> CleanChannelList(List<Channel> aChannels)
   225         {
   226             //Create our list of channels
   227             List<Channel> channels = new List<Channel>();
   228 
   229             foreach (Channel channel in aChannels)
   230             {
   231                 Channel hdChannel = aChannels.Find(c => c.Name == channel.Name + " HD");
   232                 if (hdChannel == null
   233                     && !(channel.Name.Contains("Bundesliga") && !channel.Name.Contains("HD")) //We don't want non HD bundesliga
   234                     && !(channel.Name.StartsWith("Sky Sport") && !channel.Name.Contains("HD")) //We don't want non HD Sky Sport
   235                     )
   236                 {
   237                     //Patch some missing or bad categories
   238                     if (channel.Name.Contains("Bundesliga")
   239                         || channel.Name.Contains("Sport"))
   240                     {
   241                         channel.Category = "Sport";
   242                     }
   243 
   244                     if (channel.Name.Contains("Sky Select"))
   245                     {
   246                         channel.Category = "Pay per view";
   247                     }
   248 
   249 
   250                     if (channel.Name.StartsWith("Sky Atlantic")
   251                         || channel.Name.StartsWith("SyFy")
   252                         || channel.Name.StartsWith("Fox"))
   253                     {
   254                         channel.Category = "Series";
   255                     }
   256 
   257                     if (channel.Name.StartsWith("Sky 3D"))
   258                     {
   259                         channel.Category = "Movies";
   260                     }
   261 
   262                     //Collapse some categories
   263                     if (channel.Category == "Entertainment"
   264                         || channel.Category == "Music"
   265                         || channel.Name.Contains("Music"))
   266                     {
   267                         channel.Category = "General";
   268                     }
   269 
   270                     if (channel.Category == "Porn")
   271                     {
   272                         channel.Category = "Erotic";
   273                     }
   274 
   275                     if (channel.Category == "Presentations")
   276                     {
   277                         channel.Category = "News";
   278                     }
   279 
   280                     if (channel.Category == "History")
   281                     {
   282                         channel.Category = "Documentaries";
   283                     }
   284 
   285                     if (channel.Category == "Travel")
   286                     {
   287                         channel.Category = "Documentaries";
   288                     }
   289 
   290 
   291                     if (channel.Category == "Lifestyle")
   292                     {
   293                         channel.Category = "General";
   294                     }
   295 
   296                     //if (channel.Category == "Regional")
   297                     //{
   298                     //    channel.Category = "General";
   299                     //}
   300 
   301                     if (channel.Category == "Other")
   302                     {
   303                         channel.Category = "General";
   304                     }
   305 
   306                     if (channel.Category == "Cultural")
   307                     {
   308                         channel.Category = "General";
   309                     }
   310 
   311                     if (channel.Category == "Movies")
   312                     {
   313                         channel.Category = "Movies & Series";
   314                     }
   315 
   316                     if (channel.Category == "Series")
   317                     {
   318                         channel.Category = "Movies & Series";
   319                     }
   320 
   321 
   322                     //No corresponding HD channel, keep it then
   323                     channels.Add(channel);
   324                 }
   325                 else
   326                 {
   327                     Debug.Write("WARNING: Found HD channel for " + channel.Name + ". Discarding it!\n");
   328                 }
   329             }
   330 
   331             return channels;
   332         }
   333     }
   334 }