KingOfSat.cs
author StephaneLenclud
Sat, 06 Oct 2018 14:07:31 +0200
changeset 9 b77b09f680e7
parent 8 adff2dec03a0
permissions -rw-r--r--
SatIndex grabber now working.
StephaneLenclud@1
     1
using System;
StephaneLenclud@1
     2
using System.Collections.Generic;
StephaneLenclud@1
     3
using System.Linq;
StephaneLenclud@1
     4
using System.Text;
StephaneLenclud@1
     5
using CsQuery;
StephaneLenclud@1
     6
using System.Diagnostics;
StephaneLenclud@1
     7
using System.Net;
StephaneLenclud@1
     8
StephaneLenclud@1
     9
namespace SatChanGen
StephaneLenclud@1
    10
{
StephaneLenclud@5
    11
    class KingOfSat
StephaneLenclud@5
    12
    {
StephaneLenclud@5
    13
        /// <summary>
        /// Downloads a KingOfSat channel list page and parses the channels found on one orbital position.
        /// </summary>
        /// <param name="aChannels">Channels we already have. A parsed channel whose final name matches one of them is skipped. May be null. The list is not modified.</param>
        /// <param name="aUrl">URL to a KingOfSat channel list. Typically a package list.</param>
        /// <param name="aOrbitalPosition">Orbital position to keep. Compared verbatim against the trimmed text of the first cell of each frequency table.</param>
        /// <param name="aUseChannelIdForName">When true and the name element has a <c>title</c> attribute, that attribute is used as the channel name instead of the element text.</param>
        /// <param name="aCategoryOverride">When not null or empty, replaces the category parsed for every channel.</param>
        /// <returns>List of channels parsed. Channels from <paramref name="aChannels"/> are not included.</returns>
        public static List<Channel> Parse(List<Channel> aChannels, string aUrl, string aOrbitalPosition, bool aUseChannelIdForName=false, string aCategoryOverride="")
        {
            //To avoid duplicated name
            Dictionary<string, int> names = new Dictionary<string, int>();

            //Names of the channels we already have, collected once so that the check done
            //for every parsed channel is a set lookup rather than a scan of the whole list.
            HashSet<string> existingNames = new HashSet<string>();
            if (aChannels != null)
            {
                foreach (Channel existing in aChannels)
                {
                    existingNames.Add(existing.Name);
                }
            }

            //WebClient is disposable, release it as soon as the page is downloaded
            string kos;
            using (WebClient client = new WebClient())
            {
                kos = client.DownloadString(aUrl);
            }

            CQ dom = kos;

            //Get all the Frequency elements in our page
            CQ sats = dom[".frq"];

            //Create our list of channels
            List<Channel> channels = new List<Channel>();

            foreach (IDomObject frq in sats)
            {
                Channel common = new Channel();

                //Parse the properties shared by every channel on this frequency
                common.OrbitalPosition = FirstText(frq, "tbody > tr > td").Trim();
                if (common.OrbitalPosition != aOrbitalPosition)
                {
                    //Wrong sat, skip
                    continue;
                }

                common.Satellite = FirstText(frq, "tbody > tr > td:nth-child(2) > a");
                common.Frequency = FirstText(frq, "tbody > tr > td:nth-child(3)");
                common.Polarisation = FirstText(frq, "tbody > tr > td:nth-child(4)");
                common.Transponder = FirstText(frq, "tbody > tr > td:nth-child(5) > a");
                common.Beam = FirstText(frq, "tbody > tr > td:nth-child(6) > a");
                common.Standard = FirstText(frq, "tbody > tr > td:nth-child(7)");
                common.Modulation = FirstText(frq, "tbody > tr > td:nth-child(8)");
                common.SymbolRate = FirstText(frq, "tbody > tr > td:nth-child(9) > a");
                common.FEC = FirstText(frq, "tbody > tr > td:nth-child(9) > a:nth-child(2)");

                //The provider element is optional: leave the property untouched when it is missing
                common.Provider = FirstTextOrDefault(frq, "tbody > tr > td:nth-child(10) > b", common.Provider);

                //Strip the leading separator from the bitrate when there is one
                common.Bitrate = FirstText(frq, "tbody > tr > td:nth-child(10)");
                if (common.Bitrate != null && common.Bitrate.StartsWith(", ", StringComparison.Ordinal))
                {
                    common.Bitrate = common.Bitrate.Substring(", ".Length);
                }

                //Raw cell text is stored for both IDs. Stripping the "NID:" and "TID:" prefixes is currently disabled.
                common.NetworkID = FirstText(frq, "tbody > tr > td:nth-child(11)");
                common.TransponderID = FirstText(frq, "tbody > tr > td:nth-child(12)");

                //We got common properties for the coming channels
                //Now get all the channels for that frequency
                CQ channelsDiv = frq.Cq().Next("div");
                CQ channelsTableRows = channelsDiv.Find("table.fl > tbody").Children("tr");

                foreach (IDomObject row in channelsTableRows)
                {
                    Channel channel = new Channel();
                    //Initialize this channel with common properties on this frequency
                    channel.Copy(common);

                    //Try and parse channel name: a link first, an italic element otherwise
                    CQ cqChannelName = row.Cq().Find("td:nth-child(3) > a");
                    if (cqChannelName.Length == 0)
                    {
                        cqChannelName = row.Cq().Find("td:nth-child(3) > i");
                        if (cqChannelName.Length == 0)
                        {
                            //Can't get channel name
                            Debug.Write("WARNING: Can't find channel name! Skipping this channel");
                            continue;
                        }
                    }

                    IDomObject nameElement = cqChannelName.Get(0);
                    string channelName;
                    if (aUseChannelIdForName && nameElement.HasAttribute("title"))
                    {
                        //We want to use the channel ID
                        channelName = nameElement.GetAttribute("title");
                    }
                    else
                    {
                        channelName = nameElement.InnerText;
                    }

                    //Decode HTML
                    channel.Name = WebUtility.HtmlDecode(channelName);
                    //Convert from default encoding to UTF8
                    //See http://stackoverflow.com/questions/14057434/how-can-i-transform-string-to-utf-8-in-c
                    //NOTE(review): the result depends on the platform's Encoding.Default, confirm before changing.
                    byte[] bytes = Encoding.Default.GetBytes(channel.Name);
                    channel.Name = Encoding.UTF8.GetString(bytes);

                    if (channel.Name == "Name" || channel.Name == "Sorted by name")
                    {
                        //Skipping header rows
                        continue;
                    }

                    //Make sure our channel name looks decent
                    channel.Name = CleanChannelName(channel.Name);

                    //Make sure the resulting name is unique to avoid having multiple tuning detail for a single channel.
                    //The second occurrence of "Foo" becomes "Foo 2", the third "Foo 3" and so on.
                    int occurrences;
                    if (names.TryGetValue(channel.Name, out occurrences))
                    {
                        occurrences++;
                        //Update the counter under the clean name before the suffix is appended
                        names[channel.Name] = occurrences;
                        channel.Name += " " + occurrences;
                    }
                    else
                    {
                        names.Add(channel.Name, 1);
                    }

                    //We don't want channels we already have
                    if (existingNames.Contains(channel.Name))
                    {
                        continue;
                    }

                    //So we have a channel name get the other properties then
                    channel.Country = FirstText(row, "td:nth-child(4)").Trim();
                    channel.Category = FirstText(row, "td:nth-child(5)").Trim();
                    if (channel.Category == "")
                    {
                        channel.Category = "Other";
                    }

                    //Override category if needed
                    if (!string.IsNullOrEmpty(aCategoryOverride))
                    {
                        channel.Category = aCategoryOverride;
                    }

                    //Skip the packages
                    //Skip the encryptions
                    channel.SID = FirstText(row, "td:nth-child(8)").Trim();
                    channel.VPID = FirstText(row, "td:nth-child(9)").Trim();
                    //Skip audios
                    channel.PMT = FirstText(row, "td:nth-child(11)").Trim();
                    //PCR and TXT used to be read from column 11 too, so all three properties held the PMT value.
                    //NOTE(review): assumes PCR and TXT follow PMT in columns 12 and 13, confirm against the live page.
                    //A missing cell yields an empty string rather than aborting the whole parse.
                    channel.PCR = FirstTextOrDefault(row, "td:nth-child(12)", "").Trim();
                    channel.TXT = FirstTextOrDefault(row, "td:nth-child(13)", "").Trim();

                    //Append that new channel to our list
                    channels.Add(channel);

                    //Show it in debug output
                    Debug.Write(channel);
                } //For each channel
            } //For each frequency

            return channels;
        }

        //
        // HTML-decoded inner text of the first element matching aSelector below aScope.
        // Does not check that anything matched: use it for cells every row is expected to have.
        //
        private static string FirstText(IDomObject aScope, string aSelector)
        {
            return WebUtility.HtmlDecode(aScope.Cq().Find(aSelector).Get(0).InnerText);
        }

        //
        // Same as FirstText, except that aFallback is returned when nothing matches aSelector.
        //
        private static string FirstTextOrDefault(IDomObject aScope, string aSelector, string aFallback)
        {
            CQ match = aScope.Cq().Find(aSelector);
            if (match.Length == 0)
            {
                return aFallback;
            }
            return WebUtility.HtmlDecode(match.Get(0).InnerText);
        }
StephaneLenclud@5
   189
StephaneLenclud@5
   190
        //
StephaneLenclud@5
   191
        /// <summary>
        /// Normalizes a channel name: trims it, cuts it at the last occurrence of the first
        /// matching country marker and removes a leading "Id: " prefix.
        /// </summary>
        /// <param name="aName">Raw channel name. Null is treated as an empty name.</param>
        /// <returns>The cleaned up name, never null.</returns>
        public static string CleanChannelName(string aName)
        {
            if (string.IsNullOrEmpty(aName))
            {
                return string.Empty;
            }

            aName = aName.Trim();

            //Country markers. Everything from the last occurrence of the first marker found is dropped,
            //including whatever follows the marker.
            string[] remove = { " Germany", " Deutschland", " (Germany)", " (Deutschland)" };
            foreach (string item in remove)
            {
                //A single ordinal search both detects the marker and locates it, so the index
                //handed to Substring can never be -1.
                int index = aName.LastIndexOf(item, StringComparison.Ordinal);
                if (index >= 0)
                {
                    aName = aName.Substring(0, index);
                    break; //only allow one match at most
                }
            }

            string[] removePrefix = { "Id: " };
            foreach (string item in removePrefix)
            {
                if (aName.StartsWith(item, StringComparison.Ordinal))
                {
                    aName = aName.Substring(item.Length);
                    break; //only allow one match at most
                }
            }

            return aName.Trim();
        }
StephaneLenclud@5
   222
StephaneLenclud@5
   223
        //
StephaneLenclud@5
   224
        /// <summary>
        /// Filters a channel list and normalizes the categories of the channels that are kept.
        /// A channel is dropped when the list also contains a channel named like it with " HD" appended,
        /// or when its name has no "HD" and either contains "Bundesliga" or starts with "Sky Sport".
        /// </summary>
        /// <param name="aChannels">Channels to clean. The list is not modified, but the Category of the channels that are kept is updated in place.</param>
        /// <returns>A new list holding the channels that were kept, in their original order.</returns>
        public static List<Channel> CleanChannelList(List<Channel> aChannels)
        {
            //Create our list of channels
            List<Channel> channels = new List<Channel>();

            //Every name in the list, collected once so that looking for the HD twin of a channel
            //does not rescan the whole list for each channel.
            HashSet<string> allNames = new HashSet<string>();
            foreach (Channel candidate in aChannels)
            {
                allNames.Add(candidate.Name);
            }

            foreach (Channel channel in aChannels)
            {
                if (allNames.Contains(channel.Name + " HD"))
                {
                    Debug.Write("WARNING: Found HD channel for " + channel.Name + ". Discarding it!\n");
                    continue;
                }

                //We don't want non HD Bundesliga nor non HD Sky Sport
                if (!channel.Name.Contains("HD")
                    && (channel.Name.Contains("Bundesliga") || channel.Name.StartsWith("Sky Sport", StringComparison.Ordinal)))
                {
                    Debug.Write("WARNING: Non HD sport channel " + channel.Name + ". Discarding it!\n");
                    continue;
                }

                //Patch some missing or bad categories based on the channel name.
                //Order matters: a later rule overrides an earlier one.
                if (channel.Name.Contains("Bundesliga") || channel.Name.Contains("Sport"))
                {
                    channel.Category = "Sport";
                }

                if (channel.Name.Contains("Sky Select"))
                {
                    channel.Category = "Pay per view";
                }

                if (channel.Name.StartsWith("Sky Atlantic", StringComparison.Ordinal)
                    || channel.Name.StartsWith("SyFy", StringComparison.Ordinal)
                    || channel.Name.StartsWith("Fox", StringComparison.Ordinal))
                {
                    channel.Category = "Series";
                }

                if (channel.Name.StartsWith("Sky 3D", StringComparison.Ordinal))
                {
                    channel.Category = "Movies";
                }

                //Collapse some categories
                if (channel.Category == "Entertainment"
                    || channel.Category == "Music"
                    || channel.Name.Contains("Music"))
                {
                    channel.Category = "General";
                }

                channel.Category = CollapseCategory(channel.Category);

                //No corresponding HD channel, keep it then
                channels.Add(channel);
            }

            return channels;
        }

        //
        // Maps a fine grained category to the coarser one we want to expose.
        // Categories without a mapping are returned unchanged. "Regional" is one of them:
        // collapsing it into "General" is currently disabled.
        //
        private static string CollapseCategory(string aCategory)
        {
            switch (aCategory)
            {
                case "Porn":
                    return "Erotic";
                case "Presentations":
                    return "News";
                case "History":
                case "Travel":
                    return "Documentaries";
                case "Lifestyle":
                case "Other":
                case "Cultural":
                    return "General";
                case "Movies":
                case "Series":
                    return "Movies & Series";
                default:
                    return aCategory;
            }
        }
StephaneLenclud@4
   333
    }
StephaneLenclud@1
   334
}