KingOfSat.cs
author StephaneLenclud
Sun, 17 May 2015 19:36:48 +0200
changeset 6 3061de2306d9
parent 5 29ccfbf98e54
child 7 bf908f6c7758
permissions -rw-r--r--
Fixing character encoding issues.
StephaneLenclud@1
     1
using System;
StephaneLenclud@1
     2
using System.Collections.Generic;
StephaneLenclud@1
     3
using System.Linq;
StephaneLenclud@1
     4
using System.Text;
StephaneLenclud@1
     5
using CsQuery;
StephaneLenclud@1
     6
using System.Diagnostics;
StephaneLenclud@1
     7
using System.Net;
StephaneLenclud@1
     8
StephaneLenclud@1
     9
namespace SatChanGen
StephaneLenclud@1
    10
{
StephaneLenclud@5
    11
    class KingOfSat
StephaneLenclud@5
    12
    {
StephaneLenclud@5
    13
        /// <summary>
        /// Downloads a KingOfSat channel list page and parses the channels it lists
        /// for the requested orbital position.
        /// </summary>
        /// <param name="aChannels">
        /// Channels we already have. A parsed channel whose final name equals the name
        /// of one of these is skipped. May be null, which is treated as an empty list.
        /// </param>
        /// <param name="aUrl">URL to a KingOfSat channel list. Typically a package list.</param>
        /// <param name="aOrbitalPosition">
        /// Orbital position text a frequency must carry (after HTML decoding and trimming)
        /// for its channels to be parsed. Frequencies with any other position are skipped.
        /// </param>
        /// <returns>List of channels parsed. Entries from aChannels are never included.</returns>
        public static List<Channel> Parse(List<Channel> aChannels, string aUrl, string aOrbitalPosition)
        {
            //To avoid duplicated name
            Dictionary<string, int> names = new Dictionary<string, int>();

            //Names of the channels we already have, collected once so that each row
            //costs a hash lookup instead of a scan of the whole list.
            HashSet<string> knownNames = new HashSet<string>();
            if (aChannels != null)
            {
                foreach (Channel known in aChannels)
                {
                    if (known != null)
                    {
                        knownNames.Add(known.Name);
                    }
                }
            }

            //WebClient is disposable, release it as soon as the page is downloaded
            string kos;
            using (WebClient client = new WebClient())
            {
                kos = client.DownloadString(aUrl);
            }
            //Debug.Write(kos);

            CQ dom = kos;

            //Get all the Frequency elements in our page
            CQ sats = dom[".frq"];

            //Create our list of channels
            List<Channel> channels = new List<Channel>();

            foreach (IDomObject frq in sats)
            {
                Channel common = new Channel();

                //Parse channel details
                common.OrbitalPosition = CellText(frq, "tbody > tr > td > a > font").Trim();
                if (common.OrbitalPosition != aOrbitalPosition)
                {
                    //Wrong sat, skip
                    continue;
                }

                common.Satellite = CellText(frq, "tbody > tr > td:nth-child(2) > a");
                common.Frequency = CellText(frq, "tbody > tr > td:nth-child(3)");
                common.Polarisation = CellText(frq, "tbody > tr > td:nth-child(4)");
                common.Transponder = CellText(frq, "tbody > tr > td:nth-child(5) > a");
                common.Beam = CellText(frq, "tbody > tr > td:nth-child(6) > a");
                common.Standard = CellText(frq, "tbody > tr > td:nth-child(7)");
                common.Modulation = CellText(frq, "tbody > tr > td:nth-child(8)");
                common.SymbolRate = CellText(frq, "tbody > tr > td:nth-child(9) > a");
                common.FEC = CellText(frq, "tbody > tr > td:nth-child(9) > a:nth-child(2)");

                //The provider is optional: only read it when the element is actually there
                //rather than relying on an exception being thrown and swallowed.
                CQ cqProvider = frq.Cq().Find("tbody > tr > td:nth-child(10) > b");
                if (cqProvider.Length > 0)
                {
                    common.Provider = WebUtility.HtmlDecode(cqProvider.Get(0).InnerText);
                }

                //Bitrate may come with a leading ", " separator, drop it when present
                common.Bitrate = StripPrefix(CellText(frq, "tbody > tr > td:nth-child(10)"), ", ");
                //Network and transponder IDs are labelled in the page, keep the value only
                common.NetworkID = StripPrefix(CellText(frq, "tbody > tr > td:nth-child(11)"), "NID:");
                common.TransponderID = StripPrefix(CellText(frq, "tbody > tr > td:nth-child(12)"), "TID:");

                //We got common properties for the coming channels
                //Debug.Write(common.ToString());

                //Now get all the channels for that frequency
                CQ channelsDiv = frq.Cq().Next("div");
                CQ channelsTableRows = channelsDiv.Find("table.fl > tbody").Children("tr");

                foreach (IDomObject row in channelsTableRows)
                {
                    Channel channel = new Channel();
                    //Initialize this channel with common properties on this frequency
                    channel.Copy(common);

                    //Try and parse channel name, it is either a link or in italic
                    CQ cqChannelName = row.Cq().Find("td:nth-child(3) > a");
                    if (cqChannelName.Length == 0)
                    {
                        cqChannelName = row.Cq().Find("td:nth-child(3) > i");
                        if (cqChannelName.Length == 0)
                        {
                            //Can't get channel name
                            Debug.Write("WARNING: Can't find channel name! Skipping this channel");
                            continue;
                        }
                    }

                    string channelNameInnerText = cqChannelName.Get(0).InnerText;
                    //We spent a lot of time trying to get this right until we found our answer in the following thread.
                    //http://stackoverflow.com/questions/14057434/how-can-i-transform-string-to-utf-8-in-c
                    //Decode HTML
                    channel.Name = WebUtility.HtmlDecode(channelNameInnerText);
                    //Convert from default encoding to UTF8
                    //NOTE(review): this round-trip presumably compensates for the page being UTF-8
                    //while DownloadString decoded it with the default encoding - confirm before changing.
                    byte[] bytes = Encoding.Default.GetBytes(channel.Name);
                    channel.Name = Encoding.UTF8.GetString(bytes);
                    if (channel.Name == "Name")
                    {
                        //Skipping header rows
                        continue;
                    }

                    //Make sure our channel name looks decent
                    channel.Name = CleanChannelName(channel.Name);
                    //Make sure the resulting name is unique to avoid having multiple tuning detail for a single channel
                    int occurrences;
                    if (names.TryGetValue(channel.Name, out occurrences))
                    {
                        occurrences++;
                        names[channel.Name] = occurrences;
                        channel.Name += " " + occurrences;
                    }
                    else
                    {
                        names.Add(channel.Name, 1);
                    }

                    //We don't want channels we already have
                    if (knownNames.Contains(channel.Name))
                    {
                        continue;
                    }

                    //So we have a channel name get the other properties then
                    channel.Country = CellText(row, "td:nth-child(4)").Trim();
                    channel.Category = CellText(row, "td:nth-child(5)").Trim();
                    if (channel.Category == "")
                    {
                        channel.Category = "Other";
                    }
                    //Skip the packages (6th column)
                    //Skip the encryptions (7th column)
                    channel.SID = CellText(row, "td:nth-child(8)").Trim();
                    channel.VPID = CellText(row, "td:nth-child(9)").Trim();
                    //Skip audios (10th column)
                    //PMT, PCR and TXT each have their own column; they used to be all read from the 11th one
                    channel.PMT = CellText(row, "td:nth-child(11)").Trim();
                    channel.PCR = CellText(row, "td:nth-child(12)").Trim();
                    channel.TXT = CellText(row, "td:nth-child(13)").Trim();

                    //Append that new channel to our list
                    channels.Add(channel);

                    //Show it in debug output
                    Debug.Write(channel);
                } //For each channel
            } //For each frequency

            return channels;
        }

        //HTML decoded inner text of the first element matching aSelector below aScope.
        //Throws if nothing matches, just like a direct Get(0) on the selection.
        private static string CellText(IDomObject aScope, string aSelector)
        {
            return WebUtility.HtmlDecode(aScope.Cq().Find(aSelector).Get(0).InnerText);
        }

        //Returns aText without aPrefix when it starts with it, aText unchanged otherwise.
        private static string StripPrefix(string aText, string aPrefix)
        {
            if (aText != null && aText.StartsWith(aPrefix, StringComparison.Ordinal))
            {
                return aText.Substring(aPrefix.Length);
            }
            return aText;
        }
StephaneLenclud@5
   169
StephaneLenclud@5
   170
        //
StephaneLenclud@5
   171
        /// <summary>
        /// Tidies up a channel name: trims it and, for the first country marker found
        /// (" Germany", " Deutschland", " (Germany)", " (Deutschland)", tried in that order),
        /// cuts the name at the last occurrence of that marker. Anything following the
        /// marker is dropped too.
        /// </summary>
        /// <param name="aName">Raw channel name. Null or empty is returned unchanged.</param>
        /// <returns>The cleaned up, trimmed channel name.</returns>
        public static string CleanChannelName(string aName)
        {
            if (string.IsNullOrEmpty(aName))
            {
                //Nothing to clean
                return aName;
            }

            aName = aName.Trim();
            string[] remove = { " Germany", " Deutschland", " (Germany)", " (Deutschland)" };

            foreach (string item in remove)
            {
                //Single ordinal search: no second lookup, and no risk of a culture sensitive
                //search disagreeing with an ordinal Contains and yielding a negative index.
                int cut = aName.LastIndexOf(item, StringComparison.Ordinal);
                if (cut >= 0)
                {
                    aName = aName.Substring(0, cut);
                    break; //only allow one match at most
                }
            }

            return aName.Trim();
        }
StephaneLenclud@5
   188
StephaneLenclud@5
   189
        //
StephaneLenclud@5
   190
        /// <summary>
        /// Builds a filtered copy of a channel list:
        /// channels for which a "&lt;name&gt; HD" channel exists in the list are dropped,
        /// as are non HD Bundesliga and non HD Sky Sport channels.
        /// The category of every channel that is kept is patched and collapsed in place,
        /// meaning the Channel objects of the input list are modified.
        /// </summary>
        /// <param name="aChannels">Channels to filter. Null yields an empty list.</param>
        /// <returns>New list holding the channels that were kept, in their original order.</returns>
        public static List<Channel> CleanChannelList(List<Channel> aChannels)
        {
            //Create our list of channels
            List<Channel> channels = new List<Channel>();
            if (aChannels == null)
            {
                return channels;
            }

            //Collect all names once so that looking for an HD twin is a hash lookup
            //rather than a scan of the whole list for every single channel.
            HashSet<string> allNames = new HashSet<string>();
            foreach (Channel candidate in aChannels)
            {
                allNames.Add(candidate.Name);
            }

            foreach (Channel channel in aChannels)
            {
                if (allNames.Contains(channel.Name + " HD"))
                {
                    Debug.Write("WARNING: Found HD channel for " + channel.Name + ". Discarding it!\n");
                    continue;
                }

                //We don't want non HD Bundesliga nor non HD Sky Sport
                bool isHd = channel.Name.Contains("HD");
                bool hdOnlyContent = channel.Name.Contains("Bundesliga")
                    || channel.Name.StartsWith("Sky Sport", StringComparison.Ordinal);
                if (hdOnlyContent && !isHd)
                {
                    //No HD twin was found here, so say what is really going on
                    Debug.Write("WARNING: Discarding non HD channel " + channel.Name + "!\n");
                    continue;
                }

                PatchCategory(channel);

                //No corresponding HD channel, keep it then
                channels.Add(channel);
            }

            return channels;
        }

        //Fixes missing or bad categories based on the channel name, then collapses
        //some categories into broader ones. Rules are applied in order, later ones win.
        private static void PatchCategory(Channel aChannel)
        {
            string name = aChannel.Name;

            //Patch some missing or bad categories
            if (name.Contains("Bundesliga") || name.Contains("Sport"))
            {
                aChannel.Category = "Sport";
            }

            if (name.Contains("Sky Select"))
            {
                aChannel.Category = "Pay per view";
            }

            if (name.StartsWith("Sky Atlantic", StringComparison.Ordinal)
                || name.StartsWith("SyFy", StringComparison.Ordinal)
                || name.StartsWith("Fox", StringComparison.Ordinal))
            {
                aChannel.Category = "Series";
            }

            if (name.StartsWith("Sky 3D", StringComparison.Ordinal))
            {
                aChannel.Category = "Movies";
            }

            //Collapse some categories
            if (name.Contains("Music"))
            {
                //Anything musical goes to General whatever category it had
                aChannel.Category = "General";
                return;
            }

            switch (aChannel.Category)
            {
                case "Entertainment":
                case "Music":
                case "Lifestyle":
                case "Other":
                case "Cultural":
                    aChannel.Category = "General";
                    break;
                case "Porn":
                    aChannel.Category = "Erotic";
                    break;
                case "Presentations":
                    aChannel.Category = "News";
                    break;
                case "History":
                    aChannel.Category = "Documentaries";
                    break;
                default:
                    //Every other category, "Regional" included, is left as it is
                    break;
            }
        }
StephaneLenclud@4
   283
    }
StephaneLenclud@1
   284
}