
    gE                     6   d Z ddlZddlZddlZddlmZ ddlmZ ddlm	Z	m
Z
mZmZmZ ddlmZmZmZmZ ddlmZ ddlmZmZmZ dd	lmZ dd
lmZ ddlmZmZ  ej>                  e       Z!de"fdZ#d Z$d Z%de"de&fdZ'de"de"e"ffdZ(de"de"fdZ)de"de"fdZ*de"de"fdZ+de"de"de"fdZ,de"de"de"fdZ-dede"fdZ.de"de"fdZ/d e"dee"ee"   f   fd!Z0de"de	fd"Z1de"de	fd#Z2de"de"fd$Z3d%e
d&e
d'e"ddfd(Z4d)e
ddfd*Z5de"de"fd+Z6de"de"fd,Z7dee   fd-Z8y).zBThis module contains all non-cipher related data extraction logic.    N)OrderedDict)datetime)AnyDictListOptionalTuple)parse_qsquote	urlencodeurlparse)Cipher)HTMLParseErrorLiveStreamErrorRegexMatchErrorregex_search)YouTubeMetadata)parse_for_objectparse_for_all_objects
watch_htmlc                 j    	 t        d| d      }t        j                  |d      S # t        $ r Y yw xY w)zExtract publish date
    :param str watch_html:
        The html contents of the watch page.
    :rtype: str
    :returns:
        Publish date of the video.
    z;(?<=itemprop=\"datePublished\" content=\")\d{4}-\d{2}-\d{2}r   groupNz%Y-%m-%d)r   r   r   strptime)r   results     W/var/www/it7/html/youtubeDownloader/venv/lib/python3.12/site-packages/pytube/extract.pypublish_dater      sB    Ja
 VZ00  s   & 	22c                 $    dg}|D ]  }|| v s y y)zCheck if live stream recording is available.

    :param str watch_html:
        The html contents of the watch page.
    :rtype: bool
    :returns:
        Whether or not the content is private.
    z,This live stream recording is not available.FT )r   unavailable_stringsstrings      r   recording_availabler#   &   s0     	7 & Z     c                 &    g d}|D ]  }|| v s y y)zCheck if content is private.

    :param str watch_html:
        The html contents of the watch page.
    :rtype: bool
    :returns:
        Whether or not the content is private.
    )zFThis is a private video. Please sign in to verify that you may see it.z"simpleText":"Private video"zThis video is private.TFr    )r   private_stringsr"   s      r   
is_privater'   8   s*    O
 " Z r$   returnc                 @    	 t        d| d       y# t        $ r Y yw xY w)zCheck if content is age restricted.

    :param str watch_html:
        The html contents of the watch page.
    :rtype: bool
    :returns:
        Whether or not the content is age restricted.
    zog:restrictions:ager   r   FT)r   r   )r   s    r   is_age_restrictedr*   L   s-    +ZqA   s    	c                     t        |       }|j                  di       }d|v ryd|v rd|v r|d   |d   gfS d|v r
|d   |d   fS ddgfS )a  Return the playability status and status explanation of a video.

    For example, a video may have a status of LOGIN_REQUIRED, and an explanation
    of "This is a private video. Please sign in to verify that you may see it."

    This explanation is what gets incorporated into the media player overlay.

    :param str watch_html:
        The html contents of the watch page.
    :rtype: bool
    :returns:
        Playability status and reason of the video.
    playabilityStatusliveStreamability)LIVE_STREAMzVideo is a live stream.statusreasonmessagesN)initial_player_responseget)r   player_responsestatus_dicts      r   playability_statusr6   \   s}     .j9O!%%&92>Kk)7;{"x(;x+@*AAA$x(+j*AAA$<r$   urlc                     t        d| d      S )ar  Extract the ``video_id`` from a YouTube url.

    This function supports the following patterns:

    - :samp:`https://youtube.com/watch?v={video_id}`
    - :samp:`https://youtube.com/embed/{video_id}`
    - :samp:`https://youtu.be/{video_id}`

    :param str url:
        A YouTube url containing a video id.
    :rtype: str
    :returns:
        YouTube video id.
    z(?:v=|\/)([0-9A-Za-z_-]{11}).*   r   r   )r7   s    r   video_idr:   v   s     93aHHr$   c                 v    t         j                  j                  |       }t        |j                        d   d   S )ao  Extract the ``playlist_id`` from a YouTube url.

    This function supports the following patterns:

    - :samp:`https://youtube.com/playlist?list={playlist_id}`
    - :samp:`https://youtube.com/watch?v={video_id}&list={playlist_id}`

    :param str url:
        A YouTube url containing a playlist id.
    :rtype: str
    :returns:
        YouTube playlist id.
    listr   )urllibparser   r
   query)r7   parseds     r   playlist_idrA      s1     \\""3'FFLL!&)!,,r$   c                    g d}|D ]l  }t        j                  |      }|j                  |       }|s,t        j	                  d|       |j                  d      }|j                  d      }d| d| c S  t        dd      )	a  Extract the ``channel_name`` or ``channel_id`` from a YouTube url.

    This function supports the following patterns:

    - :samp:`https://youtube.com/c/{channel_name}/*`
    - :samp:`https://youtube.com/channel/{channel_id}/*
    - :samp:`https://youtube.com/u/{channel_name}/*`
    - :samp:`https://youtube.com/user/{channel_id}/*

    :param str url:
        A YouTube url containing a channel name.
    :rtype: str
    :returns:
        YouTube channel name.
    )z(?:\/(c)\/([%\d\w_\-]+)(\/.*)?)z%(?:\/(channel)\/([%\w\d_\-]+)(\/.*)?)z(?:\/(u)\/([%\d\w_\-]+)(\/.*)?)z"(?:\/(user)\/([%\w\d_\-]+)(\/.*)?)"finished regex search, matched: %sr9      /channel_namepatternscallerpatternrecompilesearchloggerdebugr   r   )r7   rG   rJ   regexfunction_match	uri_styleuri_identifiers          r   rF   rF      s     H  4

7#c*LL=wG&,,Q/I+11!4Nyk>"2334 z r$   r:   	watch_urlc           	      V    t        d| fddt        |      fddddg      }t        |      S )a  Construct the video_info url.

    :param str video_id:
        A YouTube video identifier.
    :param str watch_url:
        A YouTube watch url.
    :rtype: str
    :returns:
        :samp:`https://youtube.com/get_video_info` with necessary GET
        parameters.
    r:   )psdefaulteurl)hlen_UShtml51cTVHTML5cverz
7.20201028)r   r   _video_info_url)r:   rU   paramss      r   video_info_urlrf      sD     "U9%&"	

F 6""r$   
embed_htmlc                     	 t        d|d      }d|  }t        d| fd|fd|fd	d
dg      }t        |      S # t        $ r d}Y 4w xY w)a<  Construct the video_info url.

    :param str video_id:
        A YouTube video identifier.
    :param str embed_html:
        The html contents of the embed page (for age restricted videos).
    :rtype: str
    :returns:
        :samp:`https://youtube.com/get_video_info` with necessary GET
        parameters.
    z"sts"\s*:\s*(\d+)r9   r    z!https://youtube.googleapis.com/v/r:   rY   stsr\   r_   rb   )r   r   r   rd   )r:   rg   rj   rY   re   s        r   video_info_url_age_restrictedrk      su    /1E
 /xj9D"TNCL"	
	F 6""  s   7 AAre   c                     dt        |       z   S )Nz'https://www.youtube.com/get_video_info?)r   )re   s    r   rd   rd      s    4y7HHHr$   htmlc                 x    	 t        |       d   d   }d|z   S # t        t        f$ r t        |       }Y d|z   S w xY w)zGet the base JavaScript url.

    Construct the base JavaScript url, which contains the decipher
    "transforms".

    :param str html:
        The html contents of the watch page.
    assetsjszhttps://youtube.com)get_ytplayer_configKeyErrorr   get_ytplayer_js)rm   base_jss     r   js_urlru      sS    (%d+H5d; !7** o& (!$' 7**(s    99mime_type_codecc                     d}t        j                  |      }|j                  |       }|st        d|      |j	                         \  }}||j                  d      D cg c]  }|j                          c}fS c c}w )a  Parse the type data.

    Breaks up the data in the ``type`` key of the manifest, which contains the
    mime type and codecs serialized together, and splits them into separate
    elements.

    **Example**:

    mime_type_codec('audio/webm; codecs="opus"') -> ('audio/webm', ['opus'])

    :param str mime_type_codec:
        String containing mime type and codecs.
    :rtype: tuple
    :returns:
        The mime type and a list of codecs.

    z,(\w+\/\w+)\;\scodecs=\"([a-zA-Z-0-9.,\s]*)\"rv   rH   ,)rL   rM   rN   r   groupssplitstrip)rv   rJ   rQ   results	mime_typecodecsr`   s          r   rv   rv     so    $ >GJJwEll?+G%6HH(Iv&,,s*;<Qqwwy<<<<s   A:c                     dg}|D ]U  }t        j                  |      }|j                  |       }|s,t        j	                  d|       |j                  d      }|c S  t        dd      )zGet the YouTube player base JavaScript path.

    :param str html
        The html contents of the watch page.
    :rtype: str
    :returns:
        Path to YouTube's base.js file.
    z'(/s/player/[\w\d]+/[\w\d_/.]+/base\.js)rC   r9   rs   js_url_patternsrH   rK   )rm   r   rJ   rQ   rR   yt_player_jss         r   rs   rs   '  sx     	3O #  

7#d+LL=wG)//2L   *; r$   c                 L   t         j                  d       ddg}|D ]  }	 t        | |      c S  dg}|D ]  }	 t        | |      c S  t	        dd	      # t        $ r7}t         j                  d|        t         j                  |       Y d}~sd}~ww xY w# t        $ r Y nw xY w)
a  Get the YouTube player configuration data from the watch html.

    Extract the ``ytplayer_config``, which is json data embedded within the
    watch html and serves as the primary source of obtaining the stream
    manifest data.

    :param str html:
        The html contents of the watch page.
    :rtype: str
    :returns:
        Substring of the html containing the encoded manifest data.
    zfinding initial function namezytplayer\.config\s*=\s*ytInitialPlayerResponse\s*=\s*zPattern failed: Nz,yt\.setConfig\(.*['\"]PLAYER_CONFIG['\"]:\s*rq   z#config_patterns, setconfig_patternsrH   )rO   rP   r   r   r   )rm   config_patternsrJ   esetconfig_patternss        r   rq   rq   @  s     LL01")O # 	#D'22 	8 & 	#D'22 $.S '  	LL+G956LLO	   		s(   AB	B-BB	B#"B#c                     i }ddg}|D ]'  }	 t        | |      }|D ]  }|j                  |        ) t        |      dkD  r|S t	        dd      # t        $ r Y Rw xY w)a;  Get the entirety of the ytcfg object.

    This is built over multiple pieces, so we have to find all matches and
    combine the dicts together.

    :param str html:
        The html contents of the watch page.
    :rtype: str
    :returns:
        Substring of the html containing the encoded manifest data.
    z
ytcfg\s=\szytcfg\.set\(r   	get_ytcfgytcfg_pattenrsrH   )r   updater   lenr   )rm   ytcfgytcfg_patternsrJ   found_objectsobjs         r   r   r   n  s     EN " 	1$@M$ "S!"	 5zA~
$4   		s   $A	AAstream_manifestvid_inforp   c           	         t        |      }t        |       D ]%  \  }}	 |d   }dv sd|vrd|v sd	|v rt
        j                  d
       3|j                  |d         }t
        j                  d|d          t        |      }	t        t        |      j                        }
|
j                         D ci c]  \  }}||d    }
}}||
d<   d|
j                         vr$t        |
d         }|j                  |      }||
d<   |	j                   d|	j                    |	j"                   dt%        |
       }|| |   d<   ( y# t        $ r2 |j                  di       j                  d      }|rt	        d      Y Ww xY wc c}}w )zApply the decrypted signature to the stream manifest.

    :param dict stream_manifest:
        Details of the media streams available.
    :param str js:
        The contents of the base.js asset file.

    )rp   r7   r,   r-   UNKNOWN	signaturesz&sig=z&lsig=zsignature found, skip decipher)ciphered_signaturez+finished descrambling signature for itag=%sitagr   sig
ratebypassnz://?N)r   	enumeraterr   r3   r   rO   rP   get_signaturer   r
   r?   itemskeysr<   calculate_nschemenetlocpathr   )r   r   rp   cipheristreamr7   live_streamr   
parsed_urlquery_paramskv	initial_nnew_ns                  r   apply_signaturer     s    r]F/ +(	6	1e}C #v7c>X_
 LL9:((F3K(H	96&>	
 c]
   3 34+113
!AqtG
 
 (U|0022 \#./I&&y1E %L""#3z'8'8&9*//9J!IVbLcKde %(5!W+(  	10"6()  %i00 	14
s   E5F 7E=<E=stream_datac                 j   d| v ryg }d| j                         v r|j                  | d          d| j                         v r|j                  | d          |D ]E  }d|vr(d|v r$t        |d         }|d   d   |d<   |d   d   |d<   |j                  d      d	k(  |d
<   G t        j                  d       |S )a-  Apply various in-place transforms to YouTube's media stream data.

    Creates a ``list`` of dictionaries by string splitting on commas, then
    taking each list item, parsing it as a query string, converting it to a
    ``dict`` and unquoting the value.

    :param dict stream_data:
        Dictionary containing query string encoded values.

    **Example**:

    >>> d = {'foo': 'bar=1&var=test,em=5&t=url%20encoded'}
    >>> apply_descrambler(d, 'foo')
    >>> print(d)
    {'foo': [{'bar': '1', 'var': 'test'}, {'em': '5', 't': 'url encoded'}]}

    r7   NformatsadaptiveFormatssignatureCipherr   r   typeFORMAT_STREAM_TYPE_OTFis_otfzapplying descrambler)r   extendr
   r3   rO   rP   )r   r   data
cipher_urls       r   apply_descramblerr     s    $  GK$$&&{9-.K,,..{#456  F D(%d+<&=>
(/2U&sOA.S	&)-EEXF LL'(Nr$   c                 l    ddg}|D ]  }	 t        | |      c S  t        dd      # t        $ r Y +w xY w)zExtract the ytInitialData json from the watch_html page.

    This mostly contains metadata necessary for rendering the page on-load,
    such as video information, copyright notices, etc.

    @param watch_html: Html of the watch page
    @return:
    z'window\[['\"]ytInitialData['\"]]\s*=\s*zytInitialData\s*=\s*initial_datainitial_data_patternrH   r   r   r   r   rG   rJ   s      r   r   r     sX     	3H  	#J88 9O
PP  		   '	33c                 l    ddg}|D ]  }	 t        | |      c S  t        dd      # t        $ r Y +w xY w)a  Extract the ytInitialPlayerResponse json from the watch_html page.

    This mostly contains metadata necessary for rendering the page on-load,
    such as video information, copyright notices, etc.

    @param watch_html: Html of the watch page
    @return:
    z1window\[['\"]ytInitialPlayerResponse['\"]]\s*=\s*r   r2   initial_player_response_patternrH   r   r   s      r   r2   r2     s]     	=)H  	#J88 (1   		r   c                     	 | d   d   d   d   d   d   d   d   d   d   }t        d	 |      }|D cg c]  }|d
   	 }}t        |      S # t         t        f$ r t        g       cY S w xY wc c}w )u<  Get the informational metadata for the video.

    e.g.:
    [
        {
            'Song': '강남스타일(Gangnam Style)',
            'Artist': 'PSY',
            'Album': 'PSY SIX RULES Pt.1',
            'Licensed to YouTube by': 'YG Entertainment Inc. [...]'
        }
    ]

    :rtype: YouTubeMetadata
    contentstwoColumnWatchNextResultsr|   r9   videoSecondaryInfoRenderermetadataRowContainermetadataRowContainerRendererrowsc                 &    d| j                         v S )NmetadataRowRenderer)r   )xs    r   <lambda>zmetadata.<locals>.<lambda>;  s    '16683 r$   r   )rr   
IndexErrorr   filter)r   metadata_rowsr   s      r   metadatar   !  s    #*:67RS "",../11MO"$$BDDJL 3M 8EE!Q,-EME=)) j! #r""# Fs    A A,A)(A))9__doc__loggingurllib.parser=   rL   collectionsr   r   typingr   r   r   r   r	   r
   r   r   r   pytube.cipherr   pytube.exceptionsr   r   r   pytube.helpersr   pytube.metadatar   pytube.parserr   r   	getLogger__name__rO   strr   r#   r'   boolr*   r6   r:   rA   rF   rf   rk   rd   ru   rv   rs   rq   r   r   r   r   r2   r   r    r$   r   <module>r      s   H   	 #  3 3 = =   N N ' + A 
		8	$1S 1$$(# $  3 C: 4I# I# I$-S -S -$!c !c !H#S #S #S #4#C #S #S #@IK IC I+ + + =S =U3S	>-B =6# # 2+c +c +\C C D6(T 6(T 6(s 6(t 6(r&4 &D &RQS QS Q,  2"*h7 "*r$   