fYUddlmZddlZddlZddlZddlZddlZddlZddlZddl Z ddl m Z ddl m Z ddlmZddlmZmZmZmZmZmZmZmZddlmZer ddlmZdd lmZd Zed Z d' d(d Z! d' d)d Z"iZ#de$d<d*dZ%d+dZ&d,dZ' d- d.dZ(d/dZ)d0dZ* d1dZ+d2dZ, d3 d4dZ-d5dZ.d6dZ/d7dZ0d8dZ1d9dZ2 d: d;dZ3dd!Z7 d? d@d"Z8dAd#Z9ejldBdCd$Z:d%Z;d&Z>> infer_storage_options('/mnt/datasets/test.csv') # doctest: +SKIP {"protocol": "file", "path", "/mnt/datasets/test.csv"} >>> infer_storage_options( ... 'hdfs://username:pwd@node:123/mnt/datasets/test.csv?q=1', ... inherit_storage_options={'extra': 'value'}, ... ) # doctest: +SKIP {"protocol": "hdfs", "username": "username", "password": "pwd", "host": "node", "port": 123, "path": "/mnt/datasets/test.csv", "url_query": "q=1", "extra": "value"} z^[a-zA-Z]:[\\/]z^[a-zA-Z0-9]+://file)protocolpath#z^/([a-zA-Z])[:|]([\\/].*)$z%s:%s)httphttps@:rhost)s3s3agcsgsrportusernamepassword url_query url_fragment)rematchrschemefragmentjoinrgroupsnetlocrsplitr#r$r%queryupdate_storage_options)urlpathinherit_storage_options parsed_pathrr windows_pathoptionss U/var/lib/jenkins/workspace/mettalog/venv/lib/python3.12/site-packages/fsspec/utils.pyinfer_storage_optionsr8%s@ #W- 88' 1 9"G447#K!!+VHxx));+?+?@A6xx =tD \0022D$$$g66+3TBG&,,33C;B?FFsANqQ 1 1%fo?GFO%foGFO   )..GFO   "-"6"6GJ    "-"6"6GJ *00 "-"6"6w(?@ Nc|si}t|t|z}|r8|D]3}|j||j|k7s'td||j|y)Nz9Collision between inferred and specified storage option: )setgetKeyErrorupdate)r6 inherited collisions collisions r7r1r1yso  WI.J#I{{9%y)AA ){,$  NN9r9zdict[str, str] compressionsctjj|djdj }|t vr t |Sy)aInfer compression, if available, from filename. Infer a named compression type, if registered and available, from filename extension. This includes builtin (gz, bz2, zip) compressions, as well as optional compressions. See fsspec.compression.register_compression. r.N)osrsplitextstriplowerrB)filename extensions r7infer_compressionrKsG  *2.44S9??AIL I&& r9c|dz }ttjtj|dfd }|S)aReturns a function that receives a single integer and returns it as a string padded by enough zero characters to align with maximum possible integer >>> name_f = build_name_function(57) >>> name_f(7) '07' >>> name_f(31) '31' >>> build_name_function(1000)(42) '0042' >>> build_name_function(999)(42) '042' >>> build_name_function(0)(0) '0' g:0yE>c8t|jSN)strzfill)i pad_lengths r7 name_functionz*build_name_function..name_functions1v||J''r9)rQintreturnrO)rTmathceillog10)max_intrSrRs @r7build_name_functionrZs7& tOGTYYtzz'234J( r9cv|jdk(ryd} |j|}|sy|r||zn|} ||vrL|j|}|j|jt ||z z t |zyt ||kry |t | d}#t t f$rY!wxYw)aSeek current file to file start, file end, or byte after delimiter seq. Seeks file to next chunk delimiter, where chunks are defined on file start, a delimiting sequence, and file end. Use file.tell() to see location afterwards. Note that file start is a valid split, so must be at offset > 0 to seek for delimiter. Parameters ---------- file: a file delimiter: bytes a delimiter like ``b'\n'`` or message sentinel, matching file .read() type blocksize: int Number of bytes to read from the file at once. Returns ------- Returns True if a delimiter was found, False if at file start or end. rFNT)tellreadindexseeklenOSError ValueError)r delimiter blocksizelastcurrentfullrQs r7seek_delimiterrhs. yy{aD ))I&!%tg~7 D JJy) $))+TQ7#i.HIW )* S^O%&! $   sAB&B&&B87B8c|r|j|t||d}||jS|j}|||z z}|j||zt||d}|j}|r|r|t |z}|r|r|t |z}|}||z }|j||J|j|} | S)aRead a block of bytes from a file Parameters ---------- f: File Open file offset: int Byte offset to start read length: int Number of bytes to read, read through end of file if None delimiter: bytes (optional) Ensure reading starts and stops at delimiter bytestring split_before: bool (optional) Start/stop read *before* delimiter bytestring. If using the ``delimiter=`` keyword argument we ensure that the read starts and stops at delimiter boundaries that follow the locations ``offset`` and ``offset + length``. If ``offset`` is zero then we start at zero, regardless of delimiter. The bytestring returned WILL include the terminating delimiter string. Examples -------- >>> from io import BytesIO # doctest: +SKIP >>> f = BytesIO(b'Alice, 100\nBob, 200\nCharlie, 300') # doctest: +SKIP >>> read_block(f, 0, 13) # doctest: +SKIP b'Alice, 100\nBo' >>> read_block(f, 0, 13, delimiter=b'\n') # doctest: +SKIP b'Alice, 100\nBob, 200\n' >>> read_block(f, 10, 10, delimiter=b'\n') # doctest: +SKIP b'Bob, 200\nCharlie, 300' i)r_rhr]r\r`) foffsetlengthrc split_beforefound_start_delimstartfound_end_delimendbs r7 read_blockrssV v*1i? >668O%&.  uv~(Iu=ffh  S^ #E | 3y> !CuFF6N    vA Hr9c|r||fz } tt|j}|j S#t$r6tt|jd}Y|j SwxYw)zDeterministic token (modified from dask.base) >>> tokenize([1, 2, '3']) '9d71491b50023b06fc76928e6eddb952' >>> tokenize('Hello') == tokenize('Hello') True F)usedforsecurity)rrOencoderb hexdigest)argskwargshs r7tokenizer{2ss  ; D   " # ;;= ; D   "E : ;;=;s"<-A;:A;ct|tr|St|dr|jSt|dr |jS|S)a6Attempt to convert a path-like object to a string. Parameters ---------- filepath: object to be converted Returns ------- filepath_str: maybe a string version of the object Notes ----- Objects supporting the fspath protocol are coerced according to its __fspath__ method. For backwards compatibility with older Python version, pathlib.Path objects are specially coerced. Any other object is passed through unchanged, which includes bytes, strings, buffers, or anything else that's not even path-like. __fspath__r) isinstancerOhasattrr}r)filepaths r7stringify_pathrGsE,(C  < (""$$ 6 "}}r9c6||i|}|j|SrN)_determine_worker)clsrxryinsts r7 make_instancergs$   D Kr9c|Dcgc]}|jdc}tdD}d}t|D]tfdD}|rn|z dj ddScc}w)z;For a list of paths, find the shortest prefix common to all/c32K|]}t|ywrN)r`).0ps r7 z common_prefix..rs%!s1v%rc3:K|]}|dk(yw)rN)rrrQpartss r7rz common_prefix..us"5!!A$%(1+%5sN)splitminrangeallr,)pathsrlmaxrqrQrs @@r7 common_prefixros}#( )aQWWS\ )E %u% %D C 4[5u55 HA 88E!HRaL !! *sA;c t|tr|jd}|r4|Dcgc]'}dj||j ddf)}}|St |}|r|j ddd}|s4td|Dr"|Dcgc]}dj||g}}|S|Dcgc]}|j||d}}|St|t|k(sJ|Scc}wcc}wcc}w)aIn bulk file operations, construct a new file tree from a list of files Parameters ---------- paths: list of str The input file tree path2: str or list of str Root to construct the new list in. If this is already a list of str, we just assert it has the right number of elements. exists: bool (optional) For a str destination, it is already exists (and is a dir), files should end up inside. flatten: bool (optional) Whether to flatten the input directory tree structure so that the output files are in the same directory. Returns ------- list of str rrrrc3@K|]}|jd yw)rN) startswithrss r7rzother_paths..sC!,,s"33Cs) r~rOrstripr,rrr/rreplacer`)rpath2existsflattenrcps r7 other_pathsr|s6% S! BGHQSXXuaggcl2&678HEH Lu%BYYsA&q)#CUCC7<=!5!*-== L;@@Q2ua0@@ L5zSZ''' LI >@s,C-C24C7c"t|tSrN)r~ BaseExceptionobjs r7 is_exceptionrs c= ))r9c,tfddDS)Nc36K|]}t|ywrN)r)rattrrjs r7rzisfilelike..sFDwq$Fs)r]closer\)r)rjs`r7 isfilelikers F,EF FFr9cpt|}tjd|d}t|dkDr|dSy)Nz (\:\:|\://)r)maxsplitrr)rr(rr`)urlrs r7 get_protocolrs5  C HH^S1 5E 5zA~Qx r9crddlm} t|t|ddS#tt f$rYywxYw)z*Can the given URL be used with open_local?r)get_filesystem_class local_fileF)fsspecrgetattrrrb ImportError)rrs r7 can_be_localrs;++L,>?uUU  $s $66c|tjvr+tj|}t|dr |jS t |S#YnxYw ddl}|j |}|jS#ttf$rYywxYw)aFor given package name, try to find the version without importing it Import and package.__version__ is still the backup here, so an import *might* happen. Returns either the version string, or None if the package or the version was not readily found. __version__rN) sysmodulesrrr importlib import_modulerAttributeError)namemodrs r7"get_package_version_without_importrs s{{kk$ 3 &?? " t}  %%d+  (s A A A33BBcF| | td|xstj|}tj}tjd}|j ||r|j j|j||j||S)Nz+Provide either logger object or logger namezD%(asctime)s - %(name)s - %(levelname)s - %(funcName)s -- %(message)s) rblogging getLogger StreamHandler Formatter setFormatterhandlersclear addHandlersetLevel)logger logger_namelevelrhandle formatters r7 setup_loggingrs ~+-FGG  5w((5F  " " $F!!NI  "  f OOE Mr9c$|j|SrN)unstrip_protocol)rfss r7_unstrip_protocolrs  t $$r9c&dfd dfd }|S)zqMirror attributes and methods from the given origin_name attribute of the instance to the decorated classc4t|}t||SrN)r)methodselforigin origin_names r7 origin_getterz"mirror_from..origin_getters{+vv&&r9cZD]$}t|}t||t|&|SrN)rsetattrproperty)rrwrapped_methodmethodsrs r7wrapperzmirror_from..wrappers1F$]F;N C.!9 : r9)rrOrr rUr )rtype[T]rUrr)rrrrs`` @r7 mirror_fromrs' Nr9c#K|ywrNrrs r7 nullcontextr s Isc .t|tstt|ts|gt|z}t|ts|gt|z}t|t|k7st|t|k7rtt|dkr|||fS|Dcgc]}|xsd }}|r'dt t t |||D\}}}|r|dd}|dd}|dd} tdt|D]} || || dz k(r| d|| || dz k7s|| | dz |kDs|K|| |dz |kDr=|j|| |j|| | j|| || | d<||| fS|||fScc}w)a}Merge adjacent byte-offset ranges when the inter-range gap is <= `max_gap`, and when the merged byte range does not exceed `max_block` (if specified). By default, this function will re-order the input paths and byte ranges to ensure sorted order. If the user can guarantee that the inputs are already sorted, passing `sort=False` will skip the re-ordering. rrc32K|]}t|ywrN)list)rvs r7rz&merge_offset_ranges..0s  G rNr) r~r TypeErrorr`rbzipsortedrappend) rstartsendsmax_gap max_blocksortr new_paths new_startsnew_endsrQs r7merge_offset_rangesrs eT " fd #CJ& dD !vE " 6{c%j CIU$; 6{afd""$ %af1f %F %  vt, vt "1I BQZ 8q#e*%AQx5Q<'HRL,@aE!a%L(AY"-8)tAwB/G9.T  q*!!&),Q( $Aw !&"*h.. &$ M&s Fc|j} |jdd|j|S#|j|wxYw)z+Find length of any open read-mode file-liker)r\r_)filelikeposs r7 file_sizerVs9 --/C}}Q" c cs 5Ac#Ktjtjj |tjj |dz\}} t ||5}|dddtj||y#1swY xYw#t$rEtjt5tj|ddd#1swYxYwwxYww)z A context manager that opens a temporary file next to `path` and, on exit, replaces `path` with the temporary file, thereby updating `path` atomically. -)dirprefixN) tempfilemkstemprErdirnamebasenameopenrr contextlibsuppressFileNotFoundErrorunlink)rmodefdfnfps r7 atomic_writer_s   GGOOD !"''*:*:4*@3*FFB "d^ rH  2t     !2 3  IIbM    sNAC( B'B ,B4C( BB"C%9C C%C! C%%C(c`g}|j}dt|}}||kr||}|dz}|dk(r|r|d|ur||n|dk(r ||n|dk(r|}||kr ||dk(r|dz}||kr ||dk(r|dz}||kr||dk7r|dz}||kr ||dk7r||k\r |d nq|||} d | vr| jd d } ng} ||dk(r|d zn|dz} |jd | |} | dkrn| j||| | dz}| dz} 8|||} | r| j| n | dxxd z cc<t t| dz ddD]4} | | dz d| | dkDs| | dz dd| | ddz| | dz <| | =6d j d| D} t jdd| } |dz}| s |dnS| dk(r |dnE| ddk(r d| ddz} n | ddvrd | z} |d| dn|t j|||kr||k(sJ|S)Nrr*r?[!]z\[r\\\rc3`K|]&}|jddjdd(yw)rrrz\-N)rrs r7rz_translate..s-%GH $.66sEB%s,.z([&~|])z\\\1z(?!)rD^)rr) rr`rfindrr,r(subescape) patSTAR QUESTION_MARKresaddrQncjstuffchunkskchunks r7 _translater*tsC **C c#hqA a% F E 8CG4/D #X   #XA1uQ3E1uQ3Ea%CFcMEa%CFcMAvE Aae#!MM$6EF!$Q3AAEAHHS!Q/q5! c!Ah/EE  !HE e,r c) "3v;?Ar:!!a%=,vay|;,21q5M#2,>12,NF1q5M &q ;  HH%LR%Ez7E:EKc\HQx3 #eABiqZ/ $u !E7! %  ! A a%B 6M6 Jr9c,tjjr6tjjtjjz}ntjj}dj t t j|}t|dkDrd|dn|}d|d}|d}||}d|d}d }g} t j||} t| dz } t| D]\} } | d k(r| j| | kr|n|$| d k(r| j| | kr|n|Bd | vr td | r| jt| |d || | ksx| j|dj | }d |dS)zBTranslate a pathname with shell wildcards to a regular expression.rrrz[^+z(?:.+z)?z.*rz**z:Invalid pattern: '**' can only be an entire path componentz(?s:z)\Z)rEraltsepsepr,mapr(rr`r enumeraterrbextendr*)rseps escaped_sepsany_sepnot_sepone_last_segment one_segment any_segmentsany_last_segmentsresultsr last_part_idxidxpartr!s r7glob_translater?s  ww~~ww{{RWW^^+ww{{773ryy$/0L%(Y],q! G<."G!!}%&wi0K7)2&LG HHWc "EJNMu% T 3; NN# *=;CS T  4< NN3+>rZs"  &   "+. CLDHQ Q+@QQjAE   (=   " " n! :.'j# G G G  G  G  G  G T*@ !.8F "  * * * * *  *Z*G6%)"  !     *%,!(   C C C C C  C  C,CL (GT#r9