From d8342a4f77f0031b16e49ec5a244e17c35571daf Mon Sep 17 00:00:00 2001 From: SharonGoliath Date: Wed, 20 Nov 2019 16:17:04 -0800 Subject: [PATCH] t76201 - remove caom2pipe from opencadc. (#118) * t76201 - remove caom2pipe from opencadc. --- caom2pipe/LICENSE | 661 ------ caom2pipe/README.md | 3 - caom2pipe/caom2pipe/__init__.py | 4 - caom2pipe/caom2pipe/astro_composable.py | 216 -- caom2pipe/caom2pipe/caom_composable.py | 126 -- caom2pipe/caom2pipe/execute_composable.py | 1963 ----------------- caom2pipe/caom2pipe/manage_composable.py | 1196 ---------- .../tests/data/C111107_0694_SCI.fits | 1 - caom2pipe/caom2pipe/tests/data/config.yml | 11 - caom2pipe/caom2pipe/tests/data/fail.txt | 1 - caom2pipe/caom2pipe/tests/data/good.txt | 0 caom2pipe/caom2pipe/tests/data/retries.txt | 1 - caom2pipe/caom2pipe/tests/data/retry.txt | 1 - caom2pipe/caom2pipe/tests/data/test_csv.csv | 3 - caom2pipe/caom2pipe/tests/data/test_netrc | 1 - .../caom2pipe/tests/data/test_obs_id.fits.xml | 8 - caom2pipe/caom2pipe/tests/data/test_state.yml | 3 - .../caom2pipe/tests/test_astro_composable.py | 149 -- .../tests/test_execute_composable.py | 1203 ---------- .../caom2pipe/tests/test_manage_composable.py | 353 --- caom2pipe/caom2pipe/tests/test_netrc | 1 - caom2pipe/dev_requirements.txt | 9 - caom2pipe/setup.cfg | 40 - caom2pipe/setup.py | 123 -- caom2utils/caom2utils/fits2caom2.py | 106 +- .../caom2utils/tests/test_collections.py | 39 +- .../caom2utils/tests/test_fits2caom2.py | 31 +- .../caom2utils/tests/test_wcsvalidator.py | 17 +- caom2utils/setup.cfg | 4 +- 29 files changed, 115 insertions(+), 6159 deletions(-) delete mode 100644 caom2pipe/LICENSE delete mode 100644 caom2pipe/README.md delete mode 100644 caom2pipe/caom2pipe/__init__.py delete mode 100644 caom2pipe/caom2pipe/astro_composable.py delete mode 100644 caom2pipe/caom2pipe/caom_composable.py delete mode 100644 caom2pipe/caom2pipe/execute_composable.py delete mode 100644 caom2pipe/caom2pipe/manage_composable.py delete mode 100644 
caom2pipe/caom2pipe/tests/data/C111107_0694_SCI.fits delete mode 100644 caom2pipe/caom2pipe/tests/data/config.yml delete mode 100644 caom2pipe/caom2pipe/tests/data/fail.txt delete mode 100644 caom2pipe/caom2pipe/tests/data/good.txt delete mode 100644 caom2pipe/caom2pipe/tests/data/retries.txt delete mode 100644 caom2pipe/caom2pipe/tests/data/retry.txt delete mode 100644 caom2pipe/caom2pipe/tests/data/test_csv.csv delete mode 100644 caom2pipe/caom2pipe/tests/data/test_netrc delete mode 100644 caom2pipe/caom2pipe/tests/data/test_obs_id.fits.xml delete mode 100644 caom2pipe/caom2pipe/tests/data/test_state.yml delete mode 100644 caom2pipe/caom2pipe/tests/test_astro_composable.py delete mode 100644 caom2pipe/caom2pipe/tests/test_execute_composable.py delete mode 100644 caom2pipe/caom2pipe/tests/test_manage_composable.py delete mode 100644 caom2pipe/caom2pipe/tests/test_netrc delete mode 100644 caom2pipe/dev_requirements.txt delete mode 100644 caom2pipe/setup.cfg delete mode 100755 caom2pipe/setup.py diff --git a/caom2pipe/LICENSE b/caom2pipe/LICENSE deleted file mode 100644 index dbbe3558..00000000 --- a/caom2pipe/LICENSE +++ /dev/null @@ -1,661 +0,0 @@ - GNU AFFERO GENERAL PUBLIC LICENSE - Version 3, 19 November 2007 - - Copyright (C) 2007 Free Software Foundation, Inc. <https://fsf.org/> - Everyone is permitted to copy and distribute verbatim copies - of this license document, but changing it is not allowed. - - Preamble - - The GNU Affero General Public License is a free, copyleft license for -software and other kinds of works, specifically designed to ensure -cooperation with the community in the case of network server software. - - The licenses for most software and other practical works are designed -to take away your freedom to share and change the works. By contrast, -our General Public Licenses are intended to guarantee your freedom to -share and change all versions of a program--to make sure it remains free -software for all its users. 
- - When we speak of free software, we are referring to freedom, not -price. Our General Public Licenses are designed to make sure that you -have the freedom to distribute copies of free software (and charge for -them if you wish), that you receive source code or can get it if you -want it, that you can change the software or use pieces of it in new -free programs, and that you know you can do these things. - - Developers that use our General Public Licenses protect your rights -with two steps: (1) assert copyright on the software, and (2) offer -you this License which gives you legal permission to copy, distribute -and/or modify the software. - - A secondary benefit of defending all users' freedom is that -improvements made in alternate versions of the program, if they -receive widespread use, become available for other developers to -incorporate. Many developers of free software are heartened and -encouraged by the resulting cooperation. However, in the case of -software used on network servers, this result may fail to come about. -The GNU General Public License permits making a modified version and -letting the public access it on a server without ever releasing its -source code to the public. - - The GNU Affero General Public License is designed specifically to -ensure that, in such cases, the modified source code becomes available -to the community. It requires the operator of a network server to -provide the source code of the modified version running there to the -users of that server. Therefore, public use of a modified version, on -a publicly accessible server, gives the public access to the source -code of the modified version. - - An older license, called the Affero General Public License and -published by Affero, was designed to accomplish similar goals. This is -a different license, not a version of the Affero GPL, but Affero has -released a new version of the Affero GPL which permits relicensing under -this license. 
- - The precise terms and conditions for copying, distribution and -modification follow. - - TERMS AND CONDITIONS - - 0. Definitions. - - "This License" refers to version 3 of the GNU Affero General Public License. - - "Copyright" also means copyright-like laws that apply to other kinds of -works, such as semiconductor masks. - - "The Program" refers to any copyrightable work licensed under this -License. Each licensee is addressed as "you". "Licensees" and -"recipients" may be individuals or organizations. - - To "modify" a work means to copy from or adapt all or part of the work -in a fashion requiring copyright permission, other than the making of an -exact copy. The resulting work is called a "modified version" of the -earlier work or a work "based on" the earlier work. - - A "covered work" means either the unmodified Program or a work based -on the Program. - - To "propagate" a work means to do anything with it that, without -permission, would make you directly or secondarily liable for -infringement under applicable copyright law, except executing it on a -computer or modifying a private copy. Propagation includes copying, -distribution (with or without modification), making available to the -public, and in some countries other activities as well. - - To "convey" a work means any kind of propagation that enables other -parties to make or receive copies. Mere interaction with a user through -a computer network, with no transfer of a copy, is not conveying. - - An interactive user interface displays "Appropriate Legal Notices" -to the extent that it includes a convenient and prominently visible -feature that (1) displays an appropriate copyright notice, and (2) -tells the user that there is no warranty for the work (except to the -extent that warranties are provided), that licensees may convey the -work under this License, and how to view a copy of this License. 
If -the interface presents a list of user commands or options, such as a -menu, a prominent item in the list meets this criterion. - - 1. Source Code. - - The "source code" for a work means the preferred form of the work -for making modifications to it. "Object code" means any non-source -form of a work. - - A "Standard Interface" means an interface that either is an official -standard defined by a recognized standards body, or, in the case of -interfaces specified for a particular programming language, one that -is widely used among developers working in that language. - - The "System Libraries" of an executable work include anything, other -than the work as a whole, that (a) is included in the normal form of -packaging a Major Component, but which is not part of that Major -Component, and (b) serves only to enable use of the work with that -Major Component, or to implement a Standard Interface for which an -implementation is available to the public in source code form. A -"Major Component", in this context, means a major essential component -(kernel, window system, and so on) of the specific operating system -(if any) on which the executable work runs, or a compiler used to -produce the work, or an object code interpreter used to run it. - - The "Corresponding Source" for a work in object code form means all -the source code needed to generate, install, and (for an executable -work) run the object code and to modify the work, including scripts to -control those activities. However, it does not include the work's -System Libraries, or general-purpose tools or generally available free -programs which are used unmodified in performing those activities but -which are not part of the work. 
For example, Corresponding Source -includes interface definition files associated with source files for -the work, and the source code for shared libraries and dynamically -linked subprograms that the work is specifically designed to require, -such as by intimate data communication or control flow between those -subprograms and other parts of the work. - - The Corresponding Source need not include anything that users -can regenerate automatically from other parts of the Corresponding -Source. - - The Corresponding Source for a work in source code form is that -same work. - - 2. Basic Permissions. - - All rights granted under this License are granted for the term of -copyright on the Program, and are irrevocable provided the stated -conditions are met. This License explicitly affirms your unlimited -permission to run the unmodified Program. The output from running a -covered work is covered by this License only if the output, given its -content, constitutes a covered work. This License acknowledges your -rights of fair use or other equivalent, as provided by copyright law. - - You may make, run and propagate covered works that you do not -convey, without conditions so long as your license otherwise remains -in force. You may convey covered works to others for the sole purpose -of having them make modifications exclusively for you, or provide you -with facilities for running those works, provided that you comply with -the terms of this License in conveying all material for which you do -not control copyright. Those thus making or running the covered works -for you must do so exclusively on your behalf, under your direction -and control, on terms that prohibit them from making any copies of -your copyrighted material outside their relationship with you. - - Conveying under any other circumstances is permitted solely under -the conditions stated below. Sublicensing is not allowed; section 10 -makes it unnecessary. - - 3. 
Protecting Users' Legal Rights From Anti-Circumvention Law. - - No covered work shall be deemed part of an effective technological -measure under any applicable law fulfilling obligations under article -11 of the WIPO copyright treaty adopted on 20 December 1996, or -similar laws prohibiting or restricting circumvention of such -measures. - - When you convey a covered work, you waive any legal power to forbid -circumvention of technological measures to the extent such circumvention -is effected by exercising rights under this License with respect to -the covered work, and you disclaim any intention to limit operation or -modification of the work as a means of enforcing, against the work's -users, your or third parties' legal rights to forbid circumvention of -technological measures. - - 4. Conveying Verbatim Copies. - - You may convey verbatim copies of the Program's source code as you -receive it, in any medium, provided that you conspicuously and -appropriately publish on each copy an appropriate copyright notice; -keep intact all notices stating that this License and any -non-permissive terms added in accord with section 7 apply to the code; -keep intact all notices of the absence of any warranty; and give all -recipients a copy of this License along with the Program. - - You may charge any price or no price for each copy that you convey, -and you may offer support or warranty protection for a fee. - - 5. Conveying Modified Source Versions. - - You may convey a work based on the Program, or the modifications to -produce it from the Program, in the form of source code under the -terms of section 4, provided that you also meet all of these conditions: - - a) The work must carry prominent notices stating that you modified - it, and giving a relevant date. - - b) The work must carry prominent notices stating that it is - released under this License and any conditions added under section - 7. 
This requirement modifies the requirement in section 4 to - "keep intact all notices". - - c) You must license the entire work, as a whole, under this - License to anyone who comes into possession of a copy. This - License will therefore apply, along with any applicable section 7 - additional terms, to the whole of the work, and all its parts, - regardless of how they are packaged. This License gives no - permission to license the work in any other way, but it does not - invalidate such permission if you have separately received it. - - d) If the work has interactive user interfaces, each must display - Appropriate Legal Notices; however, if the Program has interactive - interfaces that do not display Appropriate Legal Notices, your - work need not make them do so. - - A compilation of a covered work with other separate and independent -works, which are not by their nature extensions of the covered work, -and which are not combined with it such as to form a larger program, -in or on a volume of a storage or distribution medium, is called an -"aggregate" if the compilation and its resulting copyright are not -used to limit the access or legal rights of the compilation's users -beyond what the individual works permit. Inclusion of a covered work -in an aggregate does not cause this License to apply to the other -parts of the aggregate. - - 6. Conveying Non-Source Forms. - - You may convey a covered work in object code form under the terms -of sections 4 and 5, provided that you also convey the -machine-readable Corresponding Source under the terms of this License, -in one of these ways: - - a) Convey the object code in, or embodied in, a physical product - (including a physical distribution medium), accompanied by the - Corresponding Source fixed on a durable physical medium - customarily used for software interchange. 
- - b) Convey the object code in, or embodied in, a physical product - (including a physical distribution medium), accompanied by a - written offer, valid for at least three years and valid for as - long as you offer spare parts or customer support for that product - model, to give anyone who possesses the object code either (1) a - copy of the Corresponding Source for all the software in the - product that is covered by this License, on a durable physical - medium customarily used for software interchange, for a price no - more than your reasonable cost of physically performing this - conveying of source, or (2) access to copy the - Corresponding Source from a network server at no charge. - - c) Convey individual copies of the object code with a copy of the - written offer to provide the Corresponding Source. This - alternative is allowed only occasionally and noncommercially, and - only if you received the object code with such an offer, in accord - with subsection 6b. - - d) Convey the object code by offering access from a designated - place (gratis or for a charge), and offer equivalent access to the - Corresponding Source in the same way through the same place at no - further charge. You need not require recipients to copy the - Corresponding Source along with the object code. If the place to - copy the object code is a network server, the Corresponding Source - may be on a different server (operated by you or a third party) - that supports equivalent copying facilities, provided you maintain - clear directions next to the object code saying where to find the - Corresponding Source. Regardless of what server hosts the - Corresponding Source, you remain obligated to ensure that it is - available for as long as needed to satisfy these requirements. 
- - e) Convey the object code using peer-to-peer transmission, provided - you inform other peers where the object code and Corresponding - Source of the work are being offered to the general public at no - charge under subsection 6d. - - A separable portion of the object code, whose source code is excluded -from the Corresponding Source as a System Library, need not be -included in conveying the object code work. - - A "User Product" is either (1) a "consumer product", which means any -tangible personal property which is normally used for personal, family, -or household purposes, or (2) anything designed or sold for incorporation -into a dwelling. In determining whether a product is a consumer product, -doubtful cases shall be resolved in favor of coverage. For a particular -product received by a particular user, "normally used" refers to a -typical or common use of that class of product, regardless of the status -of the particular user or of the way in which the particular user -actually uses, or expects or is expected to use, the product. A product -is a consumer product regardless of whether the product has substantial -commercial, industrial or non-consumer uses, unless such uses represent -the only significant mode of use of the product. - - "Installation Information" for a User Product means any methods, -procedures, authorization keys, or other information required to install -and execute modified versions of a covered work in that User Product from -a modified version of its Corresponding Source. The information must -suffice to ensure that the continued functioning of the modified object -code is in no case prevented or interfered with solely because -modification has been made. 
- - If you convey an object code work under this section in, or with, or -specifically for use in, a User Product, and the conveying occurs as -part of a transaction in which the right of possession and use of the -User Product is transferred to the recipient in perpetuity or for a -fixed term (regardless of how the transaction is characterized), the -Corresponding Source conveyed under this section must be accompanied -by the Installation Information. But this requirement does not apply -if neither you nor any third party retains the ability to install -modified object code on the User Product (for example, the work has -been installed in ROM). - - The requirement to provide Installation Information does not include a -requirement to continue to provide support service, warranty, or updates -for a work that has been modified or installed by the recipient, or for -the User Product in which it has been modified or installed. Access to a -network may be denied when the modification itself materially and -adversely affects the operation of the network or violates the rules and -protocols for communication across the network. - - Corresponding Source conveyed, and Installation Information provided, -in accord with this section must be in a format that is publicly -documented (and with an implementation available to the public in -source code form), and must require no special password or key for -unpacking, reading or copying. - - 7. Additional Terms. - - "Additional permissions" are terms that supplement the terms of this -License by making exceptions from one or more of its conditions. -Additional permissions that are applicable to the entire Program shall -be treated as though they were included in this License, to the extent -that they are valid under applicable law. 
If additional permissions -apply only to part of the Program, that part may be used separately -under those permissions, but the entire Program remains governed by -this License without regard to the additional permissions. - - When you convey a copy of a covered work, you may at your option -remove any additional permissions from that copy, or from any part of -it. (Additional permissions may be written to require their own -removal in certain cases when you modify the work.) You may place -additional permissions on material, added by you to a covered work, -for which you have or can give appropriate copyright permission. - - Notwithstanding any other provision of this License, for material you -add to a covered work, you may (if authorized by the copyright holders of -that material) supplement the terms of this License with terms: - - a) Disclaiming warranty or limiting liability differently from the - terms of sections 15 and 16 of this License; or - - b) Requiring preservation of specified reasonable legal notices or - author attributions in that material or in the Appropriate Legal - Notices displayed by works containing it; or - - c) Prohibiting misrepresentation of the origin of that material, or - requiring that modified versions of such material be marked in - reasonable ways as different from the original version; or - - d) Limiting the use for publicity purposes of names of licensors or - authors of the material; or - - e) Declining to grant rights under trademark law for use of some - trade names, trademarks, or service marks; or - - f) Requiring indemnification of licensors and authors of that - material by anyone who conveys the material (or modified versions of - it) with contractual assumptions of liability to the recipient, for - any liability that these contractual assumptions directly impose on - those licensors and authors. - - All other non-permissive additional terms are considered "further -restrictions" within the meaning of section 10. 
If the Program as you -received it, or any part of it, contains a notice stating that it is -governed by this License along with a term that is a further -restriction, you may remove that term. If a license document contains -a further restriction but permits relicensing or conveying under this -License, you may add to a covered work material governed by the terms -of that license document, provided that the further restriction does -not survive such relicensing or conveying. - - If you add terms to a covered work in accord with this section, you -must place, in the relevant source files, a statement of the -additional terms that apply to those files, or a notice indicating -where to find the applicable terms. - - Additional terms, permissive or non-permissive, may be stated in the -form of a separately written license, or stated as exceptions; -the above requirements apply either way. - - 8. Termination. - - You may not propagate or modify a covered work except as expressly -provided under this License. Any attempt otherwise to propagate or -modify it is void, and will automatically terminate your rights under -this License (including any patent licenses granted under the third -paragraph of section 11). - - However, if you cease all violation of this License, then your -license from a particular copyright holder is reinstated (a) -provisionally, unless and until the copyright holder explicitly and -finally terminates your license, and (b) permanently, if the copyright -holder fails to notify you of the violation by some reasonable means -prior to 60 days after the cessation. - - Moreover, your license from a particular copyright holder is -reinstated permanently if the copyright holder notifies you of the -violation by some reasonable means, this is the first time you have -received notice of violation of this License (for any work) from that -copyright holder, and you cure the violation prior to 30 days after -your receipt of the notice. 
- - Termination of your rights under this section does not terminate the -licenses of parties who have received copies or rights from you under -this License. If your rights have been terminated and not permanently -reinstated, you do not qualify to receive new licenses for the same -material under section 10. - - 9. Acceptance Not Required for Having Copies. - - You are not required to accept this License in order to receive or -run a copy of the Program. Ancillary propagation of a covered work -occurring solely as a consequence of using peer-to-peer transmission -to receive a copy likewise does not require acceptance. However, -nothing other than this License grants you permission to propagate or -modify any covered work. These actions infringe copyright if you do -not accept this License. Therefore, by modifying or propagating a -covered work, you indicate your acceptance of this License to do so. - - 10. Automatic Licensing of Downstream Recipients. - - Each time you convey a covered work, the recipient automatically -receives a license from the original licensors, to run, modify and -propagate that work, subject to this License. You are not responsible -for enforcing compliance by third parties with this License. - - An "entity transaction" is a transaction transferring control of an -organization, or substantially all assets of one, or subdividing an -organization, or merging organizations. If propagation of a covered -work results from an entity transaction, each party to that -transaction who receives a copy of the work also receives whatever -licenses to the work the party's predecessor in interest had or could -give under the previous paragraph, plus a right to possession of the -Corresponding Source of the work from the predecessor in interest, if -the predecessor has it or can get it with reasonable efforts. - - You may not impose any further restrictions on the exercise of the -rights granted or affirmed under this License. 
For example, you may -not impose a license fee, royalty, or other charge for exercise of -rights granted under this License, and you may not initiate litigation -(including a cross-claim or counterclaim in a lawsuit) alleging that -any patent claim is infringed by making, using, selling, offering for -sale, or importing the Program or any portion of it. - - 11. Patents. - - A "contributor" is a copyright holder who authorizes use under this -License of the Program or a work on which the Program is based. The -work thus licensed is called the contributor's "contributor version". - - A contributor's "essential patent claims" are all patent claims -owned or controlled by the contributor, whether already acquired or -hereafter acquired, that would be infringed by some manner, permitted -by this License, of making, using, or selling its contributor version, -but do not include claims that would be infringed only as a -consequence of further modification of the contributor version. For -purposes of this definition, "control" includes the right to grant -patent sublicenses in a manner consistent with the requirements of -this License. - - Each contributor grants you a non-exclusive, worldwide, royalty-free -patent license under the contributor's essential patent claims, to -make, use, sell, offer for sale, import and otherwise run, modify and -propagate the contents of its contributor version. - - In the following three paragraphs, a "patent license" is any express -agreement or commitment, however denominated, not to enforce a patent -(such as an express permission to practice a patent or covenant not to -sue for patent infringement). To "grant" such a patent license to a -party means to make such an agreement or commitment not to enforce a -patent against the party. 
- - If you convey a covered work, knowingly relying on a patent license, -and the Corresponding Source of the work is not available for anyone -to copy, free of charge and under the terms of this License, through a -publicly available network server or other readily accessible means, -then you must either (1) cause the Corresponding Source to be so -available, or (2) arrange to deprive yourself of the benefit of the -patent license for this particular work, or (3) arrange, in a manner -consistent with the requirements of this License, to extend the patent -license to downstream recipients. "Knowingly relying" means you have -actual knowledge that, but for the patent license, your conveying the -covered work in a country, or your recipient's use of the covered work -in a country, would infringe one or more identifiable patents in that -country that you have reason to believe are valid. - - If, pursuant to or in connection with a single transaction or -arrangement, you convey, or propagate by procuring conveyance of, a -covered work, and grant a patent license to some of the parties -receiving the covered work authorizing them to use, propagate, modify -or convey a specific copy of the covered work, then the patent license -you grant is automatically extended to all recipients of the covered -work and works based on it. - - A patent license is "discriminatory" if it does not include within -the scope of its coverage, prohibits the exercise of, or is -conditioned on the non-exercise of one or more of the rights that are -specifically granted under this License. 
You may not convey a covered -work if you are a party to an arrangement with a third party that is -in the business of distributing software, under which you make payment -to the third party based on the extent of your activity of conveying -the work, and under which the third party grants, to any of the -parties who would receive the covered work from you, a discriminatory -patent license (a) in connection with copies of the covered work -conveyed by you (or copies made from those copies), or (b) primarily -for and in connection with specific products or compilations that -contain the covered work, unless you entered into that arrangement, -or that patent license was granted, prior to 28 March 2007. - - Nothing in this License shall be construed as excluding or limiting -any implied license or other defenses to infringement that may -otherwise be available to you under applicable patent law. - - 12. No Surrender of Others' Freedom. - - If conditions are imposed on you (whether by court order, agreement or -otherwise) that contradict the conditions of this License, they do not -excuse you from the conditions of this License. If you cannot convey a -covered work so as to satisfy simultaneously your obligations under this -License and any other pertinent obligations, then as a consequence you may -not convey it at all. For example, if you agree to terms that obligate you -to collect a royalty for further conveying from those to whom you convey -the Program, the only way you could satisfy both those terms and this -License would be to refrain entirely from conveying the Program. - - 13. Remote Network Interaction; Use with the GNU General Public License. 
- - Notwithstanding any other provision of this License, if you modify the -Program, your modified version must prominently offer all users -interacting with it remotely through a computer network (if your version -supports such interaction) an opportunity to receive the Corresponding -Source of your version by providing access to the Corresponding Source -from a network server at no charge, through some standard or customary -means of facilitating copying of software. This Corresponding Source -shall include the Corresponding Source for any work covered by version 3 -of the GNU General Public License that is incorporated pursuant to the -following paragraph. - - Notwithstanding any other provision of this License, you have -permission to link or combine any covered work with a work licensed -under version 3 of the GNU General Public License into a single -combined work, and to convey the resulting work. The terms of this -License will continue to apply to the part which is the covered work, -but the work with which it is combined will remain governed by version -3 of the GNU General Public License. - - 14. Revised Versions of this License. - - The Free Software Foundation may publish revised and/or new versions of -the GNU Affero General Public License from time to time. Such new versions -will be similar in spirit to the present version, but may differ in detail to -address new problems or concerns. - - Each version is given a distinguishing version number. If the -Program specifies that a certain numbered version of the GNU Affero General -Public License "or any later version" applies to it, you have the -option of following the terms and conditions either of that numbered -version or of any later version published by the Free Software -Foundation. If the Program does not specify a version number of the -GNU Affero General Public License, you may choose any version ever published -by the Free Software Foundation. 
- - If the Program specifies that a proxy can decide which future -versions of the GNU Affero General Public License can be used, that proxy's -public statement of acceptance of a version permanently authorizes you -to choose that version for the Program. - - Later license versions may give you additional or different -permissions. However, no additional obligations are imposed on any -author or copyright holder as a result of your choosing to follow a -later version. - - 15. Disclaimer of Warranty. - - THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY -APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT -HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY -OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, -THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM -IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF -ALL NECESSARY SERVICING, REPAIR OR CORRECTION. - - 16. Limitation of Liability. - - IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING -WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS -THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY -GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE -USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF -DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD -PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS), -EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF -SUCH DAMAGES. - - 17. Interpretation of Sections 15 and 16. 
- - If the disclaimer of warranty and limitation of liability provided -above cannot be given local legal effect according to their terms, -reviewing courts shall apply local law that most closely approximates -an absolute waiver of all civil liability in connection with the -Program, unless a warranty or assumption of liability accompanies a -copy of the Program in return for a fee. - - END OF TERMS AND CONDITIONS - - How to Apply These Terms to Your New Programs - - If you develop a new program, and you want it to be of the greatest -possible use to the public, the best way to achieve this is to make it -free software which everyone can redistribute and change under these terms. - - To do so, attach the following notices to the program. It is safest -to attach them to the start of each source file to most effectively -state the exclusion of warranty; and each file should have at least -the "copyright" line and a pointer to where the full notice is found. - - - Copyright (C) - - This program is free software: you can redistribute it and/or modify - it under the terms of the GNU Affero General Public License as published - by the Free Software Foundation, either version 3 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU Affero General Public License for more details. - - You should have received a copy of the GNU Affero General Public License - along with this program. If not, see . - -Also add information on how to contact you by electronic and paper mail. - - If your software can interact with users remotely through a computer -network, you should also make sure that it provides a way for users to -get its source. For example, if your program is a web application, its -interface could display a "Source" link that leads users to an archive -of the code. 
There are many ways you could offer source, and different -solutions will be better for different programs; see section 13 for the -specific requirements. - - You should also get your employer (if you work as a programmer) or school, -if any, to sign a "copyright disclaimer" for the program, if necessary. -For more information on this, and how to apply and follow the GNU AGPL, see -. diff --git a/caom2pipe/README.md b/caom2pipe/README.md deleted file mode 100644 index 389680b1..00000000 --- a/caom2pipe/README.md +++ /dev/null @@ -1,3 +0,0 @@ -# caom2pipe -Workflow construction pieces for building 'collection'2caom2 pipelines. - diff --git a/caom2pipe/caom2pipe/__init__.py b/caom2pipe/caom2pipe/__init__.py deleted file mode 100644 index e8eeb2a0..00000000 --- a/caom2pipe/caom2pipe/__init__.py +++ /dev/null @@ -1,4 +0,0 @@ -from .execute_composable import * # noqa -from .manage_composable import * # noqa -from .astro_composable import * # noqa -from .caom_composable import * # noqa diff --git a/caom2pipe/caom2pipe/astro_composable.py b/caom2pipe/caom2pipe/astro_composable.py deleted file mode 100644 index d75c8dfa..00000000 --- a/caom2pipe/caom2pipe/astro_composable.py +++ /dev/null @@ -1,216 +0,0 @@ -# -*- coding: utf-8 -*- -# *********************************************************************** -# ****************** CANADIAN ASTRONOMY DATA CENTRE ******************* -# ************* CENTRE CANADIEN DE DONNÉES ASTRONOMIQUES ************** -# -# (c) 2018. (c) 2018. 
-# Government of Canada Gouvernement du Canada -# National Research Council Conseil national de recherches -# Ottawa, Canada, K1A 0R6 Ottawa, Canada, K1A 0R6 -# All rights reserved Tous droits réservés -# -# NRC disclaims any warranties, Le CNRC dénie toute garantie -# expressed, implied, or énoncée, implicite ou légale, -# statutory, of any kind with de quelque nature que ce -# respect to the software, soit, concernant le logiciel, -# including without limitation y compris sans restriction -# any warranty of merchantability toute garantie de valeur -# or fitness for a particular marchande ou de pertinence -# purpose. NRC shall not be pour un usage particulier. -# liable in any event for any Le CNRC ne pourra en aucun cas -# damages, whether direct or être tenu responsable de tout -# indirect, special or general, dommage, direct ou indirect, -# consequential or incidental, particulier ou général, -# arising from the use of the accessoire ou fortuit, résultant -# software. Neither the name de l'utilisation du logiciel. Ni -# of the National Research le nom du Conseil National de -# Council of Canada nor the Recherches du Canada ni les noms -# names of its contributors may de ses participants ne peuvent -# be used to endorse or promote être utilisés pour approuver ou -# products derived from this promouvoir les produits dérivés -# software without specific prior de ce logiciel sans autorisation -# written permission. préalable et particulière -# par écrit. -# -# This file is part of the Ce fichier fait partie du projet -# OpenCADC project. OpenCADC. 
-# -# OpenCADC is free software: OpenCADC est un logiciel libre ; -# you can redistribute it and/or vous pouvez le redistribuer ou le -# modify it under the terms of modifier suivant les termes de -# the GNU Affero General Public la “GNU Affero General Public -# License as published by the License” telle que publiée -# Free Software Foundation, par la Free Software Foundation -# either version 3 of the : soit la version 3 de cette -# License, or (at your option) licence, soit (à votre gré) -# any later version. toute version ultérieure. -# -# OpenCADC is distributed in the OpenCADC est distribué -# hope that it will be useful, dans l’espoir qu’il vous -# but WITHOUT ANY WARRANTY; sera utile, mais SANS AUCUNE -# without even the implied GARANTIE : sans même la garantie -# warranty of MERCHANTABILITY implicite de COMMERCIALISABILITÉ -# or FITNESS FOR A PARTICULAR ni d’ADÉQUATION À UN OBJECTIF -# PURPOSE. See the GNU Affero PARTICULIER. Consultez la Licence -# General Public License for Générale Publique GNU Affero -# more details. pour plus de détails. -# -# You should have received Vous devriez avoir reçu une -# a copy of the GNU Affero copie de la Licence Générale -# General Public License along Publique GNU Affero avec -# with OpenCADC. If not, see OpenCADC ; si ce n’est -# . pas le cas, consultez : -# . 
-# -# $Revision: 4 $ -# -# *********************************************************************** -# - -import logging - -from astropy.io import fits -from astropy.coordinates import EarthLocation -from astropy.time import Time, TimeDelta - -from datetime import timedelta as dt_timedelta -from datetime import datetime as dt_datetime -from time import strptime as dt_strptime - -from caom2 import Interval as caom_Interval -from caom2 import Time as caom_Time -from caom2 import shape as caom_shape - -from caom2pipe import manage_composable as mc - - -__all__ = ['convert_time', 'get_datetime', 'build_plane_time', - 'build_plane_time_interval', 'build_plane_time_sample', - 'get_location', 'get_timedelta_in_s', 'make_headers_from_string'] - - -def find_time_bounds(headers): - """Given an observation date, and time exposure length, calculate the - mjd_start and mjd_end time for those values.""" - logging.debug('Begin find_time_bounds.') - date = headers[0].get('DATE-OBS') - exposure = headers[0].get('TEXP') - return convert_time(date, exposure) - - -def convert_time(start_time, exposure): - """Convert a start time and exposure length into an mjd_start and mjd_end - time.""" - logging.debug('Begin convert_time.') - if start_time is not None and exposure is not None: - logging.debug( - 'Use date {} and exposure {} to convert time.'.format(start_time, - exposure)) - if type(start_time) is float: - t_start = Time(start_time, format='mjd') - else: - t_start = Time(start_time) - dt = TimeDelta(exposure, format='sec') - t_end = t_start + dt - t_start.format = 'mjd' - t_end.format = 'mjd' - mjd_start = t_start.value - mjd_end = t_end.value - logging.debug('End convert_time mjd start {} mjd end {} .'.format( - mjd_start, mjd_end)) - return mjd_start, mjd_end - return None, None - - -def get_datetime(from_value): - """ - Ensure datetime values are in MJD. This is meant to handle any odd formats - that telescopes have for datetime values. - - Relies on astropy, until astropy fails. 
- - :param from_value: - :return: datetime instance - """ - if from_value is not None: - try: - result = Time(from_value) - result.format = 'mjd' - return result - except ValueError: - try: - # VLASS has a format astropy fails to understand - # from datetime import datetime - result = Time( - dt_datetime.strptime(from_value, '%H:%M:%S')) - result.format = 'mjd' - return result - except ValueError: - logging.error('Cannot parse datetime {}'.format(from_value)) - return None - else: - return None - - -def get_location(latitude, longitude, elevation): - """The CAOM model expects the telescope location to be in geocentric - coordinates. Rely on astropy to do the conversion.""" - result = EarthLocation.from_geodetic( - longitude, latitude, elevation, 'WGS84') - return result.x.value, result.y.value, result.z.value - - -def build_plane_time(start_date, end_date, exposure_time): - """Calculate the plane-level bounding box for time, with one sample.""" - sample = build_plane_time_sample(start_date, end_date) - time_bounds = build_plane_time_interval(start_date, end_date, [sample]) - return caom_Time(bounds=time_bounds, - dimension=1, - resolution=exposure_time.to('second').value, - sample_size=exposure_time.to('day').value, - exposure=exposure_time.to('second').value) - - -def build_plane_time_interval(start_date, end_date, samples): - """Create an Interval for the plane-level bounding box for time, given - the start and end dates, and a list of samples. - :param samples list of SubInterval instances - :param start_date minimum SubInterval date - :param end_date maximum SubInterval date. """ - time_bounds = caom_Interval(mc.to_float(start_date.value), - mc.to_float(end_date.value), - samples=samples) - return time_bounds - - -def build_plane_time_sample(start_date, end_date): - """Create a SubInterval for the plane-level bounding box for time, given - the start and end dates. - :param start_date minimum date - :param end_date maximum date. 
""" - start_date.format = 'mjd' - end_date.format = 'mjd' - return caom_shape.SubInterval( - mc.to_float(start_date.value), - mc.to_float(end_date.value)) - - -def get_timedelta_in_s(from_value): - """ - :param from_value: a string representing time in H:M:S - :return: the value as a timedelta, in seconds - """ - temp = dt_strptime(from_value, '%H:%M:%S') - td = dt_timedelta( - hours=temp.tm_hour, minutes=temp.tm_min, seconds=temp.tm_sec) - return td.seconds - - -def make_headers_from_string(fits_header): - """Create a list of fits.Header instances from a string. - ":param fits_header a string of keyword/value pairs""" - delim = '\nEND' - extensions = \ - [e + delim for e in fits_header.split(delim) if e.strip()] - headers = [fits.Header.fromstring(e, sep='\n') for e in extensions] - return headers diff --git a/caom2pipe/caom2pipe/caom_composable.py b/caom2pipe/caom2pipe/caom_composable.py deleted file mode 100644 index a8d2ef87..00000000 --- a/caom2pipe/caom2pipe/caom_composable.py +++ /dev/null @@ -1,126 +0,0 @@ -# -*- coding: utf-8 -*- -# *********************************************************************** -# ****************** CANADIAN ASTRONOMY DATA CENTRE ******************* -# ************* CENTRE CANADIEN DE DONNÉES ASTRONOMIQUES ************** -# -# (c) 2019. (c) 2019. -# Government of Canada Gouvernement du Canada -# National Research Council Conseil national de recherches -# Ottawa, Canada, K1A 0R6 Ottawa, Canada, K1A 0R6 -# All rights reserved Tous droits réservés -# -# NRC disclaims any warranties, Le CNRC dénie toute garantie -# expressed, implied, or énoncée, implicite ou légale, -# statutory, of any kind with de quelque nature que ce -# respect to the software, soit, concernant le logiciel, -# including without limitation y compris sans restriction -# any warranty of merchantability toute garantie de valeur -# or fitness for a particular marchande ou de pertinence -# purpose. NRC shall not be pour un usage particulier. 
-# liable in any event for any Le CNRC ne pourra en aucun cas -# damages, whether direct or être tenu responsable de tout -# indirect, special or general, dommage, direct ou indirect, -# consequential or incidental, particulier ou général, -# arising from the use of the accessoire ou fortuit, résultant -# software. Neither the name de l'utilisation du logiciel. Ni -# of the National Research le nom du Conseil National de -# Council of Canada nor the Recherches du Canada ni les noms -# names of its contributors may de ses participants ne peuvent -# be used to endorse or promote être utilisés pour approuver ou -# products derived from this promouvoir les produits dérivés -# software without specific prior de ce logiciel sans autorisation -# written permission. préalable et particulière -# par écrit. -# -# This file is part of the Ce fichier fait partie du projet -# OpenCADC project. OpenCADC. -# -# OpenCADC is free software: OpenCADC est un logiciel libre ; -# you can redistribute it and/or vous pouvez le redistribuer ou le -# modify it under the terms of modifier suivant les termes de -# the GNU Affero General Public la “GNU Affero General Public -# License as published by the License” telle que publiée -# Free Software Foundation, par la Free Software Foundation -# either version 3 of the : soit la version 3 de cette -# License, or (at your option) licence, soit (à votre gré) -# any later version. toute version ultérieure. -# -# OpenCADC is distributed in the OpenCADC est distribué -# hope that it will be useful, dans l’espoir qu’il vous -# but WITHOUT ANY WARRANTY; sera utile, mais SANS AUCUNE -# without even the implied GARANTIE : sans même la garantie -# warranty of MERCHANTABILITY implicite de COMMERCIALISABILITÉ -# or FITNESS FOR A PARTICULAR ni d’ADÉQUATION À UN OBJECTIF -# PURPOSE. See the GNU Affero PARTICULIER. Consultez la Licence -# General Public License for Générale Publique GNU Affero -# more details. pour plus de détails. 
-# -# You should have received Vous devriez avoir reçu une -# a copy of the GNU Affero copie de la Licence Générale -# General Public License along Publique GNU Affero avec -# with OpenCADC. If not, see OpenCADC ; si ce n’est -# . pas le cas, consultez : -# . -# -# $Revision: 4 $ -# -# *********************************************************************** -# - -import logging - -from caom2 import TypedSet, ObservationURI, PlaneURI - -from caom2pipe import execute_composable as ec -from caom2pipe import manage_composable as mc - -__all__ = ['update_plane_provenance', 'update_observation_members'] - - -def update_plane_provenance(plane, headers, lookup, collection, - repair, obs_id): - """Add inputs to Planes, based on a particular keyword prefix. - - :param plane Plane instance to add inputs to - :param headers FITS keyword headers that have lookup values. - :param lookup The keyword pattern to find in the FITS header keywords for - input files. - :param collection The collection name for URI construction - :param repair The function to fix input values, to ensure they match - input observation ID values. - :param obs_id String value for logging only. - """ - plane_inputs = TypedSet(PlaneURI,) - - for header in headers: - for keyword in header: - if keyword.startswith(lookup): - value = header.get(keyword) - prov_obs_id, prov_prod_id = repair(value, obs_id) - if prov_obs_id is not None and prov_prod_id is not None: - obs_member_uri_str = \ - ec.CaomName.make_obs_uri_from_obs_id( - collection, prov_obs_id) - obs_member_uri = ObservationURI(obs_member_uri_str) - plane_uri = PlaneURI.get_plane_uri( - obs_member_uri, prov_prod_id) - plane_inputs.add(plane_uri) - logging.debug('Adding PlaneURI {}'.format(plane_uri)) - - mc.update_typed_set(plane.provenance.inputs, plane_inputs) - - -def update_observation_members(observation): - """Add members to Observation from all its Planes. 
- - :param observation Observation instance to add members to - """ - members_inputs = TypedSet(ObservationURI,) - for plane in observation.planes.values(): - if (plane.provenance is not None and - plane.provenance.inputs is not None): - for inpt in plane.provenance.inputs: - members_inputs.add(inpt.get_observation_uri()) - logging.debug('Adding Observation URI {}'.format( - inpt.get_observation_uri())) - mc.update_typed_set(observation.members, members_inputs) diff --git a/caom2pipe/caom2pipe/execute_composable.py b/caom2pipe/caom2pipe/execute_composable.py deleted file mode 100644 index a0724498..00000000 --- a/caom2pipe/caom2pipe/execute_composable.py +++ /dev/null @@ -1,1963 +0,0 @@ -# -*- coding: utf-8 -*- -# *********************************************************************** -# ****************** CANADIAN ASTRONOMY DATA CENTRE ******************* -# ************* CENTRE CANADIEN DE DONNÉES ASTRONOMIQUES ************** -# -# (c) 2018. (c) 2018. -# Government of Canada Gouvernement du Canada -# National Research Council Conseil national de recherches -# Ottawa, Canada, K1A 0R6 Ottawa, Canada, K1A 0R6 -# All rights reserved Tous droits réservés -# -# NRC disclaims any warranties, Le CNRC dénie toute garantie -# expressed, implied, or énoncée, implicite ou légale, -# statutory, of any kind with de quelque nature que ce -# respect to the software, soit, concernant le logiciel, -# including without limitation y compris sans restriction -# any warranty of merchantability toute garantie de valeur -# or fitness for a particular marchande ou de pertinence -# purpose. NRC shall not be pour un usage particulier. -# liable in any event for any Le CNRC ne pourra en aucun cas -# damages, whether direct or être tenu responsable de tout -# indirect, special or general, dommage, direct ou indirect, -# consequential or incidental, particulier ou général, -# arising from the use of the accessoire ou fortuit, résultant -# software. 
Neither the name de l'utilisation du logiciel. Ni -# of the National Research le nom du Conseil National de -# Council of Canada nor the Recherches du Canada ni les noms -# names of its contributors may de ses participants ne peuvent -# be used to endorse or promote être utilisés pour approuver ou -# products derived from this promouvoir les produits dérivés -# software without specific prior de ce logiciel sans autorisation -# written permission. préalable et particulière -# par écrit. -# -# This file is part of the Ce fichier fait partie du projet -# OpenCADC project. OpenCADC. -# -# OpenCADC is free software: OpenCADC est un logiciel libre ; -# you can redistribute it and/or vous pouvez le redistribuer ou le -# modify it under the terms of modifier suivant les termes de -# the GNU Affero General Public la “GNU Affero General Public -# License as published by the License” telle que publiée -# Free Software Foundation, par la Free Software Foundation -# either version 3 of the : soit la version 3 de cette -# License, or (at your option) licence, soit (à votre gré) -# any later version. toute version ultérieure. -# -# OpenCADC is distributed in the OpenCADC est distribué -# hope that it will be useful, dans l’espoir qu’il vous -# but WITHOUT ANY WARRANTY; sera utile, mais SANS AUCUNE -# without even the implied GARANTIE : sans même la garantie -# warranty of MERCHANTABILITY implicite de COMMERCIALISABILITÉ -# or FITNESS FOR A PARTICULAR ni d’ADÉQUATION À UN OBJECTIF -# PURPOSE. See the GNU Affero PARTICULIER. Consultez la Licence -# General Public License for Générale Publique GNU Affero -# more details. pour plus de détails. -# -# You should have received Vous devriez avoir reçu une -# a copy of the GNU Affero copie de la Licence Générale -# General Public License along Publique GNU Affero avec -# with OpenCADC. If not, see OpenCADC ; si ce n’est -# . pas le cas, consultez : -# . 
-# -# $Revision: 4 $ -# -# *********************************************************************** -# - -""" -This module contains pipeline execution classes. Each execution class -corresponds to a single task type, correlated with specific configuration -or implementation assumptions. - -The execute methods in each of the class definitions require no if statements. -All the if statements are limited to the choose* methods in the -OrganizeExecutes class. If you find yourself adding an if statement to an -execute method, create a new *Execute class instead. The result is execute -methods that are composable into complex and varied pipelines, while -remaining easily tested. The execute methods do conform to an Airflow API -for operator extension, but please, please, please, do not ever import an -Airflow class here. - -Raise the CadcException upon encountering an error. There is no recovery -effort as part of a failure. Log the error and stop the pipeline -execution for an Observation. - -The correlations that currently exist: -- use_local_data: True => classes have "Local" in their name -- uses the CadcDataClient, the Caom2RepoClient => classes have "Client" - in their name -- requires metadata access only => classes have "Meta" in their name -- requires data access => classes have "Data" in their name - -""" - -import distutils.sysconfig -import logging -import os -import re -import requests -import sys -import traceback - -from argparse import ArgumentParser -from astropy.io import fits -from datetime import datetime - -from cadcutils import net, exceptions -from cadcdata import CadcDataClient -from caom2repo import CAOM2RepoClient -from caom2pipe import manage_composable as mc - -__all__ = ['OrganizeExecutes', 'StorageName', 'CaomName', 'OrganizeChooser', - 'run_single', 'run_by_file', 'run_single_from_state'] - -READ_BLOCK_SIZE = 8 * 1024 - - -class StorageName(object): - """Naming rules for a collection: - - support mixed-case file name storage - - support 
gzipped and not zipped file names - - This class assumes the obs_id is part of the file name. This assumption - may be broken in the future, in which case lots of CaomExecute - implementations will need to be re-addressed somehow. - - This class assumes the file name in storage, and the file name on disk - are not necessarily the same thing. - """ - - def __init__(self, obs_id, collection, collection_pattern, - fname_on_disk=None, scheme='ad', archive=None): - """ - - :param obs_id: string value for Observation.observationID - :param collection: string value for Observation.collection - :param collection_pattern: regular expression that can be used to - determine if a file name or observation id meets particular - patterns. - :param fname_on_disk: string value for the name of a file on disk, - which is not necessarily the same thing as the name of the file - in storage (i.e. extensions may exist in one location that do - not exist in another. - :param scheme: string value for the scheme of the file URI. - :param archive: ad storage unit, defaults to value of - 'collection' - """ - self.obs_id = obs_id - self.collection = collection - self.collection_pattern = collection_pattern - self.scheme = scheme - self.fname_on_disk = fname_on_disk - if archive is not None: - self.archive = archive - else: - self.archive = collection - - @property - def file_uri(self): - """The ad URI for the file. 
Assumes compression.""" - return '{}:{}/{}.gz'.format( - self.scheme, self.archive, self.file_name) - - @property - def file_name(self): - """The file name.""" - return '{}.fits'.format(self.obs_id) - - @property - def compressed_file_name(self): - """The compressed file name - adds the .gz extension.""" - return '{}.fits.gz'.format(self.obs_id) - - @property - def model_file_name(self): - """The file name used on local disk that holds the CAOM2 Observation - XML.""" - return '{}.fits.xml'.format(self.obs_id) - - @property - def prev(self): - """The preview file name for the file.""" - return '{}_prev.jpg'.format(self.obs_id) - - @property - def thumb(self): - """The thumbnail file name for the file.""" - return '{}_prev_256.jpg'.format(self.obs_id) - - @property - def prev_uri(self): - """The preview URI.""" - return self._get_uri(self.prev) - - @property - def thumb_uri(self): - """The thumbnail URI.""" - return self._get_uri(self.thumb) - - @property - def obs_id(self): - """The observation ID associated with the file name.""" - return self._obs_id - - @obs_id.setter - def obs_id(self, value): - self._obs_id = value - - @property - def log_file(self): - """The log file name used when running any of the 'execute' steps.""" - return '{}.log'.format(self.obs_id) - - @property - def product_id(self): - """The relationship between the observation ID of an observation, and - the product ID of a plane.""" - return self.obs_id - - @property - def fname_on_disk(self): - """The file name on disk, which is not necessarily the same as the - file name in ad.""" - return self._fname_on_disk - - @fname_on_disk.setter - def fname_on_disk(self, value): - self._fname_on_disk = value - - def is_valid(self): - """:return True if the observation ID conforms to naming rules.""" - pattern = re.compile(self.collection_pattern) - return pattern.match(self.obs_id) - - def _get_uri(self, fname): - """The ad URI for a file, without consideration for compression.""" - return 
'{}:{}/{}'.format(self.scheme, self.archive, fname) - - @staticmethod - def remove_extensions(name): - """How to get the file_id from a file_name.""" - return name.replace('.fits', '').replace('.gz', '').replace('.header', - '') - - -class CaomName(object): - """The naming rules for making and decomposing CAOM URIs (i.e. Observation - URIs, Plane URIs, and archive URIs, all isolated in one class. There are - probably OMM assumptions built in, but those will slowly go away :). """ - - def __init__(self, uri): - self.uri = uri - - @property - def file_id(self): - """ - - :return: Extracted from an Artifact URI, the file_id is the file - name portion of the URI with all file type and compression type - extensions removed. - """ - return StorageName.remove_extensions(self.uri.split('/')[1]) - - @property - def file_name(self): - """:return The file name extracted from an Artifact URI.""" - return self.uri.split('/')[1] - - @property - def uncomp_file_name(self): - """:return The file name extracted from an Artifact URI, without - the compression extension.""" - return self.file_name.replace('.gz', '') - - @staticmethod - def make_obs_uri_from_obs_id(collection, obs_id): - """:return A string that conforms to the Observation URI - specification from CAOM.""" - return 'caom:{}/{}'.format(collection, obs_id) - - -class CaomExecute(object): - """Abstract class that defines the operations common to all Execute - classes.""" - - def __init__(self, config, task_type, storage_name, command_name, - cred_param, cadc_data_client, caom_repo_client, - meta_visitors): - """ - - :param config: Configurable parts of execution, as stored in - manage_composable.Config. - :param task_type: manage_composable.TaskType enumeration - identifies - the work to do, in words that are user-facing. Used in logging - messages. - :param storage_name: An instance of StorageName. - :param command_name: The collection-specific application to apply a - blueprint. May be 'fits2caom2'. 
- :param cred_param: either --netrc or --cert , - depending on which credentials have been supplied to the - process. - :param cadc_data_client: Instance of CadcDataClient. Used for data - service access. - :param caom_repo_client: Instance of CAOM2Repo client. Used for - caom2 repository service access. - :param meta_visitors: List of classes with a - 'visit(observation, **kwargs)' method signature. Requires access - to metadata only. - """ - self.logger = logging.getLogger() - self.logger.setLevel(config.logging_level) - formatter = logging.Formatter( - '%(asctime)s:%(levelname)s:%(name)-12s:%(lineno)d:%(message)s') - for handler in self.logger.handlers: - handler.setLevel(config.logging_level) - handler.setFormatter(formatter) - self.logging_level_param = self._set_logging_level_param( - config.logging_level) - self.obs_id = storage_name.obs_id - self.product_id = storage_name.product_id - self.uri = storage_name.file_uri - self.fname = storage_name.file_name - self.command_name = command_name - self.root_dir = config.working_directory - self.collection = config.collection - self.archive = config.archive - self.working_dir = os.path.join(self.root_dir, self.obs_id) - if config.log_to_file: - self.model_fqn = os.path.join(config.log_file_directory, - storage_name.model_file_name) - else: - self.model_fqn = os.path.join(self.working_dir, - storage_name.model_file_name) - self.resource_id = config.resource_id - self.cadc_data_client = cadc_data_client - self.caom_repo_client = caom_repo_client - self.stream = config.stream - self.meta_visitors = meta_visitors - self.task_type = task_type - self.cred_param = cred_param - self.url = storage_name.url - - def _cleanup(self): - """Remove a directory and all its contents.""" - if os.path.exists(self.working_dir): - for ii in os.listdir(self.working_dir): - os.remove(os.path.join(self.working_dir, ii)) - os.rmdir(self.working_dir) - - def _create_dir(self): - """Create the working area if it does not already exist.""" - 
mc.create_dir(self.working_dir) - - def _define_local_dirs(self, storage_name): - """when files are on disk don't worry about a separate directory - per observation""" - self.working_dir = self.root_dir - self.model_fqn = os.path.join(self.working_dir, - storage_name.model_file_name) - - def _find_fits2caom2_plugin(self): - """Find the code that is passed as the --plugin parameter to - fits2caom2. - - This code makes the assumption that execution always occurs within - the context of a Docker container, and therefore the - get_python_lib call will always have the appropriately-named - module installed in a site package location. - """ - packages = distutils.sysconfig.get_python_lib() - return os.path.join(packages, '{}/{}.py'.format(self.command_name, - self.command_name)) - - def _fits2caom2_cmd_local(self): - """Execute fits2caom with a --local parameter.""" - fqn = os.path.join(self.working_dir, self.fname) - plugin = self._find_fits2caom2_plugin() - # so far, the plugin is also the module :) - cmd = '{} {} {} --observation {} {} --out {} ' \ - '--plugin {} --module {} --local {} --lineage {}/{}'.format( - self.command_name, - self.logging_level_param, self.cred_param, self.collection, - self.obs_id, self.model_fqn, plugin, plugin, fqn, self.obs_id, - self.uri) - mc.exec_cmd(cmd) - - def _fits2caom2_cmd_client(self): - """Execute fits2caom with a --cert parameter.""" - plugin = self._find_fits2caom2_plugin() - # so far, the plugin is also the module :) - cmd = '{} {} {} --observation {} {} --out {} ' \ - '--plugin {} --module {} --lineage {}/{}'.format( - self.command_name, self.logging_level_param, self.cred_param, - self.collection, self.obs_id, self.model_fqn, plugin, plugin, - self.product_id, self.uri) - mc.exec_cmd(cmd) - - def _fits2caom2_cmd_client_local(self): - """ - Execute fits2caom with a --cert parameter and a --local parameter. 
- """ - plugin = self._find_fits2caom2_plugin() - # so far, the plugin is also the module :) - local_fqn = os.path.join(self.working_dir, self.fname) - cmd = '{} {} {} --observation {} {} --local {} --out {} ' \ - '--plugin {} --module {} --lineage {}/{}'.format( - self.command_name, self.logging_level_param, self.cred_param, - self.collection, self.obs_id, local_fqn, self.model_fqn, - plugin, plugin, self.product_id, self.uri) - mc.exec_cmd(cmd) - - def _fits2caom2_cmd_in_out_client(self): - """Execute fits2caom with a --in and a --cert parameter.""" - plugin = self._find_fits2caom2_plugin() - # so far, the plugin is also the module :) - # TODO add an input parameter - cmd = '{} {} {} --in {} --out {} ' \ - '--plugin {} --module {} --lineage {}/{}'.format( - self.command_name, self.logging_level_param, self.cred_param, - self.model_fqn, self.model_fqn, plugin, plugin, - self.product_id, self.uri) - mc.exec_cmd(cmd) - - def _fits2caom2_cmd_in_out_local_client(self): - """Execute fits2caom with a --in, --local and a --cert parameter.""" - plugin = self._find_fits2caom2_plugin() - # so far, the plugin is also the module :) - local_fqn = os.path.join(self.working_dir, self.fname) - cmd = '{} {} {} --in {} --out {} --local {} ' \ - '--plugin {} --module {} --lineage {}/{}'.format( - self.command_name, self.logging_level_param, self.cred_param, - self.model_fqn, self.model_fqn, local_fqn, plugin, plugin, - self.product_id, self.uri) - mc.exec_cmd(cmd) - - def _compare_checksums_client(self, fname): - """Compare the checksum of a file on disk with a file in ad, - using the client instance from this class.""" - fqn = os.path.join(self.working_dir, fname) - mc.compare_checksum_client( - self.cadc_data_client, self.archive, fqn) - - def _repo_cmd_create_client(self, observation): - """Create an observation instance from the input parameter.""" - try: - self.caom_repo_client.create(observation) - except Exception as e: - raise mc.CadcException( - 'Could not create an 
observation record for {} in {}. ' - '{}'.format(self.obs_id, self.resource_id, e)) - - def _repo_cmd_update_client(self, observation): - """Update an existing observation instance. Assumes the obs_id - values are set correctly.""" - try: - self.caom_repo_client.update(observation) - except Exception as e: - raise mc.CadcException( - 'Could not update an observation record for {} in {}. ' - '{}'.format(self.obs_id, self.resource_id, e)) - - def _repo_cmd_read_client(self): - """Retrieve the existing observation model metadata.""" - try: - return self.caom_repo_client.read(self.collection, self.obs_id) - except Exception as e: - raise mc.CadcException( - 'Could not read observation record for {} in {}. {}'.format( - self.obs_id, self.resource_id, e)) - - def _repo_cmd_delete_client(self, observation): - """Delete an observation instance based on an input parameter.""" - try: - self.caom_repo_client.delete(observation.collection, - observation.observation_id) - except Exception as e: - raise mc.CadcException( - 'Could not delete the observation record for {} in {}. ' - '{}'.format(self.obs_id, self.resource_id, e)) - - def _cadc_data_put_client(self, fname, mime_type): - """Store a collection file.""" - try: - self.cadc_data_client.put_file(self.collection, fname, self.stream, - mime_type=mime_type) - except Exception as e: - raise mc.CadcException( - 'Did not store {} with {}'.format(fname, e)) - - def _cadc_data_get_client(self): - """Retrieve an archive file, even if it already exists. 
This might - ensure that the latest version of the file is retrieved from - storage.""" - - fqn = os.path.join(self.working_dir, self.fname) - try: - self.cadc_data_client.get_file(self.archive, self.fname, - destination=fqn) - if not os.path.exists(fqn): - raise mc.CadcException('{} does not exist.'.format(fqn)) - except Exception: - raise mc.CadcException( - 'Did not retrieve {}'.format(fqn)) - - def _cadc_data_info_file_name_client(self): - """Execute CadcDataClient.get_file_info with the client instance from - this class.""" - file_info = self.cadc_data_client.get_file_info( - self.archive, self.fname) - self.fname = file_info['name'] - - def _read_model(self): - """Read an observation into memory from an XML file on disk.""" - return mc.read_obs_from_file(self.model_fqn) - - def _write_model(self, observation): - """Write an observation to disk from memory, represented in XML.""" - mc.write_obs_to_file(observation, self.model_fqn) - - def _visit_meta(self, observation): - """Execute metadata-only visitors on an Observation in - memory.""" - if self.meta_visitors is not None and len(self.meta_visitors) > 0: - kwargs = {'working_directory': self.working_dir, - 'cadc_client': self.cadc_data_client, - 'stream': self.stream, - 'url': self.url} - for visitor in self.meta_visitors: - try: - self.logger.debug('Visit for {}'.format(visitor)) - visitor.visit(observation, **kwargs) - except Exception as e: - raise mc.CadcException(e) - - @staticmethod - def _set_logging_level_param(logging_level): - """Make a configured logging level into command-line parameters.""" - lookup = {logging.DEBUG: '--debug', - logging.INFO: '--verbose', - logging.WARNING: '', - logging.ERROR: '--quiet'} - if logging_level in lookup: - result = lookup[logging_level] - else: - result = '' - return result - - @staticmethod - def repo_cmd_get_client(caom_repo_client, collection, observation_id): - """Execute the CAOM2Repo 'read' operation using the client instance - from this class. 
- :return an Observation instance, or None, if the observation id - does not exist.""" - try: - observation = caom_repo_client.read(collection, observation_id) - return observation - except exceptions.NotFoundException: - return None - except Exception: - raise mc.CadcException( - 'Could not retrieve an observation record for {}.'.format( - observation_id)) - - -class MetaCreateClient(CaomExecute): - """Defines the pipeline step for Collection ingestion of metadata into CAOM. - This requires access to only header information. - - This pipeline step will execute a caom2-repo create.""" - - def __init__(self, config, storage_name, command_name, - cred_param, cadc_data_client, caom_repo_client, - meta_visitors): - super(MetaCreateClient, self).__init__( - config, mc.TaskType.INGEST, storage_name, command_name, - cred_param, cadc_data_client, caom_repo_client, meta_visitors) - - def execute(self, context): - self.logger.debug('Begin execute for {} Meta'.format(__name__)) - self.logger.debug('the steps:') - - self.logger.debug('Find the file name as stored.') - self._cadc_data_info_file_name_client() - - self.logger.debug('create the work space, if it does not exist') - self._create_dir() - - self.logger.debug('the observation does not exist, so go ' - 'straight to generating the xml, as the main_app ' - 'will retrieve the headers') - self._fits2caom2_cmd_client() - - self.logger.debug('read the xml into memory from the file') - observation = self._read_model() - - self.logger.debug('the metadata visitors') - self._visit_meta(observation) - - self.logger.debug('store the xml') - self._repo_cmd_create_client(observation) - - self.logger.debug('clean up the workspace') - self._cleanup() - - self.logger.debug('End execute for {}'.format(__name__)) - - -class MetaUpdateClient(CaomExecute): - """Defines the pipeline step for Collection ingestion of metadata into CAOM. - This requires access to only header information. 
- - This pipeline step will execute a caom2-repo update.""" - - def __init__(self, config, storage_name, command_name, cred_param, - cadc_data_client, caom_repo_client, observation, - meta_visitors): - super(MetaUpdateClient, self).__init__( - config, mc.TaskType.INGEST, storage_name, command_name, cred_param, - cadc_data_client, caom_repo_client, meta_visitors) - self.observation = observation - - def execute(self, context): - self.logger.debug('Begin execute for {} Meta'.format(__name__)) - self.logger.debug('the steps:') - - self.logger.debug('Find the file name as stored.') - self._cadc_data_info_file_name_client() - - self.logger.debug('create the work space, if it does not exist') - self._create_dir() - - self.logger.debug('write the observation to disk for next step') - self._write_model(self.observation) - - self.logger.debug('generate the xml, as the main_app will retrieve ' - 'the headers') - self._fits2caom2_cmd_in_out_client() - - self.logger.debug('read the xml from disk') - self.observation = self._read_model() - - self.logger.debug('the metadata visitors') - self._visit_meta(self.observation) - - self.logger.debug('write the observation to disk for next step') - self._write_model(self.observation) - - self.logger.debug('store the xml') - self._repo_cmd_update_client(self.observation) - - self.logger.debug('clean up the workspace') - self._cleanup() - - self.logger.debug('End execute for {}'.format(__name__)) - - -class MetaDeleteCreateClient(CaomExecute): - """Defines the pipeline step for Collection ingestion of metadata into CAOM. - This requires access to only header information. 
- - This pipeline step will execute a caom2-repo delete followed by - a create, because an update will not support a Simple->Composite - or Composite->Simple type change for the Observation - structure.""" - - def __init__(self, config, storage_name, command_name, - cred_param, cadc_data_client, caom_repo_client, - observation, meta_visitors): - super(MetaDeleteCreateClient, self).__init__( - config, mc.TaskType.INGEST, storage_name, command_name, - cred_param, cadc_data_client, caom_repo_client, meta_visitors) - self.observation = observation - - def execute(self, context): - self.logger.debug('Begin execute for {} Meta'.format(__name__)) - self.logger.debug('the steps:') - - self.logger.debug('Find the file name as stored.') - self._cadc_data_info_file_name_client() - - self.logger.debug('create the work space, if it does not exist') - self._create_dir() - - self.logger.debug('write the observation to disk for next step') - self._write_model(self.observation) - - self.logger.debug('make a new observation from an existing ' - 'observation') - self._fits2caom2_cmd_in_out_client() - - self.logger.debug('read the xml into memory from the file') - self.observation = self._read_model() - - self.logger.debug('the metadata visitors') - self._visit_meta(self.observation) - - self.logger.debug('the observation exists, delete it') - self._repo_cmd_delete_client(self.observation) - - self.logger.debug('store the xml') - self._repo_cmd_create_client(self.observation) - - self.logger.debug('clean up the workspace') - self._cleanup() - - self.logger.debug('End execute for {}'.format(__name__)) - - -class LocalMetaCreateClient(CaomExecute): - """Defines the pipeline step for Collection ingestion of metadata into CAOM. - This requires access to only header information. 
- - This pipeline step will execute a caom2-repo create.""" - - def __init__(self, config, storage_name, command_name, cred_param, - cadc_data_client, caom_repo_client, meta_visitors): - super(LocalMetaCreateClient, self).__init__( - config, mc.TaskType.INGEST, storage_name, command_name, cred_param, - cadc_data_client, caom_repo_client, meta_visitors) - self._define_local_dirs(storage_name) - self.fname = storage_name.fname_on_disk - - def execute(self, context): - self.logger.debug('Begin execute for {} Meta'.format(__name__)) - self.logger.debug('the steps:') - - self.logger.debug('Find the file name as stored.') - self._cadc_data_info_file_name_client() - - self.logger.debug('the observation does not exist, so go ' - 'straight to generating the xml, as the main_app ' - 'will retrieve the headers') - self._fits2caom2_cmd_client_local() - - self.logger.debug('read the xml from disk') - observation = self._read_model() - - self.logger.debug('the metadata visitors') - self._visit_meta(observation) - - self.logger.debug('store the xml') - self._repo_cmd_create_client(observation) - - self.logger.debug('write the updated xml to disk for debugging') - self._write_model(observation) - - self.logger.debug('End execute for {}'.format(__name__)) - - -class LocalMetaDeleteCreateClient(CaomExecute): - """Defines the pipeline step for Collection ingestion of metadata into CAOM. - This requires access to only header information. 
- - This pipeline step will execute a caom2-repo delete followed by - a create, because an update will not support a Simple->Composite - or Composite->Simple type change for the Observation - structure.""" - - def __init__(self, config, storage_name, command_name, cred_param, - cadc_data_client, caom_repo_client, observation, - meta_visitors): - super(LocalMetaDeleteCreateClient, self).__init__( - config, mc.TaskType.INGEST, storage_name, command_name, cred_param, - cadc_data_client, caom_repo_client, meta_visitors) - self._define_local_dirs(storage_name) - self.observation = observation - - def execute(self, context): - self.logger.debug('Begin execute for {} Meta'.format(__name__)) - self.logger.debug('the steps:') - - self.logger.debug('Find the file name as stored.') - self._cadc_data_info_file_name_client() - - self.logger.debug('write the observation to disk for next step') - self._write_model(self.observation) - - self.logger.debug('make a new observation from an existing ' - 'observation') - self._fits2caom2_cmd_in_out_local_client() - - self.logger.debug('read the xml from disk') - observation = self._read_model() - - self.logger.debug('the metadata visitors') - self._visit_meta(observation) - - self.logger.debug('the observation exists, delete it') - self._repo_cmd_delete_client(self.observation) - - self.logger.debug('store the xml') - self._repo_cmd_create_client(observation) - - self.logger.debug('write the updated xml to disk for debugging') - self._write_model(observation) - - self.logger.debug('End execute for {}'.format(__name__)) - - -class LocalMetaUpdateClient(CaomExecute): - """Defines the pipeline step for Collection ingestion of metadata into CAOM. - This requires access to only header information. 
- - This pipeline step will execute a caom2-repo update.""" - - def __init__(self, config, storage_name, command_name, cred_param, - cadc_data_client, caom_repo_client, observation, - meta_visitors): - super(LocalMetaUpdateClient, self).__init__( - config, mc.TaskType.INGEST, storage_name, command_name, cred_param, - cadc_data_client, caom_repo_client, meta_visitors) - self._define_local_dirs(storage_name) - self.observation = observation - - def execute(self, context): - self.logger.debug('Begin execute for {} Meta'.format(__name__)) - self.logger.debug('the steps:') - - self.logger.debug('Find the file name as stored.') - self._cadc_data_info_file_name_client() - - self.logger.debug('write the observation to disk for next step') - self._write_model(self.observation) - - self.logger.debug('generate the xml, as the main_app will retrieve ' - 'the headers') - self._fits2caom2_cmd_in_out_local_client() - - self.logger.debug('read the xml from disk') - self.observation = self._read_model() - - self.logger.debug('the metadata visitors') - self._visit_meta(self.observation) - - self.logger.debug('store the xml') - self._repo_cmd_update_client(self.observation) - - self.logger.debug('write the updated xml to disk for debugging') - self._write_model(self.observation) - - self.logger.debug('End execute for {}'.format(__name__)) - - -class ClientVisit(CaomExecute): - """Defines the pipeline step for Collection augmentation by a visitor - of metadata into CAOM. This assumes a record already exists in CAOM, - and the update DOES NOT require access to either the header or the data. 
- - This pipeline step will execute a caom2-repo update.""" - - def __init__(self, config, storage_name, cred_param, - cadc_data_client, caom_repo_client, meta_visitors): - super(ClientVisit, self).__init__( - config, mc.TaskType.VISIT, storage_name, command_name=None, - cred_param=cred_param, cadc_data_client=cadc_data_client, - caom_repo_client=caom_repo_client, - meta_visitors=meta_visitors) - self.fname = None - - def execute(self, context): - self.logger.debug('Begin execute for {} Meta'.format(__name__)) - self.logger.debug('the steps:') - - # TODO - run a test to see if this is necessary - # self.logger.debug('Find the file name as stored.') - # self._find_file_name_storage_client() - - self.logger.debug('retrieve the existing observation, if it exists') - observation = self._repo_cmd_read_client() - - self.logger.debug('the metadata visitors') - self._visit_meta(observation) - - self.logger.debug('store the xml') - self._repo_cmd_update_client(observation) - - self.logger.debug('clean up the workspace') - self._cleanup() - - self.logger.debug('End execute for {}'.format(__name__)) - - -class DataClient(CaomExecute): - """Defines the pipeline step for all the operations that - require access to the file on disk, not just the header data. 
""" - - def __init__(self, config, storage_name, command_name, cred_param, - cadc_data_client, caom_repo_client, data_visitors, - task_type): - super(DataClient, self).__init__( - config, task_type, storage_name, command_name, cred_param, - cadc_data_client, caom_repo_client, meta_visitors=None) - self.log_file_directory = config.log_file_directory - self.data_visitors = data_visitors - self.prev_fname = storage_name.prev - self.thumb_fname = storage_name.thumb - - def execute(self, context): - self.logger.debug('Begin execute for {} Data'.format(__name__)) - - self.logger.debug('Find the file name as stored.') - self._cadc_data_info_file_name_client() - - self.logger.debug('create the work space, if it does not exist') - self._create_dir() - - self.logger.debug('get the input file') - self._cadc_data_get_client() - - self.logger.debug('get the observation for the existing model') - observation = self._repo_cmd_read_client() - - self.logger.debug('execute the data visitors') - self._visit_data(observation) - - self.logger.debug('store the updated xml') - self._repo_cmd_update_client(observation) - - self.logger.debug('clean up the workspace') - self._cleanup() - - self.logger.debug('End execute for {}'.format(__name__)) - - def _visit_data(self, observation): - """Execute the visitors that require access to the full data content - of a file.""" - kwargs = {'working_directory': self.working_dir, - 'science_file': self.fname, - 'log_file_directory': self.log_file_directory, - 'cadc_client': self.cadc_data_client} - for visitor in self.data_visitors: - try: - self.logger.debug('Visit for {}'.format(visitor)) - visitor.visit(observation, **kwargs) - except Exception as e: - raise mc.CadcException(e) - - -class LocalDataClient(DataClient): - """Defines the pipeline step for all the operations that - require access to the file on disk. 
This class assumes it has access to - the files on disk.""" - - def __init__(self, config, storage_name, command_name, cred_param, - cadc_data_client, caom_repo_client, data_visitors): - super(LocalDataClient, self).__init__( - config, storage_name, command_name, cred_param, - cadc_data_client=cadc_data_client, - caom_repo_client=caom_repo_client, data_visitors=data_visitors, - task_type=mc.TaskType.MODIFY) - self._define_local_dirs(storage_name) - self.fname = storage_name.fname_on_disk - - def execute(self, context): - self.logger.debug('Begin execute for {} Data'.format(__name__)) - - self.logger.debug('get the observation for the existing model') - observation = self._repo_cmd_read_client() - - self.logger.debug('execute the data visitors') - self._visit_data(observation) - - self.logger.debug('store the updated xml') - self._repo_cmd_update_client(observation) - - self.logger.debug('write the updated xml to disk for debugging') - self._write_model(observation) - - self.logger.debug('End execute for {}'.format(__name__)) - - -class PullClient(CaomExecute): - """Defines the pipeline step for Collection storage of a file that - is retrieved via http. 
The file will be temporarily stored on disk, - because the cadc-data client doesn't support streaming (yet).""" - - def __init__(self, config, storage_name, command_name, cred_param, - cadc_data_client, caom_repo_client): - super(PullClient, self).__init__( - config, mc.TaskType.PULL, storage_name, command_name, cred_param, - cadc_data_client, caom_repo_client, meta_visitors=None) - self.stream = config.stream - self.fname = storage_name.file_name - self.local_fqn = os.path.join(self.working_dir, self.fname) - - def execute(self, context): - self.logger.debug('Begin execute for {} Data'.format(__name__)) - - self.logger.debug('create the work space, if it does not exist') - self._create_dir() - - self.logger.debug('get the input file') - self._http_get() - - self.logger.debug( - 'store the input file {} to ad'.format(self.local_fqn)) - self._cadc_data_put_client(self.local_fqn, 'application/fits') - - self.logger.debug('clean up the workspace') - self._cleanup() - - self.logger.debug('End execute for {}'.format(__name__)) - - def _http_get(self): - """Retrieve a file via http to temporary local storage. Push to ad, - from local storage.""" - self.logger.debug('retrieve {} from {}'.format(self.fname, self.url)) - try: - with requests.get(self.url, stream=True) as r: - r.raise_for_status() - with open(self.local_fqn, 'wb') as f: - for chunk in r.iter_content(chunk_size=READ_BLOCK_SIZE): - f.write(chunk) - except exceptions.HttpException as e: - raise mc.CadcException( - 'Could not retrieve {} from {}. 
Failed with {}'.format( - self.local_fqn, self.url, e)) - # not sure how else to figure out if the file is good - try: - hdulist = fits.open(self.local_fqn, memmap=True, - lazy_load_hdus=False) - hdulist.verify('warn') - for h in hdulist: - h.verify('warn') - hdulist.close() - except fits.VerifyError as e: - raise mc.CadcException( - 'astropy verify error {} when reading {}'.format( - self.local_fqn, e)) - self.logger.debug('Successfully retrieved {}'.format(self.fname)) - - -class StoreClient(CaomExecute): - """Defines the pipeline step for Collection storage of a file. This - requires access to the file on disk.""" - - def __init__(self, config, storage_name, command_name, cred_param, - cadc_data_client, caom_repo_client): - super(StoreClient, self).__init__( - config, mc.TaskType.STORE, storage_name, command_name, cred_param, - cadc_data_client, caom_repo_client, meta_visitors=None) - # when files are on disk don't worry about a separate directory - # per observation - self.working_dir = self.root_dir - self.stream = config.stream - self.fname = storage_name.fname_on_disk - - def execute(self, context): - self.logger.debug('Begin execute for {} Data'.format(__name__)) - - self.logger.debug('store the input file {} to ad'.format(self.fname)) - self._cadc_data_put_client(self.fname, 'application/fits') - - self.logger.debug('End execute for {}'.format(__name__)) - - -class Scrape(CaomExecute): - """Defines the pipeline step for Collection creation of a CAOM model - observation. The file containing the metadata is located on disk. 
- No record is written to a web service.""" - - def __init__(self, config, storage_name, command_name): - super(Scrape, self).__init__( - config, mc.TaskType.SCRAPE, storage_name, command_name, - cred_param='', cadc_data_client=None, caom_repo_client=None, - meta_visitors=None) - self._define_local_dirs(storage_name) - self.fname = storage_name.fname_on_disk - if self.fname is None: - self.fname = storage_name.file_name - - def execute(self, context): - self.logger.debug('Begin execute for {} Meta'.format(__name__)) - self.logger.debug('the steps:') - - self.logger.debug('generate the xml from the file on disk') - self._fits2caom2_cmd_local() - - self.logger.debug('End execute for {}'.format(__name__)) - - -class DataScrape(DataClient): - """Defines the pipeline step for Collection generation and ingestion of - operations that require access to the file on disk, with no update to the - service at the end. This class assumes it has access to the files on disk. - The organization of this class assumes the 'Scrape' task has been done - previously, so the model instance exists on disk.""" - - def __init__(self, config, storage_name, command_name, data_visitors): - super(DataScrape, self).__init__( - config, storage_name, command_name, cred_param='', - cadc_data_client=None, caom_repo_client=None, - data_visitors=data_visitors, task_type=mc.TaskType.SCRAPE) - self._define_local_dirs(storage_name) - self.fname = storage_name.fname_on_disk - self.log_file_directory = config.log_file_directory - self.data_visitors = data_visitors - self.prev_fname = storage_name.prev - self.thumb_fname = storage_name.thumb - - def execute(self, context): - self.logger.debug('Begin execute for {} Data'.format(__name__)) - - self.logger.debug('get observation for the existing model from disk') - observation = self._read_model() - - self.logger.debug('execute the data visitors') - self._visit_data(observation) - - self.logger.debug('output the updated xml') - self._write_model(observation) - 
- self.logger.debug('End execute for {}'.format(__name__)) - - -class CompareChecksumClient(CaomExecute): - """Defines the pipeline step for comparing the checksum of a file on disk - with the checksum of the supposedly-the-same file stored at CADC. - - This step should be invoked with any other task type that relies on - files on local disk. - """ - - def __init__(self, config, storage_name, command_name, cred_param, - cadc_data_client, caom_repo_client): - super(CompareChecksumClient, self).__init__( - config, mc.TaskType.CHECKSUM, storage_name, command_name, - cred_param, cadc_data_client, caom_repo_client, - meta_visitors=None) - self._define_local_dirs(storage_name) - self.fname = storage_name.fname_on_disk - - def execute(self, context): - self.logger.debug('Begin execute for {} ' - 'CompareChecksum'.format(__name__)) - self.logger.debug('the steps:') - - self.logger.debug('generate the xml from the file on disk') - self._compare_checksums_client(self.fname) - - self.logger.debug('End execute for {}'.format(__name__)) - - -class LocalMetaCreateClientRemoteStorage(CaomExecute): - """Defines the pipeline step for Collection ingestion of metadata into - CAOM. This requires access to only header information. - - The file that contains the metadata is available locally, but this file - is not, nor will it, be stored in CADC. 
- - This pipeline step will execute a caom2-repo create.""" - - def __init__(self, config, storage_name, command_name, - cred_param, cadc_data_client, caom_repo_client, - meta_visitors): - super(LocalMetaCreateClientRemoteStorage, self).__init__( - config, mc.TaskType.REMOTE, storage_name, command_name, - cred_param, cadc_data_client, caom_repo_client, meta_visitors) - self._define_local_dirs(storage_name) - - def execute(self, context): - self.logger.debug('Begin execute for {} Meta'.format(__name__)) - self.logger.debug('the steps:') - - self.logger.debug('the observation does not exist, so go ' - 'straight to generating the xml, as the main_app ' - 'will retrieve the headers') - self._fits2caom2_cmd_client_local() - - self.logger.debug('read the xml into memory from the file') - observation = self._read_model() - - self.logger.debug('the metadata visitors') - self._visit_meta(observation) - - self.logger.debug('store the xml') - self._repo_cmd_create_client(observation) - - self.logger.debug('clean up the workspace') - self._cleanup() - - self.logger.debug('End execute for {}'.format(__name__)) - - -class LocalMetaUpdateClientRemoteStorage(CaomExecute): - """Defines the pipeline step for Collection ingestion of metadata into - CAOM. This requires access to only header information. - - The file that contains the metadata is available locally, but this file - is not, nor will it, be stored in CADC. 
- - This pipeline step will execute a caom2-repo update.""" - - def __init__(self, config, storage_name, command_name, cred_param, - cadc_data_client, caom_repo_client, observation, - meta_visitors): - super(LocalMetaUpdateClientRemoteStorage, self).__init__( - config, mc.TaskType.INGEST, storage_name, command_name, cred_param, - cadc_data_client, caom_repo_client, meta_visitors) - self._define_local_dirs(storage_name) - self.observation = observation - - def execute(self, context): - self.logger.debug('Begin execute for {} Meta'.format(__name__)) - self.logger.debug('the steps:') - - self.logger.debug('write the observation to disk for next step') - self._write_model(self.observation) - - self.logger.debug('generate the xml, as the main_app will retrieve ' - 'the headers') - self._fits2caom2_cmd_in_out_local_client() - - self.logger.debug('read the xml from disk') - self.observation = self._read_model() - - self.logger.debug('the metadata visitors') - self._visit_meta(self.observation) - - self.logger.debug('store the xml') - self._repo_cmd_update_client(self.observation) - - self.logger.debug('write the updated xml to disk for debugging') - self._write_model(self.observation) - - self.logger.debug('End execute for {}'.format(__name__)) - - -class OrganizeChooser(object): - """Extend this class to provide a way to make collection-specific - complex conditions available within the OrganizeExecute class.""" - def __init__(self): - pass - - def needs_delete(self, observation): - return False - - def use_compressed(self): - return False - - -class OrganizeExecutes(object): - """How to turn on/off various task types in a CaomExecute pipeline.""" - def __init__(self, config, chooser=None, todo_file=None): - self.config = config - self.chooser = chooser - self.task_types = config.task_types - self.logger = logging.getLogger() - self.logger.setLevel(config.logging_level) - if todo_file is not None: - self.todo_fqn = todo_file - todo_name = 
os.path.basename(todo_file).split('.')[0] - self.success_fqn = os.path.join( - self.config.log_file_directory, - '{}_success_log.txt'.format(todo_name)) - config.success_fqn = self.success_fqn - self.failure_fqn = os.path.join( - self.config.log_file_directory, - '{}_failure_log.txt'.format(todo_name)) - config.failure_fqn = self.failure_fqn - self.retry_fqn = os.path.join( - self.config.log_file_directory, - '{}_retries.txt'.format(todo_name)) - config.retry_fqn = self.retry_fqn - else: - self.todo_fqn = config.work_fqn - self.success_fqn = config.success_fqn - self.failure_fqn = config.failure_fqn - self.retry_fqn = config.retry_fqn - - if self.config.log_to_file: - mc.create_dir(self.config.log_file_directory) - failure = open(self.failure_fqn, 'w') - failure.close() - retry = open(self.retry_fqn, 'w') - retry.close() - success = open(self.success_fqn, 'w') - success.close() - self.success_count = 0 - self.complete_record_count = 0 - - @property - def complete_record_count(self): - """:return integer indicating how many inputs (files or observations, - depending on the configuration) have been processed.""" - return self._complete_record_count - - @complete_record_count.setter - def complete_record_count(self, value): - self._complete_record_count = value - - def choose(self, storage_name, command_name, meta_visitors, data_visitors): - """The logic that decides which descendants of CaomExecute to - instantiate. This is based on the content of the config.yml file - for an application. - :storage_name StorageName extension that handles the naming rules for - a file in ad. - :command_name Extension of fits2caom2 (or fits2caom2) that is executed - for blueprint handling. - :meta_visitors List of methods that implement the - visit(observation, **kwargs) signature that require metadata - access. 
- :data_visitors List of methods that implement the - visit(observation, **kwargs) signature that require data access.""" - executors = [] - if storage_name.is_valid(): - if mc.TaskType.SCRAPE in self.task_types: - cred_param = None - cadc_data_client = None - caom_repo_client = None - else: - subject, cred_param = self._define_subject() - cadc_data_client = CadcDataClient(subject) - caom_repo_client = CAOM2RepoClient( - subject, self.config.logging_level, - self.config.resource_id) - for task_type in self.task_types: - self.logger.debug(task_type) - if task_type == mc.TaskType.SCRAPE: - if self.config.use_local_files: - executors.append( - Scrape(self.config, storage_name, - command_name)) - else: - raise mc.CadcException( - 'use_local_files must be True with ' - 'Task Type "SCRAPE"') - elif task_type == mc.TaskType.STORE: - if self.config.use_local_files: - executors.append( - StoreClient( - self.config, storage_name, command_name, - cred_param, cadc_data_client, - caom_repo_client)) - else: - raise mc.CadcException( - 'use_local_files must be True with ' - 'Task Type "STORE"') - elif task_type == mc.TaskType.INGEST: - observation = CaomExecute.repo_cmd_get_client( - caom_repo_client, self.config.collection, - storage_name.obs_id) - if observation is None: - if self.config.use_local_files: - executors.append( - LocalMetaCreateClient( - self.config, storage_name, command_name, - cred_param, cadc_data_client, - caom_repo_client, meta_visitors)) - else: - executors.append(MetaCreateClient( - self.config, storage_name, command_name, - cred_param, cadc_data_client, caom_repo_client, - meta_visitors)) - else: - if self.config.use_local_files: - if (self.chooser is not None and - self.chooser.needs_delete(observation)): - executors.append( - LocalMetaDeleteCreateClient( - self.config, storage_name, - command_name, - cred_param, cadc_data_client, - caom_repo_client, observation, - meta_visitors)) - else: - executors.append( - LocalMetaUpdateClient( - self.config, 
storage_name, - command_name, - cred_param, cadc_data_client, - caom_repo_client, observation, - meta_visitors)) - else: - if (self.chooser is not None and - self.chooser.needs_delete(observation)): - executors.append( - MetaDeleteCreateClient( - self.config, storage_name, - command_name, - cred_param, cadc_data_client, - caom_repo_client, observation, - meta_visitors)) - else: - executors.append( - MetaUpdateClient( - self.config, storage_name, - command_name, cred_param, - cadc_data_client, caom_repo_client, - observation, meta_visitors)) - elif task_type == mc.TaskType.MODIFY: - if self.config.use_local_files: - if (executors is not None and len(executors) > 0 and - isinstance( - executors[0], Scrape)): - executors.append( - DataScrape(self.config, - storage_name, - command_name, - data_visitors)) - else: - executors.append( - LocalDataClient( - self.config, storage_name, command_name, - cred_param, cadc_data_client, - caom_repo_client, data_visitors)) - else: - executors.append(DataClient( - self.config, storage_name, - command_name, cred_param, - cadc_data_client, caom_repo_client, data_visitors, - mc.TaskType.MODIFY)) - elif task_type == mc.TaskType.VISIT: - executors.append(ClientVisit( - self.config, storage_name, cred_param, - cadc_data_client, caom_repo_client, meta_visitors)) - elif task_type == mc.TaskType.REMOTE: - observation = CaomExecute.repo_cmd_get_client( - caom_repo_client, self.config.collection, - storage_name.obs_id) - if observation is None: - if self.config.use_local_files: - executors.append( - LocalMetaCreateClientRemoteStorage( - self.config, storage_name, command_name, - cred_param, cadc_data_client, - caom_repo_client, meta_visitors)) - else: - raise mc.CadcException( - 'use_local_files must be True with ' - 'Task Type "REMOTE"') - else: - if self.config.use_local_files: - executors.append( - LocalMetaUpdateClientRemoteStorage( - self.config, storage_name, command_name, - cred_param, cadc_data_client, - caom_repo_client, observation, - 
meta_visitors)) - else: - raise mc.CadcException( - 'use_local_files must be True with ' - 'Task Type "REMOTE"') - elif task_type == mc.TaskType.PULL: - executors.append( - PullClient(self.config, storage_name, command_name, - cred_param, cadc_data_client, - caom_repo_client)) - else: - raise mc.CadcException( - 'Do not understand task type {}'.format(task_type)) - if (self.config.use_local_files and - mc.TaskType.SCRAPE not in self.task_types and - mc.TaskType.REMOTE not in self.task_types): - executors.append( - CompareChecksumClient( - self.config, storage_name, command_name, - cred_param, cadc_data_client, caom_repo_client)) - else: - logging.error('{} failed naming validation check.'.format( - storage_name.obs_id)) - self.capture_failure(storage_name.obs_id, - storage_name.file_name, - 'Invalid observation ID') - return executors - - def capture_failure(self, obs_id, file_name, e): - """Log an error message to the failure file. - :obs_id observation ID being processed - :file_name file name being processed - :e Exception to log""" - if self.config.log_to_file: - failure = open(self.failure_fqn, 'a') - try: - min_error = self._minimize_error_message(e) - failure.write( - '{} {} {} {}\n'.format(datetime.now(), obs_id, file_name, - min_error)) - finally: - failure.close() - - retry = open(self.retry_fqn, 'a') - try: - if (self.config.features.use_file_names or - self.config.use_local_files): - retry.write('{}\n'.format(file_name)) - else: - retry.write('{}\n'.format(obs_id)) - finally: - retry.close() - - def capture_success(self, obs_id, file_name): - """Capture, with a timestamp, the successful observations/file names - that have been processed. 
- :obs_id observation ID being processed - :file_name file name being processed""" - self.success_count += 1 - if self.config.log_to_file: - success = open(self.success_fqn, 'a') - try: - success.write( - '{} {} {}\n'.format(datetime.now(), obs_id, file_name)) - logging.info('Progress - processed {} of {} records.'.format( - self.success_count, self.complete_record_count)) - finally: - success.close() - - def _define_subject(self): - """Common code to figure out which credentials to use when - creating an instance of the CadcDataClient and the CAOM2Repo client.""" - if (self.config.proxy_fqn is not None and os.path.exists( - self.config.proxy_fqn)): - logging.debug('Using proxy certificate {} for credentials.'.format( - self.config.proxy_fqn)) - subject = net.Subject(username=None, - certificate=self.config.proxy_fqn) - cred_param = '--cert {}'.format(self.config.proxy_fqn) - elif (self.config.netrc_file is not None and os.path.exists( - self.config.netrc_file)): - logging.debug('Using netrc file {} for credentials.'.format( - self.config.netrc_file)) - subject = net.Subject(username=None, certificate=None, - netrc=self.config.netrc_file) - cred_param = '--netrc {}'.format(self.config.netrc_file) - else: - subject = None - cred_param = '' - logging.warning( - 'No credentials provided (proxy certificate or netrc file).') - return subject, cred_param - - @staticmethod - def _minimize_error_message(e): - """Turn the long-winded stack trace into something minimal that lends - itself to awk.""" - if 'Read timed out' in e: - return 'Read timed out' - elif 'failed to load external entity' in e: - return 'caom2repo xml error' - elif 'Did not retrieve' in e: - return 'cadc-data get error' - elif 'NAXES was not set' in e: - return 'NAXES was not set' - elif 'Invalid SpatialWCS' in e: - return 'Invalid SpatialWCS' - elif 'getProxyCertficate failed' in e: - return 'getProxyCertificate failed' - elif 'AlreadyExistsException' in e: - return 'already exists' - elif 'Could not 
find the file' in e: - return 'cadc-data info failed' - elif 'md5sum not the same' in e: - return 'md5sum not the same' - elif 'Start tag expected' in e: - return 'XML Syntax Exception' - elif 'failed to compute metadata' in e: - return 'Failed to compute metadata' - elif 'reset by peer' in e: - return 'Connection reset by peer' - elif 'ConnectTimeoutError' in e: - return 'Connection to host timed out' - elif 'FileNotFoundError' in e: - return 'No such file or directory' - elif 'Must set a value of' in e: - return 'Value Error' - elif 'This does not look like a FITS file' in e: - return 'Not a FITS file' - elif 'invalid Polygon: segment intersect' in e: - return 'Segment intersect in polygon' - elif 'Could not read observation record' in e: - return 'Observation not found' - elif 'Broken pipe' in e: - return 'Broken pipe' - else: - return str(e) - - -def _set_up_file_logging(config, storage_name): - """Configure logging to a separate file for each entry being processed.""" - log_h = None - if config.log_to_file: - log_fqn = os.path.join(config.working_directory, - storage_name.log_file) - if config.log_file_directory is not None: - log_fqn = os.path.join(config.log_file_directory, - storage_name.log_file) - log_h = logging.FileHandler(log_fqn) - formatter = logging.Formatter( - '%(asctime)s:%(levelname)s:%(name)-12s:%(lineno)d:%(message)s') - log_h.setLevel(config.logging_level) - log_h.setFormatter(formatter) - logging.getLogger().addHandler(log_h) - return log_h - - -def _unset_file_logging(config, log_h): - """Turn off the logging to the separate file for each entry being - processed.""" - if config.log_to_file: - logging.getLogger().removeHandler(log_h) - - -def _do_one(config, organizer, storage_name, command_name, meta_visitors, - data_visitors): - """Process one entry. - :param config mc.Config - :param organizer instance of OrganizeExecutes - for calling the choose - method. 
- :param storage_name instance of StorageName for the collection - :param command_name extension of fits2caom2 for the collection - :param meta_visitors List of metadata visit methods. - :param data_visitors List of data visit methods. - """ - log_h = _set_up_file_logging(config, storage_name) - try: - executors = organizer.choose(storage_name, command_name, - meta_visitors, data_visitors) - for executor in executors: - logging.info('Step {} for {}'.format( - executor.task_type, storage_name.obs_id)) - executor.execute(context=None) - if len(executors) > 0: - organizer.capture_success(storage_name.obs_id, - storage_name.file_name) - return 0 - else: - logging.info('No executors for {}'.format( - storage_name.obs_id)) - return -1 # cover the case where file name validation fails - except Exception as e: - organizer.capture_failure(storage_name.obs_id, - storage_name.file_name, - e=traceback.format_exc()) - logging.info('Execution failed for {} with {}'.format( - storage_name.obs_id, e)) - logging.debug(traceback.format_exc()) - return -1 - finally: - _unset_file_logging(config, log_h) - - -def _run_by_file_list(config, organizer, sname, command_name, proxy, - meta_visitors, data_visitors, entry): - """Process an entry from a list of files. Creates the correct instance - of the StorageName extension, based on Config values. - - :param config mc.Config - :param organizer instance of OrganizeExecutes - for calling the choose - method. - :param sname which extension of StorageName to instantiate for the - collection - :param command_name extension of fits2caom2 for the collection - :param proxy Certificate proxy. - :param meta_visitors List of metadata visit methods. - :param data_visitors List of data visit methods. - :param entry what is being processed. 
- """ - if config.features.use_file_names: - if config.use_local_files: - storage_name = sname(file_name=entry, fname_on_disk=entry) - else: - storage_name = sname(file_name=entry) - else: - if config.use_local_files: - storage_name = sname(file_name=entry, fname_on_disk=entry) - else: - storage_name = sname(obs_id=entry) - logging.info('Process observation id {} as {}'.format( - storage_name.obs_id, storage_name.file_name)) - config.proxy_fqn = proxy - _do_one(config, organizer, storage_name, command_name, - meta_visitors, data_visitors) - - -def _run_todo_file(config, organizer, sname, command_name, proxy, - meta_visitors, data_visitors): - """Process all entries listed in a file. - - :param config mc.Config - :param organizer instance of OrganizeExecutes - for calling the choose - method. - :param sname which extension of StorageName to instantiate for the - collection - :param command_name extension of fits2caom2 for the collection - :param proxy Certificate proxy. - :param meta_visitors List of metadata visit methods. - :param data_visitors List of data visit methods. - """ - with open(organizer.todo_fqn) as f: - todo_list_length = sum(1 for _ in f) - organizer.complete_record_count = todo_list_length - with open(organizer.todo_fqn) as f: - for line in f: - _run_by_file_list(config, organizer, sname, command_name, - proxy, meta_visitors, data_visitors, - line.strip()) - - -def _run_local_files(config, organizer, sname, command_name, proxy, - meta_visitors, data_visitors, chooser): - """Process all entries located in the current working directory. - - :param config mc.Config - :param organizer instance of OrganizeExecutes - for calling the choose - method. - :param sname which extension of StorageName to instantiate for the - collection - :param command_name extension of fits2caom2 for the collection - :param proxy Certificate proxy. - :param meta_visitors List of metadata visit methods. - :param data_visitors List of data visit methods. 
- :param chooser OrganizeChooser access to collection-specific rules - """ - file_list = os.listdir(config.working_directory) - temp_list = [] - for f in file_list: - if f.endswith('.fits') or f.endswith('.fits.gz'): - if chooser is not None and chooser.use_compressed(): - if f.endswith('.fits'): - temp_list.append('{}.gz'.format(f)) - else: - temp_list.append(f) - else: - if f.endswith('.fits.gz'): - temp_list.append(f.replace('.gz', '')) - else: - temp_list.append(f) - elif f.endswith('.header'): - temp_list.append(f) - - # make the entries unique - todo_list = list(set(temp_list)) - organizer.complete_record_count = len(todo_list) - for do_file in todo_list: - _run_by_file_list(config, organizer, sname, command_name, - proxy, meta_visitors, data_visitors, do_file) - - if config.need_to_retry(): - for count in range(0, config.retry_count): - logging.warning( - 'Beginning retry {} in {}'.format(count + 1, os.getcwd())) - config.update_for_retry(count) - - # make another file list - temp_list = mc.read_from_file(config.work_fqn) - todo_list = [] - for ii in temp_list: - # because the entries in retry aren't compressed names - todo_list.append('{}.gz'.format(ii.strip())) - organizer = OrganizeExecutes(config, chooser) - organizer.complete_record_count = len(todo_list) - logging.info('Retry {} entries'.format( - organizer.complete_record_count)) - for redo_file in todo_list: - try: - _run_by_file_list(config, organizer, sname, command_name, - proxy, meta_visitors, data_visitors, - redo_file.strip()) - except Exception as e: - logging.error(e) - if not config.need_to_retry(): - break - logging.warning('Done retry attempts.') - - -def _run_by_file(config, storage_name, command_name, proxy, meta_visitors, - data_visitors, chooser=None): - """Process all entries by file name. The file names may be obtained - from the Config todo entry, from the --todo parameter, or from listing - files on local disk. 
- - :param config configures the execution of the application - :param storage_name which extension of StorageName to instantiate for the - collection - :param command_name extension of fits2caom2 for the collection - :param proxy Certificate proxy. - :param meta_visitors List of metadata visit methods. - :param data_visitors List of data visit methods. - """ - try: - if config.use_local_files: - logging.debug( - 'Using files from {}'.format(config.working_directory)) - organize = OrganizeExecutes(config, chooser) - _run_local_files(config, organize, storage_name, command_name, - proxy, meta_visitors, data_visitors, chooser) - else: - parser = ArgumentParser() - parser.add_argument('--todo', - help='Fully-qualified todo file name.') - args = parser.parse_args() - if args.todo is not None: - logging.debug('Using entries from todo file {}'.format( - args.todo)) - organize = OrganizeExecutes(config, chooser, args.todo) - else: - logging.debug('Using entries from file {}'.format( - config.work_file)) - organize = OrganizeExecutes(config, chooser) - _run_todo_file( - config, organize, storage_name, command_name, - proxy, meta_visitors, data_visitors) - if config.need_to_retry(): - for count in range(0, config.retry_count): - logging.warning('Beginning retry {}'.format(count + 1)) - config.update_for_retry(count) - try: - _run_by_file(config, storage_name, command_name, proxy, - meta_visitors, data_visitors, chooser) - except Exception as e: - logging.error(e) - if not config.need_to_retry(): - break - logging.warning('Done retry attempts.') - - logging.info('Done, processed {} of {} correctly.'.format( - organize.success_count, organize.complete_record_count)) - except Exception as e: - logging.error(e) - tb = traceback.format_exc() - logging.debug(tb) - - -def run_by_file(storage_name, command_name, collection, proxy, meta_visitors, - data_visitors, chooser=None, archive=None): - """Process all entries by file name. 
The file names may be obtained - from the Config todo entry, from the --todo parameter, or from listing - files on local disk. - - :param storage_name which extension of StorageName to instantiate for the - collection - :param command_name extension of fits2caom2 for the collection - :param collection string which indicates which collection CAOM instances - are being created for - :param proxy Certificate proxy. - :param meta_visitors List of metadata visit methods. - :param data_visitors List of data visit methods. - :param chooser OrganizeChooser instance for detailed CaomExecute - descendant choices - :param archive which ad storage files exist in. Defaults to collection - if not set. - """ - try: - config = mc.Config() - config.get_executors() - config.collection = collection - if archive is not None: - config.archive = archive - else: - config.archive = collection - logging.debug(config) - logger = logging.getLogger() - logger.setLevel(config.logging_level) - config.features.supports_composite = False - _run_by_file(config, storage_name, command_name, proxy, meta_visitors, - data_visitors, chooser) - return 0 - except Exception as e: - logging.error(e) - tb = traceback.format_exc() - logging.debug(tb) - return -1 - - -def run_single(config, storage_name, command_name, meta_visitors, - data_visitors, chooser=None): - """Process a single entry by StorageName detail. - - :param config mc.Config - :param storage_name instance of StorageName for the collection - :param command_name extension of fits2caom2 for the collection - :param meta_visitors List of metadata visit methods. - :param data_visitors List of data visit methods. 
- :param chooser OrganizeChooser instance for detailed CaomExecute - descendant choices - """ - organizer = OrganizeExecutes(config, chooser) - result = _do_one(config, organizer, storage_name, - command_name, meta_visitors, data_visitors) - sys.exit(result) - - -def run_single_from_state(organizer, config, storage_name, command_name, - meta_visitors, data_visitors): - """Process a single entry by StorageName detail. No sys.exit call. - - :param config mc.Config - :param storage_name instance of StorageName for the collection - :param command_name extension of fits2caom2 for the collection - :param meta_visitors List of metadata visit methods. - :param data_visitors List of data visit methods. - :param organizer single organizer instance, maintains log records. - """ - result = _do_one(config, organizer, storage_name, - command_name, meta_visitors, data_visitors) - logging.info('Result is {} for {}'.format(result, storage_name.file_name)) - return result - - -def _run_from_state(config, sname, command_name, meta_visitors, data_visitors, - todo): - """Process a list of entries by StorageName detail. No sys.exit call. - - :param config mc.Config - :param command_name extension of fits2caom2 for the collection - :param meta_visitors List of metadata visit methods. - :param data_visitors List of data visit methods. - :param todo list of work to be done, as URLs to files. 
- """ - organizer = OrganizeExecutes(config, chooser=None) - for url in todo: - storage_name = sname(url=url) - result = _do_one(config, organizer, storage_name, - command_name, meta_visitors, data_visitors) - logging.info( - 'Result is {} for {}'.format(result, storage_name.file_name)) - - if config.need_to_retry(): - for count in range(0, config.retry_count): - logging.warning( - 'Beginning retry {} in {}'.format(count + 1, os.getcwd())) - config.update_for_retry(count) - temp_list = mc.read_from_file(config.work_fqn) - todo_list = [] - for ii in temp_list: - todo_list.append(sname.make_url_from_file_name(ii)) - organizer = OrganizeExecutes(config, chooser=None) - organizer.complete_record_count = len(todo_list) - logging.info('Retry {} entries'.format( - organizer.complete_record_count)) - for redo_url in todo_list: - try: - storage_name = sname(url=redo_url) - _do_one(config, organizer, storage_name, command_name, - meta_visitors, data_visitors) - except Exception as e: - logging.error(e) - if not config.need_to_retry(): - break - logging.warning('Done retry attempts.') - - -def run_from_state(config, sname, command_name, meta_visitors, data_visitors, - todo): - """Process a list of entries by StorageName detail. No sys.exit call. - - :param config mc.Config - :param command_name extension of fits2caom2 for the collection - :param meta_visitors List of metadata visit methods. - :param data_visitors List of data visit methods. - :param todo list of work to be done, as URLs to files. 
- """ - try: - _run_from_state(config, sname, command_name, meta_visitors, - data_visitors, todo) - return 0 - except Exception as e: - logging.error(e) - tb = traceback.format_exc() - logging.error(tb) - return -1 diff --git a/caom2pipe/caom2pipe/manage_composable.py b/caom2pipe/caom2pipe/manage_composable.py deleted file mode 100644 index 5ee1408f..00000000 --- a/caom2pipe/caom2pipe/manage_composable.py +++ /dev/null @@ -1,1196 +0,0 @@ -# -*- coding: utf-8 -*- -# *********************************************************************** -# ****************** CANADIAN ASTRONOMY DATA CENTRE ******************* -# ************* CENTRE CANADIEN DE DONNÉES ASTRONOMIQUES ************** -# -# (c) 2018. (c) 2018. -# Government of Canada Gouvernement du Canada -# National Research Council Conseil national de recherches -# Ottawa, Canada, K1A 0R6 Ottawa, Canada, K1A 0R6 -# All rights reserved Tous droits réservés -# -# NRC disclaims any warranties, Le CNRC dénie toute garantie -# expressed, implied, or énoncée, implicite ou légale, -# statutory, of any kind with de quelque nature que ce -# respect to the software, soit, concernant le logiciel, -# including without limitation y compris sans restriction -# any warranty of merchantability toute garantie de valeur -# or fitness for a particular marchande ou de pertinence -# purpose. NRC shall not be pour un usage particulier. -# liable in any event for any Le CNRC ne pourra en aucun cas -# damages, whether direct or être tenu responsable de tout -# indirect, special or general, dommage, direct ou indirect, -# consequential or incidental, particulier ou général, -# arising from the use of the accessoire ou fortuit, résultant -# software. Neither the name de l'utilisation du logiciel. 
Ni -# of the National Research le nom du Conseil National de -# Council of Canada nor the Recherches du Canada ni les noms -# names of its contributors may de ses participants ne peuvent -# be used to endorse or promote être utilisés pour approuver ou -# products derived from this promouvoir les produits dérivés -# software without specific prior de ce logiciel sans autorisation -# written permission. préalable et particulière -# par écrit. -# -# This file is part of the Ce fichier fait partie du projet -# OpenCADC project. OpenCADC. -# -# OpenCADC is free software: OpenCADC est un logiciel libre ; -# you can redistribute it and/or vous pouvez le redistribuer ou le -# modify it under the terms of modifier suivant les termes de -# the GNU Affero General Public la “GNU Affero General Public -# License as published by the License” telle que publiée -# Free Software Foundation, par la Free Software Foundation -# either version 3 of the : soit la version 3 de cette -# License, or (at your option) licence, soit (à votre gré) -# any later version. toute version ultérieure. -# -# OpenCADC is distributed in the OpenCADC est distribué -# hope that it will be useful, dans l’espoir qu’il vous -# but WITHOUT ANY WARRANTY; sera utile, mais SANS AUCUNE -# without even the implied GARANTIE : sans même la garantie -# warranty of MERCHANTABILITY implicite de COMMERCIALISABILITÉ -# or FITNESS FOR A PARTICULAR ni d’ADÉQUATION À UN OBJECTIF -# PURPOSE. See the GNU Affero PARTICULIER. Consultez la Licence -# General Public License for Générale Publique GNU Affero -# more details. pour plus de détails. -# -# You should have received Vous devriez avoir reçu une -# a copy of the GNU Affero copie de la Licence Générale -# General Public License along Publique GNU Affero avec -# with OpenCADC. If not, see OpenCADC ; si ce n’est -# . pas le cas, consultez : -# . 
-# -# $Revision: 4 $ -# -# *********************************************************************** -# - -import csv -import logging -import os -import requests -import subprocess -import yaml - -from datetime import datetime -from enum import Enum -from hashlib import md5 -from io import BytesIO -from os import stat -from requests.adapters import HTTPAdapter -from urllib import parse as parse -from urllib3 import Retry - -from cadcutils import net -from cadcdata import CadcDataClient -from caom2 import ObservationWriter, ObservationReader, Artifact -from caom2 import ChecksumURI - - -__all__ = ['CadcException', 'Config', 'State', 'to_float', 'TaskType', - 'exec_cmd', 'exec_cmd_redirect', 'exec_cmd_info', - 'get_cadc_meta', 'get_file_meta', 'compare_checksum', - 'decompose_lineage', 'check_param', 'read_csv_file', - 'write_obs_to_file', 'read_obs_from_file', - 'compare_checksum_client', 'Features', 'write_to_file', - 'read_from_file', 'read_file_list_from_archive', 'update_typed_set', - 'get_cadc_headers', 'get_lineage', 'get_artifact_metadata', - 'data_put', 'data_get', 'build_uri'] - - -class CadcException(Exception): - """Generic exception raised by failure cases within the caom2pipe - module.""" - pass - - -class Features(object): - """Boolean feature flag implementation.""" - - def __init__(self): - self.use_file_names = True - self.run_in_airflow = True - self.supports_composite = True - self.supports_catalog = True - self.expects_retry = True - - @property - def use_file_names(self): - """If true, the lists of work to be done are expected to - identify file names. 
If false, they are expected to identify - observation IDs.""" - return self._use_file_names - - @use_file_names.setter - def use_file_names(self, value): - self._use_file_names = value - - @property - def run_in_airflow(self): - """If true, will treat command-line arguments as if the application - is running in airflow.""" - return self._run_in_airflow - - @run_in_airflow.setter - def run_in_airflow(self, value): - self._run_in_airflow = value - - @property - def supports_composite(self): - """If true, will execute any specific code for composite observation - definition.""" - return self._supports_composite - - @supports_composite.setter - def supports_composite(self, value): - self._supports_composite = value - - @property - def supports_catalog(self): - """If true, will execute any specific code for catalog handling - when creating a CAOM instance.""" - return self._supports_catalog - - @supports_catalog.setter - def supports_catalog(self, value): - self._supports_catalog = value - - @property - def expects_retry(self): - """If true, will execute any specific code for running retries - based on retries_log.txt content.""" - return self._expects_retry - - @expects_retry.setter - def expects_retry(self, value): - self._expects_retry = value - - def __str__(self): - return ' '.join( - '{} {}'.format(ii, getattr(self, ii)) for ii in vars(self)) - - -class TaskType(Enum): - """The possible steps in a Collection pipeline. A short-hand, user-facing - way to identify the work to be done by a pipeline.""" - STORE = 'store' # store a local file to ad - SCRAPE = 'scrape' # local CAOM instance creation, no network required - INGEST = 'ingest' # create a CAOM instance from metadata only - MODIFY = 'modify' # modify a CAOM instance from data - CHECKSUM = 'checksum' # is the checksum on local disk the same as in ad? 
- VISIT = 'visit' # visit an observation - # remote file storage, create CAOM instance via local metadata - REMOTE = 'remote' - # retrieve file via HTTP to local temp storage, store to ad - PULL = 'pull' - - -class State(object): - """Persist information between pipeline invocations. - - Currently the State class persists the concept of a bookmark, which is the - place in the flow of data that was last processed. This 'place' may be a - timestamp, or an id. That value is up to clients of this class. - """ - - def __init__(self, fqn): - self.fqn = fqn - self.bookmarks = {} - self.logger = logging.getLogger('State') - result = read_as_yaml(self.fqn) - if result is None: - raise CadcException('Could not load state from {}'.format(fqn)) - else: - self.bookmarks = result.get('bookmarks') - self.content = result - - def get_bookmark(self, key): - """Lookup for last_record key.""" - result = None - if key in self.bookmarks: - if 'last_record' in self.bookmarks[key]: - result = self.bookmarks[key]['last_record'] - else: - self.logger.warning('No record found for {}'.format(key)) - else: - self.logger.warning('No bookmarks found for {}'.format(key)) - return result - - def save_state(self, key, value): - """Write the current state as a YAML file. 
- :param key which record is being updated - :param value the value to update the record with - """ - if key in self.bookmarks: - if 'last_record' in self.bookmarks[key]: - self.bookmarks[key]['last_record'] = value - write_as_yaml(self.content, self.fqn) - else: - self.logger.warning('No record found for {}'.format(key)) - else: - self.logger.warning('No bookmarks found for {}'.format(key)) - - -class Config(object): - """Configuration information that remains the same for all steps and all - work in a pipeline execution.""" - - def __init__(self): - self.working_directory = None - self.work_file = None - # the fully qualified name for the work file - self.work_fqn = None - self.netrc_file = None - self.archive = None - self.collection = None - self.use_local_files = False - self.resource_id = None - self.tap_id = None - self.logging_level = None - self.log_to_file = False - self.log_file_directory = None - self.stream = None - self.storage_host = None - self.task_types = None - self.success_log_file_name = None - # the fully qualified name for the file - self.success_fqn = None - self.failure_log_file_name = None - # the fully qualified name for the file - self.failure_fqn = None - self.retry_file_name = None - # the fully qualified name for the file - self.retry_fqn = None - self.retry_failures = False - self.retry_count = 1 - self.proxy_file_name = None - # the fully qualified name for the file - self.proxy_fqn = None - self.state_file_name = None - # the fully qualified name for the file - self.state_fqn = None - self.features = Features() - - @property - def working_directory(self): - """the root directory for all executor operations""" - return self._working_directory - - @working_directory.setter - def working_directory(self, value): - self._working_directory = value - - @property - def work_file(self): - """ the file that contains the list of work to be passed through the - pipeline""" - return self._work_file - - @work_file.setter - def work_file(self, 
value): - self._work_file = value - if self.working_directory is not None: - self.work_fqn = os.path.join( - self.working_directory, self.work_file) - - @property - def netrc_file(self): - """credentials for any service calls""" - return self._netrc_file - - @netrc_file.setter - def netrc_file(self, value): - self._netrc_file = value - - @property - def collection(self): - """which collection is addressed by the pipeline""" - return self._collection - - @collection.setter - def collection(self, value): - self._collection = value - - @property - def archive(self): - """which archive is addressed by the pipeline""" - return self._archive - - @archive.setter - def archive(self, value): - self._archive = value - - @property - def use_local_files(self): - """changes expectations of the executors for handling files on disk""" - return self._use_local_files - - @use_local_files.setter - def use_local_files(self, value): - self._use_local_files = value - - @property - def resource_id(self): - """which service instance to use""" - return self._resource_id - - @resource_id.setter - def resource_id(self, value): - self._resource_id = value - - @property - def tap_id(self): - """which tap service instance to use""" - return self._tap_id - - @tap_id.setter - def tap_id(self, value): - self._tap_id = value - - @property - def log_to_file(self): - """boolean - write the log to a file?""" - return self._log_to_file - - @log_to_file.setter - def log_to_file(self, value): - self._log_to_file = value - - @property - def log_file_directory(self): - """where log files are written to - defaults to working_directory""" - return self._log_file_directory - - @log_file_directory.setter - def log_file_directory(self, value): - self._log_file_directory = value - - @property - def logging_level(self): - """the logging level - enforced throughout the pipeline""" - return self._logging_level - - @logging_level.setter - def logging_level(self, value): - lookup = {'DEBUG': logging.DEBUG, - 'INFO': 
logging.INFO, - 'WARNING': logging.WARNING, - 'ERROR': logging.ERROR} - if value in lookup: - self._logging_level = lookup[value] - - @property - def stream(self): - """the ad 'stream' that goes with the archive - use when storing - files""" - return self._stream - - @stream.setter - def stream(self, value): - self._stream = value - - @property - def storage_host(self): - """the ad 'host' to store files to - used for testing cadc-data put - commands only, should usually be None""" - return self._storage_host - - @storage_host.setter - def storage_host(self, value): - self._storage_host = value - - @property - def task_type(self): - """the way to control which steps get executed""" - return self._task_type - - @task_type.setter - def task_type(self, value): - self._task_type = value - - @property - def success_log_file_name(self): - """the filename where success logs are written, this will be created - in log_file_directory""" - return self._success_log_file_name - - @success_log_file_name.setter - def success_log_file_name(self, value): - self._success_log_file_name = value - if self.log_file_directory is not None: - self.success_fqn = os.path.join( - self.log_file_directory, self.success_log_file_name) - - @property - def failure_log_file_name(self): - """the filename where failure logs are written this will be created - in log_file_directory""" - return self._failure_log_file_name - - @failure_log_file_name.setter - def failure_log_file_name(self, value): - self._failure_log_file_name = value - if self.log_file_directory is not None: - self.failure_fqn = os.path.join( - self.log_file_directory, self.failure_log_file_name) - - @property - def retry_file_name(self): - """the filename where retry entries are written this will be created - in log_file_directory""" - return self._retry_file_name - - @retry_file_name.setter - def retry_file_name(self, value): - self._retry_file_name = value - if self.log_file_directory is not None: - self.retry_fqn = os.path.join( - 
self.log_file_directory, self.retry_file_name) - - @property - def retry_failures(self): - """Will the application retry the entries in the - retries.txt file? If True, the application will attempt to re-run - the work to do for each entry in the retries.txt file. If False, - it will do nothing.""" - return self._retry_failures - - @retry_failures.setter - def retry_failures(self, value): - self._retry_failures = value - - @property - def retry_count(self): - """how many times the application will retry the entries in the - retries.txt file.""" - return self._retry_count - - @retry_count.setter - def retry_count(self, value): - self._retry_count = value - - @property - def proxy_file_name(self): - """If using a proxy certificate for authentication, identify the - fully-qualified pathname here.""" - return self._proxy_file_name - - @proxy_file_name.setter - def proxy_file_name(self, value): - self._proxy_file_name = value - if (self.working_directory is not None and - self.proxy_file_name is not None): - self.proxy_fqn = os.path.join( - self.working_directory, self.proxy_file_name) - - @property - def state_file_name(self): - """If using a state file to communicate persistent information between - invocations, identify the fully-qualified pathname here.""" - return self._state_file_name - - @state_file_name.setter - def state_file_name(self, value): - self._state_file_name = value - if (self.working_directory is not None and - self.state_file_name is not None): - self.state_fqn = os.path.join( - self.working_directory, self.state_file_name) - - @property - def features(self): - """Feature flag setting access.""" - return self._features - - @features.setter - def features(self, value): - self._features = value - - @staticmethod - def _lookup(config, lookup, default): - if lookup in config: - result = config[lookup] - else: - result = default - return result - - def __str__(self): - return 'working_directory:: \'{}\' ' \ - 'work_fqn:: \'{}\' ' \ - 'netrc_file:: \'{}\' 
' \ - 'archive:: \'{}\' ' \ - 'collection:: \'{}\' ' \ - 'task_types:: \'{}\' ' \ - 'stream:: \'{}\' ' \ - 'resource_id:: \'{}\' ' \ - 'tap_id:: \'{}\' ' \ - 'use_local_files:: \'{}\' ' \ - 'log_to_file:: \'{}\' ' \ - 'log_file_directory:: \'{}\' ' \ - 'success_log_file_name:: \'{}\' ' \ - 'success_fqn:: \'{}\' ' \ - 'failure_log_file_name:: \'{}\' ' \ - 'failure_fqn:: \'{}\' ' \ - 'retry_file_name:: \'{}\' ' \ - 'retry_fqn:: \'{}\' ' \ - 'retry_failures:: \'{}\' ' \ - 'retry_count:: \'{}\' ' \ - 'proxy_file:: \'{}\' ' \ - 'state_fqn:: \'{}\' ' \ - 'features:: \'{}\' ' \ - 'logging_level:: \'{}\''.format( - self.working_directory, self.work_fqn, self.netrc_file, - self.archive, self.collection, self.task_types, self.stream, - self.resource_id, - self.tap_id, self.use_local_files, self.log_to_file, - self.log_file_directory, self.success_log_file_name, - self.success_fqn, self.failure_log_file_name, - self.failure_fqn, self.retry_file_name, self.retry_fqn, - self.retry_failures, self.retry_count, self.proxy_fqn, - self.state_fqn, self.features, self.logging_level) - - @staticmethod - def _obtain_task_types(config, default=None): - """Make the configuration file entries into the Enum.""" - task_types = [] - if 'task_types' in config: - for ii in config['task_types']: - task_types.append(TaskType(ii)) - return task_types - else: - return default - - @staticmethod - def _obtain_features(config): - """Make the configuration file entries into the class members.""" - feature_flags = Features() - if 'features' in config: - for ii in config['features']: - if not config['features'][ii]: - getattr(feature_flags, ii) - setattr(feature_flags, ii, False) - return feature_flags - - def get(self): - """Look up the configuration values in the data structure extracted - from the configuration file.""" - return self.get_executors() - - def get_executors(self): - """Look up the configuration values in the data structure extracted - from the configuration file. 
- - Consider this deprecated - use get instead, because the name is - non-representative of the work being done. - """ - try: - config = self.get_config() - self.working_directory = \ - self._lookup(config, 'working_directory', os.getcwd()) - self.work_file = self._lookup(config, 'todo_file_name', 'todo.txt') - self.netrc_file = \ - self._lookup(config, 'netrc_filename', 'test_netrc') - self.resource_id = self._lookup( - config, 'resource_id', 'ivo://cadc.nrc.ca/sc2repo') - self.tap_id = self._lookup( - config, 'tap_id', 'ivo://cadc.nrc.ca/sc2tap') - self.use_local_files = bool( - self._lookup(config, 'use_local_files', False)) - self.logging_level = self._lookup(config, 'logging_level', 'DEBUG') - self.log_to_file = self._lookup(config, 'log_to_file', False) - self.log_file_directory = self._lookup( - config, 'log_file_directory', self.working_directory) - self.stream = self._lookup(config, 'stream', 'raw') - self.task_types = self._obtain_task_types( - config, [TaskType.SCRAPE]) - self.collection = self._lookup(config, 'collection', 'TEST') - self.archive = self._lookup(config, 'archive', self.collection) - self.success_log_file_name = self._lookup(config, - 'success_log_file_name', - 'success_log.txt') - self.failure_log_file_name = self._lookup(config, - 'failure_log_file_name', - 'failure_log.txt') - self.retry_file_name = self._lookup(config, 'retry_file_name', - 'retries.txt') - self.retry_failures = self._lookup(config, 'retry_failures', False) - self.retry_count = self._lookup(config, 'retry_count', 1) - self.features = self._obtain_features(config) - self.proxy_file_name = self._lookup( - config, 'proxy_file_name', None) - self.state_file_name = self._lookup( - config, 'state_file_name', None) - except KeyError as e: - raise CadcException( - 'Error in config file {}'.format(e)) - - def get_config(self): - """Return a configuration dictionary. 
Assumes a file named config.yml - in the current working directory.""" - config_fqn = os.path.join(os.getcwd(), 'config.yml') - config = self.load_config(config_fqn) - if config is None: - raise CadcException( - 'Could not find the file {}'.format(config_fqn)) - return config - - def need_to_retry(self): - """Evaluate the need to have the pipeline try to re-execute for any - files/observations that have been logged as failures. - - If log_to_file is not set to True, there is no retry file content - to retry on. - - :param config does the configuration identify retry information? - :return True if the configuration and logging information indicate a - need to attempt to retry the pipeline execution for any entries. - """ - result = True - if (self.features is not None and self.features.expects_retry and - self.retry_failures and self.log_to_file): - meta = get_file_meta(self.retry_fqn) - if meta['size'] == 0: - logging.info('Checked the retry file {}. There are no logged ' - 'failures.'.format(self.retry_fqn)) - result = False - else: - result = False - return result - - def update_for_retry(self, count): - """ - When retrying, the application will: - - - use the retries.txt file as the todo list - - retry as many times as the 'retry count' in the config.yml file. - - make a new log directory, in the working directory, with the name - logs_{retry_count}. Any failures for the retry execution that - need to be logged will be logged here. 
- - in the new log directory, make a new .xml file for the - output, with the name {obs_id}.xml - - :param count the current retry iteration - """ - self.work_file = '{}'.format(self.retry_file_name) - self.work_fqn = self.retry_fqn - if '_' in self.log_file_directory: - temp = self.log_file_directory.split('_')[0] - self.log_file_directory = '{}_{}'.format(temp, count) - else: - self.log_file_directory = '{}_{}'.format( - self.log_file_directory, count) - # reset the location of the log file names - self.success_log_file_name = self.success_log_file_name - self.failure_log_file_name = self.failure_log_file_name - self.retry_file_name = self.retry_file_name - - logging.info('Retry work file is {}'.format(self.work_fqn)) - - @staticmethod - def load_config(config_fqn): - """Read a configuration as a YAML file. - :param config_fqn the fully qualified name for the configuration - file. - """ - try: - logging.debug('Begin load_config.') - with open(config_fqn) as f: - data_map = yaml.safe_load(f) - logging.debug('End load_config.') - return data_map - except (yaml.scanner.ScannerError, FileNotFoundError) as e: - logging.error(e) - return None - - -def to_float(value): - """Cast to float, without throwing an exception.""" - return float(value) if value is not None else None - - -def exec_cmd(cmd): - """ - This does command execution as a subprocess call. 
- - :param cmd the text version of the command being executed - :return None - """ - logging.debug(cmd) - cmd_array = cmd.split() - try: - child = subprocess.Popen(cmd_array, stdout=subprocess.PIPE, - stderr=subprocess.PIPE) - output, outerr = child.communicate() - logging.debug('stdout {}'.format(output.decode('utf-8'))) - logging.debug('stderr {}'.format(outerr.decode('utf-8'))) - if child.returncode != 0: - logging.debug('Command {} failed.'.format(cmd)) - raise CadcException( - 'Command {} had stdout{} stderr {}'.format( - cmd, output.decode('utf-8'), outerr.decode('utf-8'))) - except Exception as e: - logging.debug('Error with command {}:: {}'.format(cmd, e)) - raise CadcException('Could not execute cmd {}. ' - 'Exception {}'.format(cmd, e)) - - -def exec_cmd_info(cmd): - """ - This does command execution as a subprocess call. - - :param cmd the text version of the command being executed - :return The text from stdout. - """ - logging.debug(cmd) - cmd_array = cmd.split() - try: - output, outerr = subprocess.Popen(cmd_array, stdout=subprocess.PIPE, - stderr=subprocess.PIPE).communicate() - if outerr is not None and len(outerr) > 0 and outerr[0] is not None: - raise CadcException('Command {} had stderr {}'.format( - cmd, outerr.decode('utf-8'))) - if output is not None and len(output) > 0: - return output.decode('utf-8') - except Exception as e: - logging.debug('Error with command {}:: {}'.format(cmd, e)) - raise CadcException('Could not execute cmd {}.' - 'Exception {}'.format(cmd, e)) - - -def exec_cmd_redirect(cmd, fqn): - """ - This does command execution as a subprocess call. It redirects stdout - to fqn, and assumes binary output for the re-direct. 
- - :param cmd the text version of the command being executed - :param fqn the fully-qualified name of the file to which stdout is - re-directed - :return None - """ - logging.debug(cmd) - cmd_array = cmd.split() - try: - with open(fqn, 'wb') as outfile: - outerr = subprocess.Popen( - cmd_array, stdout=outfile, - stderr=subprocess.PIPE).communicate() - if (outerr is not None and len(outerr) > 0 and - outerr[0] is not None): - logging.debug('Command {} had stderr {}'.format( - cmd, outerr.decode('utf-8'))) - raise CadcException( - 'Command {} had outerr {}'.format( - cmd, outerr.decode('utf-8'))) - except Exception as e: - logging.debug('Error with command {}:: {}'.format(cmd, e)) - raise CadcException('Could not execute cmd {}.' - 'Exception {}'.format(cmd, e)) - - -def get_cadc_headers(uri): - """ - Creates the FITS headers object by fetching the FITS headers of a CADC - file. The function takes advantage of the fhead feature of the CADC - storage service and retrieves just the headers and no data, minimizing - the transfer time. - - The file must be public, because the header retrieval is done as an - anonymous user. - - :param uri: CADC URI - :return: a string of keyword/value pairs. 
- """ - file_url = parse.urlparse(uri) - # create possible types of subjects - subject = net.Subject() - client = CadcDataClient(subject) - # do a fhead on the file - archive, file_id = file_url.path.split('/') - b = BytesIO() - b.name = uri - client.get_file(archive, file_id, b, fhead=True) - fits_header = b.getvalue().decode('ascii') - b.close() - return fits_header - - -def get_cadc_meta(netrc_fqn, archive, fname): - """ - Gets contentType, contentLength and contentChecksum of a CADC artifact - :param netrc_fqn: user credentials - :param archive: archive file has been stored to - :param fname: name of file in the archive - :return: - """ - subject = net.Subject(username=None, certificate=None, netrc=netrc_fqn) - client = CadcDataClient(subject) - return client.get_file_info(archive, fname) - - -def get_file_meta(fqn): - """ - Gets contentType, contentLength and contentChecksum of an artifact on disk. - - :param fqn: Fully-qualified name of the file for which to get the metadata. - :return: - """ - if fqn is None or not os.path.exists(fqn): - raise CadcException('Could not find {} in get_file_meta'.format(fqn)) - meta = {} - s = stat(fqn) - meta['size'] = s.st_size - meta['md5sum'] = md5(open(fqn, 'rb').read()).hexdigest() - if fqn.endswith('.header') or fqn.endswith('.txt'): - meta['type'] = 'text/plain' - elif fqn.endswith('.csv'): - meta['type'] = 'text/csv' - elif fqn.endswith('.gif'): - meta['type'] = 'image/gif' - elif fqn.endswith('.png'): - meta['type'] = 'image/png' - elif fqn.endswith('.jpg'): - meta['type'] = 'image/jpeg' - elif fqn.endswith('tar.gz'): - meta['type'] = 'application/gzip' - else: - meta['type'] = 'application/fits' - logging.debug(meta) - return meta - - -def _check_checksums(fqn, archive, local_meta, ad_meta): - """Raise CadcException if the checksum of a file in ad is not the same as - the checksum of a file on disk. - - :param fqn: Fully-qualified name of file for which to compare metadata. 
- :param archive: archive file has been stored to - :param local_meta: md5 checksum for the file on disk - :param ad_meta: md5 checksum for the file in ad storage - """ - - if ((fqn.endswith('.gz') and local_meta['md5sum'] != - ad_meta['md5sum']) or ( - not fqn.endswith('.gz') and local_meta['md5sum'] != - ad_meta['umd5sum'])): - raise CadcException( - '{} md5sum not the same as the one in the ad ' - '{} archive.'.format(fqn, archive)) - - -def compare_checksum(netrc_fqn, archive, fqn): - """ - Raise CadcException if the checksum of a file in ad is not the same as - the checksum of a file on disk. - - :param netrc_fqn: fully-qualified file name for the netrc file - :param archive: archive file has been stored to - :param fqn: Fully-qualified name of the file for which to get the metadata. - """ - fname = os.path.basename(fqn) - try: - local_meta = get_file_meta(fqn) - ad_meta = get_cadc_meta(netrc_fqn, archive, fname) - except Exception as e: - raise CadcException('Could not find md5 checksum for {} in the ad {} ' - 'archive. {}'.format(fqn, archive, e)) - _check_checksums(fqn, archive, local_meta, ad_meta) - - -def compare_checksum_client(client, archive, fqn): - """ - Raise CadcException if the checksum of a file in ad is not the same as - the checksum of a file on disk. - - :param client: access to CADC data service - :param archive: archive file has been stored to - :param fqn: Fully-qualified name of the file for which to get the metadata. - """ - fname = os.path.basename(fqn) - try: - local_meta = get_file_meta(fqn) - ad_meta = client.get_file_info(archive, fname) - except Exception as e: - raise CadcException('Could not find md5 checksum for {} in the ad {} ' - 'archive. 
{}'.format(fqn, archive, e)) - _check_checksums(fqn, archive, local_meta, ad_meta) - - -def create_dir(dir_name): - """Create the working area if it does not already exist.""" - if not os.path.exists(dir_name): - os.mkdir(dir_name) - if not os.path.exists(dir_name): - raise CadcException( - 'Could not mkdir {}'.format(dir_name)) - if not os.access(dir_name, os.W_OK | os.X_OK): - raise CadcException( - '{} is not writeable.'.format(dir_name)) - - -def decompose_lineage(lineage): - """Returns a product id and an artifact uri from the command line.""" - try: - result = lineage.split('/', 1) - return result[0], result[1] - except Exception as e: - logging.debug('Lineage {} caused error {}. Expected ' - 'product_id/ad:ARCHIVE/FILE_NAME'.format( - lineage, e)) - raise CadcException('Expected product_id/ad:ARCHIVE/FILE_NAME') - - -def check_param(param, param_type): - """Generic code to check if a parameter is not None, and is of the - expected type.""" - if param is None or not isinstance(param, param_type): - raise CadcException( - 'Parameter {} failed check for {}'.format(param, param_type)) - - -def read_csv_file(fqn): - """Read a csv file. - - :returns a list of lists.""" - results = [] - try: - with open(fqn) as csv_file: - reader = csv.reader(csv_file) - for row in reader: - if row[0].startswith('#'): - continue - results.append(row) - except Exception as e: - logging.error('Could not read from csv file {}'.format(fqn)) - raise CadcException(e) - return results - - -def write_obs_to_file(obs, fqn): - """Common code to write a CAOM Observation to a file.""" - ow = ObservationWriter() - ow.write(obs, fqn) - - -def read_obs_from_file(fqn): - """Common code to read a CAOM Observation from a file.""" - if not os.path.exists(fqn): - raise CadcException('Could not find {}'.format(fqn)) - reader = ObservationReader(False) - return reader.read(fqn) - - -def read_from_file(fqn): - """Common code to read from a text file. 
Mostly to make it easy to - mock.""" - if not os.path.exists(fqn): - raise CadcException('Could not find {}'.format(fqn)) - with open(fqn, 'r') as f: - return f.readlines() - - -def write_to_file(fqn, content): - """Common code to write to a fully-qualified file name. Mostly to make - it easy to mock.""" - try: - with open(fqn, 'w') as f: - f.write(content) - except Exception: - logging.error('Could not write file {}'.format(fqn)) - raise CadcException('Could not write file {}'.format(fqn)) - - -def update_typed_set(typed_set, new_set): - """Common code to remove all the entries from an existing set, and - then replace those entries with a new set.""" - # remove the previous values - while len(typed_set) > 0: - typed_set.pop() - typed_set.update(new_set) - - -def format_time_for_query(from_time): - length = len(datetime.now().strftime('%Y-%m-%dT%H:%M:%S')) - return datetime.strptime(from_time[:length], '%Y-%m-%dT%H:%M:%S') - - -def read_file_list_from_archive(config, app_name, prev_exec_date, exec_date): - """Code to execute a time-boxed query for files that have arrived in ad. - - :param config Config instance - :param app_name Information used in the http connection for tracing - queries. - :param prev_exec_date Timestamp that indicates the beginning of the - chunk of time. Results will be > than this time. - :param exec_date Timestamp. that indicates the end of the chunk of time. - Results will be <= this time. 
- """ - start_time = format_time_for_query(prev_exec_date) - end_time = format_time_for_query(exec_date) - ad_resource_id = 'ivo://cadc.nrc.ca/ad' - agent = '{}/{}'.format(app_name, '1.0') - subject = net.Subject(certificate=config.proxy_fqn) - client = net.BaseWsClient(resource_id=ad_resource_id, - subject=subject, agent=agent, retry=True) - query_meta = "SELECT fileName FROM archive_files WHERE " \ - "archiveName = '{}' AND ingestDate > '{}' and " \ - "ingestDate <= '{}' ORDER BY ingestDate".format( - config.archive, start_time, end_time) - data = {'QUERY': query_meta, 'LANG': 'ADQL', 'FORMAT': 'csv'} - logging.debug('Query is {}'.format(query_meta)) - try: - response = client.get('https://{}/ad/sync?{}'.format( - client.host, parse.urlencode(data)), cert=config.proxy_fqn) - if response.status_code == 200: - # ignore the column name as the first part of the response - artifact_files_list = response.text.split()[1:] - return artifact_files_list - else: - logging.warning('No work to do. Query failure {!r}'.format( - response)) - return [] - except Exception as e: - raise CadcException('Failed ad content query: {}'.format(e)) - - -def get_lineage(archive, product_id, file_name, scheme='ad'): - """Construct an instance of the caom2gen lineage parameter. - :param archive archive name at CADC. - :param product_id CAOM2 Plane unique identifier. - :param file_name String representation of the file name. - :param scheme Usually 'ad', otherwise an indication of external storage. - :return str understood by the caom2gen application, lineage parameter - value""" - return '{}/{}:{}/{}'.format(product_id, scheme, archive, file_name) - - -def get_artifact_metadata(fqn, product_type, release_type, uri=None, - artifact=None): - """ - Build or update artifact content metadata using the CAOM2 objects, and - with access to a file on disk. - - :param fqn: The fully-qualified name of the file on disk, for which an - Artifact is being created or updated. 
- :param product_type: which ProductType enumeration value - :param release_type: which ReleaseType enumeration value - :param uri: mandatory if creating an Artifact, a URI of the form - scheme:ARCHIVE/file_name - :param artifact: use when updating an existing Artifact instance - - :return: the created or updated Artifact instance, with the - content_* elements filled in. - """ - local_meta = get_file_meta(fqn) - md5uri = ChecksumURI('md5:{}'.format(local_meta['md5sum'])) - if artifact is None: - if uri is None: - raise CadcException('Cannot build an Artifact without a URI.') - return Artifact(uri, product_type, release_type, local_meta['type'], - local_meta['size'], md5uri) - else: - artifact.product_type = product_type - artifact.content_type = local_meta['type'] - artifact.content_length = local_meta['size'] - artifact.content_checksum = md5uri - return artifact - - -def data_put(client, working_directory, file_name, archive, stream='raw', - mime_type=None): - """ - Make a copy of a locally available file by writing it to CADC. Assumes - file and directory locations are correct. Does a checksum comparison to - test whether the file made it to storage as it exists on disk. - - :param client: The CadcDataClient for write access to CADC storage. - :param working_directory: Where 'file_name' exists locally. - :param file_name: What to copy to CADC storage. - :param archive: Which archive to associate the file with. - :param stream: Defaults to raw - use is deprecated, however necessary it - may be at the current moment to the 'put_file' call. - :param mime_type: Because libmagic can't see inside a zipped fits file. 
- """ - cwd = os.getcwd() - try: - os.chdir(working_directory) - client.put_file(archive, file_name, archive_stream=stream, - mime_type=mime_type) - except Exception as e: - raise CadcException('Failed to store data with {}'.format(e)) - finally: - os.chdir(cwd) - compare_checksum_client(client, archive, - os.path.join(working_directory, file_name)) - - -def data_get(client, working_directory, file_name, archive): - """ - Retrieve a local copy of a file available from CADC. Assumes the working - directory location exists and is writeable. - - :param client: The CadcDataClient for read access to CADC storage. - :param working_directory: Where 'file_name' will be written. - :param file_name: What to copy from CADC storage. - :param archive: Which archive to retrieve the file from. - """ - fqn = os.path.join(working_directory, file_name) - try: - client.get_file(archive, file_name, destination=fqn) - if not os.path.exists(fqn): - raise CadcException( - 'Retrieve failed. {} does not exist.'.format(fqn)) - except Exception as e: - raise CadcException('Did not retrieve {} because {}'.format( - fqn, e)) - - -def build_uri(archive, file_name, scheme='ad'): - """One location to keep the syntax for an Artifact URI.""" - return '{}:{}/{}'.format(scheme, archive, file_name) - - -def query_endpoint(url, timeout=20): - """Return a response for an endpoint. Caller needs to call 'close' - on the response. 
- """ - - # Open the URL and fetch the JSON document for the observation - session = requests.Session() - retries = 10 - retry = Retry(total=retries, read=retries, connect=retries, - backoff_factor=0.5) - adapter = HTTPAdapter(max_retries=retry) - session.mount('http://', adapter) - session.mount('https://', adapter) - try: - response = session.get(url, timeout=timeout) - response.raise_for_status() - return response - except Exception as e: - raise CadcException('Endpoint {} failure {}'.format(url, str(e))) - - -def read_as_yaml(fqn): - """Read and return YAML content of 'fqn'.""" - try: - logging.debug('Begin read_as_yaml for {}.'.format(fqn)) - with open(fqn) as f: - data_map = yaml.safe_load(f) - logging.debug('End read_as_yaml.') - return data_map - except (yaml.scanner.ScannerError, FileNotFoundError) as e: - logging.error(e) - return None - - -def write_as_yaml(content, fqn): - """Write 'content' to 'fqn' as YAML.""" - try: - logging.debug('Begin write_as_yaml for {}.'.format(fqn)) - with open(fqn, 'w') as f: - yaml.dump(content, f, default_flow_style=False) - logging.debug('End write_as_yaml.') - except Exception as e: - logging.error(e) diff --git a/caom2pipe/caom2pipe/tests/data/C111107_0694_SCI.fits b/caom2pipe/caom2pipe/tests/data/C111107_0694_SCI.fits deleted file mode 100644 index 30d74d25..00000000 --- a/caom2pipe/caom2pipe/tests/data/C111107_0694_SCI.fits +++ /dev/null @@ -1 +0,0 @@ -test \ No newline at end of file diff --git a/caom2pipe/caom2pipe/tests/data/config.yml b/caom2pipe/caom2pipe/tests/data/config.yml deleted file mode 100644 index b7f6c90a..00000000 --- a/caom2pipe/caom2pipe/tests/data/config.yml +++ /dev/null @@ -1,11 +0,0 @@ -working_directory: /usr/src/app/omm2caom2/omm2caom2/tests/data -netrc_filename: .netrc -proxy_file_name: test_proxy.pem -resource_id: ivo://cadc.nrc.ca/sc2repo -todo_file_name: todo.txt -state_file_name: state.yml -use_local_files: False -logging_level: DEBUG -features: - supports_composite: False - 
run_in_airflow: True diff --git a/caom2pipe/caom2pipe/tests/data/fail.txt b/caom2pipe/caom2pipe/tests/data/fail.txt deleted file mode 100644 index a90f07ea..00000000 --- a/caom2pipe/caom2pipe/tests/data/fail.txt +++ /dev/null @@ -1 +0,0 @@ -2019-08-15 12:54:45.887223 test_obs_id test_obs_id.fits Invalid observation ID diff --git a/caom2pipe/caom2pipe/tests/data/good.txt b/caom2pipe/caom2pipe/tests/data/good.txt deleted file mode 100644 index e69de29b..00000000 diff --git a/caom2pipe/caom2pipe/tests/data/retries.txt b/caom2pipe/caom2pipe/tests/data/retries.txt deleted file mode 100644 index 044d5e36..00000000 --- a/caom2pipe/caom2pipe/tests/data/retries.txt +++ /dev/null @@ -1 +0,0 @@ -retry_obs_id diff --git a/caom2pipe/caom2pipe/tests/data/retry.txt b/caom2pipe/caom2pipe/tests/data/retry.txt deleted file mode 100644 index 78b8b7d6..00000000 --- a/caom2pipe/caom2pipe/tests/data/retry.txt +++ /dev/null @@ -1 +0,0 @@ -test_obs_id diff --git a/caom2pipe/caom2pipe/tests/data/test_csv.csv b/caom2pipe/caom2pipe/tests/data/test_csv.csv deleted file mode 100644 index fe499a55..00000000 --- a/caom2pipe/caom2pipe/tests/data/test_csv.csv +++ /dev/null @@ -1,3 +0,0 @@ -# saved from url=(0106)https://archive.nrao.edu/archive/ArchiveQuery?PROTOCOL=TEXT-stream&QUERYTYPE=ARCHIVE&PROJECT_CODE=VLASS1.1 -# file_root logical_file lock_status project_code segment starttime stoptime filesize telescope:config obs_bands format calib raw_project_code arch_file_id thread_id = 6414 queryDateTime = 2018-Aug-08 19:13:31 -ngas_host=aocngas-master.aoc.nrao.edu:7777, VLASS1.1.sb34346984.eb34356943.58004.0160307176, public, VLASS1.1, x, 17-Sep-08 00:23:24, 17-Sep-08 04:11:11, 353899621, EVLA:B, X L S, SDMset, raw, VLASS1.1, 558163504, #### ngas_host=aocngas-master.aoc.nrao.edu:7777,VLASS1.1.sb34346984.eb34356943.58004.0160307176,VLASS1.1,x,xxxxxx,170908,1,all,SDMset,353.90GB diff --git a/caom2pipe/caom2pipe/tests/data/test_netrc b/caom2pipe/caom2pipe/tests/data/test_netrc deleted file mode 100644 
index d8398ad1..00000000 --- a/caom2pipe/caom2pipe/tests/data/test_netrc +++ /dev/null @@ -1 +0,0 @@ -machine www.example.com login userid password userpass diff --git a/caom2pipe/caom2pipe/tests/data/test_obs_id.fits.xml b/caom2pipe/caom2pipe/tests/data/test_obs_id.fits.xml deleted file mode 100644 index c8a48065..00000000 --- a/caom2pipe/caom2pipe/tests/data/test_obs_id.fits.xml +++ /dev/null @@ -1,8 +0,0 @@ - - - test_collection - test_obs_id - - exposure - - diff --git a/caom2pipe/caom2pipe/tests/data/test_state.yml b/caom2pipe/caom2pipe/tests/data/test_state.yml deleted file mode 100644 index 058c8cdb..00000000 --- a/caom2pipe/caom2pipe/tests/data/test_state.yml +++ /dev/null @@ -1,3 +0,0 @@ -bookmarks: - gemini_timestamp: - last_record: '2017-06-19T03:21:29.345417' diff --git a/caom2pipe/caom2pipe/tests/test_astro_composable.py b/caom2pipe/caom2pipe/tests/test_astro_composable.py deleted file mode 100644 index 0e17d62f..00000000 --- a/caom2pipe/caom2pipe/tests/test_astro_composable.py +++ /dev/null @@ -1,149 +0,0 @@ -# -*- coding: utf-8 -*- -# *********************************************************************** -# ****************** CANADIAN ASTRONOMY DATA CENTRE ******************* -# ************* CENTRE CANADIEN DE DONNÉES ASTRONOMIQUES ************** -# -# (c) 2018. (c) 2018. -# Government of Canada Gouvernement du Canada -# National Research Council Conseil national de recherches -# Ottawa, Canada, K1A 0R6 Ottawa, Canada, K1A 0R6 -# All rights reserved Tous droits réservés -# -# NRC disclaims any warranties, Le CNRC dénie toute garantie -# expressed, implied, or énoncée, implicite ou légale, -# statutory, of any kind with de quelque nature que ce -# respect to the software, soit, concernant le logiciel, -# including without limitation y compris sans restriction -# any warranty of merchantability toute garantie de valeur -# or fitness for a particular marchande ou de pertinence -# purpose. NRC shall not be pour un usage particulier. 
-# liable in any event for any Le CNRC ne pourra en aucun cas -# damages, whether direct or être tenu responsable de tout -# indirect, special or general, dommage, direct ou indirect, -# consequential or incidental, particulier ou général, -# arising from the use of the accessoire ou fortuit, résultant -# software. Neither the name de l'utilisation du logiciel. Ni -# of the National Research le nom du Conseil National de -# Council of Canada nor the Recherches du Canada ni les noms -# names of its contributors may de ses participants ne peuvent -# be used to endorse or promote être utilisés pour approuver ou -# products derived from this promouvoir les produits dérivés -# software without specific prior de ce logiciel sans autorisation -# written permission. préalable et particulière -# par écrit. -# -# This file is part of the Ce fichier fait partie du projet -# OpenCADC project. OpenCADC. -# -# OpenCADC is free software: OpenCADC est un logiciel libre ; -# you can redistribute it and/or vous pouvez le redistribuer ou le -# modify it under the terms of modifier suivant les termes de -# the GNU Affero General Public la “GNU Affero General Public -# License as published by the License” telle que publiée -# Free Software Foundation, par la Free Software Foundation -# either version 3 of the : soit la version 3 de cette -# License, or (at your option) licence, soit (à votre gré) -# any later version. toute version ultérieure. -# -# OpenCADC is distributed in the OpenCADC est distribué -# hope that it will be useful, dans l’espoir qu’il vous -# but WITHOUT ANY WARRANTY; sera utile, mais SANS AUCUNE -# without even the implied GARANTIE : sans même la garantie -# warranty of MERCHANTABILITY implicite de COMMERCIALISABILITÉ -# or FITNESS FOR A PARTICULAR ni d’ADÉQUATION À UN OBJECTIF -# PURPOSE. See the GNU Affero PARTICULIER. Consultez la Licence -# General Public License for Générale Publique GNU Affero -# more details. pour plus de détails. 
-# -# You should have received Vous devriez avoir reçu une -# a copy of the GNU Affero copie de la Licence Générale -# General Public License along Publique GNU Affero avec -# with OpenCADC. If not, see OpenCADC ; si ce n’est -# . pas le cas, consultez : -# . -# -# $Revision: 4 $ -# -# *********************************************************************** -# - -import math -import pytest -import sys - -from astropy.io import fits - -import six - -if six.PY3: - from caom2pipe import astro_composable as ac - -PY_VERSION = '3.6' - -@pytest.mark.skipif(not sys.version.startswith(PY_VERSION), - reason='support one python version') -def test_convert_time(): - hdr1 = fits.Header() - mjd_start, mjd_end = ac.find_time_bounds([hdr1]) - assert mjd_start is None - assert mjd_end is None - - hdr1['DATE-OBS'] = '2012-09-03T01:04:44' - hdr1['TEXP'] = 20.000 - mjd_start, mjd_end = ac.find_time_bounds([hdr1]) - assert mjd_start is not None - assert mjd_end is not None - assert math.isclose(mjd_start, 56173.044953703706), mjd_start - assert math.isclose(mjd_end, 56173.04518518518), mjd_end - - -@pytest.mark.skipif(not sys.version.startswith(PY_VERSION), - reason='support one python version') -def test_get_datetime(): - result = ac.get_datetime('2006-12-12T12:12:12') - assert result is not None - assert result == '2006-12-12 12:12:12.000' - - result = ac.get_datetime('2006-12-12 12:12:12.001') - assert result is not None - assert result == '2006-12-12 12:12:12.001' - - result = ac.get_datetime('2006-12-12') - assert result is not None - assert result == '2006-12-12 00:00:00.000' - - # a format that is not understood - result = ac.get_datetime('16-Dec-12T01:23:45') - assert result is None - - result = ac.get_datetime(None) - assert result is None - - -@pytest.mark.skipif(not sys.version.startswith(PY_VERSION), - reason='support one python version') -def test_get_location(): - x, y, z = ac.get_location(21.0, -32.0, 12) - assert x == 5051887.288718968, x - assert y == 
-3156769.536020791, y - assert z == 2271399.319625149, z - - -@pytest.mark.skipif(not sys.version.startswith(PY_VERSION), - reason='support one python version') -def test_build_plane_time(): - start = ac.get_datetime('2012-09-03T01:04:44') - end = ac.get_datetime('2012-09-03T03:04:44') - exposure = end - start - result = ac.build_plane_time(start, end, exposure) - assert result is not None, 'expected a value' - assert result.bounds is not None, 'expected a bounds value' - assert result.exposure == 7199.999999999994, 'wrong exposure value' - - -@pytest.mark.skipif(not sys.version.startswith(PY_VERSION), - reason='support one python version') -def test_get_time_delta_in_s(): - result = ac.get_timedelta_in_s('0:06:41') - assert result is not None - assert result == 401, 'wrong value returned' diff --git a/caom2pipe/caom2pipe/tests/test_execute_composable.py b/caom2pipe/caom2pipe/tests/test_execute_composable.py deleted file mode 100644 index 101a9c43..00000000 --- a/caom2pipe/caom2pipe/tests/test_execute_composable.py +++ /dev/null @@ -1,1203 +0,0 @@ -# -*- coding: utf-8 -*- -# *********************************************************************** -# ****************** CANADIAN ASTRONOMY DATA CENTRE ******************* -# ************* CENTRE CANADIEN DE DONNÉES ASTRONOMIQUES ************** -# -# (c) 2018. (c) 2018. -# Government of Canada Gouvernement du Canada -# National Research Council Conseil national de recherches -# Ottawa, Canada, K1A 0R6 Ottawa, Canada, K1A 0R6 -# All rights reserved Tous droits réservés -# -# NRC disclaims any warranties, Le CNRC dénie toute garantie -# expressed, implied, or énoncée, implicite ou légale, -# statutory, of any kind with de quelque nature que ce -# respect to the software, soit, concernant le logiciel, -# including without limitation y compris sans restriction -# any warranty of merchantability toute garantie de valeur -# or fitness for a particular marchande ou de pertinence -# purpose. 
NRC shall not be pour un usage particulier. -# liable in any event for any Le CNRC ne pourra en aucun cas -# damages, whether direct or être tenu responsable de tout -# indirect, special or general, dommage, direct ou indirect, -# consequential or incidental, particulier ou général, -# arising from the use of the accessoire ou fortuit, résultant -# software. Neither the name de l'utilisation du logiciel. Ni -# of the National Research le nom du Conseil National de -# Council of Canada nor the Recherches du Canada ni les noms -# names of its contributors may de ses participants ne peuvent -# be used to endorse or promote être utilisés pour approuver ou -# products derived from this promouvoir les produits dérivés -# software without specific prior de ce logiciel sans autorisation -# written permission. préalable et particulière -# par écrit. -# -# This file is part of the Ce fichier fait partie du projet -# OpenCADC project. OpenCADC. -# -# OpenCADC is free software: OpenCADC est un logiciel libre ; -# you can redistribute it and/or vous pouvez le redistribuer ou le -# modify it under the terms of modifier suivant les termes de -# the GNU Affero General Public la “GNU Affero General Public -# License as published by the License” telle que publiée -# Free Software Foundation, par la Free Software Foundation -# either version 3 of the : soit la version 3 de cette -# License, or (at your option) licence, soit (à votre gré) -# any later version. toute version ultérieure. -# -# OpenCADC is distributed in the OpenCADC est distribué -# hope that it will be useful, dans l’espoir qu’il vous -# but WITHOUT ANY WARRANTY; sera utile, mais SANS AUCUNE -# without even the implied GARANTIE : sans même la garantie -# warranty of MERCHANTABILITY implicite de COMMERCIALISABILITÉ -# or FITNESS FOR A PARTICULAR ni d’ADÉQUATION À UN OBJECTIF -# PURPOSE. See the GNU Affero PARTICULIER. Consultez la Licence -# General Public License for Générale Publique GNU Affero -# more details. 
pour plus de détails. -# -# You should have received Vous devriez avoir reçu une -# a copy of the GNU Affero copie de la Licence Générale -# General Public License along Publique GNU Affero avec -# with OpenCADC. If not, see OpenCADC ; si ce n’est -# . pas le cas, consultez : -# . -# -# $Revision: 4 $ -# -# *********************************************************************** -# - -import distutils.sysconfig -import os -import pytest -import sys - -import six - -from mock import Mock, patch - -from astropy.io import fits - -from caom2 import SimpleObservation, Algorithm -from caom2repo import CAOM2RepoClient -from cadcdata import CadcDataClient - - -if six.PY3: - from caom2pipe import CadcException - from caom2pipe import execute_composable as ec - from caom2pipe import manage_composable as mc - -PY_VERSION = '3.6' -THIS_DIR = os.path.dirname(os.path.realpath(__file__)) -TEST_DATA_DIR = os.path.join(THIS_DIR, 'data') -TEST_APP = 'collection2caom2' - - -class MyExitError(Exception): - pass - - -if six.PY3: - class TestVisit: - @staticmethod - def visit(observation, **kwargs): - x = kwargs['working_directory'] - assert x is not None, 'working directory' - y = kwargs['science_file'] - assert y is not None, 'science file' - z = kwargs['log_file_directory'] - assert z is not None, 'log file directory' - assert observation is not None, 'undefined observation' - - class TestStorageName(ec.StorageName): - def __init__(self, obs_id=None, file_name=None): - super(TestStorageName, self).__init__( - 'test_obs_id', 'TEST', '*', 'test_file.fits.gz') - self.url = 'https://test_url/' - - def is_valid(self): - return True - - class TestChooser(ec.OrganizeChooser): - def __init(self): - super(TestChooser, self).__init__() - - def needs_delete(self, observation): - return True - - def _init_config(): - test_config = mc.Config() - test_config.working_directory = THIS_DIR - test_config.collection = 'OMM' - test_config.netrc_file = os.path.join(TEST_DATA_DIR, 'test_netrc') - 
test_config.work_file = 'todo.txt' - test_config.logging_level = 'DEBUG' - test_config.log_file_directory = TEST_DATA_DIR - test_config.failure_fqn = '{}/fail.txt'.format(TEST_DATA_DIR) - test_config.retry_fqn = '{}/retry.txt'.format(TEST_DATA_DIR) - test_config.success_fqn = '{}/good.txt'.format(TEST_DATA_DIR) - test_config.resource_id = 'ivo://cadc.nrc.ca/sc2repo' - test_config.features.run_in_airflow = False - test_config.features.use_file_names = False - test_config.stream = 'TEST' - return test_config - - -@pytest.mark.skipif(not sys.version.startswith(PY_VERSION), - reason='support one python version') -def test_meta_create_client_execute(): - test_config = _init_config() - test_cred = None - data_client_mock = Mock() - data_client_mock.get_file_info.return_value = {'name': 'test_file.fits'} - exec_cmd_orig = mc.exec_cmd - mc.exec_cmd = Mock() - repo_client_mock = Mock() - mc.read_obs_from_file = Mock() - mc.read_obs_from_file.return_value = _read_obs(None) - - test_executor = ec.MetaCreateClient( - test_config, TestStorageName(), TEST_APP, test_cred, - data_client_mock, repo_client_mock, meta_visitors=None) - test_source = '{}/{}/{}.py'.format(distutils.sysconfig.get_python_lib(), - TEST_APP, TEST_APP) - try: - test_executor.execute(None) - assert mc.exec_cmd.called - mc.exec_cmd.assert_called_with( - '{} --debug None --observation OMM test_obs_id ' - '--out {}/test_obs_id/test_obs_id.fits.xml --plugin {} ' - '--module {} --lineage ' - 'test_obs_id/ad:TEST/test_obs_id.fits.gz'.format( - TEST_APP, THIS_DIR, test_source, test_source)) - assert repo_client_mock.create.is_called, 'create call missed' - assert test_executor.url == 'https://test_url/', 'url' - finally: - mc.exec_cmd = exec_cmd_orig - - -@pytest.mark.skipif(not sys.version.startswith(PY_VERSION), - reason='support one python version') -def test_meta_update_client_execute(): - test_config = _init_config() - test_cred = None - data_client_mock = Mock() - data_client_mock.get_file_info.return_value = 
{'name': 'test_file.fits'} - exec_cmd_orig = mc.exec_cmd - mc.exec_cmd = Mock() - repo_client_mock = Mock() - test_executor = ec.MetaUpdateClient( - test_config, TestStorageName(), TEST_APP, test_cred, - data_client_mock, repo_client_mock, _read_obs(None), - meta_visitors=None) - test_source = '{}/{}/{}.py'.format(distutils.sysconfig.get_python_lib(), - TEST_APP, TEST_APP) - try: - test_executor.execute(None) - assert mc.exec_cmd.called - mc.exec_cmd.assert_called_with( - '{} --debug None --in {}/test_obs_id/test_obs_id.fits.xml ' - '--out {}/test_obs_id/test_obs_id.fits.xml --plugin {} ' - '--module {} --lineage ' - 'test_obs_id/ad:TEST/test_obs_id.fits.gz'.format( - TEST_APP, THIS_DIR, THIS_DIR, test_source, test_source)) - assert repo_client_mock.update.is_called, 'update call missed' - finally: - mc.exec_cmd = exec_cmd_orig - - -@pytest.mark.skipif(not sys.version.startswith(PY_VERSION), - reason='support one python version') -def test_meta_delete_create_client_execute(): - test_config = _init_config() - test_cred = None - data_client_mock = Mock() - data_client_mock.get_file_info.return_value = {'name': 'test_file.fits'} - exec_cmd_orig = mc.exec_cmd - mc.exec_cmd = Mock() - repo_client_mock = Mock() - test_executor = ec.MetaDeleteCreateClient( - test_config, TestStorageName(), TEST_APP, test_cred, - data_client_mock, repo_client_mock, _read_obs(None), None) - test_source = '{}/{}/{}.py'.format(distutils.sysconfig.get_python_lib(), - TEST_APP, TEST_APP) - try: - test_executor.execute(None) - assert mc.exec_cmd.called - mc.exec_cmd.assert_called_with( - '{} --debug None --in {}/test_obs_id/test_obs_id.fits.xml ' - '--out {}/test_obs_id/test_obs_id.fits.xml --plugin {} ' - '--module {} --lineage ' - 'test_obs_id/ad:TEST/test_obs_id.fits.gz'.format( - TEST_APP, THIS_DIR, THIS_DIR, test_source, test_source)) - assert repo_client_mock.update.is_called, 'update call missed' - finally: - mc.exec_cmd = exec_cmd_orig - - -@pytest.mark.skipif(not 
sys.version.startswith(PY_VERSION), - reason='support one python version') -def test_local_meta_create_client_execute(): - test_config = _init_config() - test_cred = None - data_client_mock = Mock() - data_client_mock.get_file_info.return_value = {'name': 'test_file.fits'} - exec_cmd_orig = mc.exec_cmd - mc.exec_cmd = Mock() - repo_client_mock = Mock() - - test_executor = ec.LocalMetaCreateClient( - test_config, TestStorageName(), TEST_APP, test_cred, - data_client_mock, repo_client_mock, meta_visitors=None) - test_source = '{}/{}/{}.py'.format(distutils.sysconfig.get_python_lib(), - TEST_APP, TEST_APP) - try: - test_executor.execute(None) - assert mc.exec_cmd.called - mc.exec_cmd.assert_called_with( - '{} --debug None --observation OMM test_obs_id ' - '--local {}/test_file.fits --out {}/test_obs_id.fits.xml ' - '--plugin {} --module {} ' - '--lineage test_obs_id/ad:TEST/test_obs_id.fits.gz'.format( - TEST_APP, THIS_DIR, THIS_DIR, test_source, test_source)) - assert repo_client_mock.create.is_called, 'create call missed' - finally: - mc.exec_cmd = exec_cmd_orig - - -@pytest.mark.skipif(not sys.version.startswith(PY_VERSION), - reason='support one python version') -def test_local_meta_update_client_execute(): - test_config = _init_config() - test_cred = None - data_client_mock = Mock() - data_client_mock.get_file_info.return_value = {'name': 'test_file.fits'} - exec_cmd_orig = mc.exec_cmd - mc.exec_cmd = Mock() - repo_client_mock = Mock() - test_executor = ec.LocalMetaUpdateClient( - test_config, TestStorageName(), TEST_APP, test_cred, - data_client_mock, repo_client_mock, _read_obs(None), - meta_visitors=None) - test_source = '{}/{}/{}.py'.format(distutils.sysconfig.get_python_lib(), - TEST_APP, TEST_APP) - try: - test_executor.execute(None) - assert mc.exec_cmd.called - mc.exec_cmd.assert_called_with( - '{} --debug None --in {}/test_obs_id.fits.xml ' - '--out {}/test_obs_id.fits.xml --local {}/test_file.fits ' - '--plugin {} --module {} ' - '--lineage 
test_obs_id/ad:TEST/test_obs_id.fits.gz'.format( - TEST_APP, THIS_DIR, THIS_DIR, THIS_DIR, test_source, - test_source)) - assert repo_client_mock.update.is_called, 'update call missed' - finally: - mc.exec_cmd = exec_cmd_orig - - -@pytest.mark.skipif(not sys.version.startswith(PY_VERSION), - reason='support one python version') -def test_local_meta_delete_create_client_execute(): - test_config = _init_config() - test_cred = None - data_client_mock = Mock() - data_client_mock.get_file_info.return_value = {'name': 'test_file.fits'} - exec_cmd_orig = mc.exec_cmd - mc.exec_cmd = Mock() - repo_client_mock = Mock() - test_executor = ec.LocalMetaDeleteCreateClient( - test_config, TestStorageName(), TEST_APP, test_cred, - data_client_mock, repo_client_mock, meta_visitors=None, - observation=_read_obs(None)) - test_source = '{}/{}/{}.py'.format(distutils.sysconfig.get_python_lib(), - TEST_APP, TEST_APP) - try: - test_executor.execute(None) - assert mc.exec_cmd.called - mc.exec_cmd.assert_called_with( - '{} --debug None --in {}/test_obs_id.fits.xml ' - '--out {}/test_obs_id.fits.xml --local {}/test_file.fits ' - '--plugin {} --module {} ' - '--lineage test_obs_id/ad:TEST/test_obs_id.fits.gz'.format( - TEST_APP, THIS_DIR, THIS_DIR, THIS_DIR, test_source, - test_source)) - assert repo_client_mock.update.is_called, 'update call missed' - finally: - mc.exec_cmd = exec_cmd_orig - - -@pytest.mark.skipif(not sys.version.startswith(PY_VERSION), - reason='support one python version') -def test_client_visit(): - test_config = _init_config() - test_cred = None - data_client_mock = Mock() - repo_client_mock = Mock() - - test_executor = ec.ClientVisit(test_config, - TestStorageName(), test_cred, - data_client_mock, - repo_client_mock, - meta_visitors=None) - - test_executor.execute(None) - assert repo_client_mock.read.is_called, 'read call missed' - assert repo_client_mock.update.is_called, 'update call missed' - - -@pytest.mark.skipif(not sys.version.startswith(PY_VERSION), - 
reason='support one python version') -def test_data_execute(): - test_obs_id = 'TEST_OBS_ID' - test_dir = os.path.join(THIS_DIR, test_obs_id) - test_fits_fqn = os.path.join(test_dir, - TestStorageName().file_name) - if not os.path.exists(test_dir): - os.mkdir(test_dir) - precondition = open(test_fits_fqn, 'w') - precondition.close() - - test_data_visitors = [TestVisit] - os_path_exists_orig = os.path.exists - os.path.exists = Mock(return_value=True) - os_listdir_orig = os.listdir - os.listdir = Mock(return_value=[]) - os_rmdir_orig = os.rmdir - os.rmdir = Mock() - test_config = _init_config() - data_client_mock = Mock() - data_client_mock.get_file_info.return_value = {'name': 'test_file.fits'} - repo_client_mock = Mock() - test_cred = None - - try: - ec.CaomExecute._data_cmd_info = Mock(side_effect=_get_fname) - - # run the test - test_executor = ec.DataClient( - test_config, TestStorageName(), TEST_APP, test_cred, - data_client_mock, repo_client_mock, test_data_visitors, - mc.TaskType.MODIFY) - try: - test_executor.execute(None) - except CadcException as e: - assert False, e - - # check that things worked as expected - assert data_client_mock.get_file_info.is_called, \ - 'get_file_info call missed' - assert data_client_mock.get_file.is_called, 'get_file call missed' - assert repo_client_mock.read.is_called, 'read call missed' - assert repo_client_mock.update.is_called, 'update call missed' - - finally: - os.path.exists = os_path_exists_orig - os.listdir = os_listdir_orig - os.rmdir = os_rmdir_orig - - -@pytest.mark.skipif(not sys.version.startswith(PY_VERSION), - reason='support one python version') -def test_data_local_execute(): - test_data_visitors = [TestVisit] - - data_client_mock = Mock() - data_client_mock.get_file_info.return_value = {'name': 'test_file.fits'} - repo_client_mock = Mock() - repo_client_mock.read.return_value = _read_obs(None) - test_cred = None - - test_config = _init_config() - # run the test - test_executor = ec.LocalDataClient( - 
test_config, TestStorageName(), TEST_APP, - test_cred, data_client_mock, repo_client_mock, test_data_visitors) - try: - test_executor.execute(None) - except CadcException as e: - assert False, e - - # check that things worked as expected - no cleanup - assert data_client_mock.get_file_info.is_called, \ - 'get_file_info call missed' - assert data_client_mock.get_file.is_called, 'get_file call missed' - assert repo_client_mock.read.is_called, 'read call missed' - assert repo_client_mock.update.is_called, 'update call missed' - - -@pytest.mark.skipif(not sys.version.startswith(PY_VERSION), - reason='support one python version') -def test_data_store(): - test_config = _init_config() - data_client_mock = Mock() - repo_client_mock = Mock() - test_executor = ec.StoreClient( - test_config, TestStorageName(), 'command_name', '', data_client_mock, - repo_client_mock) - try: - test_executor.execute(None) - except CadcException as e: - assert False, e - - # check that things worked as expected - no cleanup - assert data_client_mock.put_file.is_called, 'put_file call missed' - - -@pytest.mark.skipif(not sys.version.startswith(PY_VERSION), - reason='support one python version') -def test_scrape(): - # clean up from previous tests - if os.path.exists(TestStorageName().model_file_name): - os.remove(TestStorageName().model_file_name) - netrc = os.path.join(TEST_DATA_DIR, 'test_netrc') - assert os.path.exists(netrc) - - test_config = _init_config() - test_config.working_directory = TEST_DATA_DIR - test_config.logging_level = 'INFO' - exec_cmd_orig = mc.exec_cmd - mc.exec_cmd = Mock() - test_source = '{}/command_name/command_name.py'.format( - distutils.sysconfig.get_python_lib()) - - try: - test_executor = ec.Scrape( - test_config, TestStorageName(), 'command_name') - try: - test_executor.execute(None) - except CadcException as e: - assert False, e - assert mc.exec_cmd.called - mc.exec_cmd.assert_called_with( - 'command_name --verbose ' - '--observation OMM test_obs_id --out 
{}/test_obs_id.fits.xml ' - '--plugin {} ' - '--module {} ' - '--local {}/test_file.fits.gz ' - '--lineage test_obs_id/ad:TEST/test_obs_id.fits.gz'.format( - TEST_DATA_DIR, test_source, test_source, TEST_DATA_DIR)) - - finally: - mc.exec_cmd = exec_cmd_orig - - -@pytest.mark.skipif(not sys.version.startswith(PY_VERSION), - reason='support one python version') -def test_data_scrape_execute(): - test_data_visitors = [TestVisit] - read_orig = mc.read_obs_from_file - mc.read_obs_from_file = Mock(side_effect=_read_obs) - try: - - test_config = _init_config() - - # run the test - test_executor = ec.DataScrape( - test_config, TestStorageName(), TEST_APP, - test_data_visitors) - try: - test_executor.execute(None) - except CadcException as e: - assert False, e - - assert mc.read_obs_from_file.is_called, 'read obs call missed' - - finally: - mc.read_obs_from_file = read_orig - - -@pytest.mark.skipif(not sys.version.startswith(PY_VERSION), - reason='support one python version') -def test_organize_executes_client(): - test_obs_id = TestStorageName() - test_config = _init_config() - test_config.use_local_files = True - log_file_directory = os.path.join(THIS_DIR, 'logs') - test_config.log_file_directory = log_file_directory - success_log_file_name = 'success_log.txt' - test_config.success_log_file_name = success_log_file_name - failure_log_file_name = 'failure_log.txt' - test_config.failure_log_file_name = failure_log_file_name - test_config.features.use_clients = True - retry_file_name = 'retries.txt' - test_config.retry_file_name = retry_file_name - exec_cmd_orig = mc.exec_cmd_info - repo_cmd_orig = ec.CaomExecute.repo_cmd_get_client - CadcDataClient.__init__ = Mock(return_value=None) - CAOM2RepoClient.__init__ = Mock(return_value=None) - - try: - ec.CaomExecute.repo_cmd_get_client = Mock(return_value=None) - mc.exec_cmd_info = \ - Mock(return_value='INFO:cadc-data:info\n' - 'File C170324_0054_SCI_prev.jpg:\n' - ' archive: OMM\n' - ' encoding: None\n' - ' lastmod: Mon, 25 Jun 
2018 16:52:07 GMT\n' - ' md5sum: f37d21c53055498d1b5cb7753e1c6d6f\n' - ' name: C120902_sh2-132_J_old_' - 'SCIRED.fits.gz\n' - ' size: 754408\n' - ' type: image/jpeg\n' - ' umd5sum: 704b494a972eed30b18b817e243ced7d\n' - ' usize: 754408\n'.encode('utf-8')) - - test_config.task_types = [mc.TaskType.SCRAPE] - test_oe = ec.OrganizeExecutes(test_config) - executors = test_oe.choose(test_obs_id, 'command_name', [], []) - assert executors is not None - assert len(executors) == 1 - assert isinstance(executors[0], ec.Scrape) - - test_config.task_types = [mc.TaskType.STORE, - mc.TaskType.INGEST, - mc.TaskType.MODIFY] - test_oe = ec.OrganizeExecutes(test_config) - executors = test_oe.choose(test_obs_id, 'command_name', [], []) - assert executors is not None - assert len(executors) == 4 - assert isinstance(executors[0], ec.StoreClient), \ - type(executors[0]) - assert isinstance(executors[1], - ec.LocalMetaCreateClient) - assert isinstance(executors[2], ec.LocalDataClient) - assert isinstance( - executors[3], ec.CompareChecksumClient) - assert CadcDataClient.__init__.is_called, 'mock not called' - assert CAOM2RepoClient.__init__.is_called, 'mock not called' - - test_config.use_local_files = False - test_config.task_types = [mc.TaskType.INGEST, - mc.TaskType.MODIFY] - test_oe = ec.OrganizeExecutes(test_config) - executors = test_oe.choose(test_obs_id, 'command_name', [], []) - assert executors is not None - assert len(executors) == 2 - assert isinstance(executors[0], ec.MetaCreateClient) - assert isinstance(executors[1], ec.DataClient) - assert CadcDataClient.__init__.is_called, 'mock not called' - assert CAOM2RepoClient.__init__.is_called, 'mock not called' - - test_config.use_local_files = True - test_config.task_types = [mc.TaskType.INGEST, - mc.TaskType.MODIFY] - test_oe = ec.OrganizeExecutes(test_config) - executors = test_oe.choose(test_obs_id, 'command_name', [], []) - assert executors is not None - assert len(executors) == 3 - assert isinstance( - executors[0], 
ec.LocalMetaCreateClient) - assert isinstance(executors[1], ec.LocalDataClient) - assert isinstance( - executors[2], ec.CompareChecksumClient) - assert CadcDataClient.__init__.is_called, 'mock not called' - assert CAOM2RepoClient.__init__.is_called, 'mock not called' - - test_config.task_types = [mc.TaskType.SCRAPE, - mc.TaskType.MODIFY] - test_config.use_local_files = True - test_oe = ec.OrganizeExecutes(test_config) - executors = test_oe.choose(test_obs_id, 'command_name', [], []) - assert executors is not None - assert len(executors) == 2 - assert isinstance(executors[0], ec.Scrape) - assert isinstance(executors[1], ec.DataScrape) - assert CadcDataClient.__init__.is_called, 'mock not called' - assert CAOM2RepoClient.__init__.is_called, 'mock not called' - - test_config.task_types = [mc.TaskType.REMOTE] - test_config.use_local_files = True - test_oe = ec.OrganizeExecutes(test_config) - executors = test_oe.choose(test_obs_id, 'command_name', [], []) - assert executors is not None - assert len(executors) == 1 - assert isinstance(executors[0], ec.LocalMetaCreateClientRemoteStorage) - assert CadcDataClient.__init__.is_called, 'mock not called' - assert CAOM2RepoClient.__init__.is_called, 'mock not called' - - test_config.task_types = [mc.TaskType.INGEST] - test_config.use_local_files = False - test_chooser = TestChooser() - ec.CaomExecute.repo_cmd_get_client = Mock(return_value=_read_obs(None)) - test_oe = ec.OrganizeExecutes(test_config, test_chooser) - executors = test_oe.choose(test_obs_id, 'command_name', [], []) - assert executors is not None - assert len(executors) == 1 - assert isinstance(executors[0], - ec.MetaDeleteCreateClient) - assert CadcDataClient.__init__.is_called, 'mock not called' - assert CAOM2RepoClient.__init__.is_called, 'mock not called' - - test_config.task_types = [mc.TaskType.PULL] - test_config.use_local_files = False - test_chooser = TestChooser() - ec.CaomExecute.repo_cmd_get_client = Mock(return_value=_read_obs(None)) - test_oe = 
ec.OrganizeExecutes(test_config, test_chooser, - '/tmp/todo.txt') - executors = test_oe.choose(test_obs_id, 'command_name', [], []) - assert executors is not None - assert len(executors) == 1 - assert isinstance(executors[0], ec.PullClient) - assert CadcDataClient.__init__.is_called, 'mock not called' - assert CAOM2RepoClient.__init__.is_called, 'mock not called' - assert executors[0].url == 'https://test_url/', 'url' - assert executors[0].fname == 'test_obs_id.fits', 'file name' - assert executors[0].stream == 'TEST', 'stream' - assert executors[0].working_dir == '{}/test_obs_id'.format(THIS_DIR), \ - 'working_dir' - assert executors[0].local_fqn == \ - '{}/test_obs_id/test_obs_id.fits'.format(THIS_DIR), 'local_fqn' - assert test_oe.success_fqn == \ - '{}/logs/todo_success_log.txt'.format(THIS_DIR), 'wrong success' - assert test_oe.retry_fqn == \ - '{}/logs/todo_retries.txt'.format(THIS_DIR), 'wrong retry' - assert test_oe.failure_fqn == \ - '{}/logs/todo_failure_log.txt'.format(THIS_DIR), 'wrong failure' - assert test_oe.todo_fqn == '/tmp/todo.txt', 'wrong todo' - finally: - mc.exec_cmd_orig = exec_cmd_orig - ec.CaomExecute.repo_cmd_get_client = repo_cmd_orig - - -@pytest.mark.skipif(not sys.version.startswith(PY_VERSION), - reason='support one python version') -def test_organize_executes_chooser(): - test_obs_id = TestStorageName() - test_config = _init_config() - test_config.use_local_files = True - log_file_directory = os.path.join(THIS_DIR, 'logs') - test_config.log_file_directory = log_file_directory - test_config.features.supports_composite = True - exec_cmd_orig = mc.exec_cmd_info - repo_cmd_orig = ec.CaomExecute.repo_cmd_get_client - CadcDataClient.__init__ = Mock(return_value=None) - CAOM2RepoClient.__init__ = Mock(return_value=None) - - try: - ec.CaomExecute.repo_cmd_get_client = Mock(return_value=_read_obs(None)) - mc.exec_cmd_info = \ - Mock(return_value='INFO:cadc-data:info\n' - 'File C170324_0054_SCI_prev.jpg:\n' - ' archive: OMM\n' - ' encoding: 
None\n' - ' lastmod: Mon, 25 Jun 2018 16:52:07 GMT\n' - ' md5sum: f37d21c53055498d1b5cb7753e1c6d6f\n' - ' name: C120902_sh2-132_J_old_' - 'SCIRED.fits.gz\n' - ' size: 754408\n' - ' type: image/jpeg\n' - ' umd5sum: 704b494a972eed30b18b817e243ced7d\n' - ' usize: 754408\n'.encode('utf-8')) - - test_config.task_types = [mc.TaskType.INGEST] - test_chooser = TestChooser() - test_oe = ec.OrganizeExecutes(test_config, test_chooser) - executors = test_oe.choose(test_obs_id, 'command_name', [], []) - assert executors is not None - assert len(executors) == 2 - assert isinstance(executors[0], - ec.LocalMetaDeleteCreateClient) - assert executors[0].fname == 'test_obs_id.fits', 'file name' - assert executors[0].stream == 'TEST', 'stream' - assert executors[0].working_dir == THIS_DIR, 'working_dir' - assert isinstance(executors[1], - ec.CompareChecksumClient) - - test_config.use_local_files = False - test_config.task_types = [mc.TaskType.INGEST] - test_oe = ec.OrganizeExecutes(test_config, test_chooser) - executors = test_oe.choose(test_obs_id, 'command_name', [], []) - assert executors is not None - assert len(executors) == 1 - assert isinstance(executors[0], - ec.MetaDeleteCreateClient) - assert CadcDataClient.__init__.is_called, 'mock not called' - assert CAOM2RepoClient.__init__.is_called, 'mock not called' - finally: - mc.exec_cmd_orig = exec_cmd_orig - ec.CaomExecute.repo_cmd_get_client = repo_cmd_orig - - -@pytest.mark.skipif(not sys.version.startswith(PY_VERSION), - reason='support one python version') -def test_organize_executes_client_existing(): - test_obs_id = TestStorageName() - test_config = _init_config() - test_config.features.use_clients = True - repo_cmd_orig = ec.CaomExecute.repo_cmd_get_client - CadcDataClient.__init__ = Mock(return_value=None) - CAOM2RepoClient.__init__ = Mock(return_value=None) - try: - - ec.CaomExecute.repo_cmd_get_client = Mock(return_value=_read_obs(None)) - - test_config.task_types = [mc.TaskType.INGEST] - test_config.use_local_files = 
False - test_oe = ec.OrganizeExecutes(test_config) - executors = test_oe.choose(test_obs_id, 'command_name', [], []) - assert executors is not None - assert len(executors) == 1 - assert isinstance(executors[0], ec.MetaUpdateClient) - assert CadcDataClient.__init__.is_called, 'mock not called' - assert CAOM2RepoClient.__init__.is_called, 'mock not called' - finally: - ec.CaomExecute.repo_cmd_get_client = repo_cmd_orig - - -@pytest.mark.skipif(not sys.version.startswith(PY_VERSION), - reason='support one python version') -def test_organize_executes_client_visit(): - test_obs_id = TestStorageName() - test_config = _init_config() - test_config.features.use_clients = True - test_config.task_types = [mc.TaskType.VISIT] - test_config.use_local_files = False - test_oe = ec.OrganizeExecutes(test_config) - CadcDataClient.__init__ = Mock(return_value=None) - CAOM2RepoClient.__init__ = Mock(return_value=None) - executors = test_oe.choose(test_obs_id, 'command_name', [], []) - assert executors is not None - assert len(executors) == 1 - assert isinstance(executors[0], ec.ClientVisit) - assert CadcDataClient.__init__.is_called, 'mock not called' - assert CAOM2RepoClient.__init__.is_called, 'mock not called' - - -@pytest.mark.skipif(not sys.version.startswith(PY_VERSION), - reason='support one python version') -def test_checksum_client(): - test_config = _init_config() - test_executor = ec.CompareChecksumClient( - test_config, TestStorageName(), 'test2caom2', None, None, None) - compare_orig = mc.compare_checksum_client - - try: - mc.compare_checksum_client = Mock() - test_executor.execute(None) - assert mc.compare_checksum_client.called - assert test_executor.fname == 'test_file.fits.gz', 'fname' - assert test_executor.working_dir == THIS_DIR, 'working dir' - assert test_executor.model_fqn == os.path.join( - THIS_DIR, 'test_obs_id.fits.xml'), 'model fqn' - assert test_executor.url == 'https://test_url/', 'url' - finally: - mc.compare_checksum_client = compare_orig - - 
-@pytest.mark.skipif(not sys.version.startswith(PY_VERSION), - reason='support one python version') -def test_capture_failure(): - test_obs_id = 'test_obs_id' - test_config = _init_config() - log_file_directory = os.path.join(THIS_DIR, 'logs') - test_config.log_to_file = True - test_config.log_file_directory = log_file_directory - success_log_file_name = 'success_log.txt' - test_config.success_log_file_name = success_log_file_name - failure_log_file_name = 'failure_log.txt' - test_config.failure_log_file_name = failure_log_file_name - retry_file_name = 'retries.txt' - test_config.retry_file_name = retry_file_name - - if not os.path.exists(log_file_directory): - os.mkdir(log_file_directory) - if os.path.exists(test_config.success_fqn): - os.remove(test_config.success_fqn) - if os.path.exists(test_config.failure_fqn): - os.remove(test_config.failure_fqn) - if os.path.exists(test_config.retry_fqn): - os.remove(test_config.retry_fqn) - - # clean up from last execution - - test_oe = ec.OrganizeExecutes(test_config) - test_oe.capture_failure(test_obs_id, None, 'exception text') - test_oe.capture_success(test_obs_id, 'C121212_01234_CAL.fits.gz') - - assert os.path.exists(test_config.success_fqn) - assert os.path.exists(test_config.failure_fqn) - assert os.path.exists(test_config.retry_fqn) - - success_content = open(test_config.success_fqn).read() - assert success_content.endswith('test_obs_id C121212_01234_CAL.fits.gz\n') - retry_content = open(test_config.retry_fqn).read() - assert retry_content == 'test_obs_id\n' - failure_content = open(test_config.failure_fqn).read() - assert failure_content.endswith('test_obs_id None exception text\n') - - -@pytest.mark.skipif(not sys.version.startswith(PY_VERSION), - reason='support one python version') -@patch('sys.exit', Mock(side_effect=MyExitError)) -def test_run_by_file(): - try: - os.getcwd = Mock(return_value=TEST_DATA_DIR) - todo_file = os.path.join(os.getcwd(), 'todo.txt') - f = open(todo_file, 'w') - f.write('') - 
f.close() - ec.run_by_file(ec.StorageName, TEST_APP, 'collection', - proxy=None, meta_visitors=None, data_visitors=None) - except mc.CadcException as e: - assert False, 'but the work list is empty {}'.format(e) - - -@pytest.mark.skipif(not sys.version.startswith(PY_VERSION), - reason='support one python version') -@patch('sys.exit', Mock(side_effect=MyExitError)) -def test_run_by_file_expects_retry(): - retry_dir = '{}_0'.format(TEST_DATA_DIR) - if os.path.exists(retry_dir): - f_log = '{}/failure_log.txt'.format(retry_dir) - if os.path.exists(f_log): - os.remove(f_log) - r_txt = '{}/retries.txt'.format(retry_dir) - if os.path.exists(r_txt): - os.remove(r_txt) - s_log = '{}/success_log.txt'.format(retry_dir) - if os.path.exists(s_log): - os.remove(s_log) - t_log = '{}/test_obs_id.log'.format(retry_dir) - if os.path.exists(t_log): - os.remove(t_log) - os.rmdir(retry_dir) - - test_config = _init_config() - test_config.log_to_file = True - test_config.features.expects_retry = True - test_config.retry_failures = True - test_config.retry_count = 1 - test_config.retry_file_name = 'retries.txt' - test_config.success_log_file_name = 'success_log.txt' - test_config.failure_log_file_name = 'failure_log.txt' - test_retry_count = 0 - test_config.task_types = [] - assert test_config.log_file_directory == TEST_DATA_DIR - assert test_config.work_file == 'todo.txt' - - assert test_config.need_to_retry(), 'should require retries' - - test_config.update_for_retry(test_retry_count) - assert test_config.log_file_directory == '{}_{}'.format(TEST_DATA_DIR, - test_retry_count) - assert test_config.work_file == 'retries.txt' - assert test_config.work_fqn == os.path.join(TEST_DATA_DIR, 'retries.txt') - try: - ec._run_by_file(test_config, TestStorageName, TEST_APP, - proxy=None, meta_visitors=[], data_visitors=[]) - except mc.CadcException as e: - assert False, 'but the work list is empty {}'.format(e) - - if TEST_DATA_DIR.startswith('/usr/src/app'): - # these checks fail on travis .... 
- assert os.path.exists('{}_0'.format(TEST_DATA_DIR)) - assert os.path.exists(test_config.success_fqn) - assert os.path.exists(test_config.failure_fqn) - assert os.path.exists(test_config.retry_fqn) - - -@pytest.mark.skipif(not sys.version.startswith(PY_VERSION), - reason='support one python version') -def test_do_one(): - test_config = _init_config() - test_config.task_types = [] - test_organizer = ec.OrganizeExecutes(test_config) - # no client - test_result = ec._do_one(config=test_config, organizer=test_organizer, - storage_name=TestStorageName(), - command_name='test2caom2', - meta_visitors=[], data_visitors=[]) - assert test_result is not None - assert test_result == -1 - - # client - test_config.features.use_clients = True - test_result = ec._do_one(config=test_config, organizer=test_organizer, - storage_name=TestStorageName(), - command_name='test2caom2', - meta_visitors=[], data_visitors=[]) - assert test_result is not None - assert test_result == -1 - - -@pytest.mark.skipif(not sys.version.startswith(PY_VERSION), - reason='support one python version') -def test_storage_name(): - sn = ec.StorageName(obs_id='test_obs_id', collection='TEST', - collection_pattern='T[\\w+-]+') - assert sn.file_uri == 'ad:TEST/test_obs_id.fits.gz' - assert sn.file_name == 'test_obs_id.fits' - assert sn.compressed_file_name == 'test_obs_id.fits.gz' - assert sn.model_file_name == 'test_obs_id.fits.xml' - assert sn.prev == 'test_obs_id_prev.jpg' - assert sn.thumb == 'test_obs_id_prev_256.jpg' - assert sn.prev_uri == 'ad:TEST/test_obs_id_prev.jpg' - assert sn.thumb_uri == 'ad:TEST/test_obs_id_prev_256.jpg' - assert sn.obs_id == 'test_obs_id' - assert sn.log_file == 'test_obs_id.log' - assert sn.product_id == 'test_obs_id' - assert sn.fname_on_disk is None - assert not sn.is_valid() - sn = ec.StorageName(obs_id='Test_obs_id', collection='TEST', - collection_pattern='T[\\w+-]+') - assert sn.is_valid() - x = ec.StorageName.remove_extensions('test_obs_id.fits.header.gz') - assert x == 
'test_obs_id' - - -@pytest.mark.skipif(not sys.version.startswith(PY_VERSION), - reason='support one python version') -def test_caom_name(): - cn = ec.CaomName(uri='ad:TEST/test_obs_id.fits.gz') - assert cn.file_id == 'test_obs_id' - assert cn.file_name == 'test_obs_id.fits.gz' - assert cn.uncomp_file_name == 'test_obs_id.fits' - assert ec.CaomName.make_obs_uri_from_obs_id('TEST', 'test_obs_id') == \ - 'caom:TEST/test_obs_id' - - -@pytest.mark.skipif(not sys.version.startswith(PY_VERSION), - reason='support one python version') -@patch('sys.exit', Mock(side_effect=MyExitError)) -def test_local_meta_create_client_remote_storage_execute(): - os_path_exists_orig = os.path.exists - os.path.exists = Mock(return_value=True) - os_listdir_orig = os.listdir - os.listdir = Mock(return_value=[]) - os_rmdir_orig = os.rmdir - os.rmdir = Mock() - test_config = _init_config() - data_client_mock = Mock() - data_client_mock.get_file_info.return_value = {'name': 'test_file.fits'} - repo_client_mock = Mock() - test_cred = None - exec_cmd_orig = mc.exec_cmd - mc.exec_cmd = Mock() - mc.read_obs_from_file = Mock() - mc.read_obs_from_file.return_value = _read_obs(None) - test_source = '{}/{}/{}.py'.format(distutils.sysconfig.get_python_lib(), - TEST_APP, TEST_APP) - test_local = '{}/test_obs_id.fits'.format(THIS_DIR) - - try: - ec.CaomExecute._data_cmd_info = Mock(side_effect=_get_fname) - - # run the test - test_executor = ec.LocalMetaCreateClientRemoteStorage( - test_config, TestStorageName(), TEST_APP, test_cred, - data_client_mock, repo_client_mock, None) - try: - test_executor.execute(None) - except CadcException as e: - assert False, e - - # check that things worked as expected - assert repo_client_mock.create.is_called, 'create call missed' - assert mc.exec_cmd.called - mc.exec_cmd.assert_called_with( - '{} --debug None --observation OMM test_obs_id --local {} ' - '--out {}/test_obs_id.fits.xml --plugin {} ' - '--module {} --lineage ' - 
'test_obs_id/ad:TEST/test_obs_id.fits.gz'.format( - TEST_APP, test_local, THIS_DIR, test_source, test_source)) - finally: - os.path.exists = os_path_exists_orig - os.listdir = os_listdir_orig - os.rmdir = os_rmdir_orig - mc.exec_cmd = exec_cmd_orig - - -@pytest.mark.skipif(not sys.version.startswith(PY_VERSION), - reason='support one python version') -@patch('sys.exit', Mock(side_effect=MyExitError)) -def test_local_meta_update_client_remote_storage_execute(): - os_path_exists_orig = os.path.exists - os.path.exists = Mock(return_value=True) - os_listdir_orig = os.listdir - os.listdir = Mock(return_value=[]) - os_rmdir_orig = os.rmdir - os.rmdir = Mock() - test_config = _init_config() - data_client_mock = Mock() - data_client_mock.get_file_info.return_value = {'name': 'test_file.fits'} - repo_client_mock = Mock() - test_cred = None - exec_cmd_orig = mc.exec_cmd - mc.exec_cmd = Mock() - mc.read_obs_from_file = Mock() - mc.read_obs_from_file.return_value = _read_obs(None) - test_source = '{}/{}/{}.py'.format(distutils.sysconfig.get_python_lib(), - TEST_APP, TEST_APP) - test_local = '{}/test_obs_id.fits'.format(THIS_DIR) - - try: - ec.CaomExecute._data_cmd_info = Mock(side_effect=_get_fname) - # run the test - test_executor = ec.LocalMetaUpdateClientRemoteStorage( - test_config, TestStorageName(), TEST_APP, test_cred, - data_client_mock, repo_client_mock, _read_obs(None), None) - try: - test_executor.execute(None) - except CadcException as e: - assert False, e - - # check that things worked as expected - assert repo_client_mock.read.is_called, 'read call missed' - assert repo_client_mock.update.is_called, 'update call missed' - assert mc.exec_cmd.called - mc.exec_cmd.assert_called_with( - '{} --debug None --in {}/test_obs_id.fits.xml ' - '--out {}/test_obs_id.fits.xml --local {} --plugin {} ' - '--module {} --lineage ' - 'test_obs_id/ad:TEST/test_obs_id.fits.gz'.format( - TEST_APP, THIS_DIR, THIS_DIR, test_local, test_source, - test_source)) - finally: - os.path.exists 
= os_path_exists_orig - os.listdir = os_listdir_orig - os.rmdir = os_rmdir_orig - mc.exec_cmd = exec_cmd_orig - - -@pytest.mark.skipif(not sys.version.startswith(PY_VERSION), - reason='support one python version') -def test_omm_name_dots(): - TEST_NAME = 'C121121_J024345.57-021326.4_K_SCIRED' - TEST_URI = 'ad:OMM/{}.fits.gz'.format(TEST_NAME) - test_file_id = ec.CaomName(TEST_URI).file_id - assert TEST_NAME == test_file_id, 'dots messing with things' - - -@pytest.mark.skipif(not sys.version.startswith(PY_VERSION), - reason='support one python version') -@patch('sys.exit', Mock(side_effect=MyExitError)) -def test_pull_client(): - # Response mock - class Object(object): - pass - - def raise_for_status(self): - pass - - def iter_content(self, chunk_size): - return ['aaa'.encode(), 'bbb'.encode()] - - def __enter__(self): - return self - - def __exit__(self, a, b, c): - return None - - test_config = _init_config() - data_client_mock = Mock() - repo_client_mock = Mock() - test_sn = TestStorageName() - test_sn.url = 'file://{}/{}'.format(TEST_DATA_DIR, 'C111107_0694_SCI.fits') - test_sn.fname_on_disk = '{}/{}'.format(TEST_DATA_DIR, 'x.fits') - ec.CaomExecute._cleanup = Mock() - with patch('requests.get') as get_mock: - get_mock.return_value = Object() - test_executor = ec.PullClient(test_config, test_sn, TEST_APP, None, - data_client_mock, repo_client_mock) - with pytest.raises(OSError): - test_executor.execute(None) - assert data_client_mock.put_file.is_called, 'call missed' - assert ec.CaomExecute._cleanup.is_called, 'cleanup call missed' - - -@pytest.mark.skipif(not sys.version.startswith(PY_VERSION), - reason='support one python version') -@patch('sys.exit', Mock(side_effect=MyExitError)) -def test_choose_exceptions(): - test_config = _init_config() - test_config.init_local_files = False - test_config.task_types = [mc.TaskType.SCRAPE] - with pytest.raises(mc.CadcException): - test_organizer = ec.OrganizeExecutes(test_config) - test_organizer.choose(TestStorageName(), 
'command name', [], []) - - test_config.task_types = [mc.TaskType.STORE] - with pytest.raises(mc.CadcException): - test_organizer = ec.OrganizeExecutes(test_config) - test_organizer.choose(TestStorageName(), 'command name', [], []) - - -@pytest.mark.skipif(not sys.version.startswith(PY_VERSION), - reason='support one python version') -@patch('sys.exit', Mock(side_effect=MyExitError)) -def test_storage_name_failure(): - class TestStorageNameFails(TestStorageName): - - def __init__(self): - super(TestStorageNameFails, self).__init__() - - def is_valid(self): - return False - test_config = _init_config() - test_config.log_to_file = True - good_start = os.path.getmtime(test_config.success_fqn) - fail_start = os.path.getmtime(test_config.failure_fqn) - retry_start = os.path.getmtime(test_config.retry_fqn) - test_organizer = ec.OrganizeExecutes(test_config) - test_organizer.choose(TestStorageNameFails(), 'command name', [], []) - good_end = os.path.getmtime(test_config.success_fqn) - fail_end = os.path.getmtime(test_config.failure_fqn) - retry_end = os.path.getmtime(test_config.retry_fqn) - assert good_end > good_start, 'good' - assert retry_end > retry_start, 'retry' - assert fail_end > fail_start, 'failure' - - -def _communicate(): - return ['return status', None] - - -def _get_headers(uri, subject): - x = """SIMPLE = T / Written by IDL: Fri Oct 6 01:48:35 2017 -BITPIX = -32 / Bits per pixel -NAXIS = 2 / Number of dimensions -NAXIS1 = 2048 / -NAXIS2 = 2048 / -DATATYPE= 'REDUC ' /Data type, SCIENCE/CALIB/REJECT/FOCUS/TEST -END -""" - delim = '\nEND' - extensions = \ - [e + delim for e in x.split(delim) if e.strip()] - headers = [fits.Header.fromstring(e, sep='\n') for e in extensions] - return headers - - -def _get_test_metadata(subject, path): - return {'size': 37, - 'md5sum': 'e330482de75d5c4c88ce6f6ef99035ea', - 'type': 'applicaton/octect-stream'} - - -def _get_test_file_meta(path): - return _get_test_metadata(None, None) - - -def _read_obs(arg1): - return 
SimpleObservation(collection='test_collection', - observation_id='test_obs_id', - algorithm=Algorithm(str('exposure'))) - - -def _get_file_headers(fname): - return _get_headers(None, None) - - -def _get_fname(): - return 'TBD' - - -def _test_map_todo(): - """For a mock.""" - return '' - - -def _get_file_info(): - return {'fname': 'test_file.fits'} diff --git a/caom2pipe/caom2pipe/tests/test_manage_composable.py b/caom2pipe/caom2pipe/tests/test_manage_composable.py deleted file mode 100644 index 3f474023..00000000 --- a/caom2pipe/caom2pipe/tests/test_manage_composable.py +++ /dev/null @@ -1,353 +0,0 @@ -# -*- coding: utf-8 -*- -# *********************************************************************** -# ****************** CANADIAN ASTRONOMY DATA CENTRE ******************* -# ************* CENTRE CANADIEN DE DONNÉES ASTRONOMIQUES ************** -# -# (c) 2018. (c) 2018. -# Government of Canada Gouvernement du Canada -# National Research Council Conseil national de recherches -# Ottawa, Canada, K1A 0R6 Ottawa, Canada, K1A 0R6 -# All rights reserved Tous droits réservés -# -# NRC disclaims any warranties, Le CNRC dénie toute garantie -# expressed, implied, or énoncée, implicite ou légale, -# statutory, of any kind with de quelque nature que ce -# respect to the software, soit, concernant le logiciel, -# including without limitation y compris sans restriction -# any warranty of merchantability toute garantie de valeur -# or fitness for a particular marchande ou de pertinence -# purpose. NRC shall not be pour un usage particulier. -# liable in any event for any Le CNRC ne pourra en aucun cas -# damages, whether direct or être tenu responsable de tout -# indirect, special or general, dommage, direct ou indirect, -# consequential or incidental, particulier ou général, -# arising from the use of the accessoire ou fortuit, résultant -# software. Neither the name de l'utilisation du logiciel. 
Ni -# of the National Research le nom du Conseil National de -# Council of Canada nor the Recherches du Canada ni les noms -# names of its contributors may de ses participants ne peuvent -# be used to endorse or promote être utilisés pour approuver ou -# products derived from this promouvoir les produits dérivés -# software without specific prior de ce logiciel sans autorisation -# written permission. préalable et particulière -# par écrit. -# -# This file is part of the Ce fichier fait partie du projet -# OpenCADC project. OpenCADC. -# -# OpenCADC is free software: OpenCADC est un logiciel libre ; -# you can redistribute it and/or vous pouvez le redistribuer ou le -# modify it under the terms of modifier suivant les termes de -# the GNU Affero General Public la “GNU Affero General Public -# License as published by the License” telle que publiée -# Free Software Foundation, par la Free Software Foundation -# either version 3 of the : soit la version 3 de cette -# License, or (at your option) licence, soit (à votre gré) -# any later version. toute version ultérieure. -# -# OpenCADC is distributed in the OpenCADC est distribué -# hope that it will be useful, dans l’espoir qu’il vous -# but WITHOUT ANY WARRANTY; sera utile, mais SANS AUCUNE -# without even the implied GARANTIE : sans même la garantie -# warranty of MERCHANTABILITY implicite de COMMERCIALISABILITÉ -# or FITNESS FOR A PARTICULAR ni d’ADÉQUATION À UN OBJECTIF -# PURPOSE. See the GNU Affero PARTICULIER. Consultez la Licence -# General Public License for Générale Publique GNU Affero -# more details. pour plus de détails. -# -# You should have received Vous devriez avoir reçu une -# a copy of the GNU Affero copie de la Licence Générale -# General Public License along Publique GNU Affero avec -# with OpenCADC. If not, see OpenCADC ; si ce n’est -# . pas le cas, consultez : -# . 
-# -# $Revision: 4 $ -# -# *********************************************************************** -# - -import os -import pytest -import sys - -import six - -from mock import Mock, patch - -from caom2 import ProductType, ReleaseType, Artifact, ChecksumURI -from caom2 import SimpleObservation -if six.PY3: - from caom2pipe import manage_composable as mc - - -PY_VERSION = '3.6' -THIS_DIR = os.path.dirname(os.path.realpath(__file__)) -TEST_DATA_DIR = os.path.join(THIS_DIR, 'data') -TEST_STATE_FILE = os.path.join(TEST_DATA_DIR, 'test_state.yml') -TEST_OBS_FILE = os.path.join(TEST_DATA_DIR, 'test_obs_id.fits.xml') -ISO8601_FORMAT = '%Y-%m-%dT%H:%M:%S.%f' - - -@pytest.mark.skipif(not sys.version.startswith(PY_VERSION), - reason='support one python version') -def test_read_obs(): - test_subject = mc.read_obs_from_file(TEST_OBS_FILE) - assert test_subject is not None, 'expect a result' - assert isinstance(test_subject, SimpleObservation), 'wrong read' - - -@pytest.mark.skipif(not sys.version.startswith(PY_VERSION), - reason='support one python version') -def test_read_from_file(): - test_subject = mc.read_from_file(TEST_OBS_FILE) - assert test_subject is not None, 'expect a result' - assert isinstance(test_subject, list), 'wrong type of result' - assert len(test_subject) == 8, 'missed some content' - assert test_subject[0].startswith(' 0 - - -@pytest.mark.skipif(not sys.version.startswith(PY_VERSION), - reason='support one python version') -@patch('caom2utils.fits2caom2.CadcDataClient.get_file_info') -def test_compare_checksum(mock_get_file_info): - - # fail case - file doesn't exist - test_file = os.path.join(TEST_DATA_DIR, 'test_omm.fits.gz') - test_netrc = os.path.join(TEST_DATA_DIR, 'test_netrc') - with pytest.raises(mc.CadcException): - mc.compare_checksum(test_netrc, 'OMM', test_file) - - # fail case - file exists, different checksum - make a small test file - test_file = os.path.join(TEST_DATA_DIR, 'C111107_0694_SCI.fits') - f = open(test_file, 'w') - 
f.write('test') - f.close() - with pytest.raises(mc.CadcException): - mc.compare_checksum(test_netrc, 'OMM', test_file) - - -@pytest.mark.skipif(not sys.version.startswith(PY_VERSION), - reason='support one python version') -def test_decompose_lineage(): - test_product_id = 'product_id' - test_uri = 'ad:STARS/galaxies.fits.gz' - test_lineage = '{}/{}'.format(test_product_id, test_uri) - actual_product_id, actual_uri = mc.decompose_lineage(test_lineage) - assert actual_product_id == test_product_id, 'expected {}'.format( - test_product_id) - assert actual_uri == test_uri, 'expected {}'.format(test_uri) - - with pytest.raises(mc.CadcException): - mc.decompose_lineage('') - - -@pytest.mark.skipif(not sys.version.startswith(PY_VERSION), - reason='support one python version') -def test_read_csv_file(): - # bad read - with pytest.raises(mc.CadcException): - mc.read_csv_file(None) - - # good read - test_file_name = os.path.join(TEST_DATA_DIR, 'test_csv.csv') - content = mc.read_csv_file(test_file_name) - assert content is not None, 'empty results returned' - assert len(content) == 1, 'missed the comment and the header' - assert len(content[0]) == 24, 'missed the content' - - -@pytest.mark.skipif(not sys.version.startswith(PY_VERSION), - reason='support one python version') -def test_get_file_meta(): - # None - with pytest.raises(mc.CadcException): - mc.get_file_meta(None) - - # non-existent file - fqn = os.path.join(TEST_DATA_DIR, 'abc.txt') - with pytest.raises(mc.CadcException): - mc.get_file_meta(fqn) - - # empty file - fqn = os.path.join(TEST_DATA_DIR, 'todo.txt') - result = mc.get_file_meta(fqn) - assert result['size'] == 0, result['size'] - - -@pytest.mark.skipif(not sys.version.startswith(PY_VERSION), - reason='support one python version') -@patch('cadcdata.core.net.BaseWsClient') -def test_read_file_list_from_archive(basews_mock): - - response = Mock() - response.status_code.return_value = 200 - basews_mock.return_value.get.return_value = response - test_config = 
mc.Config() - result = mc.read_file_list_from_archive(test_config, 'test_app_name', - '2018-11-18T22:39:56.186443+00:00', - '2018-11-19T22:39:56.186443+00:00') - assert result is not None - assert type(result) is list - assert len(result) == 0 - - -@pytest.mark.skipif(not sys.version.startswith(PY_VERSION), - reason='support one python version') -def test_write_to_file(): - content = ['a.txt', 'b.jpg', 'c.fits.gz'] - test_fqn = '{}/test_out.txt'.format(TEST_DATA_DIR) - if os.path.exists(test_fqn): - os.remove(test_fqn) - - mc.write_to_file(test_fqn, '\n'.join(content)) - assert os.path.exists(test_fqn) - - -@pytest.mark.skipif(not sys.version.startswith(PY_VERSION), - reason='support 3.6 only') -def test_get_lineage(): - result = mc.get_lineage('TEST_COLLECTION', 'TEST_PRODUCT_ID', - 'TEST_FILE_NAME.fits') - assert result == 'TEST_PRODUCT_ID/ad:TEST_COLLECTION/TEST_FILE_NAME.fits' - - -@pytest.mark.skipif(not sys.version.startswith(PY_VERSION), - reason='support one python version') -def test_get_artifact_metadata(): - test_fqn = os.path.join(TEST_DATA_DIR, 'config.yml') - test_uri = 'ad:TEST/config.yml' - - # wrong command line parameters - with pytest.raises(mc.CadcException): - mc.get_artifact_metadata(test_fqn, ProductType.WEIGHT, - ReleaseType.META) - - # create action - result = mc.get_artifact_metadata(test_fqn, ProductType.WEIGHT, - ReleaseType.META, uri=test_uri) - assert result is not None, 'expect a result' - assert isinstance(result, Artifact), 'expect an artifact' - assert result.product_type == ProductType.WEIGHT, 'wrong product type' - assert result.content_length == 314, 'wrong length' - assert result.content_checksum.uri == \ - 'md5:a75377d8d7cc55464944947c01cef816', 'wrong checksum' - - # update action - result.content_checksum = ChecksumURI('md5:abc') - result = mc.get_artifact_metadata(test_fqn, ProductType.WEIGHT, - ReleaseType.META, artifact=result) - assert result is not None, 'expect a result' - assert isinstance(result, Artifact), 'expect 
an artifact' - assert result.content_checksum.uri == \ - 'md5:a75377d8d7cc55464944947c01cef816', 'wrong checksum' - - -@pytest.mark.skipif(not sys.version.startswith(PY_VERSION), - reason='support one python version') -@patch('cadcdata.core.CadcDataClient') -def test_data_put(mock_client): - with pytest.raises(mc.CadcException): - mc.data_put(mock_client, TEST_DATA_DIR, 'TEST.fits', 'TEST', 'default') - - -@pytest.mark.skipif(not sys.version.startswith(PY_VERSION), - reason='support one python version') -@patch('cadcdata.core.CadcDataClient') -def test_data_get(mock_client): - with pytest.raises(mc.CadcException): - mc.data_get(mock_client, TEST_DATA_DIR, 'TEST.fits', 'TEST') - - -@pytest.mark.skipif(not sys.version.startswith(PY_VERSION), - reason='support one python version') -def test_state(): - test_start = os.path.getmtime(TEST_STATE_FILE) - with pytest.raises(mc.CadcException): - test_subject = mc.State('nonexistent') - - test_subject = mc.State(TEST_STATE_FILE) - assert test_subject is not None, 'expect result' - test_result = test_subject.get_bookmark('gemini_timestamp') - assert test_result is not None, 'expect content' - assert test_result == '2017-06-19T03:21:29.345417' - - test_subject.save_state('gemini_timestamp', test_result) - test_end = os.path.getmtime(TEST_STATE_FILE) - assert test_start != test_end, 'file should be modified' diff --git a/caom2pipe/caom2pipe/tests/test_netrc b/caom2pipe/caom2pipe/tests/test_netrc deleted file mode 100644 index d8398ad1..00000000 --- a/caom2pipe/caom2pipe/tests/test_netrc +++ /dev/null @@ -1 +0,0 @@ -machine www.example.com login userid password userpass diff --git a/caom2pipe/dev_requirements.txt b/caom2pipe/dev_requirements.txt deleted file mode 100644 index 905bee93..00000000 --- a/caom2pipe/dev_requirements.txt +++ /dev/null @@ -1,9 +0,0 @@ --e ../caom2 --e . 
-pytest>=3.6 -pytest-cov>=2.5.1 -flake8>=3.4.1 -funcsigs==1.0.2 -mock==2.0.0 -xml-compare==1.0.5 -PyYAML>=3.13 diff --git a/caom2pipe/setup.cfg b/caom2pipe/setup.cfg deleted file mode 100644 index 782930f1..00000000 --- a/caom2pipe/setup.cfg +++ /dev/null @@ -1,40 +0,0 @@ -[build_sphinx] -source-dir = docs -build-dir = docs/_build -all_files = 1 - -[build_docs] -source-dir = docs -build-dir = docs/_build -all_files = 1 - -[upload_docs] -upload-dir = docs/_build/html -show-response = 1 - -[aliases] -test = pytest - -[coverage:run] -omit = */tests/* - -[tool:pytest] -norecursedirs = build docs/_build -doctest_plus = enabled -testpaths = caom2pipe - -[metadata] -package_name = caom2pipe -description = CAOM-2.3 Pipeline Components -long_description = Bits to put together to create CAOM2 instances. -author = Canadian Astronomy Data Centre -author_email = cadc@nrc-cnrc.gc.ca -license = AGPLv3 -url = http://www.cadc-ccda.hia-iha.nrc-cnrc.gc.ca/caom2 -edit_on_github = False -github_project = opencadc/caom2tools -install_requires = cadcdata caom2utils>=1.4.2 caom2repo vos -# version should be PEP386 compatible (http://www.python.org/dev/peps/pep-0386) -version = 0.4 - -[entry_points] diff --git a/caom2pipe/setup.py b/caom2pipe/setup.py deleted file mode 100755 index f08845d3..00000000 --- a/caom2pipe/setup.py +++ /dev/null @@ -1,123 +0,0 @@ -#!/usr/bin/env python -# Licensed under a 3-clause BSD style license - see LICENSE.rst - -import glob -import os -import sys -import imp -from setuptools.command.test import test as TestCommand -from setuptools import find_packages - -from setuptools import setup - -import distutils.cmd -import distutils.log -import subprocess - -# read the README.md file and return as string. 
-def readme(): - with open('README.md') as r_obj: - return r_obj.read() - -# Get some values from the setup.cfg -try: - from ConfigParser import ConfigParser -except ImportError: - from configparser import ConfigParser - -conf = ConfigParser() -conf.optionxform=str -conf.read(['setup.cfg']) -metadata = dict(conf.items('metadata')) - -PACKAGENAME = metadata.get('package_name', 'packagename') -DESCRIPTION = metadata.get('description', 'CADC package') -AUTHOR = metadata.get('author', 'CADC') -AUTHOR_EMAIL = metadata.get('author_email', 'cadc@nrc.gc.ca') -LICENSE = metadata.get('license', 'unknown') -URL = metadata.get('url', 'http://www.cadc-ccda.hia-iha.nrc-cnrc.gc.ca') - -# VERSION should be PEP386 compatible (http://www.python.org/dev/peps/pep-0386) -VERSION = metadata.get('version', 'none') - -# generate the version file -with open(os.path.join(PACKAGENAME, 'version.py'), 'w') as f: - f.write('version = \'{}\'\n'.format(VERSION)) - -# Treat everything in scripts except README.md as a script to be installed -scripts = [fname for fname in glob.glob(os.path.join('scripts', '*')) - if os.path.basename(fname) != 'README.md'] - -# Define entry points for command-line scripts -entry_points = {'console_scripts': []} - -entry_point_list = conf.items('entry_points') -for entry_point in entry_point_list: - entry_points['console_scripts'].append('{0} = {1}'.format(entry_point[0], - entry_point[1])) - -# add the --cov option to the test command -class PyTest(TestCommand): - """class py.test for the testing - - """ - user_options = [] - - def __init__(self, dist, **kw): - TestCommand.__init__(self, dist, **kw) - self.pytest_args = ['--cov', PACKAGENAME] - - def run_tests(self): - # import here, cause outside the eggs aren't loaded - import pytest - err_no = pytest.main(self.pytest_args) - sys.exit(err_no) - -class IntTestCommand(distutils.cmd.Command): - """A custom command to run integration tests.""" - - description = 'Integration tests' - user_options = [] - - def 
initialize_options(self): - """Set default values for options.""" - # Each user option must be listed here with their default value. - - def finalize_options(self): - """Post-process options.""" - - def run(self): - """Run command.""" - import pytest - testfile = os.getcwd() + '/tests/test_integration.py' - pytest.main(['-s', '--capture=no','-x', testfile]) - -install_requires=metadata.get('install_requires', '').strip().split() - -setup(name=PACKAGENAME, - version=VERSION, - description=DESCRIPTION, - scripts=scripts, - install_requires=install_requires, - author=AUTHOR, - author_email=AUTHOR_EMAIL, - license=LICENSE, - url=URL, - long_description=readme(), - zip_safe=False, - use_2to3=False, - setup_requires=['pytest-runner'], - entry_points=entry_points, - packages=find_packages(), - package_data={PACKAGENAME: ['data/*', 'tests/data/*', '*/data/*', '*/tests/data/*']}, - classifiers=[ - 'Natural Language :: English', - 'License :: OSI Approved :: GNU Affero General Public License v3', - 'Programming Language :: Python', - 'Programming Language :: Python :: 3.6' - ], - cmdclass = { - 'coverage': PyTest, - 'inttest': IntTestCommand - } -) diff --git a/caom2utils/caom2utils/fits2caom2.py b/caom2utils/caom2utils/fits2caom2.py index 65cbbed9..72e34148 100755 --- a/caom2utils/caom2utils/fits2caom2.py +++ b/caom2utils/caom2utils/fits2caom2.py @@ -1587,13 +1587,15 @@ def _execute_external(self, value, key, extension): :param key: :param extension: the current extension name or number. 
""" - # determine which of the two possible values for parameter the user + # determine which of the possible values for parameter the user # is hoping for - parameter = '' if 'uri' in value: parameter = self.uri elif 'header' in value: parameter = self._headers[extension] + else: + parameter = {'uri': self.uri, + 'header': self._headers[extension]} result = '' execute = None @@ -1632,27 +1634,20 @@ def _get_datetime(self, from_value): if isinstance(from_value, datetime): return from_value else: - try: - return datetime.strptime(from_value, '%Y-%m-%dT%H:%M:%S') - except ValueError: + result = None + for dt_format in ['%Y-%m-%dT%H:%M:%S', '%Y-%m-%dT%H:%M:%S.%f', + '%Y-%m-%d %H:%M:%S.%f', '%Y-%m-%d', + '%Y/%m/%d %H:%M:%S', '%Y-%m-%d %H:%M:%S']: try: - return datetime.strptime(from_value, - '%Y-%m-%dT%H:%M:%S.%f') + result = datetime.strptime(from_value, dt_format) except ValueError: - try: - return datetime.strptime(from_value, - '%Y-%m-%d %H:%M:%S.%f') - except ValueError: - try: - return datetime.strptime( - from_value, '%Y-%m-%d') - except ValueError: - self.logger.error( - 'Cannot parse datetime {}'.format( - from_value)) - self.add_error( - 'get_datetime', sys.exc_info()[1]) - return None + pass + + if result is None: + self.logger.error('Cannot parse datetime {}'.format( + from_value)) + self.add_error('get_datetime', sys.exc_info()[1]) + return result else: return None @@ -2350,8 +2345,9 @@ def apply_blueprint_to_fits(self): for header in self.headers: sip = False for i in range(1, 6): - if ('CTYPE{}'.format(i) in header) and \ - ('-SIP' in header['CTYPE{}'.format(i)]): + if (('CTYPE{}'.format(i) in header) and + isinstance(header['CTYPE{}'.format(i)], str) and + ('-SIP' in header['CTYPE{}'.format(i)])): sip = True break if sip: @@ -2550,8 +2546,18 @@ def _get_telescope(self, current): self.logger.debug('name is {}'.format(name)) aug_tel = Telescope(str(name), geo_x, geo_y, geo_z) if keywords: - for k in keywords.split(): - aug_tel.keywords.add(k) + if 
isinstance(keywords, set): + if len(keywords) == 1: + temp = keywords.pop() + if temp == 'none': + aug_tel.keywords = set() + else: + aug_tel.keywords.add(temp) + else: + aug_tel.keywords = keywords + else: + for k in keywords.split(): + aug_tel.keywords.add(k) return aug_tel else: return None @@ -3384,8 +3390,6 @@ def get_external_headers(external_url): def get_vos_headers(uri, subject=None): """ Creates the FITS headers object from a vospace file. - The function uses cutouts to retrieve the miniumum amount of data, - minimizing the transfer time. :param uri: vos URI :param subject: user credentials. Anonymous if subject is None :return: List of headers corresponding to each extension. Each header is @@ -3397,12 +3401,8 @@ def get_vos_headers(uri, subject=None): else: client = Client() - # make the smallest cutout possible, to get the least amount of data - # transferred, then transfer it to a temporary file - # - uri_with_cutout = '{}[1:1,1:1]'.format(uri) temp_filename = tempfile.NamedTemporaryFile() - client.copy(uri_with_cutout, temp_filename.name) + client.copy(uri, temp_filename.name, head=True) return _get_headers_from_fits(temp_filename.name) else: # this should be a programming error by now @@ -3491,19 +3491,21 @@ def _update_artifact_meta(uri, artifact, subject=None): raise NotImplementedError( 'Only ad, gemini and vos type URIs supported') - if metadata['md5sum'].startswith('md5:'): - checksum = ChecksumURI('{}'.format(metadata['md5sum'])) - else: - checksum = ChecksumURI('md5:{}'.format(metadata['md5sum'])) logging.debug('old artifact metadata - ' 'uri({}), encoding({}), size({}), type({})'. 
format(artifact.uri, artifact.content_checksum, artifact.content_length, artifact.content_type)) - artifact.content_checksum = checksum - artifact.content_length = int(metadata['size']) - artifact.content_type = str(metadata['type']) + md5sum = metadata.get('md5sum') + if md5sum is not None: + if md5sum.startswith('md5:'): + checksum = ChecksumURI('{}'.format(md5sum)) + else: + checksum = ChecksumURI('md5:{}'.format(md5sum)) + artifact.content_checksum = checksum + artifact.content_length = _to_int(metadata.get('size')) + artifact.content_type = _to_str(metadata.get('type')) logging.debug('updated artifact metadata - ' 'uri({}), encoding({}), size({}), type({})'. format(artifact.uri, @@ -3520,7 +3522,7 @@ def _get_cadc_meta(subject, path): :return: """ client = CadcDataClient(subject) - archive, file_id = path.split('/') + archive, file_id = path.split('/')[-2:] return client.get_file_info(archive, file_id) @@ -3640,9 +3642,11 @@ def _augment(obs, product_id, uri, blueprint, subject, dumpconfig=False, if local: if uri.startswith('vos'): if '.fits' in local or '.fits.gz' in local: + meta_uri = 'file://{}'.format(local) logging.debug( 'Using a FitsParser for vos local {}'.format(local)) - parser = FitsParser(get_vos_headers(uri), blueprint, uri=uri) + parser = FitsParser( + get_cadc_headers(meta_uri), blueprint, uri=uri) elif '.csv' in local: logging.debug( 'Using a GenericParser for vos local {}'.format(local)) @@ -3696,14 +3700,15 @@ def _augment(obs, product_id, uri, blueprint, subject, dumpconfig=False, result = _visit(plugin, parser, obs, visit_local, product_id, **kwargs) - if validate_wcs: - try: - validate(obs) - except InvalidWCSError as e: - logging.error(e) - tb = traceback.format_exc() - logging.error(tb) - raise e + if result is not None: + if validate_wcs: + try: + validate(obs) + except InvalidWCSError as e: + logging.error(e) + tb = traceback.format_exc() + logging.error(tb) + raise e if len(parser._errors) > 0: logging.debug( @@ -4058,6 +4063,7 @@ 
def gen_proc(args, blueprints, **kwargs): and a plugin parameter, that supports external programmatic blueprint modification.""" _set_logging(args.verbose, args.debug, args.quiet) + result = 0 if args.in_obs_xml: obs = _gen_obs(blueprints, args.in_obs_xml) @@ -4099,6 +4105,7 @@ def gen_proc(args, blueprints, **kwargs): else: log_id = args.observation logging.warning('No Observation generated for {}'.format(log_id)) + result = -1 else: writer = ObservationWriter() if args.out_obs_xml: @@ -4106,6 +4113,7 @@ def gen_proc(args, blueprints, **kwargs): else: sys.stdout.flush() writer.write(obs, sys.stdout) + return result def get_gen_proc_arg_parser(): diff --git a/caom2utils/caom2utils/tests/test_collections.py b/caom2utils/caom2utils/tests/test_collections.py index 522dce08..58e9943b 100644 --- a/caom2utils/caom2utils/tests/test_collections.py +++ b/caom2utils/caom2utils/tests/test_collections.py @@ -139,10 +139,29 @@ def test_differences(directory): cardinality = '{} {}'.format(product_id, temp) # return # TODO shorter testing cycle - with patch('caom2utils.fits2caom2.CadcDataClient') as data_client_mock: + with patch('caom2utils.fits2caom2.CadcDataClient') as dc_mock, \ + patch('caom2utils.fits2caom2.get_vos_headers') as gvh_mock, \ + patch('caom2utils.fits2caom2._get_vos_meta') as gvm_mock: def get_file_info(archive, file_id): return file_meta[1][(archive, file_id)] - data_client_mock.return_value.get_file_info.side_effect = get_file_info + + def _get_vos_headers(uri, subject=None): + if uri.startswith('vos'): + fname = data_files_parameter.split()[1].strip() + fits_header = open(fname).read() + return fits2caom2._make_headers_from_string(fits_header) + else: + return None + + def _vos_client_meta(subject, uri): + return {'md5sum': '5b00b00d4b06aba986c3663d09aa581f', + 'size': 682560, + 'type': 'application/fits'} + + dc_mock.return_value.get_file_info.side_effect = get_file_info + gvh_mock.side_effect = _get_vos_headers + gvm_mock.side_effect = _vos_client_meta + 
temp = tempfile.NamedTemporaryFile() sys.argv = ('{} -o {} --observation {} {} {} {} '.format( application, temp.name, @@ -168,7 +187,12 @@ def _get_cardinality(directory): 'MegaPipe.080.156.Z.MP9801/ad:CFHTSG/' \ 'MegaPipe.080.156.Z.MP9801.fits.gif' elif '/omm/' in directory: - return '--lineage Cdemo_ext2_SCIRED/ad:OMM/Cdemo_ext2_SCIRED.fits.gz' + if 'SCIRED' in directory: + return '--lineage Cdemo_ext2_SCIRED/ad:OMM/' \ + 'Cdemo_ext2_SCIRED.fits.gz' + else: + return '--lineage C190531_0432_SCI/ad:OMM/' \ + 'C190531_0432_SCI.fits.gz' elif 'apass/catalog' in directory: return '--lineage catalog/vos://cadc.nrc.ca!vospace/CAOMworkshop/' \ 'Examples/DAO/dao_c122_2016_012725.fits' @@ -239,18 +263,19 @@ def _get_uris(collection, fnames, obs): f = os.path.basename(fname).replace('.header', '') for p in obs.planes.values(): for a in p.artifacts.values(): - if 'ad:{}/{}'.format(collection, f) in a.uri: + if ('ad:{}/{}'.format(collection, f) in a.uri or + (a.uri.startswith('vos') and f in a.uri)): uris.append(a.uri) meta = {} meta['type'] = a.content_type meta['size'] = a.content_length meta['md5sum'] = a.content_checksum.checksum file_url = urlparse(a.uri) - if file_url.scheme != 'ad': + if file_url.scheme not in ['ad', 'vos']: # TODO add hook to support other service providers raise NotImplementedError( - 'Only ad type URIs supported') - archive, file_id = file_url.path.split('/') + 'Only ad, vos type URIs supported') + archive, file_id = file_url.path.split('/')[-2:] file_meta[(archive, file_id)] = meta return uris, file_meta else: diff --git a/caom2utils/caom2utils/tests/test_fits2caom2.py b/caom2utils/caom2utils/tests/test_fits2caom2.py index a268813e..450be68c 100755 --- a/caom2utils/caom2utils/tests/test_fits2caom2.py +++ b/caom2utils/caom2utils/tests/test_fits2caom2.py @@ -516,10 +516,10 @@ def test_help(): EXPECTED_OBS_XML = """ + """xsi:type="caom2:SimpleObservation" +caom2:id="d2893703-b21e-425f-b7d0-ca1f58fdc011"> collection MA1_DRAO-ST 1999-01-01T00:00:00.000 
@@ -544,7 +544,7 @@ def test_help(): DRAO-ST - + HI-line cube 2 @@ -556,11 +556,11 @@ def test_help(): 2000-10-16T00:00:00.000 - + caom:CGPS/TEST/4axes_obs.fits info - + 0 @@ -1207,35 +1207,34 @@ def test_visit_generic_parser(): @pytest.mark.skipif(single_test, reason='Single test mode') -def test_get_vos_headers(): +@patch('caom2utils.fits2caom2.Client') +def test_get_vos_headers(vos_mock): test_uri = 'vos://cadc.nrc.ca!vospace/CAOMworkshop/Examples/DAO/' \ 'dao_c122_2016_012725.fits' - client_orig = vos.Client get_orig = caom2utils.fits2caom2._get_headers_from_fits try: caom2utils.fits2caom2._get_headers_from_fits = Mock( side_effect=_get_headers) - vos.Client = Mock() test_headers = caom2utils.get_vos_headers(test_uri, subject=None) - assert test_headers is not None - assert len(test_headers) == 1 + assert test_headers is not None, 'expect result' + assert len(test_headers) == 1, 'wrong size of result' assert test_headers[0]['SIMPLE'] is True, 'SIMPLE header not found' + assert vos_mock.called, 'mock not called' finally: - vos.Client = client_orig caom2utils.fits2caom2._get_headers_from_fits = get_orig @pytest.mark.skipif(single_test, reason='Single test mode') -def test_get_vos_meta(): +@patch('caom2utils.fits2caom2.Client') +def test_get_vos_meta(vos_mock): get_orig = caom2utils.get_vos_headers - node_orig = vos.Client.get_node try: caom2utils.get_vos_headers = Mock( return_value={'md5sum': '5b00b00d4b06aba986c3663d09aa581f', 'size': 682560, 'type': 'application/octet-stream'}) - vos.Client.get_node = Mock(side_effect=_get_node) + vos_mock.return_value.get_node.side_effect = _get_node test_uri = 'vos://cadc.nrc.ca!vospace/CAOMworkshop/Examples/DAO/' \ 'dao_c122_2016_012725.fits' test_artifact = Artifact(test_uri, ProductType.SCIENCE, @@ -1247,9 +1246,9 @@ def test_get_vos_meta(): assert test_artifact.content_length == 682560, 'length wrong' assert test_artifact.content_type == 'application/fits', \ 'content_type wrong' + assert vos_mock.called, 'mock not 
called' finally: caom2utils.get_vos_headers = get_orig - vos.Client.get_node = node_orig @pytest.mark.skipif(single_test, reason='Single test mode') diff --git a/caom2utils/caom2utils/tests/test_wcsvalidator.py b/caom2utils/caom2utils/tests/test_wcsvalidator.py index dae5c881..9b4ab53d 100644 --- a/caom2utils/caom2utils/tests/test_wcsvalidator.py +++ b/caom2utils/caom2utils/tests/test_wcsvalidator.py @@ -77,6 +77,7 @@ from caom2.caom_util import TypedList, TypedOrderedDict from ..wcsvalidator import WcsPolarizationState import pytest +import six import unittest single_test = False @@ -94,24 +95,24 @@ def test_temporalwcs_validator(self): def test_bad_temporalwcs(self): bad_temporal_wcs = TimeTestUtil.bad_ctype_wcs() - with pytest.raises(InvalidWCSError) as ex: + with six.assertRaisesRegex( + self, InvalidWCSError, 'unexpected TIMESYS, CTYPE'): wcsvalidator._validate_temporal_wcs(bad_temporal_wcs) - assert('unexpected TIMESYS, CTYPE' in str(ex)) bad_temporal_wcs = TimeTestUtil.bad_cunit_wcs() - with pytest.raises(InvalidWCSError) as ex: + with six.assertRaisesRegex( + self, InvalidWCSError, 'unexpected CUNIT'): wcsvalidator._validate_temporal_wcs(bad_temporal_wcs) - assert('unexpected CUNIT' in str(ex)) bad_temporal_wcs = TimeTestUtil.bad_range_wcs() - with pytest.raises(InvalidWCSError) as ex: + with six.assertRaisesRegex( + self, InvalidWCSError, 'range.end not >= range.start'): wcsvalidator._validate_temporal_wcs(bad_temporal_wcs) - assert('range.end not >= range.start' in str(ex)) bad_temporal_wcs = TimeTestUtil.bad_delta() - with pytest.raises(InvalidWCSError) as ex: + with six.assertRaisesRegex( + self, InvalidWCSError, 'delta must be greater than 0.0'): wcsvalidator._validate_temporal_wcs(bad_temporal_wcs) - assert('delta must be greater than 0.0' in str(ex)) @pytest.mark.skipif(single_test, reason='Single test mode') diff --git a/caom2utils/setup.cfg b/caom2utils/setup.cfg index 7410ab05..fa904484 100644 --- a/caom2utils/setup.cfg +++ b/caom2utils/setup.cfg 
@@ -33,9 +33,9 @@ url = http://www.cadc-ccda.hia-iha.nrc-cnrc.gc.ca/caom2 edit_on_github = False github_project = opencadc/caom2tools #install_requires = cadcdata>=1.2.3 caom2>=2.3.5 astropy>=2.0 spherical-geometry==1.2.11 vos>=3.0.6 -install_requires = cadcdata>=1.2.3 caom2>=2.3.5 astropy>=2.0 spherical-geometry==1.2.11;python_version=="2.7" spherical-geometry>=1.2.17;python_version>="3.5" vos>=3.0.6 +install_requires = cadcdata>=1.2.3 caom2>=2.3.5 astropy>=2.0 spherical-geometry==1.2.11;python_version=="2.7" spherical-geometry>=1.2.17;python_version>="3.4" vos>=3.0.6 # version should be PEP386 compatible (http://www.python.org/dev/peps/pep-0386) -version = 1.4.3 +version = 1.4.4